Coverage Report

Created: 2023-09-25 07:43

/src/libhevc/encoder/ihevce_enc_loop_utils.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
21
/*!
22
******************************************************************************
23
* \file ihevce_enc_loop_utils.c
24
*
25
* \brief
26
*    This file contains utility functions of Encode loop
27
*
28
* \date
29
*    18/09/2012
30
*
31
* \author
32
*    Ittiam
33
*
34
*
35
* List of Functions
36
*
37
*
38
******************************************************************************
39
*/
40
41
/*****************************************************************************/
42
/* File Includes                                                             */
43
/*****************************************************************************/
44
/* System include files */
45
#include <stdio.h>
46
#include <string.h>
47
#include <stdlib.h>
48
#include <assert.h>
49
#include <stdarg.h>
50
#include <math.h>
51
#include <limits.h>
52
53
/* User include files */
54
#include "ihevc_typedefs.h"
55
#include "itt_video_api.h"
56
#include "ihevce_api.h"
57
58
#include "rc_cntrl_param.h"
59
#include "rc_frame_info_collector.h"
60
#include "rc_look_ahead_params.h"
61
62
#include "ihevc_defs.h"
63
#include "ihevc_macros.h"
64
#include "ihevc_debug.h"
65
#include "ihevc_structs.h"
66
#include "ihevc_platform_macros.h"
67
#include "ihevc_deblk.h"
68
#include "ihevc_itrans_recon.h"
69
#include "ihevc_chroma_itrans_recon.h"
70
#include "ihevc_chroma_intra_pred.h"
71
#include "ihevc_intra_pred.h"
72
#include "ihevc_inter_pred.h"
73
#include "ihevc_mem_fns.h"
74
#include "ihevc_padding.h"
75
#include "ihevc_weighted_pred.h"
76
#include "ihevc_sao.h"
77
#include "ihevc_resi_trans.h"
78
#include "ihevc_quant_iquant_ssd.h"
79
#include "ihevc_cabac_tables.h"
80
#include "ihevc_common_tables.h"
81
82
#include "ihevce_defs.h"
83
#include "ihevce_hle_interface.h"
84
#include "ihevce_lap_enc_structs.h"
85
#include "ihevce_multi_thrd_structs.h"
86
#include "ihevce_multi_thrd_funcs.h"
87
#include "ihevce_me_common_defs.h"
88
#include "ihevce_had_satd.h"
89
#include "ihevce_error_codes.h"
90
#include "ihevce_bitstream.h"
91
#include "ihevce_cabac.h"
92
#include "ihevce_rdoq_macros.h"
93
#include "ihevce_function_selector.h"
94
#include "ihevce_enc_structs.h"
95
#include "ihevce_entropy_structs.h"
96
#include "ihevce_cmn_utils_instr_set_router.h"
97
#include "ihevce_ipe_instr_set_router.h"
98
#include "ihevce_decomp_pre_intra_structs.h"
99
#include "ihevce_decomp_pre_intra_pass.h"
100
#include "ihevce_enc_loop_structs.h"
101
#include "ihevce_nbr_avail.h"
102
#include "ihevce_enc_loop_utils.h"
103
#include "ihevce_sub_pic_rc.h"
104
#include "ihevce_global_tables.h"
105
#include "ihevce_bs_compute_ctb.h"
106
#include "ihevce_cabac_rdo.h"
107
#include "ihevce_deblk.h"
108
#include "ihevce_frame_process.h"
109
#include "ihevce_rc_enc_structs.h"
110
#include "hme_datatype.h"
111
#include "hme_interface.h"
112
#include "hme_common_defs.h"
113
#include "hme_defs.h"
114
#include "hme_common_utils.h"
115
#include "ihevce_me_instr_set_router.h"
116
#include "ihevce_enc_subpel_gen.h"
117
#include "ihevce_inter_pred.h"
118
#include "ihevce_mv_pred.h"
119
#include "ihevce_mv_pred_merge.h"
120
#include "ihevce_enc_loop_inter_mode_sifter.h"
121
#include "ihevce_enc_cu_recursion.h"
122
#include "ihevce_enc_loop_pass.h"
123
#include "ihevce_common_utils.h"
124
#include "ihevce_dep_mngr_interface.h"
125
#include "ihevce_sao.h"
126
#include "ihevce_tile_interface.h"
127
#include "ihevce_profile.h"
128
#include "ihevce_stasino_helpers.h"
129
#include "ihevce_tu_tree_selector.h"
130
131
/*****************************************************************************/
132
/* Globals                                                                   */
133
/*****************************************************************************/
134
135
extern UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2];
136
extern const UWORD8 gu1_hevce_scan4x4[3][16];
137
extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc[4][16];
138
extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_tr4[16];
139
extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_00[16];
140
141
/*****************************************************************************/
142
/* Constant Macros                                                           */
143
/*****************************************************************************/
144
#define ENABLE_ZERO_CBF 1
145
#define DISABLE_RDOQ_INTRA 0
146
147
/*****************************************************************************/
148
/* Function Definitions                                                      */
149
/*****************************************************************************/
150
void *ihevce_tu_tree_update(
151
    tu_prms_t *ps_tu_prms,
152
    WORD32 *pnum_tu_in_cu,
153
    WORD32 depth,
154
    WORD32 tu_split_flag,
155
    WORD32 tu_early_cbf,
156
    WORD32 i4_x_off,
157
    WORD32 i4_y_off)
158
0
{
159
    //WORD32 tu_split_flag = p_tu_split_flag[0];
160
0
    WORD32 p_tu_split_flag[4];
161
0
    WORD32 p_tu_early_cbf[4];
162
163
0
    WORD32 tu_size = ps_tu_prms->u1_tu_size;
164
165
0
    if(((tu_size >> depth) >= 16) && (tu_split_flag & 0x1))
166
0
    {
167
0
        if((tu_size >> depth) == 32)
168
0
        {
169
            /* Get the individual TU split flags */
170
0
            p_tu_split_flag[0] = (tu_split_flag >> 16) & 0x1F;
171
0
            p_tu_split_flag[1] = (tu_split_flag >> 11) & 0x1F;
172
0
            p_tu_split_flag[2] = (tu_split_flag >> 6) & 0x1F;
173
0
            p_tu_split_flag[3] = (tu_split_flag >> 1) & 0x1F;
174
175
            /* Get the early CBF flags */
176
0
            p_tu_early_cbf[0] = (tu_early_cbf >> 16) & 0x1F;
177
0
            p_tu_early_cbf[1] = (tu_early_cbf >> 11) & 0x1F;
178
0
            p_tu_early_cbf[2] = (tu_early_cbf >> 6) & 0x1F;
179
0
            p_tu_early_cbf[3] = (tu_early_cbf >> 1) & 0x1F;
180
0
        }
181
0
        else
182
0
        {
183
            /* Get the individual TU split flags */
184
0
            p_tu_split_flag[0] = ((tu_split_flag >> 4) & 0x1);
185
0
            p_tu_split_flag[1] = ((tu_split_flag >> 3) & 0x1);
186
0
            p_tu_split_flag[2] = ((tu_split_flag >> 2) & 0x1);
187
0
            p_tu_split_flag[3] = ((tu_split_flag >> 1) & 0x1);
188
189
            /* Get the early CBF flags */
190
0
            p_tu_early_cbf[0] = ((tu_early_cbf >> 4) & 0x1);
191
0
            p_tu_early_cbf[1] = ((tu_early_cbf >> 3) & 0x1);
192
0
            p_tu_early_cbf[2] = ((tu_early_cbf >> 2) & 0x1);
193
0
            p_tu_early_cbf[3] = ((tu_early_cbf >> 1) & 0x1);
194
0
        }
195
196
0
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
197
0
            ps_tu_prms,
198
0
            pnum_tu_in_cu,
199
0
            depth + 1,
200
0
            p_tu_split_flag[0],
201
0
            p_tu_early_cbf[0],
202
0
            i4_x_off,
203
0
            i4_y_off);
204
205
0
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
206
0
            ps_tu_prms,
207
0
            pnum_tu_in_cu,
208
0
            depth + 1,
209
0
            p_tu_split_flag[1],
210
0
            p_tu_early_cbf[1],
211
0
            (i4_x_off + (tu_size >> (depth + 1))),
212
0
            i4_y_off);
213
214
0
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
215
0
            ps_tu_prms,
216
0
            pnum_tu_in_cu,
217
0
            depth + 1,
218
0
            p_tu_split_flag[2],
219
0
            p_tu_early_cbf[2],
220
0
            i4_x_off,
221
0
            (i4_y_off + (tu_size >> (depth + 1))));
222
223
0
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
224
0
            ps_tu_prms,
225
0
            pnum_tu_in_cu,
226
0
            depth + 1,
227
0
            p_tu_split_flag[3],
228
0
            p_tu_early_cbf[3],
229
0
            (i4_x_off + (tu_size >> (depth + 1))),
230
0
            (i4_y_off + (tu_size >> (depth + 1))));
231
0
    }
232
0
    else
233
0
    {
234
0
        if(tu_split_flag & 0x1)
235
0
        {
236
            /* This piece of code will be entered for the 8x8, if it is split
237
            Update the 4 child TU's accordingly. */
238
239
0
            (*pnum_tu_in_cu) += 4;
240
241
            /* TL TU update */
242
0
            ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
243
244
0
            ps_tu_prms->u1_x_off = i4_x_off;
245
246
0
            ps_tu_prms->u1_y_off = i4_y_off;
247
248
            /* Early CBF is not done for 4x4 transforms */
249
0
            ps_tu_prms->i4_early_cbf = 1;
250
251
0
            ps_tu_prms++;
252
253
            /* TR TU update */
254
0
            ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
255
256
0
            ps_tu_prms->u1_x_off = i4_x_off + (tu_size >> (depth + 1));
257
258
0
            ps_tu_prms->u1_y_off = i4_y_off;
259
260
            /* Early CBF is not done for 4x4 transforms */
261
0
            ps_tu_prms->i4_early_cbf = 1;
262
263
0
            ps_tu_prms++;
264
265
            /* BL TU update */
266
0
            ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
267
268
0
            ps_tu_prms->u1_x_off = i4_x_off;
269
270
0
            ps_tu_prms->u1_y_off = i4_y_off + (tu_size >> (depth + 1));
271
272
            /* Early CBF is not done for 4x4 transforms */
273
0
            ps_tu_prms->i4_early_cbf = 1;
274
275
0
            ps_tu_prms++;
276
277
            /* BR TU update */
278
0
            ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
279
280
0
            ps_tu_prms->u1_x_off = i4_x_off + (tu_size >> (depth + 1));
281
282
0
            ps_tu_prms->u1_y_off = i4_y_off + (tu_size >> (depth + 1));
283
284
            /* Early CBF is not done for 4x4 transforms */
285
0
            ps_tu_prms->i4_early_cbf = 1;
286
0
        }
287
0
        else
288
0
        {
289
            /* Update the TU params */
290
0
            ps_tu_prms->u1_tu_size = tu_size >> depth;
291
292
0
            ps_tu_prms->u1_x_off = i4_x_off;
293
294
0
            ps_tu_prms->u1_y_off = i4_y_off;
295
296
0
            (*pnum_tu_in_cu)++;
297
298
            /* Early CBF update for current TU */
299
0
            ps_tu_prms->i4_early_cbf = tu_early_cbf & 0x1;
300
0
        }
301
0
        if((*pnum_tu_in_cu) < MAX_TU_IN_CTB)
302
0
        {
303
0
            ps_tu_prms++;
304
305
0
            ps_tu_prms->u1_tu_size = tu_size;
306
0
        }
307
0
    }
308
309
0
    return ps_tu_prms;
310
0
}
311
312
/*!
313
******************************************************************************
314
* \if Function name : ihevce_compute_quant_rel_param \endif
315
*
316
* \brief
317
*    This function updates quantization related parameters like qp_mod_6 etc in
318
*       context according to new qp
319
*
320
* \date
321
*    08/01/2013
322
*
323
* \author
324
*    Ittiam
325
*
326
* \return
327
*
328
* List of Functions
329
*
330
*
331
******************************************************************************
332
*/
333
void ihevce_compute_quant_rel_param(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD8 i1_cu_qp)
334
0
{
335
0
    WORD32 i4_div_factor;
336
337
0
    ps_ctxt->i4_chrm_cu_qp =
338
0
        (ps_ctxt->u1_chroma_array_type == 2)
339
0
            ? MIN(i1_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51)
340
0
            : gai1_ihevc_chroma_qp_scale[i1_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET];
341
0
    ps_ctxt->i4_cu_qp_div6 = (i1_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
342
0
    i4_div_factor = (i1_cu_qp + 3) / 6;
343
0
    i4_div_factor = CLIP3(i4_div_factor, 3, 6);
344
0
    ps_ctxt->i4_cu_qp_mod6 = (i1_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
345
0
    ps_ctxt->i4_chrm_cu_qp_div6 = (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
346
0
    ps_ctxt->i4_chrm_cu_qp_mod6 = (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
347
348
0
#define INTER_RND_QP_BY_6
349
0
#ifdef INTER_RND_QP_BY_6
350
    /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
351
0
    {
352
0
        ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] =
353
0
            (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)6) + 0.5f);
354
0
    }
355
#else
356
    /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
357
    ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3;
358
#endif
359
360
0
    if(ISLICE == ps_ctxt->i1_slice_type)
361
0
    {
362
        /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
363
0
        ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
364
0
            (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)3) + 0.5f);
365
0
    }
366
0
    else
367
0
    {
368
0
        if(0) /*TRAQO_EXT_ENABLE_ONE_THIRD_RND*/
369
0
        {
370
            /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
371
0
            ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
372
0
                (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)3) + 0.5f);
373
0
        }
374
0
        else
375
0
        {
376
            /* quant factor without RDOQ is 1/6th of shift for intra in inter pic */
377
0
            ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
378
0
                ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER];
379
            /* (1 << QUANT_ROUND_FACTOR_Q) / 6; */
380
0
        }
381
0
    }
382
0
}
383
384
/*!
385
******************************************************************************
386
* \if Function name : ihevce_populate_cl_cu_lambda_prms \endif
387
*
388
* \brief
389
*    Function whihc calculates the Lambda params for current picture
390
*
391
* \param[in] ps_enc_ctxt : encoder ctxt pointer
392
* \param[in] ps_cur_pic_ctxt : current pic ctxt
393
* \param[in] i4_cur_frame_qp : current pic QP
394
* \param[in] first_field : is first field flag
395
* \param[in] i4_temporal_lyr_id : Current picture layer id
396
*
397
* \return
398
*    None
399
*
400
* \author
401
*  Ittiam
402
*
403
*****************************************************************************
404
*/
405
void ihevce_populate_cl_cu_lambda_prms(
406
    ihevce_enc_loop_ctxt_t *ps_ctxt,
407
    frm_lambda_ctxt_t *ps_frm_lamda,
408
    WORD32 i4_slice_type,
409
    WORD32 i4_temporal_lyr_id,
410
    WORD32 i4_lambda_type)
411
0
{
412
0
    WORD32 i4_curr_cu_qp, i4_curr_cu_qp_offset;
413
0
    double lambda_modifier;
414
0
    double lambda_uv_modifier;
415
0
    double lambda;
416
0
    double lambda_uv;
417
418
0
    WORD32 i4_qp_bdoffset = 6 * (ps_ctxt->u1_bit_depth - 8);
419
420
    /*Populate lamda modifier */
421
0
    ps_ctxt->i4_lamda_modifier = ps_frm_lamda->lambda_modifier;
422
0
    ps_ctxt->i4_uv_lamda_modifier = ps_frm_lamda->lambda_uv_modifier;
423
0
    ps_ctxt->i4_temporal_layer_id = i4_temporal_lyr_id;
424
425
0
    for(i4_curr_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
426
0
        i4_curr_cu_qp <= ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
427
0
        i4_curr_cu_qp++)
428
0
    {
429
0
        WORD32 chroma_qp = (ps_ctxt->i4_chroma_format == IV_YUV_422SP_UV)
430
0
                               ? MIN(i4_curr_cu_qp, 51)
431
0
                               : gai1_ihevc_chroma_qp_scale[i4_curr_cu_qp + MAX_QP_BD_OFFSET];
432
433
0
        i4_curr_cu_qp_offset = i4_curr_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
434
435
0
        lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
436
0
        lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
437
438
0
        if((BSLICE == i4_slice_type) && (i4_temporal_lyr_id))
439
0
        {
440
0
            lambda_modifier = ps_frm_lamda->lambda_modifier *
441
0
                              CLIP3((((double)(i4_curr_cu_qp - 12)) / 6.0), 2.00, 4.00);
442
0
            lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier *
443
0
                                 CLIP3((((double)(chroma_qp - 12)) / 6.0), 2.00, 4.00);
444
0
        }
445
0
        else
446
0
        {
447
0
            lambda_modifier = ps_frm_lamda->lambda_modifier;
448
0
            lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier;
449
0
        }
450
0
        if(ps_ctxt->i4_use_const_lamda_modifier)
451
0
        {
452
0
            if(ISLICE == ps_ctxt->i1_slice_type)
453
0
            {
454
0
                lambda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
455
0
                lambda_uv_modifier = ps_ctxt->f_i_pic_lamda_modifier;
456
0
            }
457
0
            else
458
0
            {
459
0
                lambda_modifier = CONST_LAMDA_MOD_VAL;
460
0
                lambda_uv_modifier = CONST_LAMDA_MOD_VAL;
461
0
            }
462
0
        }
463
0
        switch(i4_lambda_type)
464
0
        {
465
0
        case 0:
466
0
        {
467
0
            i4_qp_bdoffset = 0;
468
469
0
            lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
470
0
            lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
471
472
0
            lambda *= lambda_modifier;
473
0
            lambda_uv *= lambda_uv_modifier;
474
475
0
            ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
476
0
                (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
477
478
0
            ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
479
0
                (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
480
481
0
            ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
482
0
                (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
483
0
            if(ps_ctxt->i4_use_const_lamda_modifier)
484
0
            {
485
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
486
0
                    (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
487
0
            }
488
0
            else
489
0
            {
490
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
491
0
                    (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
492
0
            }
493
494
0
            ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
495
0
                (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
496
497
0
            ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
498
0
                ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset];
499
500
0
            ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
501
0
                ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset];
502
503
0
            ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
504
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset];
505
506
0
            ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
507
0
                ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset];
508
509
0
            break;
510
0
        }
511
0
        case 1:
512
0
        {
513
0
            lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
514
0
            lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
515
516
0
            lambda *= lambda_modifier;
517
0
            lambda_uv *= lambda_uv_modifier;
518
519
0
            ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
520
0
                (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
521
522
0
            ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
523
0
                (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
524
525
0
            ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
526
0
                (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
527
0
            if(ps_ctxt->i4_use_const_lamda_modifier)
528
0
            {
529
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
530
0
                    (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
531
0
            }
532
0
            else
533
0
            {
534
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
535
0
                    (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
536
0
            }
537
0
            ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
538
0
                (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
539
540
0
            ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
541
0
                ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset];
542
543
0
            ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
544
0
                ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset];
545
546
0
            ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
547
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset];
548
549
0
            ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
550
0
                ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset];
551
552
0
            break;
553
0
        }
554
0
        case 2:
555
0
        {
556
0
            lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
557
0
            lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
558
559
0
            lambda *= lambda_modifier;
560
0
            lambda_uv *= lambda_uv_modifier;
561
562
0
            ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
563
0
                (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
564
565
0
            ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
566
0
                (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
567
568
0
            ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
569
0
                (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
570
571
0
            if(ps_ctxt->i4_use_const_lamda_modifier)
572
0
            {
573
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
574
0
                    (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
575
0
            }
576
0
            else
577
0
            {
578
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
579
0
                    (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
580
0
            }
581
0
            ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
582
0
                (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
583
584
            /* lambda corresponding to 8- bit, for metrics based on 8- bit ( Example 8bit SAD in encloop)*/
585
0
            lambda = pow(2.0, (((double)(i4_curr_cu_qp - 12)) / 3.0));
586
0
            lambda_uv = pow(2.0, (((double)(chroma_qp - 12)) / 3.0));
587
588
0
            lambda *= lambda_modifier;
589
0
            lambda_uv *= lambda_uv_modifier;
590
591
0
            ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
592
0
                (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
593
594
0
            ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
595
0
                (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
596
597
0
            ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
598
0
                (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
599
0
            if(ps_ctxt->i4_use_const_lamda_modifier)
600
0
            {
601
0
                ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
602
0
                    (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
603
0
            }
604
0
            else
605
0
            {
606
0
                ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
607
0
                    (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
608
0
            }
609
610
0
            ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
611
0
                (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
612
613
0
            break;
614
0
        }
615
0
        default:
616
0
        {
617
            /* Intended to be a barren wasteland! */
618
0
            ASSERT(0);
619
0
        }
620
0
        }
621
0
    }
622
0
}
623
624
/*!
625
******************************************************************************
626
* \if Function name : ihevce_get_cl_cu_lambda_prms \endif
627
*
628
* \brief
629
*    Function whihc calculates the Lambda params for current picture
630
*
631
* \param[in] ps_enc_ctxt : encoder ctxt pointer
632
* \param[in] ps_cur_pic_ctxt : current pic ctxt
633
* \param[in] i4_cur_frame_qp : current pic QP
634
* \param[in] first_field : is first field flag
635
* \param[in] i4_temporal_lyr_id : Current picture layer id
636
*
637
* \return
638
*    None
639
*
640
* \author
641
*  Ittiam
642
*
643
*****************************************************************************
644
*/
645
void ihevce_get_cl_cu_lambda_prms(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 i4_cur_cu_qp)
646
0
{
647
0
    WORD32 chroma_qp = (ps_ctxt->u1_chroma_array_type == 2)
648
0
                           ? MIN(i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51)
649
0
                           : gai1_ihevc_chroma_qp_scale
650
0
                                 [i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET];
651
652
    /* closed loop ssd lambda is same as final lambda */
653
0
    ps_ctxt->i8_cl_ssd_lambda_qf =
654
0
        ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
655
0
    ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
656
0
        ps_ctxt
657
0
            ->i8_cl_ssd_lambda_chroma_qf_array[chroma_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
658
0
    ps_ctxt->u4_chroma_cost_weighing_factor =
659
0
        ps_ctxt->au4_chroma_cost_weighing_factor_array
660
0
            [chroma_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
661
    /* --- Initialized the lambda for SATD computations --- */
662
    /* --- 0.95 is the multiplication factor as per HM --- */
663
    /* --- 1.9 is the multiplication factor for Hadamard Transform --- */
664
0
    ps_ctxt->i4_satd_lamda =
665
0
        ps_ctxt->i4_satd_lamda_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
666
0
    ps_ctxt->i4_sad_lamda =
667
0
        ps_ctxt->i4_sad_type2_lamda_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
668
0
}
669
670
/*!
671
******************************************************************************
672
* \if Function name : ihevce_update_pred_qp \endif
673
*
674
* \brief
675
*    Computes pred qp for the given CU
676
*
677
* \param[in]
678
*
679
* \return
680
*
681
*
682
* \author
683
*  Ittiam
684
*
685
*****************************************************************************
686
*/
687
void ihevce_update_pred_qp(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 cu_pos_x, WORD32 cu_pos_y)
688
0
{
689
0
    WORD32 i4_pred_qp = 0x7FFFFFFF;
690
0
    WORD32 i4_top, i4_left;
691
0
    if(cu_pos_x == 0 && cu_pos_y == 0) /*CTB start*/
692
0
    {
693
0
        i4_pred_qp = ps_ctxt->i4_prev_QP;
694
0
    }
695
0
    else
696
0
    {
697
0
        if(cu_pos_y == 0) /*CTB boundary*/
698
0
        {
699
0
            i4_top = ps_ctxt->i4_prev_QP;
700
0
        }
701
0
        else /*within CTB*/
702
0
        {
703
0
            i4_top = ps_ctxt->ai4_qp_qg[(cu_pos_y - 1) * 8 + (cu_pos_x)];
704
0
        }
705
0
        if(cu_pos_x == 0) /*CTB boundary*/
706
0
        {
707
0
            i4_left = ps_ctxt->i4_prev_QP;
708
0
        }
709
0
        else /*within CTB*/
710
0
        {
711
0
            i4_left = ps_ctxt->ai4_qp_qg[(cu_pos_y)*8 + (cu_pos_x - 1)];
712
0
        }
713
0
        i4_pred_qp = (i4_left + i4_top + 1) >> 1;
714
0
    }
715
0
    ps_ctxt->i4_pred_qp = i4_pred_qp;
716
0
    return;
717
0
}
718
/*!
719
******************************************************************************
720
* \if Function name : ihevce_compute_cu_level_QP \endif
721
*
722
* \brief
723
*    Computes cu level QP with Traqo,Spatial Mod and In-frame RC
724
*
725
* \param[in]
726
*
727
* \return
728
*
729
*
730
* \author
731
*  Ittiam
732
*
733
*****************************************************************************
734
*/
735
void ihevce_compute_cu_level_QP(
736
    ihevce_enc_loop_ctxt_t *ps_ctxt,
737
    WORD32 i4_activity_for_qp,
738
    WORD32 i4_activity_for_lamda,
739
    WORD32 i4_reduce_qp)
740
0
{
741
    /*modify quant related param in ctxt based on current cu qp*/
742
0
    WORD32 i4_input_QP = ps_ctxt->i4_frame_mod_qp;
743
0
    WORD32 cu_qp = i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
744
745
0
    WORD32 i4_max_qp_allowed;
746
0
    WORD32 i4_min_qp_allowed;
747
0
    WORD32 i4_pred_qp;
748
749
0
    i4_pred_qp = ps_ctxt->i4_pred_qp;
750
751
0
    if(ps_ctxt->i4_sub_pic_level_rc)
752
0
    {
753
0
        i4_max_qp_allowed = (i4_pred_qp + (25 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 2)));
754
0
        i4_min_qp_allowed = (i4_pred_qp - (26 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 2)));
755
0
    }
756
0
    else
757
0
    {
758
0
        i4_max_qp_allowed = (i4_input_QP + (7 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 4)));
759
0
        i4_min_qp_allowed = (i4_input_QP - (18 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 4)));
760
0
    }
761
0
    if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6))
762
0
        return;
763
764
#if LAMDA_BASED_ON_QUANT
765
    i4_activity_for_lamda = i4_activity_for_qp;
766
#endif
767
768
0
    if(i4_activity_for_qp != -1)
769
0
    {
770
0
        cu_qp = (ps_ctxt->ps_rc_quant_ctxt
771
0
                     ->pi4_qp_to_qscale[i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]);
772
0
        if(ps_ctxt->i4_qp_mod)
773
0
        {
774
            /*Recompute the Qp as per enc thread's frame level Qp*/
775
0
            ASSERT(i4_activity_for_qp > 0);
776
0
            cu_qp = ((cu_qp * i4_activity_for_qp) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
777
0
                    QP_LEVEL_MOD_ACT_FACTOR;
778
0
        }
779
780
        // To avoid access of uninitialised Qscale to qp conversion table
781
0
        if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale)
782
0
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale;
783
0
        else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale)
784
0
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale;
785
786
0
        cu_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[cu_qp];
787
788
0
        if((1 == i4_reduce_qp) && (cu_qp > 1))
789
0
            cu_qp--;
790
791
        /*CLIP the delta to obey standard allowed QP variation of (-26 + offset/2) to (25 + offset/2)*/
792
0
        if(cu_qp > i4_max_qp_allowed)
793
0
            cu_qp = i4_max_qp_allowed;
794
0
        else if(cu_qp < i4_min_qp_allowed)
795
0
            cu_qp = i4_min_qp_allowed;
796
797
        /* CLIP to maintain Qp between user configured and min and max Qp values*/
798
0
        if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp)
799
0
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
800
0
        else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp)
801
0
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
802
803
        /*cu qp must be populated in cu_analyse_t struct*/
804
0
        ps_ctxt->i4_cu_qp = cu_qp;
805
        /*recompute quant related param at every cu level*/
806
0
        ihevce_compute_quant_rel_param(ps_ctxt, cu_qp);
807
0
    }
808
809
    /*Decoupling qp and lamda calculation */
810
0
    if(i4_activity_for_lamda != -1)
811
0
    {
812
0
        cu_qp = (ps_ctxt->ps_rc_quant_ctxt
813
0
                     ->pi4_qp_to_qscale[i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]);
814
815
0
        if(ps_ctxt->i4_qp_mod)
816
0
        {
817
0
#if MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON
818
            /*Recompute the Qp as per enc thread's frame level Qp*/
819
0
            ASSERT(i4_activity_for_lamda > 0);
820
0
            cu_qp = ((cu_qp * i4_activity_for_lamda) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
821
0
                    QP_LEVEL_MOD_ACT_FACTOR;
822
0
#endif
823
0
        }
824
0
        if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale)
825
0
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale;
826
0
        else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale)
827
0
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale;
828
829
0
        cu_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[cu_qp];
830
831
        /*CLIP the delta to obey standard allowed QP variation of (-26 + offset/2) to (25 + offset/2)*/
832
0
        if(cu_qp > i4_max_qp_allowed)
833
0
            cu_qp = i4_max_qp_allowed;
834
0
        else if(cu_qp < i4_min_qp_allowed)
835
0
            cu_qp = i4_min_qp_allowed;
836
837
        /* CLIP to maintain Qp between user configured and min and max Qp values*/
838
0
        if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp)
839
0
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
840
0
        else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp)
841
0
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
842
        /* get frame level lambda params */
843
0
        ihevce_get_cl_cu_lambda_prms(
844
0
            ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? cu_qp : ps_ctxt->i4_frame_qp);
845
0
    }
846
0
}
847
848
void ihevce_update_cu_level_qp_lamda(
849
    ihevce_enc_loop_ctxt_t *ps_ctxt, cu_analyse_t *ps_cu_analyse, WORD32 trans_size, WORD32 is_intra)
850
0
{
851
0
    WORD32 i4_act_counter = 0, i4_act_counter_lamda = 0;
852
853
0
    if(ps_cu_analyse->u1_cu_size == 64)
854
0
    {
855
0
        ASSERT((trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4));
856
0
        i4_act_counter = (trans_size == 16) + 2 * ((trans_size == 8) || (trans_size == 4));
857
0
        i4_act_counter_lamda = 3;
858
0
    }
859
0
    else if(ps_cu_analyse->u1_cu_size == 32)
860
0
    {
861
0
        ASSERT((trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4));
862
0
        i4_act_counter = (trans_size == 16) + 2 * ((trans_size == 8) || (trans_size == 4));
863
0
        i4_act_counter_lamda = 0;
864
0
    }
865
0
    else if(ps_cu_analyse->u1_cu_size == 16)
866
0
    {
867
0
        ASSERT((trans_size == 16) || (trans_size == 8) || (trans_size == 4));
868
0
        i4_act_counter = (trans_size == 8) || (trans_size == 4);
869
0
        i4_act_counter_lamda = 0;
870
0
    }
871
0
    else if(ps_cu_analyse->u1_cu_size == 8)
872
0
    {
873
0
        ASSERT((trans_size == 8) || (trans_size == 4));
874
0
        i4_act_counter = 1;
875
0
        i4_act_counter_lamda = 0;
876
0
    }
877
0
    else
878
0
    {
879
0
        ASSERT(0);
880
0
    }
881
882
0
    if(ps_ctxt->i4_use_ctb_level_lamda)
883
0
    {
884
0
        ihevce_compute_cu_level_QP(
885
0
            ps_ctxt, ps_cu_analyse->i4_act_factor[i4_act_counter][is_intra], -1, 0);
886
0
    }
887
0
    else
888
0
    {
889
0
        ihevce_compute_cu_level_QP(
890
0
            ps_ctxt,
891
0
            ps_cu_analyse->i4_act_factor[i4_act_counter][is_intra],
892
0
            ps_cu_analyse->i4_act_factor[i4_act_counter_lamda][is_intra],
893
0
            0);
894
0
    }
895
896
0
    ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp;
897
0
}
898
899
/**
900
*******************************************************************************
901
* \if Function name : ihevce_scan_coeffs \endif
902
*
903
* @brief * Computes the coeff buffer for a coded TU for entropy coding
904
*
905
* @par   Description
906
* Computes the coeff buffer for a coded TU for entropy coding
907
*
908
* \param[in] pi2_quan_coeffs Quantized coefficient context
909
*
910
* \param[in] scan_idx Scan index specifying the scan order
911
*
912
* \param[in] trans_size Transform unit size
913
*
914
* \param[inout] pu1_out_data output coeff buffer for a coded TU for entropy coding
915
*
916
* \param[in] pu1_csbf_buf csb flag buffer
917
*
918
* @returns num_bytes
919
* Number of bytes written to pu1_out_data
920
*
921
* @remarks
922
*
923
* \author
924
*  Ittiam
925
*
926
*******************************************************************************
927
*/
928
929
WORD32 ihevce_scan_coeffs(
930
    WORD16 *pi2_quant_coeffs,
931
    WORD32 *pi4_subBlock2csbfId_map,
932
    WORD32 scan_idx,
933
    WORD32 trans_size,
934
    UWORD8 *pu1_out_data,
935
    UWORD8 *pu1_csbf_buf,
936
    WORD32 i4_csbf_stride)
937
0
{
938
0
    WORD32 i, trans_unit_idx, num_gt1_flag;
939
0
    UWORD16 u2_csbf0flags;
940
0
    WORD32 num_bytes = 0;
941
0
    UWORD8 *pu1_trans_table;
942
0
    UWORD8 *pu1_csb_table;
943
0
    WORD32 shift_value, mask_value;
944
0
    UWORD16 u2_sig_coeff_abs_gt0_flags = 0, u2_sig_coeff_abs_gt1_flags = 0;
945
0
    UWORD16 u2_sign_flags;
946
0
    UWORD16 u2_abs_coeff_remaining[16];
947
0
    WORD32 blk_row, blk_col;
948
949
0
    UWORD8 *pu1_out_data_header;
950
0
    UWORD16 *pu2_out_data_coeff;
951
952
0
    WORD32 x_pos, y_pos;
953
0
    WORD32 quant_coeff;
954
955
0
    WORD32 num_gt0_flag;
956
0
    (void)i4_csbf_stride;
957
0
    pu1_out_data_header = pu1_out_data;
958
    /* Need only last 3 bits, rest are reserved for debugging and making */
959
    /* WORD alignment */
960
0
    u2_csbf0flags = 0xBAD0;
961
962
    /* Select proper order for your transform unit and csb based on scan_idx*/
963
    /* and the trans_size */
964
965
    /* scan order inside a csb */
966
0
    pu1_csb_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
967
    /* GETRANGE will give the log_2 of trans_size to shift_value */
968
0
    GETRANGE(shift_value, trans_size);
969
0
    shift_value = shift_value - 3; /* for finding. row no. from scan index */
970
0
    mask_value = (trans_size / 4) - 1; /*for finding the col. no. from scan index*/
971
0
    switch(trans_size)
972
0
    {
973
0
    case 32:
974
0
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_8x8[scan_idx][0]);
975
0
        break;
976
0
    case 16:
977
0
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
978
0
        break;
979
0
    case 8:
980
0
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_2x2[scan_idx][0]);
981
0
        break;
982
0
    case 4:
983
0
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_1x1[0]);
984
0
        break;
985
0
    default:
986
0
        DBG_PRINTF("Invalid Trans Size\n");
987
0
        return -1;
988
0
        break;
989
0
    }
990
991
    /*go through each csb in the scan order for first non-zero coded sub-block*/
992
0
    for(trans_unit_idx = (trans_size * trans_size / 16) - 1; trans_unit_idx >= 0; trans_unit_idx--)
993
0
    {
994
        /* check for the first csb flag in our scan order */
995
0
        if(pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]])
996
0
        {
997
0
            UWORD8 u1_last_x, u1_last_y;
998
            /* row of csb */
999
0
            blk_row = pu1_trans_table[trans_unit_idx] >> shift_value;
1000
            /* col of csb */
1001
0
            blk_col = pu1_trans_table[trans_unit_idx] & mask_value;
1002
1003
            /*check for the 1st non-0 values inside the csb in our scan order*/
1004
0
            for(i = 15; i >= 0; i--)
1005
0
            {
1006
0
                x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
1007
0
                y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
1008
1009
0
                quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
1010
1011
0
                if(quant_coeff != 0)
1012
0
                    break;
1013
0
            }
1014
1015
0
            ASSERT(i >= 0);
1016
1017
0
            u1_last_x = x_pos;
1018
0
            u1_last_y = y_pos;
1019
1020
            /* storing last_x and last_y */
1021
0
            *pu1_out_data_header = u1_last_x;
1022
0
            pu1_out_data_header++;
1023
0
            num_bytes++;
1024
0
            *pu1_out_data_header = u1_last_y;
1025
0
            pu1_out_data_header++;
1026
0
            num_bytes++;
1027
1028
            /* storing the scan order */
1029
0
            *pu1_out_data_header = scan_idx;
1030
0
            pu1_out_data_header++;
1031
0
            num_bytes++;
1032
            /* storing last_sub_block pos. in scan order count */
1033
0
            *pu1_out_data_header = trans_unit_idx;
1034
0
            pu1_out_data_header++;
1035
0
            num_bytes++;
1036
1037
            /*stored the first 4 bytes, now all are word16. So word16 pointer*/
1038
0
            pu2_out_data_coeff = (UWORD16 *)pu1_out_data_header;
1039
1040
            /* u2_csbf0flags word */
1041
0
            u2_csbf0flags = 0xBAD0 | 1; /*since right&bottom csbf is 0*/
1042
            /* storing u2_csbf0flags word */
1043
0
            *pu2_out_data_coeff = u2_csbf0flags;
1044
0
            pu2_out_data_coeff++;
1045
0
            num_bytes += 2;
1046
1047
0
            num_gt0_flag = 1;
1048
0
            num_gt1_flag = 0;
1049
0
            u2_sign_flags = 0;
1050
1051
            /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
1052
0
            u2_sig_coeff_abs_gt0_flags = u2_sig_coeff_abs_gt0_flags | (1 << i);
1053
0
            if(abs(quant_coeff) > 1)
1054
0
            {
1055
                /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
1056
0
                u2_sig_coeff_abs_gt1_flags = u2_sig_coeff_abs_gt1_flags | (1 << i);
1057
                /* update u2_abs_coeff_remaining */
1058
0
                u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
1059
1060
0
                num_gt1_flag++;
1061
0
            }
1062
1063
0
            if(quant_coeff < 0)
1064
0
            {
1065
                /* set the i th bit of u2_sign_flags */
1066
0
                u2_sign_flags = u2_sign_flags | (1 << i);
1067
0
            }
1068
1069
            /* Test remaining elements in our scan order */
1070
            /* Can optimize further by CLZ macro */
1071
0
            for(i = i - 1; i >= 0; i--)
1072
0
            {
1073
0
                x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
1074
0
                y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
1075
1076
0
                quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
1077
1078
0
                if(quant_coeff != 0)
1079
0
                {
1080
                    /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
1081
0
                    u2_sig_coeff_abs_gt0_flags |= (1 << i);
1082
1083
0
                    if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE))
1084
0
                    {
1085
                        /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
1086
0
                        u2_sig_coeff_abs_gt1_flags |= (1 << i);
1087
1088
                        /* update u2_abs_coeff_remaining */
1089
0
                        u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
1090
1091
0
                        num_gt1_flag++; /*n0. of Ones in sig_coeff_abs_gt1_flag*/
1092
0
                    }
1093
1094
0
                    if(quant_coeff < 0)
1095
0
                    {
1096
                        /* set the i th bit of u2_sign_flags */
1097
0
                        u2_sign_flags |= (1 << i);
1098
0
                    }
1099
1100
0
                    num_gt0_flag++;
1101
0
                }
1102
0
            }
1103
1104
            /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */
1105
0
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags;
1106
0
            pu2_out_data_coeff++;
1107
0
            num_bytes += 2;
1108
            /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */
1109
0
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags;
1110
0
            pu2_out_data_coeff++;
1111
0
            num_bytes += 2;
1112
            /* storing u2_sign_flags 2 bytes */
1113
0
            *pu2_out_data_coeff = u2_sign_flags;
1114
0
            pu2_out_data_coeff++;
1115
0
            num_bytes += 2;
1116
1117
            /* Store the u2_abs_coeff_remaining[] */
1118
0
            for(i = 0; i < num_gt1_flag; i++)
1119
0
            {
1120
                /* storing u2_abs_coeff_remaining[i] 2 bytes */
1121
0
                *pu2_out_data_coeff = u2_abs_coeff_remaining[i];
1122
0
                pu2_out_data_coeff++;
1123
0
                num_bytes += 2;
1124
0
            }
1125
1126
0
            break; /*We just need this loop for finding 1st non-zero csb only*/
1127
0
        }
1128
0
    }
1129
1130
    /* go through remaining csb in the scan order */
1131
0
    for(trans_unit_idx = trans_unit_idx - 1; trans_unit_idx >= 0; trans_unit_idx--)
1132
0
    {
1133
0
        blk_row = pu1_trans_table[trans_unit_idx] >> shift_value; /*row of csb*/
1134
0
        blk_col = pu1_trans_table[trans_unit_idx] & mask_value; /*col of csb*/
1135
1136
        /* u2_csbf0flags word */
1137
0
        u2_csbf0flags = 0xBAD0 | /* assuming csbf_buf has only 0 or 1 values */
1138
0
                        (pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]]);
1139
1140
        /********************************************************************/
1141
        /* Minor hack: As per HEVC spec csbf in not signalled in stream for */
1142
        /* block0, instead sig coeff map is directly signalled. This is     */
1143
        /* taken care by forcing csbf for block0 to be 1 even if it is 0    */
1144
        /********************************************************************/
1145
0
        if(0 == trans_unit_idx)
1146
0
        {
1147
0
            u2_csbf0flags |= 1;
1148
0
        }
1149
1150
0
        if((blk_col + 1 < trans_size / 4)) /* checking right boundary */
1151
0
        {
1152
0
            if(pu1_csbf_buf[pi4_subBlock2csbfId_map[blk_row * trans_size / 4 + blk_col + 1]])
1153
0
            {
1154
                /* set the 2nd bit of u2_csbf0flags for right csbf */
1155
0
                u2_csbf0flags = u2_csbf0flags | (1 << 1);
1156
0
            }
1157
0
        }
1158
0
        if((blk_row + 1 < trans_size / 4)) /* checking bottom oundary */
1159
0
        {
1160
0
            if(pu1_csbf_buf[pi4_subBlock2csbfId_map[(blk_row + 1) * trans_size / 4 + blk_col]])
1161
0
            {
1162
                /* set the 3rd bit of u2_csbf0flags  for bottom csbf */
1163
0
                u2_csbf0flags = u2_csbf0flags | (1 << 2);
1164
0
            }
1165
0
        }
1166
1167
        /* storing u2_csbf0flags word */
1168
0
        *pu2_out_data_coeff = u2_csbf0flags;
1169
0
        pu2_out_data_coeff++;
1170
0
        num_bytes += 2;
1171
1172
        /* check for the csb flag in our scan order */
1173
0
        if(u2_csbf0flags & 0x1)
1174
0
        {
1175
0
            u2_sig_coeff_abs_gt0_flags = 0;
1176
0
            u2_sig_coeff_abs_gt1_flags = 0;
1177
0
            u2_sign_flags = 0;
1178
1179
0
            num_gt0_flag = 0;
1180
0
            num_gt1_flag = 0;
1181
            /* check for the non-0 values inside the csb in our scan order */
1182
            /* Can optimize further by CLZ macro */
1183
0
            for(i = 15; i >= 0; i--)
1184
0
            {
1185
0
                x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
1186
0
                y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
1187
1188
0
                quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
1189
1190
0
                if(quant_coeff != 0)
1191
0
                {
1192
                    /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
1193
0
                    u2_sig_coeff_abs_gt0_flags |= (1 << i);
1194
1195
0
                    if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE))
1196
0
                    {
1197
                        /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
1198
0
                        u2_sig_coeff_abs_gt1_flags |= (1 << i);
1199
1200
                        /* update u2_abs_coeff_remaining */
1201
0
                        u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
1202
1203
0
                        num_gt1_flag++;
1204
0
                    }
1205
1206
0
                    if(quant_coeff < 0)
1207
0
                    {
1208
                        /* set the i th bit of u2_sign_flags */
1209
0
                        u2_sign_flags = u2_sign_flags | (1 << i);
1210
0
                    }
1211
1212
0
                    num_gt0_flag++;
1213
0
                }
1214
0
            }
1215
1216
            /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */
1217
0
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags;
1218
0
            pu2_out_data_coeff++;
1219
0
            num_bytes += 2;
1220
1221
            /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */
1222
0
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags;
1223
0
            pu2_out_data_coeff++;
1224
0
            num_bytes += 2;
1225
1226
            /* storing u2_sign_flags 2 bytes */
1227
0
            *pu2_out_data_coeff = u2_sign_flags;
1228
0
            pu2_out_data_coeff++;
1229
0
            num_bytes += 2;
1230
1231
            /* Store the u2_abs_coeff_remaining[] */
1232
0
            for(i = 0; i < num_gt1_flag; i++)
1233
0
            {
1234
                /* storing u2_abs_coeff_remaining[i] 2 bytes */
1235
0
                *pu2_out_data_coeff = u2_abs_coeff_remaining[i];
1236
0
                pu2_out_data_coeff++;
1237
0
                num_bytes += 2;
1238
0
            }
1239
0
        }
1240
0
    }
1241
1242
0
    return num_bytes; /* Return the number of bytes written to out_data */
1243
0
}
1244
1245
/**
1246
*******************************************************************************
1247
* \if Function name : ihevce_populate_intra_pred_mode \endif
1248
*
1249
* \brief * populates intra pred modes,b2_mpm_idx,b1_prev_intra_luma_pred_flag &
1250
* b5_rem_intra_pred_mode for a CU based on nieghbouring CUs,
1251
*
1252
* \par   Description
1253
* Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
1254
* for a CU
1255
*
1256
* \param[in] top_intra_mode Top intra mode
1257
* \param[in] left_intra_mode Left intra mode
1258
* \param[in] available_top Top availability flag
1259
* \param[in] available_left Left availability flag
1260
* \param[in] cu_pos_y CU 'y' position
1261
* \param[in] ps_cand_mode_list pointer to populate candidate list
1262
*
1263
* \returns none
1264
*
1265
* \author
1266
*  Ittiam
1267
*
1268
*******************************************************************************
1269
*/
1270
1271
void ihevce_populate_intra_pred_mode(
1272
    WORD32 top_intra_mode,
1273
    WORD32 left_intra_mode,
1274
    WORD32 available_top,
1275
    WORD32 available_left,
1276
    WORD32 cu_pos_y,
1277
    WORD32 *ps_cand_mode_list)
1278
0
{
1279
    /* local variables */
1280
0
    WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top;
1281
1282
    /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */
1283
    /* N = top */
1284
0
    if(0 == available_top)
1285
0
    {
1286
0
        cand_intra_pred_mode_top = INTRA_DC;
1287
0
    }
1288
    /* for neighbour != INTRA, setting DC is done outside */
1289
0
    else if(0 == cu_pos_y) /* It's on the CTB boundary */
1290
0
    {
1291
0
        cand_intra_pred_mode_top = INTRA_DC;
1292
0
    }
1293
0
    else
1294
0
    {
1295
0
        cand_intra_pred_mode_top = top_intra_mode;
1296
0
    }
1297
1298
    /* N = left */
1299
0
    if(0 == available_left)
1300
0
    {
1301
0
        cand_intra_pred_mode_left = INTRA_DC;
1302
0
    }
1303
    /* for neighbour != INTRA, setting DC is done outside */
1304
0
    else
1305
0
    {
1306
0
        cand_intra_pred_mode_left = left_intra_mode;
1307
0
    }
1308
1309
    /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */
1310
0
    if(cand_intra_pred_mode_left == cand_intra_pred_mode_top)
1311
0
    {
1312
0
        if(cand_intra_pred_mode_left < 2)
1313
0
        {
1314
0
            ps_cand_mode_list[0] = INTRA_PLANAR;
1315
0
            ps_cand_mode_list[1] = INTRA_DC;
1316
0
            ps_cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */
1317
0
        }
1318
0
        else
1319
0
        {
1320
0
            ps_cand_mode_list[0] = cand_intra_pred_mode_left;
1321
0
            ps_cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32);
1322
0
            ps_cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32);
1323
0
        }
1324
0
    }
1325
0
    else
1326
0
    {
1327
0
        ps_cand_mode_list[0] = cand_intra_pred_mode_left;
1328
0
        ps_cand_mode_list[1] = cand_intra_pred_mode_top;
1329
1330
0
        if((cand_intra_pred_mode_left != INTRA_PLANAR) &&
1331
0
           (cand_intra_pred_mode_top != INTRA_PLANAR))
1332
0
        {
1333
0
            ps_cand_mode_list[2] = INTRA_PLANAR;
1334
0
        }
1335
0
        else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC))
1336
0
        {
1337
0
            ps_cand_mode_list[2] = INTRA_DC;
1338
0
        }
1339
0
        else
1340
0
        {
1341
0
            ps_cand_mode_list[2] = INTRA_ANGULAR(26);
1342
0
        }
1343
0
    }
1344
0
}
1345
/**
1346
*******************************************************************************
1347
* \if Function name : ihevce_intra_pred_mode_signaling \endif
1348
*
1349
* \brief * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx &
1350
* b5_rem_intra_pred_mode for a CU
1351
*
1352
* \par   Description
1353
* Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
1354
* for a CU
1355
*
1356
* \param[in] ps_nbr_top Top neighbour context
1357
* \param[in] ps_nbr_left Left neighbour context
1358
* \param[in] available_top Top availability flag
1359
* \param[in] available_left Left availability flag
1360
* \param[in] cu_pos_y CU 'y' position
1361
* \param[in] luma_intra_pred_mode_current the intra_pred_mode of current block
1362
* \param[inout] ps_intra_pred_mode_current
1363
* Pointer to structure having b1_prev_intra_luma_pred_flag, b2_mpm_idx and
1364
* b5_rem_intra_pred_mode
1365
*
1366
* \returns none
1367
*
1368
* \author
1369
*  Ittiam
1370
*
1371
*******************************************************************************
1372
*/
1373
1374
void ihevce_intra_pred_mode_signaling(
1375
    WORD32 top_intra_mode,
1376
    WORD32 left_intra_mode,
1377
    WORD32 available_top,
1378
    WORD32 available_left,
1379
    WORD32 cu_pos_y,
1380
    WORD32 luma_intra_pred_mode_current,
1381
    intra_prev_rem_flags_t *ps_intra_pred_mode_current)
1382
0
{
1383
    /* local variables */
1384
0
    WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top;
1385
0
    WORD32 cand_mode_list[3];
1386
1387
0
    ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0;
1388
0
    ps_intra_pred_mode_current->b2_mpm_idx = 0;  // for safety purpose
1389
0
    ps_intra_pred_mode_current->b5_rem_intra_pred_mode = 0;
1390
1391
    /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */
1392
    /* N = top */
1393
0
    if(0 == available_top)
1394
0
    {
1395
0
        cand_intra_pred_mode_top = INTRA_DC;
1396
0
    }
1397
    /* for neighbour != INTRA, setting DC is done outside */
1398
0
    else if(0 == cu_pos_y) /* It's on the CTB boundary */
1399
0
    {
1400
0
        cand_intra_pred_mode_top = INTRA_DC;
1401
0
    }
1402
0
    else
1403
0
    {
1404
0
        cand_intra_pred_mode_top = top_intra_mode;
1405
0
    }
1406
1407
    /* N = left */
1408
0
    if(0 == available_left)
1409
0
    {
1410
0
        cand_intra_pred_mode_left = INTRA_DC;
1411
0
    }
1412
    /* for neighbour != INTRA, setting DC is done outside */
1413
0
    else
1414
0
    {
1415
0
        cand_intra_pred_mode_left = left_intra_mode;
1416
0
    }
1417
1418
    /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */
1419
0
    if(cand_intra_pred_mode_left == cand_intra_pred_mode_top)
1420
0
    {
1421
0
        if(cand_intra_pred_mode_left < 2)
1422
0
        {
1423
0
            cand_mode_list[0] = INTRA_PLANAR;
1424
0
            cand_mode_list[1] = INTRA_DC;
1425
0
            cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */
1426
0
        }
1427
0
        else
1428
0
        {
1429
0
            cand_mode_list[0] = cand_intra_pred_mode_left;
1430
0
            cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32);
1431
0
            cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32);
1432
0
        }
1433
0
    }
1434
0
    else
1435
0
    {
1436
0
        cand_mode_list[0] = cand_intra_pred_mode_left;
1437
0
        cand_mode_list[1] = cand_intra_pred_mode_top;
1438
1439
0
        if((cand_intra_pred_mode_left != INTRA_PLANAR) &&
1440
0
           (cand_intra_pred_mode_top != INTRA_PLANAR))
1441
0
        {
1442
0
            cand_mode_list[2] = INTRA_PLANAR;
1443
0
        }
1444
0
        else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC))
1445
0
        {
1446
0
            cand_mode_list[2] = INTRA_DC;
1447
0
        }
1448
0
        else
1449
0
        {
1450
0
            cand_mode_list[2] = INTRA_ANGULAR(26);
1451
0
        }
1452
0
    }
1453
1454
    /* Signal Generation */
1455
1456
    /* Flag & mpm_index generation */
1457
0
    if(cand_mode_list[0] == luma_intra_pred_mode_current)
1458
0
    {
1459
0
        ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
1460
0
        ps_intra_pred_mode_current->b2_mpm_idx = 0;
1461
0
    }
1462
0
    else if(cand_mode_list[1] == luma_intra_pred_mode_current)
1463
0
    {
1464
0
        ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
1465
0
        ps_intra_pred_mode_current->b2_mpm_idx = 1;
1466
0
    }
1467
0
    else if(cand_mode_list[2] == luma_intra_pred_mode_current)
1468
0
    {
1469
0
        ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
1470
0
        ps_intra_pred_mode_current->b2_mpm_idx = 2;
1471
0
    }
1472
    /* Flag & b5_rem_intra_pred_mode generation */
1473
0
    else
1474
0
    {
1475
0
        WORD32 rem_mode;
1476
1477
0
        ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0;
1478
1479
        /* sorting cand_mode_list */
1480
0
        if(cand_mode_list[0] > cand_mode_list[1])
1481
0
        {
1482
0
            SWAP(cand_mode_list[0], cand_mode_list[1]);
1483
0
        }
1484
0
        if(cand_mode_list[0] > cand_mode_list[2])
1485
0
        {
1486
0
            SWAP(cand_mode_list[0], cand_mode_list[2]);
1487
0
        }
1488
0
        if(cand_mode_list[1] > cand_mode_list[2])
1489
0
        {
1490
0
            SWAP(cand_mode_list[1], cand_mode_list[2]);
1491
0
        }
1492
1493
0
        rem_mode = luma_intra_pred_mode_current;
1494
1495
0
        if((rem_mode) >= cand_mode_list[2])
1496
0
        {
1497
0
            (rem_mode)--;
1498
0
        }
1499
0
        if((rem_mode) >= cand_mode_list[1])
1500
0
        {
1501
0
            (rem_mode)--;
1502
0
        }
1503
0
        if((rem_mode) >= cand_mode_list[0])
1504
0
        {
1505
0
            (rem_mode)--;
1506
0
        }
1507
0
        ps_intra_pred_mode_current->b5_rem_intra_pred_mode = rem_mode;
1508
0
    }
1509
0
}
1510
1511
/*!
******************************************************************************
* \if Function name : ihevce_quant_rounding_factor_gen \endif
*
* \brief
*    Fills the two quant rounding factor buffers (0-1 and 1-2) for one
*    transform block. Each factor is produced by QUANT_ROUND_FACTOR from a
*    pair of bit estimates read out of gau2_ihevce_cabac_bin_to_bits (indexed
*    by the selected cabac context model entry) and a scaled lambda modifier.
*
* \param[in] i4_trans_size  transform size; its log2 is derived with GETRANGE.
*                           A log2 size of 2 (4x4) takes the single sub-block
*                           path, every other size takes the multi sub-block
*                           path
* \param[in] is_luma        non-zero selects the luma context base offsets,
*                           zero selects the chroma ones
* \param[in] ps_rdopt_entropy_ctxt  source of the cabac context models: the
*                           current CU entropy context
*                           (as_cu_entropy_ctxt[i4_curr_buf_idx]) when
*                           i4_is_tu_level_quant_rounding is 1, otherwise
*                           au1_init_cabac_ctxt_states
* \param[out] pi4_quant_round_0_1  receives i4_trans_size * i4_trans_size
*                           factors (16 for 4x4), laid out with a row stride
*                           of i4_trans_size
* \param[out] pi4_quant_round_1_2  receives i4_trans_size * i4_trans_size
*                           factors (16 for 4x4), same layout
* \param[in] i4_lamda_modifier  lambda modifier; multiplied by 2^(-8/3) and by
*                           (1 << LAMDA_Q_SHIFT_FACT) before being passed to
*                           QUANT_ROUND_FACTOR
* \param[in] i4_is_tu_level_quant_rounding  1 : use the current CU cabac
*                           states, any other value : use the initial states
*
* \return
*    None
*
*****************************************************************************
*/
void ihevce_quant_rounding_factor_gen(
1512
    WORD32 i4_trans_size,
1513
    WORD32 is_luma,
1514
    rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt,
1515
    WORD32 *pi4_quant_round_0_1,
1516
    WORD32 *pi4_quant_round_1_2,
1517
    double i4_lamda_modifier,
1518
    UWORD8 i4_is_tu_level_quant_rounding)
1519
0
{
1520
    //WORD32 i4_scan_idx = ps_ctxt->i4_scan_idx;
1521
0
    UWORD8 *pu1_ctxt_model;
1522
0
    WORD32 scan_pos;
1523
0
    WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag    */
1524
0
    WORD32 abs_gt1_base_ctxt;
1525
0
    WORD32 log2_tr_size, i;
1526
0
    UWORD16 u4_bits_estimated_r0, u4_bits_estimated_r1, u4_bits_estimated_r2;
1527
0
    UWORD16 u4_bits_estimated_r1_temp;
1528
0
    WORD32 j = 0;
1529
0
    WORD32 k = 0;
1530
0
    WORD32 temp2;
1531
1532
0
    /* scale the lambda modifier by 2^(-8/3) and convert it to fixed point */
    double i4_lamda_mod = i4_lamda_modifier * pow(2.0, (-8.0 / 3.0));
1533
0
    LWORD64 lamda_mod = (LWORD64)(i4_lamda_mod * (1 << LAMDA_Q_SHIFT_FACT));
1534
    /* transform size to log2transform size */
1535
0
    GETRANGE(log2_tr_size, i4_trans_size);
1536
0
    log2_tr_size -= 1;
1537
1538
0
    /* choose the cabac context model table: current CU entropy context for */
    /* TU level rounding, otherwise the initial cabac context states        */
    if(1 == i4_is_tu_level_quant_rounding)
1539
0
    {
1540
0
        entropy_context_t *ps_cur_tu_entropy;
1541
0
        cab_ctxt_t *ps_cabac;
1542
0
        WORD32 curr_buf_idx = ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
1543
0
        ps_cur_tu_entropy = &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[curr_buf_idx];
1544
1545
0
        ps_cabac = &ps_cur_tu_entropy->s_cabac_ctxt;
1546
1547
0
        pu1_ctxt_model = &ps_cabac->au1_ctxt_models[0];
1548
0
    }
1549
0
    else
1550
0
    {
1551
0
        pu1_ctxt_model = &ps_rdopt_entropy_ctxt->au1_init_cabac_ctxt_states[0];
1552
0
    }
1553
    /* Select the base context offsets of the sig coeff flag and the abs   */
    /* greater than 1 flag, depending on luma / chroma and transform size  */
1554
0
    if(is_luma)
1555
0
    {
1556
0
        sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
1557
0
        abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG;
1558
1559
0
        if(3 == log2_tr_size)
1560
0
        {
1561
            /* 8x8 transform size */
1562
            /* Assuming diagonal scan idx for now */
1563
0
            sig_coeff_base_ctxt += 9;
1564
0
        }
1565
0
        else if(3 < log2_tr_size)
1566
0
        {
1567
            /* larger transform sizes */
1568
0
            sig_coeff_base_ctxt += 21;
1569
0
        }
1570
0
    }
1571
0
    else
1572
0
    {
1573
        /* chroma context initializations */
1574
0
        sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
1575
0
        abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16;
1576
1577
0
        if(3 == log2_tr_size)
1578
0
        {
1579
            /* 8x8 transform size */
1580
0
            sig_coeff_base_ctxt += 9;
1581
0
        }
1582
0
        else if(3 < log2_tr_size)
1583
0
        {
1584
            /* larger transform sizes */
1585
0
            sig_coeff_base_ctxt += 12;
1586
0
        }
1587
0
    }
1588
1589
    /* Transform size of 4x4 will have only a single CSB */
1590
    /* derive the context inc as per section 9.3.3.1.4 */
1591
1592
0
    if(2 == log2_tr_size)
1593
0
    {
1594
0
        UWORD8 sig_ctxinc;
1595
0
        WORD32 state_mps;
1596
0
        WORD32 gt1_ctxt = 0;
1597
0
        WORD32 ctxt_set = 0;
1598
0
        WORD32 ctxt_idx = 0;
1599
1600
        /* context set based on luma subblock pos */
1601
1602
        /* Encode the abs level gt1 bins */
1603
        /* Currently calculating trade off between mps(2) and mps(1)*/
1604
        /* The estimation has to be further done for mps(11) and mps(111)*/
1605
        /* ctxt_set = 0 as transform 4x4 has only one csb with DC */
1606
        /* gt1_ctxt = 0 for the coeff value to be 2 */
1607
1608
0
        ctxt_set = gt1_ctxt = 0;
1609
0
        ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1610
1611
0
        state_mps = pu1_ctxt_model[ctxt_idx];
1612
1613
0
        u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
1614
1615
0
        u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1616
1617
0
        QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1_temp, lamda_mod);
1618
0
        /* the same 1-2 rounding factor is written for all 16 coeffs of the 4x4 block */
        for(scan_pos = 0; scan_pos < 16; scan_pos++)
1619
0
        {
1620
0
            *(pi4_quant_round_1_2 + scan_pos) = temp2;
1621
0
        }
1622
1623
0
        /* the 0-1 rounding factor is derived per coeff position */
        for(scan_pos = 0; scan_pos < 16; scan_pos++)
1624
0
        {
1625
            //UWORD8 nbr_csbf = 1;
1626
            /* derive the x,y pos */
1627
0
            UWORD8 y_pos_x_pos = scan_pos;  //gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
1628
1629
            /* 4x4 transform size increment uses lookup */
1630
0
            sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];
1631
1632
            /*Get the mps state based on ctxt modes */
1633
0
            state_mps = pu1_ctxt_model[sig_ctxinc + sig_coeff_base_ctxt];
1634
1635
            /* Bits taken to encode sig coeff flag as 0 */
1636
0
            u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1637
1638
            /* Bits taken to encode sig coeff flag as 1, also account for sign bit worst case */
1639
            //
1640
0
            u4_bits_estimated_r1 =
1641
0
                (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
1642
1643
            /* Add the bits estimated above from the abs greater than 1 flag context */
1644
0
            u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
1645
1646
0
            QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
1647
0
            *(pi4_quant_round_0_1 + scan_pos) = temp2;
1648
0
        }
1649
0
    }
1650
0
    else
1651
0
    {
1652
0
        UWORD8 *pu1_hevce_sigcoeff_ctxtinc;
1653
0
        WORD32 is_nbr_csb_state_mps;
1654
1655
0
        WORD32 state_mps;
1656
0
        WORD32 gt1_ctxt = 0;
1657
0
        WORD32 ctxt_set = 0;
1658
0
        WORD32 ctxt_idx;
1659
        /*1to2 rounding factor is same for all sub blocks except for sub-block = 0*/
1660
        /*Hence will write all the sub-block with i >=1 coeff, and then overwrite for i = 0*/
1661
1662
        /*ctxt_set = 0 DC subblock, the previous state did not have 2
1663
        ctxt_set = 1 DC subblock, the previous state did have >= 2
1664
        ctxt_set = 2 AC subblock, the previous state did not have 2
1665
        ctxt_set = 3 AC subblock, the previous state did have >= 2*/
1666
0
        i = 1;
1667
0
        ctxt_set = (i && is_luma) ? 2 : 0;
1668
1669
0
        /* the increment selects the "previous state did have >= 2" entry of the list above */
        ctxt_set++;
1670
1671
        /*0th position indicates the probability of 2 */
1672
        /*1th position indicates the probability of 1 */
1673
        /*2th position indicates the probability of 11 */
1674
        /*3th position indicates the probability of 111 */
1675
1676
0
        gt1_ctxt = 0;
1677
0
        ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1678
1679
0
        state_mps = pu1_ctxt_model[ctxt_idx];
1680
1681
0
        u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
1682
1683
0
        u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1684
0
        QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod);
1685
1686
0
        /* write the i >= 1 value to every coeff of the transform block */
        for(scan_pos = 0; scan_pos < (16 * (i4_trans_size * i4_trans_size >> 4)); scan_pos++)
1687
0
        {
1688
0
            *(pi4_quant_round_1_2 + scan_pos) = temp2;
1689
0
        }
1690
1691
0
        /* recompute the 1-2 factor for the first sub-block (i = 0) */
        i = 0;
1692
0
        ctxt_set = (i && is_luma) ? 2 : 0;
1693
0
        ctxt_set++;
1694
1695
        /*0th position indicates the probability of 2 */
1696
        /*1th position indicates the probability of 1 */
1697
        /*2th position indicates the probability of 11 */
1698
        /*3th position indicates the probability of 111 */
1699
1700
0
        gt1_ctxt = 0;
1701
0
        ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1702
1703
0
        state_mps = pu1_ctxt_model[ctxt_idx];
1704
1705
0
        u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
1706
1707
0
        u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1708
0
        QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod);
1709
1710
0
        /* overwrite the first 4x4 sub-block; rows are i4_trans_size entries apart */
        for(scan_pos = 0; scan_pos < 16; scan_pos++)
1711
0
        {
1712
0
            *(pi4_quant_round_1_2 + ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2;
1713
0
        }
1714
1715
0
        {
1716
0
            WORD32 ctxt_idx;
1717
1718
0
            WORD32 nbr_csbf_0, nbr_csbf_1;
1719
0
            WORD32 state_mps_0, state_mps_1;
1720
0
            ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX;
1721
0
            ctxt_idx += is_luma ? 0 : 2;
1722
1723
            /* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */
1724
            /* if neighbour not available, ctxt idx = 0*/
1725
            /* both contexts are sampled: nbr csbf fixed to 0, then fixed to 1 */
0
            nbr_csbf_0 = 0;
1726
0
            ctxt_idx += nbr_csbf_0 ? 1 : 0;
1727
0
            state_mps_0 = pu1_ctxt_model[ctxt_idx];
1728
1729
0
            nbr_csbf_1 = 1;
1730
0
            ctxt_idx += nbr_csbf_1 ? 1 : 0;
1731
0
            state_mps_1 = pu1_ctxt_model[ctxt_idx];
1732
1733
0
            /* set only when the lowest bit of both context model entries is 1 */
            is_nbr_csb_state_mps = ((state_mps_0 % 2) == 1) && ((state_mps_1 % 2) == 1);
1734
0
        }
1735
1736
0
        if(1 == is_nbr_csb_state_mps)
1737
0
        {
1738
0
            /* i walks over the 4x4 sub-blocks of the transform block in raster order */
            for(i = 0; i < (i4_trans_size * i4_trans_size >> 4); i++)
1739
0
            {
1740
0
                UWORD8 sig_ctxinc;
1741
0
                WORD32 state_mps;
1742
0
                WORD32 gt1_ctxt = 0;
1743
0
                WORD32 ctxt_set = 0;
1744
1745
0
                WORD32 ctxt_idx;
1746
1747
                /*Check if the cabac states had previous nbr available */
1748
                /* table row by sub-block position: first sub-block -> [3], rest of   */
                /* first sub-block row -> [1], first sub-block of later rows -> [2], */
                /* all others -> [0]                                                 */
1749
0
                if(i == 0)
1750
0
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[3][0];
1751
0
                else if(i < (i4_trans_size >> 2))
1752
0
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[1][0];
1753
0
                else if((i % (i4_trans_size >> 2)) == 0)
1754
0
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[2][0];
1755
0
                else
1756
0
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0];
1757
1758
0
                /* k counts the completed rows of sub-blocks */
                if(((i % (i4_trans_size >> 2)) == 0) && (i != 0))
1759
0
                    k++;
1760
1761
0
                /* j : offset of the top-left coeff of sub-block i in the output buffer */
                j = ((i4_trans_size * 4) * k) + ((i % (i4_trans_size >> 2)) * 4);
1762
                /*ctxt_set = 0 DC subblock, the previous state did not have 2
1763
                ctxt_set = 1 DC subblock, the previous state did have >= 2
1764
                ctxt_set = 2 AC subblock, the previous state did not have 2
1765
                ctxt_set = 3 AC subblock, the previous state did have >= 2*/
1766
1767
0
                ctxt_set = (i && is_luma) ? 2 : 0;
1768
1769
                /* NOTE(review): this comment used to read "gt1_ctxt = 1 for the co-ef value to be 1", */
                /* but the code below uses gt1_ctxt = 0 - confirm which one is intended               */
1770
0
                gt1_ctxt = 0;
1771
0
                ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1772
1773
0
                state_mps = pu1_ctxt_model[ctxt_idx];
1774
1775
                /* Bits estimated from the abs greater than 1 flag context; added to the r1 estimate below */
1776
0
                u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1777
1778
0
                for(scan_pos = 0; scan_pos < 16; scan_pos++)
1779
0
                {
1780
0
                    UWORD8 y_pos_x_pos;
1781
1782
0
                    if(scan_pos || i)
1783
0
                    {
1784
0
                        y_pos_x_pos = scan_pos;  // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
1785
                        /* ctxt for AC coeff depends on curpos and neighbour csbf */
1786
0
                        sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
1787
1788
                        /* based on luma subblock pos */
1789
0
                        sig_ctxinc += (i && is_luma) ? 3 : 0;
1790
1791
0
                        sig_ctxinc += sig_coeff_base_ctxt;
1792
0
                    }
1793
0
                    else
1794
0
                    {
1795
                        /*MAM : both scan pos and i 0 implies the DC coef of 1st block only */
1796
                        /* DC coeff has fixed context for luma and chroma */
1797
0
                        sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
1798
0
                    }
1799
1800
                    /*Get the mps state based on ctxt modes */
1801
0
                    state_mps = pu1_ctxt_model[sig_ctxinc];
1802
1803
                    /* Bits taken to encode sig coeff flag as 0 */
1804
0
                    u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1805
1806
0
                    /* Bits taken to encode sig coeff flag as 1, also account for sign bit worst case */
                    u4_bits_estimated_r1 =
1807
0
                        (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
1808
1809
                    /* Add the bits estimated above from the abs greater than 1 flag context */
1810
0
                    u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
1811
0
                    {
1812
0
                        QUANT_ROUND_FACTOR(
1813
0
                            temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
1814
0
                        *(pi4_quant_round_0_1 +
1815
0
                          ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size)) + j) = temp2;
1816
0
                    }
1817
0
                }
1818
0
            }
1819
0
        }
1820
0
        else
1821
0
        {
1822
            /*If Both nbr csbfs are 0, then all the coef in sub-blocks will have same value except for 1st subblock,
1823
            Hence will write the same value to all sub block, and overwrite for the 1st one */
1824
0
            i = 1;
1825
0
            {
1826
0
                UWORD8 sig_ctxinc;
1827
0
                UWORD8 y_pos_x_pos;
1828
0
                WORD32 quant_rounding_0_1;
1829
1830
0
                pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc_00[0];
1831
1832
0
                /* only the entry for position 0 of the lookup is used for all sub-blocks */
                scan_pos = 0;
1833
0
                y_pos_x_pos = scan_pos;  // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
1834
                /* ctxt for AC coeff depends on curpos and neighbour csbf */
1835
0
                sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
1836
1837
                /* based on luma subblock pos */
1838
0
                sig_ctxinc += (is_luma) ? 3 : 0;
1839
1840
0
                sig_ctxinc += sig_coeff_base_ctxt;
1841
1842
                /*Get the mps state based on ctxt modes */
1843
0
                state_mps = pu1_ctxt_model[sig_ctxinc];
1844
1845
                /* Bits taken to encode sig coeff flag as 0 */
1846
0
                u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1847
1848
0
                /* Bits taken to encode sig coeff flag as 1, also account for sign bit worst case */
                u4_bits_estimated_r1 =
1849
0
                    (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
1850
1851
                /*ctxt_set = 0 DC subblock, the previous state did not have 2
1852
                ctxt_set = 1 DC subblock, the previous state did have >= 2
1853
                ctxt_set = 2 AC subblock, the previous state did not have 2
1854
                ctxt_set = 3 AC subblock, the previous state did have >= 2*/
1855
1856
0
                ctxt_set = (i && is_luma) ? 2 : 0;
1857
1858
                /* NOTE(review): this comment used to read "gt1_ctxt = 1 for the co-ef value to be 1", */
                /* but the code below uses gt1_ctxt = 0 - confirm which one is intended               */
1859
0
                gt1_ctxt = 0;
1860
0
                ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1861
1862
0
                state_mps = pu1_ctxt_model[ctxt_idx];
1863
1864
                /* Add the bits estimated from the abs greater than 1 flag context */
1865
0
                u4_bits_estimated_r1 += gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1866
1867
0
                QUANT_ROUND_FACTOR(
1868
0
                    quant_rounding_0_1, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
1869
1870
0
                /* write the common value to every coeff of the transform block */
                for(scan_pos = 0; scan_pos < (16 * (i4_trans_size * i4_trans_size >> 4));
1871
0
                    scan_pos++)
1872
0
                {
1873
0
                    *(pi4_quant_round_0_1 + scan_pos) = quant_rounding_0_1;
1874
0
                }
1875
0
            }
1876
1877
            /*First Subblock*/
1878
0
            i = 0;
1879
1880
0
            {
1881
0
                UWORD8 sig_ctxinc;
1882
0
                WORD32 state_mps;
1883
0
                WORD32 gt1_ctxt = 0;
1884
0
                WORD32 ctxt_set = 0;
1885
1886
0
                WORD32 ctxt_idx;
1887
1888
                /*Check if the cabac states had previous nbr available */
1889
1890
0
                {
1891
0
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0];
1892
1893
                    /*ctxt_set = 0 DC subblock, the previous state did not have 2
1894
                    ctxt_set = 1 DC subblock, the previous state did have >= 2
1895
                    ctxt_set = 2 AC subblock, the previous state did not have 2
1896
                    ctxt_set = 3 AC subblock, the previous state did have >= 2*/
1897
0
                    ctxt_set = (i && is_luma) ? 2 : 0;
1898
1899
                    /* NOTE(review): this comment used to read "gt1_ctxt = 1 for the co-ef value to be 1", */
                    /* but the code below uses gt1_ctxt = 0 - confirm which one is intended               */
1900
0
                    gt1_ctxt = 0;
1901
0
                    ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1902
1903
0
                    state_mps = pu1_ctxt_model[ctxt_idx];
1904
1905
                    /* Bits estimated from the abs greater than 1 flag context; added to the r1 estimate below */
1906
0
                    u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1907
1908
0
                    /* overwrite the first 4x4 sub-block; rows are i4_trans_size entries apart */
                    for(scan_pos = 0; scan_pos < 16; scan_pos++)
1909
0
                    {
1910
0
                        UWORD8 y_pos_x_pos;
1911
1912
0
                        if(scan_pos)
1913
0
                        {
1914
0
                            y_pos_x_pos = scan_pos;  // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
1915
                            /* ctxt for AC coeff depends on curpos and neighbour csbf */
1916
0
                            sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
1917
1918
                            /* based on luma subblock pos */
1919
0
                            sig_ctxinc += (i && is_luma) ? 3 : 0;
1920
1921
0
                            sig_ctxinc += sig_coeff_base_ctxt;
1922
0
                        }
1923
0
                        else
1924
0
                        {
1925
                            /*MAM : both scan pos and i 0 implies the DC coef of 1st block only */
1926
                            /* DC coeff has fixed context for luma and chroma */
1927
0
                            sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
1928
0
                        }
1929
1930
                        /*Get the mps state based on ctxt modes */
1931
0
                        state_mps = pu1_ctxt_model[sig_ctxinc];
1932
1933
                        /* Bits taken to encode sig coeff flag as 0 */
1934
0
                        u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1935
1936
0
                        /* Bits taken to encode sig coeff flag as 1, also account for sign bit worst case */
                        u4_bits_estimated_r1 =
1937
0
                            (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
1938
1939
                        /* Add the bits estimated above from the abs greater than 1 flag context */
1940
0
                        u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
1941
0
                        {
1942
0
                            QUANT_ROUND_FACTOR(
1943
0
                                temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
1944
0
                            *(pi4_quant_round_0_1 +
1945
0
                              ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2;
1946
0
                        }
1947
0
                    }
1948
0
                }
1949
0
            }
1950
0
        }
1951
0
    }
1952
0
    return;
1953
0
}
1954
1955
/*!
1956
******************************************************************************
1957
* \if Function name : ihevce_t_q_iq_ssd_scan_fxn \endif
1958
*
1959
* \brief
1960
*    Transform unit level (Luma) enc_loop function
1961
*
1962
* \param[in] ps_ctxt    enc_loop module ctxt pointer
1963
* \param[in] pu1_pred   pointer to predicted data buffer
1964
* \param[in] pred_strd  predicted buffer stride
1965
* \param[in] pu1_src    pointer to source data buffer
1966
* \param[in] src_strd   source buffer stride
1967
* \param[in] pi2_deq_data   pointer to store iq data
1968
* \param[in] deq_data_strd  iq data buffer stride
1969
* \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod)
1970
* \param[out] pu1_csbf_buf  pointer to store the csbf for all 4x4 in a current
1971
*                           block
1972
* \param[out] csbf_strd  csbf buffer stride
1973
* \param[in] trans_size transform size (4, 8, 16,32)
1974
* \param[in] packed_pred_mode   0:Inter 1:Intra 2:Skip
1975
* \param[out] pi8_cost      pointer to store the cost
1976
* \param[out] pi4_coeff_off pointer to store the number of bytes produced in
1977
*                           coeff buffer
1978
* \param[out] pi4_tu_bits   pointer to store the best TU bits required to encode
1979
the current TU in RDopt Mode
1980
* \param[out] pu4_blk_sad   pointer to store the block sad for RC
1981
* \param[out] pi4_zero_col  pointer to store the zero_col info for the TU
1982
* \param[out] pi4_zero_row  pointer to store the zero_row info for the TU
1983
* \param[in]  i4_perform_rdoq Indicates if RDOQ should be performed or not
1984
* \param[in]  i4_perform_sbh Indicates if SBH should be performed or not
1985
*
1986
* \return
1987
*    CBF of the current block
1988
*
1989
* \author
1990
*  Ittiam
1991
*
1992
*****************************************************************************
1993
*/
1994
1995
WORD32 ihevce_t_q_iq_ssd_scan_fxn(
1996
    ihevce_enc_loop_ctxt_t *ps_ctxt,
1997
    UWORD8 *pu1_pred,
1998
    WORD32 pred_strd,
1999
    UWORD8 *pu1_src,
2000
    WORD32 src_strd,
2001
    WORD16 *pi2_deq_data,
2002
    WORD32 deq_data_strd,
2003
    UWORD8 *pu1_recon,
2004
    WORD32 i4_recon_stride,
2005
    UWORD8 *pu1_ecd_data,
2006
    UWORD8 *pu1_csbf_buf,
2007
    WORD32 csbf_strd,
2008
    WORD32 trans_size,
2009
    WORD32 packed_pred_mode,
2010
    LWORD64 *pi8_cost,
2011
    WORD32 *pi4_coeff_off,
2012
    WORD32 *pi4_tu_bits,
2013
    UWORD32 *pu4_blk_sad,
2014
    WORD32 *pi4_zero_col,
2015
    WORD32 *pi4_zero_row,
2016
    UWORD8 *pu1_is_recon_available,
2017
    WORD32 i4_perform_rdoq,
2018
    WORD32 i4_perform_sbh,
2019
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2020
    WORD32 i4_alpha_stim_multiplier,
2021
    UWORD8 u1_is_cu_noisy,
2022
#endif
2023
    SSD_TYPE_T e_ssd_type,
2024
    WORD32 early_cbf)
2025
0
{
2026
0
    WORD32 cbf = 0;
2027
0
    WORD32 trans_idx;
2028
0
    WORD32 quant_scale_mat_offset;
2029
0
    WORD32 *pi4_trans_scratch;
2030
0
    WORD16 *pi2_trans_values;
2031
0
    WORD16 *pi2_quant_coeffs;
2032
0
    WORD32 *pi4_subBlock2csbfId_map = NULL;
2033
2034
#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
2035
    WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i;
2036
#endif
2037
2038
0
    rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt;
2039
2040
0
    WORD32 i4_perform_zcbf = (ENABLE_INTER_ZCU_COST && (PRED_MODE_INTRA != packed_pred_mode)) ||
2041
0
                             (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE);
2042
0
    WORD32 i4_perform_coeff_level_rdoq = (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING);
2043
0
    WORD8 intra_flag = 0;
2044
0
    ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW);
2045
2046
0
    *pi4_tu_bits = 0;
2047
0
    *pi4_coeff_off = 0;
2048
0
    pu1_is_recon_available[0] = 0;
2049
2050
0
    if((PRED_MODE_SKIP == packed_pred_mode) || (0 == early_cbf))
2051
0
    {
2052
0
        if(e_ssd_type != NULL_TYPE)
2053
0
        {
2054
            /* SSD cost is stored to the pointer */
2055
0
            pi8_cost[0] =
2056
2057
0
                ps_ctxt->s_cmn_opt_func.pf_ssd_and_sad_calculator(
2058
0
                    pu1_pred, pred_strd, pu1_src, src_strd, trans_size, pu4_blk_sad);
2059
2060
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2061
0
            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
2062
0
            {
2063
0
                pi8_cost[0] = ihevce_inject_stim_into_distortion(
2064
0
                    pu1_src,
2065
0
                    src_strd,
2066
0
                    pu1_pred,
2067
0
                    pred_strd,
2068
0
                    pi8_cost[0],
2069
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2070
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2071
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2072
0
                                                 100.0,
2073
0
                    trans_size,
2074
0
                    0,
2075
0
                    ps_ctxt->u1_enable_psyRDOPT,
2076
0
                    NULL_PLANE);
2077
0
            }
2078
0
#endif
2079
2080
            /* copy pred to recon for skip mode */
2081
0
            if(SPATIAL_DOMAIN_SSD == e_ssd_type)
2082
0
            {
2083
0
                ps_ctxt->s_cmn_opt_func.pf_copy_2d(
2084
0
                    pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size);
2085
0
                pu1_is_recon_available[0] = 1;
2086
0
            }
2087
0
            else
2088
0
            {
2089
0
                pu1_is_recon_available[0] = 0;
2090
0
            }
2091
2092
0
#if ENABLE_INTER_ZCU_COST
2093
0
            ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
2094
0
#endif
2095
0
        }
2096
0
        else
2097
0
        {
2098
0
            pi8_cost[0] = UINT_MAX;
2099
0
        }
2100
2101
        /* cbf is returned as 0 */
2102
0
        return (0);
2103
0
    }
2104
2105
    /* derive context variables */
2106
0
    pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0];
2107
0
    pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
2108
0
    pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2);
2109
2110
    /* translate the transform size to index for 4x4 and 8x8 */
2111
0
    trans_idx = trans_size >> 2;
2112
2113
0
    if(PRED_MODE_INTRA == packed_pred_mode)
2114
0
    {
2115
0
        quant_scale_mat_offset = 0;
2116
0
        intra_flag = 1;
2117
#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
2118
        ai4_quant_rounding_factors[0][0] =
2119
            MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3);
2120
2121
        for(i = 0; i < trans_size * trans_size; i++)
2122
        {
2123
            ai4_quant_rounding_factors[1][i] =
2124
                MAX(ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3][i],
2125
                    (1 << QUANT_ROUND_FACTOR_Q) / 3);
2126
            ai4_quant_rounding_factors[2][i] =
2127
                MAX(ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3][i],
2128
                    (1 << QUANT_ROUND_FACTOR_Q) / 3);
2129
        }
2130
#endif
2131
0
    }
2132
0
    else
2133
0
    {
2134
0
        quant_scale_mat_offset = NUM_TRANS_TYPES;
2135
0
    }
2136
    /* for intra 4x4 DST transform should be used */
2137
0
    if((1 == trans_idx) && (1 == intra_flag))
2138
0
    {
2139
0
        trans_idx = 0;
2140
0
    }
2141
    /* for 16x16 cases */
2142
0
    else if(16 == trans_size)
2143
0
    {
2144
0
        trans_idx = 3;
2145
0
    }
2146
    /* for 32x32 cases */
2147
0
    else if(32 == trans_size)
2148
0
    {
2149
0
        trans_idx = 4;
2150
0
    }
2151
2152
0
    switch(trans_size)
2153
0
    {
2154
0
    case 4:
2155
0
    {
2156
0
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU;
2157
2158
0
        break;
2159
0
    }
2160
0
    case 8:
2161
0
    {
2162
0
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU;
2163
2164
0
        break;
2165
0
    }
2166
0
    case 16:
2167
0
    {
2168
0
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU;
2169
2170
0
        break;
2171
0
    }
2172
0
    case 32:
2173
0
    {
2174
0
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU;
2175
2176
0
        break;
2177
0
    }
2178
0
    }
2179
2180
    /* Do not call the FT and Quant functions if early_cbf is 0 */
2181
0
    if(1 == early_cbf)
2182
0
    {
2183
        /* ---------- call residue and transform block ------- */
2184
0
        *pu4_blk_sad = ps_ctxt->apf_resd_trns[trans_idx](
2185
0
            pu1_src,
2186
0
            pu1_pred,
2187
0
            pi4_trans_scratch,
2188
0
            pi2_trans_values,
2189
0
            src_strd,
2190
0
            pred_strd,
2191
0
            trans_size,
2192
0
            NULL_PLANE);
2193
2194
0
        cbf = ps_ctxt->apf_quant_iquant_ssd
2195
0
                  [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2](
2196
0
                      pi2_trans_values,
2197
0
                      ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset],
2198
0
                      pi2_quant_coeffs,
2199
0
                      pi2_deq_data,
2200
0
                      trans_size,
2201
0
                      ps_ctxt->i4_cu_qp_div6,
2202
0
                      ps_ctxt->i4_cu_qp_mod6,
2203
0
#if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
2204
0
                      ps_ctxt->i4_quant_rnd_factor[intra_flag],
2205
0
                      ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
2206
0
                      ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
2207
#else
2208
                      intra_flag ? ai4_quant_rounding_factors[0][0]
2209
                                 : ps_ctxt->i4_quant_rnd_factor[intra_flag],
2210
                      intra_flag ? ai4_quant_rounding_factors[1]
2211
                                 : ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
2212
                      intra_flag ? ai4_quant_rounding_factors[2]
2213
                                 : ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
2214
#endif
2215
0
                      trans_size,
2216
0
                      trans_size,
2217
0
                      deq_data_strd,
2218
0
                      pu1_csbf_buf,
2219
0
                      csbf_strd,
2220
0
                      pi4_zero_col,
2221
0
                      pi4_zero_row,
2222
0
                      ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset],
2223
0
                      pi8_cost);
2224
2225
0
        if(e_ssd_type != FREQUENCY_DOMAIN_SSD)
2226
0
        {
2227
0
            pi8_cost[0] = UINT_MAX;
2228
0
        }
2229
0
    }
2230
2231
0
    if(0 != cbf)
2232
0
    {
2233
0
        if(i4_perform_sbh || i4_perform_rdoq)
2234
0
        {
2235
0
            ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd;
2236
0
            ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size;
2237
0
            ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map;
2238
2239
0
            ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_cu_qp_div6;
2240
0
            ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_cu_qp_mod6;
2241
0
            ps_rdoq_sbh_ctxt->i4_scan_idx = ps_ctxt->i4_scan_idx;
2242
0
            ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
2243
0
            ps_rdoq_sbh_ctxt->i4_trans_size = trans_size;
2244
2245
0
            ps_rdoq_sbh_ctxt->pi2_dequant_coeff =
2246
0
                ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset];
2247
0
            ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data;
2248
0
            ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs;
2249
0
            ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values;
2250
0
            ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf;
2251
2252
            /* ------- call coeffs scan function ------- */
2253
0
            if((!i4_perform_rdoq))
2254
0
            {
2255
0
                ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
2256
2257
0
                pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
2258
0
            }
2259
0
        }
2260
2261
0
        *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
2262
0
            pi2_quant_coeffs,
2263
0
            pi4_subBlock2csbfId_map,
2264
0
            ps_ctxt->i4_scan_idx,
2265
0
            trans_size,
2266
0
            pu1_ecd_data,
2267
0
            pu1_csbf_buf,
2268
0
            csbf_strd);
2269
0
    }
2270
0
    *pi8_cost >>= ga_trans_shift[trans_idx];
2271
2272
0
#if RDOPT_ZERO_CBF_ENABLE
2273
    /* compare null cbf cost with encode tu rd-cost */
2274
0
    if(cbf != 0)
2275
0
    {
2276
0
        WORD32 tu_bits;
2277
0
        LWORD64 tu_rd_cost;
2278
2279
0
        LWORD64 zero_cbf_cost = 0;
2280
2281
        /*Populating the feilds of rdoq_ctxt structure*/
2282
0
        if(i4_perform_rdoq)
2283
0
        {
2284
            /* transform size to log2transform size */
2285
0
            GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size);
2286
0
            ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1;
2287
0
            ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_qf;
2288
0
            ps_rdoq_sbh_ctxt->i4_is_luma = 1;
2289
0
            ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx];
2290
0
            ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td =
2291
0
                (1 << ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td) / 2;
2292
0
            ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0;
2293
0
            ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col;
2294
0
            ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row;
2295
0
        }
2296
0
        else if(i4_perform_zcbf)
2297
0
        {
2298
0
            zero_cbf_cost =
2299
2300
0
                ps_ctxt->s_cmn_opt_func.pf_ssd_calculator(
2301
0
                    pu1_src, pu1_pred, src_strd, pred_strd, trans_size, trans_size, NULL_PLANE);
2302
0
        }
2303
2304
        /************************************************************************/
2305
        /* call the entropy rdo encode to get the bit estimate for current tu   */
2306
        /* note that tu includes only residual coding bits and does not include */
2307
        /* tu split, cbf and qp delta encoding bits for a TU                    */
2308
        /************************************************************************/
2309
0
        if(i4_perform_rdoq)
2310
0
        {
2311
0
            tu_bits = ihevce_entropy_rdo_encode_tu_rdoq(
2312
0
                &ps_ctxt->s_rdopt_entropy_ctxt,
2313
0
                (pu1_ecd_data),
2314
0
                trans_size,
2315
0
                1,
2316
0
                ps_rdoq_sbh_ctxt,
2317
0
                pi8_cost,
2318
0
                &zero_cbf_cost,
2319
0
                0);
2320
2321
0
            if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0)
2322
0
            {
2323
0
                cbf = 0;
2324
0
                *pi4_coeff_off = 0;
2325
0
            }
2326
2327
0
            if((i4_perform_sbh) && (0 != cbf))
2328
0
            {
2329
0
                ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
2330
0
                ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
2331
0
                *pi8_cost = ps_rdoq_sbh_ctxt->i8_ssd_cost;
2332
0
            }
2333
2334
            /*Add round value before normalizing*/
2335
0
            *pi8_cost += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td;
2336
0
            *pi8_cost >>= ga_trans_shift[trans_idx];
2337
2338
0
            if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1)
2339
0
            {
2340
0
                pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
2341
0
                *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
2342
0
                    pi2_quant_coeffs,
2343
0
                    pi4_subBlock2csbfId_map,
2344
0
                    ps_ctxt->i4_scan_idx,
2345
0
                    trans_size,
2346
0
                    pu1_ecd_data,
2347
0
                    pu1_csbf_buf,
2348
0
                    csbf_strd);
2349
0
            }
2350
0
        }
2351
0
        else
2352
0
        {
2353
0
            tu_bits = ihevce_entropy_rdo_encode_tu(
2354
0
                &ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 1, i4_perform_sbh);
2355
0
        }
2356
2357
0
        *pi4_tu_bits = tu_bits;
2358
2359
0
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
2360
0
        {
2361
0
            *pi8_cost = ihevce_it_recon_ssd(
2362
0
                ps_ctxt,
2363
0
                pu1_src,
2364
0
                src_strd,
2365
0
                pu1_pred,
2366
0
                pred_strd,
2367
0
                pi2_deq_data,
2368
0
                deq_data_strd,
2369
0
                pu1_recon,
2370
0
                i4_recon_stride,
2371
0
                pu1_ecd_data,
2372
0
                trans_size,
2373
0
                packed_pred_mode,
2374
0
                cbf,
2375
0
                *pi4_zero_col,
2376
0
                *pi4_zero_row,
2377
0
                NULL_PLANE);
2378
2379
0
            pu1_is_recon_available[0] = 1;
2380
0
        }
2381
2382
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2383
0
        if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2384
0
        {
2385
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
2386
0
                pu1_src,
2387
0
                src_strd,
2388
0
                pu1_recon,
2389
0
                i4_recon_stride,
2390
0
                pi8_cost[0],
2391
0
                i4_alpha_stim_multiplier,
2392
0
                trans_size,
2393
0
                0,
2394
0
                ps_ctxt->u1_enable_psyRDOPT,
2395
0
                NULL_PLANE);
2396
0
        }
2397
0
        else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2398
0
        {
2399
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
2400
0
                pu1_src,
2401
0
                src_strd,
2402
0
                pu1_pred,
2403
0
                pred_strd,
2404
0
                pi8_cost[0],
2405
0
                i4_alpha_stim_multiplier,
2406
0
                trans_size,
2407
0
                0,
2408
0
                ps_ctxt->u1_enable_psyRDOPT,
2409
0
                NULL_PLANE);
2410
0
        }
2411
0
#endif
2412
2413
        /* add the SSD cost to bits estimate given by ECD */
2414
0
        tu_rd_cost = *pi8_cost + COMPUTE_RATE_COST_CLIP30(
2415
0
                                     tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
2416
2417
0
        if(i4_perform_zcbf)
2418
0
        {
2419
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2420
0
            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
2421
0
            {
2422
0
                zero_cbf_cost = ihevce_inject_stim_into_distortion(
2423
0
                    pu1_src,
2424
0
                    src_strd,
2425
0
                    pu1_pred,
2426
0
                    pred_strd,
2427
0
                    zero_cbf_cost,
2428
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2429
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2430
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2431
0
                                                 100.0,
2432
0
                    trans_size,
2433
0
                    0,
2434
0
                    ps_ctxt->u1_enable_psyRDOPT,
2435
0
                    NULL_PLANE);
2436
0
            }
2437
0
#endif
2438
2439
            /* force the tu as zero cbf if zero_cbf_cost is lower */
2440
0
            if(zero_cbf_cost < tu_rd_cost)
2441
0
            {
2442
                /* num bytes is set to 0 */
2443
0
                *pi4_coeff_off = 0;
2444
2445
                /* cbf is returned as 0 */
2446
0
                cbf = 0;
2447
2448
                /* cost is returned as 0 cbf cost */
2449
0
                *pi8_cost = zero_cbf_cost;
2450
2451
                /* TU bits is set to 0 */
2452
0
                *pi4_tu_bits = 0;
2453
0
                pu1_is_recon_available[0] = 0;
2454
2455
0
                if(SPATIAL_DOMAIN_SSD == e_ssd_type)
2456
0
                {
2457
                    /* copy pred to recon for zcbf mode */
2458
2459
0
                    ps_ctxt->s_cmn_opt_func.pf_copy_2d(
2460
0
                        pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size);
2461
2462
0
                    pu1_is_recon_available[0] = 1;
2463
0
                }
2464
0
            }
2465
            /* accumulate cu not coded cost with zcbf cost */
2466
0
#if ENABLE_INTER_ZCU_COST
2467
0
            ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost;
2468
0
#endif
2469
0
        }
2470
0
    }
2471
0
    else
2472
0
    {
2473
        /* cbf = 0, accumulate cu not coded cost */
2474
0
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
2475
0
        {
2476
0
            *pi8_cost = ihevce_it_recon_ssd(
2477
0
                ps_ctxt,
2478
0
                pu1_src,
2479
0
                src_strd,
2480
0
                pu1_pred,
2481
0
                pred_strd,
2482
0
                pi2_deq_data,
2483
0
                deq_data_strd,
2484
0
                pu1_recon,
2485
0
                i4_recon_stride,
2486
0
                pu1_ecd_data,
2487
0
                trans_size,
2488
0
                packed_pred_mode,
2489
0
                cbf,
2490
0
                *pi4_zero_col,
2491
0
                *pi4_zero_row,
2492
0
                NULL_PLANE);
2493
2494
0
            pu1_is_recon_available[0] = 1;
2495
0
        }
2496
2497
0
#if ENABLE_INTER_ZCU_COST
2498
0
        {
2499
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2500
0
            if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2501
0
            {
2502
0
                pi8_cost[0] = ihevce_inject_stim_into_distortion(
2503
0
                    pu1_src,
2504
0
                    src_strd,
2505
0
                    pu1_recon,
2506
0
                    i4_recon_stride,
2507
0
                    pi8_cost[0],
2508
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2509
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2510
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2511
0
                                                 100.0,
2512
0
                    trans_size,
2513
0
                    0,
2514
0
                    ps_ctxt->u1_enable_psyRDOPT,
2515
0
                    NULL_PLANE);
2516
0
            }
2517
0
            else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2518
0
            {
2519
0
                pi8_cost[0] = ihevce_inject_stim_into_distortion(
2520
0
                    pu1_src,
2521
0
                    src_strd,
2522
0
                    pu1_pred,
2523
0
                    pred_strd,
2524
0
                    pi8_cost[0],
2525
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2526
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2527
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2528
0
                                                 100.0,
2529
0
                    trans_size,
2530
0
                    0,
2531
0
                    ps_ctxt->u1_enable_psyRDOPT,
2532
0
                    NULL_PLANE);
2533
0
            }
2534
0
#endif
2535
2536
0
            ps_ctxt->i8_cu_not_coded_cost += *pi8_cost;
2537
0
        }
2538
0
#endif /* ENABLE_INTER_ZCU_COST */
2539
0
    }
2540
0
#endif
2541
2542
0
    return (cbf);
2543
0
}
2544
2545
/*!
2546
******************************************************************************
2547
* \if Function name : ihevce_it_recon_fxn \endif
2548
*
2549
* \brief
2550
*    Transform unit level (Luma) IT Recon function
2551
*
2552
* \param[in] ps_ctxt        enc_loop module ctxt pointer
2553
* \param[in] pi2_deq_data   pointer to iq data
2554
* \param[in] deq_data_strd  iq data buffer stride
2555
* \param[in] pu1_pred       pointer to predicted data buffer
2556
* \param[in] pred_strd      predicted buffer stride
2557
* \param[in] pu1_recon      pointer to recon buffer
2558
* \param[in] recon_strd     recon buffer stride
2559
* \param[out] pu1_ecd_data  pointer coeff output buffer (input to ent cod)
2560
* \param[in] trans_size     transform size (4, 8, 16,32)
2561
* \param[in] packed_pred_mode   0:Inter 1:Intra 2:Skip
2562
* \param[in] cbf            CBF of the current block
2563
* \param[in] zero_cols      zero_cols of the current block
2564
* \param[in] zero_rows      zero_rows of the current block
2565
*
2566
* \return
2567
*
2568
* \author
2569
*  Ittiam
2570
*
2571
*****************************************************************************
2572
*/
2573
2574
void ihevce_it_recon_fxn(
2575
    ihevce_enc_loop_ctxt_t *ps_ctxt,
2576
    WORD16 *pi2_deq_data,
2577
    WORD32 deq_dat_strd,
2578
    UWORD8 *pu1_pred,
2579
    WORD32 pred_strd,
2580
    UWORD8 *pu1_recon,
2581
    WORD32 recon_strd,
2582
    UWORD8 *pu1_ecd_data,
2583
    WORD32 trans_size,
2584
    WORD32 packed_pred_mode,
2585
    WORD32 cbf,
2586
    WORD32 zero_cols,
2587
    WORD32 zero_rows)
2588
0
{
2589
0
    WORD32 dc_add_flag = 0;
2590
0
    WORD32 trans_idx;
2591
2592
    /* translate the transform size to index for 4x4 and 8x8 */
2593
0
    trans_idx = trans_size >> 2;
2594
2595
    /* if SKIP mode needs to be evaluated the pred is copied to recon */
2596
0
    if(PRED_MODE_SKIP == packed_pred_mode)
2597
0
    {
2598
0
        UWORD8 *pu1_curr_recon, *pu1_curr_pred;
2599
2600
0
        pu1_curr_pred = pu1_pred;
2601
0
        pu1_curr_recon = pu1_recon;
2602
2603
        /* 2D copy of data */
2604
2605
0
        ps_ctxt->s_cmn_opt_func.pf_2d_square_copy(
2606
0
            pu1_curr_recon, recon_strd, pu1_curr_pred, pred_strd, trans_size, sizeof(UWORD8));
2607
2608
0
        return;
2609
0
    }
2610
2611
    /* for intra 4x4 DST transform should be used */
2612
0
    if((1 == trans_idx) && (PRED_MODE_INTRA == packed_pred_mode))
2613
0
    {
2614
0
        trans_idx = 0;
2615
0
    }
2616
    /* for 16x16 cases */
2617
0
    else if(16 == trans_size)
2618
0
    {
2619
0
        trans_idx = 3;
2620
0
    }
2621
    /* for 32x32 cases */
2622
0
    else if(32 == trans_size)
2623
0
    {
2624
0
        trans_idx = 4;
2625
0
    }
2626
2627
    /*if (lastx == 0 && lasty == 0) , ie only 1 coefficient */
2628
0
    if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1]))
2629
0
    {
2630
0
        dc_add_flag = 1;
2631
0
    }
2632
2633
0
    if(0 == cbf)
2634
0
    {
2635
        /* buffer copy */
2636
0
        ps_ctxt->s_cmn_opt_func.pf_2d_square_copy(
2637
0
            pu1_recon, recon_strd, pu1_pred, pred_strd, trans_size, 1);
2638
0
    }
2639
0
    else if((1 == dc_add_flag) && (0 != trans_idx))
2640
0
    {
2641
        /* dc add */
2642
0
        ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc(
2643
0
            pu1_pred,
2644
0
            pred_strd,
2645
0
            pu1_recon,
2646
0
            recon_strd,
2647
0
            trans_size,
2648
0
            pi2_deq_data[0],
2649
0
            NULL_PLANE /* luma */
2650
0
        );
2651
0
    }
2652
0
    else
2653
0
    {
2654
0
        ps_ctxt->apf_it_recon[trans_idx](
2655
0
            pi2_deq_data,
2656
0
            &ps_ctxt->ai2_scratch[0],
2657
0
            pu1_pred,
2658
0
            pu1_recon,
2659
0
            deq_dat_strd,
2660
0
            pred_strd,
2661
0
            recon_strd,
2662
0
            zero_cols,
2663
0
            zero_rows);
2664
0
    }
2665
0
}
2666
2667
/*!
2668
******************************************************************************
2669
* \if Function name : ihevce_chroma_it_recon_fxn \endif
2670
*
2671
* \brief
2672
*    Transform unit level (Chroma) IT Recon function
2673
*
2674
* \param[in] ps_ctxt        enc_loop module ctxt pointer
2675
* \param[in] pi2_deq_data   pointer to iq data
2676
* \param[in] deq_data_strd  iq data buffer stride
2677
* \param[in] pu1_pred       pointer to predicted data buffer
2678
* \param[in] pred_strd      predicted buffer stride
2679
* \param[in] pu1_recon      pointer to recon buffer
2680
* \param[in] recon_strd     recon buffer stride
2681
* \param[out] pu1_ecd_data  pointer coeff output buffer (input to ent cod)
2682
* \param[in] trans_size     transform size (4, 8, 16)
2683
* \param[in] cbf            CBF of the current block
2684
* \param[in] zero_cols      zero_cols of the current block
2685
* \param[in] zero_rows      zero_rows of the current block
2686
*
2687
* \return
2688
*
2689
* \author
2690
*  Ittiam
2691
*
2692
*****************************************************************************
2693
*/
2694
2695
void ihevce_chroma_it_recon_fxn(
2696
    ihevce_enc_loop_ctxt_t *ps_ctxt,
2697
    WORD16 *pi2_deq_data,
2698
    WORD32 deq_dat_strd,
2699
    UWORD8 *pu1_pred,
2700
    WORD32 pred_strd,
2701
    UWORD8 *pu1_recon,
2702
    WORD32 recon_strd,
2703
    UWORD8 *pu1_ecd_data,
2704
    WORD32 trans_size,
2705
    WORD32 cbf,
2706
    WORD32 zero_cols,
2707
    WORD32 zero_rows,
2708
    CHROMA_PLANE_ID_T e_chroma_plane)
2709
0
{
2710
0
    WORD32 trans_idx;
2711
2712
0
    ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE));
2713
2714
    /* since 2x2 transform is not allowed for chroma*/
2715
0
    if(2 == trans_size)
2716
0
    {
2717
0
        trans_size = 4;
2718
0
    }
2719
2720
    /* translate the transform size to index */
2721
0
    trans_idx = trans_size >> 2;
2722
2723
    /* for 16x16 cases */
2724
0
    if(16 == trans_size)
2725
0
    {
2726
0
        trans_idx = 3;
2727
0
    }
2728
2729
0
    if(0 == cbf)
2730
0
    {
2731
        /* buffer copy */
2732
0
        ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
2733
0
            pu1_pred, pred_strd, pu1_recon, recon_strd, trans_size, trans_size, e_chroma_plane);
2734
0
    }
2735
0
    else if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1]))
2736
0
    {
2737
        /* dc add */
2738
0
        ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc(
2739
0
            pu1_pred,
2740
0
            pred_strd,
2741
0
            pu1_recon,
2742
0
            recon_strd,
2743
0
            trans_size,
2744
0
            pi2_deq_data[0],
2745
0
            e_chroma_plane /* chroma plane */
2746
0
        );
2747
0
    }
2748
0
    else
2749
0
    {
2750
0
        ps_ctxt->apf_chrm_it_recon[trans_idx - 1](
2751
0
            pi2_deq_data,
2752
0
            &ps_ctxt->ai2_scratch[0],
2753
0
            pu1_pred + (WORD32)e_chroma_plane,
2754
0
            pu1_recon + (WORD32)e_chroma_plane,
2755
0
            deq_dat_strd,
2756
0
            pred_strd,
2757
0
            recon_strd,
2758
0
            zero_cols,
2759
0
            zero_rows);
2760
0
    }
2761
0
}
2762
2763
/**
2764
*******************************************************************************
2765
* \if Function name : ihevce_mpm_idx_based_filter_RDOPT_cand \endif
2766
*
2767
* \brief * Filters the RDOPT candidates based on mpm_idx
2768
*
2769
* \par   Description
2770
* Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
2771
* for a CU
2772
*
2773
* \param[in] ps_ctxt : ptr to enc loop context
2774
* \param[in] ps_cu_analyse : ptr to CU analyse structure
2775
* \param[in] ps_top_nbr_4x4 top 4x4 neighbour pointer
2776
* \param[in] ps_left_nbr_4x4 left 4x4 neighbour pointer
2777
* \param[in] pu1_luma_mode luma mode
2778
*
2779
* \returns none
2780
*
2781
* \author
2782
*  Ittiam
2783
*
2784
*******************************************************************************
2785
*/
2786
2787
void ihevce_mpm_idx_based_filter_RDOPT_cand(
2788
    ihevce_enc_loop_ctxt_t *ps_ctxt,
2789
    cu_analyse_t *ps_cu_analyse,
2790
    nbr_4x4_t *ps_left_nbr_4x4,
2791
    nbr_4x4_t *ps_top_nbr_4x4,
2792
    UWORD8 *pu1_luma_mode,
2793
    UWORD8 *pu1_eval_mark)
2794
0
{
2795
0
    WORD32 cu_pos_x;
2796
0
    WORD32 cu_pos_y;
2797
0
    nbr_avail_flags_t s_nbr;
2798
0
    WORD32 trans_size;
2799
0
    WORD32 au4_cand_mode_list[3];
2800
0
    WORD32 nbr_flags;
2801
0
    UWORD8 *pu1_intra_luma_modes;
2802
0
    WORD32 rdopt_cand_ctr = 0;
2803
0
    UWORD8 *pu1_luma_eval_mark;
2804
2805
0
    cu_pos_x = ps_cu_analyse->b3_cu_pos_x << 1;
2806
0
    cu_pos_y = ps_cu_analyse->b3_cu_pos_y << 1;
2807
0
    trans_size = ps_cu_analyse->u1_cu_size;
2808
2809
    /* get the neighbour availability flags */
2810
0
    nbr_flags = ihevce_get_nbr_intra(
2811
0
        &s_nbr,
2812
0
        ps_ctxt->pu1_ctb_nbr_map,
2813
0
        ps_ctxt->i4_nbr_map_strd,
2814
0
        cu_pos_x,
2815
0
        cu_pos_y,
2816
0
        trans_size >> 2);
2817
0
    (void)nbr_flags;
2818
    /*Call the fun to populate luma intra pred mode fro TU=CU and use the same list fro
2819
    *TU=CU/2 also since the modes are same in both the cases.
2820
    */
2821
0
    ihevce_populate_intra_pred_mode(
2822
0
        ps_top_nbr_4x4->b6_luma_intra_mode,
2823
0
        ps_left_nbr_4x4->b6_luma_intra_mode,
2824
0
        s_nbr.u1_top_avail,
2825
0
        s_nbr.u1_left_avail,
2826
0
        cu_pos_y,
2827
0
        &au4_cand_mode_list[0]);
2828
2829
    /*Loop through all the RDOPT candidates of TU=CU and TU=CU/2 and check if the current RDOPT
2830
    *cand is present in a4_cand_mode_list, If yes set eval flag to 1 else set it to zero
2831
    */
2832
2833
0
    pu1_intra_luma_modes = pu1_luma_mode;
2834
0
    pu1_luma_eval_mark = pu1_eval_mark;
2835
2836
0
    while(pu1_intra_luma_modes[rdopt_cand_ctr] != 255)
2837
0
    {
2838
0
        WORD32 i;
2839
0
        WORD32 found_flag = 0;
2840
2841
        /*1st candidate of TU=CU list and TU=CU/2 list must go through RDOPT stage
2842
        *irrespective of whether the cand is present in the mpm idx list or not
2843
        */
2844
0
        if(rdopt_cand_ctr == 0)
2845
0
        {
2846
0
            rdopt_cand_ctr++;
2847
0
            continue;
2848
0
        }
2849
2850
0
        for(i = 0; i < 3; i++)
2851
0
        {
2852
0
            if(pu1_intra_luma_modes[rdopt_cand_ctr] == au4_cand_mode_list[i])
2853
0
            {
2854
0
                found_flag = 1;
2855
0
                break;
2856
0
            }
2857
0
        }
2858
2859
0
        if(found_flag == 0)
2860
0
        {
2861
0
            pu1_luma_eval_mark[rdopt_cand_ctr] = 0;
2862
0
        }
2863
2864
0
        rdopt_cand_ctr++;
2865
0
    }
2866
0
}
2867
2868
/*!
2869
******************************************************************************
2870
* \if Function name : ihevce_intra_rdopt_cu_ntu \endif
2871
*
2872
* \brief
2873
*    Intra Coding unit function for RD opt mode
2874
*
2875
* \param[in] ps_ctxt    enc_loop module ctxt pointer
2876
* \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure
2877
* \param[in] pu1_luma_mode : pointer to luma mode
2878
* \param[in] ps_cu_analyse  pointer to cu analyse pointer
2879
* \param[in] pu1_src    pointer to source data buffer
2880
* \param[in] src_strd   source buffer stride
2881
* \param[in] pu1_cu_left pointer to left recon data buffer
2882
* \param[in] pu1_cu_top  pointer to top recon data buffer
2883
* \param[in] pu1_cu_top_left pointer to top left recon data buffer
2884
* \param[in] ps_left_nbr_4x4 : left 4x4 neighbour pointer
2885
* \param[in] ps_top_nbr_4x4 : top 4x4 neighbour pointer
2886
* \param[in] nbr_4x4_left_strd left nbr4x4 stride
2887
* \param[in] cu_left_stride left recon buffer stride
2888
* \param[in] curr_buf_idx RD opt buffer index for current usage
2889
* \param[in] func_proc_mode : function processing mode @sa TU_SIZE_WRT_CU_T
2890
*
2891
* \return
2892
*    RDopt cost
2893
*
2894
* \author
2895
*  Ittiam
2896
*
2897
*****************************************************************************
2898
*/
2899
LWORD64 ihevce_intra_rdopt_cu_ntu(
2900
    ihevce_enc_loop_ctxt_t *ps_ctxt,
2901
    enc_loop_cu_prms_t *ps_cu_prms,
2902
    void *pv_pred_org,
2903
    WORD32 pred_strd_org,
2904
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
2905
    UWORD8 *pu1_luma_mode,
2906
    cu_analyse_t *ps_cu_analyse,
2907
    void *pv_curr_src,
2908
    void *pv_cu_left,
2909
    void *pv_cu_top,
2910
    void *pv_cu_top_left,
2911
    nbr_4x4_t *ps_left_nbr_4x4,
2912
    nbr_4x4_t *ps_top_nbr_4x4,
2913
    WORD32 nbr_4x4_left_strd,
2914
    WORD32 cu_left_stride,
2915
    WORD32 curr_buf_idx,
2916
    WORD32 func_proc_mode,
2917
    WORD32 i4_alpha_stim_multiplier)
2918
0
{
2919
0
    enc_loop_cu_final_prms_t *ps_final_prms;
2920
0
    nbr_avail_flags_t s_nbr;
2921
0
    nbr_4x4_t *ps_nbr_4x4;
2922
0
    nbr_4x4_t *ps_tmp_lt_4x4;
2923
0
    recon_datastore_t *ps_recon_datastore;
2924
2925
0
    ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
2926
2927
0
    UWORD32 *pu4_nbr_flags;
2928
0
    UWORD8 *pu1_intra_pred_mode;
2929
0
    WORD32 cu_pos_x;
2930
0
    WORD32 cu_pos_y;
2931
0
    WORD32 trans_size = 0;
2932
0
    UWORD8 *pu1_left;
2933
0
    UWORD8 *pu1_top;
2934
0
    UWORD8 *pu1_top_left;
2935
0
    UWORD8 *pu1_recon;
2936
0
    UWORD8 *pu1_csbf_buf;
2937
0
    UWORD8 *pu1_ecd_data;
2938
0
    WORD16 *pi2_deq_data;
2939
0
    WORD32 deq_data_strd;
2940
0
    LWORD64 total_rdopt_cost;
2941
0
    WORD32 ctr;
2942
0
    WORD32 left_strd;
2943
0
    WORD32 i4_recon_stride;
2944
0
    WORD32 csbf_strd;
2945
0
    WORD32 ecd_data_bytes_cons;
2946
0
    WORD32 num_4x4_in_tu;
2947
0
    WORD32 num_4x4_in_cu;
2948
0
    WORD32 chrm_present_flag;
2949
0
    WORD32 tx_size;
2950
0
    WORD32 cu_bits;
2951
0
    WORD32 num_cu_parts = 0;
2952
0
    WORD32 num_cands = 0;
2953
0
    WORD32 cu_pos_x_8pelunits;
2954
0
    WORD32 cu_pos_y_8pelunits;
2955
0
    WORD32 i4_perform_rdoq;
2956
0
    WORD32 i4_perform_sbh;
2957
0
    UWORD8 u1_compute_spatial_ssd;
2958
0
    UWORD8 u1_compute_recon;
2959
0
    UWORD8 au1_intra_nxn_rdopt_ctxt_models[2][IHEVC_CAB_CTXT_END];
2960
2961
0
    UWORD16 u2_num_tus_in_cu = 0;
2962
0
    WORD32 is_sub_pu_in_hq = 0;
2963
    /* Get the RDOPT cost of the best CU mode for early_exit */
2964
0
    LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
2965
    /* cabac context of prev intra luma pred flag */
2966
0
    UWORD8 u1_prev_flag_cabac_ctxt =
2967
0
        ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_INTRA_LUMA_PRED_FLAG];
2968
0
    WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
2969
2970
0
    UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY;
2971
2972
0
    total_rdopt_cost = 0;
2973
0
    ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
2974
0
    ps_recon_datastore = &ps_final_prms->s_recon_datastore;
2975
0
    i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
2976
0
    csbf_strd = ps_ctxt->i4_cu_csbf_strd;
2977
0
    pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
2978
0
    pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
2979
0
    pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
2980
0
    deq_data_strd = ps_cu_analyse->u1_cu_size; /* deq_data stride is cu size */
2981
0
    ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
2982
0
    ps_tmp_lt_4x4 = ps_left_nbr_4x4;
2983
0
    pu4_nbr_flags = &ps_final_prms->au4_nbr_flags[0];
2984
0
    pu1_intra_pred_mode = &ps_final_prms->au1_intra_pred_mode[0];
2985
0
    cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
2986
0
    cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
2987
0
    cu_pos_x_8pelunits = cu_pos_x;
2988
0
    cu_pos_y_8pelunits = cu_pos_y;
2989
2990
    /* reset cu not coded cost */
2991
0
    ps_ctxt->i8_cu_not_coded_cost = 0;
2992
2993
    /* based on the Processng mode */
2994
0
    if(TU_EQ_CU == func_proc_mode)
2995
0
    {
2996
0
        ps_final_prms->u1_part_mode = SIZE_2Nx2N;
2997
0
        trans_size = ps_cu_analyse->u1_cu_size;
2998
0
        num_cu_parts = 1;
2999
0
        num_cands = 1;
3000
0
        u2_num_tus_in_cu = 1;
3001
0
    }
3002
0
    else if(TU_EQ_CU_DIV2 == func_proc_mode)
3003
0
    {
3004
0
        ps_final_prms->u1_part_mode = SIZE_2Nx2N;
3005
0
        trans_size = ps_cu_analyse->u1_cu_size >> 1;
3006
0
        num_cu_parts = 4;
3007
0
        num_cands = 1;
3008
0
        u2_num_tus_in_cu = 4;
3009
0
    }
3010
0
    else if(TU_EQ_SUBCU == func_proc_mode)
3011
0
    {
3012
0
        ps_final_prms->u1_part_mode = SIZE_NxN;
3013
0
        trans_size = ps_cu_analyse->u1_cu_size >> 1;
3014
0
        num_cu_parts = 4;
3015
        /*In HQ for TU = SUBPU, all 35 modes used for RDOPT instead of 3 modes */
3016
0
        if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset)
3017
0
        {
3018
0
            if(ps_ctxt->i1_slice_type != BSLICE)
3019
0
            {
3020
0
                num_cands = (4 * MAX_INTRA_CU_CANDIDATES) + 2;
3021
0
            }
3022
0
            else
3023
0
            {
3024
0
                num_cands = (2 * MAX_INTRA_CU_CANDIDATES);
3025
0
            }
3026
0
        }
3027
0
        else
3028
0
        {
3029
0
            num_cands = MAX_INTRA_CU_CANDIDATES;
3030
0
        }
3031
0
        u2_num_tus_in_cu = 4;
3032
0
    }
3033
0
    else
3034
0
    {
3035
        /* should not enter here */
3036
0
        ASSERT(0);
3037
0
    }
3038
3039
0
    if(ps_ctxt->i1_cu_qp_delta_enable)
3040
0
    {
3041
0
        ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, trans_size, 1);
3042
0
    }
3043
3044
0
    if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
3045
0
    {
3046
0
        ps_ctxt->i8_cl_ssd_lambda_qf =
3047
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
3048
0
             100.0f);
3049
0
        ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
3050
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
3051
0
             (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
3052
0
    }
3053
3054
0
    u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
3055
0
                             (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
3056
0
                             CONVERT_SSDS_TO_SPATIAL_DOMAIN;
3057
3058
0
    if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
3059
0
    {
3060
0
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
3061
0
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
3062
0
    }
3063
3064
    /* populate the neigbours */
3065
0
    pu1_left = (UWORD8 *)pv_cu_left;
3066
0
    pu1_top = (UWORD8 *)pv_cu_top;
3067
0
    pu1_top_left = (UWORD8 *)pv_cu_top_left;
3068
0
    left_strd = cu_left_stride;
3069
0
    num_4x4_in_tu = (trans_size >> 2);
3070
0
    num_4x4_in_cu = (ps_cu_analyse->u1_cu_size >> 2);
3071
0
    chrm_present_flag = 1;
3072
0
    ecd_data_bytes_cons = 0;
3073
0
    cu_bits = 0;
3074
3075
    /* get the 4x4 level postion of current cu */
3076
0
    cu_pos_x = cu_pos_x << 1;
3077
0
    cu_pos_y = cu_pos_y << 1;
3078
3079
    /* pouplate cu level params knowing that current is intra */
3080
0
    ps_final_prms->u1_skip_flag = 0;
3081
0
    ps_final_prms->u1_intra_flag = PRED_MODE_INTRA;
3082
0
    ps_final_prms->u2_num_pus_in_cu = 1;
3083
    /*init the is_cu_coded flag*/
3084
0
    ps_final_prms->u1_is_cu_coded = 0;
3085
0
    ps_final_prms->u4_cu_sad = 0;
3086
3087
0
    ps_final_prms->as_pu_enc_loop[0].b1_intra_flag = PRED_MODE_INTRA;
3088
0
    ps_final_prms->as_pu_enc_loop[0].b4_wd = (trans_size >> 1) - 1;
3089
0
    ps_final_prms->as_pu_enc_loop[0].b4_ht = (trans_size >> 1) - 1;
3090
0
    ps_final_prms->as_pu_enc_loop[0].b4_pos_x = cu_pos_x;
3091
0
    ps_final_prms->as_pu_enc_loop[0].b4_pos_y = cu_pos_y;
3092
0
    ps_final_prms->as_pu_enc_loop[0].b1_merge_flag = 0;
3093
3094
0
    ps_final_prms->as_col_pu_enc_loop[0].b1_intra_flag = 1;
3095
3096
    /*copy qp directly as intra cant be skip*/
3097
0
    ps_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
3098
0
    ps_nbr_4x4->mv.s_l0_mv.i2_mvx = 0;
3099
0
    ps_nbr_4x4->mv.s_l0_mv.i2_mvy = 0;
3100
0
    ps_nbr_4x4->mv.s_l1_mv.i2_mvx = 0;
3101
0
    ps_nbr_4x4->mv.s_l1_mv.i2_mvy = 0;
3102
0
    ps_nbr_4x4->mv.i1_l0_ref_pic_buf_id = -1;
3103
0
    ps_nbr_4x4->mv.i1_l1_ref_pic_buf_id = -1;
3104
0
    ps_nbr_4x4->mv.i1_l0_ref_idx = -1;
3105
0
    ps_nbr_4x4->mv.i1_l1_ref_idx = -1;
3106
3107
    /* RDOPT copy States :  TU init (best until prev TU) to current */
3108
0
    memcpy(
3109
0
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
3110
0
             .s_cabac_ctxt.au1_ctxt_models[0],
3111
0
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
3112
0
        IHEVC_CAB_COEFFX_PREFIX);
3113
3114
    /* RDOPT copy States :update to init state if 0 cbf */
3115
0
    memcpy(
3116
0
        &au1_intra_nxn_rdopt_ctxt_models[0][0],
3117
0
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
3118
0
        IHEVC_CAB_COEFFX_PREFIX);
3119
0
    memcpy(
3120
0
        &au1_intra_nxn_rdopt_ctxt_models[1][0],
3121
0
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
3122
0
        IHEVC_CAB_COEFFX_PREFIX);
3123
3124
    /* loop for all partitions in CU  blocks */
3125
0
    for(ctr = 0; ctr < num_cu_parts; ctr++)
3126
0
    {
3127
0
        UWORD8 *pu1_curr_mode;
3128
0
        WORD32 cand_ctr;
3129
0
        WORD32 nbr_flags;
3130
3131
        /* for NxN case to track the best mode       */
3132
        /* for other cases zeroth index will be used */
3133
0
        intra_prev_rem_flags_t as_intra_prev_rem[2];
3134
0
        LWORD64 ai8_cand_rdopt_cost[2];
3135
0
        UWORD32 au4_tu_sad[2];
3136
0
        WORD32 ai4_tu_bits[2];
3137
0
        WORD32 ai4_cbf[2];
3138
0
        WORD32 ai4_curr_bytes[2];
3139
0
        WORD32 ai4_zero_col[2];
3140
0
        WORD32 ai4_zero_row[2];
3141
        /* To store the pred, coeff and dequant for TU_EQ_SUBCU case (since mul.
3142
        cand. are there) ping-pong buffer to store the best and current */
3143
0
        UWORD8 au1_cur_pred_data[2][MIN_TU_SIZE * MIN_TU_SIZE];
3144
0
        UWORD8 au1_intra_coeffs[2][MAX_SCAN_COEFFS_BYTES_4x4];
3145
0
        WORD16 ai2_intra_deq_coeffs[2][MIN_TU_SIZE * MIN_TU_SIZE];
3146
        /* Context models stored for RDopt store and restore purpose */
3147
3148
0
        UWORD8 au1_recon_availability[2];
3149
3150
0
        WORD32 best_cand_idx = 0;
3151
0
        LWORD64 best_cand_cost = MAX_COST_64;
3152
        /* counters to toggle b/w best and current */
3153
0
        WORD32 best_intra_buf_idx = 1;
3154
0
        WORD32 curr_intra_buf_idx = 0;
3155
3156
        /* copy the mode pointer to be used in inner loop */
3157
0
        pu1_curr_mode = pu1_luma_mode;
3158
3159
        /* get the neighbour availability flags */
3160
0
        nbr_flags = ihevce_get_nbr_intra(
3161
0
            &s_nbr,
3162
0
            ps_ctxt->pu1_ctb_nbr_map,
3163
0
            ps_ctxt->i4_nbr_map_strd,
3164
0
            cu_pos_x,
3165
0
            cu_pos_y,
3166
0
            num_4x4_in_tu);
3167
3168
        /* copy the nbr flags for chroma reuse */
3169
0
        if(4 != trans_size)
3170
0
        {
3171
0
            *pu4_nbr_flags = nbr_flags;
3172
0
        }
3173
0
        else if(1 == chrm_present_flag)
3174
0
        {
3175
            /* compute the avail flags assuming luma trans is 8x8 */
3176
            /* get the neighbour availability flags */
3177
0
            *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
3178
0
                ps_ctxt->pu1_ctb_nbr_map,
3179
0
                ps_ctxt->i4_nbr_map_strd,
3180
0
                cu_pos_x,
3181
0
                cu_pos_y,
3182
0
                (num_4x4_in_tu << 1),
3183
0
                (num_4x4_in_tu << 1));
3184
0
        }
3185
3186
0
        u1_compute_recon = !u1_compute_spatial_ssd && ((num_cu_parts > 1) && (ctr < 3));
3187
3188
0
        if(!ctr && (u1_compute_spatial_ssd || u1_compute_recon))
3189
0
        {
3190
0
            ps_recon_datastore->u1_is_lumaRecon_available = 1;
3191
0
        }
3192
0
        else if(!ctr)
3193
0
        {
3194
0
            ps_recon_datastore->u1_is_lumaRecon_available = 0;
3195
0
        }
3196
3197
0
        ihevc_intra_pred_luma_ref_substitution_fptr =
3198
0
            ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
3199
3200
        /* call reference array substitution */
3201
0
        ihevc_intra_pred_luma_ref_substitution_fptr(
3202
0
            pu1_top_left,
3203
0
            pu1_top,
3204
0
            pu1_left,
3205
0
            left_strd,
3206
0
            trans_size,
3207
0
            nbr_flags,
3208
0
            (UWORD8 *)ps_ctxt->pv_ref_sub_out,
3209
0
            1);
3210
3211
        /* Intra Mode gating based on MPM cand list and encoder quality preset */
3212
0
        if((ps_ctxt->i1_slice_type != ISLICE) && (TU_EQ_SUBCU == func_proc_mode) &&
3213
0
           (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
3214
0
        {
3215
0
            ihevce_mpm_idx_based_filter_RDOPT_cand(
3216
0
                ps_ctxt,
3217
0
                ps_cu_analyse,
3218
0
                ps_left_nbr_4x4,
3219
0
                ps_top_nbr_4x4,
3220
0
                pu1_luma_mode,
3221
0
                &ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][0]);
3222
0
        }
3223
3224
0
        if((TU_EQ_SUBCU == func_proc_mode) && (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
3225
0
           (ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr] >= MAX_INTRA_CU_CANDIDATES))
3226
0
        {
3227
0
            WORD32 ai4_mpm_mode_list[3];
3228
0
            WORD32 i;
3229
3230
0
            WORD32 i4_curr_index = ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr];
3231
3232
0
            ihevce_populate_intra_pred_mode(
3233
0
                ps_top_nbr_4x4->b6_luma_intra_mode,
3234
0
                ps_tmp_lt_4x4->b6_luma_intra_mode,
3235
0
                s_nbr.u1_top_avail,
3236
0
                s_nbr.u1_left_avail,
3237
0
                cu_pos_y,
3238
0
                &ai4_mpm_mode_list[0]);
3239
3240
0
            for(i = 0; i < 3; i++)
3241
0
            {
3242
0
                if(ps_cu_analyse->s_cu_intra_cand
3243
0
                       .au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] == 0)
3244
0
                {
3245
0
                    ASSERT(ai4_mpm_mode_list[i] < 35);
3246
3247
0
                    ps_cu_analyse->s_cu_intra_cand
3248
0
                        .au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] = 1;
3249
0
                    pu1_luma_mode[i4_curr_index] = ai4_mpm_mode_list[i];
3250
0
                    ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr]++;
3251
0
                    i4_curr_index++;
3252
0
                }
3253
0
            }
3254
3255
0
            pu1_luma_mode[i4_curr_index] = 255;
3256
0
        }
3257
3258
        /* loop over candidates for each partition */
3259
0
        for(cand_ctr = 0; cand_ctr < num_cands; cand_ctr++)
3260
0
        {
3261
0
            WORD32 curr_pred_mode;
3262
0
            WORD32 bits = 0;
3263
0
            LWORD64 curr_cost;
3264
0
            WORD32 luma_pred_func_idx;
3265
0
            UWORD8 *pu1_curr_ecd_data;
3266
0
            WORD16 *pi2_curr_deq_data;
3267
0
            WORD32 curr_deq_data_strd;
3268
0
            WORD32 pred_strd;
3269
0
            UWORD8 *pu1_pred;
3270
3271
            /* if NXN case the recon and ecd data is stored in temp buffers */
3272
0
            if(TU_EQ_SUBCU == func_proc_mode)
3273
0
            {
3274
0
                pu1_pred = &au1_cur_pred_data[curr_intra_buf_idx][0];
3275
0
                pred_strd = trans_size;
3276
0
                pu1_curr_ecd_data = &au1_intra_coeffs[curr_intra_buf_idx][0];
3277
0
                pi2_curr_deq_data = &ai2_intra_deq_coeffs[curr_intra_buf_idx][0];
3278
0
                curr_deq_data_strd = trans_size;
3279
3280
0
                ASSERT(trans_size == MIN_TU_SIZE);
3281
0
            }
3282
0
            else
3283
0
            {
3284
0
                pu1_pred = (UWORD8 *)pv_pred_org;
3285
0
                pred_strd = pred_strd_org;
3286
0
                pu1_curr_ecd_data = pu1_ecd_data;
3287
0
                pi2_curr_deq_data = pi2_deq_data;
3288
0
                curr_deq_data_strd = deq_data_strd;
3289
0
            }
3290
3291
0
            pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[curr_intra_buf_idx]) +
3292
0
                        (ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride;
3293
3294
0
            if(is_sub_pu_in_hq == 1)
3295
0
            {
3296
0
                curr_pred_mode = cand_ctr;
3297
0
            }
3298
0
            else
3299
0
            {
3300
0
                curr_pred_mode = pu1_curr_mode[cand_ctr];
3301
0
            }
3302
3303
            /* If the candidate mode is 255, then break */
3304
0
            if(255 == curr_pred_mode)
3305
0
            {
3306
0
                break;
3307
0
            }
3308
0
            else if(250 == curr_pred_mode)
3309
0
            {
3310
0
                continue;
3311
0
            }
3312
3313
            /* check if this mode needs to be evaluated or not. For 2nx2n cases, this   */
3314
            /* function will be called once per candidate, so this check has been done  */
3315
            /* outside this function call. For NxN case, this function will be called   */
3316
            /* only once, and all the candidates will be evaluated here.                */
3317
0
            if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3)
3318
0
            {
3319
0
                if((TU_EQ_SUBCU == func_proc_mode) &&
3320
0
                   (0 == ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][cand_ctr]))
3321
0
                {
3322
0
                    continue;
3323
0
                }
3324
0
            }
3325
3326
            /* call reference filtering */
3327
0
            ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr(
3328
0
                (UWORD8 *)ps_ctxt->pv_ref_sub_out,
3329
0
                trans_size,
3330
0
                (UWORD8 *)ps_ctxt->pv_ref_filt_out,
3331
0
                curr_pred_mode,
3332
0
                ps_ctxt->i1_strong_intra_smoothing_enable_flag);
3333
3334
            /* use the look up to get the function idx */
3335
0
            luma_pred_func_idx = g_i4_ip_funcs[curr_pred_mode];
3336
3337
            /* call the intra prediction function */
3338
0
            ps_ctxt->apf_lum_ip[luma_pred_func_idx](
3339
0
                (UWORD8 *)ps_ctxt->pv_ref_filt_out,
3340
0
                1,
3341
0
                pu1_pred,
3342
0
                pred_strd,
3343
0
                trans_size,
3344
0
                curr_pred_mode);
3345
3346
            /* populate the coeffs scan idx */
3347
0
            ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
3348
3349
            /* for luma 4x4 and 8x8 transforms the scan is chosen based on intra pred mode */
3350
0
            if(trans_size < 16)
3351
0
            {
3352
                /* for modes from 22 upto 30 horizontal scan is used */
3353
0
                if((curr_pred_mode > 21) && (curr_pred_mode < 31))
3354
0
                {
3355
0
                    ps_ctxt->i4_scan_idx = SCAN_HORZ;
3356
0
                }
3357
                /* for modes from 6 up to 14 vertical scan is used */
3358
0
                else if((curr_pred_mode > 5) && (curr_pred_mode < 15))
3359
0
                {
3360
0
                    ps_ctxt->i4_scan_idx = SCAN_VERT;
3361
0
                }
3362
0
            }
3363
3364
            /* RDOPT copy States :  TU init (best until prev TU) to current */
3365
0
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3366
0
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
3367
0
                        .s_cabac_ctxt.au1_ctxt_models[0] +
3368
0
                    IHEVC_CAB_COEFFX_PREFIX,
3369
0
                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
3370
0
                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3371
3372
0
            i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
3373
0
            i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
3374
3375
#if DISABLE_RDOQ_INTRA
3376
            i4_perform_rdoq = 0;
3377
#endif
3378
3379
            /* 2 multi-dimensional arrays of rounding factors (based on trans size) to be added here */
3380
            /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
3381
            /* Currently the complete array will contain only single value*/
3382
            /*The rounding factor is calculated with the formula
3383
            Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
3384
            rounding factor = (1 - DeadZone Val)
3385
3386
            Assumption: Cabac states of All the sub-blocks in the TU are considered independent
3387
            */
3388
0
            if((ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING))
3389
0
            {
3390
0
                if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0))
3391
0
                {
3392
0
                    double i4_lamda_modifier;
3393
3394
0
                    if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
3395
0
                    {
3396
0
                        i4_lamda_modifier =
3397
0
                            ps_ctxt->i4_lamda_modifier *
3398
0
                            CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
3399
0
                    }
3400
0
                    else
3401
0
                    {
3402
0
                        i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
3403
0
                    }
3404
0
                    if(ps_ctxt->i4_use_const_lamda_modifier)
3405
0
                    {
3406
0
                        if(ISLICE == ps_ctxt->i1_slice_type)
3407
0
                        {
3408
0
                            i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
3409
0
                        }
3410
0
                        else
3411
0
                        {
3412
0
                            i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
3413
0
                        }
3414
0
                    }
3415
3416
0
                    ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
3417
0
                        &ps_ctxt->i4_quant_round_tu[0][0];
3418
0
                    ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
3419
0
                        &ps_ctxt->i4_quant_round_tu[1][0];
3420
3421
0
                    memset(
3422
0
                        ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
3423
0
                        0,
3424
0
                        trans_size * trans_size * sizeof(WORD32));
3425
0
                    memset(
3426
0
                        ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
3427
0
                        0,
3428
0
                        trans_size * trans_size * sizeof(WORD32));
3429
3430
0
                    ihevce_quant_rounding_factor_gen(
3431
0
                        trans_size,
3432
0
                        1,
3433
0
                        &ps_ctxt->s_rdopt_entropy_ctxt,
3434
0
                        ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
3435
0
                        ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
3436
0
                        i4_lamda_modifier,
3437
0
                        1);
3438
0
                }
3439
0
                else
3440
0
                {
3441
0
                    ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
3442
0
                        ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3];
3443
0
                    ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
3444
0
                        ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3];
3445
0
                }
3446
0
            }
3447
3448
            /* call T Q IT IQ and recon function */
3449
0
            ai4_cbf[curr_intra_buf_idx] = ihevce_t_q_iq_ssd_scan_fxn(
3450
0
                ps_ctxt,
3451
0
                pu1_pred,
3452
0
                pred_strd,
3453
0
                (UWORD8 *)pv_curr_src,
3454
0
                src_strd,
3455
0
                pi2_curr_deq_data,
3456
0
                curr_deq_data_strd,
3457
0
                pu1_recon,
3458
0
                i4_recon_stride,
3459
0
                pu1_curr_ecd_data,
3460
0
                pu1_csbf_buf,
3461
0
                csbf_strd,
3462
0
                trans_size,
3463
0
                PRED_MODE_INTRA,
3464
0
                &ai8_cand_rdopt_cost[curr_intra_buf_idx],
3465
0
                &ai4_curr_bytes[curr_intra_buf_idx],
3466
0
                &ai4_tu_bits[curr_intra_buf_idx],
3467
0
                &au4_tu_sad[curr_intra_buf_idx],
3468
0
                &ai4_zero_col[curr_intra_buf_idx],
3469
0
                &ai4_zero_row[curr_intra_buf_idx],
3470
0
                &au1_recon_availability[curr_intra_buf_idx],
3471
0
                i4_perform_rdoq,
3472
0
                i4_perform_sbh,
3473
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
3474
0
                i4_alpha_stim_multiplier,
3475
0
                u1_is_cu_noisy,
3476
0
#endif
3477
0
                u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
3478
0
                1 /*early_cbf */
3479
0
            );
3480
3481
#if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
3482
            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
3483
            {
3484
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
3485
                ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
3486
                    pv_curr_src,
3487
                    src_strd,
3488
                    pu1_pred,
3489
                    pred_strd,
3490
                    ai8_cand_rdopt_cost[curr_intra_buf_idx],
3491
                    i4_alpha_stim_multiplier,
3492
                    trans_size,
3493
                    0,
3494
                    ps_ctxt->u1_enable_psyRDOPT,
3495
                    NULL_PLANE);
3496
#else
3497
                if(u1_compute_spatial_ssd && au1_recon_availability[curr_intra_buf_idx])
3498
                {
3499
                    ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
3500
                        pv_curr_src,
3501
                        src_strd,
3502
                        pu1_recon,
3503
                        i4_recon_stride,
3504
                        ai8_cand_rdopt_cost[curr_intra_buf_idx],
3505
                        i4_alpha_stim_multiplier,
3506
                        trans_size,
3507
                        0,
3508
                        ps_ctxt->u1_enable_psyRDOPT,
3509
                        NULL_PLANE);
3510
                }
3511
                else
3512
                {
3513
                    ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
3514
                        pv_curr_src,
3515
                        src_strd,
3516
                        pu1_pred,
3517
                        pred_strd,
3518
                        ai8_cand_rdopt_cost[curr_intra_buf_idx],
3519
                        i4_alpha_stim_multiplier,
3520
                        trans_size,
3521
                        0,
3522
                        ps_ctxt->u1_enable_psyRDOPT,
3523
                        NULL_PLANE);
3524
                }
3525
#endif
3526
            }
3527
#endif
3528
3529
0
            if(TU_EQ_SUBCU == func_proc_mode)
3530
0
            {
3531
0
                ASSERT(ai4_curr_bytes[curr_intra_buf_idx] < MAX_SCAN_COEFFS_BYTES_4x4);
3532
0
            }
3533
3534
            /* based on CBF/No CBF copy the corresponding state */
3535
0
            if(0 == ai4_cbf[curr_intra_buf_idx])
3536
0
            {
3537
                /* RDOPT copy States :update to init state if 0 cbf */
3538
0
                COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3539
0
                    &au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] +
3540
0
                        IHEVC_CAB_COEFFX_PREFIX,
3541
0
                    &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
3542
0
                    IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3543
0
            }
3544
0
            else
3545
0
            {
3546
                /* RDOPT copy States :update to new state only if CBF is non zero */
3547
0
                COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3548
0
                    &au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] +
3549
0
                        IHEVC_CAB_COEFFX_PREFIX,
3550
0
                    &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
3551
0
                            .s_cabac_ctxt.au1_ctxt_models[0] +
3552
0
                        IHEVC_CAB_COEFFX_PREFIX,
3553
0
                    IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3554
0
            }
3555
3556
            /* call the function which perform intra mode prediction */
3557
0
            ihevce_intra_pred_mode_signaling(
3558
0
                ps_top_nbr_4x4->b6_luma_intra_mode,
3559
0
                ps_tmp_lt_4x4->b6_luma_intra_mode,
3560
0
                s_nbr.u1_top_avail,
3561
0
                s_nbr.u1_left_avail,
3562
0
                cu_pos_y,
3563
0
                curr_pred_mode,
3564
0
                &as_intra_prev_rem[curr_intra_buf_idx]);
3565
            /******************************************************************/
3566
            /* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN
3567
            The bits for these are evaluated for every RDO mode of current subcu
3568
            as they can significantly contribute to RDO cost.  Note that these
3569
            bits are not accounted for here (ai8_cand_rdopt_cost) as they
3570
            are accounted for in encode_cu call later */
3571
3572
            /******************************************************************/
3573
            /* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN
3574
            The bits for these are evaluated for every RDO mode of current subcu
3575
            as they can significantly contribute to RDO cost.  Note that these
3576
            bits are not accounted for here (ai8_cand_rdopt_cost) as they
3577
            are accounted for in encode_cu call later */
3578
3579
            /* Estimate bits to encode prev rem flag  for NXN mode */
3580
0
            {
3581
0
                WORD32 bits_frac = gau2_ihevce_cabac_bin_to_bits
3582
0
                    [u1_prev_flag_cabac_ctxt ^
3583
0
                     as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag];
3584
3585
                /* rounding the fractional bits to nearest integer */
3586
0
                bits = ((bits_frac + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q);
3587
0
            }
3588
3589
            /* based on prev flag all the mpmidx bits and rem bits */
3590
0
            if(1 == as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag)
3591
0
            {
3592
                /* mpm_idx */
3593
0
                bits += as_intra_prev_rem[curr_intra_buf_idx].b2_mpm_idx ? 2 : 1;
3594
0
            }
3595
0
            else
3596
0
            {
3597
                /* rem intra mode */
3598
0
                bits += 5;
3599
0
            }
3600
3601
0
            bits += ai4_tu_bits[curr_intra_buf_idx];
3602
3603
            /* compute the total cost for current candidate */
3604
0
            curr_cost = ai8_cand_rdopt_cost[curr_intra_buf_idx];
3605
3606
            /* get the final ssd cost */
3607
0
            curr_cost +=
3608
0
                COMPUTE_RATE_COST_CLIP30(bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
3609
3610
            /* check of the best candidate cost */
3611
0
            if(curr_cost < best_cand_cost)
3612
0
            {
3613
0
                best_cand_cost = curr_cost;
3614
0
                best_cand_idx = cand_ctr;
3615
0
                best_intra_buf_idx = curr_intra_buf_idx;
3616
0
                curr_intra_buf_idx = !curr_intra_buf_idx;
3617
0
            }
3618
0
        }
3619
3620
        /***************    For TU_EQ_SUBCU case    *****************/
3621
        /* Copy the pred for best cand. to the final pred array     */
3622
        /* Copy the iq-coeff for best cand. to the final array      */
3623
        /* copy the best coeffs data to final buffer                */
3624
0
        if(TU_EQ_SUBCU == func_proc_mode)
3625
0
        {
3626
            /* Copy the pred for best cand. to the final pred array */
3627
3628
0
            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
3629
0
                (UWORD8 *)pv_pred_org,
3630
0
                pred_strd_org,
3631
0
                &au1_cur_pred_data[best_intra_buf_idx][0],
3632
0
                trans_size,
3633
0
                trans_size,
3634
0
                trans_size);
3635
3636
            /* Copy the deq-coeff for best cand. to the final array */
3637
3638
0
            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
3639
0
                (UWORD8 *)pi2_deq_data,
3640
0
                deq_data_strd << 1,
3641
0
                (UWORD8 *)&ai2_intra_deq_coeffs[best_intra_buf_idx][0],
3642
0
                trans_size << 1,
3643
0
                trans_size << 1,
3644
0
                trans_size);
3645
            /* copy the coeffs to final cu ecd bytes buffer */
3646
0
            memcpy(
3647
0
                pu1_ecd_data,
3648
0
                &au1_intra_coeffs[best_intra_buf_idx][0],
3649
0
                ai4_curr_bytes[best_intra_buf_idx]);
3650
3651
0
            pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[best_intra_buf_idx]) +
3652
0
                        (ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride;
3653
0
        }
3654
3655
        /*----------   Calculate Recon for the best INTRA mode     ---------*/
3656
        /* TU_EQ_CU case : No need for recon, otherwise recon is required   */
3657
        /* Compute recon only for the best mode for TU_EQ_SUBCU case        */
3658
0
        if(u1_compute_recon)
3659
0
        {
3660
0
            ihevce_it_recon_fxn(
3661
0
                ps_ctxt,
3662
0
                pi2_deq_data,
3663
0
                deq_data_strd,
3664
0
                (UWORD8 *)pv_pred_org,
3665
0
                pred_strd_org,
3666
0
                pu1_recon,
3667
0
                i4_recon_stride,
3668
0
                pu1_ecd_data,
3669
0
                trans_size,
3670
0
                PRED_MODE_INTRA,
3671
0
                ai4_cbf[best_intra_buf_idx],
3672
0
                ai4_zero_col[best_intra_buf_idx],
3673
0
                ai4_zero_row[best_intra_buf_idx]);
3674
3675
0
            ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx;
3676
0
        }
3677
0
        else if(u1_compute_spatial_ssd && au1_recon_availability[best_intra_buf_idx])
3678
0
        {
3679
0
            ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx;
3680
0
        }
3681
0
        else
3682
0
        {
3683
0
            ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
3684
0
        }
3685
3686
        /* RDOPT copy States :update to best modes state */
3687
0
        COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3688
0
            &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
3689
0
            &au1_intra_nxn_rdopt_ctxt_models[best_intra_buf_idx][0] + IHEVC_CAB_COEFFX_PREFIX,
3690
0
            IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3691
3692
        /* copy the prev,mpm_idx and rem modes from best cand */
3693
0
        ps_final_prms->as_intra_prev_rem[ctr] = as_intra_prev_rem[best_intra_buf_idx];
3694
3695
        /* update the cabac context of prev intra pred mode flag */
3696
0
        u1_prev_flag_cabac_ctxt = gau1_ihevc_next_state
3697
0
            [(u1_prev_flag_cabac_ctxt << 1) |
3698
0
             as_intra_prev_rem[best_intra_buf_idx].b1_prev_intra_luma_pred_flag];
3699
3700
        /* accumulate the TU bits into cu bits */
3701
0
        cu_bits += ai4_tu_bits[best_intra_buf_idx];
3702
3703
        /* copy the intra pred mode for chroma reuse */
3704
0
        if(is_sub_pu_in_hq == 0)
3705
0
        {
3706
0
            *pu1_intra_pred_mode = pu1_curr_mode[best_cand_idx];
3707
0
        }
3708
0
        else
3709
0
        {
3710
0
            *pu1_intra_pred_mode = best_cand_idx;
3711
0
        }
3712
3713
        /* Store luma mode as chroma mode. If chroma prcs happens, and
3714
        if a diff. mode wins, it should update this!! */
3715
0
        if(1 == chrm_present_flag)
3716
0
        {
3717
0
            if(is_sub_pu_in_hq == 0)
3718
0
            {
3719
0
                ps_final_prms->u1_chroma_intra_pred_actual_mode =
3720
0
                    ((ps_ctxt->u1_chroma_array_type == 2)
3721
0
                         ? gau1_chroma422_intra_angle_mapping[pu1_curr_mode[best_cand_idx]]
3722
0
                         : pu1_curr_mode[best_cand_idx]);
3723
0
            }
3724
0
            else
3725
0
            {
3726
0
                ps_final_prms->u1_chroma_intra_pred_actual_mode =
3727
0
                    ((ps_ctxt->u1_chroma_array_type == 2)
3728
0
                         ? gau1_chroma422_intra_angle_mapping[best_cand_idx]
3729
0
                         : best_cand_idx);
3730
0
            }
3731
3732
0
            ps_final_prms->u1_chroma_intra_pred_mode = 4;
3733
0
        }
3734
3735
        /*remember the cbf flag to replicate qp for 4x4 neighbour*/
3736
0
        ps_final_prms->u1_is_cu_coded |= ai4_cbf[best_intra_buf_idx];
3737
3738
        /*accumulate ssd over all TU of intra CU*/
3739
0
        ps_final_prms->u4_cu_sad += au4_tu_sad[best_intra_buf_idx];
3740
3741
        /* update the bytes */
3742
0
        ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
3743
0
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed =
3744
0
            ai4_curr_bytes[best_intra_buf_idx];
3745
        /* update the zero_row and col info for the final mode */
3746
0
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col =
3747
0
            ai4_zero_col[best_intra_buf_idx];
3748
0
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row =
3749
0
            ai4_zero_row[best_intra_buf_idx];
3750
3751
0
        ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
3752
3753
        /* update the total bytes cons */
3754
0
        ecd_data_bytes_cons += ai4_curr_bytes[best_intra_buf_idx];
3755
0
        pu1_ecd_data += ai4_curr_bytes[best_intra_buf_idx];
3756
3757
0
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = ai4_cbf[best_intra_buf_idx];
3758
0
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
3759
0
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
3760
0
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
3761
0
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
3762
0
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag;
3763
0
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp;
3764
0
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0;
3765
0
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0;
3766
0
        GETRANGE(tx_size, trans_size);
3767
0
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
3768
0
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x;
3769
0
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y;
3770
3771
        /* replicate the nbr 4x4 structure for all 4x4 blocks of the current TU */
3772
0
        ps_nbr_4x4->b1_skip_flag = 0;
3773
0
        ps_nbr_4x4->b1_intra_flag = 1;
3774
0
        ps_nbr_4x4->b1_pred_l0_flag = 0;
3775
0
        ps_nbr_4x4->b1_pred_l1_flag = 0;
3776
3777
0
        if(is_sub_pu_in_hq == 0)
3778
0
        {
3779
0
            ps_nbr_4x4->b6_luma_intra_mode = pu1_curr_mode[best_cand_idx];
3780
0
        }
3781
0
        else
3782
0
        {
3783
0
            ps_nbr_4x4->b6_luma_intra_mode = best_cand_idx;
3784
0
        }
3785
3786
0
        ps_nbr_4x4->b1_y_cbf = ai4_cbf[best_intra_buf_idx];
3787
3788
        /* since tu size can be less than cusize, replication is done with strd */
3789
0
        {
3790
0
            WORD32 i, j;
3791
0
            nbr_4x4_t *ps_tmp_4x4;
3792
3793
0
            ps_tmp_4x4 = ps_nbr_4x4;
3794
3795
0
            for(i = 0; i < num_4x4_in_tu; i++)
3796
0
            {
3797
0
                for(j = 0; j < num_4x4_in_tu; j++)
3798
0
                {
3799
0
                    ps_tmp_4x4[j] = *ps_nbr_4x4;
3800
0
                }
3801
                /* row level update*/
3802
0
                ps_tmp_4x4 += num_4x4_in_cu;
3803
0
            }
3804
0
        }
3805
3806
0
        if(TU_EQ_SUBCU == func_proc_mode)
3807
0
        {
3808
0
            pu1_luma_mode += ((MAX_INTRA_CU_CANDIDATES * 4) + 2 + 1);
3809
0
        }
3810
3811
0
        if((num_cu_parts > 1) && (ctr < 3))
3812
0
        {
3813
            /* set the neighbour map to 1 */
3814
0
            ihevce_set_nbr_map(
3815
0
                ps_ctxt->pu1_ctb_nbr_map,
3816
0
                ps_ctxt->i4_nbr_map_strd,
3817
0
                cu_pos_x,
3818
0
                cu_pos_y,
3819
0
                trans_size >> 2,
3820
0
                1);
3821
3822
            /* block level updates block number (1 & 3 )*/
3823
0
            pv_curr_src = (UWORD8 *)pv_curr_src + trans_size;
3824
0
            pv_pred_org = (UWORD8 *)pv_pred_org + trans_size;
3825
0
            pi2_deq_data += trans_size;
3826
3827
0
            switch(ctr)
3828
0
            {
3829
0
            case 0:
3830
0
            {
3831
0
                pu1_left = pu1_recon + trans_size - 1;
3832
0
                pu1_top += trans_size;
3833
0
                pu1_top_left = pu1_top - 1;
3834
0
                left_strd = i4_recon_stride;
3835
3836
0
                break;
3837
0
            }
3838
0
            case 1:
3839
0
            {
3840
0
                ASSERT(
3841
0
                    (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 0) ||
3842
0
                    (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 1));
3843
3844
                /* Since the 'lumaRefSubstitution' function expects both Top and */
3845
                /* TopRight recon pixels to be present in the same buffer */
3846
0
                if(ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] !=
3847
0
                   ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1])
3848
0
                {
3849
0
                    UWORD8 *pu1_src =
3850
0
                        ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3851
0
                             [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) +
3852
0
                        trans_size;
3853
0
                    UWORD8 *pu1_dst =
3854
0
                        ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3855
0
                             [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) +
3856
0
                        trans_size;
3857
3858
0
                    ps_ctxt->s_cmn_opt_func.pf_copy_2d(
3859
0
                        pu1_dst, i4_recon_stride, pu1_src, i4_recon_stride, trans_size, trans_size);
3860
3861
0
                    ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] =
3862
0
                        ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0];
3863
0
                }
3864
3865
0
                pu1_left = (UWORD8 *)pv_cu_left + trans_size * cu_left_stride;
3866
0
                pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3867
0
                               [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) +
3868
0
                          (trans_size - 1) * i4_recon_stride;
3869
0
                pu1_top_left = pu1_left - cu_left_stride;
3870
0
                left_strd = cu_left_stride;
3871
3872
0
                break;
3873
0
            }
3874
0
            case 2:
3875
0
            {
3876
0
                ASSERT(
3877
0
                    (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 0) ||
3878
0
                    (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 1));
3879
3880
0
                pu1_left = pu1_recon + trans_size - 1;
3881
0
                pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3882
0
                               [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) +
3883
0
                          (trans_size - 1) * i4_recon_stride + trans_size;
3884
0
                pu1_top_left = pu1_top - 1;
3885
0
                left_strd = i4_recon_stride;
3886
3887
0
                break;
3888
0
            }
3889
0
            }
3890
3891
0
            pu1_csbf_buf += num_4x4_in_tu;
3892
0
            cu_pos_x += num_4x4_in_tu;
3893
0
            ps_nbr_4x4 += num_4x4_in_tu;
3894
0
            ps_top_nbr_4x4 += num_4x4_in_tu;
3895
0
            ps_tmp_lt_4x4 = ps_nbr_4x4 - 1;
3896
3897
0
            pu1_intra_pred_mode++;
3898
3899
            /* after 2 blocks increment the pointers to bottom blocks */
3900
0
            if(1 == ctr)
3901
0
            {
3902
0
                pv_curr_src = (UWORD8 *)pv_curr_src - (trans_size << 1);
3903
0
                pv_curr_src = (UWORD8 *)pv_curr_src + (trans_size * src_strd);
3904
3905
0
                pv_pred_org = (UWORD8 *)pv_pred_org - (trans_size << 1);
3906
0
                pv_pred_org = (UWORD8 *)pv_pred_org + (trans_size * pred_strd_org);
3907
0
                pi2_deq_data -= (trans_size << 1);
3908
0
                pi2_deq_data += (trans_size * deq_data_strd);
3909
3910
0
                pu1_csbf_buf -= (num_4x4_in_tu << 1);
3911
0
                pu1_csbf_buf += (num_4x4_in_tu * csbf_strd);
3912
3913
0
                ps_nbr_4x4 -= (num_4x4_in_tu << 1);
3914
0
                ps_nbr_4x4 += (num_4x4_in_tu * num_4x4_in_cu);
3915
0
                ps_top_nbr_4x4 = ps_nbr_4x4 - num_4x4_in_cu;
3916
0
                ps_tmp_lt_4x4 = ps_left_nbr_4x4 + (num_4x4_in_tu * nbr_4x4_left_strd);
3917
3918
                /* decrement pos x to start */
3919
0
                cu_pos_x -= (num_4x4_in_tu << 1);
3920
0
                cu_pos_y += num_4x4_in_tu;
3921
0
            }
3922
0
        }
3923
3924
0
#if RDOPT_ENABLE
3925
        /* compute the RDOPT cost for the current TU */
3926
0
        ai8_cand_rdopt_cost[best_intra_buf_idx] += COMPUTE_RATE_COST_CLIP30(
3927
0
            ai4_tu_bits[best_intra_buf_idx], ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
3928
0
#endif
3929
3930
        /* accumulate the costs */
3931
0
        total_rdopt_cost += ai8_cand_rdopt_cost[best_intra_buf_idx];
3932
3933
0
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
3934
0
        {
3935
            /* Early exit : If the current running cost exceeds
3936
            the prev. best mode cost, break */
3937
0
            if(total_rdopt_cost > prev_best_rdopt_cost)
3938
0
            {
3939
0
                return (total_rdopt_cost);
3940
0
            }
3941
0
        }
3942
3943
        /* if transfrom size is 4x4 then only first luma 4x4 will have chroma*/
3944
0
        chrm_present_flag = (4 != trans_size) ? 1 : INTRA_PRED_CHROMA_IDX_NONE;
3945
3946
0
        pu4_nbr_flags++;
3947
0
    }
3948
    /* Modify the cost function for this CU. */
3949
    /* loop in for 8x8 blocks */
3950
0
    if(ps_ctxt->u1_enable_psyRDOPT)
3951
0
    {
3952
0
        UWORD8 *pu1_recon_cu;
3953
0
        WORD32 recon_stride;
3954
0
        WORD32 curr_pos_x;
3955
0
        WORD32 curr_pos_y;
3956
0
        WORD32 start_index;
3957
0
        WORD32 num_horz_cu_in_ctb;
3958
0
        WORD32 cu_size;
3959
0
        WORD32 had_block_size;
3960
3961
        /* tODO: sreenivasa ctb size has to be used appropriately */
3962
0
        had_block_size = 8;
3963
0
        cu_size = ps_cu_analyse->u1_cu_size; /* todo */
3964
0
        num_horz_cu_in_ctb = 64 / had_block_size;
3965
3966
0
        curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
3967
0
        curr_pos_y = ps_cu_analyse->b3_cu_pos_y << 3; /* pel units */
3968
0
        recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
3969
0
        pu1_recon_cu =
3970
0
            ((UWORD8 *)ps_final_prms->s_recon_datastore
3971
0
                 .apv_luma_recon_bufs[ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]);
3972
        /* + \  curr_pos_x + curr_pos_y * recon_stride; */
3973
3974
        /* start index to index the source satd of curr cu int he current ctb*/
3975
0
        start_index =
3976
0
            (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
3977
3978
0
        {
3979
0
            total_rdopt_cost += ihevce_psy_rd_cost(
3980
0
                ps_ctxt->ai4_source_satd_8x8,
3981
0
                pu1_recon_cu,
3982
0
                recon_stride,
3983
0
                1,  //
3984
0
                cu_size,
3985
0
                0,  // pic type
3986
0
                0,  //layer id
3987
0
                ps_ctxt->i4_satd_lamda,  // lambda
3988
0
                start_index,
3989
0
                ps_ctxt->u1_is_input_data_hbd,
3990
0
                ps_ctxt->u4_psy_strength,
3991
0
                &ps_ctxt->s_cmn_opt_func
3992
3993
0
            );  // 8 bit
3994
0
        }
3995
0
    }
3996
3997
#if !FORCE_INTRA_TU_DEPTH_TO_0  //RATIONALISE_NUM_RDO_MODES_IN_PQ_AND_HQ
3998
0
    if(TU_EQ_SUBCU == func_proc_mode)
3999
0
    {
4000
0
        UWORD8 au1_tu_eq_cu_div2_modes[4];
4001
0
        UWORD8 au1_freq_of_mode[4];
4002
4003
0
        WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D(
4004
0
            ps_final_prms->au1_intra_pred_mode, au1_tu_eq_cu_div2_modes, au1_freq_of_mode, 4);
4005
4006
0
        if(1 == i4_num_clusters)
4007
0
        {
4008
0
            ps_final_prms->u2_num_pus_in_cu = 1;
4009
0
            ps_final_prms->u1_part_mode = SIZE_2Nx2N;
4010
0
        }
4011
0
    }
4012
0
#endif
4013
4014
    /* store the num TUs*/
4015
0
    ps_final_prms->u2_num_tus_in_cu = u2_num_tus_in_cu;
4016
4017
    /* update the bytes consumed */
4018
0
    ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
4019
4020
    /* store the current cu size to final prms */
4021
0
    ps_final_prms->u1_cu_size = ps_cu_analyse->u1_cu_size;
4022
4023
    /* cu bits will be having luma residual bits till this point    */
4024
    /* if zero_cbf eval is disabled then cu bits will be zero       */
4025
0
    ps_final_prms->u4_cu_luma_res_bits = cu_bits;
4026
4027
    /* ------------- Chroma processing -------------- */
4028
    /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEDD preset*/
4029
0
    if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
4030
0
    {
4031
0
        LWORD64 chrm_rdopt_cost;
4032
0
        WORD32 chrm_rdopt_tu_bits;
4033
4034
        /* Store the current RDOPT cost to enable early exit in chrom_prcs */
4035
0
        ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
4036
4037
0
        chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
4038
0
            ps_ctxt,
4039
0
            curr_buf_idx,
4040
0
            func_proc_mode,
4041
0
            ps_chrm_cu_buf_prms->pu1_curr_src,
4042
0
            ps_chrm_cu_buf_prms->i4_chrm_src_stride,
4043
0
            ps_chrm_cu_buf_prms->pu1_cu_left,
4044
0
            ps_chrm_cu_buf_prms->pu1_cu_top,
4045
0
            ps_chrm_cu_buf_prms->pu1_cu_top_left,
4046
0
            ps_chrm_cu_buf_prms->i4_cu_left_stride,
4047
0
            cu_pos_x_8pelunits,
4048
0
            cu_pos_y_8pelunits,
4049
0
            &chrm_rdopt_tu_bits,
4050
0
            i4_alpha_stim_multiplier,
4051
0
            u1_is_cu_noisy);
4052
4053
0
#if WEIGH_CHROMA_COST
4054
0
        chrm_rdopt_cost = (LWORD64)(
4055
0
            (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
4056
0
             (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
4057
0
            CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
4058
0
#endif
4059
4060
0
#if CHROMA_RDOPT_ENABLE
4061
0
        total_rdopt_cost += chrm_rdopt_cost;
4062
0
#endif
4063
0
        cu_bits += chrm_rdopt_tu_bits;
4064
4065
        /* cu bits for chroma residual if chroma rdopt is on       */
4066
        /* if zero_cbf eval is disabled then cu bits will be zero  */
4067
0
        ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
4068
4069
0
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
4070
0
        {
4071
            /* Early exit : If the current running cost exceeds
4072
            the prev. best mode cost, break */
4073
0
            if(total_rdopt_cost > prev_best_rdopt_cost)
4074
0
            {
4075
0
                return (total_rdopt_cost);
4076
0
            }
4077
0
        }
4078
0
    }
4079
0
    else
4080
0
    {}
4081
4082
    /* RDOPT copy States :  Best after all luma TUs to current */
4083
0
    COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4084
0
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4085
0
                .s_cabac_ctxt.au1_ctxt_models[0] +
4086
0
            IHEVC_CAB_COEFFX_PREFIX,
4087
0
        &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4088
0
        IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4089
4090
    /* get the neighbour availability flags for current cu  */
4091
0
    ihevce_get_only_nbr_flag(
4092
0
        &s_nbr,
4093
0
        ps_ctxt->pu1_ctb_nbr_map,
4094
0
        ps_ctxt->i4_nbr_map_strd,
4095
0
        (cu_pos_x_8pelunits << 1),
4096
0
        (cu_pos_y_8pelunits << 1),
4097
0
        (trans_size << 1),
4098
0
        (trans_size << 1));
4099
4100
    /* call the entropy rdo encode to get the bit estimate for current cu */
4101
    /*if ZERO_CBF eval is enabled then this function will return only CU header bits */
4102
0
    {
4103
        /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
4104
0
        WORD32 cbf_bits, header_bits;
4105
4106
0
        header_bits = ihevce_entropy_rdo_encode_cu(
4107
0
            &ps_ctxt->s_rdopt_entropy_ctxt,
4108
0
            ps_final_prms,
4109
0
            cu_pos_x_8pelunits,
4110
0
            cu_pos_y_8pelunits,
4111
0
            ps_cu_analyse->u1_cu_size,
4112
0
            s_nbr.u1_top_avail,
4113
0
            s_nbr.u1_left_avail,
4114
0
            &ps_final_prms->pu1_cu_coeffs[0],
4115
0
            &cbf_bits);
4116
4117
0
        cu_bits += header_bits;
4118
4119
        /* cbf bits are excluded from header bits, instead considered as texture bits */
4120
        /* incase if zero cbf eval is disabled then texture bits gets added here */
4121
0
        ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
4122
0
        ps_final_prms->u4_cu_cbf_bits = cbf_bits;
4123
4124
0
#if RDOPT_ENABLE
4125
        /* add the cost of coding the cu bits */
4126
0
        total_rdopt_cost +=
4127
0
            COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
4128
0
#endif
4129
0
    }
4130
0
    return (total_rdopt_cost);
4131
0
}
4132
/*!
4133
******************************************************************************
4134
* \if Function name : ihevce_inter_rdopt_cu_ntu \endif
4135
*
4136
* \brief
4137
*    Inter coding unit function which performs the TQ IT IQ recon for luma
4138
*
4139
* \param[in] ps_ctxt       enc_loop module ctxt pointer
4140
* \param[in] ps_inter_cand pointer to inter candidate structure
4141
* \param[in] pv_src        pointer to source data buffer
4142
* \param[in] cu_size       Current CU size
4143
* \param[in] cu_pos_x      cu position x w.r.t to ctb
4144
* \param[in] cu_pos_y      cu position y w.r.t to ctb
4145
* \param[in] ps_cu_prms    pointer to CU level params (supplies the luma source stride)
4146
* \param[in] curr_buf_idx  buffer index for current output storage
4147
* \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure
4148
*
4149
* \return
4150
*    Rdopt cost
4151
*
4152
* \author
4153
*  Ittiam
4154
*
4155
*****************************************************************************
4156
*/
4157
LWORD64 ihevce_inter_rdopt_cu_ntu(
4158
    ihevce_enc_loop_ctxt_t *ps_ctxt,
4159
    enc_loop_cu_prms_t *ps_cu_prms,
4160
    void *pv_src,
4161
    WORD32 cu_size,
4162
    WORD32 cu_pos_x,
4163
    WORD32 cu_pos_y,
4164
    WORD32 curr_buf_idx,
4165
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
4166
    cu_inter_cand_t *ps_inter_cand,
4167
    cu_analyse_t *ps_cu_analyse,
4168
    WORD32 i4_alpha_stim_multiplier)
4169
0
{
4170
0
    enc_loop_cu_final_prms_t *ps_final_prms;
4171
0
    nbr_4x4_t *ps_nbr_4x4;
4172
0
    tu_prms_t s_tu_prms[64 * 4];
4173
0
    tu_prms_t *ps_tu_prms;
4174
4175
0
    WORD32 i4_perform_rdoq;
4176
0
    WORD32 i4_perform_sbh;
4177
0
    WORD32 ai4_tu_split_flags[4];
4178
0
    WORD32 ai4_tu_early_cbf[4];
4179
0
    WORD32 num_split_flags = 1;
4180
0
    WORD32 i;
4181
0
    UWORD8 u1_tu_size;
4182
0
    UWORD8 *pu1_pred;
4183
0
    UWORD8 *pu1_ecd_data;
4184
0
    WORD16 *pi2_deq_data;
4185
0
    UWORD8 *pu1_csbf_buf;
4186
0
    UWORD8 *pu1_tu_sz_sft;
4187
0
    UWORD8 *pu1_tu_posx;
4188
0
    UWORD8 *pu1_tu_posy;
4189
0
    LWORD64 total_rdopt_cost;
4190
0
    WORD32 ctr;
4191
0
    WORD32 chrm_ctr;
4192
0
    WORD32 num_tu_in_cu = 0;
4193
0
    WORD32 pred_stride;
4194
0
    WORD32 recon_stride;
4195
0
    WORD32 trans_size = ps_cu_analyse->u1_cu_size;
4196
0
    WORD32 csbf_strd;
4197
0
    WORD32 chrm_present_flag;
4198
0
    WORD32 ecd_data_bytes_cons;
4199
0
    WORD32 num_4x4_in_cu;
4200
0
    WORD32 num_4x4_in_tu;
4201
0
    WORD32 recon_func_mode;
4202
0
    WORD32 cu_bits;
4203
0
    UWORD8 u1_compute_spatial_ssd;
4204
4205
    /* min_trans_size is initialized to some huge number than usual TU sizes */
4206
0
    WORD32 i4_min_trans_size = 256;
4207
    /* Get the RDOPT cost of the best CU mode for early_exit */
4208
0
    LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
4209
0
    WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
4210
4211
    /* model for no residue syntax qt root cbf flag */
4212
0
    UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX];
4213
4214
    /* backup copy of cabac states for restoration if zero cu reside rdo wins later */
4215
0
    UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
4216
4217
    /* for skip cases tables are not reqquired */
4218
0
    UWORD8 u1_skip_tu_sz_sft = 0;
4219
0
    UWORD8 u1_skip_tu_posx = 0;
4220
0
    UWORD8 u1_skip_tu_posy = 0;
4221
0
    UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
4222
4223
    /* get the pointers based on curbuf idx */
4224
0
    ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
4225
0
    ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
4226
0
    pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
4227
0
    pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
4228
0
    csbf_strd = ps_ctxt->i4_cu_csbf_strd;
4229
0
    pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
4230
4231
0
    pred_stride = ps_inter_cand->i4_pred_data_stride;
4232
0
    recon_stride = cu_size;
4233
0
    pu1_pred = ps_inter_cand->pu1_pred_data;
4234
0
    chrm_ctr = 0;
4235
0
    ecd_data_bytes_cons = 0;
4236
0
    total_rdopt_cost = 0;
4237
0
    num_4x4_in_cu = cu_size >> 2;
4238
0
    recon_func_mode = PRED_MODE_INTER;
4239
0
    cu_bits = 0;
4240
4241
    /* get the 4x4 level postion of current cu */
4242
0
    cu_pos_x = cu_pos_x << 1;
4243
0
    cu_pos_y = cu_pos_y << 1;
4244
4245
    /* default value for cu coded flag */
4246
0
    ps_final_prms->u1_is_cu_coded = 0;
4247
4248
    /*init of ssd of CU accuumulated over all TU*/
4249
0
    ps_final_prms->u4_cu_sad = 0;
4250
4251
    /* populate the coeffs scan idx */
4252
0
    ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
4253
4254
0
#if ENABLE_INTER_ZCU_COST
4255
    /* reset cu not coded cost */
4256
0
    ps_ctxt->i8_cu_not_coded_cost = 0;
4257
4258
    /* backup copy of cabac states for restoration if zero cu reside rdo wins later */
4259
0
    memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END);
4260
0
#endif
4261
4262
0
    if(ps_cu_analyse->u1_cu_size == 64)
4263
0
    {
4264
0
        num_split_flags = 4;
4265
0
        u1_tu_size = 32;
4266
0
    }
4267
0
    else
4268
0
    {
4269
0
        num_split_flags = 1;
4270
0
        u1_tu_size = ps_cu_analyse->u1_cu_size;
4271
0
    }
4272
4273
    /* ckeck for skip mode */
4274
0
    if(1 == ps_final_prms->u1_skip_flag)
4275
0
    {
4276
0
        if(64 == cu_size)
4277
0
        {
4278
            /* TU = CU/2 is set but no trnaform is evaluated  */
4279
0
            num_tu_in_cu = 4;
4280
0
            pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
4281
0
            pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
4282
0
            pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
4283
0
        }
4284
0
        else
4285
0
        {
4286
            /* TU = CU is set but no trnaform is evaluated  */
4287
0
            num_tu_in_cu = 1;
4288
0
            pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
4289
0
            pu1_tu_posx = &u1_skip_tu_posx;
4290
0
            pu1_tu_posy = &u1_skip_tu_posy;
4291
0
        }
4292
4293
0
        recon_func_mode = PRED_MODE_SKIP;
4294
0
    }
4295
    /* check for PU part mode being AMP or No AMP */
4296
0
    else if(ps_final_prms->u1_part_mode < SIZE_2NxnU)
4297
0
    {
4298
0
        if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64))
4299
0
        {
4300
            /* TU= CU is evaluated 2Nx2N inter case */
4301
0
            num_tu_in_cu = 1;
4302
0
            pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
4303
0
            pu1_tu_posx = &u1_skip_tu_posx;
4304
0
            pu1_tu_posy = &u1_skip_tu_posy;
4305
0
        }
4306
0
        else
4307
0
        {
4308
            /* currently TU= CU/2 is evaluated for all inter case */
4309
0
            num_tu_in_cu = 4;
4310
0
            pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
4311
0
            pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
4312
0
            pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
4313
0
        }
4314
0
    }
4315
0
    else
4316
0
    {
4317
        /* for AMP cases one level of TU recurssion is done */
4318
        /* based on oreintation of the partitions           */
4319
0
        num_tu_in_cu = 10;
4320
0
        pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0];
4321
0
        pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
4322
0
        pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
4323
0
    }
4324
4325
0
    ps_tu_prms = &s_tu_prms[0];
4326
0
    num_tu_in_cu = 0;
4327
4328
0
    for(i = 0; i < num_split_flags; i++)
4329
0
    {
4330
0
        WORD32 i4_x_off = 0, i4_y_off = 0;
4331
4332
0
        if(i == 1 || i == 3)
4333
0
        {
4334
0
            i4_x_off = 32;
4335
0
        }
4336
4337
0
        if(i == 2 || i == 3)
4338
0
        {
4339
0
            i4_y_off = 32;
4340
0
        }
4341
4342
0
        if(1 == ps_final_prms->u1_skip_flag)
4343
0
        {
4344
0
            ai4_tu_split_flags[0] = 0;
4345
0
            ps_inter_cand->ai4_tu_split_flag[i] = 0;
4346
4347
0
            ai4_tu_early_cbf[0] = 0;
4348
0
        }
4349
0
        else
4350
0
        {
4351
0
            ai4_tu_split_flags[0] = ps_inter_cand->ai4_tu_split_flag[i];
4352
0
            ai4_tu_early_cbf[0] = ps_inter_cand->ai4_tu_early_cbf[i];
4353
0
        }
4354
4355
0
        ps_tu_prms->u1_tu_size = u1_tu_size;
4356
4357
0
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
4358
0
            ps_tu_prms,
4359
0
            &num_tu_in_cu,
4360
0
            0,
4361
0
            ai4_tu_split_flags[0],
4362
0
            ai4_tu_early_cbf[0],
4363
0
            i4_x_off,
4364
0
            i4_y_off);
4365
0
    }
4366
4367
    /* loop for all tu blocks in current cu */
4368
0
    ps_tu_prms = &s_tu_prms[0];
4369
0
    for(ctr = 0; ctr < num_tu_in_cu; ctr++)
4370
0
    {
4371
0
        trans_size = ps_tu_prms->u1_tu_size;
4372
4373
0
        if(i4_min_trans_size > trans_size)
4374
0
        {
4375
0
            i4_min_trans_size = trans_size;
4376
0
        }
4377
0
        ps_tu_prms++;
4378
0
    }
4379
4380
0
    if(ps_ctxt->i1_cu_qp_delta_enable)
4381
0
    {
4382
0
        ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, i4_min_trans_size, 0);
4383
0
    }
4384
4385
0
    if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
4386
0
    {
4387
0
        ps_ctxt->i8_cl_ssd_lambda_qf =
4388
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
4389
0
             100.0f);
4390
0
        ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
4391
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
4392
0
             (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
4393
0
    }
4394
4395
0
    u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
4396
0
                             (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
4397
0
                             CONVERT_SSDS_TO_SPATIAL_DOMAIN;
4398
4399
0
    if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
4400
0
    {
4401
0
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
4402
0
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
4403
0
    }
4404
4405
0
    if(!u1_compute_spatial_ssd)
4406
0
    {
4407
0
        ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
4408
0
        ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
4409
0
    }
4410
0
    else
4411
0
    {
4412
0
        ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1;
4413
0
    }
4414
4415
0
    ps_tu_prms = &s_tu_prms[0];
4416
4417
0
    ASSERT(num_tu_in_cu <= 256);
4418
4419
    /* RDOPT copy States :  TU init (best until prev TU) to current */
4420
0
    memcpy(
4421
0
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4422
0
             .s_cabac_ctxt.au1_ctxt_models[0],
4423
0
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
4424
0
        IHEVC_CAB_COEFFX_PREFIX);
4425
4426
0
    for(ctr = 0; ctr < num_tu_in_cu; ctr++)
4427
0
    {
4428
0
        WORD32 curr_bytes;
4429
0
        WORD32 tx_size;
4430
0
        WORD32 cbf, zero_col, zero_row;
4431
0
        LWORD64 rdopt_cost;
4432
0
        UWORD8 u1_is_recon_available;
4433
4434
0
        WORD32 curr_pos_x;
4435
0
        WORD32 curr_pos_y;
4436
0
        nbr_4x4_t *ps_cur_nbr_4x4;
4437
0
        UWORD8 *pu1_cur_pred;
4438
0
        UWORD8 *pu1_cur_src;
4439
0
        UWORD8 *pu1_cur_recon;
4440
0
        WORD16 *pi2_cur_deq_data;
4441
0
        UWORD32 u4_tu_sad;
4442
0
        WORD32 tu_bits;
4443
4444
0
        WORD32 i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
4445
4446
0
        trans_size = ps_tu_prms->u1_tu_size;
4447
        /* get the current pos x and pos y in pixels */
4448
0
        curr_pos_x = ps_tu_prms->u1_x_off;  //((cu_size >> 2) * pu1_tu_posx[ctr]);
4449
0
        curr_pos_y = ps_tu_prms->u1_y_off;  //((cu_size >> 2) * pu1_tu_posy[ctr]);
4450
4451
0
        num_4x4_in_tu = trans_size >> 2;
4452
4453
#if FORCE_8x8_TFR
4454
        if(cu_size == 64)
4455
        {
4456
            curr_pos_x = ((cu_size >> 3) * pu1_tu_posx[ctr]);
4457
            curr_pos_y = ((cu_size >> 3) * pu1_tu_posy[ctr]);
4458
        }
4459
#endif
4460
4461
        /* increment the pointers to start of current TU  */
4462
0
        pu1_cur_src = ((UWORD8 *)pv_src + curr_pos_x);
4463
0
        pu1_cur_src += (curr_pos_y * src_strd);
4464
0
        pu1_cur_pred = (pu1_pred + curr_pos_x);
4465
0
        pu1_cur_pred += (curr_pos_y * pred_stride);
4466
0
        pi2_cur_deq_data = pi2_deq_data + curr_pos_x;
4467
0
        pi2_cur_deq_data += (curr_pos_y * cu_size);
4468
0
        pu1_cur_recon = ((UWORD8 *)ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0]) +
4469
0
                        curr_pos_x + curr_pos_y * i4_recon_stride;
4470
4471
0
        ps_cur_nbr_4x4 = (ps_nbr_4x4 + (curr_pos_x >> 2));
4472
0
        ps_cur_nbr_4x4 += ((curr_pos_y >> 2) * num_4x4_in_cu);
4473
4474
        /* RDOPT copy States :  TU init (best until prev TU) to current */
4475
0
        COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4476
0
            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4477
0
                    .s_cabac_ctxt.au1_ctxt_models[0] +
4478
0
                IHEVC_CAB_COEFFX_PREFIX,
4479
0
            &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4480
0
            IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4481
4482
0
        i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
4483
0
        i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
4484
4485
        /*2 Multi- dimensinal array based on trans size  of rounding factor to be added here */
4486
        /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
4487
        /* Currently the complete array will contain only single value*/
4488
        /*The rounding factor is calculated with the formula
4489
        Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
4490
        rounding factor = (1 - DeadZone Val)
4491
4492
        Assumption: Cabac states of All the sub-blocks in the TU are considered independent
4493
        */
4494
0
        if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0))
4495
0
        {
4496
0
            double i4_lamda_modifier;
4497
4498
0
            if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
4499
0
            {
4500
0
                i4_lamda_modifier = ps_ctxt->i4_lamda_modifier *
4501
0
                                    CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
4502
0
            }
4503
0
            else
4504
0
            {
4505
0
                i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
4506
0
            }
4507
0
            if(ps_ctxt->i4_use_const_lamda_modifier)
4508
0
            {
4509
0
                if(ISLICE == ps_ctxt->i1_slice_type)
4510
0
                {
4511
0
                    i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
4512
0
                }
4513
0
                else
4514
0
                {
4515
0
                    i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
4516
0
                }
4517
0
            }
4518
0
            ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
4519
0
                &ps_ctxt->i4_quant_round_tu[0][0];
4520
0
            ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
4521
0
                &ps_ctxt->i4_quant_round_tu[1][0];
4522
4523
0
            memset(
4524
0
                ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
4525
0
                0,
4526
0
                trans_size * trans_size * sizeof(WORD32));
4527
0
            memset(
4528
0
                ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
4529
0
                0,
4530
0
                trans_size * trans_size * sizeof(WORD32));
4531
4532
0
            ihevce_quant_rounding_factor_gen(
4533
0
                trans_size,
4534
0
                1,
4535
0
                &ps_ctxt->s_rdopt_entropy_ctxt,
4536
0
                ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
4537
0
                ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
4538
0
                i4_lamda_modifier,
4539
0
                1);
4540
0
        }
4541
0
        else
4542
0
        {
4543
0
            ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
4544
0
                ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3];
4545
0
            ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
4546
0
                ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3];
4547
0
        }
4548
4549
        /* call T Q IT IQ and recon function */
4550
0
        cbf = ihevce_t_q_iq_ssd_scan_fxn(
4551
0
            ps_ctxt,
4552
0
            pu1_cur_pred,
4553
0
            pred_stride,
4554
0
            pu1_cur_src,
4555
0
            src_strd,
4556
0
            pi2_cur_deq_data,
4557
0
            cu_size,
4558
0
            pu1_cur_recon,
4559
0
            i4_recon_stride,
4560
0
            pu1_ecd_data,
4561
0
            pu1_csbf_buf,
4562
0
            csbf_strd,
4563
0
            trans_size,
4564
0
            recon_func_mode,
4565
0
            &rdopt_cost,
4566
0
            &curr_bytes,
4567
0
            &tu_bits,
4568
0
            &u4_tu_sad,
4569
0
            &zero_col,
4570
0
            &zero_row,
4571
0
            &u1_is_recon_available,
4572
0
            i4_perform_rdoq,
4573
0
            i4_perform_sbh,
4574
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
4575
0
            i4_alpha_stim_multiplier,
4576
0
            u1_is_cu_noisy,
4577
0
#endif
4578
0
            u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
4579
0
            ps_ctxt->u1_use_early_cbf_data ? ps_tu_prms->i4_early_cbf : 1);
4580
4581
#if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
4582
        if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
4583
        {
4584
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
4585
            rdopt_cost = ihevce_inject_stim_into_distortion(
4586
                pu1_cur_src,
4587
                src_strd,
4588
                pu1_cur_pred,
4589
                pred_stride,
4590
                rdopt_cost,
4591
                i4_alpha_stim_multiplier,
4592
                trans_size,
4593
                0,
4594
                ps_ctxt->u1_enable_psyRDOPT,
4595
                NULL_PLANE);
4596
#else
4597
            if(u1_compute_spatial_ssd && u1_is_recon_available)
4598
            {
4599
                rdopt_cost = ihevce_inject_stim_into_distortion(
4600
                    pu1_cur_src,
4601
                    src_strd,
4602
                    pu1_cur_recon,
4603
                    i4_recon_stride,
4604
                    rdopt_cost,
4605
                    i4_alpha_stim_multiplier,
4606
                    trans_size,
4607
                    0,
4608
                    NULL_PLANE);
4609
            }
4610
            else
4611
            {
4612
                rdopt_cost = ihevce_inject_stim_into_distortion(
4613
                    pu1_cur_src,
4614
                    src_strd,
4615
                    pu1_cur_pred,
4616
                    pred_stride,
4617
                    rdopt_cost,
4618
                    i4_alpha_stim_multiplier,
4619
                    trans_size,
4620
                    0,
4621
                    ps_ctxt->u1_enable_psyRDOPT,
4622
                    NULL_PLANE);
4623
            }
4624
#endif
4625
        }
4626
#endif
4627
4628
0
        if(u1_compute_spatial_ssd && u1_is_recon_available)
4629
0
        {
4630
0
            ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = 0;
4631
0
        }
4632
0
        else
4633
0
        {
4634
0
            ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
4635
0
        }
4636
4637
        /* accumulate the TU sad into cu sad */
4638
0
        ps_final_prms->u4_cu_sad += u4_tu_sad;
4639
4640
        /* accumulate the TU bits into cu bits */
4641
0
        cu_bits += tu_bits;
4642
4643
        /* inter cu is coded if any of the tu is coded in it */
4644
0
        ps_final_prms->u1_is_cu_coded |= cbf;
4645
4646
        /* call the entropy function to get the bits */
4647
        /* add that to rd opt cost(SSD)              */
4648
4649
        /* update the bytes */
4650
0
        ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
4651
0
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = curr_bytes;
4652
        /* update the zero_row and col info for the final mode */
4653
0
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col = zero_col;
4654
0
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row = zero_row;
4655
4656
        /* update the bytes */
4657
0
        ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
4658
4659
        /* update the total bytes cons */
4660
0
        ecd_data_bytes_cons += curr_bytes;
4661
0
        pu1_ecd_data += curr_bytes;
4662
4663
        /* RDOPT copy States :  New updated after curr TU to TU init */
4664
0
        if(0 != cbf)
4665
0
        {
4666
            /* update to new state only if CBF is non zero */
4667
0
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4668
0
                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4669
0
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4670
0
                        .s_cabac_ctxt.au1_ctxt_models[0] +
4671
0
                    IHEVC_CAB_COEFFX_PREFIX,
4672
0
                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4673
0
        }
4674
4675
        /* by default chroma present is set to 1*/
4676
0
        chrm_present_flag = 1;
4677
0
        if(4 == trans_size)
4678
0
        {
4679
            /* if tusize is 4x4 then only first luma 4x4 will have chroma*/
4680
0
            if(0 != chrm_ctr)
4681
0
            {
4682
0
                chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE;
4683
0
            }
4684
4685
            /* increment the chrm ctr unconditionally */
4686
0
            chrm_ctr++;
4687
4688
            /* after ctr reached 4 reset it */
4689
0
            if(4 == chrm_ctr)
4690
0
            {
4691
0
                chrm_ctr = 0;
4692
0
            }
4693
0
        }
4694
4695
0
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = cbf;
4696
0
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
4697
0
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
4698
0
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
4699
0
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
4700
0
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag;
4701
0
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp;
4702
0
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0;
4703
0
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0;
4704
0
        GETRANGE(tx_size, trans_size);
4705
0
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
4706
0
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + (curr_pos_x >> 2);
4707
0
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + (curr_pos_y >> 2);
4708
4709
        /* replicate the nbr 4x4 structure for all 4x4 blocks of the current TU */
4710
0
        ps_cur_nbr_4x4->b1_y_cbf = cbf;
4711
        /*copy the cu qp. This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/
4712
0
        ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
4713
4714
        /* Qp and cbf are stored for the all 4x4 in TU */
4715
0
        {
4716
0
            WORD32 i, j;
4717
0
            nbr_4x4_t *ps_tmp_4x4;
4718
0
            ps_tmp_4x4 = ps_cur_nbr_4x4;
4719
4720
0
            for(i = 0; i < num_4x4_in_tu; i++)
4721
0
            {
4722
0
                for(j = 0; j < num_4x4_in_tu; j++)
4723
0
                {
4724
0
                    ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp;
4725
0
                    ps_tmp_4x4[j].b1_y_cbf = cbf;
4726
0
                }
4727
                /* row level update*/
4728
0
                ps_tmp_4x4 += num_4x4_in_cu;
4729
0
            }
4730
0
        }
4731
4732
0
#if RDOPT_ENABLE
4733
        /* compute the rdopt cost */
4734
0
        rdopt_cost +=
4735
0
            COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
4736
0
#endif
4737
        /* accumulate the costs */
4738
0
        total_rdopt_cost += rdopt_cost;
4739
4740
0
        ps_tu_prms++;
4741
4742
0
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
4743
0
        {
4744
            /* Early exit : If the current running cost exceeds
4745
            the prev. best mode cost, break */
4746
0
            if(total_rdopt_cost > prev_best_rdopt_cost)
4747
0
            {
4748
0
                return (total_rdopt_cost);
4749
0
            }
4750
0
        }
4751
0
    }
4752
4753
    /* Modify the cost function for this CU. */
4754
    /* loop in for 8x8 blocks */
4755
0
    if(ps_ctxt->u1_enable_psyRDOPT)
4756
0
    {
4757
0
        UWORD8 *pu1_recon_cu;
4758
0
        WORD32 recon_stride;
4759
0
        WORD32 curr_pos_x;
4760
0
        WORD32 curr_pos_y;
4761
0
        WORD32 start_index;
4762
0
        WORD32 num_horz_cu_in_ctb;
4763
0
        WORD32 had_block_size;
4764
4765
        /* TODO: sreenivasa ctb size has to be used appropriately */
4766
0
        had_block_size = 8;
4767
0
        num_horz_cu_in_ctb = 64 / had_block_size;
4768
4769
0
        curr_pos_x = cu_pos_x << 2; /* pel units */
4770
0
        curr_pos_y = cu_pos_y << 2; /* pel units */
4771
0
        recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
4772
0
        pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore
4773
0
                            .apv_luma_recon_bufs[0]);  // already pointing to the current CU recon
4774
        //+ \curr_pos_x + curr_pos_y * recon_stride;
4775
4776
        /* start index to index the source satd of curr cu in the current ctb */
4777
0
        start_index =
4778
0
            (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
4779
4780
0
        {
4781
0
            total_rdopt_cost += ihevce_psy_rd_cost(
4782
0
                ps_ctxt->ai4_source_satd_8x8,
4783
0
                pu1_recon_cu,
4784
0
                recon_stride,
4785
0
                1,  //howz stride
4786
0
                cu_size,
4787
0
                0,  // pic type
4788
0
                0,  //layer id
4789
0
                ps_ctxt->i4_satd_lamda,  // lambda
4790
0
                start_index,
4791
0
                ps_ctxt->u1_is_input_data_hbd,
4792
0
                ps_ctxt->u4_psy_strength,
4793
0
                &ps_ctxt->s_cmn_opt_func);  // 8 bit
4794
0
        }
4795
0
    }
4796
4797
    /* store the num TUs*/
4798
0
    ps_final_prms->u2_num_tus_in_cu = num_tu_in_cu;
4799
4800
    /* update the bytes consumed */
4801
0
    ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
4802
4803
    /* store the current cu size to final prms */
4804
0
    ps_final_prms->u1_cu_size = cu_size;
4805
4806
    /* cu bits will be having luma residual bits till this point    */
4807
    /* if zero_cbf eval is disabled then cu bits will be zero       */
4808
0
    ps_final_prms->u4_cu_luma_res_bits = cu_bits;
4809
4810
    /* ------------- Chroma processing -------------- */
4811
    /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEED preset */
4812
0
    if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
4813
0
    {
4814
0
        LWORD64 chrm_rdopt_cost;
4815
0
        WORD32 chrm_rdopt_tu_bits;
4816
4817
        /* Store the current RDOPT cost to enable early exit in chrom_prcs */
4818
0
        ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
4819
4820
0
        chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
4821
0
            ps_ctxt,
4822
0
            curr_buf_idx,
4823
0
            0, /* TU mode : Don't care in Inter patrh */
4824
0
            ps_chrm_cu_buf_prms->pu1_curr_src,
4825
0
            ps_chrm_cu_buf_prms->i4_chrm_src_stride,
4826
0
            ps_chrm_cu_buf_prms->pu1_cu_left,
4827
0
            ps_chrm_cu_buf_prms->pu1_cu_top,
4828
0
            ps_chrm_cu_buf_prms->pu1_cu_top_left,
4829
0
            ps_chrm_cu_buf_prms->i4_cu_left_stride,
4830
0
            (cu_pos_x >> 1),
4831
0
            (cu_pos_y >> 1),
4832
0
            &chrm_rdopt_tu_bits,
4833
0
            i4_alpha_stim_multiplier,
4834
0
            u1_is_cu_noisy);
4835
4836
0
#if WEIGH_CHROMA_COST
4837
0
        chrm_rdopt_cost = (LWORD64)(
4838
0
            (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
4839
0
             (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
4840
0
            CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
4841
0
#endif
4842
4843
0
#if CHROMA_RDOPT_ENABLE
4844
0
        total_rdopt_cost += chrm_rdopt_cost;
4845
0
#endif
4846
0
        cu_bits += chrm_rdopt_tu_bits;
4847
4848
        /* during chroma evaluation if skip decision was over written     */
4849
        /* then the current skip candidate is set to a non skip candidate */
4850
0
        ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag;
4851
4852
        /* cu bits for chroma residual if chroma rdopt is on       */
4853
        /* if zero_cbf eval is disabled then cu bits will be zero  */
4854
0
        ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
4855
4856
0
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
4857
0
        {
4858
            /* Early exit : If the current running cost exceeds
4859
            the prev. best mode cost, break */
4860
0
            if(total_rdopt_cost > prev_best_rdopt_cost)
4861
0
            {
4862
0
                return (total_rdopt_cost);
4863
0
            }
4864
0
        }
4865
0
    }
4866
0
    else
4867
0
    {}
4868
4869
0
#if SHRINK_INTER_TUTREE
4870
    /* ------------- Quadtree TU split  optimization ------------  */
4871
0
    if(ps_final_prms->u1_is_cu_coded)
4872
0
    {
4873
0
        ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree(
4874
0
            &ps_final_prms->as_tu_enc_loop[0],
4875
0
            &ps_final_prms->as_tu_enc_loop_temp_prms[0],
4876
0
            &ps_final_prms->s_recon_datastore,
4877
0
            num_tu_in_cu,
4878
0
            (ps_ctxt->u1_chroma_array_type == 2));
4879
0
    }
4880
0
#endif
4881
4882
    /* RDOPT copy States :  Best after all luma TUs (and chroma,if enabled)to current */
4883
0
    COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4884
0
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4885
0
                .s_cabac_ctxt.au1_ctxt_models[0] +
4886
0
            IHEVC_CAB_COEFFX_PREFIX,
4887
0
        &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4888
0
        IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4889
4890
    /* -------- Bit estimate for RD opt -------------- */
4891
0
    {
4892
0
        nbr_avail_flags_t s_nbr;
4893
        /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
4894
0
        WORD32 cbf_bits, header_bits;
4895
4896
        /* get the neighbour availability flags for current cu  */
4897
0
        ihevce_get_only_nbr_flag(
4898
0
            &s_nbr,
4899
0
            ps_ctxt->pu1_ctb_nbr_map,
4900
0
            ps_ctxt->i4_nbr_map_strd,
4901
0
            cu_pos_x,
4902
0
            cu_pos_y,
4903
0
            (cu_size >> 2),
4904
0
            (cu_size >> 2));
4905
4906
        /* call the entropy rdo encode to get the bit estimate for current cu */
4907
0
        header_bits = ihevce_entropy_rdo_encode_cu(
4908
0
            &ps_ctxt->s_rdopt_entropy_ctxt,
4909
0
            ps_final_prms,
4910
0
            (cu_pos_x >> 1), /*  back to 8x8 pel units   */
4911
0
            (cu_pos_y >> 1), /*  back to 8x8 pel units   */
4912
0
            cu_size,
4913
0
            ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
4914
0
                                           : s_nbr.u1_top_avail,
4915
0
            s_nbr.u1_left_avail,
4916
0
            &ps_final_prms->pu1_cu_coeffs[0],
4917
0
            &cbf_bits);
4918
4919
0
        cu_bits += header_bits;
4920
4921
        /* cbf bits are excluded from header bits, instead considered as texture bits */
4922
        /* incase if zero cbf eval is disabled then texture bits gets added here */
4923
0
        ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
4924
0
        ps_final_prms->u4_cu_cbf_bits = cbf_bits;
4925
4926
0
#if RDOPT_ENABLE
4927
        /* add the cost of coding the header bits */
4928
0
        total_rdopt_cost +=
4929
0
            COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
4930
4931
0
#if ENABLE_INTER_ZCU_COST
4932
        /* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */
4933
0
        if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level))
4934
0
        {
4935
0
            LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost;
4936
4937
0
            WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) &&
4938
0
                                      (1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag);
4939
4940
0
            cab_ctxt_t *ps_cab_ctxt =
4941
0
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt;
4942
4943
            /* Read header bits generated after ihevce_entropy_rdo_encode_cu() call  */
4944
0
            UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12;
4945
4946
            /* account for coding qt_root_cbf = 0 */
4947
            /* First subtract cost for coding as 1 (part of header bits) and then add cost for coding as 0 */
4948
0
            u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0];
4949
0
            if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1])
4950
0
                u4_cu_hdr_bits_q12 = 0;
4951
0
            else
4952
0
                u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1];
4953
4954
            /* add the cost of coding the header bits */
4955
0
            i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30(
4956
0
                u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */,
4957
0
                ps_ctxt->i8_cl_ssd_lambda_qf,
4958
0
                (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
4959
4960
0
            if(ps_ctxt->u1_enable_psyRDOPT)
4961
0
            {
4962
0
                i8_cu_not_coded_cost = total_rdopt_cost + 1;
4963
0
            }
4964
4965
            /* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */
4966
0
            if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu))
4967
0
            {
4968
0
                WORD32 tx_size;
4969
4970
                /* force cu as not coded and update the cost */
4971
0
                ps_final_prms->u1_is_cu_coded = 0;
4972
0
                ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
4973
0
                ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
4974
4975
0
                total_rdopt_cost = i8_cu_not_coded_cost;
4976
4977
                /* reset num TUs to 1 unless cu size is 64 (then 4 TUs of size 32) */
4978
0
                ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1;
4979
0
                trans_size = (64 == cu_size) ? 32 : cu_size;
4980
0
                GETRANGE(tx_size, trans_size);
4981
4982
                /* reset the bytes consumed */
4983
0
                ps_final_prms->i4_num_bytes_ecd_data = 0;
4984
4985
                /* reset texture related bits and roll back header bits*/
4986
0
                ps_final_prms->u4_cu_cbf_bits = 0;
4987
0
                ps_final_prms->u4_cu_luma_res_bits = 0;
4988
0
                ps_final_prms->u4_cu_chroma_res_bits = 0;
4989
0
                ps_final_prms->u4_cu_hdr_bits =
4990
0
                    (u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q;
4991
4992
                /* update cabac model with qtroot cbf = 0 decision */
4993
0
                ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] =
4994
0
                    gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1];
4995
4996
                /* restore untouched cabac models for, tusplit, cbfs, texture etc */
4997
0
                memcpy(
4998
0
                    &ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM],
4999
0
                    &au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM],
5000
0
                    (IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM));
5001
5002
                /* mark all tus as not coded for final eval */
5003
0
                for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++)
5004
0
                {
5005
0
                    WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0;
5006
0
                    WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0;
5007
5008
0
                    nbr_4x4_t *ps_cur_nbr_4x4 =
5009
0
                        ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu);
5010
5011
0
                    num_4x4_in_tu = trans_size >> 2;
5012
5013
0
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0;
5014
0
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0;
5015
0
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0;
5016
5017
0
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0;
5018
0
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
5019
0
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
5020
5021
0
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
5022
0
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
5023
5024
0
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
5025
0
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x;
5026
0
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y;
5027
5028
                    /* reset cbf for the all 4x4 in TU */
5029
0
                    {
5030
0
                        WORD32 i, j;
5031
0
                        nbr_4x4_t *ps_tmp_4x4;
5032
0
                        ps_tmp_4x4 = ps_cur_nbr_4x4;
5033
5034
0
                        for(i = 0; i < num_4x4_in_tu; i++)
5035
0
                        {
5036
0
                            for(j = 0; j < num_4x4_in_tu; j++)
5037
0
                            {
5038
0
                                ps_tmp_4x4[j].b1_y_cbf = 0;
5039
0
                            }
5040
                            /* row level update*/
5041
0
                            ps_tmp_4x4 += num_4x4_in_cu;
5042
0
                        }
5043
0
                    }
5044
0
                }
5045
0
            }
5046
0
        }
5047
0
#endif /* ENABLE_INTER_ZCU_COST */
5048
5049
0
#endif /* RDOPT_ENABLE */
5050
0
    }
5051
5052
0
    return (total_rdopt_cost);
5053
0
}
5054
5055
#if ENABLE_RDO_BASED_TU_RECURSION
5056
LWORD64 ihevce_inter_tu_tree_selector_and_rdopt_cost_computer(
5057
    ihevce_enc_loop_ctxt_t *ps_ctxt,
5058
    enc_loop_cu_prms_t *ps_cu_prms,
5059
    void *pv_src,
5060
    WORD32 cu_size,
5061
    WORD32 cu_pos_x,
5062
    WORD32 cu_pos_y,
5063
    WORD32 curr_buf_idx,
5064
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
5065
    cu_inter_cand_t *ps_inter_cand,
5066
    cu_analyse_t *ps_cu_analyse,
5067
    WORD32 i4_alpha_stim_multiplier)
5068
0
{
5069
0
    tu_tree_node_t as_tu_nodes[256 + 64 + 16 + 4 + 1];
5070
0
    buffer_data_for_tu_t s_buffer_data_for_tu;
5071
0
    enc_loop_cu_final_prms_t *ps_final_prms;
5072
0
    nbr_4x4_t *ps_nbr_4x4;
5073
5074
0
    WORD32 num_split_flags = 1;
5075
0
    UWORD8 u1_tu_size;
5076
0
    UWORD8 *pu1_pred;
5077
0
    UWORD8 *pu1_ecd_data;
5078
0
    WORD16 *pi2_deq_data;
5079
0
    UWORD8 *pu1_csbf_buf;
5080
0
    UWORD8 *pu1_tu_sz_sft;
5081
0
    UWORD8 *pu1_tu_posx;
5082
0
    UWORD8 *pu1_tu_posy;
5083
0
    LWORD64 total_rdopt_cost;
5084
0
    WORD32 ctr;
5085
0
    WORD32 chrm_ctr;
5086
0
    WORD32 pred_stride;
5087
0
    WORD32 recon_stride;
5088
0
    WORD32 trans_size = ps_cu_analyse->u1_cu_size;
5089
0
    WORD32 csbf_strd;
5090
0
    WORD32 ecd_data_bytes_cons;
5091
0
    WORD32 num_4x4_in_cu;
5092
0
    WORD32 num_4x4_in_tu;
5093
0
    WORD32 recon_func_mode;
5094
0
    WORD32 cu_bits;
5095
0
    UWORD8 u1_compute_spatial_ssd;
5096
    /* backup copy of cabac states for restoration if zero cu residue rdo wins later */
5097
0
    UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
5098
5099
0
    WORD32 i4_min_trans_size = 256;
5100
0
    LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
5101
0
    WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
5102
    /* model for no residue syntax qt root cbf flag */
5103
0
    UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX];
5104
0
    UWORD8 u1_skip_tu_sz_sft = 0;
5105
0
    UWORD8 u1_skip_tu_posx = 0;
5106
0
    UWORD8 u1_skip_tu_posy = 0;
5107
0
    UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
5108
5109
0
    ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
5110
0
    ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
5111
0
    pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
5112
0
    pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
5113
0
    csbf_strd = ps_ctxt->i4_cu_csbf_strd;
5114
0
    pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
5115
0
    pred_stride = ps_inter_cand->i4_pred_data_stride;
5116
0
    recon_stride = cu_size;
5117
0
    pu1_pred = ps_inter_cand->pu1_pred_data;
5118
0
    chrm_ctr = 0;
5119
0
    ecd_data_bytes_cons = 0;
5120
0
    total_rdopt_cost = 0;
5121
0
    num_4x4_in_cu = cu_size >> 2;
5122
0
    recon_func_mode = PRED_MODE_INTER;
5123
0
    cu_bits = 0;
5124
5125
    /* get the 4x4 level position of current cu */
5126
0
    cu_pos_x = cu_pos_x << 1;
5127
0
    cu_pos_y = cu_pos_y << 1;
5128
5129
0
    ps_final_prms->u1_is_cu_coded = 0;
5130
0
    ps_final_prms->u4_cu_sad = 0;
5131
5132
    /* populate the coeffs scan idx */
5133
0
    ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
5134
5135
0
#if ENABLE_INTER_ZCU_COST
5136
    /* reset cu not coded cost */
5137
0
    ps_ctxt->i8_cu_not_coded_cost = 0;
5138
5139
    /* backup copy of cabac states for restoration if zero cu residue rdo wins later */
5140
0
    memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END);
5141
0
#endif
5142
5143
0
    if(ps_cu_analyse->u1_cu_size == 64)
5144
0
    {
5145
0
        num_split_flags = 4;
5146
0
        u1_tu_size = 32;
5147
0
    }
5148
0
    else
5149
0
    {
5150
0
        num_split_flags = 1;
5151
0
        u1_tu_size = ps_cu_analyse->u1_cu_size;
5152
0
    }
5153
5154
0
    if(1 == ps_final_prms->u1_skip_flag)
5155
0
    {
5156
0
        if(64 == cu_size)
5157
0
        {
5158
            /* TU = CU/2 is set but no transform is evaluated  */
5159
0
            pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
5160
0
            pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
5161
0
            pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
5162
0
        }
5163
0
        else
5164
0
        {
5165
            /* TU = CU is set but no transform is evaluated  */
5166
0
            pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
5167
0
            pu1_tu_posx = &u1_skip_tu_posx;
5168
0
            pu1_tu_posy = &u1_skip_tu_posy;
5169
0
        }
5170
5171
0
        recon_func_mode = PRED_MODE_SKIP;
5172
0
    }
5173
    /* check for PU part mode being AMP or No AMP */
5174
0
    else if(ps_final_prms->u1_part_mode < SIZE_2NxnU)
5175
0
    {
5176
0
        if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64))
5177
0
        {
5178
            /* TU= CU is evaluated 2Nx2N inter case */
5179
0
            pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
5180
0
            pu1_tu_posx = &u1_skip_tu_posx;
5181
0
            pu1_tu_posy = &u1_skip_tu_posy;
5182
0
        }
5183
0
        else
5184
0
        {
5185
            /* currently TU= CU/2 is evaluated for all inter case */
5186
0
            pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
5187
0
            pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
5188
0
            pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
5189
0
        }
5190
0
    }
5191
0
    else
5192
0
    {
5193
        /* for AMP cases one level of TU recursion is done  */
5194
        /* based on orientation of the partitions           */
5195
0
        pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0];
5196
0
        pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
5197
0
        pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
5198
0
    }
5199
5200
0
    i4_min_trans_size = 4;
5201
5202
0
    if(ps_ctxt->i1_cu_qp_delta_enable)
5203
0
    {
5204
0
        ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, i4_min_trans_size, 0);
5205
0
    }
5206
5207
0
    if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
5208
0
    {
5209
0
        ps_ctxt->i8_cl_ssd_lambda_qf =
5210
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
5211
0
             100.0f);
5212
0
        ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
5213
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
5214
0
             (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
5215
0
    }
5216
5217
0
    u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
5218
0
                             (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
5219
0
                             CONVERT_SSDS_TO_SPATIAL_DOMAIN;
5220
5221
0
    if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
5222
0
    {
5223
0
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
5224
0
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
5225
0
    }
5226
5227
0
    if(!u1_compute_spatial_ssd)
5228
0
    {
5229
0
        ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
5230
0
        ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
5231
0
    }
5232
0
    else
5233
0
    {
5234
0
        ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1;
5235
5236
0
        if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))
5237
0
        {
5238
0
            ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 1;
5239
0
        }
5240
0
    }
5241
5242
    /* RDOPT copy States :  TU init (best until prev TU) to current */
5243
0
    memcpy(
5244
0
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5245
0
             .s_cabac_ctxt.au1_ctxt_models[0],
5246
0
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
5247
0
        IHEVC_CAB_COEFFX_PREFIX);
5248
5249
0
    ihevce_tu_tree_init(
5250
0
        as_tu_nodes,
5251
0
        cu_size,
5252
0
        (cu_size == 64) ? !ps_inter_cand->b1_skip_flag : 0,
5253
0
        ps_inter_cand->b1_skip_flag ? 0 : ps_ctxt->u1_max_inter_tr_depth,
5254
0
        INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
5255
0
        ps_ctxt->u1_chroma_array_type == 2);
5256
5257
0
    if(!ps_inter_cand->b1_skip_flag && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
5258
0
    {
5259
0
        ihevce_tuSplitArray_to_tuTree_mapper(
5260
0
            as_tu_nodes,
5261
0
            ps_inter_cand->ai4_tu_split_flag,
5262
0
            cu_size,
5263
0
            cu_size,
5264
0
            MAX(MIN_TU_SIZE, (cu_size >> ps_ctxt->u1_max_inter_tr_depth)),
5265
0
            MIN(MAX_TU_SIZE, cu_size),
5266
0
            ps_inter_cand->b1_skip_flag);
5267
0
    }
5268
5269
0
    ASSERT(ihevce_tu_tree_coverage_in_cu(as_tu_nodes) == cu_size * cu_size);
5270
5271
0
#if ENABLE_INTER_ZCU_COST
5272
0
    ps_ctxt->i8_cu_not_coded_cost = 0;
5273
0
#endif
5274
5275
0
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_src = pv_src;
5276
0
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_pred = pu1_pred;
5277
0
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_recon =
5278
0
        ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0];
5279
0
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_src_stride = src_strd;
5280
0
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_pred_stride = pred_stride;
5281
0
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_recon_stride =
5282
0
        ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
5283
0
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_src = ps_chrm_cu_buf_prms->pu1_curr_src;
5284
0
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred =
5285
0
        ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
5286
0
        curr_buf_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) + ((ps_ctxt->u1_chroma_array_type == 2) *
5287
0
                                                              (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
5288
0
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_recon =
5289
0
        ps_final_prms->s_recon_datastore.apv_chroma_recon_bufs[0];
5290
0
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_src_stride =
5291
0
        ps_chrm_cu_buf_prms->i4_chrm_src_stride;
5292
0
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride =
5293
0
        ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
5294
0
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_recon_stride =
5295
0
        ps_final_prms->s_recon_datastore.i4_chromaRecon_stride;
5296
0
    s_buffer_data_for_tu.ps_nbr_data_buf = ps_nbr_4x4;
5297
0
    s_buffer_data_for_tu.pi2_deq_data = pi2_deq_data;
5298
0
    s_buffer_data_for_tu.pi2_deq_data_chroma =
5299
0
        pi2_deq_data + ps_final_prms->i4_chrm_deq_coeff_strt_idx;
5300
0
    s_buffer_data_for_tu.i4_nbr_data_buf_stride = num_4x4_in_cu;
5301
0
    s_buffer_data_for_tu.i4_deq_data_stride = cu_size;
5302
0
    s_buffer_data_for_tu.i4_deq_data_stride_chroma = cu_size;
5303
0
    s_buffer_data_for_tu.ppu1_ecd = &pu1_ecd_data;
5304
5305
0
    if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))
5306
0
    {
5307
0
        UWORD8 i;
5308
5309
0
        UWORD8 *pu1_pred = (UWORD8 *)s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred;
5310
5311
0
        for(i = 0; i < (!!ps_inter_cand->b3_part_size) + 1; i++)
5312
0
        {
5313
0
            pu_t *ps_pu;
5314
5315
0
            WORD32 inter_pu_wd;
5316
0
            WORD32 inter_pu_ht;
5317
5318
0
            ps_pu = ps_inter_cand->as_inter_pu + i;
5319
5320
0
            inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
5321
0
            inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
5322
0
            inter_pu_ht <<= (ps_ctxt->u1_chroma_array_type == 2);
5323
0
            ihevce_chroma_inter_pred_pu(
5324
0
                &ps_ctxt->s_mc_ctxt,
5325
0
                ps_pu,
5326
0
                pu1_pred,
5327
0
                s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride);
5328
0
            if(!!ps_inter_cand->b3_part_size)
5329
0
            {
5330
                /* 2Nx__ partion case */
5331
0
                if(inter_pu_wd == cu_size)
5332
0
                {
5333
0
                    pu1_pred +=
5334
0
                        (inter_pu_ht *
5335
0
                         s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride);
5336
0
                }
5337
5338
                /* __x2N partion case */
5339
0
                if(inter_pu_ht == (cu_size >> !(ps_ctxt->u1_chroma_array_type == 2)))
5340
0
                {
5341
0
                    pu1_pred += inter_pu_wd;
5342
0
                }
5343
0
            }
5344
0
        }
5345
0
    }
5346
5347
#if !ENABLE_TOP_DOWN_TU_RECURSION
5348
    total_rdopt_cost = ihevce_tu_tree_selector(
5349
        ps_ctxt,
5350
        as_tu_nodes,
5351
        &s_buffer_data_for_tu,
5352
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5353
             .s_cabac_ctxt.au1_ctxt_models[0],
5354
        recon_func_mode,
5355
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
5356
        i4_alpha_stim_multiplier,
5357
        u1_is_cu_noisy,
5358
#endif
5359
        0,
5360
        ps_ctxt->u1_max_inter_tr_depth,
5361
        ps_inter_cand->b3_part_size,
5362
        u1_compute_spatial_ssd);
5363
#else
5364
0
    total_rdopt_cost = ihevce_topDown_tu_tree_selector(
5365
0
        ps_ctxt,
5366
0
        as_tu_nodes,
5367
0
        &s_buffer_data_for_tu,
5368
0
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5369
0
             .s_cabac_ctxt.au1_ctxt_models[0],
5370
0
        recon_func_mode,
5371
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
5372
0
        i4_alpha_stim_multiplier,
5373
0
        u1_is_cu_noisy,
5374
0
#endif
5375
0
        0,
5376
0
        ps_ctxt->u1_max_inter_tr_depth,
5377
0
        ps_inter_cand->b3_part_size,
5378
0
        INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
5379
0
        u1_compute_spatial_ssd);
5380
0
#endif
5381
5382
0
    ps_final_prms->u2_num_tus_in_cu = 0;
5383
0
    ps_final_prms->u4_cu_luma_res_bits = 0;
5384
0
    ps_final_prms->u4_cu_sad = 0;
5385
0
    total_rdopt_cost = 0;
5386
0
    ecd_data_bytes_cons = 0;
5387
0
    cu_bits = 0;
5388
0
#if ENABLE_INTER_ZCU_COST
5389
0
    ps_ctxt->i8_cu_not_coded_cost = 0;
5390
0
#endif
5391
0
    ps_final_prms->u1_is_cu_coded = 0;
5392
0
    ps_final_prms->u1_cu_size = cu_size;
5393
5394
0
    ihevce_tu_selector_debriefer(
5395
0
        as_tu_nodes,
5396
0
        ps_final_prms,
5397
0
        &total_rdopt_cost,
5398
0
#if ENABLE_INTER_ZCU_COST
5399
0
        &ps_ctxt->i8_cu_not_coded_cost,
5400
0
#endif
5401
0
        &ecd_data_bytes_cons,
5402
0
        &cu_bits,
5403
0
        &ps_final_prms->u2_num_tus_in_cu,
5404
0
        ps_ctxt->i4_cu_qp,
5405
0
        cu_pos_x * 4,
5406
0
        cu_pos_y * 4,
5407
0
        INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
5408
0
        (ps_ctxt->u1_chroma_array_type == 2),
5409
0
        POS_TL);
5410
5411
0
    if(!(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)))
5412
0
    {
5413
0
        ps_final_prms->i4_chrm_cu_coeff_strt_idx = ecd_data_bytes_cons;
5414
0
    }
5415
5416
    /* Modify the cost function for this CU. */
5417
    /* loop in for 8x8 blocks */
5418
0
    if(ps_ctxt->u1_enable_psyRDOPT)
5419
0
    {
5420
0
        UWORD8 *pu1_recon_cu;
5421
0
        WORD32 recon_stride;
5422
0
        WORD32 curr_pos_x;
5423
0
        WORD32 curr_pos_y;
5424
0
        WORD32 start_index;
5425
0
        WORD32 num_horz_cu_in_ctb;
5426
0
        WORD32 had_block_size;
5427
5428
        /* tODO: sreenivasa ctb size has to be used appropriately */
5429
0
        had_block_size = 8;
5430
0
        num_horz_cu_in_ctb = 64 / had_block_size;
5431
5432
0
        curr_pos_x = cu_pos_x << 2; /* pel units */
5433
0
        curr_pos_y = cu_pos_y << 2; /* pel units */
5434
0
        recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
5435
0
        pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore
5436
0
                            .apv_luma_recon_bufs[0]);  // already pointing to the current CU recon
5437
        //+ \curr_pos_x + curr_pos_y * recon_stride;
5438
5439
        /* start index to index the source satd of curr cu int he current ctb*/
5440
0
        start_index =
5441
0
            (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
5442
5443
0
        {
5444
0
            total_rdopt_cost += ihevce_psy_rd_cost(
5445
0
                ps_ctxt->ai4_source_satd_8x8,
5446
0
                pu1_recon_cu,
5447
0
                recon_stride,
5448
0
                1,  //howz stride
5449
0
                cu_size,
5450
0
                0,  // pic type
5451
0
                0,  //layer id
5452
0
                ps_ctxt->i4_satd_lamda,  // lambda
5453
0
                start_index,
5454
0
                ps_ctxt->u1_is_input_data_hbd,
5455
0
                ps_ctxt->u4_psy_strength,
5456
0
                &ps_ctxt->s_cmn_opt_func);  // 8 bit
5457
0
        }
5458
0
    }
5459
5460
0
    ps_final_prms->u1_chroma_intra_pred_mode = 4;
5461
5462
    /* update the bytes consumed */
5463
0
    ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
5464
5465
    /* store the current cu size to final prms */
5466
0
    ps_final_prms->u1_cu_size = cu_size;
5467
    /* ------------- Chroma processing -------------- */
5468
    /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEDD preset*/
5469
0
    if(ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt &&
5470
0
       !(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)))
5471
0
    {
5472
0
        LWORD64 chrm_rdopt_cost;
5473
0
        WORD32 chrm_rdopt_tu_bits;
5474
5475
        /* Store the current RDOPT cost to enable early exit in chrom_prcs */
5476
0
        ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
5477
5478
0
        chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
5479
0
            ps_ctxt,
5480
0
            curr_buf_idx,
5481
0
            0, /* TU mode : Don't care in Inter patrh */
5482
0
            ps_chrm_cu_buf_prms->pu1_curr_src,
5483
0
            ps_chrm_cu_buf_prms->i4_chrm_src_stride,
5484
0
            ps_chrm_cu_buf_prms->pu1_cu_left,
5485
0
            ps_chrm_cu_buf_prms->pu1_cu_top,
5486
0
            ps_chrm_cu_buf_prms->pu1_cu_top_left,
5487
0
            ps_chrm_cu_buf_prms->i4_cu_left_stride,
5488
0
            (cu_pos_x >> 1),
5489
0
            (cu_pos_y >> 1),
5490
0
            &chrm_rdopt_tu_bits,
5491
0
            i4_alpha_stim_multiplier,
5492
0
            u1_is_cu_noisy);
5493
5494
0
#if WEIGH_CHROMA_COST
5495
0
        chrm_rdopt_cost = (LWORD64)(
5496
0
            (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
5497
0
             (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
5498
0
            CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
5499
0
#endif
5500
5501
0
#if CHROMA_RDOPT_ENABLE
5502
0
        total_rdopt_cost += chrm_rdopt_cost;
5503
0
#endif
5504
0
        cu_bits += chrm_rdopt_tu_bits;
5505
5506
        /* during chroma evaluation if skip decision was over written     */
5507
        /* then the current skip candidate is set to a non skip candidate */
5508
0
        ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag;
5509
5510
        /* cu bits for chroma residual if chroma rdopt is on       */
5511
        /* if zero_cbf eval is disabled then cu bits will be zero  */
5512
0
        ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
5513
5514
0
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
5515
0
        {
5516
            /* Early exit : If the current running cost exceeds
5517
            the prev. best mode cost, break */
5518
0
            if(total_rdopt_cost > prev_best_rdopt_cost)
5519
0
            {
5520
0
                return (total_rdopt_cost);
5521
0
            }
5522
0
        }
5523
0
    }
5524
0
    else
5525
0
    {}
5526
5527
0
#if SHRINK_INTER_TUTREE
5528
    /* ------------- Quadtree TU split  optimization ------------  */
5529
0
    if(ps_final_prms->u1_is_cu_coded)
5530
0
    {
5531
0
        ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree(
5532
0
            &ps_final_prms->as_tu_enc_loop[0],
5533
0
            &ps_final_prms->as_tu_enc_loop_temp_prms[0],
5534
0
            &ps_final_prms->s_recon_datastore,
5535
0
            ps_final_prms->u2_num_tus_in_cu,
5536
0
            (ps_ctxt->u1_chroma_array_type == 2));
5537
0
    }
5538
0
#endif
5539
5540
    /* RDOPT copy States :  Best after all luma TUs (and chroma,if enabled)to current */
5541
0
    COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
5542
0
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5543
0
                .s_cabac_ctxt.au1_ctxt_models[0] +
5544
0
            IHEVC_CAB_COEFFX_PREFIX,
5545
0
        &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
5546
0
        IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
5547
5548
    /* -------- Bit estimate for RD opt -------------- */
5549
0
    {
5550
0
        nbr_avail_flags_t s_nbr;
5551
        /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
5552
0
        WORD32 cbf_bits, header_bits;
5553
5554
        /* get the neighbour availability flags for current cu  */
5555
0
        ihevce_get_only_nbr_flag(
5556
0
            &s_nbr,
5557
0
            ps_ctxt->pu1_ctb_nbr_map,
5558
0
            ps_ctxt->i4_nbr_map_strd,
5559
0
            cu_pos_x,
5560
0
            cu_pos_y,
5561
0
            (cu_size >> 2),
5562
0
            (cu_size >> 2));
5563
5564
        /* call the entropy rdo encode to get the bit estimate for current cu */
5565
0
        header_bits = ihevce_entropy_rdo_encode_cu(
5566
0
            &ps_ctxt->s_rdopt_entropy_ctxt,
5567
0
            ps_final_prms,
5568
0
            (cu_pos_x >> 1), /*  back to 8x8 pel units   */
5569
0
            (cu_pos_y >> 1), /*  back to 8x8 pel units   */
5570
0
            cu_size,
5571
0
            ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
5572
0
                                           : s_nbr.u1_top_avail,
5573
0
            s_nbr.u1_left_avail,
5574
0
            &ps_final_prms->pu1_cu_coeffs[0],
5575
0
            &cbf_bits);
5576
5577
0
        cu_bits += header_bits;
5578
5579
        /* cbf bits are excluded from header bits, instead considered as texture bits */
5580
        /* incase if zero cbf eval is disabled then texture bits gets added here */
5581
0
        ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
5582
0
        ps_final_prms->u4_cu_cbf_bits = cbf_bits;
5583
5584
0
#if RDOPT_ENABLE
5585
        /* add the cost of coding the header bits */
5586
0
        total_rdopt_cost +=
5587
0
            COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
5588
5589
0
#if ENABLE_INTER_ZCU_COST
5590
        /* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */
5591
0
        if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level))
5592
0
        {
5593
0
            LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost;
5594
5595
0
            WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) &&
5596
0
                                      (1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag);
5597
5598
0
            cab_ctxt_t *ps_cab_ctxt =
5599
0
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt;
5600
5601
            /* Read header bits generatated after ihevce_entropy_rdo_encode_cu() call  */
5602
0
            UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12;
5603
5604
            /* account for coding qt_root_cbf = 0 */
5605
            /* First subtract cost for coding as 1 (part of header bits) and then add cost for coding as 0 */
5606
0
            u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0];
5607
0
            if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1])
5608
0
                u4_cu_hdr_bits_q12 = 0;
5609
0
            else
5610
0
                u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1];
5611
5612
            /* add the cost of coding the header bits */
5613
0
            i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30(
5614
0
                u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */,
5615
0
                ps_ctxt->i8_cl_ssd_lambda_qf,
5616
0
                (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
5617
5618
0
            if(ps_ctxt->u1_enable_psyRDOPT)
5619
0
            {
5620
0
                i8_cu_not_coded_cost = total_rdopt_cost + 1;
5621
0
            }
5622
5623
            /* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */
5624
0
            if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu))
5625
0
            {
5626
0
                WORD32 tx_size;
5627
5628
                /* force cu as not coded and update the cost */
5629
0
                ps_final_prms->u1_is_cu_coded = 0;
5630
0
                ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
5631
0
                ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
5632
5633
0
                total_rdopt_cost = i8_cu_not_coded_cost;
5634
5635
                /* reset num TUs to 1 unless cu size id 64 */
5636
0
                ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1;
5637
0
                trans_size = (64 == cu_size) ? 32 : cu_size;
5638
0
                GETRANGE(tx_size, trans_size);
5639
5640
                /* reset the bytes consumed */
5641
0
                ps_final_prms->i4_num_bytes_ecd_data = 0;
5642
5643
                /* reset texture related bits and roll back header bits*/
5644
0
                ps_final_prms->u4_cu_cbf_bits = 0;
5645
0
                ps_final_prms->u4_cu_luma_res_bits = 0;
5646
0
                ps_final_prms->u4_cu_chroma_res_bits = 0;
5647
0
                ps_final_prms->u4_cu_hdr_bits =
5648
0
                    (u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q;
5649
5650
                /* update cabac model with qtroot cbf = 0 decision */
5651
0
                ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] =
5652
0
                    gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1];
5653
5654
                /* restore untouched cabac models for, tusplit, cbfs, texture etc */
5655
0
                memcpy(
5656
0
                    &ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM],
5657
0
                    &au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM],
5658
0
                    (IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM));
5659
5660
                /* mark all tus as not coded for final eval */
5661
0
                for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++)
5662
0
                {
5663
0
                    WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0;
5664
0
                    WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0;
5665
5666
0
                    nbr_4x4_t *ps_cur_nbr_4x4 =
5667
0
                        ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu);
5668
5669
0
                    num_4x4_in_tu = trans_size >> 2;
5670
5671
0
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0;
5672
0
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0;
5673
0
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0;
5674
5675
0
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0;
5676
0
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
5677
0
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
5678
5679
0
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
5680
0
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
5681
5682
0
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
5683
0
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x;
5684
0
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y;
5685
5686
                    /* reset cbf for the all 4x4 in TU */
5687
0
                    {
5688
0
                        WORD32 i, j;
5689
0
                        nbr_4x4_t *ps_tmp_4x4;
5690
0
                        ps_tmp_4x4 = ps_cur_nbr_4x4;
5691
5692
0
                        for(i = 0; i < num_4x4_in_tu; i++)
5693
0
                        {
5694
0
                            for(j = 0; j < num_4x4_in_tu; j++)
5695
0
                            {
5696
0
                                ps_tmp_4x4[j].b1_y_cbf = 0;
5697
0
                            }
5698
                            /* row level update*/
5699
0
                            ps_tmp_4x4 += num_4x4_in_cu;
5700
0
                        }
5701
0
                    }
5702
0
                }
5703
0
            }
5704
0
        }
5705
0
#endif /* ENABLE_INTER_ZCU_COST */
5706
5707
0
#endif /* RDOPT_ENABLE */
5708
0
    }
5709
5710
0
    return (total_rdopt_cost);
5711
0
}
5712
#endif
5713
5714
/*!
5715
******************************************************************************
5716
* \if Function name : ihevce_inter_rdopt_cu_mc_mvp \endif
5717
*
5718
* \brief
5719
*    Inter Coding unit function which performs MC and MVP calc for RD opt mode
5720
*
5721
* \param[in] ps_ctxt       enc_loop module ctxt pointer
5722
* \param[in] ps_inter_cand pointer to inter candidate structure
5723
* \param[in] cu_size         Current CU size
5724
* \param[in] cu_pos_x        cu position x w.r.t. ctb
5725
* \param[in] cu_pos_y        cu position y w.r.t. ctb
5726
* \param[in] ps_left_nbr_4x4 Left neighbour 4x4 structure pointer
5727
* \param[in] ps_top_nbr_4x4  top neighbour 4x4 structure pointer
5728
* \param[in] ps_topleft_nbr_4x4  top left neighbour 4x4 structure pointer
5729
* \param[in] nbr_4x4_left_strd  left neighbour 4x4 buffer stride
5730
* \param[in] curr_buf_idx Current Buffer index
5731
*
5732
* \return
5733
*    Rdopt cost
5734
*
5735
* \author
5736
*  Ittiam
5737
*
5738
*****************************************************************************
5739
*/
5740
LWORD64 ihevce_inter_rdopt_cu_mc_mvp(
5741
    ihevce_enc_loop_ctxt_t *ps_ctxt,
5742
    cu_inter_cand_t *ps_inter_cand,
5743
    WORD32 cu_size,
5744
    WORD32 cu_pos_x,
5745
    WORD32 cu_pos_y,
5746
    nbr_4x4_t *ps_left_nbr_4x4,
5747
    nbr_4x4_t *ps_top_nbr_4x4,
5748
    nbr_4x4_t *ps_topleft_nbr_4x4,
5749
    WORD32 nbr_4x4_left_strd,
5750
    WORD32 curr_buf_idx)
5751
0
{
5752
    /* local variables */
5753
0
    enc_loop_cu_final_prms_t *ps_final_prms;
5754
0
    nbr_avail_flags_t s_nbr;
5755
0
    nbr_4x4_t *ps_nbr_4x4;
5756
5757
0
    UWORD8 au1_is_top_used[2][MAX_MVP_LIST_CAND];
5758
0
    UWORD8 *pu1_pred;
5759
0
    WORD32 rdopt_cost;
5760
0
    WORD32 ctr;
5761
0
    WORD32 num_cu_part;
5762
0
    WORD32 inter_pu_wd;
5763
0
    WORD32 inter_pu_ht;
5764
0
    WORD32 pred_stride;
5765
5766
    /* get the pointers based on curbuf idx */
5767
0
    ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
5768
0
    ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
5769
0
    pu1_pred = ps_inter_cand->pu1_pred_data;
5770
5771
0
    pred_stride = ps_inter_cand->i4_pred_data_stride;
5772
5773
    /* store the partition mode in final prms */
5774
0
    ps_final_prms->u1_part_mode = ps_inter_cand->b3_part_size;
5775
5776
    /* since encoder does not support NXN part type */
5777
    /* num parts can be either 1 or 2 only          */
5778
0
    ASSERT(SIZE_NxN != ps_inter_cand->b3_part_size);
5779
5780
0
    num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1;
5781
5782
    /* get the 4x4 level position of current cu */
5783
0
    cu_pos_x = cu_pos_x << 1;
5784
0
    cu_pos_y = cu_pos_y << 1;
5785
5786
    /* populate cu level params */
5787
0
    ps_final_prms->u1_intra_flag = PRED_MODE_INTER;
5788
0
    ps_final_prms->u2_num_pus_in_cu = num_cu_part;
5789
5790
    /* run a loop over all the partitons in cu */
5791
0
    for(ctr = 0; ctr < num_cu_part; ctr++)
5792
0
    {
5793
0
        pu_mv_t as_pred_mv[MAX_MVP_LIST_CAND];
5794
0
        pu_t *ps_pu;
5795
0
        WORD32 skip_or_merge_flag;
5796
0
        UWORD8 u1_use_mvp_from_top_row;
5797
5798
0
        ps_pu = &ps_inter_cand->as_inter_pu[ctr];
5799
5800
        /* IF AMP then each partitions can have diff wd ht */
5801
0
        inter_pu_wd = (ps_pu->b4_wd + 1) << 2;
5802
0
        inter_pu_ht = (ps_pu->b4_ht + 1) << 2;
5803
5804
        /* populate reference pic buf id for bs compute */
5805
5806
        /* L0 */
5807
0
        if(-1 != ps_pu->mv.i1_l0_ref_idx)
5808
0
        {
5809
0
            ps_pu->mv.i1_l0_ref_pic_buf_id =
5810
0
                ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx]->i4_buf_id;
5811
0
        }
5812
5813
        /* L1 */
5814
0
        if(-1 != ps_pu->mv.i1_l1_ref_idx)
5815
0
        {
5816
0
            ps_pu->mv.i1_l1_ref_pic_buf_id =
5817
0
                ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx]->i4_buf_id;
5818
0
        }
5819
5820
        /* SKIP or merge check for every part */
5821
0
        skip_or_merge_flag = ps_inter_cand->b1_skip_flag | ps_pu->b1_merge_flag;
5822
5823
        /* ----------- MV Prediction ----------------- */
5824
0
        if(0 == skip_or_merge_flag)
5825
0
        {
5826
            /* get the neighbour availability flags */
5827
0
            ihevce_get_only_nbr_flag(
5828
0
                &s_nbr,
5829
0
                ps_ctxt->pu1_ctb_nbr_map,
5830
0
                ps_ctxt->i4_nbr_map_strd,
5831
0
                cu_pos_x,
5832
0
                cu_pos_y,
5833
0
                inter_pu_wd >> 2,
5834
0
                inter_pu_ht >> 2);
5835
5836
0
            if(ps_ctxt->u1_disable_intra_eval && DISABLE_TOP_SYNC && (ps_pu->b4_pos_y == 0))
5837
0
            {
5838
0
                u1_use_mvp_from_top_row = 0;
5839
0
            }
5840
0
            else
5841
0
            {
5842
0
                u1_use_mvp_from_top_row = 1;
5843
0
            }
5844
5845
0
            if(!u1_use_mvp_from_top_row)
5846
0
            {
5847
0
                if(s_nbr.u1_top_avail || s_nbr.u1_top_lt_avail || s_nbr.u1_top_rt_avail)
5848
0
                {
5849
0
                    if(!s_nbr.u1_left_avail && !s_nbr.u1_bot_lt_avail)
5850
0
                    {
5851
0
                        WORD32 curr_cu_pos_in_row, cu_top_right_offset, cu_top_right_dep_pos;
5852
5853
                        /* Ensure Top Right Sync */
5854
0
                        if(!ps_ctxt->u1_use_top_at_ctb_boundary)
5855
0
                        {
5856
0
                            curr_cu_pos_in_row =
5857
0
                                ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x + (cu_pos_x << 2);
5858
5859
0
                            if(ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y == 0)
5860
0
                            {
5861
                                /* No wait for 1st row */
5862
0
                                cu_top_right_offset = -(MAX_CTB_SIZE);
5863
0
                                {
5864
0
                                    ihevce_tile_params_t *ps_col_tile_params =
5865
0
                                        ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
5866
0
                                         ps_ctxt->i4_tile_col_idx);
5867
5868
                                    /* No wait for 1st row */
5869
0
                                    cu_top_right_offset =
5870
0
                                        -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
5871
0
                                }
5872
0
                                cu_top_right_dep_pos = 0;
5873
0
                            }
5874
0
                            else
5875
0
                            {
5876
0
                                cu_top_right_offset = (cu_size) + 4;
5877
0
                                cu_top_right_dep_pos =
5878
0
                                    (ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y >> 6) - 1;
5879
0
                            }
5880
5881
0
                            ihevce_dmgr_chk_row_row_sync(
5882
0
                                ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
5883
0
                                curr_cu_pos_in_row,
5884
0
                                cu_top_right_offset,
5885
0
                                cu_top_right_dep_pos,
5886
0
                                ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
5887
0
                                ps_ctxt->thrd_id);
5888
0
                        }
5889
5890
0
                        u1_use_mvp_from_top_row = 1;
5891
0
                    }
5892
0
                    else
5893
0
                    {
5894
0
                        s_nbr.u1_top_avail = 0;
5895
0
                        s_nbr.u1_top_lt_avail = 0;
5896
0
                        s_nbr.u1_top_rt_avail = 0;
5897
0
                    }
5898
0
                }
5899
0
                else
5900
0
                {
5901
0
                    u1_use_mvp_from_top_row = 1;
5902
0
                }
5903
0
            }
5904
            /* Call the MV prediction module to get MVP */
5905
0
            ihevce_mv_pred(
5906
0
                &ps_ctxt->s_mv_pred_ctxt,
5907
0
                ps_top_nbr_4x4,
5908
0
                ps_left_nbr_4x4,
5909
0
                ps_topleft_nbr_4x4,
5910
0
                nbr_4x4_left_strd,
5911
0
                &s_nbr,
5912
0
                NULL, /* colocated MV */
5913
0
                ps_pu,
5914
0
                &as_pred_mv[0],
5915
0
                au1_is_top_used);
5916
0
        }
5917
5918
        /* store the nbr 4x4 structure */
5919
0
        ps_nbr_4x4->b1_skip_flag = ps_inter_cand->b1_skip_flag;
5920
0
        ps_nbr_4x4->b1_intra_flag = 0;
5921
0
        ps_nbr_4x4->b1_pred_l0_flag = 0;
5922
0
        ps_nbr_4x4->b1_pred_l1_flag = 0;
5923
5924
        /* DC is default mode for inter cu, required for intra mode signalling */
5925
0
        ps_nbr_4x4->b6_luma_intra_mode = 1;
5926
5927
        /* copy the motion vectors to neighbour structure */
5928
0
        ps_nbr_4x4->mv = ps_pu->mv;
5929
5930
        /* copy the PU to final out pu */
5931
0
        ps_final_prms->as_pu_enc_loop[ctr] = *ps_pu;
5932
5933
        /* copy the PU to chroma */
5934
0
        ps_final_prms->as_pu_chrm_proc[ctr] = *ps_pu;
5935
5936
        /* store the skip flag to final prms */
5937
0
        ps_final_prms->u1_skip_flag = ps_inter_cand->b1_skip_flag;
5938
5939
        /* MVP index & MVD calc is gated on skip/merge flag */
5940
0
        if(0 == skip_or_merge_flag)
5941
0
        {
5942
            /* calculate the MVDs and popluate the MVP idx for L0 */
5943
0
            if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode))
5944
0
            {
5945
0
                WORD32 idx0_cost, idx1_cost;
5946
5947
                /* calculate the ABS mvd for cand 0 */
5948
0
                idx0_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[0].s_l0_mv.i2_mvx);
5949
0
                idx0_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[0].s_l0_mv.i2_mvy);
5950
5951
                /* calculate the ABS mvd for cand 1 */
5952
0
                if(u1_use_mvp_from_top_row)
5953
0
                {
5954
0
                    idx1_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[1].s_l0_mv.i2_mvx);
5955
0
                    idx1_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[1].s_l0_mv.i2_mvy);
5956
0
                }
5957
0
                else
5958
0
                {
5959
0
                    idx1_cost = INT_MAX;
5960
0
                }
5961
5962
                /* based on the least cost choose the mvp idx */
5963
0
                if(idx0_cost <= idx1_cost)
5964
0
                {
5965
0
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -=
5966
0
                        as_pred_mv[0].s_l0_mv.i2_mvx;
5967
0
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -=
5968
0
                        as_pred_mv[0].s_l0_mv.i2_mvy;
5969
5970
0
                    ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 0;
5971
0
                }
5972
0
                else
5973
0
                {
5974
0
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -=
5975
0
                        as_pred_mv[1].s_l0_mv.i2_mvx;
5976
0
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -=
5977
0
                        as_pred_mv[1].s_l0_mv.i2_mvy;
5978
5979
0
                    ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 1;
5980
0
                }
5981
5982
                /* set the pred l0 flag for neighbour storage */
5983
0
                ps_nbr_4x4->b1_pred_l0_flag = 1;
5984
0
            }
5985
            /* calculate the MVDs and populate the MVP idx for L1 */
5986
0
            if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode))
5987
0
            {
5988
0
                WORD32 idx0_cost, idx1_cost;
5989
5990
                /* calculate the ABS mvd for cand 0 */
5991
0
                idx0_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[0].s_l1_mv.i2_mvx);
5992
0
                idx0_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[0].s_l1_mv.i2_mvy);
5993
5994
                /* calculate the ABS mvd for cand 1 */
5995
0
                if(u1_use_mvp_from_top_row)
5996
0
                {
5997
0
                    idx1_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[1].s_l1_mv.i2_mvx);
5998
0
                    idx1_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[1].s_l1_mv.i2_mvy);
5999
0
                }
6000
0
                else
6001
0
                {
6002
0
                    idx1_cost = INT_MAX;
6003
0
                }
6004
6005
                /* based on the least cost choose the mvp idx */
6006
0
                if(idx0_cost <= idx1_cost)
6007
0
                {
6008
0
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -=
6009
0
                        as_pred_mv[0].s_l1_mv.i2_mvx;
6010
0
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -=
6011
0
                        as_pred_mv[0].s_l1_mv.i2_mvy;
6012
6013
0
                    ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 0;
6014
0
                }
6015
0
                else
6016
0
                {
6017
0
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -=
6018
0
                        as_pred_mv[1].s_l1_mv.i2_mvx;
6019
0
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -=
6020
0
                        as_pred_mv[1].s_l1_mv.i2_mvy;
6021
6022
0
                    ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 1;
6023
0
                }
6024
6025
                /* set the pred l1 flag for neighbour storage */
6026
0
                ps_nbr_4x4->b1_pred_l1_flag = 1;
6027
0
            }
6028
6029
            /* set the merge flag to 0 */
6030
0
            ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = 0;
6031
0
            ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = 0;
6032
0
        }
6033
0
        else
6034
0
        {
6035
            /* copy the merge index from candidate */
6036
0
            ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = ps_pu->b1_merge_flag;
6037
6038
0
            ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = ps_pu->b3_merge_idx;
6039
6040
0
            if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode))
6041
0
            {
6042
                /* set the pred l0 flag for neighbour storage */
6043
0
                ps_nbr_4x4->b1_pred_l0_flag = 1;
6044
0
            }
6045
6046
            /* merge/skip case: no MVD or MVP idx is derived for L1, only the neighbour flag is set */
6047
0
            if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode))
6048
0
            {
6049
                /* set the pred l1 flag for neighbour storage */
6050
0
                ps_nbr_4x4->b1_pred_l1_flag = 1;
6051
0
            }
6052
0
        }
6053
6054
        /* RD opt cost computation is part of cu_ntu func hence here it is set to 0 */
6055
0
        rdopt_cost = 0;
6056
6057
        /* copy the MV to colocated Mv structure */
6058
0
        ps_final_prms->as_col_pu_enc_loop[ctr].s_l0_mv = ps_pu->mv.s_l0_mv;
6059
0
        ps_final_prms->as_col_pu_enc_loop[ctr].s_l1_mv = ps_pu->mv.s_l1_mv;
6060
0
        ps_final_prms->as_col_pu_enc_loop[ctr].i1_l0_ref_idx = ps_pu->mv.i1_l0_ref_idx;
6061
0
        ps_final_prms->as_col_pu_enc_loop[ctr].i1_l1_ref_idx = ps_pu->mv.i1_l1_ref_idx;
6062
0
        ps_final_prms->as_col_pu_enc_loop[ctr].b2_pred_mode = ps_pu->b2_pred_mode;
6063
0
        ps_final_prms->as_col_pu_enc_loop[ctr].b1_intra_flag = 0;
6064
6065
        /* replicate neighbour 4x4 strcuture for entire partition */
6066
0
        {
6067
0
            WORD32 i, j;
6068
0
            nbr_4x4_t *ps_tmp_4x4;
6069
6070
0
            ps_tmp_4x4 = ps_nbr_4x4;
6071
6072
0
            for(i = 0; i < (inter_pu_ht >> 2); i++)
6073
0
            {
6074
0
                for(j = 0; j < (inter_pu_wd >> 2); j++)
6075
0
                {
6076
0
                    ps_tmp_4x4[j] = *ps_nbr_4x4;
6077
0
                }
6078
                /* row level update*/
6079
0
                ps_tmp_4x4 += (cu_size >> 2);
6080
0
            }
6081
0
        }
6082
        /* set the neighbour map to 1 */
6083
0
        ihevce_set_inter_nbr_map(
6084
0
            ps_ctxt->pu1_ctb_nbr_map,
6085
0
            ps_ctxt->i4_nbr_map_strd,
6086
0
            cu_pos_x,
6087
0
            cu_pos_y,
6088
0
            (inter_pu_wd >> 2),
6089
0
            (inter_pu_ht >> 2),
6090
0
            1);
6091
        /* ----------- Motion Compensation for Luma ----------- */
6092
#if !ENABLE_MIXED_INTER_MODE_EVAL
6093
        {
6094
            IV_API_CALL_STATUS_T valid_mv_cand;
6095
6096
            /*If the inter candidate is neither merge cand nor skip cand
6097
            then calculate the mc.*/
6098
            if(0 == skip_or_merge_flag || (ps_ctxt->u1_high_speed_cu_dec_on))
6099
            {
6100
                valid_mv_cand =
6101
                    ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 0);
6102
6103
                /* assert if the MC is given a valid mv candidate */
6104
                ASSERT(valid_mv_cand == IV_SUCCESS);
6105
            }
6106
        }
6107
#endif
6108
0
        if((2 == num_cu_part) && (0 == ctr))
6109
0
        {
6110
            /* 2Nx__ partion case */
6111
0
            if(inter_pu_wd == cu_size)
6112
0
            {
6113
0
                cu_pos_y += (inter_pu_ht >> 2);
6114
0
                pu1_pred += (inter_pu_ht * pred_stride);
6115
0
                ps_nbr_4x4 += (inter_pu_ht >> 2) * (cu_size >> 2);
6116
0
                ps_left_nbr_4x4 += (inter_pu_ht >> 2) * nbr_4x4_left_strd;
6117
0
                ps_top_nbr_4x4 = ps_nbr_4x4 - (cu_size >> 2);
6118
0
                ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - nbr_4x4_left_strd;
6119
0
            }
6120
6121
            /* __x2N partion case */
6122
0
            if(inter_pu_ht == cu_size)
6123
0
            {
6124
0
                cu_pos_x += (inter_pu_wd >> 2);
6125
0
                pu1_pred += inter_pu_wd;
6126
0
                ps_nbr_4x4 += (inter_pu_wd >> 2);
6127
0
                ps_left_nbr_4x4 = ps_nbr_4x4 - 1;
6128
0
                ps_top_nbr_4x4 += (inter_pu_wd >> 2);
6129
0
                ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1;
6130
0
                nbr_4x4_left_strd = (cu_size >> 2);
6131
0
            }
6132
0
        }
6133
0
    }
6134
6135
0
    return (rdopt_cost);
6136
0
}
6137
6138
/*!
6139
******************************************************************************
6140
* \if Function name : ihevce_intra_chroma_pred_mode_selector \endif
6141
*
6142
* \brief
6143
*    Coding unit processing function for chroma special modes (Non-Luma modes)
6144
*
6145
* \param[in] ps_ctxt       enc_loop module ctxt pointer
6146
* \param[in] ps_chrm_cu_buf_prms    ctxt having chroma related prms
6147
* \param[in] ps_cu_analyse      pointer to cu analyse
6148
* \param[in] rd_opt_curr_idx    index in the array of RDopt params
6149
* \param[in] tu_mode            TU_EQ_CU or other case
6150
*
6151
* \return
6152
*    Stores the best SATD mode, its RDOPT cost, CABAC state, TU bits
6153
*
6154
* \author
6155
*  Ittiam
6156
*
6157
*****************************************************************************
6158
*/
6159
UWORD8 ihevce_distortion_based_intra_chroma_mode_selector(
6160
    cu_analyse_t *ps_cu_analyse,
6161
    ihevc_intra_pred_chroma_ref_substitution_ft *pf_ref_substitution,
6162
    pf_intra_pred *ppf_chroma_ip,
6163
    pf_res_trans_luma_had_chroma *ppf_resd_trns_had,
6164
    UWORD8 *pu1_src,
6165
    WORD32 i4_src_stride,
6166
    UWORD8 *pu1_pred,
6167
    WORD32 i4_pred_stride,
6168
    UWORD8 *pu1_ctb_nbr_map,
6169
    WORD32 i4_nbr_map_strd,
6170
    UWORD8 *pu1_ref_sub_out,
6171
    WORD32 i4_alpha_stim_multiplier,
6172
    UWORD8 u1_is_cu_noisy,
6173
    UWORD8 u1_trans_size,
6174
    UWORD8 u1_trans_idx,
6175
    UWORD8 u1_num_tus_in_cu,
6176
    UWORD8 u1_num_4x4_luma_blks_in_tu,
6177
    UWORD8 u1_enable_psyRDOPT,
6178
    UWORD8 u1_is_422)
6179
0
{
6180
0
    UWORD8 u1_chrm_mode;
6181
0
    UWORD8 ctr;
6182
0
    WORD32 i4_subtu_idx;
6183
6184
0
    WORD32 i = 0;
6185
0
    UWORD8 u1_chrm_modes[4] = { 0, 1, 10, 26 };
6186
0
    WORD32 i4_satd_had[4] = { 0 };
6187
0
    WORD32 i4_best_satd_had = INT_MAX;
6188
0
    UWORD8 u1_cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1);
6189
0
    UWORD8 u1_cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1);
6190
0
    WORD32 i4_num_sub_tus = u1_is_422 + 1;
6191
0
    UWORD8 u1_best_chrm_mode = 0;
6192
6193
    /* Get the best satd among all possible modes */
6194
0
    for(i = 0; i < 4; i++)
6195
0
    {
6196
0
        WORD32 left_strd = i4_src_stride;
6197
6198
0
        u1_chrm_mode = (u1_is_422 == 1) ? gau1_chroma422_intra_angle_mapping[u1_chrm_modes[i]]
6199
0
                                        : u1_chrm_modes[i];
6200
6201
        /* loop based on num tus in a cu */
6202
0
        for(ctr = 0; ctr < u1_num_tus_in_cu; ctr++)
6203
0
        {
6204
0
            WORD32 luma_nbr_flags;
6205
0
            WORD32 chrm_pred_func_idx;
6206
6207
0
            WORD32 i4_trans_size_m2 = u1_trans_size << 1;
6208
0
            UWORD8 *pu1_tu_src = pu1_src + ((ctr & 1) * i4_trans_size_m2) +
6209
0
                                 (((ctr > 1) * u1_trans_size * i4_src_stride) << u1_is_422);
6210
0
            UWORD8 *pu1_tu_pred = pu1_pred + ((ctr & 1) * i4_trans_size_m2) +
6211
0
                                  (((ctr > 1) * u1_trans_size * i4_pred_stride) << u1_is_422);
6212
0
            WORD32 i4_curr_tu_pos_x = u1_cu_pos_x + ((ctr & 1) * u1_num_4x4_luma_blks_in_tu);
6213
0
            WORD32 i4_curr_tu_pos_y = u1_cu_pos_y + ((ctr > 1) * u1_num_4x4_luma_blks_in_tu);
6214
6215
0
            luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
6216
0
                pu1_ctb_nbr_map,
6217
0
                i4_nbr_map_strd,
6218
0
                i4_curr_tu_pos_x,
6219
0
                i4_curr_tu_pos_y,
6220
0
                u1_num_4x4_luma_blks_in_tu,
6221
0
                u1_num_4x4_luma_blks_in_tu);
6222
6223
0
            for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++)
6224
0
            {
6225
0
                WORD32 nbr_flags;
6226
6227
0
                UWORD8 *pu1_cur_src =
6228
0
                    pu1_tu_src + ((i4_subtu_idx == 1) * u1_trans_size * i4_src_stride);
6229
0
                UWORD8 *pu1_cur_pred =
6230
0
                    pu1_tu_pred + ((i4_subtu_idx == 1) * u1_trans_size * i4_pred_stride);
6231
0
                UWORD8 *pu1_left = pu1_cur_src - 2;
6232
0
                UWORD8 *pu1_top = pu1_cur_src - i4_src_stride;
6233
0
                UWORD8 *pu1_top_left = pu1_top - 2;
6234
6235
0
                nbr_flags = ihevce_get_intra_chroma_tu_nbr(
6236
0
                    luma_nbr_flags, i4_subtu_idx, u1_trans_size, u1_is_422);
6237
6238
                /* call the chroma reference array substitution */
6239
0
                pf_ref_substitution(
6240
0
                    pu1_top_left,
6241
0
                    pu1_top,
6242
0
                    pu1_left,
6243
0
                    left_strd,
6244
0
                    u1_trans_size,
6245
0
                    nbr_flags,
6246
0
                    pu1_ref_sub_out,
6247
0
                    1);
6248
6249
                /* use the look up to get the function idx */
6250
0
                chrm_pred_func_idx = g_i4_ip_funcs[u1_chrm_mode];
6251
6252
                /* call the intra prediction function */
6253
0
                ppf_chroma_ip[chrm_pred_func_idx](
6254
0
                    pu1_ref_sub_out, 1, pu1_cur_pred, i4_pred_stride, u1_trans_size, u1_chrm_mode);
6255
6256
0
                if(!u1_is_cu_noisy || !i4_alpha_stim_multiplier)
6257
0
                {
6258
                    /* compute Hadamard-transform satd : Cb */
6259
0
                    i4_satd_had[i] += ppf_resd_trns_had[u1_trans_idx - 1](
6260
0
                        pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0);
6261
6262
                    /* compute Hadamard-transform satd : Cr */
6263
0
                    i4_satd_had[i] += ppf_resd_trns_had[u1_trans_idx - 1](
6264
0
                        pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0);
6265
0
                }
6266
0
                else
6267
0
                {
6268
0
                    WORD32 i4_satd;
6269
6270
                    /* compute Hadamard-transform satd : Cb */
6271
0
                    i4_satd = ppf_resd_trns_had[u1_trans_idx - 1](
6272
0
                        pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0);
6273
6274
0
                    i4_satd = ihevce_inject_stim_into_distortion(
6275
0
                        pu1_cur_src,
6276
0
                        i4_src_stride,
6277
0
                        pu1_cur_pred,
6278
0
                        i4_pred_stride,
6279
0
                        i4_satd,
6280
0
                        i4_alpha_stim_multiplier,
6281
0
                        u1_trans_size,
6282
0
                        0,
6283
0
                        u1_enable_psyRDOPT,
6284
0
                        U_PLANE);
6285
6286
0
                    i4_satd_had[i] += i4_satd;
6287
6288
                    /* compute Hadamard-transform satd : Cr */
6289
0
                    i4_satd = ppf_resd_trns_had[u1_trans_idx - 1](
6290
0
                        pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0);
6291
6292
0
                    i4_satd = ihevce_inject_stim_into_distortion(
6293
0
                        pu1_cur_src,
6294
0
                        i4_src_stride,
6295
0
                        pu1_cur_pred,
6296
0
                        i4_pred_stride,
6297
0
                        i4_satd,
6298
0
                        i4_alpha_stim_multiplier,
6299
0
                        u1_trans_size,
6300
0
                        0,
6301
0
                        u1_enable_psyRDOPT,
6302
0
                        V_PLANE);
6303
6304
0
                    i4_satd_had[i] += i4_satd;
6305
0
                }
6306
0
            }
6307
6308
            /* set the neighbour map to 1 */
6309
0
            ihevce_set_nbr_map(
6310
0
                pu1_ctb_nbr_map,
6311
0
                i4_nbr_map_strd,
6312
0
                i4_curr_tu_pos_x,
6313
0
                i4_curr_tu_pos_y,
6314
0
                u1_num_4x4_luma_blks_in_tu,
6315
0
                1);
6316
0
        }
6317
6318
        /* set the neighbour map to 0 */
6319
0
        ihevce_set_nbr_map(
6320
0
            pu1_ctb_nbr_map,
6321
0
            i4_nbr_map_strd,
6322
0
            (ps_cu_analyse->b3_cu_pos_x << 1),
6323
0
            (ps_cu_analyse->b3_cu_pos_y << 1),
6324
0
            (ps_cu_analyse->u1_cu_size >> 2),
6325
0
            0);
6326
6327
        /* Get the least SATD and corresponding mode */
6328
0
        if(i4_best_satd_had > i4_satd_had[i])
6329
0
        {
6330
0
            i4_best_satd_had = i4_satd_had[i];
6331
0
            u1_best_chrm_mode = u1_chrm_mode;
6332
0
        }
6333
0
    }
6334
6335
0
    return u1_best_chrm_mode;
6336
0
}
6337
6338
void ihevce_intra_chroma_pred_mode_selector(
6339
    ihevce_enc_loop_ctxt_t *ps_ctxt,
6340
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
6341
    cu_analyse_t *ps_cu_analyse,
6342
    WORD32 rd_opt_curr_idx,
6343
    WORD32 tu_mode,
6344
    WORD32 i4_alpha_stim_multiplier,
6345
    UWORD8 u1_is_cu_noisy)
6346
0
{
6347
0
    chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt;
6348
6349
0
    ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
6350
6351
0
    UWORD8 *pu1_pred;
6352
0
    WORD32 trans_size;
6353
0
    WORD32 num_tus_in_cu;
6354
0
    WORD32 pred_strd;
6355
0
    WORD32 ctr;
6356
0
    WORD32 i4_subtu_idx;
6357
0
    WORD32 i4_num_sub_tus;
6358
0
    WORD32 trans_idx;
6359
0
    WORD32 scan_idx;
6360
0
    WORD32 num_4x4_luma_in_tu;
6361
0
    WORD32 cu_pos_x;
6362
0
    WORD32 cu_pos_y;
6363
6364
0
    recon_datastore_t *aps_recon_datastore[2] = { &ps_ctxt->as_cu_prms[0].s_recon_datastore,
6365
0
                                                  &ps_ctxt->as_cu_prms[1].s_recon_datastore };
6366
6367
0
    LWORD64 chrm_cod_cost = 0;
6368
0
    WORD32 chrm_tu_bits = 0;
6369
0
    WORD32 best_chrm_mode = DM_CHROMA_IDX;
6370
0
    UWORD8 *pu1_chrm_src = ps_chrm_cu_buf_prms->pu1_curr_src;
6371
0
    WORD32 chrm_src_stride = ps_chrm_cu_buf_prms->i4_chrm_src_stride;
6372
0
    UWORD8 *pu1_cu_left = ps_chrm_cu_buf_prms->pu1_cu_left;
6373
0
    UWORD8 *pu1_cu_top = ps_chrm_cu_buf_prms->pu1_cu_top;
6374
0
    UWORD8 *pu1_cu_top_left = ps_chrm_cu_buf_prms->pu1_cu_top_left;
6375
0
    WORD32 cu_left_stride = ps_chrm_cu_buf_prms->i4_cu_left_stride;
6376
0
    WORD32 cu_size = ps_cu_analyse->u1_cu_size;
6377
0
    WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
6378
0
    WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
6379
0
    UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
6380
6381
0
    ihevc_intra_pred_chroma_ref_substitution_fptr =
6382
0
        ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
6383
0
    i4_num_sub_tus = (u1_is_422 == 1) + 1;
6384
6385
#if DISABLE_RDOQ_INTRA
6386
    i4_perform_rdoq = 0;
6387
#endif
6388
6389
0
    if(TU_EQ_CU == tu_mode)
6390
0
    {
6391
0
        num_tus_in_cu = 1;
6392
0
        trans_size = cu_size >> 1;
6393
0
        num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/
6394
0
        ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode];
6395
0
    }
6396
0
    else
6397
0
    {
6398
0
        num_tus_in_cu = 4;
6399
0
        trans_size = cu_size >> 2;
6400
0
        num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/
6401
6402
        /* For 8x8 CU only one TU */
6403
0
        if(MIN_TU_SIZE > trans_size)
6404
0
        {
6405
0
            trans_size = MIN_TU_SIZE;
6406
0
            num_tus_in_cu = 1;
6407
            /* chroma nbr avail. is derived based on luma.
6408
            for 4x4 chrm use 8x8 luma's size */
6409
0
            num_4x4_luma_in_tu = num_4x4_luma_in_tu << 1;
6410
0
        }
6411
6412
0
        ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode];
6413
0
    }
6414
6415
    /* Can't be TU_EQ_SUBCU case */
6416
0
    ASSERT(TU_EQ_SUBCU != tu_mode);
6417
6418
    /* translate the transform size to index */
6419
0
    trans_idx = trans_size >> 2;
6420
6421
0
    pu1_pred = (UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data;
6422
6423
0
    pred_strd = ps_chr_intra_satd_ctxt->i4_pred_stride;
6424
6425
    /* for 16x16 cases */
6426
0
    if(16 == trans_size)
6427
0
    {
6428
0
        trans_idx = 3;
6429
0
    }
6430
6431
0
    best_chrm_mode = ihevce_distortion_based_intra_chroma_mode_selector(
6432
0
        ps_cu_analyse,
6433
0
        ihevc_intra_pred_chroma_ref_substitution_fptr,
6434
0
        ps_ctxt->apf_chrm_ip,
6435
0
        ps_ctxt->apf_chrm_resd_trns_had,
6436
0
        pu1_chrm_src,
6437
0
        chrm_src_stride,
6438
0
        pu1_pred,
6439
0
        pred_strd,
6440
0
        ps_ctxt->pu1_ctb_nbr_map,
6441
0
        ps_ctxt->i4_nbr_map_strd,
6442
0
        (UWORD8 *)ps_ctxt->pv_ref_sub_out,
6443
0
        i4_alpha_stim_multiplier,
6444
0
        u1_is_cu_noisy,
6445
0
        trans_size,
6446
0
        trans_idx,
6447
0
        num_tus_in_cu,
6448
0
        num_4x4_luma_in_tu,
6449
0
        ps_ctxt->u1_enable_psyRDOPT,
6450
0
        u1_is_422);
6451
6452
    /* Store the best chroma mode */
6453
0
    ps_chr_intra_satd_ctxt->u1_best_cr_mode = best_chrm_mode;
6454
6455
    /* evaluate RDOPT cost for the Best mode */
6456
0
    {
6457
0
        WORD32 i4_subtu_pos_x;
6458
0
        WORD32 i4_subtu_pos_y;
6459
0
        UWORD8 u1_compute_spatial_ssd;
6460
6461
0
        WORD32 ai4_total_bytes_offset_cb[2] = { 0, 0 };
6462
0
        WORD32 ai4_total_bytes_offset_cr[2] = { 0, 0 };
6463
        /* State for prefix bin of chroma intra pred mode before CU encode */
6464
0
        UWORD8 u1_chroma_intra_mode_prefix_state =
6465
0
            ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_CHROMA_PRED_MODE];
6466
0
        WORD32 luma_trans_size = trans_size << 1;
6467
0
        WORD32 calc_recon = 0;
6468
0
        UWORD8 *pu1_left = pu1_cu_left;
6469
0
        UWORD8 *pu1_top = pu1_cu_top;
6470
0
        UWORD8 *pu1_top_left = pu1_cu_top_left;
6471
0
        WORD32 left_strd = cu_left_stride;
6472
6473
0
        if(ps_ctxt->i1_cu_qp_delta_enable)
6474
0
        {
6475
0
            ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, luma_trans_size, 1);
6476
0
        }
6477
6478
0
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
6479
0
                                 (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
6480
0
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
6481
6482
0
        if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
6483
0
        {
6484
0
            u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
6485
0
                                     CONVERT_SSDS_TO_SPATIAL_DOMAIN;
6486
0
        }
6487
6488
        /* get the 4x4 level postion of current cu */
6489
0
        cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1);
6490
0
        cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1);
6491
6492
0
        calc_recon = !u1_compute_spatial_ssd && ((4 == num_tus_in_cu) || (u1_is_422 == 1));
6493
6494
0
        if(calc_recon || u1_compute_spatial_ssd)
6495
0
        {
6496
0
            aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1;
6497
0
            aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1;
6498
0
        }
6499
0
        else
6500
0
        {
6501
0
            aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0;
6502
0
            aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0;
6503
0
        }
6504
6505
        /* loop based on num tus in a cu */
6506
0
        for(ctr = 0; ctr < num_tus_in_cu; ctr++)
6507
0
        {
6508
0
            WORD16 *pi2_cur_deq_data_cb;
6509
0
            WORD16 *pi2_cur_deq_data_cr;
6510
6511
0
            WORD32 deq_data_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride;
6512
0
            WORD32 luma_nbr_flags = 0;
6513
6514
0
            luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
6515
0
                ps_ctxt->pu1_ctb_nbr_map,
6516
0
                ps_ctxt->i4_nbr_map_strd,
6517
0
                (ctr & 1) * (luma_trans_size >> 2) + cu_pos_x,
6518
0
                (ctr > 1) * (luma_trans_size >> 2) + cu_pos_y,
6519
0
                (luma_trans_size >> 2),
6520
0
                (luma_trans_size >> 2));
6521
6522
0
            for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++)
6523
0
            {
6524
0
                WORD32 cbf, num_bytes;
6525
0
                LWORD64 trans_ssd_u, trans_ssd_v;
6526
0
                UWORD8 u1_is_recon_available;
6527
6528
0
                WORD32 trans_size_m2 = trans_size << 1;
6529
0
                UWORD8 *pu1_cur_src = pu1_chrm_src + ((ctr & 1) * trans_size_m2) +
6530
0
                                      (((ctr > 1) * trans_size * chrm_src_stride) << u1_is_422) +
6531
0
                                      (i4_subtu_idx * trans_size * chrm_src_stride);
6532
0
                UWORD8 *pu1_cur_pred = pu1_pred + ((ctr & 1) * trans_size_m2) +
6533
0
                                       (((ctr > 1) * trans_size * pred_strd) << u1_is_422) +
6534
0
                                       (i4_subtu_idx * trans_size * pred_strd);
6535
0
                WORD32 i4_recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride;
6536
0
                UWORD8 *pu1_cur_recon = ((UWORD8 *)aps_recon_datastore[0]
6537
0
                                             ->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)]) +
6538
0
                                        ((ctr & 1) * trans_size_m2) +
6539
0
                                        (((ctr > 1) * trans_size * i4_recon_stride) << u1_is_422) +
6540
0
                                        (i4_subtu_idx * trans_size * i4_recon_stride);
6541
6542
                /* Use Chroma coeff/iq buf of the cur. intra cand. Not rememb.
6543
                chroma coeff/iq for high quality intra SATD special modes. Will
6544
                be over written by coeff of luma mode in chroma_rdopt call */
6545
0
                UWORD8 *pu1_ecd_data_cb =
6546
0
                    &ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0];
6547
0
                UWORD8 *pu1_ecd_data_cr =
6548
0
                    &ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0];
6549
6550
0
                WORD32 chrm_pred_func_idx = 0;
6551
0
                LWORD64 curr_cb_cod_cost = 0;
6552
0
                LWORD64 curr_cr_cod_cost = 0;
6553
0
                WORD32 nbr_flags = 0;
6554
6555
0
                i4_subtu_pos_x = (((ctr & 1) * trans_size_m2) >> 2);
6556
0
                i4_subtu_pos_y = (((ctr > 1) * trans_size) >> (!u1_is_422 + 1)) +
6557
0
                                 ((i4_subtu_idx * trans_size) >> 2);
6558
0
                pi2_cur_deq_data_cb = &ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] +
6559
0
                                      ((ctr & 1) * trans_size) +
6560
0
                                      (((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) +
6561
0
                                      (i4_subtu_idx * trans_size * deq_data_strd);
6562
0
                pi2_cur_deq_data_cr = &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] +
6563
0
                                      ((ctr & 1) * trans_size) +
6564
0
                                      (((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) +
6565
0
                                      (i4_subtu_idx * trans_size * deq_data_strd);
6566
6567
                /* left cu boundary */
6568
0
                if(0 == i4_subtu_pos_x)
6569
0
                {
6570
0
                    left_strd = cu_left_stride;
6571
0
                    pu1_left = pu1_cu_left + (i4_subtu_pos_y << 2) * left_strd;
6572
0
                }
6573
0
                else
6574
0
                {
6575
0
                    pu1_left = pu1_cur_recon - 2;
6576
0
                    left_strd = i4_recon_stride;
6577
0
                }
6578
6579
                /* top cu boundary */
6580
0
                if(0 == i4_subtu_pos_y)
6581
0
                {
6582
0
                    pu1_top = pu1_cu_top + (i4_subtu_pos_x << 2);
6583
0
                }
6584
0
                else
6585
0
                {
6586
0
                    pu1_top = pu1_cur_recon - i4_recon_stride;
6587
0
                }
6588
6589
                /* by default top left is set to cu top left */
6590
0
                pu1_top_left = pu1_cu_top_left;
6591
6592
                /* top left based on position */
6593
0
                if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x))
6594
0
                {
6595
0
                    pu1_top_left = pu1_left - left_strd;
6596
0
                }
6597
0
                else if(0 != i4_subtu_pos_x)
6598
0
                {
6599
0
                    pu1_top_left = pu1_top - 2;
6600
0
                }
6601
6602
                /* populate the coeffs scan idx */
6603
0
                scan_idx = SCAN_DIAG_UPRIGHT;
6604
6605
                /* RDOPT copy States :  TU init (best until prev TU) to current */
6606
0
                COPY_CABAC_STATES(
6607
0
                    &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6608
0
                         .s_cabac_ctxt.au1_ctxt_models[0],
6609
0
                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6610
0
                    IHEVC_CAB_CTXT_END);
6611
6612
                /* for 4x4 transforms based on intra pred mode scan is choosen*/
6613
0
                if(4 == trans_size)
6614
0
                {
6615
                    /* for modes from 22 upto 30 horizontal scan is used */
6616
0
                    if((best_chrm_mode > 21) && (best_chrm_mode < 31))
6617
0
                    {
6618
0
                        scan_idx = SCAN_HORZ;
6619
0
                    }
6620
                    /* for modes from 6 upto 14 vertical scan is used */
6621
0
                    else if((best_chrm_mode > 5) && (best_chrm_mode < 15))
6622
0
                    {
6623
0
                        scan_idx = SCAN_VERT;
6624
0
                    }
6625
0
                }
6626
6627
0
                nbr_flags = ihevce_get_intra_chroma_tu_nbr(
6628
0
                    luma_nbr_flags, i4_subtu_idx, trans_size, u1_is_422);
6629
6630
                /* call the chroma reference array substitution */
6631
0
                ihevc_intra_pred_chroma_ref_substitution_fptr(
6632
0
                    pu1_top_left,
6633
0
                    pu1_top,
6634
0
                    pu1_left,
6635
0
                    left_strd,
6636
0
                    trans_size,
6637
0
                    nbr_flags,
6638
0
                    (UWORD8 *)ps_ctxt->pv_ref_sub_out,
6639
0
                    1);
6640
6641
                /* use the look up to get the function idx */
6642
0
                chrm_pred_func_idx = g_i4_ip_funcs[best_chrm_mode];
6643
6644
                /* call the intra prediction function */
6645
0
                ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
6646
0
                    (UWORD8 *)ps_ctxt->pv_ref_sub_out,
6647
0
                    1,
6648
0
                    pu1_cur_pred,
6649
0
                    pred_strd,
6650
0
                    trans_size,
6651
0
                    best_chrm_mode);
6652
6653
                /* UPLANE RDOPT Loop */
6654
0
                {
6655
0
                    WORD32 tu_bits;
6656
6657
0
                    cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
6658
0
                        ps_ctxt,
6659
0
                        pu1_cur_pred,
6660
0
                        pred_strd,
6661
0
                        pu1_cur_src,
6662
0
                        chrm_src_stride,
6663
0
                        pi2_cur_deq_data_cb,
6664
0
                        deq_data_strd,
6665
0
                        pu1_cur_recon,
6666
0
                        i4_recon_stride,
6667
0
                        pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx],
6668
0
                        ps_ctxt->au1_cu_csbf,
6669
0
                        ps_ctxt->i4_cu_csbf_strd,
6670
0
                        trans_size,
6671
0
                        scan_idx,
6672
0
                        1,
6673
0
                        &num_bytes,
6674
0
                        &tu_bits,
6675
0
                        &ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr],
6676
0
                        &ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr],
6677
0
                        &u1_is_recon_available,
6678
0
                        i4_perform_sbh,
6679
0
                        i4_perform_rdoq,
6680
0
                        &trans_ssd_u,
6681
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
6682
0
                        i4_alpha_stim_multiplier,
6683
0
                        u1_is_cu_noisy,
6684
0
#endif
6685
0
                        0,
6686
0
                        u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
6687
0
                        U_PLANE);
6688
6689
#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL
6690
                    if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
6691
                    {
6692
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
6693
                        trans_ssd_u = ihevce_inject_stim_into_distortion(
6694
                            pu1_cur_src,
6695
                            chrm_src_stride,
6696
                            pu1_cur_pred,
6697
                            pred_strd,
6698
                            trans_ssd_u,
6699
                            i4_alpha_stim_multiplier,
6700
                            trans_size,
6701
                            0,
6702
                            ps_ctxt->u1_enable_psyRDOPT,
6703
                            U_PLANE);
6704
#else
6705
                        if(u1_compute_spatial_ssd && u1_is_recon_available)
6706
                        {
6707
                            trans_ssd_u = ihevce_inject_stim_into_distortion(
6708
                                pu1_cur_src,
6709
                                chrm_src_stride,
6710
                                pu1_cur_recon,
6711
                                i4_recon_stride,
6712
                                trans_ssd_u,
6713
                                i4_alpha_stim_multiplier,
6714
                                trans_size,
6715
                                0,
6716
                                ps_ctxt->u1_enable_psyRDOPT,
6717
                                U_PLANE);
6718
                        }
6719
                        else
6720
                        {
6721
                            trans_ssd_u = ihevce_inject_stim_into_distortion(
6722
                                pu1_cur_src,
6723
                                chrm_src_stride,
6724
                                pu1_cur_pred,
6725
                                pred_strd,
6726
                                trans_ssd_u,
6727
                                i4_alpha_stim_multiplier,
6728
                                trans_size,
6729
                                0,
6730
                                ps_ctxt->u1_enable_psyRDOPT,
6731
                                U_PLANE);
6732
                        }
6733
#endif
6734
                    }
6735
#endif
6736
6737
                    /* RDOPT copy States :  New updated after curr TU to TU init */
6738
0
                    if(0 != cbf)
6739
0
                    {
6740
0
                        memcpy(
6741
0
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6742
0
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6743
0
                                 .s_cabac_ctxt.au1_ctxt_models[0],
6744
0
                            IHEVC_CAB_CTXT_END);
6745
0
                    }
6746
                    /* RDOPT copy States :  Restoring back the Cb init state to Cr */
6747
0
                    else
6748
0
                    {
6749
0
                        memcpy(
6750
0
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6751
0
                                 .s_cabac_ctxt.au1_ctxt_models[0],
6752
0
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6753
0
                            IHEVC_CAB_CTXT_END);
6754
0
                    }
6755
6756
0
                    if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd))
6757
0
                    {
6758
0
                        ihevce_chroma_it_recon_fxn(
6759
0
                            ps_ctxt,
6760
0
                            pi2_cur_deq_data_cb,
6761
0
                            deq_data_strd,
6762
0
                            pu1_cur_pred,
6763
0
                            pred_strd,
6764
0
                            pu1_cur_recon,
6765
0
                            i4_recon_stride,
6766
0
                            (pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx]),
6767
0
                            trans_size,
6768
0
                            cbf,
6769
0
                            ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr],
6770
0
                            ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr],
6771
0
                            U_PLANE);
6772
0
                    }
6773
6774
0
                    ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr] = cbf;
6775
0
                    curr_cb_cod_cost =
6776
0
                        trans_ssd_u +
6777
0
                        COMPUTE_RATE_COST_CLIP30(
6778
0
                            tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
6779
0
                    chrm_tu_bits += tu_bits;
6780
0
                    ai4_total_bytes_offset_cb[i4_subtu_idx] += num_bytes;
6781
0
                    ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr] =
6782
0
                        num_bytes;
6783
0
                }
6784
6785
                /* VPLANE RDOPT Loop */
6786
0
                {
6787
0
                    WORD32 tu_bits;
6788
6789
0
                    cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
6790
0
                        ps_ctxt,
6791
0
                        pu1_cur_pred,
6792
0
                        pred_strd,
6793
0
                        pu1_cur_src,
6794
0
                        chrm_src_stride,
6795
0
                        pi2_cur_deq_data_cr,
6796
0
                        deq_data_strd,
6797
0
                        pu1_cur_recon,
6798
0
                        i4_recon_stride,
6799
0
                        pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx],
6800
0
                        ps_ctxt->au1_cu_csbf,
6801
0
                        ps_ctxt->i4_cu_csbf_strd,
6802
0
                        trans_size,
6803
0
                        scan_idx,
6804
0
                        1,
6805
0
                        &num_bytes,
6806
0
                        &tu_bits,
6807
0
                        &ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr],
6808
0
                        &ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr],
6809
0
                        &u1_is_recon_available,
6810
0
                        i4_perform_sbh,
6811
0
                        i4_perform_rdoq,
6812
0
                        &trans_ssd_v,
6813
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
6814
0
                        i4_alpha_stim_multiplier,
6815
0
                        u1_is_cu_noisy,
6816
0
#endif
6817
0
                        0,
6818
0
                        u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
6819
0
                        V_PLANE);
6820
6821
#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL
6822
                    if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
6823
                    {
6824
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
6825
                        trans_ssd_v = ihevce_inject_stim_into_distortion(
6826
                            pu1_cur_src,
6827
                            chrm_src_stride,
6828
                            pu1_cur_pred,
6829
                            pred_strd,
6830
                            trans_ssd_v,
6831
                            i4_alpha_stim_multiplier,
6832
                            trans_size,
6833
                            0,
6834
                            ps_ctxt->u1_enable_psyRDOPT,
6835
                            V_PLANE);
6836
#else
6837
                        if(u1_compute_spatial_ssd && u1_is_recon_available)
6838
                        {
6839
                            trans_ssd_v = ihevce_inject_stim_into_distortion(
6840
                                pu1_cur_src,
6841
                                chrm_src_stride,
6842
                                pu1_cur_recon,
6843
                                i4_recon_stride,
6844
                                trans_ssd_v,
6845
                                i4_alpha_stim_multiplier,
6846
                                trans_size,
6847
                                0,
6848
                                ps_ctxt->u1_enable_psyRDOPT,
6849
                                V_PLANE);
6850
                        }
6851
                        else
6852
                        {
6853
                            trans_ssd_v = ihevce_inject_stim_into_distortion(
6854
                                pu1_cur_src,
6855
                                chrm_src_stride,
6856
                                pu1_cur_pred,
6857
                                pred_strd,
6858
                                trans_ssd_v,
6859
                                i4_alpha_stim_multiplier,
6860
                                trans_size,
6861
                                0,
6862
                                ps_ctxt->u1_enable_psyRDOPT,
6863
                                V_PLANE);
6864
                        }
6865
#endif
6866
                    }
6867
#endif
6868
6869
                    /* RDOPT copy States :  New updated after curr TU to TU init */
6870
0
                    if(0 != cbf)
6871
0
                    {
6872
0
                        COPY_CABAC_STATES(
6873
0
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6874
0
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6875
0
                                 .s_cabac_ctxt.au1_ctxt_models[0],
6876
0
                            IHEVC_CAB_CTXT_END);
6877
0
                    }
6878
                    /* RDOPT copy States :  Restoring back the Cb init state to Cr */
6879
0
                    else
6880
0
                    {
6881
0
                        COPY_CABAC_STATES(
6882
0
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6883
0
                                 .s_cabac_ctxt.au1_ctxt_models[0],
6884
0
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6885
0
                            IHEVC_CAB_CTXT_END);
6886
0
                    }
6887
6888
0
                    if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd))
6889
0
                    {
6890
0
                        ihevce_chroma_it_recon_fxn(
6891
0
                            ps_ctxt,
6892
0
                            pi2_cur_deq_data_cr,
6893
0
                            deq_data_strd,
6894
0
                            pu1_cur_pred,
6895
0
                            pred_strd,
6896
0
                            pu1_cur_recon,
6897
0
                            i4_recon_stride,
6898
0
                            (pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx]),
6899
0
                            trans_size,
6900
0
                            cbf,
6901
0
                            ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr],
6902
0
                            ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr],
6903
0
                            V_PLANE);
6904
0
                    }
6905
6906
0
                    ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr] = cbf;
6907
0
                    curr_cr_cod_cost =
6908
0
                        trans_ssd_v +
6909
0
                        COMPUTE_RATE_COST_CLIP30(
6910
0
                            tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
6911
0
                    chrm_tu_bits += tu_bits;
6912
0
                    ai4_total_bytes_offset_cr[i4_subtu_idx] += num_bytes;
6913
0
                    ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr] =
6914
0
                        num_bytes;
6915
0
                }
6916
6917
0
                chrm_cod_cost += curr_cb_cod_cost;
6918
0
                chrm_cod_cost += curr_cr_cod_cost;
6919
0
            }
6920
6921
            /* set the neighbour map to 1 */
6922
0
            ihevce_set_nbr_map(
6923
0
                ps_ctxt->pu1_ctb_nbr_map,
6924
0
                ps_ctxt->i4_nbr_map_strd,
6925
0
                (ctr & 1) * (luma_trans_size >> 2) + cu_pos_x,
6926
0
                (ctr > 1) * (luma_trans_size >> 2) + cu_pos_y,
6927
0
                (luma_trans_size >> 2),
6928
0
                1);
6929
0
        }
6930
6931
        /* set the neighbour map to 0 */
6932
0
        ihevce_set_nbr_map(
6933
0
            ps_ctxt->pu1_ctb_nbr_map,
6934
0
            ps_ctxt->i4_nbr_map_strd,
6935
0
            (ps_cu_analyse->b3_cu_pos_x << 1),
6936
0
            (ps_cu_analyse->b3_cu_pos_y << 1),
6937
0
            (ps_cu_analyse->u1_cu_size >> 2),
6938
0
            0);
6939
6940
        /* Account for coding b3_chroma_intra_pred_mode prefix and suffix bins */
6941
        /* This is done by adding the bits for signalling chroma mode (0-3)    */
6942
        /* and subtracting the bits for chroma mode same as luma mode (4)      */
6943
0
#if CHROMA_RDOPT_ENABLE
6944
0
        {
6945
            /* Estimate bits to encode prefix bin as 1 for b3_chroma_intra_pred_mode */
6946
0
            WORD32 bits_frac_1 =
6947
0
                gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 1];
6948
6949
0
            WORD32 bits_for_mode_0to3 = (2 << CABAC_FRAC_BITS_Q) + bits_frac_1;
6950
6951
            /* Estimate bits to encode prefix bin as 0 for b3_chroma_intra_pred_mode */
6952
0
            WORD32 bits_for_mode4 =
6953
0
                gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 0];
6954
6955
            /* accumulate into final rd cost for chroma */
6956
0
            ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode = COMPUTE_RATE_COST_CLIP30(
6957
0
                (bits_for_mode_0to3 - bits_for_mode4),
6958
0
                ps_ctxt->i8_cl_ssd_lambda_chroma_qf,
6959
0
                (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
6960
6961
0
            chrm_cod_cost += ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode;
6962
0
        }
6963
0
#endif
6964
6965
0
        if(ps_ctxt->u1_enable_psyRDOPT)
6966
0
        {
6967
0
            UWORD8 *pu1_recon_cu;
6968
0
            WORD32 recon_stride;
6969
0
            WORD32 curr_pos_x;
6970
0
            WORD32 curr_pos_y;
6971
0
            WORD32 start_index;
6972
0
            WORD32 num_horz_cu_in_ctb;
6973
0
            WORD32 had_block_size;
6974
6975
            /* TODO: sreenivasa ctb size has to be used appropriately */
6976
0
            had_block_size = 8;
6977
0
            num_horz_cu_in_ctb = 2 * 64 / had_block_size;
6978
0
            curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
6979
0
            curr_pos_y = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
6980
0
            recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride;
6981
0
            pu1_recon_cu =
6982
0
                aps_recon_datastore[0]->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)];  //
6983
6984
            /* start index to index the source satd of curr cu in the current ctb */
6985
0
            start_index = 2 * (curr_pos_x / had_block_size) +
6986
0
                          (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
6987
6988
0
            {
6989
0
                chrm_cod_cost += ihevce_psy_rd_cost_croma(
6990
0
                    ps_ctxt->ai4_source_chroma_satd,
6991
0
                    pu1_recon_cu,
6992
0
                    recon_stride,
6993
0
                    1,  //
6994
0
                    cu_size,
6995
0
                    0,  // pic type
6996
0
                    0,  //layer id
6997
0
                    ps_ctxt->i4_satd_lamda,  // lambda
6998
0
                    start_index,
6999
0
                    ps_ctxt->u1_is_input_data_hbd,  // 8 bit
7000
0
                    ps_ctxt->u1_chroma_array_type,
7001
0
                    &ps_ctxt->s_cmn_opt_func
7002
7003
0
                );  // chroma subsampling 420
7004
0
            }
7005
0
        }
7006
7007
0
        ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt = chrm_cod_cost;
7008
0
        ps_chr_intra_satd_ctxt->i4_chrm_tu_bits = chrm_tu_bits;
7009
7010
0
        memcpy(
7011
0
            &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0],
7012
0
            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7013
0
            IHEVC_CAB_CTXT_END);
7014
0
    }
7015
0
}
7016
7017
/*!
7018
******************************************************************************
7019
* \if Function name : ihevce_chroma_cu_prcs_rdopt \endif
7020
*
7021
* \brief
7022
*    Coding unit processing function for chroma
7023
*
7024
* \param[in] ps_ctxt    enc_loop module ctxt pointer
7025
* \param[in] rd_opt_curr_idx index in the array of RDopt params
7026
* \param[in] func_proc_mode TU_EQ_CU or other case
7027
* \param[in] pu1_chrm_src  pointer to source data buffer
7028
* \param[in] chrm_src_stride   source buffer stride
7029
* \param[in] pu1_cu_left pointer to left recon data buffer
7030
* \param[in] pu1_cu_top  pointer to top recon data buffer
7031
* \param[in] pu1_cu_top_left pointer to top left recon data buffer
7032
* \param[in] cu_left_stride left recon buffer stride
7033
* \param[in] cu_pos_x position x of current CU in CTB
7034
* \param[in] cu_pos_y position y of current CU in CTB
7035
* \param[out] pi4_chrm_tu_bits pointer to store the total chroma bits
7036
*
7037
* \return
7038
*    Chroma coding cost (Cb and Cr included)
7039
*
7040
* \author
7041
*  Ittiam
7042
*
7043
*****************************************************************************
7044
*/
7045
LWORD64 ihevce_chroma_cu_prcs_rdopt(
7046
    ihevce_enc_loop_ctxt_t *ps_ctxt,
7047
    WORD32 rd_opt_curr_idx,
7048
    WORD32 func_proc_mode,
7049
    UWORD8 *pu1_chrm_src,
7050
    WORD32 chrm_src_stride,
7051
    UWORD8 *pu1_cu_left,
7052
    UWORD8 *pu1_cu_top,
7053
    UWORD8 *pu1_cu_top_left,
7054
    WORD32 cu_left_stride,
7055
    WORD32 cu_pos_x,
7056
    WORD32 cu_pos_y,
7057
    WORD32 *pi4_chrm_tu_bits,
7058
    WORD32 i4_alpha_stim_multiplier,
7059
    UWORD8 u1_is_cu_noisy)
7060
0
{
7061
0
    tu_enc_loop_out_t *ps_tu;
7062
0
    tu_enc_loop_temp_prms_t *ps_tu_temp_prms;
7063
7064
0
    ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
7065
7066
0
    UWORD8 *pu1_pred;
7067
0
    UWORD8 *pu1_recon;
7068
0
    WORD32 i4_recon_stride;
7069
0
    WORD32 cu_size, trans_size = 0;
7070
0
    WORD32 pred_strd;
7071
0
    WORD32 ctr, i4_subtu_idx;
7072
0
    WORD32 scan_idx;
7073
0
    WORD32 u1_is_cu_coded_old;
7074
0
    WORD32 init_bytes_offset;
7075
7076
0
    enc_loop_cu_final_prms_t *ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_curr_idx];
7077
0
    recon_datastore_t *ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore;
7078
7079
0
    WORD32 total_bytes_offset = 0;
7080
0
    LWORD64 chrm_cod_cost = 0;
7081
0
    WORD32 chrm_tu_bits = 0;
7082
0
    WORD32 chrm_pred_mode = DM_CHROMA_IDX, luma_pred_mode = 35;
7083
0
    LWORD64 i8_ssd_cb = 0;
7084
0
    WORD32 i4_bits_cb = 0;
7085
0
    LWORD64 i8_ssd_cr = 0;
7086
0
    WORD32 i4_bits_cr = 0;
7087
0
    UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
7088
0
    UWORD8 u1_num_tus =
7089
        /* NumChromaTU's = 1, if TUSize = 4 and CUSize = 8 */
7090
0
        (!ps_best_cu_prms->as_tu_enc_loop[0].s_tu.b3_size && ps_best_cu_prms->u1_intra_flag)
7091
0
            ? 1
7092
0
            : ps_best_cu_prms->u2_num_tus_in_cu;
7093
0
    UWORD8 u1_num_subtus_in_tu = u1_is_422 + 1;
7094
0
    UWORD8 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
7095
0
                                    (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
7096
0
                                    CONVERT_SSDS_TO_SPATIAL_DOMAIN;
7097
    /* Get the RDOPT cost of the best CU mode for early_exit */
7098
0
    LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!rd_opt_curr_idx].i8_best_rdopt_cost;
7099
    /* Get the current running RDOPT (Luma RDOPT) for early_exit */
7100
0
    LWORD64 curr_rdopt_cost = ps_ctxt->as_cu_prms[rd_opt_curr_idx].i8_curr_rdopt_cost;
7101
0
    WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
7102
0
    WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
7103
7104
0
    ihevc_intra_pred_chroma_ref_substitution_fptr =
7105
0
        ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
7106
7107
0
    if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
7108
0
    {
7109
0
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
7110
0
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
7111
0
    }
7112
7113
    /* Store the init bytes offset from luma */
7114
0
    init_bytes_offset = ps_best_cu_prms->i4_num_bytes_ecd_data;
7115
7116
    /* Unused pred buffer in merge_skip_pred_data_t structure is used as
7117
    Chroma pred storage buf. for final_recon function.
7118
    The buffer is split into two and used as a ping-pong buffer */
7119
0
    pu1_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
7120
0
               rd_opt_curr_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) +
7121
0
                                  (u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
7122
7123
0
    pred_strd = ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
7124
7125
0
    pu1_recon = (UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs[0];
7126
0
    i4_recon_stride = ps_recon_datastore->i4_chromaRecon_stride;
7127
0
    cu_size = ps_best_cu_prms->u1_cu_size;
7128
0
    chrm_tu_bits = 0;
7129
7130
    /* get the first TU pointer */
7131
0
    ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
7132
    /* get the first TU enc_loop temp prms pointer */
7133
0
    ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
7134
7135
0
    if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag)
7136
0
    {
7137
        /* Mode signalled by intra prediction for luma */
7138
0
        luma_pred_mode = ps_best_cu_prms->au1_intra_pred_mode[0];
7139
7140
#if DISABLE_RDOQ_INTRA
7141
        i4_perform_rdoq = 0;
7142
#endif
7143
0
    }
7144
7145
0
    else
7146
0
    {
7147
0
        UWORD8 *pu1_pred_org = pu1_pred;
7148
7149
        /* ------ Motion Compensation for Chroma -------- */
7150
0
        for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++)
7151
0
        {
7152
0
            pu_t *ps_pu;
7153
0
            WORD32 inter_pu_wd;
7154
0
            WORD32 inter_pu_ht;
7155
7156
0
            ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr];
7157
7158
0
            inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
7159
0
            inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
7160
0
            inter_pu_ht <<= u1_is_422;
7161
7162
0
            ihevce_chroma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_strd);
7163
7164
0
            if(2 == ps_best_cu_prms->u2_num_pus_in_cu)
7165
0
            {
7166
                /* 2Nx__ partition case */
7167
0
                if(inter_pu_wd == cu_size)
7168
0
                {
7169
0
                    pu1_pred += (inter_pu_ht * pred_strd);
7170
0
                }
7171
7172
                /* __x2N partition case */
7173
0
                if(inter_pu_ht == (cu_size >> (u1_is_422 == 0)))
7174
0
                {
7175
0
                    pu1_pred += inter_pu_wd;
7176
0
                }
7177
0
            }
7178
0
        }
7179
7180
        /* restore the pred pointer to start for transform loop */
7181
0
        pu1_pred = pu1_pred_org;
7182
0
    }
7183
7184
    /* Used to store back only the luma based info. if SATD based chroma
7185
    mode also comes */
7186
0
    u1_is_cu_coded_old = ps_best_cu_prms->u1_is_cu_coded;
7187
7188
    /* evaluate chroma candidates (same as luma) and
7189
    if INTRA & HIGH_QUALITY compare with best SATD mode */
7190
0
    {
7191
0
        WORD32 calc_recon = 0, deq_data_strd;
7192
0
        WORD16 *pi2_deq_data;
7193
0
        UWORD8 *pu1_ecd_data;
7194
0
        UWORD8 u1_is_mode_eq_chroma_satd_mode = 0;
7195
7196
0
        pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0];
7197
0
        pi2_deq_data += ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx;
7198
0
        deq_data_strd = cu_size;
7199
        /* update ecd buffer for storing coeff. */
7200
0
        pu1_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0];
7201
0
        pu1_ecd_data += init_bytes_offset;
7202
        /* store chroma starting index */
7203
0
        ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx = init_bytes_offset;
7204
7205
        /* get the first TU pointer */
7206
0
        ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
7207
0
        ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
7208
7209
        /* Reset total_bytes_offset for each candidate */
7210
0
        chrm_pred_mode = (u1_is_422 == 1) ? gau1_chroma422_intra_angle_mapping[luma_pred_mode]
7211
0
                                          : luma_pred_mode;
7212
7213
0
        total_bytes_offset = 0;
7214
7215
0
        if(TU_EQ_SUBCU == func_proc_mode)
7216
0
        {
7217
0
            func_proc_mode = TU_EQ_CU_DIV2;
7218
0
        }
7219
7220
        /* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and
7221
        TU_EQ_CU_DIV2 and  TU_EQ_SUBCU case */
7222
0
        if(8 == cu_size)
7223
0
        {
7224
0
            func_proc_mode = TU_EQ_CU;
7225
0
        }
7226
7227
        /* loop based on num tus in a cu */
7228
0
        if(!ps_best_cu_prms->u1_intra_flag || !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd ||
7229
0
           (ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd &&
7230
0
            (chrm_pred_mode !=
7231
0
             ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode].u1_best_cr_mode)))
7232
0
        {
7233
            /* loop based on num tus in a cu */
7234
0
            for(ctr = 0; ctr < u1_num_tus; ctr++)
7235
0
            {
7236
0
                WORD32 num_bytes = 0;
7237
0
                LWORD64 curr_cb_cod_cost = 0;
7238
0
                LWORD64 curr_cr_cod_cost = 0;
7239
0
                WORD32 chrm_pred_func_idx = 0;
7240
0
                UWORD8 u1_is_early_exit_condition_satisfied = 0;
7241
7242
                /* Default cb and cr offset initialization for b3_chroma_intra_mode_idx=7  */
7243
                /* FIX for TU tree shrinkage caused by ecd data copies in final mode recon */
7244
0
                ps_tu->s_tu.b1_cb_cbf = ps_tu->s_tu.b1_cr_cbf = 0;
7245
0
                ps_tu->s_tu.b1_cb_cbf_subtu1 = ps_tu->s_tu.b1_cr_cbf_subtu1 = 0;
7246
0
                ps_tu->ai4_cb_coeff_offset[0] = total_bytes_offset + init_bytes_offset;
7247
0
                ps_tu->ai4_cr_coeff_offset[0] = total_bytes_offset + init_bytes_offset;
7248
0
                ps_tu->ai4_cb_coeff_offset[1] = total_bytes_offset + init_bytes_offset;
7249
0
                ps_tu->ai4_cr_coeff_offset[1] = total_bytes_offset + init_bytes_offset;
7250
0
                ps_tu_temp_prms->ai2_cb_bytes_consumed[0] = 0;
7251
0
                ps_tu_temp_prms->ai2_cr_bytes_consumed[0] = 0;
7252
0
                ps_tu_temp_prms->ai2_cb_bytes_consumed[1] = 0;
7253
0
                ps_tu_temp_prms->ai2_cr_bytes_consumed[1] = 0;
7254
7255
                /* TU level inits */
7256
                /* check if chroma present flag is set */
7257
0
                if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
7258
0
                {
7259
                    /* RDOPT copy States :  TU init (best until prev TU) to current */
7260
0
                    COPY_CABAC_STATES(
7261
0
                        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
7262
0
                             .s_cabac_ctxt.au1_ctxt_models[0],
7263
0
                        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7264
0
                        IHEVC_CAB_CTXT_END);
7265
7266
                    /* get the current transform size */
7267
0
                    trans_size = ps_tu->s_tu.b3_size;
7268
0
                    trans_size = (1 << (trans_size + 1)); /* in chroma units */
7269
7270
                    /* since 2x2 transform is not allowed for chroma*/
7271
0
                    if(2 == trans_size)
7272
0
                    {
7273
0
                        trans_size = 4;
7274
0
                    }
7275
0
                }
7276
7277
0
                for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++)
7278
0
                {
7279
0
                    WORD32 cbf;
7280
0
                    UWORD8 u1_is_recon_available;
7281
7282
0
                    WORD32 nbr_flags = 0;
7283
0
                    WORD32 zero_cols = 0;
7284
0
                    WORD32 zero_rows = 0;
7285
7286
                    /* check if chroma present flag is set */
7287
0
                    if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
7288
0
                    {
7289
0
                        UWORD8 *pu1_cur_pred;
7290
0
                        UWORD8 *pu1_cur_recon;
7291
0
                        UWORD8 *pu1_cur_src;
7292
0
                        WORD16 *pi2_cur_deq_data;
7293
0
                        WORD32 curr_pos_x, curr_pos_y;
7294
0
                        LWORD64 trans_ssd_u, trans_ssd_v;
7295
7296
                        /* get the current sub-tu posx and posy w.r.t to cu */
7297
0
                        curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3);
7298
0
                        curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) +
7299
0
                                     (i4_subtu_idx * trans_size);
7300
7301
                        /* 420sp case only vertical height will be half */
7302
0
                        if(u1_is_422 == 0)
7303
0
                        {
7304
0
                            curr_pos_y >>= 1;
7305
0
                        }
7306
7307
                        /* increment the pointers to start of current Sub-TU */
7308
0
                        pu1_cur_recon = (pu1_recon + curr_pos_x);
7309
0
                        pu1_cur_recon += (curr_pos_y * i4_recon_stride);
7310
0
                        pu1_cur_src = (pu1_chrm_src + curr_pos_x);
7311
0
                        pu1_cur_src += (curr_pos_y * chrm_src_stride);
7312
0
                        pu1_cur_pred = (pu1_pred + curr_pos_x);
7313
0
                        pu1_cur_pred += (curr_pos_y * pred_strd);
7314
0
                        pi2_cur_deq_data = pi2_deq_data + curr_pos_x;
7315
0
                        pi2_cur_deq_data += (curr_pos_y * deq_data_strd);
7316
7317
                        /* populate the coeffs scan idx */
7318
0
                        scan_idx = SCAN_DIAG_UPRIGHT;
7319
7320
                        /* perform intra prediction only for Intra case */
7321
0
                        if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag)
7322
0
                        {
7323
0
                            UWORD8 *pu1_top_left;
7324
0
                            UWORD8 *pu1_top;
7325
0
                            UWORD8 *pu1_left;
7326
0
                            WORD32 left_strd;
7327
7328
0
                            calc_recon = !u1_compute_spatial_ssd &&
7329
0
                                         ((4 == u1_num_tus) || (u1_is_422 == 1)) &&
7330
0
                                         (((u1_num_tus == 1) && (0 == i4_subtu_idx)) ||
7331
0
                                          ((ctr == 3) && (0 == i4_subtu_idx) && (u1_is_422 == 1)) ||
7332
0
                                          ((u1_num_tus == 4) && (ctr < 3)));
7333
7334
                            /* left cu boundary */
7335
0
                            if(0 == curr_pos_x)
7336
0
                            {
7337
0
                                pu1_left = pu1_cu_left + curr_pos_y * cu_left_stride;
7338
0
                                left_strd = cu_left_stride;
7339
0
                            }
7340
0
                            else
7341
0
                            {
7342
0
                                pu1_left = pu1_cur_recon - 2;
7343
0
                                left_strd = i4_recon_stride;
7344
0
                            }
7345
7346
                            /* top cu boundary */
7347
0
                            if(0 == curr_pos_y)
7348
0
                            {
7349
0
                                pu1_top = pu1_cu_top + curr_pos_x;
7350
0
                            }
7351
0
                            else
7352
0
                            {
7353
0
                                pu1_top = pu1_cur_recon - i4_recon_stride;
7354
0
                            }
7355
7356
                            /* by default top left is set to cu top left */
7357
0
                            pu1_top_left = pu1_cu_top_left;
7358
7359
                            /* top left based on position */
7360
0
                            if((0 != curr_pos_y) && (0 == curr_pos_x))
7361
0
                            {
7362
0
                                pu1_top_left = pu1_left - cu_left_stride;
7363
0
                            }
7364
0
                            else if(0 != curr_pos_x)
7365
0
                            {
7366
0
                                pu1_top_left = pu1_top - 2;
7367
0
                            }
7368
7369
                            /* for 4x4 transforms the scan is chosen based on the intra pred mode */
7370
0
                            if(4 == trans_size)
7371
0
                            {
7372
                                /* for modes from 22 upto 30 horizontal scan is used */
7373
0
                                if((chrm_pred_mode > 21) && (chrm_pred_mode < 31))
7374
0
                                {
7375
0
                                    scan_idx = SCAN_HORZ;
7376
0
                                }
7377
                                /* for modes from 6 upto 14 vertical scan is used */
7378
0
                                else if((chrm_pred_mode > 5) && (chrm_pred_mode < 15))
7379
0
                                {
7380
0
                                    scan_idx = SCAN_VERT;
7381
0
                                }
7382
0
                            }
7383
7384
0
                            nbr_flags = ihevce_get_intra_chroma_tu_nbr(
7385
0
                                ps_best_cu_prms->au4_nbr_flags[ctr],
7386
0
                                i4_subtu_idx,
7387
0
                                trans_size,
7388
0
                                u1_is_422);
7389
7390
                            /* call the chroma reference array substitution */
7391
0
                            ihevc_intra_pred_chroma_ref_substitution_fptr(
7392
0
                                pu1_top_left,
7393
0
                                pu1_top,
7394
0
                                pu1_left,
7395
0
                                left_strd,
7396
0
                                trans_size,
7397
0
                                nbr_flags,
7398
0
                                (UWORD8 *)ps_ctxt->pv_ref_sub_out,
7399
0
                                1);
7400
7401
                            /* use the look up to get the function idx */
7402
0
                            chrm_pred_func_idx = g_i4_ip_funcs[chrm_pred_mode];
7403
7404
                            /* call the intra prediction function */
7405
0
                            ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
7406
0
                                (UWORD8 *)ps_ctxt->pv_ref_sub_out,
7407
0
                                1,
7408
0
                                pu1_cur_pred,
7409
0
                                pred_strd,
7410
0
                                trans_size,
7411
0
                                chrm_pred_mode);
7412
0
                        }
7413
7414
0
                        if(!ctr && !i4_subtu_idx && (u1_compute_spatial_ssd || calc_recon))
7415
0
                        {
7416
0
                            ps_recon_datastore->au1_is_chromaRecon_available[0] =
7417
0
                                !ps_best_cu_prms->u1_skip_flag;
7418
0
                        }
7419
0
                        else if(!ctr && !i4_subtu_idx)
7420
0
                        {
7421
0
                            ps_recon_datastore->au1_is_chromaRecon_available[0] = 0;
7422
0
                        }
7423
                        /************************************************************/
7424
                        /* recon loop is done for all cases including skip cu       */
7425
                        /* This is because skipping chroma residual based on luma   */
7426
                        /* skip decision can lead to chroma artifacts               */
7427
                        /************************************************************/
7428
                        /************************************************************/
7429
                        /*In the high quality and medium speed modes, wherein chroma*/
7430
                        /*and luma costs are included in the total cost calculation */
7431
                        /*the cost is just a ssd cost, and not that obtained through*/
7432
                        /*iq_it path                                                */
7433
                        /************************************************************/
7434
0
                        if(ps_best_cu_prms->u1_skip_flag == 0)
7435
0
                        {
7436
0
                            WORD32 tu_bits;
7437
7438
0
                            cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
7439
0
                                ps_ctxt,
7440
0
                                pu1_cur_pred,
7441
0
                                pred_strd,
7442
0
                                pu1_cur_src,
7443
0
                                chrm_src_stride,
7444
0
                                pi2_cur_deq_data,
7445
0
                                deq_data_strd,
7446
0
                                pu1_cur_recon,
7447
0
                                i4_recon_stride,
7448
0
                                pu1_ecd_data + total_bytes_offset,
7449
0
                                ps_ctxt->au1_cu_csbf,
7450
0
                                ps_ctxt->i4_cu_csbf_strd,
7451
0
                                trans_size,
7452
0
                                scan_idx,
7453
0
                                PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag,
7454
0
                                &num_bytes,
7455
0
                                &tu_bits,
7456
0
                                &zero_cols,
7457
0
                                &zero_rows,
7458
0
                                &u1_is_recon_available,
7459
0
                                i4_perform_sbh,
7460
0
                                i4_perform_rdoq,
7461
0
                                &trans_ssd_u,
7462
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7463
0
                                i4_alpha_stim_multiplier,
7464
0
                                u1_is_cu_noisy,
7465
0
#endif
7466
0
                                ps_best_cu_prms->u1_skip_flag,
7467
0
                                u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
7468
0
                                U_PLANE);
7469
7470
0
                            if(u1_compute_spatial_ssd && u1_is_recon_available)
7471
0
                            {
7472
0
                                ps_recon_datastore
7473
0
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7474
0
                                                                        [i4_subtu_idx] = 0;
7475
0
                            }
7476
0
                            else
7477
0
                            {
7478
0
                                ps_recon_datastore
7479
0
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7480
0
                                                                        [i4_subtu_idx] = UCHAR_MAX;
7481
0
                            }
7482
7483
#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7484
                            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7485
                            {
7486
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
7487
                                trans_ssd_u = ihevce_inject_stim_into_distortion(
7488
                                    pu1_cur_src,
7489
                                    chrm_src_stride,
7490
                                    pu1_cur_pred,
7491
                                    pred_strd,
7492
                                    trans_ssd_u,
7493
                                    i4_alpha_stim_multiplier,
7494
                                    trans_size,
7495
                                    0,
7496
                                    ps_ctxt->u1_enable_psyRDOPT,
7497
                                    U_PLANE);
7498
#else
7499
                                if(u1_compute_spatial_ssd && u1_is_recon_available)
7500
                                {
7501
                                    trans_ssd_u = ihevce_inject_stim_into_distortion(
7502
                                        pu1_cur_src,
7503
                                        chrm_src_stride,
7504
                                        pu1_cur_recon,
7505
                                        i4_recon_stride,
7506
                                        trans_ssd_u,
7507
                                        i4_alpha_stim_multiplier,
7508
                                        trans_size,
7509
                                        0,
7510
                                        ps_ctxt->u1_enable_psyRDOPT,
7511
                                        U_PLANE);
7512
                                }
7513
                                else
7514
                                {
7515
                                    trans_ssd_u = ihevce_inject_stim_into_distortion(
7516
                                        pu1_cur_src,
7517
                                        chrm_src_stride,
7518
                                        pu1_cur_pred,
7519
                                        pred_strd,
7520
                                        trans_ssd_u,
7521
                                        i4_alpha_stim_multiplier,
7522
                                        trans_size,
7523
                                        0,
7524
                                        ps_ctxt->u1_enable_psyRDOPT,
7525
                                        U_PLANE);
7526
                                }
7527
#endif
7528
                            }
7529
#endif
7530
7531
0
                            curr_cb_cod_cost =
7532
0
                                trans_ssd_u +
7533
0
                                COMPUTE_RATE_COST_CLIP30(
7534
0
                                    tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
7535
7536
0
                            chrm_tu_bits += tu_bits;
7537
0
                            i4_bits_cb += tu_bits;
7538
7539
                            /* RDOPT copy States :  New updated after curr TU to TU init */
7540
0
                            if(0 != cbf)
7541
0
                            {
7542
0
                                COPY_CABAC_STATES(
7543
0
                                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7544
0
                                    &ps_ctxt->s_rdopt_entropy_ctxt
7545
0
                                         .as_cu_entropy_ctxt[rd_opt_curr_idx]
7546
0
                                         .s_cabac_ctxt.au1_ctxt_models[0],
7547
0
                                    IHEVC_CAB_CTXT_END);
7548
0
                            }
7549
                            /* RDOPT copy States :  Restoring back the Cb init state to Cr */
7550
0
                            else
7551
0
                            {
7552
0
                                COPY_CABAC_STATES(
7553
0
                                    &ps_ctxt->s_rdopt_entropy_ctxt
7554
0
                                         .as_cu_entropy_ctxt[rd_opt_curr_idx]
7555
0
                                         .s_cabac_ctxt.au1_ctxt_models[0],
7556
0
                                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7557
0
                                    IHEVC_CAB_CTXT_END);
7558
0
                            }
7559
7560
                            /* If Intra and TU=CU/2, need recon for next TUs */
7561
0
                            if(calc_recon)
7562
0
                            {
7563
0
                                ihevce_chroma_it_recon_fxn(
7564
0
                                    ps_ctxt,
7565
0
                                    pi2_cur_deq_data,
7566
0
                                    deq_data_strd,
7567
0
                                    pu1_cur_pred,
7568
0
                                    pred_strd,
7569
0
                                    pu1_cur_recon,
7570
0
                                    i4_recon_stride,
7571
0
                                    (pu1_ecd_data + total_bytes_offset),
7572
0
                                    trans_size,
7573
0
                                    cbf,
7574
0
                                    zero_cols,
7575
0
                                    zero_rows,
7576
0
                                    U_PLANE);
7577
7578
0
                                ps_recon_datastore
7579
0
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7580
0
                                                                        [i4_subtu_idx] = 0;
7581
0
                            }
7582
0
                            else
7583
0
                            {
7584
0
                                ps_recon_datastore
7585
0
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7586
0
                                                                        [i4_subtu_idx] = UCHAR_MAX;
7587
0
                            }
7588
0
                        }
7589
0
                        else
7590
0
                        {
7591
                            /* num bytes is set to 0 */
7592
0
                            num_bytes = 0;
7593
7594
                            /* cbf is returned as 0 */
7595
0
                            cbf = 0;
7596
7597
0
                            curr_cb_cod_cost = trans_ssd_u =
7598
7599
0
                                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
7600
0
                                    pu1_cur_pred,
7601
0
                                    pu1_cur_src,
7602
0
                                    pred_strd,
7603
0
                                    chrm_src_stride,
7604
0
                                    trans_size,
7605
0
                                    trans_size,
7606
0
                                    U_PLANE);
7607
7608
0
                            if(u1_compute_spatial_ssd)
7609
0
                            {
7610
                                /* buffer copy from pred to recon */
7611
7612
0
                                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
7613
0
                                    pu1_cur_pred,
7614
0
                                    pred_strd,
7615
0
                                    pu1_cur_recon,
7616
0
                                    i4_recon_stride,
7617
0
                                    trans_size,
7618
0
                                    trans_size,
7619
0
                                    U_PLANE);
7620
7621
0
                                ps_recon_datastore
7622
0
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7623
0
                                                                        [i4_subtu_idx] = 0;
7624
0
                            }
7625
7626
0
                            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7627
0
                            {
7628
0
                                trans_ssd_u = ihevce_inject_stim_into_distortion(
7629
0
                                    pu1_cur_src,
7630
0
                                    chrm_src_stride,
7631
0
                                    pu1_cur_pred,
7632
0
                                    pred_strd,
7633
0
                                    trans_ssd_u,
7634
0
                                    i4_alpha_stim_multiplier,
7635
0
                                    trans_size,
7636
0
                                    0,
7637
0
                                    ps_ctxt->u1_enable_psyRDOPT,
7638
0
                                    U_PLANE);
7639
0
                            }
7640
7641
0
#if ENABLE_INTER_ZCU_COST
7642
#if !WEIGH_CHROMA_COST
7643
                            /* cbf = 0, accumulate cu not coded cost */
7644
                            ps_ctxt->i8_cu_not_coded_cost += curr_cb_cod_cost;
7645
#else
7646
                            /* cbf = 0, accumulate cu not coded cost */
7647
7648
0
                            ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
7649
0
                                (curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7650
0
                                 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7651
0
                                CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7652
0
#endif
7653
0
#endif
7654
0
                        }
7655
7656
#if !WEIGH_CHROMA_COST
7657
                        curr_rdopt_cost += curr_cb_cod_cost;
7658
#else
7659
0
                        curr_rdopt_cost +=
7660
0
                            ((curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7661
0
                              (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7662
0
                             CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7663
0
#endif
7664
0
                        chrm_cod_cost += curr_cb_cod_cost;
7665
0
                        i8_ssd_cb += trans_ssd_u;
7666
7667
0
                        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
7668
0
                        {
7669
                            /* Early exit : If the current running cost exceeds
7670
                            the prev. best mode cost, break */
7671
0
                            if(curr_rdopt_cost > prev_best_rdopt_cost)
7672
0
                            {
7673
0
                                u1_is_early_exit_condition_satisfied = 1;
7674
0
                                break;
7675
0
                            }
7676
0
                        }
7677
7678
                        /* inter cu is coded if any of the tu is coded in it */
7679
0
                        ps_best_cu_prms->u1_is_cu_coded |= cbf;
7680
7681
                        /* update CB related params */
7682
0
                        ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] =
7683
0
                            total_bytes_offset + init_bytes_offset;
7684
7685
0
                        if(0 == i4_subtu_idx)
7686
0
                        {
7687
0
                            ps_tu->s_tu.b1_cb_cbf = cbf;
7688
0
                        }
7689
0
                        else
7690
0
                        {
7691
0
                            ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf;
7692
0
                        }
7693
7694
0
                        total_bytes_offset += num_bytes;
7695
7696
0
                        ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] = zero_cols;
7697
0
                        ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] = zero_rows;
7698
0
                        ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes;
7699
7700
                        /* recon loop is done for non skip cases */
7701
0
                        if(ps_best_cu_prms->u1_skip_flag == 0)
7702
0
                        {
7703
0
                            WORD32 tu_bits;
7704
7705
0
                            cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
7706
0
                                ps_ctxt,
7707
0
                                pu1_cur_pred,
7708
0
                                pred_strd,
7709
0
                                pu1_cur_src,
7710
0
                                chrm_src_stride,
7711
0
                                pi2_cur_deq_data + trans_size,
7712
0
                                deq_data_strd,
7713
0
                                pu1_cur_recon,
7714
0
                                i4_recon_stride,
7715
0
                                pu1_ecd_data + total_bytes_offset,
7716
0
                                ps_ctxt->au1_cu_csbf,
7717
0
                                ps_ctxt->i4_cu_csbf_strd,
7718
0
                                trans_size,
7719
0
                                scan_idx,
7720
0
                                PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag,
7721
0
                                &num_bytes,
7722
0
                                &tu_bits,
7723
0
                                &zero_cols,
7724
0
                                &zero_rows,
7725
0
                                &u1_is_recon_available,
7726
0
                                i4_perform_sbh,
7727
0
                                i4_perform_rdoq,
7728
0
                                &trans_ssd_v,
7729
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7730
0
                                i4_alpha_stim_multiplier,
7731
0
                                u1_is_cu_noisy,
7732
0
#endif
7733
0
                                ps_best_cu_prms->u1_skip_flag,
7734
0
                                u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
7735
0
                                V_PLANE);
7736
7737
0
                            if(u1_compute_spatial_ssd && u1_is_recon_available)
7738
0
                            {
7739
0
                                ps_recon_datastore
7740
0
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7741
0
                                                                        [i4_subtu_idx] = 0;
7742
0
                            }
7743
0
                            else
7744
0
                            {
7745
0
                                ps_recon_datastore
7746
0
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7747
0
                                                                        [i4_subtu_idx] = UCHAR_MAX;
7748
0
                            }
7749
7750
#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7751
                            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7752
                            {
7753
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
7754
                                trans_ssd_v = ihevce_inject_stim_into_distortion(
7755
                                    pu1_cur_src,
7756
                                    chrm_src_stride,
7757
                                    pu1_cur_pred,
7758
                                    pred_strd,
7759
                                    trans_ssd_v,
7760
                                    i4_alpha_stim_multiplier,
7761
                                    trans_size,
7762
                                    0,
7763
                                    ps_ctxt->u1_enable_psyRDOPT,
7764
                                    V_PLANE);
7765
#else
7766
                                if(u1_compute_spatial_ssd && u1_is_recon_available)
7767
                                {
7768
                                    trans_ssd_v = ihevce_inject_stim_into_distortion(
7769
                                        pu1_cur_src,
7770
                                        chrm_src_stride,
7771
                                        pu1_cur_recon,
7772
                                        i4_recon_stride,
7773
                                        trans_ssd_v,
7774
                                        i4_alpha_stim_multiplier,
7775
                                        trans_size,
7776
                                        0,
7777
                                        ps_ctxt->u1_enable_psyRDOPT,
7778
                                        V_PLANE);
7779
                                }
7780
                                else
7781
                                {
7782
                                    trans_ssd_v = ihevce_inject_stim_into_distortion(
7783
                                        pu1_cur_src,
7784
                                        chrm_src_stride,
7785
                                        pu1_cur_pred,
7786
                                        pred_strd,
7787
                                        trans_ssd_v,
7788
                                        i4_alpha_stim_multiplier,
7789
                                        trans_size,
7790
                                        0,
7791
                                        ps_ctxt->u1_enable_psyRDOPT,
7792
                                        V_PLANE);
7793
                                }
7794
#endif
7795
                            }
7796
#endif
7797
7798
0
                            curr_cr_cod_cost =
7799
0
                                trans_ssd_v +
7800
0
                                COMPUTE_RATE_COST_CLIP30(
7801
0
                                    tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
7802
0
                            chrm_tu_bits += tu_bits;
7803
0
                            i4_bits_cr += tu_bits;
7804
7805
                            /* RDOPT copy States :  New updated after curr TU to TU init */
7806
0
                            if(0 != cbf)
7807
0
                            {
7808
0
                                COPY_CABAC_STATES(
7809
0
                                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7810
0
                                    &ps_ctxt->s_rdopt_entropy_ctxt
7811
0
                                         .as_cu_entropy_ctxt[rd_opt_curr_idx]
7812
0
                                         .s_cabac_ctxt.au1_ctxt_models[0],
7813
0
                                    IHEVC_CAB_CTXT_END);
7814
0
                            }
7815
                            /* RDOPT copy States :  Restoring back the Cb init state to Cr */
7816
0
                            else
7817
0
                            {
7818
0
                                COPY_CABAC_STATES(
7819
0
                                    &ps_ctxt->s_rdopt_entropy_ctxt
7820
0
                                         .as_cu_entropy_ctxt[rd_opt_curr_idx]
7821
0
                                         .s_cabac_ctxt.au1_ctxt_models[0],
7822
0
                                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7823
0
                                    IHEVC_CAB_CTXT_END);
7824
0
                            }
7825
7826
                            /* If Intra and TU=CU/2, need recon for next TUs */
7827
0
                            if(calc_recon)
7828
0
                            {
7829
0
                                ihevce_chroma_it_recon_fxn(
7830
0
                                    ps_ctxt,
7831
0
                                    (pi2_cur_deq_data + trans_size),
7832
0
                                    deq_data_strd,
7833
0
                                    pu1_cur_pred,
7834
0
                                    pred_strd,
7835
0
                                    pu1_cur_recon,
7836
0
                                    i4_recon_stride,
7837
0
                                    (pu1_ecd_data + total_bytes_offset),
7838
0
                                    trans_size,
7839
0
                                    cbf,
7840
0
                                    zero_cols,
7841
0
                                    zero_rows,
7842
0
                                    V_PLANE);
7843
7844
0
                                ps_recon_datastore
7845
0
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7846
0
                                                                        [i4_subtu_idx] = 0;
7847
0
                            }
7848
0
                            else
7849
0
                            {
7850
0
                                ps_recon_datastore
7851
0
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7852
0
                                                                        [i4_subtu_idx] = UCHAR_MAX;
7853
0
                            }
7854
0
                        }
7855
0
                        else
7856
0
                        {
7857
                            /* num bytes is set to 0 */
7858
0
                            num_bytes = 0;
7859
7860
                            /* cbf is returned as 0 */
7861
0
                            cbf = 0;
7862
7863
0
                            curr_cr_cod_cost = trans_ssd_v =
7864
7865
0
                                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
7866
0
                                    pu1_cur_pred,
7867
0
                                    pu1_cur_src,
7868
0
                                    pred_strd,
7869
0
                                    chrm_src_stride,
7870
0
                                    trans_size,
7871
0
                                    trans_size,
7872
0
                                    V_PLANE);
7873
7874
0
                            if(u1_compute_spatial_ssd)
7875
0
                            {
7876
                                /* buffer copy from pred to recon */
7877
0
                                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
7878
0
                                    pu1_cur_pred,
7879
0
                                    pred_strd,
7880
0
                                    pu1_cur_recon,
7881
0
                                    i4_recon_stride,
7882
0
                                    trans_size,
7883
0
                                    trans_size,
7884
0
                                    V_PLANE);
7885
7886
0
                                ps_recon_datastore
7887
0
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7888
0
                                                                        [i4_subtu_idx] = 0;
7889
0
                            }
7890
7891
0
                            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7892
0
                            {
7893
0
                                trans_ssd_v = ihevce_inject_stim_into_distortion(
7894
0
                                    pu1_cur_src,
7895
0
                                    chrm_src_stride,
7896
0
                                    pu1_cur_pred,
7897
0
                                    pred_strd,
7898
0
                                    trans_ssd_v,
7899
0
                                    i4_alpha_stim_multiplier,
7900
0
                                    trans_size,
7901
0
                                    0,
7902
0
                                    ps_ctxt->u1_enable_psyRDOPT,
7903
0
                                    V_PLANE);
7904
0
                            }
7905
7906
0
#if ENABLE_INTER_ZCU_COST
7907
#if !WEIGH_CHROMA_COST
7908
                            /* cbf = 0, accumulate cu not coded cost */
7909
                            ps_ctxt->i8_cu_not_coded_cost += curr_cr_cod_cost;
7910
#else
7911
                            /* cbf = 0, accumulate cu not coded cost */
7912
7913
0
                            ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
7914
0
                                (curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7915
0
                                 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7916
0
                                CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7917
0
#endif
7918
0
#endif
7919
0
                        }
7920
7921
#if !WEIGH_CHROMA_COST
7922
                        curr_rdopt_cost += curr_cr_cod_cost;
7923
#else
7924
0
                        curr_rdopt_cost +=
7925
0
                            ((curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7926
0
                              (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7927
0
                             CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7928
0
#endif
7929
7930
0
                        chrm_cod_cost += curr_cr_cod_cost;
7931
0
                        i8_ssd_cr += trans_ssd_v;
7932
7933
0
                        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
7934
0
                        {
7935
                            /* Early exit : If the current running cost exceeds
7936
                            the prev. best mode cost, break */
7937
0
                            if(curr_rdopt_cost > prev_best_rdopt_cost)
7938
0
                            {
7939
0
                                u1_is_early_exit_condition_satisfied = 1;
7940
0
                                break;
7941
0
                            }
7942
0
                        }
7943
7944
                        /* inter cu is coded if any of the tu is coded in it */
7945
0
                        ps_best_cu_prms->u1_is_cu_coded |= cbf;
7946
7947
                        /* update CR related params */
7948
0
                        ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] =
7949
0
                            total_bytes_offset + init_bytes_offset;
7950
7951
0
                        if(0 == i4_subtu_idx)
7952
0
                        {
7953
0
                            ps_tu->s_tu.b1_cr_cbf = cbf;
7954
0
                        }
7955
0
                        else
7956
0
                        {
7957
0
                            ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf;
7958
0
                        }
7959
7960
0
                        total_bytes_offset += num_bytes;
7961
7962
0
                        ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] = zero_cols;
7963
0
                        ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] = zero_rows;
7964
0
                        ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes;
7965
0
                    }
7966
0
                    else
7967
0
                    {
7968
0
                        ps_recon_datastore
7969
0
                            ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx] =
7970
0
                            UCHAR_MAX;
7971
0
                        ps_recon_datastore
7972
0
                            ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx] =
7973
0
                            UCHAR_MAX;
7974
0
                    }
7975
0
                }
7976
7977
0
                if(u1_is_early_exit_condition_satisfied)
7978
0
                {
7979
0
                    break;
7980
0
                }
7981
7982
                /* loop increments */
7983
0
                ps_tu++;
7984
0
                ps_tu_temp_prms++;
7985
0
            }
7986
7987
            /* Signal as luma mode. HIGH_QUALITY may update it */
7988
0
            ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
7989
7990
            /* modify the cost chrm_cod_cost */
7991
0
            if(ps_ctxt->u1_enable_psyRDOPT)
7992
0
            {
7993
0
                UWORD8 *pu1_recon_cu;
7994
0
                WORD32 recon_stride;
7995
0
                WORD32 curr_pos_x;
7996
0
                WORD32 curr_pos_y;
7997
0
                WORD32 start_index;
7998
0
                WORD32 num_horz_cu_in_ctb;
7999
0
                WORD32 had_block_size;
8000
                /* TODO: sreenivasa ctb size has to be used appropriately */
8001
0
                had_block_size = 8;
8002
0
                num_horz_cu_in_ctb = 2 * 64 / had_block_size;
8003
8004
0
                curr_pos_x = cu_pos_x << 3; /* pel units */
8005
0
                curr_pos_y = cu_pos_y << 3; /* pel units */
8006
0
                recon_stride = i4_recon_stride;
8007
0
                pu1_recon_cu = pu1_recon;
8008
8009
                /* start index to index the source satd of curr cu in the current ctb */
8010
0
                start_index = 2 * (curr_pos_x / had_block_size) +
8011
0
                              (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
8012
8013
0
                {
8014
0
                    chrm_cod_cost += ihevce_psy_rd_cost_croma(
8015
0
                        ps_ctxt->ai4_source_chroma_satd,
8016
0
                        pu1_recon,
8017
0
                        recon_stride,
8018
0
                        1,  //
8019
0
                        cu_size,
8020
0
                        0,  // pic type
8021
0
                        0,  //layer id
8022
0
                        ps_ctxt->i4_satd_lamda,  // lambda
8023
0
                        start_index,
8024
0
                        ps_ctxt->u1_is_input_data_hbd,  // 8 bit
8025
0
                        ps_ctxt->u1_chroma_array_type,
8026
0
                        &ps_ctxt->s_cmn_opt_func
8027
8028
0
                    );  // chroma subsampling 420
8029
0
                }
8030
0
            }
8031
0
        }
8032
0
        else
8033
0
        {
8034
0
            u1_is_mode_eq_chroma_satd_mode = 1;
8035
0
            chrm_cod_cost = MAX_COST_64;
8036
0
        }
8037
8038
        /* If Intra Block and preset is HIGH QUALITY, then compare with best SATD mode */
8039
0
        if((PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag) &&
8040
0
           (1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd))
8041
0
        {
8042
0
            if(64 == cu_size)
8043
0
            {
8044
0
                ASSERT(TU_EQ_CU != func_proc_mode);
8045
0
            }
8046
8047
0
            if(ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode]
8048
0
                   .i8_chroma_best_rdopt < chrm_cod_cost)
8049
0
            {
8050
0
                UWORD8 *pu1_src;
8051
0
                UWORD8 *pu1_ecd_data_src_cb;
8052
0
                UWORD8 *pu1_ecd_data_src_cr;
8053
8054
0
                chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt =
8055
0
                    &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode];
8056
8057
0
                UWORD8 *pu1_dst = &ps_ctxt->au1_rdopt_init_ctxt_models[0];
8058
0
                WORD32 ai4_ecd_data_cb_offset[2] = { 0, 0 };
8059
0
                WORD32 ai4_ecd_data_cr_offset[2] = { 0, 0 };
8060
8061
0
                pu1_src = &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0];
8062
0
                chrm_cod_cost = ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt;
8063
0
                chrm_pred_mode = ps_chr_intra_satd_ctxt->u1_best_cr_mode;
8064
0
                chrm_tu_bits = ps_chr_intra_satd_ctxt->i4_chrm_tu_bits;
8065
8066
0
                if(u1_is_mode_eq_chroma_satd_mode)
8067
0
                {
8068
0
                    chrm_cod_cost -= ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode;
8069
0
                }
8070
8071
                /* Resetting total_bytes_offset to 0 */
8072
0
                total_bytes_offset = 0;
8073
8074
                /* Update the CABAC state corresponding to chroma only */
8075
                /* Chroma Cbf */
8076
0
                memcpy(pu1_dst + IHEVC_CAB_CBCR_IDX, pu1_src + IHEVC_CAB_CBCR_IDX, 2);
8077
                /* Chroma transform skip */
8078
0
                memcpy(pu1_dst + IHEVC_CAB_TFM_SKIP12, pu1_src + IHEVC_CAB_TFM_SKIP12, 1);
8079
                /* Chroma last coeff x prefix */
8080
0
                memcpy(
8081
0
                    pu1_dst + IHEVC_CAB_COEFFX_PREFIX + 15,
8082
0
                    pu1_src + IHEVC_CAB_COEFFX_PREFIX + 15,
8083
0
                    3);
8084
                /* Chroma last coeff y prefix */
8085
0
                memcpy(
8086
0
                    pu1_dst + IHEVC_CAB_COEFFY_PREFIX + 15,
8087
0
                    pu1_src + IHEVC_CAB_COEFFY_PREFIX + 15,
8088
0
                    3);
8089
                /* Chroma csbf */
8090
0
                memcpy(
8091
0
                    pu1_dst + IHEVC_CAB_CODED_SUBLK_IDX + 2,
8092
0
                    pu1_src + IHEVC_CAB_CODED_SUBLK_IDX + 2,
8093
0
                    2);
8094
                /* Chroma sig coeff flags */
8095
0
                memcpy(
8096
0
                    pu1_dst + IHEVC_CAB_COEFF_FLAG + 27, pu1_src + IHEVC_CAB_COEFF_FLAG + 27, 15);
8097
                /* Chroma absgt1 flags */
8098
0
                memcpy(
8099
0
                    pu1_dst + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16,
8100
0
                    pu1_src + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16,
8101
0
                    8);
8102
                /* Chroma absgt2 flags */
8103
0
                memcpy(
8104
0
                    pu1_dst + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4,
8105
0
                    pu1_src + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4,
8106
0
                    2);
8107
8108
0
                ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
8109
0
                ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
8110
8111
                /* update to luma decision as we update chroma in final mode */
8112
0
                ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded_old;
8113
8114
0
                for(ctr = 0; ctr < u1_num_tus; ctr++)
8115
0
                {
8116
0
                    for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++)
8117
0
                    {
8118
0
                        WORD32 cbf;
8119
0
                        WORD32 num_bytes;
8120
8121
0
                        pu1_ecd_data_src_cb =
8122
0
                            &ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0];
8123
0
                        pu1_ecd_data_src_cr =
8124
0
                            &ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0];
8125
8126
                        /* check if chroma present flag is set */
8127
0
                        if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
8128
0
                        {
8129
0
                            UWORD8 *pu1_cur_pred_dest;
8130
0
                            UWORD8 *pu1_cur_pred_src;
8131
0
                            WORD32 pred_src_strd;
8132
0
                            WORD16 *pi2_cur_deq_data_dest;
8133
0
                            WORD16 *pi2_cur_deq_data_src_cb;
8134
0
                            WORD16 *pi2_cur_deq_data_src_cr;
8135
0
                            WORD32 deq_src_strd;
8136
8137
0
                            WORD32 curr_pos_x, curr_pos_y;
8138
8139
0
                            trans_size = ps_tu->s_tu.b3_size;
8140
0
                            trans_size = (1 << (trans_size + 1)); /* in chroma units */
8141
8142
                            /*Deriving stride values*/
8143
0
                            pred_src_strd = ps_chr_intra_satd_ctxt->i4_pred_stride;
8144
0
                            deq_src_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride;
8145
8146
                            /* since 2x2 transform is not allowed for chroma*/
8147
0
                            if(2 == trans_size)
8148
0
                            {
8149
0
                                trans_size = 4;
8150
0
                            }
8151
8152
                            /* get the current tu posx and posy w.r.t to cu */
8153
0
                            curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3);
8154
0
                            curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) +
8155
0
                                         (i4_subtu_idx * trans_size);
8156
8157
                            /* 420sp case only vertical height will be half */
8158
0
                            if(0 == u1_is_422)
8159
0
                            {
8160
0
                                curr_pos_y >>= 1;
8161
0
                            }
8162
8163
                            /* increment the pointers to start of current TU  */
8164
0
                            pu1_cur_pred_src =
8165
0
                                ((UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data + curr_pos_x);
8166
0
                            pu1_cur_pred_src += (curr_pos_y * pred_src_strd);
8167
0
                            pu1_cur_pred_dest = (pu1_pred + curr_pos_x);
8168
0
                            pu1_cur_pred_dest += (curr_pos_y * pred_strd);
8169
8170
0
                            pi2_cur_deq_data_src_cb =
8171
0
                                &ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] + (curr_pos_x >> 1);
8172
0
                            pi2_cur_deq_data_src_cr =
8173
0
                                &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] + (curr_pos_x >> 1);
8174
0
                            pi2_cur_deq_data_src_cb += (curr_pos_y * deq_src_strd);
8175
0
                            pi2_cur_deq_data_src_cr += (curr_pos_y * deq_src_strd);
8176
0
                            pi2_cur_deq_data_dest = pi2_deq_data + curr_pos_x;
8177
0
                            pi2_cur_deq_data_dest += (curr_pos_y * deq_data_strd);
8178
8179
                            /*Overwriting deq data with that belonging to the winning special mode
8180
                            (luma mode !=  chroma mode)
8181
                            ihevce_copy_2d takes source and dest arguments as UWORD8 *. We have to
8182
                            correspondingly manipulate to copy WORD16 data*/
8183
8184
0
                            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
8185
0
                                (UWORD8 *)pi2_cur_deq_data_dest,
8186
0
                                (deq_data_strd << 1),
8187
0
                                (UWORD8 *)pi2_cur_deq_data_src_cb,
8188
0
                                (deq_src_strd << 1),
8189
0
                                (trans_size << 1),
8190
0
                                trans_size);
8191
8192
0
                            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
8193
0
                                (UWORD8 *)(pi2_cur_deq_data_dest + trans_size),
8194
0
                                (deq_data_strd << 1),
8195
0
                                (UWORD8 *)pi2_cur_deq_data_src_cr,
8196
0
                                (deq_src_strd << 1),
8197
0
                                (trans_size << 1),
8198
0
                                trans_size);
8199
8200
                            /*Overwriting pred data with that belonging to the winning special mode
8201
                            (luma mode !=  chroma mode)*/
8202
8203
0
                            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
8204
0
                                pu1_cur_pred_dest,
8205
0
                                pred_strd,
8206
0
                                pu1_cur_pred_src,
8207
0
                                pred_src_strd,
8208
0
                                (trans_size << 1),
8209
0
                                trans_size);
8210
8211
0
                            num_bytes = ps_chr_intra_satd_ctxt
8212
0
                                            ->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr];
8213
0
                            cbf = ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr];
8214
                            /* inter cu is coded if any of the tu is coded in it */
8215
0
                            ps_best_cu_prms->u1_is_cu_coded |= cbf;
8216
8217
                            /* update CB related params */
8218
0
                            ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] =
8219
0
                                total_bytes_offset + init_bytes_offset;
8220
8221
0
                            if(0 == i4_subtu_idx)
8222
0
                            {
8223
0
                                ps_tu->s_tu.b1_cb_cbf = cbf;
8224
0
                            }
8225
0
                            else
8226
0
                            {
8227
0
                                ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf;
8228
0
                            }
8229
8230
                            /*Overwriting the cb ecd data corresponding to the special mode*/
8231
0
                            if(0 != num_bytes)
8232
0
                            {
8233
0
                                memcpy(
8234
0
                                    (pu1_ecd_data + total_bytes_offset),
8235
0
                                    pu1_ecd_data_src_cb + ai4_ecd_data_cb_offset[i4_subtu_idx],
8236
0
                                    num_bytes);
8237
0
                            }
8238
8239
0
                            total_bytes_offset += num_bytes;
8240
0
                            ai4_ecd_data_cb_offset[i4_subtu_idx] += num_bytes;
8241
0
                            ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes;
8242
8243
0
                            num_bytes = ps_chr_intra_satd_ctxt
8244
0
                                            ->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr];
8245
0
                            cbf = ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr];
8246
                            /* inter cu is coded if any of the tu is coded in it */
8247
0
                            ps_best_cu_prms->u1_is_cu_coded |= cbf;
8248
8249
                            /*Overwriting the cr ecd data corresponding to the special mode*/
8250
0
                            if(0 != num_bytes)
8251
0
                            {
8252
0
                                memcpy(
8253
0
                                    (pu1_ecd_data + total_bytes_offset),
8254
0
                                    pu1_ecd_data_src_cr + ai4_ecd_data_cr_offset[i4_subtu_idx],
8255
0
                                    num_bytes);
8256
0
                            }
8257
8258
                            /* update CR related params */
8259
0
                            ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] =
8260
0
                                total_bytes_offset + init_bytes_offset;
8261
8262
0
                            if(0 == i4_subtu_idx)
8263
0
                            {
8264
0
                                ps_tu->s_tu.b1_cr_cbf = cbf;
8265
0
                            }
8266
0
                            else
8267
0
                            {
8268
0
                                ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf;
8269
0
                            }
8270
8271
0
                            total_bytes_offset += num_bytes;
8272
0
                            ai4_ecd_data_cr_offset[i4_subtu_idx] += num_bytes;
8273
8274
                            /*Updating zero rows and zero cols*/
8275
0
                            ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] =
8276
0
                                ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr];
8277
0
                            ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] =
8278
0
                                ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr];
8279
0
                            ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] =
8280
0
                                ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr];
8281
0
                            ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] =
8282
0
                                ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr];
8283
8284
0
                            ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes;
8285
8286
0
                            if((u1_num_tus > 1) &&
8287
0
                               ps_recon_datastore->au1_is_chromaRecon_available[2])
8288
0
                            {
8289
0
                                ps_recon_datastore
8290
0
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
8291
0
                                                                        [i4_subtu_idx] = 2;
8292
0
                                ps_recon_datastore
8293
0
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
8294
0
                                                                        [i4_subtu_idx] = 2;
8295
0
                            }
8296
0
                            else if(
8297
0
                                (1 == u1_num_tus) &&
8298
0
                                ps_recon_datastore->au1_is_chromaRecon_available[1])
8299
0
                            {
8300
0
                                ps_recon_datastore
8301
0
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
8302
0
                                                                        [i4_subtu_idx] = 1;
8303
0
                                ps_recon_datastore
8304
0
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
8305
0
                                                                        [i4_subtu_idx] = 1;
8306
0
                            }
8307
0
                            else
8308
0
                            {
8309
0
                                ps_recon_datastore
8310
0
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
8311
0
                                                                        [i4_subtu_idx] = UCHAR_MAX;
8312
0
                                ps_recon_datastore
8313
0
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
8314
0
                                                                        [i4_subtu_idx] = UCHAR_MAX;
8315
0
                            }
8316
0
                        }
8317
0
                    }
8318
8319
                    /* loop increments */
8320
0
                    ps_tu++;
8321
0
                    ps_tu_temp_prms++;
8322
0
                }
8323
0
            }
8324
8325
0
            if(!u1_is_422)
8326
0
            {
8327
0
                if(chrm_pred_mode == luma_pred_mode)
8328
0
                {
8329
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
8330
0
                }
8331
0
                else if(chrm_pred_mode == 0)
8332
0
                {
8333
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 0;
8334
0
                }
8335
0
                else if(chrm_pred_mode == 1)
8336
0
                {
8337
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 3;
8338
0
                }
8339
0
                else if(chrm_pred_mode == 10)
8340
0
                {
8341
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 2;
8342
0
                }
8343
0
                else if(chrm_pred_mode == 26)
8344
0
                {
8345
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 1;
8346
0
                }
8347
0
                else
8348
0
                {
8349
0
                    ASSERT(0); /*Should not come here*/
8350
0
                }
8351
0
            }
8352
0
            else
8353
0
            {
8354
0
                if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[luma_pred_mode])
8355
0
                {
8356
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
8357
0
                }
8358
0
                else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[0])
8359
0
                {
8360
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 0;
8361
0
                }
8362
0
                else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[1])
8363
0
                {
8364
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 3;
8365
0
                }
8366
0
                else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[10])
8367
0
                {
8368
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 2;
8369
0
                }
8370
0
                else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[26])
8371
0
                {
8372
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 1;
8373
0
                }
8374
0
                else
8375
0
                {
8376
0
                    ASSERT(0); /*Should not come here*/
8377
0
                }
8378
0
            }
8379
0
        }
8380
8381
        /* Store the actual chroma mode */
8382
0
        ps_best_cu_prms->u1_chroma_intra_pred_actual_mode = chrm_pred_mode;
8383
0
    }
8384
8385
    /* update the total bytes produced */
8386
0
    ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes_offset + init_bytes_offset;
8387
8388
    /* store the final chrm bits accumulated */
8389
0
    *pi4_chrm_tu_bits = chrm_tu_bits;
8390
8391
0
    return (chrm_cod_cost);
8392
0
}
8393
8394
/*!
8395
******************************************************************************
8396
* \if Function name : ihevce_final_rdopt_mode_prcs \endif
8397
*
8398
* \brief
8399
*    Final RDOPT mode process function. Performs Recon computation for the
8400
*    final mode. Re-use or Compute pred, iq-data, coeff based on the flags.
8401
*
8402
* \param[in] ps_ctxt : pointer to enc_loop module context
8403
* \param[in] ps_prms : pointer to struct containing requisite parameters
8404
*
8405
* \return
8406
*    None
8407
*
8408
* \author
8409
*  Ittiam
8410
*
8411
*****************************************************************************
8412
*/
8413
void ihevce_final_rdopt_mode_prcs(
8414
    ihevce_enc_loop_ctxt_t *ps_ctxt, final_mode_process_prms_t *ps_prms)
8415
0
{
8416
0
    enc_loop_cu_final_prms_t *ps_best_cu_prms;
8417
0
    tu_enc_loop_out_t *ps_tu_enc_loop;
8418
0
    tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms;
8419
0
    nbr_avail_flags_t s_nbr;
8420
0
    recon_datastore_t *ps_recon_datastore;
8421
8422
0
    ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
8423
0
    ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
8424
0
    ihevc_intra_pred_ref_filtering_ft *ihevc_intra_pred_ref_filtering_fptr;
8425
8426
0
    WORD32 num_tu_in_cu;
8427
0
    LWORD64 rd_opt_cost;
8428
0
    WORD32 ctr;
8429
0
    WORD32 i4_subtu_idx;
8430
0
    WORD32 cu_size;
8431
0
    WORD32 cu_pos_x, cu_pos_y;
8432
0
    WORD32 chrm_present_flag = 1;
8433
0
    WORD32 num_bytes, total_bytes = 0;
8434
0
    WORD32 chrm_ctr = 0;
8435
0
    WORD32 u1_is_cu_coded;
8436
0
    UWORD8 *pu1_old_ecd_data;
8437
0
    UWORD8 *pu1_chrm_old_ecd_data;
8438
0
    UWORD8 *pu1_cur_pred;
8439
0
    WORD16 *pi2_deq_data;
8440
0
    WORD16 *pi2_chrm_deq_data;
8441
0
    WORD16 *pi2_cur_deq_data;
8442
0
    WORD16 *pi2_cur_deq_data_chrm;
8443
0
    UWORD8 *pu1_cur_luma_recon;
8444
0
    UWORD8 *pu1_cur_chroma_recon;
8445
0
    UWORD8 *pu1_cur_src;
8446
0
    UWORD8 *pu1_cur_src_chrm;
8447
0
    UWORD8 *pu1_cur_pred_chrm;
8448
0
    UWORD8 *pu1_intra_pred_mode;
8449
0
    UWORD32 *pu4_nbr_flags;
8450
0
    LWORD64 i8_ssd;
8451
8452
0
    cu_nbr_prms_t *ps_cu_nbr_prms = ps_prms->ps_cu_nbr_prms;
8453
0
    cu_inter_cand_t *ps_best_inter_cand = ps_prms->ps_best_inter_cand;
8454
0
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms = ps_prms->ps_chrm_cu_buf_prms;
8455
8456
0
    WORD32 packed_pred_mode = ps_prms->packed_pred_mode;
8457
0
    WORD32 rd_opt_best_idx = ps_prms->rd_opt_best_idx;
8458
0
    UWORD8 *pu1_src = (UWORD8 *)ps_prms->pv_src;
8459
0
    WORD32 src_strd = ps_prms->src_strd;
8460
0
    UWORD8 *pu1_pred = (UWORD8 *)ps_prms->pv_pred;
8461
0
    WORD32 pred_strd = ps_prms->pred_strd;
8462
0
    UWORD8 *pu1_pred_chrm = (UWORD8 *)ps_prms->pv_pred_chrm;
8463
0
    WORD32 pred_chrm_strd = ps_prms->pred_chrm_strd;
8464
0
    UWORD8 *pu1_final_ecd_data = ps_prms->pu1_final_ecd_data;
8465
0
    UWORD8 *pu1_csbf_buf = ps_prms->pu1_csbf_buf;
8466
0
    WORD32 csbf_strd = ps_prms->csbf_strd;
8467
0
    UWORD8 *pu1_luma_recon = (UWORD8 *)ps_prms->pv_luma_recon;
8468
0
    WORD32 recon_luma_strd = ps_prms->recon_luma_strd;
8469
0
    UWORD8 *pu1_chrm_recon = (UWORD8 *)ps_prms->pv_chrm_recon;
8470
0
    WORD32 recon_chrma_strd = ps_prms->recon_chrma_strd;
8471
0
    UWORD8 u1_cu_pos_x = ps_prms->u1_cu_pos_x;
8472
0
    UWORD8 u1_cu_pos_y = ps_prms->u1_cu_pos_y;
8473
0
    UWORD8 u1_cu_size = ps_prms->u1_cu_size;
8474
0
    WORD8 i1_cu_qp = ps_prms->i1_cu_qp;
8475
0
    UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
8476
0
    UWORD8 u1_num_subtus = (u1_is_422 == 1) + 1;
8477
    /* Get the Chroma pointer and parameters */
8478
0
    UWORD8 *pu1_src_chrm = ps_chrm_cu_buf_prms->pu1_curr_src;
8479
0
    WORD32 src_chrm_strd = ps_chrm_cu_buf_prms->i4_chrm_src_stride;
8480
0
    UWORD8 u1_compute_spatial_ssd_luma = 0;
8481
0
    UWORD8 u1_compute_spatial_ssd_chroma = 0;
8482
    /* Get the pointer for function selector */
8483
0
    ihevc_intra_pred_luma_ref_substitution_fptr =
8484
0
        ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
8485
8486
0
    ihevc_intra_pred_ref_filtering_fptr =
8487
0
        ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr;
8488
8489
0
    ihevc_intra_pred_chroma_ref_substitution_fptr =
8490
0
        ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
8491
8492
    /* Get the best CU parameters */
8493
0
    ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
8494
0
    num_tu_in_cu = ps_best_cu_prms->u2_num_tus_in_cu;
8495
0
    cu_size = ps_best_cu_prms->u1_cu_size;
8496
0
    cu_pos_x = u1_cu_pos_x;
8497
0
    cu_pos_y = u1_cu_pos_y;
8498
0
    pu1_intra_pred_mode = &ps_best_cu_prms->au1_intra_pred_mode[0];
8499
0
    pu4_nbr_flags = &ps_best_cu_prms->au4_nbr_flags[0];
8500
0
    ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore;
8501
8502
    /* get the first TU pointer */
8503
0
    ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0];
8504
    /* get the first TU only enc_loop prms pointer */
8505
0
    ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
8506
    /*modify quant related param in ctxt based on current cu qp*/
8507
0
    if((ps_ctxt->i1_cu_qp_delta_enable))
8508
0
    {
8509
        /*recompute quant related param at every cu level*/
8510
0
        ihevce_compute_quant_rel_param(ps_ctxt, i1_cu_qp);
8511
8512
        /* get frame level lambda params */
8513
0
        ihevce_get_cl_cu_lambda_prms(
8514
0
            ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? i1_cu_qp : ps_ctxt->i4_frame_qp);
8515
0
    }
8516
8517
0
    ps_best_cu_prms->i8_cu_ssd = 0;
8518
0
    ps_best_cu_prms->u4_cu_open_intra_sad = 0;
8519
8520
    /* For skip case : Set TU_size = CU_size and make cbf = 0
8521
    so that same TU loop can be used for all modes */
8522
0
    if(PRED_MODE_SKIP == packed_pred_mode)
8523
0
    {
8524
0
        for(ctr = 0; ctr < num_tu_in_cu; ctr++)
8525
0
        {
8526
0
            ps_tu_enc_loop->s_tu.b1_y_cbf = 0;
8527
8528
0
            ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = 0;
8529
8530
0
            ps_tu_enc_loop++;
8531
0
            ps_tu_enc_loop_temp_prms++;
8532
0
        }
8533
8534
        /* go back to the first TU pointer */
8535
0
        ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0];
8536
0
        ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
8537
0
    }
8538
    /**   For inter case, pred calculation is outside the loop     **/
8539
0
    if(PRED_MODE_INTRA != packed_pred_mode)
8540
0
    {
8541
        /**------------- Compute pred data if required --------------**/
8542
0
        if((1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data))
8543
0
        {
8544
0
            nbr_4x4_t *ps_topleft_nbr_4x4;
8545
0
            nbr_4x4_t *ps_left_nbr_4x4;
8546
0
            nbr_4x4_t *ps_top_nbr_4x4;
8547
0
            WORD32 nbr_4x4_left_strd;
8548
8549
0
            ps_best_inter_cand->pu1_pred_data = pu1_pred;
8550
0
            ps_best_inter_cand->i4_pred_data_stride = pred_strd;
8551
8552
            /* Get the CU nbr information */
8553
0
            ps_topleft_nbr_4x4 = ps_cu_nbr_prms->ps_topleft_nbr_4x4;
8554
0
            ps_left_nbr_4x4 = ps_cu_nbr_prms->ps_left_nbr_4x4;
8555
0
            ps_top_nbr_4x4 = ps_cu_nbr_prms->ps_top_nbr_4x4;
8556
0
            nbr_4x4_left_strd = ps_cu_nbr_prms->nbr_4x4_left_strd;
8557
8558
            /* MVP ,MVD calc and Motion compensation */
8559
0
            rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)(
8560
0
                ps_ctxt,
8561
0
                ps_best_inter_cand,
8562
0
                u1_cu_size,
8563
0
                cu_pos_x,
8564
0
                cu_pos_y,
8565
0
                ps_left_nbr_4x4,
8566
0
                ps_top_nbr_4x4,
8567
0
                ps_topleft_nbr_4x4,
8568
0
                nbr_4x4_left_strd,
8569
0
                rd_opt_best_idx);
8570
0
        }
8571
8572
        /** ------ Motion Compensation for Chroma -------- **/
8573
0
        if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data)
8574
0
        {
8575
0
            UWORD8 *pu1_cur_pred;
8576
0
            pu1_cur_pred = pu1_pred_chrm;
8577
8578
            /* run a loop over all the partitions in cu */
8579
0
            for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++)
8580
0
            {
8581
0
                pu_t *ps_pu;
8582
0
                WORD32 inter_pu_wd, inter_pu_ht;
8583
8584
0
                ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr];
8585
8586
                /* IF AMP then each partitions can have diff wd ht */
8587
0
                inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
8588
0
                inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
8589
0
                inter_pu_ht <<= u1_is_422;
8590
                /* chroma mc func */
8591
0
                ihevce_chroma_inter_pred_pu(
8592
0
                    &ps_ctxt->s_mc_ctxt, ps_pu, pu1_cur_pred, pred_chrm_strd);
8593
0
                if(2 == ps_best_cu_prms->u2_num_pus_in_cu)
8594
0
                {
8595
                    /* 2Nx__ partion case */
8596
0
                    if(inter_pu_wd == ps_best_cu_prms->u1_cu_size)
8597
0
                    {
8598
0
                        pu1_cur_pred += (inter_pu_ht * pred_chrm_strd);
8599
0
                    }
8600
                    /* __x2N partion case */
8601
0
                    if(inter_pu_ht == (ps_best_cu_prms->u1_cu_size >> (u1_is_422 == 0)))
8602
0
                    {
8603
0
                        pu1_cur_pred += inter_pu_wd;
8604
0
                    }
8605
0
                }
8606
0
            }
8607
0
        }
8608
0
    }
8609
0
    pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0];
8610
0
    pi2_chrm_deq_data =
8611
0
        &ps_best_cu_prms->pi2_cu_deq_coeffs[0] + ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx;
8612
0
    pu1_old_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0];
8613
0
    pu1_chrm_old_ecd_data =
8614
0
        &ps_best_cu_prms->pu1_cu_coeffs[0] + ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx;
8615
8616
    /* default value for cu coded flag */
8617
0
    u1_is_cu_coded = 0;
8618
8619
    /* If we are re-computing coeff, set sad to 0 and start accumulating */
8620
    /* else use the best cand. sad from RDOPT stage                    */
8621
0
    if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data)
8622
0
    {
8623
        /* init of sad of CU accumulated over all TUs */
8624
0
        ps_best_cu_prms->u4_cu_sad = 0;
8625
8626
        /* reset the luma residual bits */
8627
0
        ps_best_cu_prms->u4_cu_luma_res_bits = 0;
8628
0
    }
8629
8630
0
    if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data)
8631
0
    {
8632
        /* reset the chroma residual bits */
8633
0
        ps_best_cu_prms->u4_cu_chroma_res_bits = 0;
8634
0
    }
8635
8636
0
    if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data) ||
8637
0
       (1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data))
8638
0
    {
8639
        /*Header bits have to be reevaluated if luma and chroma reevaluation is done, as
8640
        the quantized coefficients might be changed.
8641
        We are copying only those states which correspond to the header from the cabac state
8642
        of the previous CU, because the header is going to be recomputed for this condition*/
8643
0
        ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1;
8644
0
        memcpy(
8645
0
            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
8646
0
            &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
8647
0
            IHEVC_CAB_COEFFX_PREFIX);
8648
8649
0
        if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data))
8650
0
        {
8651
0
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
8652
0
                (&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX),
8653
0
                (&ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0] +
8654
0
                 IHEVC_CAB_COEFFX_PREFIX),
8655
0
                (IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX));
8656
0
        }
8657
0
        else
8658
0
        {
8659
0
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
8660
0
                (&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX),
8661
0
                (&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
8662
0
                      .s_cabac_ctxt.au1_ctxt_models[0] +
8663
0
                 IHEVC_CAB_COEFFX_PREFIX),
8664
0
                (IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX));
8665
0
        }
8666
0
        ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_best_idx;
8667
0
    }
8668
0
    else
8669
0
    {
8670
0
        ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0;
8671
0
    }
8672
8673
    /* Zero cbf tool is disabled for intra CUs */
8674
0
    if(PRED_MODE_INTRA == packed_pred_mode)
8675
0
    {
8676
#if ENABLE_ZERO_CBF_IN_INTRA
8677
        ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
8678
#else
8679
0
        ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
8680
0
#endif
8681
0
    }
8682
0
    else
8683
0
    {
8684
#if DISABLE_ZERO_ZBF_IN_INTER
8685
        ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
8686
#else
8687
0
        ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
8688
0
#endif
8689
0
    }
8690
8691
    /** Loop for all tu blocks in current cu and do reconstruction **/
8692
0
    for(ctr = 0; ctr < num_tu_in_cu; ctr++)
8693
0
    {
8694
0
        tu_t *ps_tu;
8695
0
        WORD32 trans_size, num_4x4_in_tu;
8696
0
        WORD32 cbf, zero_rows, zero_cols;
8697
0
        WORD32 cu_pos_x_in_4x4, cu_pos_y_in_4x4;
8698
0
        WORD32 cu_pos_x_in_pix, cu_pos_y_in_pix;
8699
0
        WORD32 luma_pred_mode, chroma_pred_mode = 0;
8700
0
        UWORD8 au1_is_recon_available[2];
8701
8702
0
        ps_tu = &(ps_tu_enc_loop->s_tu); /* Points to the TU property ctxt */
8703
8704
0
        u1_compute_spatial_ssd_luma = 0;
8705
0
        u1_compute_spatial_ssd_chroma = 0;
8706
8707
0
        trans_size = 1 << (ps_tu->b3_size + 2);
8708
0
        num_4x4_in_tu = (trans_size >> 2);
8709
0
        cu_pos_x_in_4x4 = ps_tu->b4_pos_x;
8710
0
        cu_pos_y_in_4x4 = ps_tu->b4_pos_y;
8711
8712
        /* populate the coeffs scan idx */
8713
0
        ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
8714
8715
        /* get the current pos x and pos y in pixels */
8716
0
        cu_pos_x_in_pix = (cu_pos_x_in_4x4 << 2) - (cu_pos_x << 3);
8717
0
        cu_pos_y_in_pix = (cu_pos_y_in_4x4 << 2) - (cu_pos_y << 3);
8718
8719
        /* Update pointers based on the location */
8720
0
        pu1_cur_src = pu1_src + cu_pos_x_in_pix;
8721
0
        pu1_cur_src += (cu_pos_y_in_pix * src_strd);
8722
0
        pu1_cur_pred = pu1_pred + cu_pos_x_in_pix;
8723
0
        pu1_cur_pred += (cu_pos_y_in_pix * pred_strd);
8724
8725
0
        pu1_cur_luma_recon = pu1_luma_recon + cu_pos_x_in_pix;
8726
0
        pu1_cur_luma_recon += (cu_pos_y_in_pix * recon_luma_strd);
8727
8728
0
        pi2_cur_deq_data = pi2_deq_data + cu_pos_x_in_pix;
8729
0
        pi2_cur_deq_data += cu_pos_y_in_pix * cu_size;
8730
8731
0
        pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix;
8732
0
        pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) +
8733
0
                            (u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd));
8734
8735
0
        pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix;
8736
0
        pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) +
8737
0
                             (u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd));
8738
8739
0
        pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix;
8740
0
        pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) +
8741
0
                                (u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd));
8742
8743
0
        pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix;
8744
0
        pi2_cur_deq_data_chrm +=
8745
0
            ((cu_pos_y_in_pix >> 1) * cu_size) + (u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size));
8746
8747
        /* if transform size is 4x4 then only first luma 4x4 will have chroma */
8748
0
        chrm_present_flag = 1; /* by default chroma present is set to 1*/
8749
8750
0
        if(4 == trans_size)
8751
0
        {
8752
            /* if tusize is 4x4 then only first luma 4x4 will have chroma*/
8753
0
            if(0 != chrm_ctr)
8754
0
            {
8755
0
                chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE;
8756
0
            }
8757
8758
            /* increment the chrm ctr unconditionally */
8759
0
            chrm_ctr++;
8760
            /* after ctr reached 4 reset it */
8761
0
            if(4 == chrm_ctr)
8762
0
            {
8763
0
                chrm_ctr = 0;
8764
0
            }
8765
0
        }
8766
8767
        /**------------- Compute pred data if required --------------**/
8768
0
        if(PRED_MODE_INTRA == packed_pred_mode) /* Inter pred calc. is done outside loop */
8769
0
        {
8770
            /* Get the pred mode for scan idx calculation, even if pred is not required */
8771
0
            luma_pred_mode = *pu1_intra_pred_mode;
8772
8773
0
            if((ps_ctxt->i4_rc_pass == 1) ||
8774
0
               (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data))
8775
0
            {
8776
0
                WORD32 nbr_flags;
8777
0
                WORD32 luma_pred_func_idx;
8778
0
                UWORD8 *pu1_left;
8779
0
                UWORD8 *pu1_top;
8780
0
                UWORD8 *pu1_top_left;
8781
0
                WORD32 left_strd;
8782
8783
                /* left cu boundary */
8784
0
                if(0 == cu_pos_x_in_pix)
8785
0
                {
8786
0
                    left_strd = ps_cu_nbr_prms->cu_left_stride;
8787
0
                    pu1_left = ps_cu_nbr_prms->pu1_cu_left + cu_pos_y_in_pix * left_strd;
8788
0
                }
8789
0
                else
8790
0
                {
8791
0
                    pu1_left = pu1_cur_luma_recon - 1;
8792
0
                    left_strd = recon_luma_strd;
8793
0
                }
8794
8795
                /* top cu boundary */
8796
0
                if(0 == cu_pos_y_in_pix)
8797
0
                {
8798
0
                    pu1_top = ps_cu_nbr_prms->pu1_cu_top + cu_pos_x_in_pix;
8799
0
                }
8800
0
                else
8801
0
                {
8802
0
                    pu1_top = pu1_cur_luma_recon - recon_luma_strd;
8803
0
                }
8804
8805
                /* by default top left is set to cu top left */
8806
0
                pu1_top_left = ps_cu_nbr_prms->pu1_cu_top_left;
8807
8808
                /* top left based on position */
8809
0
                if((0 != cu_pos_y_in_pix) && (0 == cu_pos_x_in_pix))
8810
0
                {
8811
0
                    pu1_top_left = pu1_left - left_strd;
8812
0
                }
8813
0
                else if(0 != cu_pos_x_in_pix)
8814
0
                {
8815
0
                    pu1_top_left = pu1_top - 1;
8816
0
                }
8817
8818
                /* get the neighbour availability flags */
8819
0
                nbr_flags = ihevce_get_nbr_intra(
8820
0
                    &s_nbr,
8821
0
                    ps_ctxt->pu1_ctb_nbr_map,
8822
0
                    ps_ctxt->i4_nbr_map_strd,
8823
0
                    cu_pos_x_in_4x4,
8824
0
                    cu_pos_y_in_4x4,
8825
0
                    num_4x4_in_tu);
8826
8827
0
                if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data)
8828
0
                {
8829
                    /* copy the nbr flags for chroma reuse */
8830
0
                    if(4 != trans_size)
8831
0
                    {
8832
0
                        *pu4_nbr_flags = nbr_flags;
8833
0
                    }
8834
0
                    else if(1 == chrm_present_flag)
8835
0
                    {
8836
                        /* compute the avail flags assuming luma trans is 8x8 */
8837
                        /* get the neighbour availability flags */
8838
0
                        *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
8839
0
                            ps_ctxt->pu1_ctb_nbr_map,
8840
0
                            ps_ctxt->i4_nbr_map_strd,
8841
0
                            cu_pos_x_in_4x4,
8842
0
                            cu_pos_y_in_4x4,
8843
0
                            (num_4x4_in_tu << 1),
8844
0
                            (num_4x4_in_tu << 1));
8845
0
                    }
8846
8847
                    /* call reference array substitution */
8848
0
                    ihevc_intra_pred_luma_ref_substitution_fptr(
8849
0
                        pu1_top_left,
8850
0
                        pu1_top,
8851
0
                        pu1_left,
8852
0
                        left_strd,
8853
0
                        trans_size,
8854
0
                        nbr_flags,
8855
0
                        (UWORD8 *)ps_ctxt->pv_ref_sub_out,
8856
0
                        1);
8857
8858
                    /* call reference filtering */
8859
0
                    ihevc_intra_pred_ref_filtering_fptr(
8860
0
                        (UWORD8 *)ps_ctxt->pv_ref_sub_out,
8861
0
                        trans_size,
8862
0
                        (UWORD8 *)ps_ctxt->pv_ref_filt_out,
8863
0
                        luma_pred_mode,
8864
0
                        ps_ctxt->i1_strong_intra_smoothing_enable_flag);
8865
8866
                    /* use the look up to get the function idx */
8867
0
                    luma_pred_func_idx = g_i4_ip_funcs[luma_pred_mode];
8868
8869
                    /* call the intra prediction function */
8870
0
                    ps_ctxt->apf_lum_ip[luma_pred_func_idx](
8871
0
                        (UWORD8 *)ps_ctxt->pv_ref_filt_out,
8872
0
                        1,
8873
0
                        pu1_cur_pred,
8874
0
                        pred_strd,
8875
0
                        trans_size,
8876
0
                        luma_pred_mode);
8877
0
                }
8878
0
            }
8879
0
            else if(
8880
0
                (1 == chrm_present_flag) &&
8881
0
                (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data))
8882
0
            {
8883
0
                WORD32 temp_num_4x4_in_tu = num_4x4_in_tu;
8884
8885
0
                if(4 == trans_size) /* compute the avail flags assuming luma trans is 8x8 */
8886
0
                {
8887
0
                    temp_num_4x4_in_tu = num_4x4_in_tu << 1;
8888
0
                }
8889
8890
0
                *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
8891
0
                    ps_ctxt->pu1_ctb_nbr_map,
8892
0
                    ps_ctxt->i4_nbr_map_strd,
8893
0
                    cu_pos_x_in_4x4,
8894
0
                    cu_pos_y_in_4x4,
8895
0
                    temp_num_4x4_in_tu,
8896
0
                    temp_num_4x4_in_tu);
8897
0
            }
8898
8899
            /* Get the pred mode for scan idx calculation, even if pred is not required */
8900
0
            chroma_pred_mode = ps_best_cu_prms->u1_chroma_intra_pred_actual_mode;
8901
0
        }
8902
8903
0
        if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data)
8904
0
        {
8905
0
            WORD32 temp_bits;
8906
0
            LWORD64 temp_cost;
8907
0
            UWORD32 u4_tu_sad;
8908
0
            WORD32 perform_sbh, perform_rdoq;
8909
8910
0
            if(PRED_MODE_INTRA == packed_pred_mode)
8911
0
            {
8912
                /* for luma 4x4 and 8x8 transforms based on intra pred mode scan is choosen*/
8913
0
                if(trans_size < 16)
8914
0
                {
8915
                    /* for modes from 22 upto 30 horizontal scan is used */
8916
0
                    if((luma_pred_mode > 21) && (luma_pred_mode < 31))
8917
0
                    {
8918
0
                        ps_ctxt->i4_scan_idx = SCAN_HORZ;
8919
0
                    }
8920
                    /* for modes from 6 upto 14 vertical scan is used */
8921
0
                    else if((luma_pred_mode > 5) && (luma_pred_mode < 15))
8922
0
                    {
8923
0
                        ps_ctxt->i4_scan_idx = SCAN_VERT;
8924
0
                    }
8925
0
                }
8926
0
            }
8927
8928
            /* RDOPT copy States :  TU init (best until prev TU) to current */
8929
0
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
8930
0
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
8931
0
                        .s_cabac_ctxt.au1_ctxt_models[0] +
8932
0
                    IHEVC_CAB_COEFFX_PREFIX,
8933
0
                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
8934
0
                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
8935
8936
0
            if(ps_prms->u1_recompute_sbh_and_rdoq)
8937
0
            {
8938
0
                perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH);
8939
0
                perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ);
8940
0
            }
8941
0
            else
8942
0
            {
8943
                /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
8944
0
                perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh;
8945
                /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
8946
                we would have to do RDOQ again.*/
8947
0
                perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq;
8948
0
            }
8949
8950
#if DISABLE_RDOQ_INTRA
8951
            if(PRED_MODE_INTRA == packed_pred_mode)
8952
            {
8953
                perform_rdoq = 0;
8954
            }
8955
#endif
8956
            /*If BEST candidate RDOQ is enabled, either no coef level rdoq or CU level rdoq has to be enabled
8957
            so that all candidates and best candidate are quantized with same rounding factor  */
8958
0
            if(1 == perform_rdoq)
8959
0
            {
8960
0
                ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING);
8961
0
            }
8962
8963
0
            cbf = ihevce_t_q_iq_ssd_scan_fxn(
8964
0
                ps_ctxt,
8965
0
                pu1_cur_pred,
8966
0
                pred_strd,
8967
0
                pu1_cur_src,
8968
0
                src_strd,
8969
0
                pi2_cur_deq_data,
8970
0
                cu_size, /*deq_data stride is cu_size*/
8971
0
                pu1_cur_luma_recon,
8972
0
                recon_luma_strd,
8973
0
                pu1_final_ecd_data,
8974
0
                pu1_csbf_buf,
8975
0
                csbf_strd,
8976
0
                trans_size,
8977
0
                packed_pred_mode,
8978
0
                &temp_cost,
8979
0
                &num_bytes,
8980
0
                &temp_bits,
8981
0
                &u4_tu_sad,
8982
0
                &zero_cols,
8983
0
                &zero_rows,
8984
0
                &au1_is_recon_available[0],
8985
0
                perform_rdoq,  //(BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level),
8986
0
                perform_sbh,
8987
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
8988
0
                !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
8989
0
                                       : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
8990
0
                                          (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
8991
0
                                             100.0,
8992
0
                ps_prms->u1_is_cu_noisy,
8993
0
#endif
8994
0
                u1_compute_spatial_ssd_luma ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
8995
0
                1 /*early cbf*/
8996
0
            );  //(BEST_CAND_SBH == ps_ctxt->i4_sbh_level));
8997
8998
            /* Accumulate luma residual bits */
8999
0
            ps_best_cu_prms->u4_cu_luma_res_bits += temp_bits;
9000
9001
            /* RDOPT copy States :  New updated after curr TU to TU init */
9002
0
            if(0 != cbf)
9003
0
            {
9004
                /* update to new state only if CBF is non zero */
9005
0
                COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9006
0
                    &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9007
0
                    &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9008
0
                            .s_cabac_ctxt.au1_ctxt_models[0] +
9009
0
                        IHEVC_CAB_COEFFX_PREFIX,
9010
0
                    IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9011
0
            }
9012
9013
            /* accumulate the TU sad into cu sad */
9014
0
            ps_best_cu_prms->u4_cu_sad += u4_tu_sad;
9015
0
            ps_tu->b1_y_cbf = cbf;
9016
0
            ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = num_bytes;
9017
9018
            /* If somebody updates cbf (RDOQ or SBH), update in nbr str. for BS */
9019
0
            if((ps_prms->u1_will_cabac_state_change) && (!ps_prms->u1_is_first_pass))
9020
0
            {
9021
0
                WORD32 num_4x4_in_cu = u1_cu_size >> 2;
9022
0
                nbr_4x4_t *ps_cur_nbr_4x4 = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0];
9023
0
                ps_cur_nbr_4x4 = (ps_cur_nbr_4x4 + (cu_pos_x_in_pix >> 2));
9024
0
                ps_cur_nbr_4x4 += ((cu_pos_y_in_pix >> 2) * num_4x4_in_cu);
9025
                /* replicate the nbr 4x4 structure for all 4x4 blocks of the current TU */
9026
0
                ps_cur_nbr_4x4->b1_y_cbf = cbf;
9027
                /*copy the cu qp. This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/
9028
0
                ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
9029
                /* Qp and cbf are stored for the all 4x4 in TU */
9030
0
                {
9031
0
                    WORD32 i, j;
9032
0
                    nbr_4x4_t *ps_tmp_4x4;
9033
0
                    ps_tmp_4x4 = ps_cur_nbr_4x4;
9034
9035
0
                    for(i = 0; i < num_4x4_in_tu; i++)
9036
0
                    {
9037
0
                        for(j = 0; j < num_4x4_in_tu; j++)
9038
0
                        {
9039
0
                            ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp;
9040
0
                            ps_tmp_4x4[j].b1_y_cbf = cbf;
9041
0
                        }
9042
                        /* row level update*/
9043
0
                        ps_tmp_4x4 += num_4x4_in_cu;
9044
0
                    }
9045
0
                }
9046
0
            }
9047
0
        }
9048
0
        else
9049
0
        {
9050
0
            zero_cols = ps_tu_enc_loop_temp_prms->u4_luma_zero_col;
9051
0
            zero_rows = ps_tu_enc_loop_temp_prms->u4_luma_zero_row;
9052
9053
0
            if(ps_prms->u1_will_cabac_state_change)
9054
0
            {
9055
0
                num_bytes = ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed;
9056
0
            }
9057
0
            else
9058
0
            {
9059
0
                num_bytes = 0;
9060
0
            }
9061
9062
            /* copy luma ecd data to final buffer */
9063
0
            memcpy(pu1_final_ecd_data, pu1_old_ecd_data, num_bytes);
9064
9065
0
            pu1_old_ecd_data += num_bytes;
9066
9067
0
            au1_is_recon_available[0] = 0;
9068
0
        }
9069
9070
        /**-------- Compute Recon data (Do IT & Recon) : Luma  -----------**/
9071
0
        if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9072
0
           (!u1_compute_spatial_ssd_luma ||
9073
0
            (!au1_is_recon_available[0] && u1_compute_spatial_ssd_luma)))
9074
0
        {
9075
0
            if(!ps_recon_datastore->u1_is_lumaRecon_available ||
9076
0
               (ps_recon_datastore->u1_is_lumaRecon_available &&
9077
0
                (UCHAR_MAX == ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr])))
9078
0
            {
9079
0
                ihevce_it_recon_fxn(
9080
0
                    ps_ctxt,
9081
0
                    pi2_cur_deq_data,
9082
0
                    cu_size,
9083
0
                    pu1_cur_pred,
9084
0
                    pred_strd,
9085
0
                    pu1_cur_luma_recon,
9086
0
                    recon_luma_strd,
9087
0
                    pu1_final_ecd_data,
9088
0
                    trans_size,
9089
0
                    packed_pred_mode,
9090
0
                    ps_tu->b1_y_cbf,
9091
0
                    zero_cols,
9092
0
                    zero_rows);
9093
0
            }
9094
0
            else if(
9095
0
                ps_recon_datastore->u1_is_lumaRecon_available &&
9096
0
                (UCHAR_MAX != ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]))
9097
0
            {
9098
0
                UWORD8 *pu1_recon_src =
9099
0
                    ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
9100
0
                         [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]]) +
9101
0
                    cu_pos_x_in_pix + cu_pos_y_in_pix * ps_recon_datastore->i4_lumaRecon_stride;
9102
9103
0
                ps_ctxt->s_cmn_opt_func.pf_copy_2d(
9104
0
                    pu1_cur_luma_recon,
9105
0
                    recon_luma_strd,
9106
0
                    pu1_recon_src,
9107
0
                    ps_recon_datastore->i4_lumaRecon_stride,
9108
0
                    trans_size,
9109
0
                    trans_size);
9110
0
            }
9111
0
        }
9112
9113
0
        if(ps_prms->u1_will_cabac_state_change)
9114
0
        {
9115
0
            ps_tu_enc_loop->i4_luma_coeff_offset = total_bytes;
9116
0
        }
9117
9118
0
        pu1_final_ecd_data += num_bytes;
9119
        /* update total bytes consumed */
9120
0
        total_bytes += num_bytes;
9121
9122
0
        u1_is_cu_coded |= ps_tu->b1_y_cbf;
9123
9124
        /***************** Compute T,Q,IQ,IT & Recon for Chroma ********************/
9125
0
        if(1 == chrm_present_flag)
9126
0
        {
9127
0
            pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix;
9128
0
            pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) +
9129
0
                                (u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd));
9130
9131
0
            pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix;
9132
0
            pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) +
9133
0
                                 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd));
9134
9135
0
            pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix;
9136
0
            pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) +
9137
0
                                    (u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd));
9138
9139
0
            pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix;
9140
0
            pi2_cur_deq_data_chrm += ((cu_pos_y_in_pix >> 1) * cu_size) +
9141
0
                                     (u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size));
9142
9143
0
            if(INCLUDE_CHROMA_DURING_TU_RECURSION &&
9144
0
               (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0) &&
9145
0
               (PRED_MODE_INTRA != packed_pred_mode))
9146
0
            {
9147
0
                WORD32 i4_num_bytes;
9148
0
                UWORD8 *pu1_chroma_pred;
9149
0
                UWORD8 *pu1_chroma_recon;
9150
0
                WORD16 *pi2_chroma_deq;
9151
0
                UWORD32 u4_zero_col;
9152
0
                UWORD32 u4_zero_row;
9153
9154
0
                for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
9155
0
                {
9156
0
                    WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
9157
0
                    WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
9158
0
                    WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
9159
9160
0
                    if(0 == u1_is_422)
9161
0
                    {
9162
0
                        i4_subtu_pos_y >>= 1;
9163
0
                    }
9164
9165
0
                    pu1_chroma_pred =
9166
0
                        pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
9167
0
                    pu1_chroma_recon = pu1_cur_chroma_recon +
9168
0
                                       (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
9169
0
                    pi2_chroma_deq =
9170
0
                        pi2_cur_deq_data_chrm + (i4_subtu_idx * chroma_trans_size * cu_size);
9171
9172
0
                    u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx];
9173
0
                    u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx];
9174
9175
0
                    if(ps_prms->u1_will_cabac_state_change)
9176
0
                    {
9177
0
                        i4_num_bytes =
9178
0
                            ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx];
9179
0
                    }
9180
0
                    else
9181
0
                    {
9182
0
                        i4_num_bytes = 0;
9183
0
                    }
9184
9185
0
                    memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes);
9186
9187
0
                    pu1_old_ecd_data += i4_num_bytes;
9188
9189
0
                    au1_is_recon_available[U_PLANE] = 0;
9190
9191
0
                    if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9192
0
                       (!u1_compute_spatial_ssd_chroma ||
9193
0
                        (!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma)))
9194
0
                    {
9195
0
                        if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9196
0
                           (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9197
0
                            (UCHAR_MAX ==
9198
0
                             ps_recon_datastore
9199
0
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])))
9200
0
                        {
9201
0
                            ihevce_chroma_it_recon_fxn(
9202
0
                                ps_ctxt,
9203
0
                                pi2_chroma_deq,
9204
0
                                cu_size,
9205
0
                                pu1_chroma_pred,
9206
0
                                pred_chrm_strd,
9207
0
                                pu1_chroma_recon,
9208
0
                                recon_chrma_strd,
9209
0
                                pu1_final_ecd_data,
9210
0
                                chroma_trans_size,
9211
0
                                (i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1,
9212
0
                                u4_zero_col,
9213
0
                                u4_zero_row,
9214
0
                                U_PLANE);
9215
0
                        }
9216
0
                        else if(
9217
0
                            ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9218
0
                            (UCHAR_MAX !=
9219
0
                             ps_recon_datastore
9220
0
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))
9221
0
                        {
9222
0
                            UWORD8 *pu1_recon_src =
9223
0
                                ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9224
0
                                     [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9225
0
                                          [U_PLANE][ctr][i4_subtu_idx]]) +
9226
0
                                i4_subtu_pos_x +
9227
0
                                i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9228
9229
0
                            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9230
0
                                pu1_recon_src,
9231
0
                                ps_recon_datastore->i4_lumaRecon_stride,
9232
0
                                pu1_chroma_recon,
9233
0
                                recon_chrma_strd,
9234
0
                                chroma_trans_size,
9235
0
                                chroma_trans_size,
9236
0
                                U_PLANE);
9237
0
                        }
9238
0
                    }
9239
9240
0
                    u1_is_cu_coded |=
9241
0
                        ((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf);
9242
9243
0
                    pu1_final_ecd_data += i4_num_bytes;
9244
0
                    total_bytes += i4_num_bytes;
9245
0
                }
9246
9247
0
                for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
9248
0
                {
9249
0
                    WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
9250
0
                    WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
9251
0
                    WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
9252
9253
0
                    if(0 == u1_is_422)
9254
0
                    {
9255
0
                        i4_subtu_pos_y >>= 1;
9256
0
                    }
9257
9258
0
                    pu1_chroma_pred =
9259
0
                        pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
9260
0
                    pu1_chroma_recon = pu1_cur_chroma_recon +
9261
0
                                       (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
9262
0
                    pi2_chroma_deq = pi2_cur_deq_data_chrm +
9263
0
                                     (i4_subtu_idx * chroma_trans_size * cu_size) +
9264
0
                                     chroma_trans_size;
9265
9266
0
                    u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx];
9267
0
                    u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx];
9268
9269
0
                    if(ps_prms->u1_will_cabac_state_change)
9270
0
                    {
9271
0
                        i4_num_bytes =
9272
0
                            ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx];
9273
0
                    }
9274
0
                    else
9275
0
                    {
9276
0
                        i4_num_bytes = 0;
9277
0
                    }
9278
9279
0
                    memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes);
9280
9281
0
                    pu1_old_ecd_data += i4_num_bytes;
9282
9283
0
                    au1_is_recon_available[V_PLANE] = 0;
9284
9285
0
                    if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9286
0
                       (!u1_compute_spatial_ssd_chroma ||
9287
0
                        (!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma)))
9288
0
                    {
9289
0
                        if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9290
0
                           (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9291
0
                            (UCHAR_MAX ==
9292
0
                             ps_recon_datastore
9293
0
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])))
9294
0
                        {
9295
0
                            ihevce_chroma_it_recon_fxn(
9296
0
                                ps_ctxt,
9297
0
                                pi2_chroma_deq,
9298
0
                                cu_size,
9299
0
                                pu1_chroma_pred,
9300
0
                                pred_chrm_strd,
9301
0
                                pu1_chroma_recon,
9302
0
                                recon_chrma_strd,
9303
0
                                pu1_final_ecd_data,
9304
0
                                chroma_trans_size,
9305
0
                                (i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1,
9306
0
                                u4_zero_col,
9307
0
                                u4_zero_row,
9308
0
                                V_PLANE);
9309
0
                        }
9310
0
                        else if(
9311
0
                            ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9312
0
                            (UCHAR_MAX !=
9313
0
                             ps_recon_datastore
9314
0
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))
9315
0
                        {
9316
0
                            UWORD8 *pu1_recon_src =
9317
0
                                ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9318
0
                                     [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9319
0
                                          [V_PLANE][ctr][i4_subtu_idx]]) +
9320
0
                                i4_subtu_pos_x +
9321
0
                                i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9322
9323
0
                            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9324
0
                                pu1_recon_src,
9325
0
                                ps_recon_datastore->i4_lumaRecon_stride,
9326
0
                                pu1_chroma_recon,
9327
0
                                recon_chrma_strd,
9328
0
                                chroma_trans_size,
9329
0
                                chroma_trans_size,
9330
0
                                V_PLANE);
9331
0
                        }
9332
0
                    }
9333
9334
0
                    u1_is_cu_coded |=
9335
0
                        ((1 == i4_subtu_idx) ? ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf);
9336
9337
0
                    pu1_final_ecd_data += i4_num_bytes;
9338
0
                    total_bytes += i4_num_bytes;
9339
0
                }
9340
0
            }
9341
0
            else
9342
0
            {
9343
0
                WORD32 cb_zero_col, cb_zero_row, cr_zero_col, cr_zero_row;
9344
9345
0
                for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
9346
0
                {
9347
0
                    WORD32 cb_cbf, cr_cbf;
9348
0
                    WORD32 cb_num_bytes, cr_num_bytes;
9349
9350
0
                    WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
9351
9352
0
                    WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
9353
0
                    WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
9354
9355
0
                    if(0 == u1_is_422)
9356
0
                    {
9357
0
                        i4_subtu_pos_y >>= 1;
9358
0
                    }
9359
9360
0
                    pu1_cur_src_chrm += (i4_subtu_idx * chroma_trans_size * src_chrm_strd);
9361
0
                    pu1_cur_pred_chrm += (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
9362
0
                    pu1_cur_chroma_recon += (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
9363
0
                    pi2_cur_deq_data_chrm += (i4_subtu_idx * chroma_trans_size * cu_size);
9364
9365
0
                    if((PRED_MODE_INTRA == packed_pred_mode) &&
9366
0
                       (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data))
9367
0
                    {
9368
0
                        WORD32 nbr_flags, left_strd_chrm, chrm_pred_func_idx;
9369
0
                        UWORD8 *pu1_left_chrm;
9370
0
                        UWORD8 *pu1_top_chrm;
9371
0
                        UWORD8 *pu1_top_left_chrm;
9372
9373
0
                        nbr_flags = ihevce_get_intra_chroma_tu_nbr(
9374
0
                            *pu4_nbr_flags, i4_subtu_idx, chroma_trans_size, u1_is_422);
9375
9376
                        /* left cu boundary */
9377
0
                        if(0 == i4_subtu_pos_x)
9378
0
                        {
9379
0
                            left_strd_chrm = ps_chrm_cu_buf_prms->i4_cu_left_stride;
9380
0
                            pu1_left_chrm =
9381
0
                                ps_chrm_cu_buf_prms->pu1_cu_left + i4_subtu_pos_y * left_strd_chrm;
9382
0
                        }
9383
0
                        else
9384
0
                        {
9385
0
                            pu1_left_chrm = pu1_cur_chroma_recon - 2;
9386
0
                            left_strd_chrm = recon_chrma_strd;
9387
0
                        }
9388
9389
                        /* top cu boundary */
9390
0
                        if(0 == i4_subtu_pos_y)
9391
0
                        {
9392
0
                            pu1_top_chrm = ps_chrm_cu_buf_prms->pu1_cu_top + i4_subtu_pos_x;
9393
0
                        }
9394
0
                        else
9395
0
                        {
9396
0
                            pu1_top_chrm = pu1_cur_chroma_recon - recon_chrma_strd;
9397
0
                        }
9398
9399
                        /* by default top left is set to cu top left */
9400
0
                        pu1_top_left_chrm = ps_chrm_cu_buf_prms->pu1_cu_top_left;
9401
9402
                        /* top left based on position */
9403
0
                        if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x))
9404
0
                        {
9405
0
                            pu1_top_left_chrm = pu1_left_chrm - left_strd_chrm;
9406
0
                        }
9407
0
                        else if(0 != i4_subtu_pos_x)
9408
0
                        {
9409
0
                            pu1_top_left_chrm = pu1_top_chrm - 2;
9410
0
                        }
9411
9412
                        /* call the chroma reference array substitution */
9413
0
                        ihevc_intra_pred_chroma_ref_substitution_fptr(
9414
0
                            pu1_top_left_chrm,
9415
0
                            pu1_top_chrm,
9416
0
                            pu1_left_chrm,
9417
0
                            left_strd_chrm,
9418
0
                            chroma_trans_size,
9419
0
                            nbr_flags,
9420
0
                            (UWORD8 *)ps_ctxt->pv_ref_sub_out,
9421
0
                            1);
9422
9423
                        /* use the look up to get the function idx */
9424
0
                        chrm_pred_func_idx = g_i4_ip_funcs[chroma_pred_mode];
9425
9426
                        /* call the intra prediction function */
9427
0
                        ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
9428
0
                            (UWORD8 *)ps_ctxt->pv_ref_sub_out,
9429
0
                            1,
9430
0
                            pu1_cur_pred_chrm,
9431
0
                            pred_chrm_strd,
9432
0
                            chroma_trans_size,
9433
0
                            chroma_pred_mode);
9434
0
                    }
9435
9436
                    /**---------- Compute iq&coeff data if required : Chroma ------------**/
9437
0
                    if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data)
9438
0
                    {
9439
0
                        WORD32 perform_sbh, perform_rdoq, temp_bits;
9440
9441
0
                        if(ps_prms->u1_recompute_sbh_and_rdoq)
9442
0
                        {
9443
0
                            perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH);
9444
0
                            perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ);
9445
0
                        }
9446
0
                        else
9447
0
                        {
9448
                            /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
9449
0
                            perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh;
9450
                            /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
9451
                        we would have to do RDOQ again.*/
9452
0
                            perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq;
9453
0
                        }
9454
9455
                        /* populate the coeffs scan idx */
9456
0
                        ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
9457
9458
0
                        if(PRED_MODE_INTRA == packed_pred_mode)
9459
0
                        {
9460
                            /* for 4x4 transforms the scan is chosen based on the intra pred mode */
9461
0
                            if(4 == chroma_trans_size)
9462
0
                            {
9463
                                /* for modes from 22 upto 30 horizontal scan is used */
9464
0
                                if((chroma_pred_mode > 21) && (chroma_pred_mode < 31))
9465
0
                                {
9466
0
                                    ps_ctxt->i4_scan_idx = SCAN_HORZ;
9467
0
                                }
9468
                                /* for modes from 6 up to 14 vertical scan is used */
9469
0
                                else if((chroma_pred_mode > 5) && (chroma_pred_mode < 15))
9470
0
                                {
9471
0
                                    ps_ctxt->i4_scan_idx = SCAN_VERT;
9472
0
                                }
9473
0
                            }
9474
0
                        }
9475
9476
#if DISABLE_RDOQ_INTRA
9477
                        if(PRED_MODE_INTRA == packed_pred_mode)
9478
                        {
9479
                            perform_rdoq = 0;
9480
                        }
9481
#endif
9482
9483
                        /* RDOPT copy States :  TU init (best until prev TU) to current */
9484
0
                        COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9485
0
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9486
0
                                    .s_cabac_ctxt.au1_ctxt_models[0] +
9487
0
                                IHEVC_CAB_COEFFX_PREFIX,
9488
0
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9489
0
                            IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9490
9491
0
                        ASSERT(rd_opt_best_idx == ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx);
9492
                        /* If BEST candidate RDOQ is enabled, either no coef level rdoq or CU level rdoq has to be enabled
9493
                    so that all candidates and best candidate are quantized with same rounding factor  */
9494
0
                        if(1 == perform_rdoq)
9495
0
                        {
9496
0
                            ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING);
9497
0
                        }
9498
9499
0
                        if(!ps_best_cu_prms->u1_skip_flag ||
9500
0
                           !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
9501
0
                        {
9502
                            /* Cb */
9503
0
                            cb_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
9504
0
                                ps_ctxt,
9505
0
                                pu1_cur_pred_chrm,
9506
0
                                pred_chrm_strd,
9507
0
                                pu1_cur_src_chrm,
9508
0
                                src_chrm_strd,
9509
0
                                pi2_cur_deq_data_chrm,
9510
0
                                cu_size,
9511
0
                                pu1_chrm_recon,
9512
0
                                recon_chrma_strd,
9513
0
                                pu1_final_ecd_data,
9514
0
                                pu1_csbf_buf,
9515
0
                                csbf_strd,
9516
0
                                chroma_trans_size,
9517
0
                                ps_ctxt->i4_scan_idx,
9518
0
                                (PRED_MODE_INTRA == packed_pred_mode),
9519
0
                                &cb_num_bytes,
9520
0
                                &temp_bits,
9521
0
                                &cb_zero_col,
9522
0
                                &cb_zero_row,
9523
0
                                &au1_is_recon_available[U_PLANE],
9524
0
                                perform_sbh,
9525
0
                                perform_rdoq,
9526
0
                                &i8_ssd,
9527
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
9528
0
                                !ps_ctxt->u1_is_refPic
9529
0
                                    ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
9530
0
                                    : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
9531
0
                                       (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
9532
0
                                          100.0,
9533
0
                                ps_prms->u1_is_cu_noisy,
9534
0
#endif
9535
0
                                ps_best_cu_prms->u1_skip_flag &&
9536
0
                                    ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt,
9537
0
                                u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD
9538
0
                                                              : FREQUENCY_DOMAIN_SSD,
9539
0
                                U_PLANE);
9540
0
                        }
9541
0
                        else
9542
0
                        {
9543
0
                            cb_cbf = 0;
9544
0
                            temp_bits = 0;
9545
0
                            cb_num_bytes = 0;
9546
0
                            au1_is_recon_available[U_PLANE] = 0;
9547
0
                            cb_zero_col = 0;
9548
0
                            cb_zero_row = 0;
9549
0
                        }
9550
9551
                        /* Accumulate chroma residual bits */
9552
0
                        ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits;
9553
9554
                        /* RDOPT copy States :  New updated after curr TU to TU init */
9555
0
                        if(0 != cb_cbf)
9556
0
                        {
9557
0
                            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9558
0
                                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9559
0
                                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9560
0
                                        .s_cabac_ctxt.au1_ctxt_models[0] +
9561
0
                                    IHEVC_CAB_COEFFX_PREFIX,
9562
0
                                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9563
0
                        }
9564
                        /* RDOPT copy States :  Restoring back the Cb init state to Cr */
9565
0
                        else
9566
0
                        {
9567
0
                            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9568
0
                                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9569
0
                                        .s_cabac_ctxt.au1_ctxt_models[0] +
9570
0
                                    IHEVC_CAB_COEFFX_PREFIX,
9571
0
                                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9572
0
                                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9573
0
                        }
9574
9575
0
                        if(!ps_best_cu_prms->u1_skip_flag ||
9576
0
                           !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
9577
0
                        {
9578
                            /* Cr */
9579
0
                            cr_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
9580
0
                                ps_ctxt,
9581
0
                                pu1_cur_pred_chrm,
9582
0
                                pred_chrm_strd,
9583
0
                                pu1_cur_src_chrm,
9584
0
                                src_chrm_strd,
9585
0
                                pi2_cur_deq_data_chrm + chroma_trans_size,
9586
0
                                cu_size,
9587
0
                                pu1_chrm_recon,
9588
0
                                recon_chrma_strd,
9589
0
                                pu1_final_ecd_data + cb_num_bytes,
9590
0
                                pu1_csbf_buf,
9591
0
                                csbf_strd,
9592
0
                                chroma_trans_size,
9593
0
                                ps_ctxt->i4_scan_idx,
9594
0
                                (PRED_MODE_INTRA == packed_pred_mode),
9595
0
                                &cr_num_bytes,
9596
0
                                &temp_bits,
9597
0
                                &cr_zero_col,
9598
0
                                &cr_zero_row,
9599
0
                                &au1_is_recon_available[V_PLANE],
9600
0
                                perform_sbh,
9601
0
                                perform_rdoq,
9602
0
                                &i8_ssd,
9603
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
9604
0
                                !ps_ctxt->u1_is_refPic
9605
0
                                    ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
9606
0
                                    : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
9607
0
                                       (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
9608
0
                                          100.0,
9609
0
                                ps_prms->u1_is_cu_noisy,
9610
0
#endif
9611
0
                                ps_best_cu_prms->u1_skip_flag &&
9612
0
                                    ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt,
9613
0
                                u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD
9614
0
                                                              : FREQUENCY_DOMAIN_SSD,
9615
0
                                V_PLANE);
9616
0
                        }
9617
0
                        else
9618
0
                        {
9619
0
                            cr_cbf = 0;
9620
0
                            temp_bits = 0;
9621
0
                            cr_num_bytes = 0;
9622
0
                            au1_is_recon_available[V_PLANE] = 0;
9623
0
                            cr_zero_col = 0;
9624
0
                            cr_zero_row = 0;
9625
0
                        }
9626
9627
                        /* Accumulate chroma residual bits */
9628
0
                        ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits;
9629
9630
                        /* RDOPT copy States :  New updated after curr TU to TU init */
9631
0
                        if(0 != cr_cbf)
9632
0
                        {
9633
0
                            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9634
0
                                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9635
0
                                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9636
0
                                        .s_cabac_ctxt.au1_ctxt_models[0] +
9637
0
                                    IHEVC_CAB_COEFFX_PREFIX,
9638
0
                                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9639
0
                        }
9640
9641
0
                        if(0 == i4_subtu_idx)
9642
0
                        {
9643
0
                            ps_tu->b1_cb_cbf = cb_cbf;
9644
0
                            ps_tu->b1_cr_cbf = cr_cbf;
9645
0
                        }
9646
0
                        else
9647
0
                        {
9648
0
                            ps_tu->b1_cb_cbf_subtu1 = cb_cbf;
9649
0
                            ps_tu->b1_cr_cbf_subtu1 = cr_cbf;
9650
0
                        }
9651
0
                    }
9652
0
                    else
9653
0
                    {
9654
0
                        cb_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx];
9655
0
                        cb_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx];
9656
0
                        cr_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx];
9657
0
                        cr_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx];
9658
9659
0
                        if(ps_prms->u1_will_cabac_state_change)
9660
0
                        {
9661
0
                            cb_num_bytes =
9662
0
                                ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx];
9663
0
                        }
9664
0
                        else
9665
0
                        {
9666
0
                            cb_num_bytes = 0;
9667
0
                        }
9668
9669
0
                        if(ps_prms->u1_will_cabac_state_change)
9670
0
                        {
9671
0
                            cr_num_bytes =
9672
0
                                ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx];
9673
0
                        }
9674
0
                        else
9675
0
                        {
9676
0
                            cr_num_bytes = 0;
9677
0
                        }
9678
9679
                        /* copy cb ecd data to final buffer */
9680
0
                        memcpy(pu1_final_ecd_data, pu1_chrm_old_ecd_data, cb_num_bytes);
9681
9682
0
                        pu1_chrm_old_ecd_data += cb_num_bytes;
9683
9684
                        /* copy cr ecd data to final buffer (placed right after the cb data) */
9685
0
                        memcpy(
9686
0
                            (pu1_final_ecd_data + cb_num_bytes),
9687
0
                            pu1_chrm_old_ecd_data,
9688
0
                            cr_num_bytes);
9689
9690
0
                        pu1_chrm_old_ecd_data += cr_num_bytes;
9691
9692
0
                        au1_is_recon_available[U_PLANE] = 0;
9693
0
                        au1_is_recon_available[V_PLANE] = 0;
9694
0
                    }
9695
9696
                    /**-------- Compute Recon data (Do IT & Recon) : Chroma  -----------**/
9697
0
                    if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9698
0
                       (!u1_compute_spatial_ssd_chroma ||
9699
0
                        (!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma)))
9700
0
                    {
9701
0
                        if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9702
0
                           (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9703
0
                            (UCHAR_MAX ==
9704
0
                             ps_recon_datastore
9705
0
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])))
9706
0
                        {
9707
0
                            ihevce_chroma_it_recon_fxn(
9708
0
                                ps_ctxt,
9709
0
                                pi2_cur_deq_data_chrm,
9710
0
                                cu_size,
9711
0
                                pu1_cur_pred_chrm,
9712
0
                                pred_chrm_strd,
9713
0
                                pu1_cur_chroma_recon,
9714
0
                                recon_chrma_strd,
9715
0
                                pu1_final_ecd_data,
9716
0
                                chroma_trans_size,
9717
0
                                (i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1,
9718
0
                                cb_zero_col,
9719
0
                                cb_zero_row,
9720
0
                                U_PLANE);
9721
0
                        }
9722
0
                        else if(
9723
0
                            ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9724
0
                            (UCHAR_MAX !=
9725
0
                             ps_recon_datastore
9726
0
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))
9727
0
                        {
9728
0
                            UWORD8 *pu1_recon_src =
9729
0
                                ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9730
0
                                     [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9731
0
                                          [U_PLANE][ctr][i4_subtu_idx]]) +
9732
0
                                i4_subtu_pos_x +
9733
0
                                i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9734
9735
0
                            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9736
0
                                pu1_recon_src,
9737
0
                                ps_recon_datastore->i4_lumaRecon_stride,
9738
0
                                pu1_cur_chroma_recon,
9739
0
                                recon_chrma_strd,
9740
0
                                chroma_trans_size,
9741
0
                                chroma_trans_size,
9742
0
                                U_PLANE);
9743
0
                        }
9744
0
                    }
9745
9746
0
                    u1_is_cu_coded |=
9747
0
                        ((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf);
9748
9749
0
                    if(ps_prms->u1_will_cabac_state_change)
9750
0
                    {
9751
0
                        ps_tu_enc_loop->ai4_cb_coeff_offset[i4_subtu_idx] = total_bytes;
9752
0
                    }
9753
9754
0
                    pu1_final_ecd_data += cb_num_bytes;
9755
                    /* update total bytes consumed */
9756
0
                    total_bytes += cb_num_bytes;
9757
9758
0
                    if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9759
0
                       (!u1_compute_spatial_ssd_chroma ||
9760
0
                        (!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma)))
9761
0
                    {
9762
0
                        if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9763
0
                           (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9764
0
                            (UCHAR_MAX ==
9765
0
                             ps_recon_datastore
9766
0
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])))
9767
0
                        {
9768
0
                            ihevce_chroma_it_recon_fxn(
9769
0
                                ps_ctxt,
9770
0
                                pi2_cur_deq_data_chrm + chroma_trans_size,
9771
0
                                cu_size,
9772
0
                                pu1_cur_pred_chrm,
9773
0
                                pred_chrm_strd,
9774
0
                                pu1_cur_chroma_recon,
9775
0
                                recon_chrma_strd,
9776
0
                                pu1_final_ecd_data,
9777
0
                                chroma_trans_size,
9778
0
                                (i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1,
9779
0
                                cr_zero_col,
9780
0
                                cr_zero_row,
9781
0
                                V_PLANE);
9782
0
                        }
9783
0
                        else if(
9784
0
                            ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9785
0
                            (UCHAR_MAX !=
9786
0
                             ps_recon_datastore
9787
0
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))
9788
0
                        {
9789
0
                            UWORD8 *pu1_recon_src =
9790
0
                                ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9791
0
                                     [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9792
0
                                          [V_PLANE][ctr][i4_subtu_idx]]) +
9793
0
                                i4_subtu_pos_x +
9794
0
                                i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9795
9796
0
                            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9797
0
                                pu1_recon_src,
9798
0
                                ps_recon_datastore->i4_lumaRecon_stride,
9799
0
                                pu1_cur_chroma_recon,
9800
0
                                recon_chrma_strd,
9801
0
                                chroma_trans_size,
9802
0
                                chroma_trans_size,
9803
0
                                V_PLANE);
9804
0
                        }
9805
0
                    }
9806
9807
0
                    u1_is_cu_coded |=
9808
0
                        ((1 == i4_subtu_idx) ? ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf);
9809
9810
0
                    if(ps_prms->u1_will_cabac_state_change)
9811
0
                    {
9812
0
                        ps_tu_enc_loop->ai4_cr_coeff_offset[i4_subtu_idx] = total_bytes;
9813
0
                    }
9814
9815
0
                    pu1_final_ecd_data += cr_num_bytes;
9816
                    /* update total bytes consumed */
9817
0
                    total_bytes += cr_num_bytes;
9818
0
                }
9819
0
            }
9820
0
        }
9821
0
        else
9822
0
        {
9823
0
            ps_tu_enc_loop->ai4_cb_coeff_offset[0] = total_bytes;
9824
0
            ps_tu_enc_loop->ai4_cr_coeff_offset[0] = total_bytes;
9825
0
            ps_tu_enc_loop->ai4_cb_coeff_offset[1] = total_bytes;
9826
0
            ps_tu_enc_loop->ai4_cr_coeff_offset[1] = total_bytes;
9827
0
            ps_tu->b1_cb_cbf = 0;
9828
0
            ps_tu->b1_cr_cbf = 0;
9829
0
            ps_tu->b1_cb_cbf_subtu1 = 0;
9830
0
            ps_tu->b1_cr_cbf_subtu1 = 0;
9831
0
        }
9832
9833
        /* Update to next TU */
9834
0
        ps_tu_enc_loop++;
9835
0
        ps_tu_enc_loop_temp_prms++;
9836
9837
0
        pu4_nbr_flags++;
9838
0
        pu1_intra_pred_mode++;
9839
9840
        /*Do not set the nbr map for last pu in cu */
9841
0
        if((num_tu_in_cu - 1) != ctr)
9842
0
        {
9843
            /* set the neighbour map to 1 */
9844
0
            ihevce_set_nbr_map(
9845
0
                ps_ctxt->pu1_ctb_nbr_map,
9846
0
                ps_ctxt->i4_nbr_map_strd,
9847
0
                cu_pos_x_in_4x4,
9848
0
                cu_pos_y_in_4x4,
9849
0
                (trans_size >> 2),
9850
0
                1);
9851
0
        }
9852
0
    }
9853
9854
0
    if(ps_prms->u1_will_cabac_state_change)
9855
0
    {
9856
0
        ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded;
9857
9858
        /* Modify skip flag, if luma is skipped & Chroma is coded */
9859
0
        if((1 == u1_is_cu_coded) && (PRED_MODE_SKIP == packed_pred_mode))
9860
0
        {
9861
0
            ps_best_cu_prms->u1_skip_flag = 0;
9862
0
        }
9863
0
    }
9864
9865
    /* during chroma evaluation if skip decision was over written     */
9866
    /* then the current skip candidate is set to a non skip candidate */
9867
0
    if(PRED_MODE_INTRA != packed_pred_mode)
9868
0
    {
9869
0
        ps_best_inter_cand->b1_skip_flag = ps_best_cu_prms->u1_skip_flag;
9870
0
    }
9871
9872
    /**------------- Compute header data if required --------------**/
9873
0
    if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data)
9874
0
    {
9875
0
        WORD32 cbf_bits;
9876
0
        WORD32 cu_bits;
9877
0
        WORD32 unit_4x4_size = cu_size >> 2;
9878
9879
        /*Restoring the running reference into the best rdopt_ctxt cabac states which will then
9880
        be copied as the base reference for the next cu
9881
        Assumption : We are ensuring that the u1_eval_header_data flag is set to 1 only if either
9882
        luma and chroma are being reevaluated*/
9883
0
        COPY_CABAC_STATES(
9884
0
            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9885
0
                 .s_cabac_ctxt.au1_ctxt_models[0],
9886
0
            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
9887
0
            IHEVC_CAB_CTXT_END);
9888
9889
        /* get the neighbour availability flags for current cu  */
9890
0
        ihevce_get_only_nbr_flag(
9891
0
            &s_nbr,
9892
0
            ps_ctxt->pu1_ctb_nbr_map,
9893
0
            ps_ctxt->i4_nbr_map_strd,
9894
0
            (cu_pos_x << 1),
9895
0
            (cu_pos_y << 1),
9896
0
            unit_4x4_size,
9897
0
            unit_4x4_size);
9898
9899
0
        cu_bits = ihevce_entropy_rdo_encode_cu(
9900
0
            &ps_ctxt->s_rdopt_entropy_ctxt,
9901
0
            ps_best_cu_prms,
9902
0
            cu_pos_x,
9903
0
            cu_pos_y,
9904
0
            cu_size,
9905
0
            ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
9906
0
                                           : s_nbr.u1_top_avail,
9907
0
            s_nbr.u1_left_avail,
9908
0
            (pu1_final_ecd_data - total_bytes),
9909
0
            &cbf_bits);
9910
9911
        /* cbf bits are excluded from header bits, instead considered as texture bits */
9912
0
        ps_best_cu_prms->u4_cu_hdr_bits = cu_bits - cbf_bits;
9913
0
        ps_best_cu_prms->u4_cu_cbf_bits = cbf_bits;
9914
0
    }
9915
9916
0
    if(ps_prms->u1_will_cabac_state_change)
9917
0
    {
9918
0
        ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes;
9919
0
    }
9920
0
}
9921
9922
/*!
9923
******************************************************************************
9924
* \if Function name : ihevce_set_eval_flags \endif
9925
*
9926
* \brief
9927
*    Function which decides which eval flags have to be set based on present
9928
*    and RDOQ conditions
9929
*
9930
* \param[in] ps_ctxt : encoder ctxt pointer
9931
* \param[in] enc_loop_cu_final_prms_t : pointer to final cu params
9932
*
9933
* \return
9934
*    None
9935
*
9936
* \author
9937
*  Ittiam
9938
*
9939
*****************************************************************************
9940
*/
9941
void ihevce_set_eval_flags(
9942
    ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_final_prms_t *ps_enc_loop_bestprms)
9943
0
{
9944
0
    WORD32 count = 0;
9945
9946
0
    ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0;
9947
9948
0
    ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data =
9949
0
        !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
9950
9951
0
    if(ps_ctxt->u1_disable_intra_eval && (!(ps_ctxt->i4_deblk_pad_hpel_cur_pic & 0x1)))
9952
0
    {
9953
0
        ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 0;
9954
0
    }
9955
0
    else
9956
0
    {
9957
0
        ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1;
9958
0
    }
9959
9960
0
    if((1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq) ||
9961
0
       (1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh))
9962
0
    {
9963
        /* When rdoq is enabled only for the best candidate, in case of in Intra nTU
9964
        RDOQ might have altered the coeffs of the neighbour CU. As a result, the pred
9965
        for the current CU will change. Therefore, we need to reevaluate the pred data*/
9966
0
        if((ps_enc_loop_bestprms->u2_num_tus_in_cu > 1) &&
9967
0
           (ps_enc_loop_bestprms->u1_intra_flag == 1))
9968
0
        {
9969
0
            ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 1;
9970
0
            ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data = 1;
9971
0
        }
9972
0
        if(ps_enc_loop_bestprms->u1_skip_flag == 1)
9973
0
        {
9974
0
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
9975
0
            {
9976
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
9977
0
                    .b1_eval_luma_iq_and_coeff_data = 0;
9978
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
9979
0
                    .b1_eval_chroma_iq_and_coeff_data = 0;
9980
0
            }
9981
0
        }
9982
0
        else
9983
0
        {
9984
0
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
9985
0
            {
9986
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
9987
0
                    .b1_eval_luma_iq_and_coeff_data = 1;
9988
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
9989
0
                    .b1_eval_chroma_iq_and_coeff_data = 1;
9990
0
            }
9991
0
        }
9992
0
    }
9993
0
    else
9994
0
    {
9995
0
        switch(ps_ctxt->i4_quality_preset)
9996
0
        {
9997
0
        case IHEVCE_QUALITY_P0:
9998
0
        case IHEVCE_QUALITY_P2:
9999
0
        case IHEVCE_QUALITY_P3:
10000
0
        {
10001
0
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
10002
0
            {
10003
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10004
0
                    .b1_eval_luma_iq_and_coeff_data = 0;
10005
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10006
0
                    .b1_eval_chroma_iq_and_coeff_data =
10007
0
                    !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10008
0
            }
10009
10010
0
            break;
10011
0
        }
10012
0
        case IHEVCE_QUALITY_P4:
10013
0
        case IHEVCE_QUALITY_P5:
10014
0
        {
10015
0
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
10016
0
            {
10017
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10018
0
                    .b1_eval_luma_iq_and_coeff_data = 0;
10019
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10020
0
                    .b1_eval_chroma_iq_and_coeff_data =
10021
0
                    !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10022
0
            }
10023
10024
0
            break;
10025
0
        }
10026
0
        case IHEVCE_QUALITY_P6:
10027
0
        {
10028
0
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
10029
0
            {
10030
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10031
0
                    .b1_eval_luma_iq_and_coeff_data = 0;
10032
0
#if !ENABLE_CHROMA_TRACKING_OF_LUMA_CBF_IN_XS25
10033
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10034
0
                    .b1_eval_chroma_iq_and_coeff_data =
10035
0
                    !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10036
#else
10037
                if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_temporal_layer_id > 1) &&
10038
                   (ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b3_size >= 2))
10039
                {
10040
                    ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10041
                        .b1_eval_chroma_iq_and_coeff_data =
10042
                        ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b1_y_cbf;
10043
                }
10044
                else
10045
                {
10046
                    ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10047
                        .b1_eval_chroma_iq_and_coeff_data =
10048
                        !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10049
                }
10050
#endif
10051
0
            }
10052
10053
0
            break;
10054
0
        }
10055
0
        default:
10056
0
        {
10057
0
            break;
10058
0
        }
10059
0
        }
10060
0
    }
10061
10062
    /* Not recomputing Luma pred-data and header data for any preset now */
10063
0
    ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1;
10064
0
}
10065
10066
/**
10067
******************************************************************************
10068
*
10069
*  @brief Shrink's TU tree of inter CUs by merging redundnant child nodes
10070
*         (not coded children) into a parent node(not coded).
10071
*
10072
*  @par   Description
10073
*         This is required post RDO evaluation as TU decisions are
10074
*         pre-determined(pre RDO) based on recursive SATD,
10075
*         while the quad children TU's can be skipped during RDO
10076
*
10077
*         The shrink process is applied iteratively till there are no
10078
*         more modes to shrink
10079
*
10080
*  @param[inout]   ps_tu_enc_loop
10081
*       pointer to tu enc loop params of inter cu
10082
*
10083
*  @param[inout]   ps_tu_enc_loop_temp_prms
10084
*       pointer to temp tu enc loop params of inter cu
10085
*
10086
*  @param[in]   num_tu_in_cu
10087
*       number of tus in cu
10088
*
10089
*  @return      modified number of tus in cu
10090
*
10091
******************************************************************************
10092
*/
10093
WORD32 ihevce_shrink_inter_tu_tree(
10094
    tu_enc_loop_out_t *ps_tu_enc_loop,
10095
    tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms,
10096
    recon_datastore_t *ps_recon_datastore,
10097
    WORD32 num_tu_in_cu,
10098
    UWORD8 u1_is_422)
10099
0
{
10100
0
    WORD32 recurse = 1;
10101
0
    WORD32 ctr;
10102
10103
    /* ------------- Quadtree TU Split Transform flag optimization ------------  */
10104
    /* Post RDO, if all 4 child nodes are not coded the overheads of split TU    */
10105
    /* flags and cbf flags are saved by merging to parent node and marking       */
10106
    /* parent TU as not coded                                                    */
10107
    /*                                                                           */
10108
    /*                               ParentTUSplit=1                             */
10109
    /*                                      |                                    */
10110
    /*       ---------------------------------------------------------           */
10111
    /*       |C0(Not coded) | C1(Not coded) | C2(Not coded) | C3(Not coded)      */
10112
    /*                                     ||                                    */
10113
    /*                                     \/                                    */
10114
    /*                                                                           */
10115
    /*                              ParentTUSplit=0 (Not Coded)                  */
10116
    /*                                                                           */
10117
    /* ------------- Quadtree TU Split Transform flag optimization ------------  */
10118
0
    while((num_tu_in_cu > 4) && recurse)
10119
0
    {
10120
0
        recurse = 0;
10121
10122
        /* Validate inter CU */
10123
        //ASSERT(ps_tu_enc_loop[0].s_tu.s_tu.b1_intra_flag == 0); /*b1_intra_flag no longer a member of tu structure */
10124
10125
        /* loop for all tu blocks in current cu */
10126
0
        for(ctr = 0; ctr < num_tu_in_cu;)
10127
0
        {
10128
            /* Get current tu posx, posy and size */
10129
0
            WORD32 curr_pos_x = ps_tu_enc_loop[ctr].s_tu.b4_pos_x << 2;
10130
0
            WORD32 curr_pos_y = ps_tu_enc_loop[ctr].s_tu.b4_pos_y << 2;
10131
            /* +1 is for parents size */
10132
0
            WORD32 parent_tu_size = 1 << (ps_tu_enc_loop[ctr].s_tu.b3_size + 2 + 1);
10133
10134
            /* eval merge if leaf nodes reached i.e all child tus are of same size and first tu pos is same as parent pos */
10135
0
            WORD32 eval_merge = ((curr_pos_x & (parent_tu_size - 1)) == 0);
10136
0
            eval_merge &= ((curr_pos_y & (parent_tu_size - 1)) == 0);
10137
10138
            /* As TUs are published in encode order (Z SCAN),                      */
10139
            /* Four consecutive TUS of same size implies we have hit leaf nodes.   */
10140
0
            if(((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 1].s_tu.b3_size)) &&
10141
0
               ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 2].s_tu.b3_size)) &&
10142
0
               ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 3].s_tu.b3_size)) &&
10143
0
               eval_merge)
10144
0
            {
10145
0
                WORD32 merge_parent = 1;
10146
10147
                /* If any leaf noded is coded, it cannot be merged to parent */
10148
0
                if((ps_tu_enc_loop[ctr].s_tu.b1_y_cbf) || (ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf) ||
10149
0
                   (ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf) ||
10150
10151
0
                   (ps_tu_enc_loop[ctr + 1].s_tu.b1_y_cbf) ||
10152
0
                   (ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf) ||
10153
0
                   (ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf) ||
10154
10155
0
                   (ps_tu_enc_loop[ctr + 2].s_tu.b1_y_cbf) ||
10156
0
                   (ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf) ||
10157
0
                   (ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf) ||
10158
10159
0
                   (ps_tu_enc_loop[ctr + 3].s_tu.b1_y_cbf) ||
10160
0
                   (ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf) ||
10161
0
                   (ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf))
10162
0
                {
10163
0
                    merge_parent = 0;
10164
0
                }
10165
10166
0
                if(u1_is_422)
10167
0
                {
10168
0
                    if((ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1) ||
10169
0
                       (ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1) ||
10170
10171
0
                       (ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf_subtu1) ||
10172
0
                       (ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf_subtu1) ||
10173
10174
0
                       (ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf_subtu1) ||
10175
0
                       (ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf_subtu1) ||
10176
10177
0
                       (ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf_subtu1) ||
10178
0
                       (ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf_subtu1))
10179
0
                    {
10180
0
                        merge_parent = 0;
10181
0
                    }
10182
0
                }
10183
10184
0
                if(merge_parent)
10185
0
                {
10186
                    /* Merge all the children (ctr,ctr+1,ctr+2,ctr+3) to parent (ctr) */
10187
10188
0
                    if(ps_recon_datastore->u1_is_lumaRecon_available)
10189
0
                    {
10190
0
                        ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
10191
10192
0
                        memmove(
10193
0
                            &ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 1],
10194
0
                            &ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 4],
10195
0
                            (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10196
0
                    }
10197
10198
0
                    if(ps_recon_datastore->au1_is_chromaRecon_available[0])
10199
0
                    {
10200
0
                        ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][0] =
10201
0
                            UCHAR_MAX;
10202
0
                        ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][0] =
10203
0
                            UCHAR_MAX;
10204
10205
0
                        memmove(
10206
0
                            &ps_recon_datastore
10207
0
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][0],
10208
0
                            &ps_recon_datastore
10209
0
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][0],
10210
0
                            (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10211
10212
0
                        memmove(
10213
0
                            &ps_recon_datastore
10214
0
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][0],
10215
0
                            &ps_recon_datastore
10216
0
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][0],
10217
0
                            (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10218
10219
0
                        if(u1_is_422)
10220
0
                        {
10221
0
                            ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][1] =
10222
0
                                UCHAR_MAX;
10223
0
                            ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][1] =
10224
0
                                UCHAR_MAX;
10225
10226
0
                            memmove(
10227
0
                                &ps_recon_datastore
10228
0
                                     ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][1],
10229
0
                                &ps_recon_datastore
10230
0
                                     ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][1],
10231
0
                                (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10232
10233
0
                            memmove(
10234
0
                                &ps_recon_datastore
10235
0
                                     ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][1],
10236
0
                                &ps_recon_datastore
10237
0
                                     ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][1],
10238
0
                                (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10239
0
                        }
10240
0
                    }
10241
10242
                    /* Parent node size is one more than that of child */
10243
0
                    ps_tu_enc_loop[ctr].s_tu.b3_size++;
10244
10245
0
                    ctr++;
10246
10247
                    /* move the subsequent TUs to next element */
10248
0
                    ASSERT(num_tu_in_cu >= (ctr + 3));
10249
0
                    memmove(
10250
0
                        (void *)(ps_tu_enc_loop + ctr),
10251
0
                        (void *)(ps_tu_enc_loop + ctr + 3),
10252
0
                        (num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_out_t));
10253
10254
                    /* Also memmove the temp TU params */
10255
0
                    memmove(
10256
0
                        (void *)(ps_tu_enc_loop_temp_prms + ctr),
10257
0
                        (void *)(ps_tu_enc_loop_temp_prms + ctr + 3),
10258
0
                        (num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_temp_prms_t));
10259
10260
                    /* Number of TUs in CU are now less by 3 */
10261
0
                    num_tu_in_cu -= 3;
10262
10263
                    /* Recurse again as new parent also be can be merged later */
10264
0
                    recurse = 1;
10265
0
                }
10266
0
                else
10267
0
                {
10268
                    /* Go to next set of leaf nodes */
10269
0
                    ctr += 4;
10270
0
                }
10271
0
            }
10272
0
            else
10273
0
            {
10274
0
                ctr++;
10275
0
            }
10276
0
        }
10277
0
    }
10278
10279
    /* return the modified num TUs*/
10280
0
    ASSERT(num_tu_in_cu > 0);
10281
0
    return (num_tu_in_cu);
10282
0
}
10283
10284
/**
******************************************************************************
*
*  @brief Extends the intra mode list with the +/-1 neighbours of the modes
*         found in its first MAX_INTRA_CU_CANDIDATES slots, then with
*         INTRA_PLANAR and INTRA_DC, skipping modes already marked in the
*         hash table
*
*  @param[inout]   pu1_mode_array
*       mode list; new modes are appended starting at index u1_num_ipe_modes
*
*  @param[inout]   pu1_hash_table
*       per mode "already present" flags; set to 1 for every appended mode
*
*  @param[in]   u1_num_ipe_modes
*       number of modes in the list on entry
*
*  @return      number of modes in the list after the update
*
******************************************************************************
*/
UWORD8 ihevce_intra_mode_nxn_hash_updater(
    UWORD8 *pu1_mode_array, UWORD8 *pu1_hash_table, UWORD8 u1_num_ipe_modes)
{
    WORD32 cand;

    for(cand = 0; cand < MAX_INTRA_CU_CANDIDATES; cand++)
    {
        WORD32 i4_nbr_mode;

        /* Slots holding a value of 35 or more are skipped */
        if(pu1_mode_array[cand] >= 35)
        {
            continue;
        }

        /* mode - 1, unless the mode is already 0 */
        if(0 != pu1_mode_array[cand])
        {
            i4_nbr_mode = pu1_mode_array[cand] - 1;

            if(0 == pu1_hash_table[i4_nbr_mode])
            {
                pu1_hash_table[i4_nbr_mode] = 1;
                pu1_mode_array[u1_num_ipe_modes++] = i4_nbr_mode;
            }
        }

        /* mode + 1, unless the mode is already 34. The slot is re-read */
        /* (not cached) since the append above may have landed on it    */
        if(34 != pu1_mode_array[cand])
        {
            i4_nbr_mode = pu1_mode_array[cand] + 1;

            if(0 == pu1_hash_table[i4_nbr_mode])
            {
                pu1_hash_table[i4_nbr_mode] = 1;
                pu1_mode_array[u1_num_ipe_modes++] = i4_nbr_mode;
            }
        }
    }

    /* Planar and DC are always part of the final list */
    if(0 == pu1_hash_table[INTRA_PLANAR])
    {
        pu1_hash_table[INTRA_PLANAR] = 1;
        pu1_mode_array[u1_num_ipe_modes++] = INTRA_PLANAR;
    }

    if(0 == pu1_hash_table[INTRA_DC])
    {
        pu1_hash_table[INTRA_DC] = 1;
        pu1_mode_array[u1_num_ipe_modes++] = INTRA_DC;
    }

    return u1_num_ipe_modes;
}
10336
10337
#if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
/**
******************************************************************************
*
*  @brief Runs the recursive-TU SATD evaluator matching the CU size on the
*         candidate's pred data and returns its result; the evaluator is
*         handed ps_cu_data->ai4_tu_split_flag as its split flag buffer
*
*  @return      value returned by the selected hme_evalsatd_pt_pu_*_tu_rec
*               function
*
******************************************************************************
*/
WORD32 ihevce_determine_tu_tree_distribution(
    cu_inter_cand_t *ps_cu_data,
    me_func_selector_t *ps_func_selector,
    WORD16 *pi2_scratch_mem,
    UWORD8 *pu1_inp,
    WORD32 i4_inp_stride,
    WORD32 i4_lambda,
    UWORD8 u1_lambda_q_shift,
    UWORD8 u1_cu_size,
    UWORD8 u1_max_tr_depth)
{
    PF_SAD_FXN_TU_REC apf_tu_rec_satd[4];
    err_prms_t s_err_prms;
    WORD32 i4_satd;

    /* One evaluator per CU size */
    apf_tu_rec_satd[CU_8x8] = hme_evalsatd_pt_pu_8x8_tu_rec;
    apf_tu_rec_satd[CU_16x16] = hme_evalsatd_pt_pu_16x16_tu_rec;
    apf_tu_rec_satd[CU_32x32] = hme_evalsatd_pt_pu_32x32_tu_rec;
    apf_tu_rec_satd[CU_64x64] = hme_evalsatd_pt_pu_64x64_tu_rec;

    /* Source, reference (pred) and scratch buffers */
    s_err_prms.pu1_inp = pu1_inp;
    s_err_prms.i4_inp_stride = i4_inp_stride;
    s_err_prms.pu1_ref = ps_cu_data->pu1_pred_data;
    s_err_prms.i4_ref_stride = ps_cu_data->i4_pred_data_stride;
    s_err_prms.pu1_wkg_mem = (UWORD8 *)pi2_scratch_mem;

    /* Output locations */
    s_err_prms.pi4_sad_grid = &i4_satd;
    s_err_prms.pi4_tu_split_flags = ps_cu_data->ai4_tu_split_flag;

    /* Transform depth is capped to 1 for 64x64 CUs */
    s_err_prms.u1_max_tr_depth = (64 == u1_cu_size) ? MIN(1, u1_max_tr_depth) : u1_max_tr_depth;

    i4_satd = apf_tu_rec_satd[hme_get_range(u1_cu_size) - 4](
        &s_err_prms, i4_lambda, u1_lambda_q_shift, 0, ps_func_selector);

    /* With zero transform depth, a candidate whose b3_part_size is non-zero */
    /* gets the first split flag forced to 1, except for 64x64 CUs           */
    if((64 != u1_cu_size) && (0 != ps_cu_data->b3_part_size) && (0 == u1_max_tr_depth))
    {
        ps_cu_data->ai4_tu_split_flag[0] = 1;
    }

    return i4_satd;
}
#endif
10388
10389
void ihevce_populate_nbr_4x4_with_pu_data(
10390
    nbr_4x4_t *ps_nbr_4x4, pu_t *ps_pu, WORD32 i4_nbr_buf_stride)
10391
0
{
10392
0
    WORD32 i, j;
10393
10394
0
    nbr_4x4_t *ps_tmp_4x4 = ps_nbr_4x4;
10395
10396
0
    WORD32 ht = (ps_pu->b4_ht + 1);
10397
0
    WORD32 wd = (ps_pu->b4_wd + 1);
10398
10399
0
    ps_nbr_4x4->b1_intra_flag = 0;
10400
0
    ps_nbr_4x4->b1_pred_l0_flag = !(ps_pu->b2_pred_mode & 1);
10401
0
    ps_nbr_4x4->b1_pred_l1_flag = (ps_pu->b2_pred_mode > PRED_L0);
10402
0
    ps_nbr_4x4->mv = ps_pu->mv;
10403
10404
0
    for(i = 0; i < ht; i++)
10405
0
    {
10406
0
        for(j = 0; j < wd; j++)
10407
0
        {
10408
0
            ps_tmp_4x4[j] = *ps_nbr_4x4;
10409
0
        }
10410
10411
0
        ps_tmp_4x4 += i4_nbr_buf_stride;
10412
0
    }
10413
0
}
10414
10415
void ihevce_call_luma_inter_pred_rdopt_pass1(
10416
    ihevce_enc_loop_ctxt_t *ps_ctxt, cu_inter_cand_t *ps_inter_cand, WORD32 cu_size)
10417
0
{
10418
0
    pu_t *ps_pu;
10419
0
    UWORD8 *pu1_pred;
10420
0
    WORD32 pred_stride, ctr, num_cu_part, skip_or_merge_flag = 0;
10421
0
    WORD32 inter_pu_wd, inter_pu_ht;
10422
10423
0
    pu1_pred = ps_inter_cand->pu1_pred_data_scr;
10424
0
    pred_stride = ps_inter_cand->i4_pred_data_stride;
10425
0
    num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1;
10426
10427
0
    for(ctr = 0; ctr < num_cu_part; ctr++)
10428
0
    {
10429
0
        ps_pu = &ps_inter_cand->as_inter_pu[ctr];
10430
10431
        /* IF AMP then each partitions can have diff wd ht */
10432
0
        inter_pu_wd = (ps_pu->b4_wd + 1) << 2;
10433
0
        inter_pu_ht = (ps_pu->b4_ht + 1) << 2;
10434
10435
0
        skip_or_merge_flag = ps_inter_cand->b1_skip_flag | ps_pu->b1_merge_flag;
10436
        //if(0 == skip_or_merge_flag)
10437
0
        {
10438
0
            ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 1);
10439
0
        }
10440
0
        if((2 == num_cu_part) && (0 == ctr))
10441
0
        {
10442
            /* 2Nx__ partion case */
10443
0
            if(inter_pu_wd == cu_size)
10444
0
            {
10445
0
                pu1_pred += (inter_pu_ht * pred_stride);
10446
0
            }
10447
10448
            /* __x2N partion case */
10449
0
            if(inter_pu_ht == cu_size)
10450
0
            {
10451
0
                pu1_pred += inter_pu_wd;
10452
0
            }
10453
0
        }
10454
0
    }
10455
0
}
10456
10457
LWORD64 ihevce_it_recon_ssd(
10458
    ihevce_enc_loop_ctxt_t *ps_ctxt,
10459
    UWORD8 *pu1_src,
10460
    WORD32 i4_src_strd,
10461
    UWORD8 *pu1_pred,
10462
    WORD32 i4_pred_strd,
10463
    WORD16 *pi2_deq_data,
10464
    WORD32 i4_deq_data_strd,
10465
    UWORD8 *pu1_recon,
10466
    WORD32 i4_recon_stride,
10467
    UWORD8 *pu1_ecd_data,
10468
    UWORD8 u1_trans_size,
10469
    UWORD8 u1_pred_mode,
10470
    WORD32 i4_cbf,
10471
    WORD32 i4_zero_col,
10472
    WORD32 i4_zero_row,
10473
    CHROMA_PLANE_ID_T e_chroma_plane)
10474
0
{
10475
0
    if(NULL_PLANE == e_chroma_plane)
10476
0
    {
10477
0
        ihevce_it_recon_fxn(
10478
0
            ps_ctxt,
10479
0
            pi2_deq_data,
10480
0
            i4_deq_data_strd,
10481
0
            pu1_pred,
10482
0
            i4_pred_strd,
10483
0
            pu1_recon,
10484
0
            i4_recon_stride,
10485
0
            pu1_ecd_data,
10486
0
            u1_trans_size,
10487
0
            u1_pred_mode,
10488
0
            i4_cbf,
10489
0
            i4_zero_col,
10490
0
            i4_zero_row);
10491
10492
0
        return ps_ctxt->s_cmn_opt_func.pf_ssd_calculator(
10493
0
            pu1_recon, pu1_src, i4_recon_stride, i4_src_strd, u1_trans_size, u1_trans_size,
10494
0
            e_chroma_plane);
10495
0
    }
10496
0
    else
10497
0
    {
10498
0
        ihevce_chroma_it_recon_fxn(
10499
0
            ps_ctxt,
10500
0
            pi2_deq_data,
10501
0
            i4_deq_data_strd,
10502
0
            pu1_pred,
10503
0
            i4_pred_strd,
10504
0
            pu1_recon,
10505
0
            i4_recon_stride,
10506
0
            pu1_ecd_data,
10507
0
            u1_trans_size,
10508
0
            i4_cbf,
10509
0
            i4_zero_col,
10510
0
            i4_zero_row,
10511
0
            e_chroma_plane);
10512
10513
0
        return ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
10514
0
            pu1_recon,
10515
0
            pu1_src,
10516
0
            i4_recon_stride,
10517
0
            i4_src_strd,
10518
0
            u1_trans_size,
10519
0
            u1_trans_size,
10520
0
            e_chroma_plane);
10521
0
    }
10522
0
}
10523
10524
/*!
10525
******************************************************************************
10526
* \if Function name : ihevce_chroma_t_q_iq_ssd_scan_fxn \endif
10527
*
10528
* \brief
10529
*    Transform unit level (Chroma) enc_loop function
10530
*
10531
* \param[in] ps_ctxt    enc_loop module ctxt pointer
10532
* \param[in] pu1_pred       pointer to predicted data buffer
10533
* \param[in] pred_strd      predicted buffer stride
10534
* \param[in] pu1_src    pointer to source data buffer
10535
* \param[in] src_strd   source buffer stride
10536
* \param[in] pi2_deq_data   pointer to store iq data
10537
* \param[in] deq_data_strd  iq data buffer stride
10538
* \param[out] pu1_ecd_data  pointer coeff output buffer (input to ent cod)
10539
* \param[out] pu1_csbf_buf  pointer to store the csbf for all 4x4 in a current
10540
*                           block
10541
* \param[out] csbf_strd     csbf buffer stride
10542
* \param[in] trans_size     transform size (4, 8, 16)
10543
* \param[in] intra_flag     0:Inter/Skip 1:Intra
10544
* \param[out] pi4_coeff_off pointer to store the number of bytes produced in
10545
*                           coeff buffer
10546
*                           for the current TU in RDopt Mode
10547
* \param[out] pi4_zero_col  pointer to store the zero_col info for the TU
10548
* \param[out] pi4_zero_row  pointer to store the zero_row info for the TU
10549
*
10550
* \return
10551
*    CBF of the current block
10552
*
10553
* \author
10554
*  Ittiam
10555
*
10556
*****************************************************************************
10557
*/
10558
WORD32 ihevce_chroma_t_q_iq_ssd_scan_fxn(
10559
    ihevce_enc_loop_ctxt_t *ps_ctxt,
10560
    UWORD8 *pu1_pred,
10561
    WORD32 pred_strd,
10562
    UWORD8 *pu1_src,
10563
    WORD32 src_strd,
10564
    WORD16 *pi2_deq_data,
10565
    WORD32 deq_data_strd,
10566
    UWORD8 *pu1_recon,
10567
    WORD32 i4_recon_stride,
10568
    UWORD8 *pu1_ecd_data,
10569
    UWORD8 *pu1_csbf_buf,
10570
    WORD32 csbf_strd,
10571
    WORD32 trans_size,
10572
    WORD32 i4_scan_idx,
10573
    WORD32 intra_flag,
10574
    WORD32 *pi4_coeff_off,
10575
    WORD32 *pi4_tu_bits,
10576
    WORD32 *pi4_zero_col,
10577
    WORD32 *pi4_zero_row,
10578
    UWORD8 *pu1_is_recon_available,
10579
    WORD32 i4_perform_sbh,
10580
    WORD32 i4_perform_rdoq,
10581
    LWORD64 *pi8_cost,
10582
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
10583
    WORD32 i4_alpha_stim_multiplier,
10584
    UWORD8 u1_is_cu_noisy,
10585
#endif
10586
    UWORD8 u1_is_skip,
10587
    SSD_TYPE_T e_ssd_type,
10588
    CHROMA_PLANE_ID_T e_chroma_plane)
10589
0
{
10590
0
    WORD32 trans_idx, cbf, u4_blk_sad;
10591
0
    WORD16 *pi2_quant_coeffs;
10592
0
    WORD16 *pi2_trans_values;
10593
0
    WORD32 quant_scale_mat_offset;
10594
0
    WORD32 *pi4_trans_scratch;
10595
0
    WORD32 *pi4_subBlock2csbfId_map = NULL;
10596
10597
#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
10598
    WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i;
10599
#endif
10600
10601
0
    rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt;
10602
10603
0
    WORD32 i4_perform_zcbf = (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE) ||
10604
0
                             (!intra_flag && ENABLE_INTER_ZCU_COST);
10605
0
    WORD32 i4_perform_coeff_level_rdoq =
10606
0
        (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING) &&
10607
0
        (ps_ctxt->i4_chroma_quant_rounding_level == CHROMA_QUANT_ROUNDING);
10608
10609
0
    ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE));
10610
0
    ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW);
10611
10612
0
    *pi4_coeff_off = 0;
10613
0
    *pi4_tu_bits = 0;
10614
0
    pu1_is_recon_available[0] = 0;
10615
10616
0
    pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0];
10617
0
    pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
10618
0
    pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2);
10619
10620
0
    if(2 == trans_size)
10621
0
    {
10622
0
        trans_size = 4;
10623
0
    }
10624
10625
    /* translate the transform size to index */
10626
0
    trans_idx = trans_size >> 2;
10627
10628
0
    if(16 == trans_size)
10629
0
    {
10630
0
        trans_idx = 3;
10631
0
    }
10632
10633
0
    if(u1_is_skip)
10634
0
    {
10635
0
        pi8_cost[0] = ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
10636
0
            pu1_pred,
10637
0
            pu1_src,
10638
0
            pred_strd,
10639
0
            src_strd,
10640
0
            trans_size,
10641
0
            trans_size,
10642
0
            e_chroma_plane);
10643
10644
0
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
10645
0
        {
10646
            /* buffer copy fromp pred to recon */
10647
0
            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
10648
0
                pu1_pred,
10649
0
                pred_strd,
10650
0
                pu1_recon,
10651
0
                i4_recon_stride,
10652
0
                trans_size,
10653
0
                trans_size,
10654
0
                e_chroma_plane);
10655
10656
0
            pu1_is_recon_available[0] = 1;
10657
0
        }
10658
10659
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
10660
0
        if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
10661
0
        {
10662
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
10663
0
                pu1_src,
10664
0
                src_strd,
10665
0
                pu1_pred,
10666
0
                pred_strd,
10667
0
                pi8_cost[0],
10668
0
                i4_alpha_stim_multiplier,
10669
0
                trans_size,
10670
0
                0,
10671
0
                ps_ctxt->u1_enable_psyRDOPT,
10672
0
                e_chroma_plane);
10673
0
        }
10674
0
#endif
10675
10676
0
#if ENABLE_INTER_ZCU_COST
10677
#if !WEIGH_CHROMA_COST
10678
        /* cbf = 0, accumulate cu not coded cost */
10679
        ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
10680
#else
10681
0
        ps_ctxt->i8_cu_not_coded_cost += (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor +
10682
0
                                          (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
10683
0
                                         CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT;
10684
0
#endif
10685
0
#endif
10686
10687
0
        return 0;
10688
0
    }
10689
10690
0
    if(intra_flag == 1)
10691
0
    {
10692
0
        quant_scale_mat_offset = 0;
10693
10694
#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
10695
        ai4_quant_rounding_factors[0][0] =
10696
            MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3);
10697
10698
        for(i = 0; i < trans_size * trans_size; i++)
10699
        {
10700
            ai4_quant_rounding_factors[1][i] =
10701
                MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3][i],
10702
                    (1 << QUANT_ROUND_FACTOR_Q) / 3);
10703
            ai4_quant_rounding_factors[2][i] =
10704
                MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3][i],
10705
                    (1 << QUANT_ROUND_FACTOR_Q) / 3);
10706
        }
10707
#endif
10708
0
    }
10709
0
    else
10710
0
    {
10711
0
        quant_scale_mat_offset = NUM_TRANS_TYPES;
10712
0
    }
10713
10714
0
    switch(trans_size)
10715
0
    {
10716
0
    case 4:
10717
0
    {
10718
0
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU;
10719
10720
0
        break;
10721
0
    }
10722
0
    case 8:
10723
0
    {
10724
0
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU;
10725
10726
0
        break;
10727
0
    }
10728
0
    case 16:
10729
0
    {
10730
0
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU;
10731
10732
0
        break;
10733
0
    }
10734
0
    case 32:
10735
0
    {
10736
0
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU;
10737
10738
0
        break;
10739
0
    }
10740
0
    }
10741
10742
    /* ---------- call residue and transform block ------- */
10743
0
    u4_blk_sad = ps_ctxt->apf_chrm_resd_trns[trans_idx - 1](
10744
0
        pu1_src,
10745
0
        pu1_pred,
10746
0
        pi4_trans_scratch,
10747
0
        pi2_trans_values,
10748
0
        src_strd,
10749
0
        pred_strd,
10750
0
        trans_size,
10751
0
        e_chroma_plane);
10752
0
    (void)u4_blk_sad;
10753
    /* -------- calculate SSD calculation in Transform Domain ------ */
10754
10755
0
    cbf = ps_ctxt->apf_quant_iquant_ssd
10756
0
              [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2]
10757
10758
0
          (pi2_trans_values,
10759
0
           ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset],
10760
0
           pi2_quant_coeffs,
10761
0
           pi2_deq_data,
10762
0
           trans_size,
10763
0
           ps_ctxt->i4_chrm_cu_qp_div6,
10764
0
           ps_ctxt->i4_chrm_cu_qp_mod6,
10765
0
#if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
10766
0
           ps_ctxt->i4_quant_rnd_factor[intra_flag],
10767
0
           ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3],
10768
0
           ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3],
10769
#else
10770
           intra_flag ? ai4_quant_rounding_factors[0][0] : ps_ctxt->i4_quant_rnd_factor[intra_flag],
10771
           intra_flag ? ai4_quant_rounding_factors[1]
10772
                      : ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3],
10773
           intra_flag ? ai4_quant_rounding_factors[2]
10774
                      : ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3],
10775
#endif
10776
0
           trans_size,
10777
0
           trans_size,
10778
0
           deq_data_strd,
10779
0
           pu1_csbf_buf,
10780
0
           csbf_strd,
10781
0
           pi4_zero_col,
10782
0
           pi4_zero_row,
10783
0
           ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset],
10784
0
           pi8_cost);
10785
10786
0
    if(e_ssd_type != FREQUENCY_DOMAIN_SSD)
10787
0
    {
10788
0
        pi8_cost[0] = UINT_MAX;
10789
0
    }
10790
10791
0
    if(0 != cbf)
10792
0
    {
10793
0
        if(i4_perform_sbh || i4_perform_rdoq)
10794
0
        {
10795
0
            ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd;
10796
0
            ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size;
10797
10798
0
            ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_chrm_cu_qp_div6;
10799
0
            ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_chrm_cu_qp_mod6;
10800
0
            ps_rdoq_sbh_ctxt->i4_scan_idx = i4_scan_idx;
10801
0
            ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
10802
0
            ps_rdoq_sbh_ctxt->i4_trans_size = trans_size;
10803
10804
0
            ps_rdoq_sbh_ctxt->pi2_dequant_coeff =
10805
0
                ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset];
10806
0
            ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data;
10807
0
            ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs;
10808
0
            ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values;
10809
0
            ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf;
10810
0
            ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map;
10811
10812
0
            if((!i4_perform_rdoq))
10813
0
            {
10814
0
                ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
10815
10816
0
                pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
10817
0
            }
10818
0
        }
10819
10820
        /* ------- call coeffs scan function ------- */
10821
0
        *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
10822
0
            pi2_quant_coeffs,
10823
0
            pi4_subBlock2csbfId_map,
10824
0
            i4_scan_idx,
10825
0
            trans_size,
10826
0
            pu1_ecd_data,
10827
0
            pu1_csbf_buf,
10828
0
            csbf_strd);
10829
0
    }
10830
10831
    /*  Normalize Cost. Note : trans_idx, not (trans_idx-1) */
10832
0
    pi8_cost[0] >>= ga_trans_shift[trans_idx];
10833
10834
0
#if RDOPT_ZERO_CBF_ENABLE
10835
0
    if((0 != cbf))
10836
0
    {
10837
0
        WORD32 tu_bits;
10838
0
        LWORD64 zero_cbf_cost_u, curr_cb_cod_cost;
10839
10840
0
        zero_cbf_cost_u = 0;
10841
10842
        /*Populating the feilds of rdoq_ctxt structure*/
10843
0
        if(i4_perform_rdoq)
10844
0
        {
10845
            //memset(ps_rdoq_sbh_ctxt,0,sizeof(rdoq_sbh_ctxt_t));
10846
            /* transform size to log2transform size */
10847
0
            GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size);
10848
0
            ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1;
10849
10850
0
            ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_chroma_qf;
10851
0
            ps_rdoq_sbh_ctxt->i4_is_luma = 0;
10852
0
            ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx];
10853
0
            ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td =
10854
0
                (1 << (ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td - 1));
10855
0
            ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0;
10856
0
            ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col;
10857
0
            ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row;
10858
0
        }
10859
0
        else if(i4_perform_zcbf)
10860
0
        {
10861
            /* cost of zero cbf encoding */
10862
0
            zero_cbf_cost_u =
10863
10864
0
                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
10865
0
                    pu1_pred,
10866
0
                    pu1_src,
10867
0
                    pred_strd,
10868
0
                    src_strd,
10869
0
                    trans_size,
10870
0
                    trans_size,
10871
0
                    e_chroma_plane);
10872
0
        }
10873
10874
        /************************************************************************/
10875
        /* call the entropy rdo encode to get the bit estimate for current tu   */
10876
        /* note that tu includes only residual coding bits and does not include */
10877
        /* tu split, cbf and qp delta encoding bits for a TU                    */
10878
        /************************************************************************/
10879
0
        if(i4_perform_rdoq)
10880
0
        {
10881
0
            tu_bits = ihevce_entropy_rdo_encode_tu_rdoq(
10882
0
                &ps_ctxt->s_rdopt_entropy_ctxt,
10883
0
                pu1_ecd_data,
10884
0
                trans_size,
10885
0
                0,
10886
0
                ps_rdoq_sbh_ctxt,
10887
0
                pi8_cost,
10888
0
                &zero_cbf_cost_u,
10889
0
                0);
10890
            //Currently, we are not accounting for sign bit in RDOPT bits calculation when RDOQ is turned on
10891
10892
0
            if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0)
10893
0
            {
10894
0
                cbf = 0;
10895
10896
                /* num bytes is set to 0 */
10897
0
                *pi4_coeff_off = 0;
10898
0
            }
10899
10900
0
            (*pi4_tu_bits) += tu_bits;
10901
10902
0
            if((i4_perform_sbh) && (0 != cbf))
10903
0
            {
10904
0
                ps_rdoq_sbh_ctxt->i8_ssd_cost = pi8_cost[0];
10905
10906
0
                ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
10907
10908
0
                pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
10909
0
            }
10910
10911
            /*Add round value before normalizing*/
10912
0
            pi8_cost[0] += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td;
10913
0
            pi8_cost[0] >>= ga_trans_shift[trans_idx];
10914
10915
0
            if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1)
10916
0
            {
10917
0
                *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
10918
0
                    pi2_quant_coeffs,
10919
0
                    pi4_subBlock2csbfId_map,
10920
0
                    i4_scan_idx,
10921
0
                    trans_size,
10922
0
                    pu1_ecd_data,
10923
0
                    ps_rdoq_sbh_ctxt->pu1_csbf_buf,
10924
0
                    csbf_strd);
10925
0
            }
10926
0
        }
10927
0
        else
10928
0
        {
10929
            /************************************************************************/
10930
            /* call the entropy rdo encode to get the bit estimate for current tu   */
10931
            /* note that tu includes only residual coding bits and does not include */
10932
            /* tu split, cbf and qp delta encoding bits for a TU                    */
10933
            /************************************************************************/
10934
0
            tu_bits = ihevce_entropy_rdo_encode_tu(
10935
0
                &ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 0, i4_perform_sbh);
10936
10937
0
            (*pi4_tu_bits) += tu_bits;
10938
0
        }
10939
10940
0
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
10941
0
        {
10942
0
            pi8_cost[0] = ihevce_it_recon_ssd(
10943
0
                ps_ctxt,
10944
0
                pu1_src,
10945
0
                src_strd,
10946
0
                pu1_pred,
10947
0
                pred_strd,
10948
0
                pi2_deq_data,
10949
0
                deq_data_strd,
10950
0
                pu1_recon,
10951
0
                i4_recon_stride,
10952
0
                pu1_ecd_data,
10953
0
                trans_size,
10954
0
                PRED_MODE_INTRA,
10955
0
                cbf,
10956
0
                pi4_zero_col[0],
10957
0
                pi4_zero_row[0],
10958
0
                e_chroma_plane);
10959
10960
0
            pu1_is_recon_available[0] = 1;
10961
0
        }
10962
10963
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
10964
0
        if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
10965
0
        {
10966
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
10967
0
                pu1_src,
10968
0
                src_strd,
10969
0
                pu1_recon,
10970
0
                i4_recon_stride,
10971
0
                pi8_cost[0],
10972
0
                i4_alpha_stim_multiplier,
10973
0
                trans_size,
10974
0
                0,
10975
0
                ps_ctxt->u1_enable_psyRDOPT,
10976
0
                e_chroma_plane);
10977
0
        }
10978
0
        else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
10979
0
        {
10980
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
10981
0
                pu1_src,
10982
0
                src_strd,
10983
0
                pu1_pred,
10984
0
                pred_strd,
10985
0
                pi8_cost[0],
10986
0
                i4_alpha_stim_multiplier,
10987
0
                trans_size,
10988
0
                0,
10989
0
                ps_ctxt->u1_enable_psyRDOPT,
10990
0
                e_chroma_plane);
10991
0
        }
10992
0
#endif
10993
10994
0
        curr_cb_cod_cost = pi8_cost[0];
10995
10996
        /* add the SSD cost to bits estimate given by ECD */
10997
0
        curr_cb_cod_cost +=
10998
0
            COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
10999
11000
0
        if(i4_perform_zcbf)
11001
0
        {
11002
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
11003
0
            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
11004
0
            {
11005
0
                zero_cbf_cost_u = ihevce_inject_stim_into_distortion(
11006
0
                    pu1_src,
11007
0
                    src_strd,
11008
0
                    pu1_pred,
11009
0
                    pred_strd,
11010
0
                    zero_cbf_cost_u,
11011
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
11012
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
11013
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
11014
0
                                                 100.0,
11015
0
                    trans_size,
11016
0
                    0,
11017
0
                    ps_ctxt->u1_enable_psyRDOPT,
11018
0
                    e_chroma_plane);
11019
0
            }
11020
0
#endif
11021
            /* force the tu as zero cbf if zero_cbf_cost is lower */
11022
0
            if(zero_cbf_cost_u < curr_cb_cod_cost)
11023
0
            {
11024
0
                *pi4_coeff_off = 0;
11025
0
                cbf = 0;
11026
0
                (*pi4_tu_bits) = 0;
11027
0
                pi8_cost[0] = zero_cbf_cost_u;
11028
11029
0
                pu1_is_recon_available[0] = 0;
11030
11031
0
                if(e_ssd_type == SPATIAL_DOMAIN_SSD)
11032
0
                {
11033
0
                    ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
11034
0
                        pu1_pred,
11035
0
                        pred_strd,
11036
0
                        pu1_recon,
11037
0
                        i4_recon_stride,
11038
0
                        trans_size,
11039
0
                        trans_size,
11040
0
                        e_chroma_plane);
11041
11042
0
                    pu1_is_recon_available[0] = 1;
11043
0
                }
11044
0
            }
11045
11046
0
#if ENABLE_INTER_ZCU_COST
11047
0
            if(!intra_flag)
11048
0
            {
11049
#if !WEIGH_CHROMA_COST
11050
                ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost_u;
11051
#else
11052
0
                ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
11053
0
                    (zero_cbf_cost_u * ps_ctxt->u4_chroma_cost_weighing_factor +
11054
0
                     (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
11055
0
                    CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
11056
0
#endif
11057
0
            }
11058
0
#endif
11059
0
        }
11060
0
    }
11061
0
    else
11062
0
    {
11063
0
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
11064
0
        {
11065
0
            pi8_cost[0] = ihevce_it_recon_ssd(
11066
0
                ps_ctxt,
11067
0
                pu1_src,
11068
0
                src_strd,
11069
0
                pu1_pred,
11070
0
                pred_strd,
11071
0
                pi2_deq_data,
11072
0
                deq_data_strd,
11073
0
                pu1_recon,
11074
0
                i4_recon_stride,
11075
0
                pu1_ecd_data,
11076
0
                trans_size,
11077
0
                PRED_MODE_INTRA,
11078
0
                cbf,
11079
0
                pi4_zero_col[0],
11080
0
                pi4_zero_row[0],
11081
0
                e_chroma_plane);
11082
11083
0
            pu1_is_recon_available[0] = 1;
11084
0
        }
11085
11086
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
11087
0
        if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
11088
0
        {
11089
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
11090
0
                pu1_src,
11091
0
                src_strd,
11092
0
                pu1_recon,
11093
0
                i4_recon_stride,
11094
0
                pi8_cost[0],
11095
0
                !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
11096
0
                                       : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
11097
0
                                          (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
11098
0
                                             100.0,
11099
0
                trans_size,
11100
0
                0,
11101
0
                ps_ctxt->u1_enable_psyRDOPT,
11102
0
                e_chroma_plane);
11103
0
        }
11104
0
        else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
11105
0
        {
11106
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
11107
0
                pu1_src,
11108
0
                src_strd,
11109
0
                pu1_pred,
11110
0
                pred_strd,
11111
0
                pi8_cost[0],
11112
0
                !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
11113
0
                                       : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
11114
0
                                          (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
11115
0
                                             100.0,
11116
0
                trans_size,
11117
0
                0,
11118
0
                ps_ctxt->u1_enable_psyRDOPT,
11119
0
                e_chroma_plane);
11120
0
        }
11121
0
#endif
11122
11123
0
#if ENABLE_INTER_ZCU_COST
11124
0
        if(!intra_flag)
11125
0
        {
11126
#if !WEIGH_CHROMA_COST
11127
            /* cbf = 0, accumulate cu not coded cost */
11128
            ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
11129
#else
11130
            /* cbf = 0, accumulate cu not coded cost */
11131
11132
0
            ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
11133
0
                (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor +
11134
0
                 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
11135
0
                CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
11136
0
#endif
11137
0
        }
11138
0
#endif
11139
0
    }
11140
0
#endif /* RDOPT_ZERO_CBF_ENABLE */
11141
11142
0
    return (cbf);
11143
0
}