Coverage Report

Created: 2025-10-10 06:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/encoder/ihevce_enc_loop_utils.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
21
/*!
22
******************************************************************************
23
* \file ihevce_enc_loop_utils.c
24
*
25
* \brief
26
*    This file contains utility functions of Encode loop
27
*
28
* \date
29
*    18/09/2012
30
*
31
* \author
32
*    Ittiam
33
*
34
*
35
* List of Functions
36
*
37
*
38
******************************************************************************
39
*/
40
41
/*****************************************************************************/
42
/* File Includes                                                             */
43
/*****************************************************************************/
44
/* System include files */
45
#include <stdio.h>
46
#include <string.h>
47
#include <stdlib.h>
48
#include <assert.h>
49
#include <stdarg.h>
50
#include <math.h>
51
#include <limits.h>
52
53
/* User include files */
54
#include "ihevc_typedefs.h"
55
#include "itt_video_api.h"
56
#include "ihevce_api.h"
57
58
#include "rc_cntrl_param.h"
59
#include "rc_frame_info_collector.h"
60
#include "rc_look_ahead_params.h"
61
62
#include "ihevc_defs.h"
63
#include "ihevc_macros.h"
64
#include "ihevc_debug.h"
65
#include "ihevc_structs.h"
66
#include "ihevc_platform_macros.h"
67
#include "ihevc_deblk.h"
68
#include "ihevc_itrans_recon.h"
69
#include "ihevc_chroma_itrans_recon.h"
70
#include "ihevc_chroma_intra_pred.h"
71
#include "ihevc_intra_pred.h"
72
#include "ihevc_inter_pred.h"
73
#include "ihevc_mem_fns.h"
74
#include "ihevc_padding.h"
75
#include "ihevc_weighted_pred.h"
76
#include "ihevc_sao.h"
77
#include "ihevc_resi_trans.h"
78
#include "ihevc_quant_iquant_ssd.h"
79
#include "ihevc_cabac_tables.h"
80
#include "ihevc_common_tables.h"
81
82
#include "ihevce_defs.h"
83
#include "ihevce_hle_interface.h"
84
#include "ihevce_lap_enc_structs.h"
85
#include "ihevce_multi_thrd_structs.h"
86
#include "ihevce_multi_thrd_funcs.h"
87
#include "ihevce_me_common_defs.h"
88
#include "ihevce_had_satd.h"
89
#include "ihevce_error_codes.h"
90
#include "ihevce_bitstream.h"
91
#include "ihevce_cabac.h"
92
#include "ihevce_rdoq_macros.h"
93
#include "ihevce_function_selector.h"
94
#include "ihevce_enc_structs.h"
95
#include "ihevce_entropy_structs.h"
96
#include "ihevce_cmn_utils_instr_set_router.h"
97
#include "ihevce_ipe_instr_set_router.h"
98
#include "ihevce_decomp_pre_intra_structs.h"
99
#include "ihevce_decomp_pre_intra_pass.h"
100
#include "ihevce_enc_loop_structs.h"
101
#include "ihevce_nbr_avail.h"
102
#include "ihevce_enc_loop_utils.h"
103
#include "ihevce_sub_pic_rc.h"
104
#include "ihevce_global_tables.h"
105
#include "ihevce_bs_compute_ctb.h"
106
#include "ihevce_cabac_rdo.h"
107
#include "ihevce_deblk.h"
108
#include "ihevce_frame_process.h"
109
#include "ihevce_rc_enc_structs.h"
110
#include "hme_datatype.h"
111
#include "hme_interface.h"
112
#include "hme_common_defs.h"
113
#include "hme_defs.h"
114
#include "hme_common_utils.h"
115
#include "ihevce_me_instr_set_router.h"
116
#include "ihevce_enc_subpel_gen.h"
117
#include "ihevce_inter_pred.h"
118
#include "ihevce_mv_pred.h"
119
#include "ihevce_mv_pred_merge.h"
120
#include "ihevce_enc_loop_inter_mode_sifter.h"
121
#include "ihevce_enc_cu_recursion.h"
122
#include "ihevce_enc_loop_pass.h"
123
#include "ihevce_common_utils.h"
124
#include "ihevce_dep_mngr_interface.h"
125
#include "ihevce_sao.h"
126
#include "ihevce_tile_interface.h"
127
#include "ihevce_profile.h"
128
#include "ihevce_stasino_helpers.h"
129
#include "ihevce_tu_tree_selector.h"
130
131
/*****************************************************************************/
132
/* Globals                                                                   */
133
/*****************************************************************************/
134
135
/* Lookup tables defined in other translation units. None of them is referenced
 * by the functions visible in this excerpt - presumably they are used by the
 * CABAC / RDOQ code further down the file (TODO confirm at the points of use). */
extern UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2];
136
extern const UWORD8 gu1_hevce_scan4x4[3][16];
137
extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc[4][16];
138
extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_tr4[16];
139
extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_00[16];
140
141
/*****************************************************************************/
142
/* Constant Macros                                                           */
143
/*****************************************************************************/
144
/* File-local compile-time switches (1 = on, 0 = off). Neither macro is used by
 * the functions visible in this excerpt; NOTE(review): confirm their exact
 * effect where they are tested before changing either value. */
#define ENABLE_ZERO_CBF 1
145
#define DISABLE_RDOQ_INTRA 0
146
147
/*****************************************************************************/
148
/* Function Definitions                                                      */
149
/*****************************************************************************/
150
void *ihevce_tu_tree_update(
151
    tu_prms_t *ps_tu_prms,
152
    WORD32 *pnum_tu_in_cu,
153
    WORD32 depth,
154
    WORD32 tu_split_flag,
155
    WORD32 tu_early_cbf,
156
    WORD32 i4_x_off,
157
    WORD32 i4_y_off)
158
1.30M
{
159
    //WORD32 tu_split_flag = p_tu_split_flag[0];
160
1.30M
    WORD32 p_tu_split_flag[4];
161
1.30M
    WORD32 p_tu_early_cbf[4];
162
163
1.30M
    WORD32 tu_size = ps_tu_prms->u1_tu_size;
164
165
1.30M
    if(((tu_size >> depth) >= 16) && (tu_split_flag & 0x1))
166
108k
    {
167
108k
        if((tu_size >> depth) == 32)
168
24.5k
        {
169
            /* Get the individual TU split flags */
170
24.5k
            p_tu_split_flag[0] = (tu_split_flag >> 16) & 0x1F;
171
24.5k
            p_tu_split_flag[1] = (tu_split_flag >> 11) & 0x1F;
172
24.5k
            p_tu_split_flag[2] = (tu_split_flag >> 6) & 0x1F;
173
24.5k
            p_tu_split_flag[3] = (tu_split_flag >> 1) & 0x1F;
174
175
            /* Get the early CBF flags */
176
24.5k
            p_tu_early_cbf[0] = (tu_early_cbf >> 16) & 0x1F;
177
24.5k
            p_tu_early_cbf[1] = (tu_early_cbf >> 11) & 0x1F;
178
24.5k
            p_tu_early_cbf[2] = (tu_early_cbf >> 6) & 0x1F;
179
24.5k
            p_tu_early_cbf[3] = (tu_early_cbf >> 1) & 0x1F;
180
24.5k
        }
181
83.6k
        else
182
83.6k
        {
183
            /* Get the individual TU split flags */
184
83.6k
            p_tu_split_flag[0] = ((tu_split_flag >> 4) & 0x1);
185
83.6k
            p_tu_split_flag[1] = ((tu_split_flag >> 3) & 0x1);
186
83.6k
            p_tu_split_flag[2] = ((tu_split_flag >> 2) & 0x1);
187
83.6k
            p_tu_split_flag[3] = ((tu_split_flag >> 1) & 0x1);
188
189
            /* Get the early CBF flags */
190
83.6k
            p_tu_early_cbf[0] = ((tu_early_cbf >> 4) & 0x1);
191
83.6k
            p_tu_early_cbf[1] = ((tu_early_cbf >> 3) & 0x1);
192
83.6k
            p_tu_early_cbf[2] = ((tu_early_cbf >> 2) & 0x1);
193
83.6k
            p_tu_early_cbf[3] = ((tu_early_cbf >> 1) & 0x1);
194
83.6k
        }
195
196
108k
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
197
108k
            ps_tu_prms,
198
108k
            pnum_tu_in_cu,
199
108k
            depth + 1,
200
108k
            p_tu_split_flag[0],
201
108k
            p_tu_early_cbf[0],
202
108k
            i4_x_off,
203
108k
            i4_y_off);
204
205
108k
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
206
108k
            ps_tu_prms,
207
108k
            pnum_tu_in_cu,
208
108k
            depth + 1,
209
108k
            p_tu_split_flag[1],
210
108k
            p_tu_early_cbf[1],
211
108k
            (i4_x_off + (tu_size >> (depth + 1))),
212
108k
            i4_y_off);
213
214
108k
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
215
108k
            ps_tu_prms,
216
108k
            pnum_tu_in_cu,
217
108k
            depth + 1,
218
108k
            p_tu_split_flag[2],
219
108k
            p_tu_early_cbf[2],
220
108k
            i4_x_off,
221
108k
            (i4_y_off + (tu_size >> (depth + 1))));
222
223
108k
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
224
108k
            ps_tu_prms,
225
108k
            pnum_tu_in_cu,
226
108k
            depth + 1,
227
108k
            p_tu_split_flag[3],
228
108k
            p_tu_early_cbf[3],
229
108k
            (i4_x_off + (tu_size >> (depth + 1))),
230
108k
            (i4_y_off + (tu_size >> (depth + 1))));
231
108k
    }
232
1.19M
    else
233
1.19M
    {
234
1.19M
        if(tu_split_flag & 0x1)
235
114k
        {
236
            /* This piece of code will be entered for the 8x8, if it is split
237
            Update the 4 child TU's accordingly. */
238
239
114k
            (*pnum_tu_in_cu) += 4;
240
241
            /* TL TU update */
242
114k
            ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
243
244
114k
            ps_tu_prms->u1_x_off = i4_x_off;
245
246
114k
            ps_tu_prms->u1_y_off = i4_y_off;
247
248
            /* Early CBF is not done for 4x4 transforms */
249
114k
            ps_tu_prms->i4_early_cbf = 1;
250
251
114k
            ps_tu_prms++;
252
253
            /* TR TU update */
254
114k
            ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
255
256
114k
            ps_tu_prms->u1_x_off = i4_x_off + (tu_size >> (depth + 1));
257
258
114k
            ps_tu_prms->u1_y_off = i4_y_off;
259
260
            /* Early CBF is not done for 4x4 transforms */
261
114k
            ps_tu_prms->i4_early_cbf = 1;
262
263
114k
            ps_tu_prms++;
264
265
            /* BL TU update */
266
114k
            ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
267
268
114k
            ps_tu_prms->u1_x_off = i4_x_off;
269
270
114k
            ps_tu_prms->u1_y_off = i4_y_off + (tu_size >> (depth + 1));
271
272
            /* Early CBF is not done for 4x4 transforms */
273
114k
            ps_tu_prms->i4_early_cbf = 1;
274
275
114k
            ps_tu_prms++;
276
277
            /* BR TU update */
278
114k
            ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
279
280
114k
            ps_tu_prms->u1_x_off = i4_x_off + (tu_size >> (depth + 1));
281
282
114k
            ps_tu_prms->u1_y_off = i4_y_off + (tu_size >> (depth + 1));
283
284
            /* Early CBF is not done for 4x4 transforms */
285
114k
            ps_tu_prms->i4_early_cbf = 1;
286
114k
        }
287
1.08M
        else
288
1.08M
        {
289
            /* Update the TU params */
290
1.08M
            ps_tu_prms->u1_tu_size = tu_size >> depth;
291
292
1.08M
            ps_tu_prms->u1_x_off = i4_x_off;
293
294
1.08M
            ps_tu_prms->u1_y_off = i4_y_off;
295
296
1.08M
            (*pnum_tu_in_cu)++;
297
298
            /* Early CBF update for current TU */
299
1.08M
            ps_tu_prms->i4_early_cbf = tu_early_cbf & 0x1;
300
1.08M
        }
301
1.19M
        if((*pnum_tu_in_cu) < MAX_TU_IN_CTB)
302
1.19M
        {
303
1.19M
            ps_tu_prms++;
304
305
1.19M
            ps_tu_prms->u1_tu_size = tu_size;
306
1.19M
        }
307
1.19M
    }
308
309
1.30M
    return ps_tu_prms;
310
1.30M
}
311
312
/*!
313
******************************************************************************
314
* \if Function name : ihevce_compute_quant_rel_param \endif
315
*
316
* \brief
317
*    This function updates quantization related parameters like qp_mod_6 etc in
318
*       context according to new qp
319
*
320
* \date
321
*    08/01/2013
322
*
323
* \author
324
*    Ittiam
325
*
326
* \return
327
*
328
* List of Functions
329
*
330
*
331
******************************************************************************
332
*/
333
void ihevce_compute_quant_rel_param(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD8 i1_cu_qp)
{
    /*
     * Derives, for the given CU QP, the chroma QP, the QP/6 and QP%6 terms for
     * luma and chroma, and the quantisation rounding factors for inter and
     * intra prediction modes. All results are written into ps_ctxt.
     */

    /* Offset added to the QP for bit depths above 8 (6 per extra bit) */
    const WORD32 i4_qp_bd_offset = 6 * (ps_ctxt->u1_bit_depth - 8);
    WORD32 i4_luma_qp;
    WORD32 i4_chroma_qp;

    /* chroma_array_type 2 clamps the offset QP at 51, every other type maps */
    /* it through the chroma QP scale table                                  */
    ps_ctxt->i4_chrm_cu_qp =
        (ps_ctxt->u1_chroma_array_type == 2)
            ? MIN(i1_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51)
            : gai1_ihevc_chroma_qp_scale[i1_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET];

    i4_luma_qp = i1_cu_qp + i4_qp_bd_offset;
    i4_chroma_qp = ps_ctxt->i4_chrm_cu_qp + i4_qp_bd_offset;

    ps_ctxt->i4_cu_qp_div6 = i4_luma_qp / 6;
    ps_ctxt->i4_cu_qp_mod6 = i4_luma_qp % 6;
    ps_ctxt->i4_chrm_cu_qp_div6 = i4_chroma_qp / 6;
    ps_ctxt->i4_chrm_cu_qp_mod6 = i4_chroma_qp % 6;

    /* The macro is defined here, mid-function, exactly as before so that any */
    /* later test of it in this file still sees it.                           */
#define INTER_RND_QP_BY_6
#ifdef INTER_RND_QP_BY_6
    /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
    {
        ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] =
            (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)6) + 0.5f);
    }
#else
    /* quant factor without RDOQ is 1/3rd of shift for inter */
    ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3;
#endif

    if(ISLICE == ps_ctxt->i1_slice_type)
    {
        /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
        ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
            (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)3) + 0.5f);
    }
    else
    {
        /* Intra in an inter picture reuses the inter rounding factor. The   */
        /* alternative 1/3rd rounding (TRAQO_EXT_ENABLE_ONE_THIRD_RND) was   */
        /* hard-wired off with if(0) and has been removed as unreachable.    */
        ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
            ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER];
    }
}
383
384
/*!
385
******************************************************************************
386
* \if Function name : ihevce_populate_cl_cu_lambda_prms \endif
387
*
388
* \brief
389
*    Function which calculates the per-QP lambda tables for the current picture
390
*
391
* \param[in,out] ps_ctxt : encode loop ctxt pointer (its per-QP lambda tables are filled)
392
* \param[in] ps_frm_lamda : frame level lambda ctxt (luma and chroma lambda modifiers)
393
* \param[in] i4_slice_type : slice type of the current picture
394
* \param[in] i4_temporal_lyr_id : Current picture layer id
395
* \param[in] i4_lambda_type : lambda derivation mode (0, 1 or 2)
396
*
397
* \return
398
*    None
399
*
400
* \author
401
*  Ittiam
402
*
403
*****************************************************************************
404
*/
405
void ihevce_populate_cl_cu_lambda_prms(
406
    ihevce_enc_loop_ctxt_t *ps_ctxt,
407
    frm_lambda_ctxt_t *ps_frm_lamda,
408
    WORD32 i4_slice_type,
409
    WORD32 i4_temporal_lyr_id,
410
    WORD32 i4_lambda_type)
411
97.5k
{
412
97.5k
    WORD32 i4_curr_cu_qp, i4_curr_cu_qp_offset;
413
97.5k
    double lambda_modifier;
414
97.5k
    double lambda_uv_modifier;
415
97.5k
    double lambda;
416
97.5k
    double lambda_uv;
417
418
97.5k
    WORD32 i4_qp_bdoffset = 6 * (ps_ctxt->u1_bit_depth - 8);
419
420
    /*Populate lamda modifier */
421
97.5k
    ps_ctxt->i4_lamda_modifier = ps_frm_lamda->lambda_modifier;
422
97.5k
    ps_ctxt->i4_uv_lamda_modifier = ps_frm_lamda->lambda_uv_modifier;
423
97.5k
    ps_ctxt->i4_temporal_layer_id = i4_temporal_lyr_id;
424
425
97.5k
    for(i4_curr_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
426
5.07M
        i4_curr_cu_qp <= ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
427
4.97M
        i4_curr_cu_qp++)
428
4.97M
    {
429
4.97M
        WORD32 chroma_qp = (ps_ctxt->i4_chroma_format == IV_YUV_422SP_UV)
430
4.97M
                               ? MIN(i4_curr_cu_qp, 51)
431
4.97M
                               : gai1_ihevc_chroma_qp_scale[i4_curr_cu_qp + MAX_QP_BD_OFFSET];
432
433
4.97M
        i4_curr_cu_qp_offset = i4_curr_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
434
435
4.97M
        lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
436
4.97M
        lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
437
438
4.97M
        if((BSLICE == i4_slice_type) && (i4_temporal_lyr_id))
439
640k
        {
440
640k
            lambda_modifier = ps_frm_lamda->lambda_modifier *
441
640k
                              CLIP3((((double)(i4_curr_cu_qp - 12)) / 6.0), 2.00, 4.00);
442
640k
            lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier *
443
640k
                                 CLIP3((((double)(chroma_qp - 12)) / 6.0), 2.00, 4.00);
444
640k
        }
445
4.33M
        else
446
4.33M
        {
447
4.33M
            lambda_modifier = ps_frm_lamda->lambda_modifier;
448
4.33M
            lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier;
449
4.33M
        }
450
4.97M
        if(ps_ctxt->i4_use_const_lamda_modifier)
451
0
        {
452
0
            if(ISLICE == ps_ctxt->i1_slice_type)
453
0
            {
454
0
                lambda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
455
0
                lambda_uv_modifier = ps_ctxt->f_i_pic_lamda_modifier;
456
0
            }
457
0
            else
458
0
            {
459
0
                lambda_modifier = CONST_LAMDA_MOD_VAL;
460
0
                lambda_uv_modifier = CONST_LAMDA_MOD_VAL;
461
0
            }
462
0
        }
463
4.97M
        switch(i4_lambda_type)
464
4.97M
        {
465
0
        case 0:
466
0
        {
467
0
            i4_qp_bdoffset = 0;
468
469
0
            lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
470
0
            lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
471
472
0
            lambda *= lambda_modifier;
473
0
            lambda_uv *= lambda_uv_modifier;
474
475
0
            ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
476
0
                (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
477
478
0
            ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
479
0
                (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
480
481
0
            ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
482
0
                (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
483
0
            if(ps_ctxt->i4_use_const_lamda_modifier)
484
0
            {
485
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
486
0
                    (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
487
0
            }
488
0
            else
489
0
            {
490
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
491
0
                    (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
492
0
            }
493
494
0
            ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
495
0
                (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
496
497
0
            ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
498
0
                ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset];
499
500
0
            ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
501
0
                ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset];
502
503
0
            ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
504
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset];
505
506
0
            ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
507
0
                ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset];
508
509
0
            break;
510
0
        }
511
0
        case 1:
512
0
        {
513
0
            lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
514
0
            lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
515
516
0
            lambda *= lambda_modifier;
517
0
            lambda_uv *= lambda_uv_modifier;
518
519
0
            ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
520
0
                (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
521
522
0
            ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
523
0
                (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
524
525
0
            ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
526
0
                (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
527
0
            if(ps_ctxt->i4_use_const_lamda_modifier)
528
0
            {
529
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
530
0
                    (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
531
0
            }
532
0
            else
533
0
            {
534
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
535
0
                    (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
536
0
            }
537
0
            ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
538
0
                (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
539
540
0
            ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
541
0
                ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset];
542
543
0
            ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
544
0
                ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset];
545
546
0
            ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
547
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset];
548
549
0
            ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
550
0
                ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset];
551
552
0
            break;
553
0
        }
554
4.97M
        case 2:
555
4.97M
        {
556
4.97M
            lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
557
4.97M
            lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
558
559
4.97M
            lambda *= lambda_modifier;
560
4.97M
            lambda_uv *= lambda_uv_modifier;
561
562
4.97M
            ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
563
4.97M
                (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
564
565
4.97M
            ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
566
4.97M
                (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
567
568
4.97M
            ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
569
4.97M
                (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
570
571
4.97M
            if(ps_ctxt->i4_use_const_lamda_modifier)
572
0
            {
573
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
574
0
                    (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
575
0
            }
576
4.97M
            else
577
4.97M
            {
578
4.97M
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
579
4.97M
                    (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
580
4.97M
            }
581
4.97M
            ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
582
4.97M
                (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
583
584
            /* lambda corresponding to 8- bit, for metrics based on 8- bit ( Example 8bit SAD in encloop)*/
585
4.97M
            lambda = pow(2.0, (((double)(i4_curr_cu_qp - 12)) / 3.0));
586
4.97M
            lambda_uv = pow(2.0, (((double)(chroma_qp - 12)) / 3.0));
587
588
4.97M
            lambda *= lambda_modifier;
589
4.97M
            lambda_uv *= lambda_uv_modifier;
590
591
4.97M
            ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
592
4.97M
                (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
593
594
4.97M
            ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
595
4.97M
                (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
596
597
4.97M
            ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
598
4.97M
                (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
599
4.97M
            if(ps_ctxt->i4_use_const_lamda_modifier)
600
0
            {
601
0
                ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
602
0
                    (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
603
0
            }
604
4.97M
            else
605
4.97M
            {
606
4.97M
                ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
607
4.97M
                    (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
608
4.97M
            }
609
610
4.97M
            ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
611
4.97M
                (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
612
613
4.97M
            break;
614
0
        }
615
0
        default:
616
0
        {
617
            /* Intended to be a barren wasteland! */
618
0
            ASSERT(0);
619
0
        }
620
4.97M
        }
621
4.97M
    }
622
97.5k
}
623
624
/*!
625
******************************************************************************
626
* \if Function name : ihevce_get_cl_cu_lambda_prms \endif
627
*
628
* \brief
629
*    Function which fetches the lambda params for the given CU QP from the per-QP tables
630
*
631
* \param[in,out] ps_ctxt : encode loop ctxt pointer
632
* \param[in] i4_cur_cu_qp : QP of the current CU
633
*
634
* The SSD, SATD and SAD lambdas and the chroma cost weighing factor are
635
* copied from the per-QP tables held in ps_ctxt into its current-CU fields
636
*
637
* \return
638
*    None
639
*
640
* \author
641
*  Ittiam
642
*
643
*****************************************************************************
644
*/
645
void ihevce_get_cl_cu_lambda_prms(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 i4_cur_cu_qp)
{
    /* Every per-QP table is indexed by (QP + i1_qp_offset) */
    const WORD32 i4_tbl_bias = ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
    const WORD32 i4_luma_idx = i4_cur_cu_qp + i4_tbl_bias;
    WORD32 i4_chroma_qp;
    WORD32 i4_chroma_idx;

    /* Chroma QP: clamped at 51 for chroma_array_type 2, table mapped otherwise */
    if(2 == ps_ctxt->u1_chroma_array_type)
    {
        i4_chroma_qp = MIN(i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51);
    }
    else
    {
        i4_chroma_qp =
            gai1_ihevc_chroma_qp_scale
                [i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET];
    }
    i4_chroma_idx = i4_chroma_qp + i4_tbl_bias;

    /* closed loop ssd lambda is same as final lambda */
    ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_luma_idx];

    /* Chroma lambda and the chroma cost weight are both looked up at the */
    /* chroma QP index                                                    */
    ps_ctxt->i8_cl_ssd_lambda_chroma_qf = ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_chroma_idx];
    ps_ctxt->u4_chroma_cost_weighing_factor =
        ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_chroma_idx];

    /* SATD lambda comes from the regular table, SAD lambda from the */
    /* "type2" table                                                 */
    ps_ctxt->i4_satd_lamda = ps_ctxt->i4_satd_lamda_array[i4_luma_idx];
    ps_ctxt->i4_sad_lamda = ps_ctxt->i4_sad_type2_lamda_array[i4_luma_idx];
}
669
670
/*!
671
******************************************************************************
672
* \if Function name : ihevce_update_pred_qp \endif
673
*
674
* \brief
675
*    Computes pred qp for the given CU
676
*
677
* \param[in]
678
*
679
* \return
680
*
681
*
682
* \author
683
*  Ittiam
684
*
685
*****************************************************************************
686
*/
687
void ihevce_update_pred_qp(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 cu_pos_x, WORD32 cu_pos_y)
{
    /*
     * Stores the predicted QP for the CU at (cu_pos_x, cu_pos_y) in
     * ps_ctxt->i4_pred_qp. Neighbour QPs are read from ai4_qp_qg, which is
     * addressed as a grid with a row stride of 8.
     */
    WORD32 i4_qp_above;
    WORD32 i4_qp_left;
    WORD32 i4_result;

    /* First CU of the CTB: the previous QP is the prediction */
    if((0 == cu_pos_x) && (0 == cu_pos_y))
    {
        i4_result = ps_ctxt->i4_prev_QP;
        ps_ctxt->i4_pred_qp = i4_result;
        return;
    }

    /* A neighbour that falls outside the CTB is replaced by the previous QP */
    i4_qp_above = (0 == cu_pos_y) ? ps_ctxt->i4_prev_QP
                                  : ps_ctxt->ai4_qp_qg[(cu_pos_y - 1) * 8 + (cu_pos_x)];

    i4_qp_left = (0 == cu_pos_x) ? ps_ctxt->i4_prev_QP
                                 : ps_ctxt->ai4_qp_qg[(cu_pos_y)*8 + (cu_pos_x - 1)];

    /* Rounded average of the left and top QPs */
    i4_result = (i4_qp_left + i4_qp_above + 1) >> 1;
    ps_ctxt->i4_pred_qp = i4_result;
    return;
}
718
/*!
719
******************************************************************************
720
* \if Function name : ihevce_compute_cu_level_QP \endif
721
*
722
* \brief
723
*    Computes cu level QP with Traqo,Spatial Mod and In-frame RC
724
*
725
* \param[in]
726
*
727
* \return
728
*
729
*
730
* \author
731
*  Ittiam
732
*
733
*****************************************************************************
734
*/
735
void ihevce_compute_cu_level_QP(
736
    ihevce_enc_loop_ctxt_t *ps_ctxt,
737
    WORD32 i4_activity_for_qp,
738
    WORD32 i4_activity_for_lamda,
739
    WORD32 i4_reduce_qp)
740
6.46M
{
741
    /*modify quant related param in ctxt based on current cu qp*/
742
6.46M
    WORD32 i4_input_QP = ps_ctxt->i4_frame_mod_qp;
743
6.46M
    WORD32 cu_qp = i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
744
745
6.46M
    WORD32 i4_max_qp_allowed;
746
6.46M
    WORD32 i4_min_qp_allowed;
747
6.46M
    WORD32 i4_pred_qp;
748
749
6.46M
    i4_pred_qp = ps_ctxt->i4_pred_qp;
750
751
6.46M
    if(ps_ctxt->i4_sub_pic_level_rc)
752
0
    {
753
0
        i4_max_qp_allowed = (i4_pred_qp + (25 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 2)));
754
0
        i4_min_qp_allowed = (i4_pred_qp - (26 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 2)));
755
0
    }
756
6.46M
    else
757
6.46M
    {
758
6.46M
        i4_max_qp_allowed = (i4_input_QP + (7 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 4)));
759
6.46M
        i4_min_qp_allowed = (i4_input_QP - (18 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 4)));
760
6.46M
    }
761
6.46M
    if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6))
762
0
        return;
763
764
#if LAMDA_BASED_ON_QUANT
765
    i4_activity_for_lamda = i4_activity_for_qp;
766
#endif
767
768
6.46M
    if(i4_activity_for_qp != -1)
769
6.46M
    {
770
6.46M
        cu_qp = (ps_ctxt->ps_rc_quant_ctxt
771
6.46M
                     ->pi4_qp_to_qscale[i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]);
772
6.46M
        if(ps_ctxt->i4_qp_mod)
773
6.46M
        {
774
            /*Recompute the Qp as per enc thread's frame level Qp*/
775
6.46M
            ASSERT(i4_activity_for_qp > 0);
776
6.46M
            cu_qp = ((cu_qp * i4_activity_for_qp) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
777
6.46M
                    QP_LEVEL_MOD_ACT_FACTOR;
778
6.46M
        }
779
780
        // To avoid access of uninitialised Qscale to qp conversion table
781
6.46M
        if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale)
782
231k
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale;
783
6.23M
        else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale)
784
0
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale;
785
786
6.46M
        cu_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[cu_qp];
787
788
6.46M
        if((1 == i4_reduce_qp) && (cu_qp > 1))
789
0
            cu_qp--;
790
791
        /*CLIP the delta to obey standard allowed QP variation of (-26 + offset/2) to (25 + offset/2)*/
792
6.46M
        if(cu_qp > i4_max_qp_allowed)
793
0
            cu_qp = i4_max_qp_allowed;
794
6.46M
        else if(cu_qp < i4_min_qp_allowed)
795
0
            cu_qp = i4_min_qp_allowed;
796
797
        /* CLIP to maintain Qp between user configured and min and max Qp values*/
798
6.46M
        if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp)
799
0
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
800
6.46M
        else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp)
801
460k
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
802
803
        /*cu qp must be populated in cu_analyse_t struct*/
804
6.46M
        ps_ctxt->i4_cu_qp = cu_qp;
805
        /*recompute quant related param at every cu level*/
806
6.46M
        ihevce_compute_quant_rel_param(ps_ctxt, cu_qp);
807
6.46M
    }
808
809
    /*Decoupling qp and lamda calculation */
810
6.46M
    if(i4_activity_for_lamda != -1)
811
6.46M
    {
812
6.46M
        cu_qp = (ps_ctxt->ps_rc_quant_ctxt
813
6.46M
                     ->pi4_qp_to_qscale[i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]);
814
815
6.46M
        if(ps_ctxt->i4_qp_mod)
816
6.46M
        {
817
6.46M
#if MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON
818
            /*Recompute the Qp as per enc thread's frame level Qp*/
819
6.46M
            ASSERT(i4_activity_for_lamda > 0);
820
6.46M
            cu_qp = ((cu_qp * i4_activity_for_lamda) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
821
6.46M
                    QP_LEVEL_MOD_ACT_FACTOR;
822
6.46M
#endif
823
6.46M
        }
824
6.46M
        if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale)
825
128k
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale;
826
6.33M
        else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale)
827
0
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale;
828
829
6.46M
        cu_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[cu_qp];
830
831
        /*CLIP the delta to obey standard allowed QP variation of (-26 + offset/2) to (25 + offset/2)*/
832
6.46M
        if(cu_qp > i4_max_qp_allowed)
833
0
            cu_qp = i4_max_qp_allowed;
834
6.46M
        else if(cu_qp < i4_min_qp_allowed)
835
0
            cu_qp = i4_min_qp_allowed;
836
837
        /* CLIP to maintain Qp between user configured and min and max Qp values*/
838
6.46M
        if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp)
839
0
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
840
6.46M
        else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp)
841
1.02M
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
842
        /* get frame level lambda params */
843
6.46M
        ihevce_get_cl_cu_lambda_prms(
844
6.46M
            ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? cu_qp : ps_ctxt->i4_frame_qp);
845
6.46M
    }
846
6.46M
}
847
848
void ihevce_update_cu_level_qp_lamda(
849
    ihevce_enc_loop_ctxt_t *ps_ctxt, cu_analyse_t *ps_cu_analyse, WORD32 trans_size, WORD32 is_intra)
850
6.46M
{
851
6.46M
    WORD32 i4_act_counter = 0, i4_act_counter_lamda = 0;
852
853
6.46M
    if(ps_cu_analyse->u1_cu_size == 64)
854
152k
    {
855
152k
        ASSERT((trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4));
856
152k
        i4_act_counter = (trans_size == 16) + 2 * ((trans_size == 8) || (trans_size == 4));
857
152k
        i4_act_counter_lamda = 3;
858
152k
    }
859
6.31M
    else if(ps_cu_analyse->u1_cu_size == 32)
860
1.11M
    {
861
1.11M
        ASSERT((trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4));
862
1.11M
        i4_act_counter = (trans_size == 16) + 2 * ((trans_size == 8) || (trans_size == 4));
863
1.11M
        i4_act_counter_lamda = 0;
864
1.11M
    }
865
5.19M
    else if(ps_cu_analyse->u1_cu_size == 16)
866
2.81M
    {
867
2.81M
        ASSERT((trans_size == 16) || (trans_size == 8) || (trans_size == 4));
868
2.81M
        i4_act_counter = (trans_size == 8) || (trans_size == 4);
869
2.81M
        i4_act_counter_lamda = 0;
870
2.81M
    }
871
2.38M
    else if(ps_cu_analyse->u1_cu_size == 8)
872
2.38M
    {
873
2.38M
        ASSERT((trans_size == 8) || (trans_size == 4));
874
2.38M
        i4_act_counter = 1;
875
2.38M
        i4_act_counter_lamda = 0;
876
2.38M
    }
877
0
    else
878
0
    {
879
0
        ASSERT(0);
880
0
    }
881
882
6.46M
    if(ps_ctxt->i4_use_ctb_level_lamda)
883
0
    {
884
0
        ihevce_compute_cu_level_QP(
885
0
            ps_ctxt, ps_cu_analyse->i4_act_factor[i4_act_counter][is_intra], -1, 0);
886
0
    }
887
6.46M
    else
888
6.46M
    {
889
6.46M
        ihevce_compute_cu_level_QP(
890
6.46M
            ps_ctxt,
891
6.46M
            ps_cu_analyse->i4_act_factor[i4_act_counter][is_intra],
892
6.46M
            ps_cu_analyse->i4_act_factor[i4_act_counter_lamda][is_intra],
893
6.46M
            0);
894
6.46M
    }
895
896
6.46M
    ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp;
897
6.46M
}
898
899
/**
900
*******************************************************************************
901
* \if Function name : ihevce_scan_coeffs \endif
902
*
903
* @brief * Computes the coeff buffer for a coded TU for entropy coding
904
*
905
* @par   Description
906
* Computes the coeff buffer for a coded TU for entropy coding
907
*
908
* \param[in] pi2_quan_coeffs Quantized coefficient context
909
*
910
* \param[in] scan_idx Scan index specifying the scan order
911
*
912
* \param[in] trans_size Transform unit size
913
*
914
* \param[inout] pu1_out_data output coeff buffer for a coded TU for entropy coding
915
*
916
* \param[in] pu1_csbf_buf csb flag buffer
917
*
918
* @returns num_bytes
919
* Number of bytes written to pu1_out_data
920
*
921
* @remarks
922
*
923
* \author
924
*  Ittiam
925
*
926
*******************************************************************************
927
*/
928
929
WORD32 ihevce_scan_coeffs(
930
    WORD16 *pi2_quant_coeffs,
931
    WORD32 *pi4_subBlock2csbfId_map,
932
    WORD32 scan_idx,
933
    WORD32 trans_size,
934
    UWORD8 *pu1_out_data,
935
    UWORD8 *pu1_csbf_buf,
936
    WORD32 i4_csbf_stride)
937
18.8M
{
938
18.8M
    WORD32 i, trans_unit_idx, num_gt1_flag;
939
18.8M
    UWORD16 u2_csbf0flags;
940
18.8M
    WORD32 num_bytes = 0;
941
18.8M
    UWORD8 *pu1_trans_table;
942
18.8M
    UWORD8 *pu1_csb_table;
943
18.8M
    WORD32 shift_value, mask_value;
944
18.8M
    UWORD16 u2_sig_coeff_abs_gt0_flags = 0, u2_sig_coeff_abs_gt1_flags = 0;
945
18.8M
    UWORD16 u2_sign_flags;
946
18.8M
    UWORD16 u2_abs_coeff_remaining[16];
947
18.8M
    WORD32 blk_row, blk_col;
948
949
18.8M
    UWORD8 *pu1_out_data_header;
950
18.8M
    UWORD16 *pu2_out_data_coeff;
951
952
18.8M
    WORD32 x_pos, y_pos;
953
18.8M
    WORD32 quant_coeff;
954
955
18.8M
    WORD32 num_gt0_flag;
956
18.8M
    (void)i4_csbf_stride;
957
18.8M
    pu1_out_data_header = pu1_out_data;
958
    /* Need only last 3 bits, rest are reserved for debugging and making */
959
    /* WORD alignment */
960
18.8M
    u2_csbf0flags = 0xBAD0;
961
962
    /* Select proper order for your transform unit and csb based on scan_idx*/
963
    /* and the trans_size */
964
965
    /* scan order inside a csb */
966
18.8M
    pu1_csb_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
967
    /* GETRANGE will give the log_2 of trans_size to shift_value */
968
18.8M
    GETRANGE(shift_value, trans_size);
969
18.8M
    shift_value = shift_value - 3; /* for finding. row no. from scan index */
970
18.8M
    mask_value = (trans_size / 4) - 1; /*for finding the col. no. from scan index*/
971
18.8M
    switch(trans_size)
972
18.8M
    {
973
414k
    case 32:
974
414k
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_8x8[scan_idx][0]);
975
414k
        break;
976
1.44M
    case 16:
977
1.44M
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
978
1.44M
        break;
979
3.51M
    case 8:
980
3.51M
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_2x2[scan_idx][0]);
981
3.51M
        break;
982
13.4M
    case 4:
983
13.4M
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_1x1[0]);
984
13.4M
        break;
985
0
    default:
986
0
        DBG_PRINTF("Invalid Trans Size\n");
987
0
        return -1;
988
0
        break;
989
18.8M
    }
990
991
    /*go through each csb in the scan order for first non-zero coded sub-block*/
992
40.7M
    for(trans_unit_idx = (trans_size * trans_size / 16) - 1; trans_unit_idx >= 0; trans_unit_idx--)
993
40.7M
    {
994
        /* check for the first csb flag in our scan order */
995
40.7M
        if(pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]])
996
18.8M
        {
997
18.8M
            UWORD8 u1_last_x, u1_last_y;
998
            /* row of csb */
999
18.8M
            blk_row = pu1_trans_table[trans_unit_idx] >> shift_value;
1000
            /* col of csb */
1001
18.8M
            blk_col = pu1_trans_table[trans_unit_idx] & mask_value;
1002
1003
            /*check for the 1st non-0 values inside the csb in our scan order*/
1004
75.5M
            for(i = 15; i >= 0; i--)
1005
75.5M
            {
1006
75.5M
                x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
1007
75.5M
                y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
1008
1009
75.5M
                quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
1010
1011
75.5M
                if(quant_coeff != 0)
1012
18.8M
                    break;
1013
75.5M
            }
1014
1015
18.8M
            ASSERT(i >= 0);
1016
1017
18.8M
            u1_last_x = x_pos;
1018
18.8M
            u1_last_y = y_pos;
1019
1020
            /* storing last_x and last_y */
1021
18.8M
            *pu1_out_data_header = u1_last_x;
1022
18.8M
            pu1_out_data_header++;
1023
18.8M
            num_bytes++;
1024
18.8M
            *pu1_out_data_header = u1_last_y;
1025
18.8M
            pu1_out_data_header++;
1026
18.8M
            num_bytes++;
1027
1028
            /* storing the scan order */
1029
18.8M
            *pu1_out_data_header = scan_idx;
1030
18.8M
            pu1_out_data_header++;
1031
18.8M
            num_bytes++;
1032
            /* storing last_sub_block pos. in scan order count */
1033
18.8M
            *pu1_out_data_header = trans_unit_idx;
1034
18.8M
            pu1_out_data_header++;
1035
18.8M
            num_bytes++;
1036
1037
            /*stored the first 4 bytes, now all are word16. So word16 pointer*/
1038
18.8M
            pu2_out_data_coeff = (UWORD16 *)pu1_out_data_header;
1039
1040
            /* u2_csbf0flags word */
1041
18.8M
            u2_csbf0flags = 0xBAD0 | 1; /*since right&bottom csbf is 0*/
1042
            /* storing u2_csbf0flags word */
1043
18.8M
            *pu2_out_data_coeff = u2_csbf0flags;
1044
18.8M
            pu2_out_data_coeff++;
1045
18.8M
            num_bytes += 2;
1046
1047
18.8M
            num_gt0_flag = 1;
1048
18.8M
            num_gt1_flag = 0;
1049
18.8M
            u2_sign_flags = 0;
1050
1051
            /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
1052
18.8M
            u2_sig_coeff_abs_gt0_flags = u2_sig_coeff_abs_gt0_flags | (1 << i);
1053
18.8M
            if(abs(quant_coeff) > 1)
1054
10.0M
            {
1055
                /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
1056
10.0M
                u2_sig_coeff_abs_gt1_flags = u2_sig_coeff_abs_gt1_flags | (1 << i);
1057
                /* update u2_abs_coeff_remaining */
1058
10.0M
                u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
1059
1060
10.0M
                num_gt1_flag++;
1061
10.0M
            }
1062
1063
18.8M
            if(quant_coeff < 0)
1064
9.81M
            {
1065
                /* set the i th bit of u2_sign_flags */
1066
9.81M
                u2_sign_flags = u2_sign_flags | (1 << i);
1067
9.81M
            }
1068
1069
            /* Test remaining elements in our scan order */
1070
            /* Can optimize further by CLZ macro */
1071
245M
            for(i = i - 1; i >= 0; i--)
1072
226M
            {
1073
226M
                x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
1074
226M
                y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
1075
1076
226M
                quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
1077
1078
226M
                if(quant_coeff != 0)
1079
183M
                {
1080
                    /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
1081
183M
                    u2_sig_coeff_abs_gt0_flags |= (1 << i);
1082
1083
183M
                    if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE))
1084
151M
                    {
1085
                        /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
1086
151M
                        u2_sig_coeff_abs_gt1_flags |= (1 << i);
1087
1088
                        /* update u2_abs_coeff_remaining */
1089
151M
                        u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
1090
1091
151M
                        num_gt1_flag++; /*n0. of Ones in sig_coeff_abs_gt1_flag*/
1092
151M
                    }
1093
1094
183M
                    if(quant_coeff < 0)
1095
92.6M
                    {
1096
                        /* set the i th bit of u2_sign_flags */
1097
92.6M
                        u2_sign_flags |= (1 << i);
1098
92.6M
                    }
1099
1100
183M
                    num_gt0_flag++;
1101
183M
                }
1102
226M
            }
1103
1104
            /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */
1105
18.8M
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags;
1106
18.8M
            pu2_out_data_coeff++;
1107
18.8M
            num_bytes += 2;
1108
            /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */
1109
18.8M
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags;
1110
18.8M
            pu2_out_data_coeff++;
1111
18.8M
            num_bytes += 2;
1112
            /* storing u2_sign_flags 2 bytes */
1113
18.8M
            *pu2_out_data_coeff = u2_sign_flags;
1114
18.8M
            pu2_out_data_coeff++;
1115
18.8M
            num_bytes += 2;
1116
1117
            /* Store the u2_abs_coeff_remaining[] */
1118
180M
            for(i = 0; i < num_gt1_flag; i++)
1119
161M
            {
1120
                /* storing u2_abs_coeff_remaining[i] 2 bytes */
1121
161M
                *pu2_out_data_coeff = u2_abs_coeff_remaining[i];
1122
161M
                pu2_out_data_coeff++;
1123
161M
                num_bytes += 2;
1124
161M
            }
1125
1126
18.8M
            break; /*We just need this loop for finding 1st non-zero csb only*/
1127
18.8M
        }
1128
40.7M
    }
1129
1130
    /* go through remaining csb in the scan order */
1131
55.3M
    for(trans_unit_idx = trans_unit_idx - 1; trans_unit_idx >= 0; trans_unit_idx--)
1132
36.4M
    {
1133
36.4M
        blk_row = pu1_trans_table[trans_unit_idx] >> shift_value; /*row of csb*/
1134
36.4M
        blk_col = pu1_trans_table[trans_unit_idx] & mask_value; /*col of csb*/
1135
1136
        /* u2_csbf0flags word */
1137
36.4M
        u2_csbf0flags = 0xBAD0 | /* assuming csbf_buf has only 0 or 1 values */
1138
36.4M
                        (pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]]);
1139
1140
        /********************************************************************/
1141
        /* Minor hack: As per HEVC spec csbf in not signalled in stream for */
1142
        /* block0, instead sig coeff map is directly signalled. This is     */
1143
        /* taken care by forcing csbf for block0 to be 1 even if it is 0    */
1144
        /********************************************************************/
1145
36.4M
        if(0 == trans_unit_idx)
1146
4.60M
        {
1147
4.60M
            u2_csbf0flags |= 1;
1148
4.60M
        }
1149
1150
36.4M
        if((blk_col + 1 < trans_size / 4)) /* checking right boundary */
1151
30.7M
        {
1152
30.7M
            if(pu1_csbf_buf[pi4_subBlock2csbfId_map[blk_row * trans_size / 4 + blk_col + 1]])
1153
24.1M
            {
1154
                /* set the 2nd bit of u2_csbf0flags for right csbf */
1155
24.1M
                u2_csbf0flags = u2_csbf0flags | (1 << 1);
1156
24.1M
            }
1157
30.7M
        }
1158
36.4M
        if((blk_row + 1 < trans_size / 4)) /* checking bottom oundary */
1159
30.0M
        {
1160
30.0M
            if(pu1_csbf_buf[pi4_subBlock2csbfId_map[(blk_row + 1) * trans_size / 4 + blk_col]])
1161
24.6M
            {
1162
                /* set the 3rd bit of u2_csbf0flags  for bottom csbf */
1163
24.6M
                u2_csbf0flags = u2_csbf0flags | (1 << 2);
1164
24.6M
            }
1165
30.0M
        }
1166
1167
        /* storing u2_csbf0flags word */
1168
36.4M
        *pu2_out_data_coeff = u2_csbf0flags;
1169
36.4M
        pu2_out_data_coeff++;
1170
36.4M
        num_bytes += 2;
1171
1172
        /* check for the csb flag in our scan order */
1173
36.4M
        if(u2_csbf0flags & 0x1)
1174
31.6M
        {
1175
31.6M
            u2_sig_coeff_abs_gt0_flags = 0;
1176
31.6M
            u2_sig_coeff_abs_gt1_flags = 0;
1177
31.6M
            u2_sign_flags = 0;
1178
1179
31.6M
            num_gt0_flag = 0;
1180
31.6M
            num_gt1_flag = 0;
1181
            /* check for the non-0 values inside the csb in our scan order */
1182
            /* Can optimize further by CLZ macro */
1183
538M
            for(i = 15; i >= 0; i--)
1184
507M
            {
1185
507M
                x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
1186
507M
                y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
1187
1188
507M
                quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
1189
1190
507M
                if(quant_coeff != 0)
1191
385M
                {
1192
                    /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
1193
385M
                    u2_sig_coeff_abs_gt0_flags |= (1 << i);
1194
1195
385M
                    if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE))
1196
314M
                    {
1197
                        /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
1198
314M
                        u2_sig_coeff_abs_gt1_flags |= (1 << i);
1199
1200
                        /* update u2_abs_coeff_remaining */
1201
314M
                        u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
1202
1203
314M
                        num_gt1_flag++;
1204
314M
                    }
1205
1206
385M
                    if(quant_coeff < 0)
1207
197M
                    {
1208
                        /* set the i th bit of u2_sign_flags */
1209
197M
                        u2_sign_flags = u2_sign_flags | (1 << i);
1210
197M
                    }
1211
1212
385M
                    num_gt0_flag++;
1213
385M
                }
1214
507M
            }
1215
1216
            /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */
1217
31.6M
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags;
1218
31.6M
            pu2_out_data_coeff++;
1219
31.6M
            num_bytes += 2;
1220
1221
            /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */
1222
31.6M
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags;
1223
31.6M
            pu2_out_data_coeff++;
1224
31.6M
            num_bytes += 2;
1225
1226
            /* storing u2_sign_flags 2 bytes */
1227
31.6M
            *pu2_out_data_coeff = u2_sign_flags;
1228
31.6M
            pu2_out_data_coeff++;
1229
31.6M
            num_bytes += 2;
1230
1231
            /* Store the u2_abs_coeff_remaining[] */
1232
346M
            for(i = 0; i < num_gt1_flag; i++)
1233
314M
            {
1234
                /* storing u2_abs_coeff_remaining[i] 2 bytes */
1235
314M
                *pu2_out_data_coeff = u2_abs_coeff_remaining[i];
1236
314M
                pu2_out_data_coeff++;
1237
314M
                num_bytes += 2;
1238
314M
            }
1239
31.6M
        }
1240
36.4M
    }
1241
1242
18.8M
    return num_bytes; /* Return the number of bytes written to out_data */
1243
18.8M
}
1244
1245
/**
1246
*******************************************************************************
1247
* \if Function name : ihevce_populate_intra_pred_mode \endif
1248
*
1249
* \brief * populates intra pred modes,b2_mpm_idx,b1_prev_intra_luma_pred_flag &
1250
* b5_rem_intra_pred_mode for a CU based on nieghbouring CUs,
1251
*
1252
* \par   Description
1253
* Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
1254
* for a CU
1255
*
1256
* \param[in] top_intra_mode Top intra mode
1257
* \param[in] left_intra_mode Left intra mode
1258
* \param[in] available_top Top availability flag
1259
* \param[in] available_left Left availability flag
1260
* \param[in] cu_pos_y CU 'y' position
1261
* \param[in] ps_cand_mode_list pointer to populate candidate list
1262
*
1263
* \returns none
1264
*
1265
* \author
1266
*  Ittiam
1267
*
1268
*******************************************************************************
1269
*/
1270
1271
void ihevce_populate_intra_pred_mode(
1272
    WORD32 top_intra_mode,
1273
    WORD32 left_intra_mode,
1274
    WORD32 available_top,
1275
    WORD32 available_left,
1276
    WORD32 cu_pos_y,
1277
    WORD32 *ps_cand_mode_list)
1278
1.52M
{
1279
    /* local variables */
1280
1.52M
    WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top;
1281
1282
    /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */
1283
    /* N = top */
1284
1.52M
    if(0 == available_top)
1285
193k
    {
1286
193k
        cand_intra_pred_mode_top = INTRA_DC;
1287
193k
    }
1288
    /* for neighbour != INTRA, setting DC is done outside */
1289
1.33M
    else if(0 == cu_pos_y) /* It's on the CTB boundary */
1290
61.2k
    {
1291
61.2k
        cand_intra_pred_mode_top = INTRA_DC;
1292
61.2k
    }
1293
1.27M
    else
1294
1.27M
    {
1295
1.27M
        cand_intra_pred_mode_top = top_intra_mode;
1296
1.27M
    }
1297
1298
    /* N = left */
1299
1.52M
    if(0 == available_left)
1300
155k
    {
1301
155k
        cand_intra_pred_mode_left = INTRA_DC;
1302
155k
    }
1303
    /* for neighbour != INTRA, setting DC is done outside */
1304
1.36M
    else
1305
1.36M
    {
1306
1.36M
        cand_intra_pred_mode_left = left_intra_mode;
1307
1.36M
    }
1308
1309
    /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */
1310
1.52M
    if(cand_intra_pred_mode_left == cand_intra_pred_mode_top)
1311
522k
    {
1312
522k
        if(cand_intra_pred_mode_left < 2)
1313
360k
        {
1314
360k
            ps_cand_mode_list[0] = INTRA_PLANAR;
1315
360k
            ps_cand_mode_list[1] = INTRA_DC;
1316
360k
            ps_cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */
1317
360k
        }
1318
162k
        else
1319
162k
        {
1320
162k
            ps_cand_mode_list[0] = cand_intra_pred_mode_left;
1321
162k
            ps_cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32);
1322
162k
            ps_cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32);
1323
162k
        }
1324
522k
    }
1325
1.00M
    else
1326
1.00M
    {
1327
1.00M
        ps_cand_mode_list[0] = cand_intra_pred_mode_left;
1328
1.00M
        ps_cand_mode_list[1] = cand_intra_pred_mode_top;
1329
1330
1.00M
        if((cand_intra_pred_mode_left != INTRA_PLANAR) &&
1331
778k
           (cand_intra_pred_mode_top != INTRA_PLANAR))
1332
572k
        {
1333
572k
            ps_cand_mode_list[2] = INTRA_PLANAR;
1334
572k
        }
1335
429k
        else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC))
1336
168k
        {
1337
168k
            ps_cand_mode_list[2] = INTRA_DC;
1338
168k
        }
1339
261k
        else
1340
261k
        {
1341
261k
            ps_cand_mode_list[2] = INTRA_ANGULAR(26);
1342
261k
        }
1343
1.00M
    }
1344
1.52M
}
1345
/**
1346
*******************************************************************************
1347
* \if Function name : ihevce_intra_pred_mode_signaling \endif
1348
*
1349
* \brief * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx &
1350
* b5_rem_intra_pred_mode for a CU
1351
*
1352
* \par   Description
1353
* Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
1354
* for a CU
1355
*
1356
* \param[in] ps_nbr_top Top neighbour context
1357
* \param[in] ps_nbr_left Left neighbour context
1358
* \param[in] available_top Top availability flag
1359
* \param[in] available_left Left availability flag
1360
* \param[in] cu_pos_y CU 'y' position
1361
* \param[in] luma_intra_pred_mode_current the intra_pred_mode of current block
1362
* \param[inout] ps_intra_pred_mode_current
1363
* Pointer to structure having b1_prev_intra_luma_pred_flag, b2_mpm_idx and
1364
* b5_rem_intra_pred_mode
1365
*
1366
* \returns none
1367
*
1368
* \author
1369
*  Ittiam
1370
*
1371
*******************************************************************************
1372
*/
1373
1374
void ihevce_intra_pred_mode_signaling(
1375
    WORD32 top_intra_mode,
1376
    WORD32 left_intra_mode,
1377
    WORD32 available_top,
1378
    WORD32 available_left,
1379
    WORD32 cu_pos_y,
1380
    WORD32 luma_intra_pred_mode_current,
1381
    intra_prev_rem_flags_t *ps_intra_pred_mode_current)
1382
23.6M
{
1383
    /* local variables */
1384
23.6M
    WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top;
1385
23.6M
    WORD32 cand_mode_list[3];
1386
1387
23.6M
    ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0;
1388
23.6M
    ps_intra_pred_mode_current->b2_mpm_idx = 0;  // for safety purpose
1389
23.6M
    ps_intra_pred_mode_current->b5_rem_intra_pred_mode = 0;
1390
1391
    /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */
1392
    /* N = top */
1393
23.6M
    if(0 == available_top)
1394
2.95M
    {
1395
2.95M
        cand_intra_pred_mode_top = INTRA_DC;
1396
2.95M
    }
1397
    /* for neighbour != INTRA, setting DC is done outside */
1398
20.6M
    else if(0 == cu_pos_y) /* It's on the CTB boundary */
1399
1.55M
    {
1400
1.55M
        cand_intra_pred_mode_top = INTRA_DC;
1401
1.55M
    }
1402
19.1M
    else
1403
19.1M
    {
1404
19.1M
        cand_intra_pred_mode_top = top_intra_mode;
1405
19.1M
    }
1406
1407
    /* N = left */
1408
23.6M
    if(0 == available_left)
1409
2.39M
    {
1410
2.39M
        cand_intra_pred_mode_left = INTRA_DC;
1411
2.39M
    }
1412
    /* for neighbour != INTRA, setting DC is done outside */
1413
21.2M
    else
1414
21.2M
    {
1415
21.2M
        cand_intra_pred_mode_left = left_intra_mode;
1416
21.2M
    }
1417
1418
    /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */
1419
23.6M
    if(cand_intra_pred_mode_left == cand_intra_pred_mode_top)
1420
10.8M
    {
1421
10.8M
        if(cand_intra_pred_mode_left < 2)
1422
8.18M
        {
1423
8.18M
            cand_mode_list[0] = INTRA_PLANAR;
1424
8.18M
            cand_mode_list[1] = INTRA_DC;
1425
8.18M
            cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */
1426
8.18M
        }
1427
2.64M
        else
1428
2.64M
        {
1429
2.64M
            cand_mode_list[0] = cand_intra_pred_mode_left;
1430
2.64M
            cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32);
1431
2.64M
            cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32);
1432
2.64M
        }
1433
10.8M
    }
1434
12.7M
    else
1435
12.7M
    {
1436
12.7M
        cand_mode_list[0] = cand_intra_pred_mode_left;
1437
12.7M
        cand_mode_list[1] = cand_intra_pred_mode_top;
1438
1439
12.7M
        if((cand_intra_pred_mode_left != INTRA_PLANAR) &&
1440
8.97M
           (cand_intra_pred_mode_top != INTRA_PLANAR))
1441
6.10M
        {
1442
6.10M
            cand_mode_list[2] = INTRA_PLANAR;
1443
6.10M
        }
1444
6.69M
        else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC))
1445
2.30M
        {
1446
2.30M
            cand_mode_list[2] = INTRA_DC;
1447
2.30M
        }
1448
4.39M
        else
1449
4.39M
        {
1450
4.39M
            cand_mode_list[2] = INTRA_ANGULAR(26);
1451
4.39M
        }
1452
12.7M
    }
1453
1454
    /* Signal Generation */
1455
1456
    /* Flag & mpm_index generation */
1457
23.6M
    if(cand_mode_list[0] == luma_intra_pred_mode_current)
1458
7.67M
    {
1459
7.67M
        ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
1460
7.67M
        ps_intra_pred_mode_current->b2_mpm_idx = 0;
1461
7.67M
    }
1462
15.9M
    else if(cand_mode_list[1] == luma_intra_pred_mode_current)
1463
6.25M
    {
1464
6.25M
        ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
1465
6.25M
        ps_intra_pred_mode_current->b2_mpm_idx = 1;
1466
6.25M
    }
1467
9.68M
    else if(cand_mode_list[2] == luma_intra_pred_mode_current)
1468
3.34M
    {
1469
3.34M
        ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
1470
3.34M
        ps_intra_pred_mode_current->b2_mpm_idx = 2;
1471
3.34M
    }
1472
    /* Flag & b5_rem_intra_pred_mode generation */
1473
6.34M
    else
1474
6.34M
    {
1475
6.34M
        WORD32 rem_mode;
1476
1477
6.34M
        ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0;
1478
1479
        /* sorting cand_mode_list */
1480
6.34M
        if(cand_mode_list[0] > cand_mode_list[1])
1481
2.86M
        {
1482
2.86M
            SWAP(cand_mode_list[0], cand_mode_list[1]);
1483
2.86M
        }
1484
6.34M
        if(cand_mode_list[0] > cand_mode_list[2])
1485
2.56M
        {
1486
2.56M
            SWAP(cand_mode_list[0], cand_mode_list[2]);
1487
2.56M
        }
1488
6.34M
        if(cand_mode_list[1] > cand_mode_list[2])
1489
3.36M
        {
1490
3.36M
            SWAP(cand_mode_list[1], cand_mode_list[2]);
1491
3.36M
        }
1492
1493
6.34M
        rem_mode = luma_intra_pred_mode_current;
1494
1495
6.34M
        if((rem_mode) >= cand_mode_list[2])
1496
2.13M
        {
1497
2.13M
            (rem_mode)--;
1498
2.13M
        }
1499
6.34M
        if((rem_mode) >= cand_mode_list[1])
1500
5.27M
        {
1501
5.27M
            (rem_mode)--;
1502
5.27M
        }
1503
6.34M
        if((rem_mode) >= cand_mode_list[0])
1504
5.81M
        {
1505
5.81M
            (rem_mode)--;
1506
5.81M
        }
1507
6.34M
        ps_intra_pred_mode_current->b5_rem_intra_pred_mode = rem_mode;
1508
6.34M
    }
1509
23.6M
}
1510
1511
void ihevce_quant_rounding_factor_gen(
1512
    WORD32 i4_trans_size,
1513
    WORD32 is_luma,
1514
    rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt,
1515
    WORD32 *pi4_quant_round_0_1,
1516
    WORD32 *pi4_quant_round_1_2,
1517
    double i4_lamda_modifier,
1518
    UWORD8 i4_is_tu_level_quant_rounding)
1519
7.41M
{
1520
    //WORD32 i4_scan_idx = ps_ctxt->i4_scan_idx;
1521
7.41M
    UWORD8 *pu1_ctxt_model;
1522
7.41M
    WORD32 scan_pos;
1523
7.41M
    WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag    */
1524
7.41M
    WORD32 abs_gt1_base_ctxt;
1525
7.41M
    WORD32 log2_tr_size, i;
1526
7.41M
    UWORD16 u4_bits_estimated_r0, u4_bits_estimated_r1, u4_bits_estimated_r2;
1527
7.41M
    UWORD16 u4_bits_estimated_r1_temp;
1528
7.41M
    WORD32 j = 0;
1529
7.41M
    WORD32 k = 0;
1530
7.41M
    WORD32 temp2;
1531
1532
7.41M
    double i4_lamda_mod = i4_lamda_modifier * pow(2.0, (-8.0 / 3.0));
1533
7.41M
    LWORD64 lamda_mod = (LWORD64)(i4_lamda_mod * (1 << LAMDA_Q_SHIFT_FACT));
1534
    /* transform size to log2transform size */
1535
7.41M
    GETRANGE(log2_tr_size, i4_trans_size);
1536
7.41M
    log2_tr_size -= 1;
1537
1538
7.41M
    if(1 == i4_is_tu_level_quant_rounding)
1539
0
    {
1540
0
        entropy_context_t *ps_cur_tu_entropy;
1541
0
        cab_ctxt_t *ps_cabac;
1542
0
        WORD32 curr_buf_idx = ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
1543
0
        ps_cur_tu_entropy = &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[curr_buf_idx];
1544
1545
0
        ps_cabac = &ps_cur_tu_entropy->s_cabac_ctxt;
1546
1547
0
        pu1_ctxt_model = &ps_cabac->au1_ctxt_models[0];
1548
0
    }
1549
7.41M
    else
1550
7.41M
    {
1551
7.41M
        pu1_ctxt_model = &ps_rdopt_entropy_ctxt->au1_init_cabac_ctxt_states[0];
1552
7.41M
    }
1553
    /*If transform size is 4x4, then only one sub-block*/
1554
7.41M
    if(is_luma)
1555
4.51M
    {
1556
4.51M
        sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
1557
4.51M
        abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG;
1558
1559
4.51M
        if(3 == log2_tr_size)
1560
1.61M
        {
1561
            /* 8x8 transform size */
1562
            /* Assuming diagonal scan idx for now */
1563
1.61M
            sig_coeff_base_ctxt += 9;
1564
1.61M
        }
1565
2.89M
        else if(3 < log2_tr_size)
1566
1.28M
        {
1567
            /* larger transform sizes */
1568
1.28M
            sig_coeff_base_ctxt += 21;
1569
1.28M
        }
1570
4.51M
    }
1571
2.89M
    else
1572
2.89M
    {
1573
        /* chroma context initializations */
1574
2.89M
        sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
1575
2.89M
        abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16;
1576
1577
2.89M
        if(3 == log2_tr_size)
1578
960k
        {
1579
            /* 8x8 transform size */
1580
960k
            sig_coeff_base_ctxt += 9;
1581
960k
        }
1582
1.93M
        else if(3 < log2_tr_size)
1583
319k
        {
1584
            /* larger transform sizes */
1585
319k
            sig_coeff_base_ctxt += 12;
1586
319k
        }
1587
2.89M
    }
1588
1589
    /*Transform size of 4x4 will have only a single CSB */
1590
    /* derive the context inc as per section 9.3.3.1.4 */
1591
1592
7.41M
    if(2 == log2_tr_size)
1593
3.23M
    {
1594
3.23M
        UWORD8 sig_ctxinc;
1595
3.23M
        WORD32 state_mps;
1596
3.23M
        WORD32 gt1_ctxt = 0;
1597
3.23M
        WORD32 ctxt_set = 0;
1598
3.23M
        WORD32 ctxt_idx = 0;
1599
1600
        /* context set based on luma subblock pos */
1601
1602
        /* Encode the abs level gt1 bins */
1603
        /* Currently calculating trade off between mps(2) and mps(1)*/
1604
        /* The estimation has to be further done for mps(11) and mps(111)*/
1605
        /*ctxt_set = 0 as transform 4x4 has only one csb with DC */
1606
        /* gt1_ctxt = 0 for the co-ef value to be 2 */
1607
1608
3.23M
        ctxt_set = gt1_ctxt = 0;
1609
3.23M
        ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1610
1611
3.23M
        state_mps = pu1_ctxt_model[ctxt_idx];
1612
1613
3.23M
        u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
1614
1615
3.23M
        u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1616
1617
3.23M
        QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1_temp, lamda_mod);
1618
55.0M
        for(scan_pos = 0; scan_pos < 16; scan_pos++)
1619
51.8M
        {
1620
51.8M
            *(pi4_quant_round_1_2 + scan_pos) = temp2;
1621
51.8M
        }
1622
1623
55.0M
        for(scan_pos = 0; scan_pos < 16; scan_pos++)
1624
51.8M
        {
1625
            //UWORD8 nbr_csbf = 1;
1626
            /* derive the x,y pos */
1627
51.8M
            UWORD8 y_pos_x_pos = scan_pos;  //gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
1628
1629
            /* 4x4 transform size increment uses lookup */
1630
51.8M
            sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];
1631
1632
            /*Get the mps state based on ctxt modes */
1633
51.8M
            state_mps = pu1_ctxt_model[sig_ctxinc + sig_coeff_base_ctxt];
1634
1635
            /* Bits taken to encode sig co-ef flag as 0 */
1636
51.8M
            u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1637
1638
            /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1639
            //
1640
51.8M
            u4_bits_estimated_r1 =
1641
51.8M
                (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
1642
1643
            /* Add the bits taken to encode the abs level gt1 flag as 0 */
1644
51.8M
            u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
1645
1646
51.8M
            QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
1647
51.8M
            *(pi4_quant_round_0_1 + scan_pos) = temp2;
1648
51.8M
        }
1649
3.23M
    }
1650
4.17M
    else
1651
4.17M
    {
1652
4.17M
        UWORD8 *pu1_hevce_sigcoeff_ctxtinc;
1653
4.17M
        WORD32 is_nbr_csb_state_mps;
1654
1655
4.17M
        WORD32 state_mps;
1656
4.17M
        WORD32 gt1_ctxt = 0;
1657
4.17M
        WORD32 ctxt_set = 0;
1658
4.17M
        WORD32 ctxt_idx;
1659
        /*1to2 rounding factor is same for all sub blocks except for sub-block = 0*/
1660
        /*Hence will write all the sub-block with i >=1 coeff, and then overwrite for i = 0*/
1661
1662
        /*ctxt_set = 0 DC subblock, the previous state did not have 2
1663
        ctxt_set = 1 DC subblock, the previous state did have >= 2
1664
        ctxt_set = 2 AC subblock, the previous state did not have 2
1665
        ctxt_set = 3 AC subblock, the previous state did have >= 2*/
1666
4.17M
        i = 1;
1667
4.17M
        ctxt_set = (i && is_luma) ? 2 : 0;
1668
1669
4.17M
        ctxt_set++;
1670
1671
        /*0th position indicates the probability of 2 */
1672
        /*1th position indicates the probability of 1 */
1673
        /*2th position indicates the probability of 11 */
1674
        /*3th position indicates the probability of 111 */
1675
1676
4.17M
        gt1_ctxt = 0;
1677
4.17M
        ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1678
1679
4.17M
        state_mps = pu1_ctxt_model[ctxt_idx];
1680
1681
4.17M
        u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
1682
1683
4.17M
        u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1684
4.17M
        QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod);
1685
1686
824M
        for(scan_pos = 0; scan_pos < (16 * (i4_trans_size * i4_trans_size >> 4)); scan_pos++)
1687
820M
        {
1688
820M
            *(pi4_quant_round_1_2 + scan_pos) = temp2;
1689
820M
        }
1690
1691
4.17M
        i = 0;
1692
4.17M
        ctxt_set = (i && is_luma) ? 2 : 0;
1693
4.17M
        ctxt_set++;
1694
1695
        /*0th position indicates the probability of 2 */
1696
        /*1th position indicates the probability of 1 */
1697
        /*2th position indicates the probability of 11 */
1698
        /*3th position indicates the probability of 111 */
1699
1700
4.17M
        gt1_ctxt = 0;
1701
4.17M
        ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1702
1703
4.17M
        state_mps = pu1_ctxt_model[ctxt_idx];
1704
1705
4.17M
        u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
1706
1707
4.17M
        u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1708
4.17M
        QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod);
1709
1710
71.0M
        for(scan_pos = 0; scan_pos < 16; scan_pos++)
1711
66.8M
        {
1712
66.8M
            *(pi4_quant_round_1_2 + ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2;
1713
66.8M
        }
1714
1715
4.17M
        {
1716
4.17M
            WORD32 ctxt_idx;
1717
1718
4.17M
            WORD32 nbr_csbf_0, nbr_csbf_1;
1719
4.17M
            WORD32 state_mps_0, state_mps_1;
1720
4.17M
            ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX;
1721
4.17M
            ctxt_idx += is_luma ? 0 : 2;
1722
1723
            /* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */
1724
            /* if neighbour not available, ctxt idx = 0 */
1725
4.17M
            nbr_csbf_0 = 0;
1726
4.17M
            ctxt_idx += nbr_csbf_0 ? 1 : 0;
1727
4.17M
            state_mps_0 = pu1_ctxt_model[ctxt_idx];
1728
1729
4.17M
            nbr_csbf_1 = 1;
1730
4.17M
            ctxt_idx += nbr_csbf_1 ? 1 : 0;
1731
4.17M
            state_mps_1 = pu1_ctxt_model[ctxt_idx];
1732
1733
4.17M
            is_nbr_csb_state_mps = ((state_mps_0 % 2) == 1) && ((state_mps_1 % 2) == 1);
1734
4.17M
        }
1735
1736
4.17M
        if(1 == is_nbr_csb_state_mps)
1737
841k
        {
1738
12.5M
            for(i = 0; i < (i4_trans_size * i4_trans_size >> 4); i++)
1739
11.7M
            {
1740
11.7M
                UWORD8 sig_ctxinc;
1741
11.7M
                WORD32 state_mps;
1742
11.7M
                WORD32 gt1_ctxt = 0;
1743
11.7M
                WORD32 ctxt_set = 0;
1744
1745
11.7M
                WORD32 ctxt_idx;
1746
1747
                /*Check if the cabac states had previous nbr available */
1748
1749
11.7M
                if(i == 0)
1750
841k
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[3][0];
1751
10.8M
                else if(i < (i4_trans_size >> 2))
1752
1.87M
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[1][0];
1753
9.02M
                else if((i % (i4_trans_size >> 2)) == 0)
1754
1.87M
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[2][0];
1755
7.15M
                else
1756
7.15M
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0];
1757
1758
11.7M
                if(((i % (i4_trans_size >> 2)) == 0) && (i != 0))
1759
1.87M
                    k++;
1760
1761
11.7M
                j = ((i4_trans_size * 4) * k) + ((i % (i4_trans_size >> 2)) * 4);
1762
                /*ctxt_set = 0 DC subblock, the previous state did not have 2
1763
                ctxt_set = 1 DC subblock, the previous state did have >= 2
1764
                ctxt_set = 2 AC subblock, the previous state did not have 2
1765
                ctxt_set = 3 AC subblock, the previous state did have >= 2*/
1766
1767
11.7M
                ctxt_set = (i && is_luma) ? 2 : 0;
1768
1769
                /* gt1_ctxt = 1 for the co-ef value to be 1 */
1770
11.7M
                gt1_ctxt = 0;
1771
11.7M
                ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1772
1773
11.7M
                state_mps = pu1_ctxt_model[ctxt_idx];
1774
1775
                /* Bits taken to encode the abs level gt1 flag as 0 for this sub-block's context set */
1776
11.7M
                u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1777
1778
199M
                for(scan_pos = 0; scan_pos < 16; scan_pos++)
1779
187M
                {
1780
187M
                    UWORD8 y_pos_x_pos;
1781
1782
187M
                    if(scan_pos || i)
1783
186M
                    {
1784
186M
                        y_pos_x_pos = scan_pos;  // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
1785
                        /* ctxt for AC coeff depends on curpos and neighbour csbf */
1786
186M
                        sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
1787
1788
                        /* based on luma subblock pos */
1789
186M
                        sig_ctxinc += (i && is_luma) ? 3 : 0;
1790
1791
186M
                        sig_ctxinc += sig_coeff_base_ctxt;
1792
186M
                    }
1793
841k
                    else
1794
841k
                    {
1795
                        /*MAM : both scan pos and i being 0 implies the DC coef of 1st block only */
1796
                        /* DC coeff has fixed context for luma and chroma */
1797
841k
                        sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
1798
841k
                    }
1799
1800
                    /*Get the mps state based on ctxt modes */
1801
187M
                    state_mps = pu1_ctxt_model[sig_ctxinc];
1802
1803
                    /* Bits taken to encode sig co-ef flag as 0 */
1804
187M
                    u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1805
1806
187M
                    u4_bits_estimated_r1 =
1807
187M
                        (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
1808
1809
                    /* Add the bits taken to encode the abs level gt1 flag as 0 */
1810
187M
                    u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
1811
187M
                    {
1812
187M
                        QUANT_ROUND_FACTOR(
1813
187M
                            temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
1814
187M
                        *(pi4_quant_round_0_1 +
1815
187M
                          ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size)) + j) = temp2;
1816
187M
                    }
1817
187M
                }
1818
11.7M
            }
1819
841k
        }
1820
3.33M
        else
1821
3.33M
        {
1822
            /*If Both nbr csbfs are 0, then all the coef in sub-blocks will have same value except for 1st subblock,
1823
            Hence will write the same value to all sub block, and overwrite for the 1st one */
1824
3.33M
            i = 1;
1825
3.33M
            {
1826
3.33M
                UWORD8 sig_ctxinc;
1827
3.33M
                UWORD8 y_pos_x_pos;
1828
3.33M
                WORD32 quant_rounding_0_1;
1829
1830
3.33M
                pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc_00[0];
1831
1832
3.33M
                scan_pos = 0;
1833
3.33M
                y_pos_x_pos = scan_pos;  // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
1834
                /* ctxt for AC coeff depends on curpos and neighbour csbf */
1835
3.33M
                sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
1836
1837
                /* based on luma subblock pos */
1838
3.33M
                sig_ctxinc += (is_luma) ? 3 : 0;
1839
1840
3.33M
                sig_ctxinc += sig_coeff_base_ctxt;
1841
1842
                /*Get the mps state based on ctxt modes */
1843
3.33M
                state_mps = pu1_ctxt_model[sig_ctxinc];
1844
1845
                /* Bits taken to encode sig co-ef flag as 0 */
1846
3.33M
                u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1847
1848
3.33M
                u4_bits_estimated_r1 =
1849
3.33M
                    (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
1850
1851
                /*ctxt_set = 0 DC subblock, the previous state did not have 2
1852
                ctxt_set = 1 DC subblock, the previous state did have >= 2
1853
                ctxt_set = 2 AC subblock, the previous state did not have 2
1854
                ctxt_set = 3 AC subblock, the previous state did have >= 2*/
1855
1856
3.33M
                ctxt_set = (i && is_luma) ? 2 : 0;
1857
1858
                /* gt1_ctxt = 1 for the co-ef value to be 1 */
1859
3.33M
                gt1_ctxt = 0;
1860
3.33M
                ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1861
1862
3.33M
                state_mps = pu1_ctxt_model[ctxt_idx];
1863
1864
                /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1865
3.33M
                u4_bits_estimated_r1 += gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1866
1867
3.33M
                QUANT_ROUND_FACTOR(
1868
3.33M
                    quant_rounding_0_1, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
1869
1870
635M
                for(scan_pos = 0; scan_pos < (16 * (i4_trans_size * i4_trans_size >> 4));
1871
632M
                    scan_pos++)
1872
632M
                {
1873
632M
                    *(pi4_quant_round_0_1 + scan_pos) = quant_rounding_0_1;
1874
632M
                }
1875
3.33M
            }
1876
1877
            /*First Subblock*/
1878
3.33M
            i = 0;
1879
1880
3.33M
            {
1881
3.33M
                UWORD8 sig_ctxinc;
1882
3.33M
                WORD32 state_mps;
1883
3.33M
                WORD32 gt1_ctxt = 0;
1884
3.33M
                WORD32 ctxt_set = 0;
1885
1886
3.33M
                WORD32 ctxt_idx;
1887
1888
                /*Check if the cabac states had previous nbr available */
1889
1890
3.33M
                {
1891
3.33M
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0];
1892
1893
                    /*ctxt_set = 0 DC subblock, the previous state did not have 2
1894
                    ctxt_set = 1 DC subblock, the previous state did have >= 2
1895
                    ctxt_set = 2 AC subblock, the previous state did not have 2
1896
                    ctxt_set = 3 AC subblock, the previous state did have >= 2*/
1897
3.33M
                    ctxt_set = (i && is_luma) ? 2 : 0;
1898
1899
                    /* gt1_ctxt = 1 for the co-ef value to be 1 */
1900
3.33M
                    gt1_ctxt = 0;
1901
3.33M
                    ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1902
1903
3.33M
                    state_mps = pu1_ctxt_model[ctxt_idx];
1904
1905
                    /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1906
3.33M
                    u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1907
1908
56.7M
                    for(scan_pos = 0; scan_pos < 16; scan_pos++)
1909
53.4M
                    {
1910
53.4M
                        UWORD8 y_pos_x_pos;
1911
1912
53.4M
                        if(scan_pos)
1913
50.0M
                        {
1914
50.0M
                            y_pos_x_pos = scan_pos;  // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
1915
                            /* ctxt for AC coeff depends on curpos and neighbour csbf */
1916
50.0M
                            sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
1917
1918
                            /* based on luma subblock pos */
1919
50.0M
                            sig_ctxinc += (i && is_luma) ? 3 : 0;
1920
1921
50.0M
                            sig_ctxinc += sig_coeff_base_ctxt;
1922
50.0M
                        }
1923
3.33M
                        else
1924
3.33M
                        {
1925
                            /*MAM : both scan pos and i being 0 implies the DC coef of 1st block only */
1926
                            /* DC coeff has fixed context for luma and chroma */
1927
3.33M
                            sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
1928
3.33M
                        }
1929
1930
                        /*Get the mps state based on ctxt modes */
1931
53.4M
                        state_mps = pu1_ctxt_model[sig_ctxinc];
1932
1933
                        /* Bits taken to encode sig co-ef flag as 0 */
1934
53.4M
                        u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1935
1936
53.4M
                        u4_bits_estimated_r1 =
1937
53.4M
                            (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
1938
1939
                        /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1940
53.4M
                        u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
1941
53.4M
                        {
1942
53.4M
                            QUANT_ROUND_FACTOR(
1943
53.4M
                                temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
1944
53.4M
                            *(pi4_quant_round_0_1 +
1945
53.4M
                              ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2;
1946
53.4M
                        }
1947
53.4M
                    }
1948
3.33M
                }
1949
3.33M
            }
1950
3.33M
        }
1951
4.17M
    }
1952
7.41M
    return;
1953
7.41M
}
1954
1955
/*!
1956
******************************************************************************
1957
* \if Function name : ihevce_t_q_iq_ssd_scan_fxn \endif
1958
*
1959
* \brief
1960
*    Transform unit level (Luma) enc_loop function
1961
*
1962
* \param[in] ps_ctxt    enc_loop module ctxt pointer
1963
* \param[in] pu1_pred   pointer to predicted data buffer
1964
* \param[in] pred_strd  predicted buffer stride
1965
* \param[in] pu1_src    pointer to source data buffer
1966
* \param[in] src_strd   source buffer stride
1967
* \param[in] pi2_deq_data   pointer to store iq data
1968
* \param[in] deq_data_strd  iq data buffer stride
1969
* \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod)
1970
* \param[out] pu1_csbf_buf  pointer to store the csbf for all 4x4 in a current
1971
*                           block
1972
* \param[out] csbf_strd  csbf buffer stride
1973
* \param[in] trans_size transform size (4, 8, 16,32)
1974
* \param[in] packed_pred_mode   0:Inter 1:Intra 2:Skip
1975
* \param[out] pi8_cost      pointer to store the cost
1976
* \param[out] pi4_coeff_off pointer to store the number of bytes produced in
1977
*                           coeff buffer
1978
* \param[out] pi4_tu_bits   pointer to store the best TU bits required to encode
1979
the current TU in RDopt Mode
1980
* \param[out] pu4_blk_sad   pointer to store the block sad for RC
1981
* \param[out] pi4_zero_col  pointer to store the zero_col info for the TU
1982
* \param[out] pi4_zero_row  pointer to store the zero_row info for the TU
1983
* \param[in]  i4_perform_rdoq Indicates if RDOQ should be performed or not
1984
* \param[in]  i4_perform_sbh Indicates if SBH should be performed or not
1985
*
1986
* \return
1987
*    CBF of the current block
1988
*
1989
* \author
1990
*  Ittiam
1991
*
1992
*****************************************************************************
1993
*/
1994
1995
WORD32 ihevce_t_q_iq_ssd_scan_fxn(
1996
    ihevce_enc_loop_ctxt_t *ps_ctxt,
1997
    UWORD8 *pu1_pred,
1998
    WORD32 pred_strd,
1999
    UWORD8 *pu1_src,
2000
    WORD32 src_strd,
2001
    WORD16 *pi2_deq_data,
2002
    WORD32 deq_data_strd,
2003
    UWORD8 *pu1_recon,
2004
    WORD32 i4_recon_stride,
2005
    UWORD8 *pu1_ecd_data,
2006
    UWORD8 *pu1_csbf_buf,
2007
    WORD32 csbf_strd,
2008
    WORD32 trans_size,
2009
    WORD32 packed_pred_mode,
2010
    LWORD64 *pi8_cost,
2011
    WORD32 *pi4_coeff_off,
2012
    WORD32 *pi4_tu_bits,
2013
    UWORD32 *pu4_blk_sad,
2014
    WORD32 *pi4_zero_col,
2015
    WORD32 *pi4_zero_row,
2016
    UWORD8 *pu1_is_recon_available,
2017
    WORD32 i4_perform_rdoq,
2018
    WORD32 i4_perform_sbh,
2019
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2020
    WORD32 i4_alpha_stim_multiplier,
2021
    UWORD8 u1_is_cu_noisy,
2022
#endif
2023
    SSD_TYPE_T e_ssd_type,
2024
    WORD32 early_cbf)
2025
29.7M
{
2026
29.7M
    WORD32 cbf = 0;
2027
29.7M
    WORD32 trans_idx;
2028
29.7M
    WORD32 quant_scale_mat_offset;
2029
29.7M
    WORD32 *pi4_trans_scratch;
2030
29.7M
    WORD16 *pi2_trans_values;
2031
29.7M
    WORD16 *pi2_quant_coeffs;
2032
29.7M
    WORD32 *pi4_subBlock2csbfId_map = NULL;
2033
2034
#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
2035
    WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i;
2036
#endif
2037
2038
29.7M
    rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt;
2039
2040
29.7M
    WORD32 i4_perform_zcbf = (ENABLE_INTER_ZCU_COST && (PRED_MODE_INTRA != packed_pred_mode)) ||
2041
23.6M
                             (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE);
2042
29.7M
    WORD32 i4_perform_coeff_level_rdoq = (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING);
2043
29.7M
    WORD8 intra_flag = 0;
2044
29.7M
    ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW);
2045
2046
29.7M
    *pi4_tu_bits = 0;
2047
29.7M
    *pi4_coeff_off = 0;
2048
29.7M
    pu1_is_recon_available[0] = 0;
2049
2050
29.7M
    if((PRED_MODE_SKIP == packed_pred_mode) || (0 == early_cbf))
2051
1.26M
    {
2052
1.26M
        if(e_ssd_type != NULL_TYPE)
2053
1.26M
        {
2054
            /* SSD cost is stored to the pointer */
2055
1.26M
            pi8_cost[0] =
2056
2057
1.26M
                ps_ctxt->s_cmn_opt_func.pf_ssd_and_sad_calculator(
2058
1.26M
                    pu1_pred, pred_strd, pu1_src, src_strd, trans_size, pu4_blk_sad);
2059
2060
1.26M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2061
1.26M
            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
2062
0
            {
2063
0
                pi8_cost[0] = ihevce_inject_stim_into_distortion(
2064
0
                    pu1_src,
2065
0
                    src_strd,
2066
0
                    pu1_pred,
2067
0
                    pred_strd,
2068
0
                    pi8_cost[0],
2069
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2070
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2071
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2072
0
                                                 100.0,
2073
0
                    trans_size,
2074
0
                    0,
2075
0
                    ps_ctxt->u1_enable_psyRDOPT,
2076
0
                    NULL_PLANE);
2077
0
            }
2078
1.26M
#endif
2079
2080
            /* copy pred to recon for skip mode */
2081
1.26M
            if(SPATIAL_DOMAIN_SSD == e_ssd_type)
2082
529k
            {
2083
529k
                ps_ctxt->s_cmn_opt_func.pf_copy_2d(
2084
529k
                    pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size);
2085
529k
                pu1_is_recon_available[0] = 1;
2086
529k
            }
2087
736k
            else
2088
736k
            {
2089
736k
                pu1_is_recon_available[0] = 0;
2090
736k
            }
2091
2092
1.26M
#if ENABLE_INTER_ZCU_COST
2093
1.26M
            ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
2094
1.26M
#endif
2095
1.26M
        }
2096
0
        else
2097
0
        {
2098
0
            pi8_cost[0] = UINT_MAX;
2099
0
        }
2100
2101
        /* cbf is returned as 0 */
2102
1.26M
        return (0);
2103
1.26M
    }
2104
2105
    /* derive context variables */
2106
28.4M
    pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0];
2107
28.4M
    pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
2108
28.4M
    pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2);
2109
2110
    /* translate the transform size to index for 4x4 and 8x8 */
2111
28.4M
    trans_idx = trans_size >> 2;
2112
2113
28.4M
    if(PRED_MODE_INTRA == packed_pred_mode)
2114
23.6M
    {
2115
23.6M
        quant_scale_mat_offset = 0;
2116
23.6M
        intra_flag = 1;
2117
#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
2118
        ai4_quant_rounding_factors[0][0] =
2119
            MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3);
2120
2121
        for(i = 0; i < trans_size * trans_size; i++)
2122
        {
2123
            ai4_quant_rounding_factors[1][i] =
2124
                MAX(ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3][i],
2125
                    (1 << QUANT_ROUND_FACTOR_Q) / 3);
2126
            ai4_quant_rounding_factors[2][i] =
2127
                MAX(ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3][i],
2128
                    (1 << QUANT_ROUND_FACTOR_Q) / 3);
2129
        }
2130
#endif
2131
23.6M
    }
2132
4.87M
    else
2133
4.87M
    {
2134
4.87M
        quant_scale_mat_offset = NUM_TRANS_TYPES;
2135
4.87M
    }
2136
    /* for intra 4x4 DST transform should be used */
2137
28.4M
    if((1 == trans_idx) && (1 == intra_flag))
2138
11.4M
    {
2139
11.4M
        trans_idx = 0;
2140
11.4M
    }
2141
    /* for 16x16 cases */
2142
17.0M
    else if(16 == trans_size)
2143
5.82M
    {
2144
5.82M
        trans_idx = 3;
2145
5.82M
    }
2146
    /* for 32x32 cases */
2147
11.1M
    else if(32 == trans_size)
2148
2.25M
    {
2149
2.25M
        trans_idx = 4;
2150
2.25M
    }
2151
2152
28.4M
    switch(trans_size)
2153
28.4M
    {
2154
12.9M
    case 4:
2155
12.9M
    {
2156
12.9M
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU;
2157
2158
12.9M
        break;
2159
0
    }
2160
7.44M
    case 8:
2161
7.44M
    {
2162
7.44M
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU;
2163
2164
7.44M
        break;
2165
0
    }
2166
5.82M
    case 16:
2167
5.82M
    {
2168
5.82M
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU;
2169
2170
5.82M
        break;
2171
0
    }
2172
2.25M
    case 32:
2173
2.25M
    {
2174
2.25M
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU;
2175
2176
2.25M
        break;
2177
0
    }
2178
28.4M
    }
2179
2180
    /* Do not call the FT and Quant functions if early_cbf is 0 */
2181
28.4M
    if(1 == early_cbf)
2182
28.4M
    {
2183
        /* ---------- call residue and transform block ------- */
2184
28.4M
        *pu4_blk_sad = ps_ctxt->apf_resd_trns[trans_idx](
2185
28.4M
            pu1_src,
2186
28.4M
            pu1_pred,
2187
28.4M
            pi4_trans_scratch,
2188
28.4M
            pi2_trans_values,
2189
28.4M
            src_strd,
2190
28.4M
            pred_strd,
2191
28.4M
            trans_size,
2192
28.4M
            NULL_PLANE);
2193
2194
28.4M
        cbf = ps_ctxt->apf_quant_iquant_ssd
2195
28.4M
                  [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2](
2196
28.4M
                      pi2_trans_values,
2197
28.4M
                      ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset],
2198
28.4M
                      pi2_quant_coeffs,
2199
28.4M
                      pi2_deq_data,
2200
28.4M
                      trans_size,
2201
28.4M
                      ps_ctxt->i4_cu_qp_div6,
2202
28.4M
                      ps_ctxt->i4_cu_qp_mod6,
2203
28.4M
#if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
2204
28.4M
                      ps_ctxt->i4_quant_rnd_factor[intra_flag],
2205
28.4M
                      ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
2206
28.4M
                      ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
2207
#else
2208
                      intra_flag ? ai4_quant_rounding_factors[0][0]
2209
                                 : ps_ctxt->i4_quant_rnd_factor[intra_flag],
2210
                      intra_flag ? ai4_quant_rounding_factors[1]
2211
                                 : ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
2212
                      intra_flag ? ai4_quant_rounding_factors[2]
2213
                                 : ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
2214
#endif
2215
28.4M
                      trans_size,
2216
28.4M
                      trans_size,
2217
28.4M
                      deq_data_strd,
2218
28.4M
                      pu1_csbf_buf,
2219
28.4M
                      csbf_strd,
2220
28.4M
                      pi4_zero_col,
2221
28.4M
                      pi4_zero_row,
2222
28.4M
                      ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset],
2223
28.4M
                      pi8_cost);
2224
2225
28.4M
        if(e_ssd_type != FREQUENCY_DOMAIN_SSD)
2226
11.8M
        {
2227
11.8M
            pi8_cost[0] = UINT_MAX;
2228
11.8M
        }
2229
28.4M
    }
2230
2231
28.4M
    if(0 != cbf)
2232
9.55M
    {
2233
9.55M
        if(i4_perform_sbh || i4_perform_rdoq)
2234
6.99M
        {
2235
6.99M
            ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd;
2236
6.99M
            ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size;
2237
6.99M
            ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map;
2238
2239
6.99M
            ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_cu_qp_div6;
2240
6.99M
            ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_cu_qp_mod6;
2241
6.99M
            ps_rdoq_sbh_ctxt->i4_scan_idx = ps_ctxt->i4_scan_idx;
2242
6.99M
            ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
2243
6.99M
            ps_rdoq_sbh_ctxt->i4_trans_size = trans_size;
2244
2245
6.99M
            ps_rdoq_sbh_ctxt->pi2_dequant_coeff =
2246
6.99M
                ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset];
2247
6.99M
            ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data;
2248
6.99M
            ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs;
2249
6.99M
            ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values;
2250
6.99M
            ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf;
2251
2252
            /* ------- call coeffs scan function ------- */
2253
6.99M
            if((!i4_perform_rdoq))
2254
3.26M
            {
2255
3.26M
                ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
2256
2257
3.26M
                pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
2258
3.26M
            }
2259
6.99M
        }
2260
2261
9.55M
        *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
2262
9.55M
            pi2_quant_coeffs,
2263
9.55M
            pi4_subBlock2csbfId_map,
2264
9.55M
            ps_ctxt->i4_scan_idx,
2265
9.55M
            trans_size,
2266
9.55M
            pu1_ecd_data,
2267
9.55M
            pu1_csbf_buf,
2268
9.55M
            csbf_strd);
2269
9.55M
    }
2270
28.4M
    *pi8_cost >>= ga_trans_shift[trans_idx];
2271
2272
28.4M
#if RDOPT_ZERO_CBF_ENABLE
2273
    /* compare null cbf cost with encode tu rd-cost */
2274
28.4M
    if(cbf != 0)
2275
9.55M
    {
2276
9.55M
        WORD32 tu_bits;
2277
9.55M
        LWORD64 tu_rd_cost;
2278
2279
9.55M
        LWORD64 zero_cbf_cost = 0;
2280
2281
        /*Populating the feilds of rdoq_ctxt structure*/
2282
9.55M
        if(i4_perform_rdoq)
2283
3.73M
        {
2284
            /* transform size to log2transform size */
2285
3.73M
            GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size);
2286
3.73M
            ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1;
2287
3.73M
            ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_qf;
2288
3.73M
            ps_rdoq_sbh_ctxt->i4_is_luma = 1;
2289
3.73M
            ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx];
2290
3.73M
            ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td =
2291
3.73M
                (1 << ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td) / 2;
2292
3.73M
            ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0;
2293
3.73M
            ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col;
2294
3.73M
            ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row;
2295
3.73M
        }
2296
5.82M
        else if(i4_perform_zcbf)
2297
1.02M
        {
2298
1.02M
            zero_cbf_cost =
2299
2300
1.02M
                ps_ctxt->s_cmn_opt_func.pf_ssd_calculator(
2301
1.02M
                    pu1_src, pu1_pred, src_strd, pred_strd, trans_size, trans_size, NULL_PLANE);
2302
1.02M
        }
2303
2304
        /************************************************************************/
2305
        /* call the entropy rdo encode to get the bit estimate for current tu   */
2306
        /* note that tu includes only residual coding bits and does not include */
2307
        /* tu split, cbf and qp delta encoding bits for a TU                    */
2308
        /************************************************************************/
2309
9.55M
        if(i4_perform_rdoq)
2310
3.73M
        {
2311
3.73M
            tu_bits = ihevce_entropy_rdo_encode_tu_rdoq(
2312
3.73M
                &ps_ctxt->s_rdopt_entropy_ctxt,
2313
3.73M
                (pu1_ecd_data),
2314
3.73M
                trans_size,
2315
3.73M
                1,
2316
3.73M
                ps_rdoq_sbh_ctxt,
2317
3.73M
                pi8_cost,
2318
3.73M
                &zero_cbf_cost,
2319
3.73M
                0);
2320
2321
3.73M
            if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0)
2322
122k
            {
2323
122k
                cbf = 0;
2324
122k
                *pi4_coeff_off = 0;
2325
122k
            }
2326
2327
3.73M
            if((i4_perform_sbh) && (0 != cbf))
2328
3.61M
            {
2329
3.61M
                ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
2330
3.61M
                ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
2331
3.61M
                *pi8_cost = ps_rdoq_sbh_ctxt->i8_ssd_cost;
2332
3.61M
            }
2333
2334
            /*Add round value before normalizing*/
2335
3.73M
            *pi8_cost += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td;
2336
3.73M
            *pi8_cost >>= ga_trans_shift[trans_idx];
2337
2338
3.73M
            if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1)
2339
3.61M
            {
2340
3.61M
                pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
2341
3.61M
                *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
2342
3.61M
                    pi2_quant_coeffs,
2343
3.61M
                    pi4_subBlock2csbfId_map,
2344
3.61M
                    ps_ctxt->i4_scan_idx,
2345
3.61M
                    trans_size,
2346
3.61M
                    pu1_ecd_data,
2347
3.61M
                    pu1_csbf_buf,
2348
3.61M
                    csbf_strd);
2349
3.61M
            }
2350
3.73M
        }
2351
5.82M
        else
2352
5.82M
        {
2353
5.82M
            tu_bits = ihevce_entropy_rdo_encode_tu(
2354
5.82M
                &ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 1, i4_perform_sbh);
2355
5.82M
        }
2356
2357
9.55M
        *pi4_tu_bits = tu_bits;
2358
2359
9.55M
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
2360
2.49M
        {
2361
2.49M
            *pi8_cost = ihevce_it_recon_ssd(
2362
2.49M
                ps_ctxt,
2363
2.49M
                pu1_src,
2364
2.49M
                src_strd,
2365
2.49M
                pu1_pred,
2366
2.49M
                pred_strd,
2367
2.49M
                pi2_deq_data,
2368
2.49M
                deq_data_strd,
2369
2.49M
                pu1_recon,
2370
2.49M
                i4_recon_stride,
2371
2.49M
                pu1_ecd_data,
2372
2.49M
                trans_size,
2373
2.49M
                packed_pred_mode,
2374
2.49M
                cbf,
2375
2.49M
                *pi4_zero_col,
2376
2.49M
                *pi4_zero_row,
2377
2.49M
                NULL_PLANE);
2378
2379
2.49M
            pu1_is_recon_available[0] = 1;
2380
2.49M
        }
2381
2382
9.55M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2383
9.55M
        if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2384
0
        {
2385
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
2386
0
                pu1_src,
2387
0
                src_strd,
2388
0
                pu1_recon,
2389
0
                i4_recon_stride,
2390
0
                pi8_cost[0],
2391
0
                i4_alpha_stim_multiplier,
2392
0
                trans_size,
2393
0
                0,
2394
0
                ps_ctxt->u1_enable_psyRDOPT,
2395
0
                NULL_PLANE);
2396
0
        }
2397
9.55M
        else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2398
0
        {
2399
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
2400
0
                pu1_src,
2401
0
                src_strd,
2402
0
                pu1_pred,
2403
0
                pred_strd,
2404
0
                pi8_cost[0],
2405
0
                i4_alpha_stim_multiplier,
2406
0
                trans_size,
2407
0
                0,
2408
0
                ps_ctxt->u1_enable_psyRDOPT,
2409
0
                NULL_PLANE);
2410
0
        }
2411
9.55M
#endif
2412
2413
        /* add the SSD cost to bits estimate given by ECD */
2414
9.55M
        tu_rd_cost = *pi8_cost + COMPUTE_RATE_COST_CLIP30(
2415
9.55M
                                     tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
2416
2417
9.55M
        if(i4_perform_zcbf)
2418
1.68M
        {
2419
1.68M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2420
1.68M
            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
2421
0
            {
2422
0
                zero_cbf_cost = ihevce_inject_stim_into_distortion(
2423
0
                    pu1_src,
2424
0
                    src_strd,
2425
0
                    pu1_pred,
2426
0
                    pred_strd,
2427
0
                    zero_cbf_cost,
2428
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2429
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2430
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2431
0
                                                 100.0,
2432
0
                    trans_size,
2433
0
                    0,
2434
0
                    ps_ctxt->u1_enable_psyRDOPT,
2435
0
                    NULL_PLANE);
2436
0
            }
2437
1.68M
#endif
2438
2439
            /* force the tu as zero cbf if zero_cbf_cost is lower */
2440
1.68M
            if(zero_cbf_cost < tu_rd_cost)
2441
43.6k
            {
2442
                /* num bytes is set to 0 */
2443
43.6k
                *pi4_coeff_off = 0;
2444
2445
                /* cbf is returned as 0 */
2446
43.6k
                cbf = 0;
2447
2448
                /* cost is returned as 0 cbf cost */
2449
43.6k
                *pi8_cost = zero_cbf_cost;
2450
2451
                /* TU bits is set to 0 */
2452
43.6k
                *pi4_tu_bits = 0;
2453
43.6k
                pu1_is_recon_available[0] = 0;
2454
2455
43.6k
                if(SPATIAL_DOMAIN_SSD == e_ssd_type)
2456
7.24k
                {
2457
                    /* copy pred to recon for zcbf mode */
2458
2459
7.24k
                    ps_ctxt->s_cmn_opt_func.pf_copy_2d(
2460
7.24k
                        pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size);
2461
2462
7.24k
                    pu1_is_recon_available[0] = 1;
2463
7.24k
                }
2464
43.6k
            }
2465
            /* accumulate cu not coded cost with zcbf cost */
2466
1.68M
#if ENABLE_INTER_ZCU_COST
2467
1.68M
            ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost;
2468
1.68M
#endif
2469
1.68M
        }
2470
9.55M
    }
2471
18.9M
    else
2472
18.9M
    {
2473
        /* cbf = 0, accumulate cu not coded cost */
2474
18.9M
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
2475
9.40M
        {
2476
9.40M
            *pi8_cost = ihevce_it_recon_ssd(
2477
9.40M
                ps_ctxt,
2478
9.40M
                pu1_src,
2479
9.40M
                src_strd,
2480
9.40M
                pu1_pred,
2481
9.40M
                pred_strd,
2482
9.40M
                pi2_deq_data,
2483
9.40M
                deq_data_strd,
2484
9.40M
                pu1_recon,
2485
9.40M
                i4_recon_stride,
2486
9.40M
                pu1_ecd_data,
2487
9.40M
                trans_size,
2488
9.40M
                packed_pred_mode,
2489
9.40M
                cbf,
2490
9.40M
                *pi4_zero_col,
2491
9.40M
                *pi4_zero_row,
2492
9.40M
                NULL_PLANE);
2493
2494
9.40M
            pu1_is_recon_available[0] = 1;
2495
9.40M
        }
2496
2497
18.9M
#if ENABLE_INTER_ZCU_COST
2498
18.9M
        {
2499
18.9M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2500
18.9M
            if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2501
0
            {
2502
0
                pi8_cost[0] = ihevce_inject_stim_into_distortion(
2503
0
                    pu1_src,
2504
0
                    src_strd,
2505
0
                    pu1_recon,
2506
0
                    i4_recon_stride,
2507
0
                    pi8_cost[0],
2508
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2509
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2510
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2511
0
                                                 100.0,
2512
0
                    trans_size,
2513
0
                    0,
2514
0
                    ps_ctxt->u1_enable_psyRDOPT,
2515
0
                    NULL_PLANE);
2516
0
            }
2517
18.9M
            else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2518
0
            {
2519
0
                pi8_cost[0] = ihevce_inject_stim_into_distortion(
2520
0
                    pu1_src,
2521
0
                    src_strd,
2522
0
                    pu1_pred,
2523
0
                    pred_strd,
2524
0
                    pi8_cost[0],
2525
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2526
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2527
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2528
0
                                                 100.0,
2529
0
                    trans_size,
2530
0
                    0,
2531
0
                    ps_ctxt->u1_enable_psyRDOPT,
2532
0
                    NULL_PLANE);
2533
0
            }
2534
18.9M
#endif
2535
2536
18.9M
            ps_ctxt->i8_cu_not_coded_cost += *pi8_cost;
2537
18.9M
        }
2538
18.9M
#endif /* ENABLE_INTER_ZCU_COST */
2539
18.9M
    }
2540
28.4M
#endif
2541
2542
28.4M
    return (cbf);
2543
28.4M
}
2544
2545
/*!
2546
******************************************************************************
2547
* \if Function name : ihevce_it_recon_fxn \endif
2548
*
2549
* \brief
2550
*    Transform unit level (Luma) IT Recon function
2551
*
2552
* \param[in] ps_ctxt        enc_loop module ctxt pointer
2553
* \param[in] pi2_deq_data   pointer to iq data
2554
* \param[in] deq_data_strd  iq data buffer stride
2555
* \param[in] pu1_pred       pointer to predicted data buffer
2556
* \param[in] pred_strd      predicted buffer stride
2557
* \param[in] pu1_recon      pointer to recon buffer
2558
* \param[in] recon_strd     recon buffer stride
2559
* \param[out] pu1_ecd_data  pointer coeff output buffer (input to ent cod)
2560
* \param[in] trans_size     transform size (4, 8, 16,32)
2561
* \param[in] packed_pred_mode   0:Inter 1:Intra 2:Skip
2562
* \param[in] cbf            CBF of the current block
2563
* \param[in] zero_cols      zero_cols of the current block
2564
* \param[in] zero_rows      zero_rows of the current block
2565
*
2566
* \return
2567
*
2568
* \author
2569
*  Ittiam
2570
*
2571
*****************************************************************************
2572
*/
2573
2574
void ihevce_it_recon_fxn(
2575
    ihevce_enc_loop_ctxt_t *ps_ctxt,
2576
    WORD16 *pi2_deq_data,
2577
    WORD32 deq_dat_strd,
2578
    UWORD8 *pu1_pred,
2579
    WORD32 pred_strd,
2580
    UWORD8 *pu1_recon,
2581
    WORD32 recon_strd,
2582
    UWORD8 *pu1_ecd_data,
2583
    WORD32 trans_size,
2584
    WORD32 packed_pred_mode,
2585
    WORD32 cbf,
2586
    WORD32 zero_cols,
2587
    WORD32 zero_rows)
2588
18.8M
{
2589
18.8M
    WORD32 dc_add_flag = 0;
2590
18.8M
    WORD32 trans_idx;
2591
2592
    /* translate the transform size to index for 4x4 and 8x8 */
2593
18.8M
    trans_idx = trans_size >> 2;
2594
2595
    /* if SKIP mode needs to be evaluated the pred is copied to recon */
2596
18.8M
    if(PRED_MODE_SKIP == packed_pred_mode)
2597
196k
    {
2598
196k
        UWORD8 *pu1_curr_recon, *pu1_curr_pred;
2599
2600
196k
        pu1_curr_pred = pu1_pred;
2601
196k
        pu1_curr_recon = pu1_recon;
2602
2603
        /* 2D copy of data */
2604
2605
196k
        ps_ctxt->s_cmn_opt_func.pf_2d_square_copy(
2606
196k
            pu1_curr_recon, recon_strd, pu1_curr_pred, pred_strd, trans_size, sizeof(UWORD8));
2607
2608
196k
        return;
2609
196k
    }
2610
2611
    /* for intra 4x4 DST transform should be used */
2612
18.6M
    if((1 == trans_idx) && (PRED_MODE_INTRA == packed_pred_mode))
2613
6.14M
    {
2614
6.14M
        trans_idx = 0;
2615
6.14M
    }
2616
    /* for 16x16 cases */
2617
12.4M
    else if(16 == trans_size)
2618
4.38M
    {
2619
4.38M
        trans_idx = 3;
2620
4.38M
    }
2621
    /* for 32x32 cases */
2622
8.09M
    else if(32 == trans_size)
2623
1.73M
    {
2624
1.73M
        trans_idx = 4;
2625
1.73M
    }
2626
2627
    /*if (lastx == 0 && lasty == 0) , ie only 1 coefficient */
2628
18.6M
    if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1]))
2629
3.85M
    {
2630
3.85M
        dc_add_flag = 1;
2631
3.85M
    }
2632
2633
18.6M
    if(0 == cbf)
2634
14.1M
    {
2635
        /* buffer copy */
2636
14.1M
        ps_ctxt->s_cmn_opt_func.pf_2d_square_copy(
2637
14.1M
            pu1_recon, recon_strd, pu1_pred, pred_strd, trans_size, 1);
2638
14.1M
    }
2639
4.50M
    else if((1 == dc_add_flag) && (0 != trans_idx))
2640
125k
    {
2641
        /* dc add */
2642
125k
        ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc(
2643
125k
            pu1_pred,
2644
125k
            pred_strd,
2645
125k
            pu1_recon,
2646
125k
            recon_strd,
2647
125k
            trans_size,
2648
125k
            pi2_deq_data[0],
2649
125k
            NULL_PLANE /* luma */
2650
125k
        );
2651
125k
    }
2652
4.38M
    else
2653
4.38M
    {
2654
4.38M
        ps_ctxt->apf_it_recon[trans_idx](
2655
4.38M
            pi2_deq_data,
2656
4.38M
            &ps_ctxt->ai2_scratch[0],
2657
4.38M
            pu1_pred,
2658
4.38M
            pu1_recon,
2659
4.38M
            deq_dat_strd,
2660
4.38M
            pred_strd,
2661
4.38M
            recon_strd,
2662
4.38M
            zero_cols,
2663
4.38M
            zero_rows);
2664
4.38M
    }
2665
18.6M
}
2666
2667
/*!
2668
******************************************************************************
2669
* \if Function name : ihevce_chroma_it_recon_fxn \endif
2670
*
2671
* \brief
2672
*    Transform unit level (Chroma) IT Recon function
2673
*
2674
* \param[in] ps_ctxt        enc_loop module ctxt pointer
2675
* \param[in] pi2_deq_data   pointer to iq data
2676
* \param[in] deq_data_strd  iq data buffer stride
2677
* \param[in] pu1_pred       pointer to predicted data buffer
2678
* \param[in] pred_strd      predicted buffer stride
2679
* \param[in] pu1_recon      pointer to recon buffer
2680
* \param[in] recon_strd     recon buffer stride
2681
* \param[out] pu1_ecd_data  pointer coeff output buffer (input to ent cod)
2682
* \param[in] trans_size     transform size (4, 8, 16)
2683
* \param[in] cbf            CBF of the current block
2684
* \param[in] zero_cols      zero_cols of the current block
2685
* \param[in] zero_rows      zero_rows of the current block
2686
*
2687
* \return
2688
*
2689
* \author
2690
*  Ittiam
2691
*
2692
*****************************************************************************
2693
*/
2694
2695
void ihevce_chroma_it_recon_fxn(
2696
    ihevce_enc_loop_ctxt_t *ps_ctxt,
2697
    WORD16 *pi2_deq_data,
2698
    WORD32 deq_dat_strd,
2699
    UWORD8 *pu1_pred,
2700
    WORD32 pred_strd,
2701
    UWORD8 *pu1_recon,
2702
    WORD32 recon_strd,
2703
    UWORD8 *pu1_ecd_data,
2704
    WORD32 trans_size,
2705
    WORD32 cbf,
2706
    WORD32 zero_cols,
2707
    WORD32 zero_rows,
2708
    CHROMA_PLANE_ID_T e_chroma_plane)
2709
24.2M
{
2710
24.2M
    WORD32 trans_idx;
2711
2712
24.2M
    ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE));
2713
2714
    /* since 2x2 transform is not allowed for chroma*/
2715
24.2M
    if(2 == trans_size)
2716
0
    {
2717
0
        trans_size = 4;
2718
0
    }
2719
2720
    /* translate the transform size to index */
2721
24.2M
    trans_idx = trans_size >> 2;
2722
2723
    /* for 16x16 cases */
2724
24.2M
    if(16 == trans_size)
2725
3.78M
    {
2726
3.78M
        trans_idx = 3;
2727
3.78M
    }
2728
2729
24.2M
    if(0 == cbf)
2730
21.5M
    {
2731
        /* buffer copy */
2732
21.5M
        ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
2733
21.5M
            pu1_pred, pred_strd, pu1_recon, recon_strd, trans_size, trans_size, e_chroma_plane);
2734
21.5M
    }
2735
2.70M
    else if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1]))
2736
233k
    {
2737
        /* dc add */
2738
233k
        ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc(
2739
233k
            pu1_pred,
2740
233k
            pred_strd,
2741
233k
            pu1_recon,
2742
233k
            recon_strd,
2743
233k
            trans_size,
2744
233k
            pi2_deq_data[0],
2745
233k
            e_chroma_plane /* chroma plane */
2746
233k
        );
2747
233k
    }
2748
2.46M
    else
2749
2.46M
    {
2750
2.46M
        ps_ctxt->apf_chrm_it_recon[trans_idx - 1](
2751
2.46M
            pi2_deq_data,
2752
2.46M
            &ps_ctxt->ai2_scratch[0],
2753
2.46M
            pu1_pred + (WORD32)e_chroma_plane,
2754
2.46M
            pu1_recon + (WORD32)e_chroma_plane,
2755
2.46M
            deq_dat_strd,
2756
2.46M
            pred_strd,
2757
2.46M
            recon_strd,
2758
2.46M
            zero_cols,
2759
2.46M
            zero_rows);
2760
2.46M
    }
2761
24.2M
}
2762
2763
/**
2764
*******************************************************************************
2765
* \if Function name : ihevce_mpm_idx_based_filter_RDOPT_cand \endif
2766
*
2767
* \brief * Filters the RDOPT candidates based on mpm_idx
2768
*
2769
* \par   Description
2770
* Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
2771
* for a CU
2772
*
2773
* \param[in] ps_ctxt : ptr to enc loop context
2774
* \param[in] ps_cu_analyse : ptr to CU analyse structure
2775
* \param[in] ps_top_nbr_4x4 top 4x4 neighbour pointer
2776
* \param[in] ps_left_nbr_4x4 left 4x4 neighbour pointer
2777
* \param[in] pu1_luma_mode luma mode
2778
*
2779
* \returns none
2780
*
2781
* \author
2782
*  Ittiam
2783
*
2784
*******************************************************************************
2785
*/
2786
2787
void ihevce_mpm_idx_based_filter_RDOPT_cand(
2788
    ihevce_enc_loop_ctxt_t *ps_ctxt,
2789
    cu_analyse_t *ps_cu_analyse,
2790
    nbr_4x4_t *ps_left_nbr_4x4,
2791
    nbr_4x4_t *ps_top_nbr_4x4,
2792
    UWORD8 *pu1_luma_mode,
2793
    UWORD8 *pu1_eval_mark)
2794
332k
{
2795
332k
    WORD32 cu_pos_x;
2796
332k
    WORD32 cu_pos_y;
2797
332k
    nbr_avail_flags_t s_nbr;
2798
332k
    WORD32 trans_size;
2799
332k
    WORD32 au4_cand_mode_list[3];
2800
332k
    WORD32 nbr_flags;
2801
332k
    UWORD8 *pu1_intra_luma_modes;
2802
332k
    WORD32 rdopt_cand_ctr = 0;
2803
332k
    UWORD8 *pu1_luma_eval_mark;
2804
2805
332k
    cu_pos_x = ps_cu_analyse->b3_cu_pos_x << 1;
2806
332k
    cu_pos_y = ps_cu_analyse->b3_cu_pos_y << 1;
2807
332k
    trans_size = ps_cu_analyse->u1_cu_size;
2808
2809
    /* get the neighbour availability flags */
2810
332k
    nbr_flags = ihevce_get_nbr_intra(
2811
332k
        &s_nbr,
2812
332k
        ps_ctxt->pu1_ctb_nbr_map,
2813
332k
        ps_ctxt->i4_nbr_map_strd,
2814
332k
        cu_pos_x,
2815
332k
        cu_pos_y,
2816
332k
        trans_size >> 2);
2817
332k
    (void)nbr_flags;
2818
    /*Call the fun to populate luma intra pred mode fro TU=CU and use the same list fro
2819
    *TU=CU/2 also since the modes are same in both the cases.
2820
    */
2821
332k
    ihevce_populate_intra_pred_mode(
2822
332k
        ps_top_nbr_4x4->b6_luma_intra_mode,
2823
332k
        ps_left_nbr_4x4->b6_luma_intra_mode,
2824
332k
        s_nbr.u1_top_avail,
2825
332k
        s_nbr.u1_left_avail,
2826
332k
        cu_pos_y,
2827
332k
        &au4_cand_mode_list[0]);
2828
2829
    /*Loop through all the RDOPT candidates of TU=CU and TU=CU/2 and check if the current RDOPT
2830
    *cand is present in a4_cand_mode_list, If yes set eval flag to 1 else set it to zero
2831
    */
2832
2833
332k
    pu1_intra_luma_modes = pu1_luma_mode;
2834
332k
    pu1_luma_eval_mark = pu1_eval_mark;
2835
2836
1.16M
    while(pu1_intra_luma_modes[rdopt_cand_ctr] != 255)
2837
834k
    {
2838
834k
        WORD32 i;
2839
834k
        WORD32 found_flag = 0;
2840
2841
        /*1st candidate of TU=CU list and TU=CU/2 list must go through RDOPT stage
2842
        *irrespective of whether the cand is present in the mpm idx list or not
2843
        */
2844
834k
        if(rdopt_cand_ctr == 0)
2845
294k
        {
2846
294k
            rdopt_cand_ctr++;
2847
294k
            continue;
2848
294k
        }
2849
2850
1.63M
        for(i = 0; i < 3; i++)
2851
1.35M
        {
2852
1.35M
            if(pu1_intra_luma_modes[rdopt_cand_ctr] == au4_cand_mode_list[i])
2853
266k
            {
2854
266k
                found_flag = 1;
2855
266k
                break;
2856
266k
            }
2857
1.35M
        }
2858
2859
539k
        if(found_flag == 0)
2860
273k
        {
2861
273k
            pu1_luma_eval_mark[rdopt_cand_ctr] = 0;
2862
273k
        }
2863
2864
539k
        rdopt_cand_ctr++;
2865
539k
    }
2866
332k
}
2867
2868
/*!
2869
******************************************************************************
2870
* \if Function name : ihevce_intra_rdopt_cu_ntu \endif
2871
*
2872
* \brief
2873
*    Intra Coding unit function for RD opt mode
2874
*
2875
* \param[in] ps_ctxt    enc_loop module ctxt pointer
2876
* \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure
2877
* \param[in] pu1_luma_mode : pointer to luma mode
2878
* \param[in] ps_cu_analyse  pointer to cu analyse pointer
2879
* \param[in] pu1_src    pointer to source data buffer
2880
* \param[in] src_strd   source buffer stride
2881
* \param[in] pu1_cu_left pointer to left recon data buffer
2882
* \param[in] pu1_cu_top  pointer to top recon data buffer
2883
* \param[in] pu1_cu_top_left pointer to top left recon data buffer
2884
* \param[in] ps_left_nbr_4x4 : left 4x4 neighbour pointer
2885
* \param[in] ps_top_nbr_4x4 : top 4x4 neighbour pointer
2886
* \param[in] nbr_4x4_left_strd left nbr4x4 stride
2887
* \param[in] cu_left_stride left recon buffer stride
2888
* \param[in] curr_buf_idx RD opt buffer index for current usage
2889
* \param[in] func_proc_mode : function processing mode @sa TU_SIZE_WRT_CU_T
2890
*
2891
* \return
2892
*    RDopt cost
2893
*
2894
* \author
2895
*  Ittiam
2896
*
2897
*****************************************************************************
2898
*/
2899
LWORD64 ihevce_intra_rdopt_cu_ntu(
2900
    ihevce_enc_loop_ctxt_t *ps_ctxt,
2901
    enc_loop_cu_prms_t *ps_cu_prms,
2902
    void *pv_pred_org,
2903
    WORD32 pred_strd_org,
2904
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
2905
    UWORD8 *pu1_luma_mode,
2906
    cu_analyse_t *ps_cu_analyse,
2907
    void *pv_curr_src,
2908
    void *pv_cu_left,
2909
    void *pv_cu_top,
2910
    void *pv_cu_top_left,
2911
    nbr_4x4_t *ps_left_nbr_4x4,
2912
    nbr_4x4_t *ps_top_nbr_4x4,
2913
    WORD32 nbr_4x4_left_strd,
2914
    WORD32 cu_left_stride,
2915
    WORD32 curr_buf_idx,
2916
    WORD32 func_proc_mode,
2917
    WORD32 i4_alpha_stim_multiplier)
2918
8.32M
{
2919
8.32M
    enc_loop_cu_final_prms_t *ps_final_prms;
2920
8.32M
    nbr_avail_flags_t s_nbr;
2921
8.32M
    nbr_4x4_t *ps_nbr_4x4;
2922
8.32M
    nbr_4x4_t *ps_tmp_lt_4x4;
2923
8.32M
    recon_datastore_t *ps_recon_datastore;
2924
2925
8.32M
    ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
2926
2927
8.32M
    UWORD32 *pu4_nbr_flags;
2928
8.32M
    UWORD8 *pu1_intra_pred_mode;
2929
8.32M
    WORD32 cu_pos_x;
2930
8.32M
    WORD32 cu_pos_y;
2931
8.32M
    WORD32 trans_size = 0;
2932
8.32M
    UWORD8 *pu1_left;
2933
8.32M
    UWORD8 *pu1_top;
2934
8.32M
    UWORD8 *pu1_top_left;
2935
8.32M
    UWORD8 *pu1_recon;
2936
8.32M
    UWORD8 *pu1_csbf_buf;
2937
8.32M
    UWORD8 *pu1_ecd_data;
2938
8.32M
    WORD16 *pi2_deq_data;
2939
8.32M
    WORD32 deq_data_strd;
2940
8.32M
    LWORD64 total_rdopt_cost;
2941
8.32M
    WORD32 ctr;
2942
8.32M
    WORD32 left_strd;
2943
8.32M
    WORD32 i4_recon_stride;
2944
8.32M
    WORD32 csbf_strd;
2945
8.32M
    WORD32 ecd_data_bytes_cons;
2946
8.32M
    WORD32 num_4x4_in_tu;
2947
8.32M
    WORD32 num_4x4_in_cu;
2948
8.32M
    WORD32 chrm_present_flag;
2949
8.32M
    WORD32 tx_size;
2950
8.32M
    WORD32 cu_bits;
2951
8.32M
    WORD32 num_cu_parts = 0;
2952
8.32M
    WORD32 num_cands = 0;
2953
8.32M
    WORD32 cu_pos_x_8pelunits;
2954
8.32M
    WORD32 cu_pos_y_8pelunits;
2955
8.32M
    WORD32 i4_perform_rdoq;
2956
8.32M
    WORD32 i4_perform_sbh;
2957
8.32M
    UWORD8 u1_compute_spatial_ssd;
2958
8.32M
    UWORD8 u1_compute_recon;
2959
8.32M
    UWORD8 au1_intra_nxn_rdopt_ctxt_models[2][IHEVC_CAB_CTXT_END];
2960
2961
8.32M
    UWORD16 u2_num_tus_in_cu = 0;
2962
8.32M
    WORD32 is_sub_pu_in_hq = 0;
2963
    /* Get the RDOPT cost of the best CU mode for early_exit */
2964
8.32M
    LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
2965
    /* cabac context of prev intra luma pred flag */
2966
8.32M
    UWORD8 u1_prev_flag_cabac_ctxt =
2967
8.32M
        ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_INTRA_LUMA_PRED_FLAG];
2968
8.32M
    WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
2969
2970
8.32M
    UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY;
2971
2972
8.32M
    total_rdopt_cost = 0;
2973
8.32M
    ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
2974
8.32M
    ps_recon_datastore = &ps_final_prms->s_recon_datastore;
2975
8.32M
    i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
2976
8.32M
    csbf_strd = ps_ctxt->i4_cu_csbf_strd;
2977
8.32M
    pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
2978
8.32M
    pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
2979
8.32M
    pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
2980
8.32M
    deq_data_strd = ps_cu_analyse->u1_cu_size; /* deq_data stride is cu size */
2981
8.32M
    ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
2982
8.32M
    ps_tmp_lt_4x4 = ps_left_nbr_4x4;
2983
8.32M
    pu4_nbr_flags = &ps_final_prms->au4_nbr_flags[0];
2984
8.32M
    pu1_intra_pred_mode = &ps_final_prms->au1_intra_pred_mode[0];
2985
8.32M
    cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
2986
8.32M
    cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
2987
8.32M
    cu_pos_x_8pelunits = cu_pos_x;
2988
8.32M
    cu_pos_y_8pelunits = cu_pos_y;
2989
2990
    /* reset cu not coded cost */
2991
8.32M
    ps_ctxt->i8_cu_not_coded_cost = 0;
2992
2993
    /* based on the Processng mode */
2994
8.32M
    if(TU_EQ_CU == func_proc_mode)
2995
5.67M
    {
2996
5.67M
        ps_final_prms->u1_part_mode = SIZE_2Nx2N;
2997
5.67M
        trans_size = ps_cu_analyse->u1_cu_size;
2998
5.67M
        num_cu_parts = 1;
2999
5.67M
        num_cands = 1;
3000
5.67M
        u2_num_tus_in_cu = 1;
3001
5.67M
    }
3002
2.65M
    else if(TU_EQ_CU_DIV2 == func_proc_mode)
3003
2.17M
    {
3004
2.17M
        ps_final_prms->u1_part_mode = SIZE_2Nx2N;
3005
2.17M
        trans_size = ps_cu_analyse->u1_cu_size >> 1;
3006
2.17M
        num_cu_parts = 4;
3007
2.17M
        num_cands = 1;
3008
2.17M
        u2_num_tus_in_cu = 4;
3009
2.17M
    }
3010
473k
    else if(TU_EQ_SUBCU == func_proc_mode)
3011
473k
    {
3012
473k
        ps_final_prms->u1_part_mode = SIZE_NxN;
3013
473k
        trans_size = ps_cu_analyse->u1_cu_size >> 1;
3014
473k
        num_cu_parts = 4;
3015
        /*In HQ for TU = SUBPU, all 35 modes used for RDOPT instead of 3 modes */
3016
473k
        if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset)
3017
302k
        {
3018
302k
            if(ps_ctxt->i1_slice_type != BSLICE)
3019
278k
            {
3020
278k
                num_cands = (4 * MAX_INTRA_CU_CANDIDATES) + 2;
3021
278k
            }
3022
23.8k
            else
3023
23.8k
            {
3024
23.8k
                num_cands = (2 * MAX_INTRA_CU_CANDIDATES);
3025
23.8k
            }
3026
302k
        }
3027
171k
        else
3028
171k
        {
3029
171k
            num_cands = MAX_INTRA_CU_CANDIDATES;
3030
171k
        }
3031
473k
        u2_num_tus_in_cu = 4;
3032
473k
    }
3033
0
    else
3034
0
    {
3035
        /* should not enter here */
3036
0
        ASSERT(0);
3037
0
    }
3038
3039
8.32M
    if(ps_ctxt->i1_cu_qp_delta_enable)
3040
3.53M
    {
3041
3.53M
        ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, trans_size, 1);
3042
3.53M
    }
3043
3044
8.32M
    if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
3045
0
    {
3046
0
        ps_ctxt->i8_cl_ssd_lambda_qf =
3047
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
3048
0
             100.0f);
3049
0
        ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
3050
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
3051
0
             (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
3052
0
    }
3053
3054
8.32M
    u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
3055
5.05M
                             (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
3056
3.83M
                             CONVERT_SSDS_TO_SPATIAL_DOMAIN;
3057
3058
8.32M
    if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
3059
0
    {
3060
0
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
3061
0
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
3062
0
    }
3063
3064
    /* populate the neighbours */
3065
8.32M
    pu1_left = (UWORD8 *)pv_cu_left;
3066
8.32M
    pu1_top = (UWORD8 *)pv_cu_top;
3067
8.32M
    pu1_top_left = (UWORD8 *)pv_cu_top_left;
3068
8.32M
    left_strd = cu_left_stride;
3069
8.32M
    num_4x4_in_tu = (trans_size >> 2);
3070
8.32M
    num_4x4_in_cu = (ps_cu_analyse->u1_cu_size >> 2);
3071
8.32M
    chrm_present_flag = 1;
3072
8.32M
    ecd_data_bytes_cons = 0;
3073
8.32M
    cu_bits = 0;
3074
3075
    /* get the 4x4 level position of current cu */
3076
8.32M
    cu_pos_x = cu_pos_x << 1;
3077
8.32M
    cu_pos_y = cu_pos_y << 1;
3078
3079
    /* populate cu level params knowing that current is intra */
3080
8.32M
    ps_final_prms->u1_skip_flag = 0;
3081
8.32M
    ps_final_prms->u1_intra_flag = PRED_MODE_INTRA;
3082
8.32M
    ps_final_prms->u2_num_pus_in_cu = 1;
3083
    /*init the is_cu_coded flag*/
3084
8.32M
    ps_final_prms->u1_is_cu_coded = 0;
3085
8.32M
    ps_final_prms->u4_cu_sad = 0;
3086
3087
8.32M
    ps_final_prms->as_pu_enc_loop[0].b1_intra_flag = PRED_MODE_INTRA;
3088
8.32M
    ps_final_prms->as_pu_enc_loop[0].b4_wd = (trans_size >> 1) - 1;
3089
8.32M
    ps_final_prms->as_pu_enc_loop[0].b4_ht = (trans_size >> 1) - 1;
3090
8.32M
    ps_final_prms->as_pu_enc_loop[0].b4_pos_x = cu_pos_x;
3091
8.32M
    ps_final_prms->as_pu_enc_loop[0].b4_pos_y = cu_pos_y;
3092
8.32M
    ps_final_prms->as_pu_enc_loop[0].b1_merge_flag = 0;
3093
3094
8.32M
    ps_final_prms->as_col_pu_enc_loop[0].b1_intra_flag = 1;
3095
3096
    /*copy qp directly as intra cant be skip*/
3097
8.32M
    ps_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
3098
8.32M
    ps_nbr_4x4->mv.s_l0_mv.i2_mvx = 0;
3099
8.32M
    ps_nbr_4x4->mv.s_l0_mv.i2_mvy = 0;
3100
8.32M
    ps_nbr_4x4->mv.s_l1_mv.i2_mvx = 0;
3101
8.32M
    ps_nbr_4x4->mv.s_l1_mv.i2_mvy = 0;
3102
8.32M
    ps_nbr_4x4->mv.i1_l0_ref_pic_buf_id = -1;
3103
8.32M
    ps_nbr_4x4->mv.i1_l1_ref_pic_buf_id = -1;
3104
8.32M
    ps_nbr_4x4->mv.i1_l0_ref_idx = -1;
3105
8.32M
    ps_nbr_4x4->mv.i1_l1_ref_idx = -1;
3106
3107
    /* RDOPT copy States :  TU init (best until prev TU) to current */
3108
8.32M
    memcpy(
3109
8.32M
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
3110
8.32M
             .s_cabac_ctxt.au1_ctxt_models[0],
3111
8.32M
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
3112
8.32M
        IHEVC_CAB_COEFFX_PREFIX);
3113
3114
    /* RDOPT copy States :update to init state if 0 cbf */
3115
8.32M
    memcpy(
3116
8.32M
        &au1_intra_nxn_rdopt_ctxt_models[0][0],
3117
8.32M
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
3118
8.32M
        IHEVC_CAB_COEFFX_PREFIX);
3119
8.32M
    memcpy(
3120
8.32M
        &au1_intra_nxn_rdopt_ctxt_models[1][0],
3121
8.32M
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
3122
8.32M
        IHEVC_CAB_COEFFX_PREFIX);
3123
3124
    /* loop for all partitions in CU  blocks */
3125
23.4M
    for(ctr = 0; ctr < num_cu_parts; ctr++)
3126
15.9M
    {
3127
15.9M
        UWORD8 *pu1_curr_mode;
3128
15.9M
        WORD32 cand_ctr;
3129
15.9M
        WORD32 nbr_flags;
3130
3131
        /* for NxN case to track the best mode       */
3132
        /* for other cases zeroth index will be used */
3133
15.9M
        intra_prev_rem_flags_t as_intra_prev_rem[2];
3134
15.9M
        LWORD64 ai8_cand_rdopt_cost[2];
3135
15.9M
        UWORD32 au4_tu_sad[2];
3136
15.9M
        WORD32 ai4_tu_bits[2];
3137
15.9M
        WORD32 ai4_cbf[2];
3138
15.9M
        WORD32 ai4_curr_bytes[2];
3139
15.9M
        WORD32 ai4_zero_col[2];
3140
15.9M
        WORD32 ai4_zero_row[2];
3141
        /* To store the pred, coeff and dequant for TU_EQ_SUBCU case (since mul.
3142
        cand. are there) ping-pong buffer to store the best and current */
3143
15.9M
        UWORD8 au1_cur_pred_data[2][MIN_TU_SIZE * MIN_TU_SIZE];
3144
15.9M
        UWORD8 au1_intra_coeffs[2][MAX_SCAN_COEFFS_BYTES_4x4];
3145
15.9M
        WORD16 ai2_intra_deq_coeffs[2][MIN_TU_SIZE * MIN_TU_SIZE];
3146
        /* Context models stored for RDopt store and restore purpose */
3147
3148
15.9M
        UWORD8 au1_recon_availability[2];
3149
3150
15.9M
        WORD32 best_cand_idx = 0;
3151
15.9M
        LWORD64 best_cand_cost = MAX_COST_64;
3152
        /* counters to toggle b/w best and current */
3153
15.9M
        WORD32 best_intra_buf_idx = 1;
3154
15.9M
        WORD32 curr_intra_buf_idx = 0;
3155
3156
        /* copy the mode pointer to be used in inner loop */
3157
15.9M
        pu1_curr_mode = pu1_luma_mode;
3158
3159
        /* get the neighbour availability flags */
3160
15.9M
        nbr_flags = ihevce_get_nbr_intra(
3161
15.9M
            &s_nbr,
3162
15.9M
            ps_ctxt->pu1_ctb_nbr_map,
3163
15.9M
            ps_ctxt->i4_nbr_map_strd,
3164
15.9M
            cu_pos_x,
3165
15.9M
            cu_pos_y,
3166
15.9M
            num_4x4_in_tu);
3167
3168
        /* copy the nbr flags for chroma reuse */
3169
15.9M
        if(4 != trans_size)
3170
12.1M
        {
3171
12.1M
            *pu4_nbr_flags = nbr_flags;
3172
12.1M
        }
3173
3.81M
        else if(1 == chrm_present_flag)
3174
975k
        {
3175
            /* compute the avail flags assuming luma trans is 8x8 */
3176
            /* get the neighbour availability flags */
3177
975k
            *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
3178
975k
                ps_ctxt->pu1_ctb_nbr_map,
3179
975k
                ps_ctxt->i4_nbr_map_strd,
3180
975k
                cu_pos_x,
3181
975k
                cu_pos_y,
3182
975k
                (num_4x4_in_tu << 1),
3183
975k
                (num_4x4_in_tu << 1));
3184
975k
        }
3185
3186
15.9M
        u1_compute_recon = !u1_compute_spatial_ssd && ((num_cu_parts > 1) && (ctr < 3));
3187
3188
15.9M
        if(!ctr && (u1_compute_spatial_ssd || u1_compute_recon))
3189
5.53M
        {
3190
5.53M
            ps_recon_datastore->u1_is_lumaRecon_available = 1;
3191
5.53M
        }
3192
10.4M
        else if(!ctr)
3193
2.78M
        {
3194
2.78M
            ps_recon_datastore->u1_is_lumaRecon_available = 0;
3195
2.78M
        }
3196
3197
15.9M
        ihevc_intra_pred_luma_ref_substitution_fptr =
3198
15.9M
            ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
3199
3200
        /* call reference array substitution */
3201
15.9M
        ihevc_intra_pred_luma_ref_substitution_fptr(
3202
15.9M
            pu1_top_left,
3203
15.9M
            pu1_top,
3204
15.9M
            pu1_left,
3205
15.9M
            left_strd,
3206
15.9M
            trans_size,
3207
15.9M
            nbr_flags,
3208
15.9M
            (UWORD8 *)ps_ctxt->pv_ref_sub_out,
3209
15.9M
            1);
3210
3211
        /* Intra Mode gating based on MPM cand list and encoder quality preset */
3212
15.9M
        if((ps_ctxt->i1_slice_type != ISLICE) && (TU_EQ_SUBCU == func_proc_mode) &&
3213
650k
           (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
3214
131k
        {
3215
131k
            ihevce_mpm_idx_based_filter_RDOPT_cand(
3216
131k
                ps_ctxt,
3217
131k
                ps_cu_analyse,
3218
131k
                ps_left_nbr_4x4,
3219
131k
                ps_top_nbr_4x4,
3220
131k
                pu1_luma_mode,
3221
131k
                &ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][0]);
3222
131k
        }
3223
3224
15.9M
        if((TU_EQ_SUBCU == func_proc_mode) && (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
3225
1.19M
           (ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr] >= MAX_INTRA_CU_CANDIDATES))
3226
1.19M
        {
3227
1.19M
            WORD32 ai4_mpm_mode_list[3];
3228
1.19M
            WORD32 i;
3229
3230
1.19M
            WORD32 i4_curr_index = ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr];
3231
3232
1.19M
            ihevce_populate_intra_pred_mode(
3233
1.19M
                ps_top_nbr_4x4->b6_luma_intra_mode,
3234
1.19M
                ps_tmp_lt_4x4->b6_luma_intra_mode,
3235
1.19M
                s_nbr.u1_top_avail,
3236
1.19M
                s_nbr.u1_left_avail,
3237
1.19M
                cu_pos_y,
3238
1.19M
                &ai4_mpm_mode_list[0]);
3239
3240
4.77M
            for(i = 0; i < 3; i++)
3241
3.57M
            {
3242
3.57M
                if(ps_cu_analyse->s_cu_intra_cand
3243
3.57M
                       .au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] == 0)
3244
642k
                {
3245
642k
                    ASSERT(ai4_mpm_mode_list[i] < 35);
3246
3247
642k
                    ps_cu_analyse->s_cu_intra_cand
3248
642k
                        .au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] = 1;
3249
642k
                    pu1_luma_mode[i4_curr_index] = ai4_mpm_mode_list[i];
3250
642k
                    ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr]++;
3251
642k
                    i4_curr_index++;
3252
642k
                }
3253
3.57M
            }
3254
3255
1.19M
            pu1_luma_mode[i4_curr_index] = 255;
3256
1.19M
        }
3257
3258
        /* loop over candidates for each partition */
3259
39.7M
        for(cand_ctr = 0; cand_ctr < num_cands; cand_ctr++)
3260
25.1M
        {
3261
25.1M
            WORD32 curr_pred_mode;
3262
25.1M
            WORD32 bits = 0;
3263
25.1M
            LWORD64 curr_cost;
3264
25.1M
            WORD32 luma_pred_func_idx;
3265
25.1M
            UWORD8 *pu1_curr_ecd_data;
3266
25.1M
            WORD16 *pi2_curr_deq_data;
3267
25.1M
            WORD32 curr_deq_data_strd;
3268
25.1M
            WORD32 pred_strd;
3269
25.1M
            UWORD8 *pu1_pred;
3270
3271
            /* if NXN case the recon and ecd data is stored in temp buffers */
3272
25.1M
            if(TU_EQ_SUBCU == func_proc_mode)
3273
11.0M
            {
3274
11.0M
                pu1_pred = &au1_cur_pred_data[curr_intra_buf_idx][0];
3275
11.0M
                pred_strd = trans_size;
3276
11.0M
                pu1_curr_ecd_data = &au1_intra_coeffs[curr_intra_buf_idx][0];
3277
11.0M
                pi2_curr_deq_data = &ai2_intra_deq_coeffs[curr_intra_buf_idx][0];
3278
11.0M
                curr_deq_data_strd = trans_size;
3279
3280
11.0M
                ASSERT(trans_size == MIN_TU_SIZE);
3281
11.0M
            }
3282
14.0M
            else
3283
14.0M
            {
3284
14.0M
                pu1_pred = (UWORD8 *)pv_pred_org;
3285
14.0M
                pred_strd = pred_strd_org;
3286
14.0M
                pu1_curr_ecd_data = pu1_ecd_data;
3287
14.0M
                pi2_curr_deq_data = pi2_deq_data;
3288
14.0M
                curr_deq_data_strd = deq_data_strd;
3289
14.0M
            }
3290
3291
25.1M
            pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[curr_intra_buf_idx]) +
3292
25.1M
                        (ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride;
3293
3294
25.1M
            if(is_sub_pu_in_hq == 1)
3295
0
            {
3296
0
                curr_pred_mode = cand_ctr;
3297
0
            }
3298
25.1M
            else
3299
25.1M
            {
3300
25.1M
                curr_pred_mode = pu1_curr_mode[cand_ctr];
3301
25.1M
            }
3302
3303
            /* If the candidate mode is 255, then break */
3304
25.1M
            if(255 == curr_pred_mode)
3305
1.36M
            {
3306
1.36M
                break;
3307
1.36M
            }
3308
23.7M
            else if(250 == curr_pred_mode)
3309
0
            {
3310
0
                continue;
3311
0
            }
3312
3313
            /* check if this mode needs to be evaluated or not. For 2nx2n cases, this   */
3314
            /* function will be called once per candidate, so this check has been done  */
3315
            /* outside this function call. For NxN case, this function will be called   */
3316
            /* only once, and all the candidates will be evaluated here.                */
3317
23.7M
            if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3)
3318
6.62M
            {
3319
6.62M
                if((TU_EQ_SUBCU == func_proc_mode) &&
3320
1.85M
                   (0 == ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][cand_ctr]))
3321
142k
                {
3322
142k
                    continue;
3323
142k
                }
3324
6.62M
            }
3325
3326
            /* call reference filtering */
3327
23.6M
            ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr(
3328
23.6M
                (UWORD8 *)ps_ctxt->pv_ref_sub_out,
3329
23.6M
                trans_size,
3330
23.6M
                (UWORD8 *)ps_ctxt->pv_ref_filt_out,
3331
23.6M
                curr_pred_mode,
3332
23.6M
                ps_ctxt->i1_strong_intra_smoothing_enable_flag);
3333
3334
            /* use the look up to get the function idx */
3335
23.6M
            luma_pred_func_idx = g_i4_ip_funcs[curr_pred_mode];
3336
3337
            /* call the intra prediction function */
3338
23.6M
            ps_ctxt->apf_lum_ip[luma_pred_func_idx](
3339
23.6M
                (UWORD8 *)ps_ctxt->pv_ref_filt_out,
3340
23.6M
                1,
3341
23.6M
                pu1_pred,
3342
23.6M
                pred_strd,
3343
23.6M
                trans_size,
3344
23.6M
                curr_pred_mode);
3345
3346
            /* populate the coeffs scan idx */
3347
23.6M
            ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
3348
3349
            /* for luma 4x4 and 8x8 transforms based on intra pred mode scan is chosen */
3350
23.6M
            if(trans_size < 16)
3351
17.4M
            {
3352
                /* for modes from 22 upto 30 horizontal scan is used */
3353
17.4M
                if((curr_pred_mode > 21) && (curr_pred_mode < 31))
3354
4.68M
                {
3355
4.68M
                    ps_ctxt->i4_scan_idx = SCAN_HORZ;
3356
4.68M
                }
3357
                /* for modes from 6 upto 14 vertical scan is used */
3358
12.7M
                else if((curr_pred_mode > 5) && (curr_pred_mode < 15))
3359
3.84M
                {
3360
3.84M
                    ps_ctxt->i4_scan_idx = SCAN_VERT;
3361
3.84M
                }
3362
17.4M
            }
3363
3364
            /* RDOPT copy States :  TU init (best until prev TU) to current */
3365
23.6M
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3366
23.6M
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
3367
23.6M
                        .s_cabac_ctxt.au1_ctxt_models[0] +
3368
23.6M
                    IHEVC_CAB_COEFFX_PREFIX,
3369
23.6M
                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
3370
23.6M
                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3371
3372
23.6M
            i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
3373
23.6M
            i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
3374
3375
#if DISABLE_RDOQ_INTRA
3376
            i4_perform_rdoq = 0;
3377
#endif
3378
3379
            /*2 Multi-dimensional array based on trans size of rounding factor to be added here */
3380
            /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
3381
            /* Currently the complete array will contain only single value*/
3382
            /*The rounding factor is calculated with the formula
3383
            Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
3384
            rounding factor = (1 - DeadZone Val)
3385
3386
            Assumption: Cabac states of All the sub-blocks in the TU are considered independent
3387
            */
3388
23.6M
            if((ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING))
3389
17.1M
            {
3390
17.1M
                if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0))
3391
0
                {
3392
0
                    double i4_lamda_modifier;
3393
3394
0
                    if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
3395
0
                    {
3396
0
                        i4_lamda_modifier =
3397
0
                            ps_ctxt->i4_lamda_modifier *
3398
0
                            CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
3399
0
                    }
3400
0
                    else
3401
0
                    {
3402
0
                        i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
3403
0
                    }
3404
0
                    if(ps_ctxt->i4_use_const_lamda_modifier)
3405
0
                    {
3406
0
                        if(ISLICE == ps_ctxt->i1_slice_type)
3407
0
                        {
3408
0
                            i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
3409
0
                        }
3410
0
                        else
3411
0
                        {
3412
0
                            i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
3413
0
                        }
3414
0
                    }
3415
3416
0
                    ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
3417
0
                        &ps_ctxt->i4_quant_round_tu[0][0];
3418
0
                    ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
3419
0
                        &ps_ctxt->i4_quant_round_tu[1][0];
3420
3421
0
                    memset(
3422
0
                        ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
3423
0
                        0,
3424
0
                        trans_size * trans_size * sizeof(WORD32));
3425
0
                    memset(
3426
0
                        ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
3427
0
                        0,
3428
0
                        trans_size * trans_size * sizeof(WORD32));
3429
3430
0
                    ihevce_quant_rounding_factor_gen(
3431
0
                        trans_size,
3432
0
                        1,
3433
0
                        &ps_ctxt->s_rdopt_entropy_ctxt,
3434
0
                        ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
3435
0
                        ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
3436
0
                        i4_lamda_modifier,
3437
0
                        1);
3438
0
                }
3439
17.1M
                else
3440
17.1M
                {
3441
17.1M
                    ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
3442
17.1M
                        ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3];
3443
17.1M
                    ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
3444
17.1M
                        ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3];
3445
17.1M
                }
3446
17.1M
            }
3447
3448
            /* call T Q IT IQ and recon function */
3449
23.6M
            ai4_cbf[curr_intra_buf_idx] = ihevce_t_q_iq_ssd_scan_fxn(
3450
23.6M
                ps_ctxt,
3451
23.6M
                pu1_pred,
3452
23.6M
                pred_strd,
3453
23.6M
                (UWORD8 *)pv_curr_src,
3454
23.6M
                src_strd,
3455
23.6M
                pi2_curr_deq_data,
3456
23.6M
                curr_deq_data_strd,
3457
23.6M
                pu1_recon,
3458
23.6M
                i4_recon_stride,
3459
23.6M
                pu1_curr_ecd_data,
3460
23.6M
                pu1_csbf_buf,
3461
23.6M
                csbf_strd,
3462
23.6M
                trans_size,
3463
23.6M
                PRED_MODE_INTRA,
3464
23.6M
                &ai8_cand_rdopt_cost[curr_intra_buf_idx],
3465
23.6M
                &ai4_curr_bytes[curr_intra_buf_idx],
3466
23.6M
                &ai4_tu_bits[curr_intra_buf_idx],
3467
23.6M
                &au4_tu_sad[curr_intra_buf_idx],
3468
23.6M
                &ai4_zero_col[curr_intra_buf_idx],
3469
23.6M
                &ai4_zero_row[curr_intra_buf_idx],
3470
23.6M
                &au1_recon_availability[curr_intra_buf_idx],
3471
23.6M
                i4_perform_rdoq,
3472
23.6M
                i4_perform_sbh,
3473
23.6M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
3474
23.6M
                i4_alpha_stim_multiplier,
3475
23.6M
                u1_is_cu_noisy,
3476
23.6M
#endif
3477
23.6M
                u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
3478
23.6M
                1 /*early_cbf */
3479
23.6M
            );
3480
3481
#if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
3482
            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
3483
            {
3484
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
3485
                ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
3486
                    pv_curr_src,
3487
                    src_strd,
3488
                    pu1_pred,
3489
                    pred_strd,
3490
                    ai8_cand_rdopt_cost[curr_intra_buf_idx],
3491
                    i4_alpha_stim_multiplier,
3492
                    trans_size,
3493
                    0,
3494
                    ps_ctxt->u1_enable_psyRDOPT,
3495
                    NULL_PLANE);
3496
#else
3497
                if(u1_compute_spatial_ssd && au1_recon_availability[curr_intra_buf_idx])
3498
                {
3499
                    ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
3500
                        pv_curr_src,
3501
                        src_strd,
3502
                        pu1_recon,
3503
                        i4_recon_stride,
3504
                        ai8_cand_rdopt_cost[curr_intra_buf_idx],
3505
                        i4_alpha_stim_multiplier,
3506
                        trans_size,
3507
                        0,
3508
                        ps_ctxt->u1_enable_psyRDOPT,
3509
                        NULL_PLANE);
3510
                }
3511
                else
3512
                {
3513
                    ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
3514
                        pv_curr_src,
3515
                        src_strd,
3516
                        pu1_pred,
3517
                        pred_strd,
3518
                        ai8_cand_rdopt_cost[curr_intra_buf_idx],
3519
                        i4_alpha_stim_multiplier,
3520
                        trans_size,
3521
                        0,
3522
                        ps_ctxt->u1_enable_psyRDOPT,
3523
                        NULL_PLANE);
3524
                }
3525
#endif
3526
            }
3527
#endif
3528
3529
23.6M
            if(TU_EQ_SUBCU == func_proc_mode)
3530
9.54M
            {
3531
9.54M
                ASSERT(ai4_curr_bytes[curr_intra_buf_idx] < MAX_SCAN_COEFFS_BYTES_4x4);
3532
9.54M
            }
3533
3534
            /* based on CBF/No CBF copy the corresponding state */
3535
23.6M
            if(0 == ai4_cbf[curr_intra_buf_idx])
3536
15.8M
            {
3537
                /* RDOPT copy States :update to init state if 0 cbf */
3538
15.8M
                COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3539
15.8M
                    &au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] +
3540
15.8M
                        IHEVC_CAB_COEFFX_PREFIX,
3541
15.8M
                    &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
3542
15.8M
                    IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3543
15.8M
            }
3544
7.77M
            else
3545
7.77M
            {
3546
                /* RDOPT copy States :update to new state only if CBF is non zero */
3547
7.77M
                COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3548
7.77M
                    &au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] +
3549
7.77M
                        IHEVC_CAB_COEFFX_PREFIX,
3550
7.77M
                    &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
3551
7.77M
                            .s_cabac_ctxt.au1_ctxt_models[0] +
3552
7.77M
                        IHEVC_CAB_COEFFX_PREFIX,
3553
7.77M
                    IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3554
7.77M
            }
3555
3556
            /* call the function which perform intra mode prediction */
3557
23.6M
            ihevce_intra_pred_mode_signaling(
3558
23.6M
                ps_top_nbr_4x4->b6_luma_intra_mode,
3559
23.6M
                ps_tmp_lt_4x4->b6_luma_intra_mode,
3560
23.6M
                s_nbr.u1_top_avail,
3561
23.6M
                s_nbr.u1_left_avail,
3562
23.6M
                cu_pos_y,
3563
23.6M
                curr_pred_mode,
3564
23.6M
                &as_intra_prev_rem[curr_intra_buf_idx]);
3565
            /******************************************************************/
3566
            /* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN
3567
            The bits for these are evaluated for every RDO mode of current subcu
3568
            as they can significantly contribute to RDO cost.  Note that these
3569
            bits are not accounted for here (ai8_cand_rdopt_cost) as they
3570
            are accounted for in encode_cu call later */
3571
3572
            /******************************************************************/
3573
            /* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN
3574
            The bits for these are evaluated for every RDO mode of current subcu
3575
            as they can significantly contribute to RDO cost.  Note that these
3576
            bits are not accounted for here (ai8_cand_rdopt_cost) as they
3577
            are accounted for in encode_cu call later */
3578
3579
            /* Estimate bits to encode prev rem flag  for NXN mode */
3580
23.6M
            {
3581
23.6M
                WORD32 bits_frac = gau2_ihevce_cabac_bin_to_bits
3582
23.6M
                    [u1_prev_flag_cabac_ctxt ^
3583
23.6M
                     as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag];
3584
3585
                /* rounding the fractional bits to nearest integer */
3586
23.6M
                bits = ((bits_frac + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q);
3587
23.6M
            }
3588
3589
            /* based on prev flag all the mpmidx bits and rem bits */
3590
23.6M
            if(1 == as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag)
3591
17.2M
            {
3592
                /* mpm_idx */
3593
17.2M
                bits += as_intra_prev_rem[curr_intra_buf_idx].b2_mpm_idx ? 2 : 1;
3594
17.2M
            }
3595
6.34M
            else
3596
6.34M
            {
3597
                /* rem intra mode */
3598
6.34M
                bits += 5;
3599
6.34M
            }
3600
3601
23.6M
            bits += ai4_tu_bits[curr_intra_buf_idx];
3602
3603
            /* compute the total cost for current candidate */
3604
23.6M
            curr_cost = ai8_cand_rdopt_cost[curr_intra_buf_idx];
3605
3606
            /* get the final ssd cost */
3607
23.6M
            curr_cost +=
3608
23.6M
                COMPUTE_RATE_COST_CLIP30(bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
3609
3610
            /* check of the best candidate cost */
3611
23.6M
            if(curr_cost < best_cand_cost)
3612
17.0M
            {
3613
17.0M
                best_cand_cost = curr_cost;
3614
17.0M
                best_cand_idx = cand_ctr;
3615
17.0M
                best_intra_buf_idx = curr_intra_buf_idx;
3616
17.0M
                curr_intra_buf_idx = !curr_intra_buf_idx;
3617
17.0M
            }
3618
23.6M
        }
3619
3620
        /***************    For TU_EQ_SUBCU case    *****************/
3621
        /* Copy the pred for best cand. to the final pred array     */
3622
        /* Copy the iq-coeff for best cand. to the final array      */
3623
        /* copy the best coeffs data to final buffer                */
3624
15.9M
        if(TU_EQ_SUBCU == func_proc_mode)
3625
1.87M
        {
3626
            /* Copy the pred for best cand. to the final pred array */
3627
3628
1.87M
            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
3629
1.87M
                (UWORD8 *)pv_pred_org,
3630
1.87M
                pred_strd_org,
3631
1.87M
                &au1_cur_pred_data[best_intra_buf_idx][0],
3632
1.87M
                trans_size,
3633
1.87M
                trans_size,
3634
1.87M
                trans_size);
3635
3636
            /* Copy the deq-coeff for best cand. to the final array */
3637
3638
1.87M
            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
3639
1.87M
                (UWORD8 *)pi2_deq_data,
3640
1.87M
                deq_data_strd << 1,
3641
1.87M
                (UWORD8 *)&ai2_intra_deq_coeffs[best_intra_buf_idx][0],
3642
1.87M
                trans_size << 1,
3643
1.87M
                trans_size << 1,
3644
1.87M
                trans_size);
3645
            /* copy the coeffs to final cu ecd bytes buffer */
3646
1.87M
            memcpy(
3647
1.87M
                pu1_ecd_data,
3648
1.87M
                &au1_intra_coeffs[best_intra_buf_idx][0],
3649
1.87M
                ai4_curr_bytes[best_intra_buf_idx]);
3650
3651
1.87M
            pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[best_intra_buf_idx]) +
3652
1.87M
                        (ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride;
3653
1.87M
        }
3654
3655
        /*----------   Calculate Recon for the best INTRA mode     ---------*/
3656
        /* TU_EQ_CU case : No need for recon, otherwise recon is required   */
3657
        /* Compute recon only for the best mode for TU_EQ_SUBCU case        */
3658
15.9M
        if(u1_compute_recon)
3659
5.00M
        {
3660
5.00M
            ihevce_it_recon_fxn(
3661
5.00M
                ps_ctxt,
3662
5.00M
                pi2_deq_data,
3663
5.00M
                deq_data_strd,
3664
5.00M
                (UWORD8 *)pv_pred_org,
3665
5.00M
                pred_strd_org,
3666
5.00M
                pu1_recon,
3667
5.00M
                i4_recon_stride,
3668
5.00M
                pu1_ecd_data,
3669
5.00M
                trans_size,
3670
5.00M
                PRED_MODE_INTRA,
3671
5.00M
                ai4_cbf[best_intra_buf_idx],
3672
5.00M
                ai4_zero_col[best_intra_buf_idx],
3673
5.00M
                ai4_zero_row[best_intra_buf_idx]);
3674
3675
5.00M
            ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx;
3676
5.00M
        }
3677
10.9M
        else if(u1_compute_spatial_ssd && au1_recon_availability[best_intra_buf_idx])
3678
6.56M
        {
3679
6.56M
            ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx;
3680
6.56M
        }
3681
4.38M
        else
3682
4.38M
        {
3683
4.38M
            ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
3684
4.38M
        }
3685
3686
        /* RDOPT copy States :update to best modes state */
3687
15.9M
        COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3688
15.9M
            &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
3689
15.9M
            &au1_intra_nxn_rdopt_ctxt_models[best_intra_buf_idx][0] + IHEVC_CAB_COEFFX_PREFIX,
3690
15.9M
            IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3691
3692
        /* copy the prev,mpm_idx and rem modes from best cand */
3693
15.9M
        ps_final_prms->as_intra_prev_rem[ctr] = as_intra_prev_rem[best_intra_buf_idx];
3694
3695
        /* update the cabac context of prev intra pred mode flag */
3696
15.9M
        u1_prev_flag_cabac_ctxt = gau1_ihevc_next_state
3697
15.9M
            [(u1_prev_flag_cabac_ctxt << 1) |
3698
15.9M
             as_intra_prev_rem[best_intra_buf_idx].b1_prev_intra_luma_pred_flag];
3699
3700
        /* accumulate the TU bits into cu bits */
3701
15.9M
        cu_bits += ai4_tu_bits[best_intra_buf_idx];
3702
3703
        /* copy the intra pred mode for chroma reuse */
3704
15.9M
        if(is_sub_pu_in_hq == 0)
3705
15.9M
        {
3706
15.9M
            *pu1_intra_pred_mode = pu1_curr_mode[best_cand_idx];
3707
15.9M
        }
3708
0
        else
3709
0
        {
3710
0
            *pu1_intra_pred_mode = best_cand_idx;
3711
0
        }
3712
3713
        /* Store luma mode as chroma mode. If chroma prcs happens, and
3714
        if a diff. mode wins, it should update this!! */
3715
15.9M
        if(1 == chrm_present_flag)
3716
13.1M
        {
3717
13.1M
            if(is_sub_pu_in_hq == 0)
3718
13.1M
            {
3719
13.1M
                ps_final_prms->u1_chroma_intra_pred_actual_mode =
3720
13.1M
                    ((ps_ctxt->u1_chroma_array_type == 2)
3721
13.1M
                         ? gau1_chroma422_intra_angle_mapping[pu1_curr_mode[best_cand_idx]]
3722
13.1M
                         : pu1_curr_mode[best_cand_idx]);
3723
13.1M
            }
3724
0
            else
3725
0
            {
3726
0
                ps_final_prms->u1_chroma_intra_pred_actual_mode =
3727
0
                    ((ps_ctxt->u1_chroma_array_type == 2)
3728
0
                         ? gau1_chroma422_intra_angle_mapping[best_cand_idx]
3729
0
                         : best_cand_idx);
3730
0
            }
3731
3732
13.1M
            ps_final_prms->u1_chroma_intra_pred_mode = 4;
3733
13.1M
        }
3734
3735
        /*remember the cbf flag to replicate qp for 4x4 neighbour*/
3736
15.9M
        ps_final_prms->u1_is_cu_coded |= ai4_cbf[best_intra_buf_idx];
3737
3738
        /*accumulate ssd over all TU of intra CU*/
3739
15.9M
        ps_final_prms->u4_cu_sad += au4_tu_sad[best_intra_buf_idx];
3740
3741
        /* update the bytes */
3742
15.9M
        ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
3743
15.9M
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed =
3744
15.9M
            ai4_curr_bytes[best_intra_buf_idx];
3745
        /* update the zero_row and col info for the final mode */
3746
15.9M
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col =
3747
15.9M
            ai4_zero_col[best_intra_buf_idx];
3748
15.9M
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row =
3749
15.9M
            ai4_zero_row[best_intra_buf_idx];
3750
3751
15.9M
        ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
3752
3753
        /* update the total bytes cons */
3754
15.9M
        ecd_data_bytes_cons += ai4_curr_bytes[best_intra_buf_idx];
3755
15.9M
        pu1_ecd_data += ai4_curr_bytes[best_intra_buf_idx];
3756
3757
15.9M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = ai4_cbf[best_intra_buf_idx];
3758
15.9M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
3759
15.9M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
3760
15.9M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
3761
15.9M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
3762
15.9M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag;
3763
15.9M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp;
3764
15.9M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0;
3765
15.9M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0;
3766
15.9M
        GETRANGE(tx_size, trans_size);
3767
15.9M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
3768
15.9M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x;
3769
15.9M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y;
3770
3771
        /* repiclate the nbr 4x4 structure for all 4x4 blocks current TU */
3772
15.9M
        ps_nbr_4x4->b1_skip_flag = 0;
3773
15.9M
        ps_nbr_4x4->b1_intra_flag = 1;
3774
15.9M
        ps_nbr_4x4->b1_pred_l0_flag = 0;
3775
15.9M
        ps_nbr_4x4->b1_pred_l1_flag = 0;
3776
3777
15.9M
        if(is_sub_pu_in_hq == 0)
3778
15.9M
        {
3779
15.9M
            ps_nbr_4x4->b6_luma_intra_mode = pu1_curr_mode[best_cand_idx];
3780
15.9M
        }
3781
0
        else
3782
0
        {
3783
0
            ps_nbr_4x4->b6_luma_intra_mode = best_cand_idx;
3784
0
        }
3785
3786
15.9M
        ps_nbr_4x4->b1_y_cbf = ai4_cbf[best_intra_buf_idx];
3787
3788
        /* since tu size can be less than cusize, replication is done with strd */
3789
15.9M
        {
3790
15.9M
            WORD32 i, j;
3791
15.9M
            nbr_4x4_t *ps_tmp_4x4;
3792
3793
15.9M
            ps_tmp_4x4 = ps_nbr_4x4;
3794
3795
63.3M
            for(i = 0; i < num_4x4_in_tu; i++)
3796
47.3M
            {
3797
257M
                for(j = 0; j < num_4x4_in_tu; j++)
3798
209M
                {
3799
209M
                    ps_tmp_4x4[j] = *ps_nbr_4x4;
3800
209M
                }
3801
                /* row level update*/
3802
47.3M
                ps_tmp_4x4 += num_4x4_in_cu;
3803
47.3M
            }
3804
15.9M
        }
3805
3806
15.9M
        if(TU_EQ_SUBCU == func_proc_mode)
3807
1.87M
        {
3808
1.87M
            pu1_luma_mode += ((MAX_INTRA_CU_CANDIDATES * 4) + 2 + 1);
3809
1.87M
        }
3810
3811
15.9M
        if((num_cu_parts > 1) && (ctr < 3))
3812
7.77M
        {
3813
            /* set the neighbour map to 1 */
3814
7.77M
            ihevce_set_nbr_map(
3815
7.77M
                ps_ctxt->pu1_ctb_nbr_map,
3816
7.77M
                ps_ctxt->i4_nbr_map_strd,
3817
7.77M
                cu_pos_x,
3818
7.77M
                cu_pos_y,
3819
7.77M
                trans_size >> 2,
3820
7.77M
                1);
3821
3822
            /* block level updates block number (1 & 3 )*/
3823
7.77M
            pv_curr_src = (UWORD8 *)pv_curr_src + trans_size;
3824
7.77M
            pv_pred_org = (UWORD8 *)pv_pred_org + trans_size;
3825
7.77M
            pi2_deq_data += trans_size;
3826
3827
7.77M
            switch(ctr)
3828
7.77M
            {
3829
2.65M
            case 0:
3830
2.65M
            {
3831
2.65M
                pu1_left = pu1_recon + trans_size - 1;
3832
2.65M
                pu1_top += trans_size;
3833
2.65M
                pu1_top_left = pu1_top - 1;
3834
2.65M
                left_strd = i4_recon_stride;
3835
3836
2.65M
                break;
3837
0
            }
3838
2.58M
            case 1:
3839
2.58M
            {
3840
2.58M
                ASSERT(
3841
2.58M
                    (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 0) ||
3842
2.58M
                    (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 1));
3843
3844
                /* Since the 'lumaRefSubstitution' function expects both Top and */
3845
                /* TopRight recon pixels to be present in the same buffer */
3846
2.58M
                if(ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] !=
3847
2.58M
                   ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1])
3848
130k
                {
3849
130k
                    UWORD8 *pu1_src =
3850
130k
                        ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3851
130k
                             [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) +
3852
130k
                        trans_size;
3853
130k
                    UWORD8 *pu1_dst =
3854
130k
                        ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3855
130k
                             [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) +
3856
130k
                        trans_size;
3857
3858
130k
                    ps_ctxt->s_cmn_opt_func.pf_copy_2d(
3859
130k
                        pu1_dst, i4_recon_stride, pu1_src, i4_recon_stride, trans_size, trans_size);
3860
3861
130k
                    ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] =
3862
130k
                        ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0];
3863
130k
                }
3864
3865
2.58M
                pu1_left = (UWORD8 *)pv_cu_left + trans_size * cu_left_stride;
3866
2.58M
                pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3867
2.58M
                               [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) +
3868
2.58M
                          (trans_size - 1) * i4_recon_stride;
3869
2.58M
                pu1_top_left = pu1_left - cu_left_stride;
3870
2.58M
                left_strd = cu_left_stride;
3871
3872
2.58M
                break;
3873
2.58M
            }
3874
2.54M
            case 2:
3875
2.54M
            {
3876
2.54M
                ASSERT(
3877
2.54M
                    (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 0) ||
3878
2.54M
                    (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 1));
3879
3880
2.54M
                pu1_left = pu1_recon + trans_size - 1;
3881
2.54M
                pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3882
2.54M
                               [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) +
3883
2.54M
                          (trans_size - 1) * i4_recon_stride + trans_size;
3884
2.54M
                pu1_top_left = pu1_top - 1;
3885
2.54M
                left_strd = i4_recon_stride;
3886
3887
2.54M
                break;
3888
2.54M
            }
3889
7.77M
            }
3890
3891
7.77M
            pu1_csbf_buf += num_4x4_in_tu;
3892
7.77M
            cu_pos_x += num_4x4_in_tu;
3893
7.77M
            ps_nbr_4x4 += num_4x4_in_tu;
3894
7.77M
            ps_top_nbr_4x4 += num_4x4_in_tu;
3895
7.77M
            ps_tmp_lt_4x4 = ps_nbr_4x4 - 1;
3896
3897
7.77M
            pu1_intra_pred_mode++;
3898
3899
            /* after 2 blocks increment the pointers to bottom blocks */
3900
7.77M
            if(1 == ctr)
3901
2.58M
            {
3902
2.58M
                pv_curr_src = (UWORD8 *)pv_curr_src - (trans_size << 1);
3903
2.58M
                pv_curr_src = (UWORD8 *)pv_curr_src + (trans_size * src_strd);
3904
3905
2.58M
                pv_pred_org = (UWORD8 *)pv_pred_org - (trans_size << 1);
3906
2.58M
                pv_pred_org = (UWORD8 *)pv_pred_org + (trans_size * pred_strd_org);
3907
2.58M
                pi2_deq_data -= (trans_size << 1);
3908
2.58M
                pi2_deq_data += (trans_size * deq_data_strd);
3909
3910
2.58M
                pu1_csbf_buf -= (num_4x4_in_tu << 1);
3911
2.58M
                pu1_csbf_buf += (num_4x4_in_tu * csbf_strd);
3912
3913
2.58M
                ps_nbr_4x4 -= (num_4x4_in_tu << 1);
3914
2.58M
                ps_nbr_4x4 += (num_4x4_in_tu * num_4x4_in_cu);
3915
2.58M
                ps_top_nbr_4x4 = ps_nbr_4x4 - num_4x4_in_cu;
3916
2.58M
                ps_tmp_lt_4x4 = ps_left_nbr_4x4 + (num_4x4_in_tu * nbr_4x4_left_strd);
3917
3918
                /* decrement pos x to start */
3919
2.58M
                cu_pos_x -= (num_4x4_in_tu << 1);
3920
2.58M
                cu_pos_y += num_4x4_in_tu;
3921
2.58M
            }
3922
7.77M
        }
3923
3924
15.9M
#if RDOPT_ENABLE
3925
        /* compute the RDOPT cost for the current TU */
3926
15.9M
        ai8_cand_rdopt_cost[best_intra_buf_idx] += COMPUTE_RATE_COST_CLIP30(
3927
15.9M
            ai4_tu_bits[best_intra_buf_idx], ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
3928
15.9M
#endif
3929
3930
        /* accumulate the costs */
3931
15.9M
        total_rdopt_cost += ai8_cand_rdopt_cost[best_intra_buf_idx];
3932
3933
15.9M
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
3934
15.9M
        {
3935
            /* Early exit : If the current running cost exceeds
3936
            the prev. best mode cost, break */
3937
15.9M
            if(total_rdopt_cost > prev_best_rdopt_cost)
3938
775k
            {
3939
775k
                return (total_rdopt_cost);
3940
775k
            }
3941
15.9M
        }
3942
3943
        /* if transfrom size is 4x4 then only first luma 4x4 will have chroma*/
3944
15.1M
        chrm_present_flag = (4 != trans_size) ? 1 : INTRA_PRED_CHROMA_IDX_NONE;
3945
3946
15.1M
        pu4_nbr_flags++;
3947
15.1M
    }
3948
    /* Modify the cost function for this CU. */
3949
    /* loop in for 8x8 blocks */
3950
7.54M
    if(ps_ctxt->u1_enable_psyRDOPT)
3951
0
    {
3952
0
        UWORD8 *pu1_recon_cu;
3953
0
        WORD32 recon_stride;
3954
0
        WORD32 curr_pos_x;
3955
0
        WORD32 curr_pos_y;
3956
0
        WORD32 start_index;
3957
0
        WORD32 num_horz_cu_in_ctb;
3958
0
        WORD32 cu_size;
3959
0
        WORD32 had_block_size;
3960
3961
        /* tODO: sreenivasa ctb size has to be used appropriately */
3962
0
        had_block_size = 8;
3963
0
        cu_size = ps_cu_analyse->u1_cu_size; /* todo */
3964
0
        num_horz_cu_in_ctb = 64 / had_block_size;
3965
3966
0
        curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
3967
0
        curr_pos_y = ps_cu_analyse->b3_cu_pos_y << 3; /* pel units */
3968
0
        recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
3969
0
        pu1_recon_cu =
3970
0
            ((UWORD8 *)ps_final_prms->s_recon_datastore
3971
0
                 .apv_luma_recon_bufs[ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]);
3972
        /* + \  curr_pos_x + curr_pos_y * recon_stride; */
3973
3974
        /* start index to index the source satd of curr cu int he current ctb*/
3975
0
        start_index =
3976
0
            (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
3977
3978
0
        {
3979
0
            total_rdopt_cost += ihevce_psy_rd_cost(
3980
0
                ps_ctxt->ai4_source_satd_8x8,
3981
0
                pu1_recon_cu,
3982
0
                recon_stride,
3983
0
                1,  //
3984
0
                cu_size,
3985
0
                0,  // pic type
3986
0
                0,  //layer id
3987
0
                ps_ctxt->i4_satd_lamda,  // lambda
3988
0
                start_index,
3989
0
                ps_ctxt->u1_is_input_data_hbd,
3990
0
                ps_ctxt->u4_psy_strength,
3991
0
                &ps_ctxt->s_cmn_opt_func
3992
3993
0
            );  // 8 bit
3994
0
        }
3995
0
    }
3996
3997
#if !FORCE_INTRA_TU_DEPTH_TO_0  //RATIONALISE_NUM_RDO_MODES_IN_PQ_AND_HQ
3998
7.54M
    if(TU_EQ_SUBCU == func_proc_mode)
3999
457k
    {
4000
457k
        UWORD8 au1_tu_eq_cu_div2_modes[4];
4001
457k
        UWORD8 au1_freq_of_mode[4];
4002
4003
457k
        WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D(
4004
457k
            ps_final_prms->au1_intra_pred_mode, au1_tu_eq_cu_div2_modes, au1_freq_of_mode, 4);
4005
4006
457k
        if(1 == i4_num_clusters)
4007
117k
        {
4008
117k
            ps_final_prms->u2_num_pus_in_cu = 1;
4009
117k
            ps_final_prms->u1_part_mode = SIZE_2Nx2N;
4010
117k
        }
4011
457k
    }
4012
7.54M
#endif
4013
4014
    /* store the num TUs*/
4015
7.54M
    ps_final_prms->u2_num_tus_in_cu = u2_num_tus_in_cu;
4016
4017
    /* update the bytes consumed */
4018
7.54M
    ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
4019
4020
    /* store the current cu size to final prms */
4021
7.54M
    ps_final_prms->u1_cu_size = ps_cu_analyse->u1_cu_size;
4022
4023
    /* cu bits will be having luma residual bits till this point    */
4024
    /* if zero_cbf eval is disabled then cu bits will be zero       */
4025
7.54M
    ps_final_prms->u4_cu_luma_res_bits = cu_bits;
4026
4027
    /* ------------- Chroma processing -------------- */
4028
    /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEDD preset*/
4029
7.54M
    if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
4030
6.37M
    {
4031
6.37M
        LWORD64 chrm_rdopt_cost;
4032
6.37M
        WORD32 chrm_rdopt_tu_bits;
4033
4034
        /* Store the current RDOPT cost to enable early exit in chrom_prcs */
4035
6.37M
        ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
4036
4037
6.37M
        chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
4038
6.37M
            ps_ctxt,
4039
6.37M
            curr_buf_idx,
4040
6.37M
            func_proc_mode,
4041
6.37M
            ps_chrm_cu_buf_prms->pu1_curr_src,
4042
6.37M
            ps_chrm_cu_buf_prms->i4_chrm_src_stride,
4043
6.37M
            ps_chrm_cu_buf_prms->pu1_cu_left,
4044
6.37M
            ps_chrm_cu_buf_prms->pu1_cu_top,
4045
6.37M
            ps_chrm_cu_buf_prms->pu1_cu_top_left,
4046
6.37M
            ps_chrm_cu_buf_prms->i4_cu_left_stride,
4047
6.37M
            cu_pos_x_8pelunits,
4048
6.37M
            cu_pos_y_8pelunits,
4049
6.37M
            &chrm_rdopt_tu_bits,
4050
6.37M
            i4_alpha_stim_multiplier,
4051
6.37M
            u1_is_cu_noisy);
4052
4053
6.37M
#if WEIGH_CHROMA_COST
4054
6.37M
        chrm_rdopt_cost = (LWORD64)(
4055
6.37M
            (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
4056
6.37M
             (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
4057
6.37M
            CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
4058
6.37M
#endif
4059
4060
6.37M
#if CHROMA_RDOPT_ENABLE
4061
6.37M
        total_rdopt_cost += chrm_rdopt_cost;
4062
6.37M
#endif
4063
6.37M
        cu_bits += chrm_rdopt_tu_bits;
4064
4065
        /* cu bits for chroma residual if chroma rdopt is on       */
4066
        /* if zero_cbf eval is disabled then cu bits will be zero  */
4067
6.37M
        ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
4068
4069
6.37M
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
4070
6.37M
        {
4071
            /* Early exit : If the current running cost exceeds
4072
            the prev. best mode cost, break */
4073
6.37M
            if(total_rdopt_cost > prev_best_rdopt_cost)
4074
578k
            {
4075
578k
                return (total_rdopt_cost);
4076
578k
            }
4077
6.37M
        }
4078
6.37M
    }
4079
1.16M
    else
4080
1.16M
    {}
4081
4082
    /* RDOPT copy States :  Best after all luma TUs to current */
4083
6.96M
    COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4084
6.96M
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4085
6.96M
                .s_cabac_ctxt.au1_ctxt_models[0] +
4086
6.96M
            IHEVC_CAB_COEFFX_PREFIX,
4087
6.96M
        &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4088
6.96M
        IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4089
4090
    /* get the neighbour availability flags for current cu  */
4091
6.96M
    ihevce_get_only_nbr_flag(
4092
6.96M
        &s_nbr,
4093
6.96M
        ps_ctxt->pu1_ctb_nbr_map,
4094
6.96M
        ps_ctxt->i4_nbr_map_strd,
4095
6.96M
        (cu_pos_x_8pelunits << 1),
4096
6.96M
        (cu_pos_y_8pelunits << 1),
4097
6.96M
        (trans_size << 1),
4098
6.96M
        (trans_size << 1));
4099
4100
    /* call the entropy rdo encode to get the bit estimate for current cu */
4101
    /*if ZERO_CBF eval is enabled then this function will return only CU header bits */
4102
6.96M
    {
4103
        /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
4104
6.96M
        WORD32 cbf_bits, header_bits;
4105
4106
6.96M
        header_bits = ihevce_entropy_rdo_encode_cu(
4107
6.96M
            &ps_ctxt->s_rdopt_entropy_ctxt,
4108
6.96M
            ps_final_prms,
4109
6.96M
            cu_pos_x_8pelunits,
4110
6.96M
            cu_pos_y_8pelunits,
4111
6.96M
            ps_cu_analyse->u1_cu_size,
4112
6.96M
            s_nbr.u1_top_avail,
4113
6.96M
            s_nbr.u1_left_avail,
4114
6.96M
            &ps_final_prms->pu1_cu_coeffs[0],
4115
6.96M
            &cbf_bits);
4116
4117
6.96M
        cu_bits += header_bits;
4118
4119
        /* cbf bits are excluded from header bits, instead considered as texture bits */
4120
        /* incase if zero cbf eval is disabled then texture bits gets added here */
4121
6.96M
        ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
4122
6.96M
        ps_final_prms->u4_cu_cbf_bits = cbf_bits;
4123
4124
6.96M
#if RDOPT_ENABLE
4125
        /* add the cost of coding the cu bits */
4126
6.96M
        total_rdopt_cost +=
4127
6.96M
            COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
4128
6.96M
#endif
4129
6.96M
    }
4130
6.96M
    return (total_rdopt_cost);
4131
7.54M
}
4132
/*!
4133
******************************************************************************
4134
* \if Function name : ihevce_inter_rdopt_cu_ntu \endif
4135
*
4136
* \brief
4137
*    Inter coding unit function which performs the TQ, IT, IQ and recon for luma
4138
*
4139
* \param[in] ps_ctxt       enc_loop module ctxt pointer
4140
* \param[in] ps_inter_cand pointer to inter candidate structure
4141
* \param[in] pv_src        pointer to source data buffer
4142
* \param[in] cu_size       Current CU size
4143
* \param[in] cu_pos_x      cu position x w.r.t to ctb
4144
* \param[in] cu_pos_y      cu position y w.r.t to ctb
4145
* \param[in] src_strd      source buffer stride
4146
* \param[in] curr_buf_idx  buffer index for current output storage
4147
* \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure
4148
*
4149
* \return
4150
*    Rdopt cost
4151
*
4152
* \author
4153
*  Ittiam
4154
*
4155
*****************************************************************************
4156
*/
4157
LWORD64 ihevce_inter_rdopt_cu_ntu(
4158
    ihevce_enc_loop_ctxt_t *ps_ctxt,
4159
    enc_loop_cu_prms_t *ps_cu_prms,
4160
    void *pv_src,
4161
    WORD32 cu_size,
4162
    WORD32 cu_pos_x,
4163
    WORD32 cu_pos_y,
4164
    WORD32 curr_buf_idx,
4165
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
4166
    cu_inter_cand_t *ps_inter_cand,
4167
    cu_analyse_t *ps_cu_analyse,
4168
    WORD32 i4_alpha_stim_multiplier)
4169
789k
{
4170
789k
    enc_loop_cu_final_prms_t *ps_final_prms;
4171
789k
    nbr_4x4_t *ps_nbr_4x4;
4172
789k
    tu_prms_t s_tu_prms[64 * 4];
4173
789k
    tu_prms_t *ps_tu_prms;
4174
4175
789k
    WORD32 i4_perform_rdoq;
4176
789k
    WORD32 i4_perform_sbh;
4177
789k
    WORD32 ai4_tu_split_flags[4];
4178
789k
    WORD32 ai4_tu_early_cbf[4];
4179
789k
    WORD32 num_split_flags = 1;
4180
789k
    WORD32 i;
4181
789k
    UWORD8 u1_tu_size;
4182
789k
    UWORD8 *pu1_pred;
4183
789k
    UWORD8 *pu1_ecd_data;
4184
789k
    WORD16 *pi2_deq_data;
4185
789k
    UWORD8 *pu1_csbf_buf;
4186
789k
    UWORD8 *pu1_tu_sz_sft;
4187
789k
    UWORD8 *pu1_tu_posx;
4188
789k
    UWORD8 *pu1_tu_posy;
4189
789k
    LWORD64 total_rdopt_cost;
4190
789k
    WORD32 ctr;
4191
789k
    WORD32 chrm_ctr;
4192
789k
    WORD32 num_tu_in_cu = 0;
4193
789k
    WORD32 pred_stride;
4194
789k
    WORD32 recon_stride;
4195
789k
    WORD32 trans_size = ps_cu_analyse->u1_cu_size;
4196
789k
    WORD32 csbf_strd;
4197
789k
    WORD32 chrm_present_flag;
4198
789k
    WORD32 ecd_data_bytes_cons;
4199
789k
    WORD32 num_4x4_in_cu;
4200
789k
    WORD32 num_4x4_in_tu;
4201
789k
    WORD32 recon_func_mode;
4202
789k
    WORD32 cu_bits;
4203
789k
    UWORD8 u1_compute_spatial_ssd;
4204
4205
    /* min_trans_size is initialized to some huge number than usual TU sizes */
4206
789k
    WORD32 i4_min_trans_size = 256;
4207
    /* Get the RDOPT cost of the best CU mode for early_exit */
4208
789k
    LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
4209
789k
    WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
4210
4211
    /* model for no residue syntax qt root cbf flag */
4212
789k
    UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX];
4213
4214
    /* backup copy of cabac states for restoration if zero cu reside rdo wins later */
4215
789k
    UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
4216
4217
    /* for skip cases tables are not reqquired */
4218
789k
    UWORD8 u1_skip_tu_sz_sft = 0;
4219
789k
    UWORD8 u1_skip_tu_posx = 0;
4220
789k
    UWORD8 u1_skip_tu_posy = 0;
4221
789k
    UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
4222
4223
    /* get the pointers based on curbuf idx */
4224
789k
    ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
4225
789k
    ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
4226
789k
    pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
4227
789k
    pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
4228
789k
    csbf_strd = ps_ctxt->i4_cu_csbf_strd;
4229
789k
    pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
4230
4231
789k
    pred_stride = ps_inter_cand->i4_pred_data_stride;
4232
789k
    recon_stride = cu_size;
4233
789k
    pu1_pred = ps_inter_cand->pu1_pred_data;
4234
789k
    chrm_ctr = 0;
4235
789k
    ecd_data_bytes_cons = 0;
4236
789k
    total_rdopt_cost = 0;
4237
789k
    num_4x4_in_cu = cu_size >> 2;
4238
789k
    recon_func_mode = PRED_MODE_INTER;
4239
789k
    cu_bits = 0;
4240
4241
    /* get the 4x4 level postion of current cu */
4242
789k
    cu_pos_x = cu_pos_x << 1;
4243
789k
    cu_pos_y = cu_pos_y << 1;
4244
4245
    /* default value for cu coded flag */
4246
789k
    ps_final_prms->u1_is_cu_coded = 0;
4247
4248
    /*init of ssd of CU accuumulated over all TU*/
4249
789k
    ps_final_prms->u4_cu_sad = 0;
4250
4251
    /* populate the coeffs scan idx */
4252
789k
    ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
4253
4254
789k
#if ENABLE_INTER_ZCU_COST
4255
    /* reset cu not coded cost */
4256
789k
    ps_ctxt->i8_cu_not_coded_cost = 0;
4257
4258
    /* backup copy of cabac states for restoration if zero cu reside rdo wins later */
4259
789k
    memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END);
4260
789k
#endif
4261
4262
789k
    if(ps_cu_analyse->u1_cu_size == 64)
4263
27.2k
    {
4264
27.2k
        num_split_flags = 4;
4265
27.2k
        u1_tu_size = 32;
4266
27.2k
    }
4267
761k
    else
4268
761k
    {
4269
761k
        num_split_flags = 1;
4270
761k
        u1_tu_size = ps_cu_analyse->u1_cu_size;
4271
761k
    }
4272
4273
    /* ckeck for skip mode */
4274
789k
    if(1 == ps_final_prms->u1_skip_flag)
4275
271k
    {
4276
271k
        if(64 == cu_size)
4277
9.42k
        {
4278
            /* TU = CU/2 is set but no trnaform is evaluated  */
4279
9.42k
            num_tu_in_cu = 4;
4280
9.42k
            pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
4281
9.42k
            pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
4282
9.42k
            pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
4283
9.42k
        }
4284
262k
        else
4285
262k
        {
4286
            /* TU = CU is set but no trnaform is evaluated  */
4287
262k
            num_tu_in_cu = 1;
4288
262k
            pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
4289
262k
            pu1_tu_posx = &u1_skip_tu_posx;
4290
262k
            pu1_tu_posy = &u1_skip_tu_posy;
4291
262k
        }
4292
4293
271k
        recon_func_mode = PRED_MODE_SKIP;
4294
271k
    }
4295
    /* check for PU part mode being AMP or No AMP */
4296
517k
    else if(ps_final_prms->u1_part_mode < SIZE_2NxnU)
4297
460k
    {
4298
460k
        if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64))
4299
443k
        {
4300
            /* TU= CU is evaluated 2Nx2N inter case */
4301
443k
            num_tu_in_cu = 1;
4302
443k
            pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
4303
443k
            pu1_tu_posx = &u1_skip_tu_posx;
4304
443k
            pu1_tu_posy = &u1_skip_tu_posy;
4305
443k
        }
4306
17.4k
        else
4307
17.4k
        {
4308
            /* currently TU= CU/2 is evaluated for all inter case */
4309
17.4k
            num_tu_in_cu = 4;
4310
17.4k
            pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
4311
17.4k
            pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
4312
17.4k
            pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
4313
17.4k
        }
4314
460k
    }
4315
56.6k
    else
4316
56.6k
    {
4317
        /* for AMP cases one level of TU recurssion is done */
4318
        /* based on oreintation of the partitions           */
4319
56.6k
        num_tu_in_cu = 10;
4320
56.6k
        pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0];
4321
56.6k
        pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
4322
56.6k
        pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
4323
56.6k
    }
4324
4325
789k
    ps_tu_prms = &s_tu_prms[0];
4326
789k
    num_tu_in_cu = 0;
4327
4328
1.65M
    for(i = 0; i < num_split_flags; i++)
4329
870k
    {
4330
870k
        WORD32 i4_x_off = 0, i4_y_off = 0;
4331
4332
870k
        if(i == 1 || i == 3)
4333
54.5k
        {
4334
54.5k
            i4_x_off = 32;
4335
54.5k
        }
4336
4337
870k
        if(i == 2 || i == 3)
4338
54.5k
        {
4339
54.5k
            i4_y_off = 32;
4340
54.5k
        }
4341
4342
870k
        if(1 == ps_final_prms->u1_skip_flag)
4343
299k
        {
4344
299k
            ai4_tu_split_flags[0] = 0;
4345
299k
            ps_inter_cand->ai4_tu_split_flag[i] = 0;
4346
4347
299k
            ai4_tu_early_cbf[0] = 0;
4348
299k
        }
4349
570k
        else
4350
570k
        {
4351
570k
            ai4_tu_split_flags[0] = ps_inter_cand->ai4_tu_split_flag[i];
4352
570k
            ai4_tu_early_cbf[0] = ps_inter_cand->ai4_tu_early_cbf[i];
4353
570k
        }
4354
4355
870k
        ps_tu_prms->u1_tu_size = u1_tu_size;
4356
4357
870k
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
4358
870k
            ps_tu_prms,
4359
870k
            &num_tu_in_cu,
4360
870k
            0,
4361
870k
            ai4_tu_split_flags[0],
4362
870k
            ai4_tu_early_cbf[0],
4363
870k
            i4_x_off,
4364
870k
            i4_y_off);
4365
870k
    }
4366
4367
    /* loop for all tu blocks in current cu */
4368
789k
    ps_tu_prms = &s_tu_prms[0];
4369
2.32M
    for(ctr = 0; ctr < num_tu_in_cu; ctr++)
4370
1.53M
    {
4371
1.53M
        trans_size = ps_tu_prms->u1_tu_size;
4372
4373
1.53M
        if(i4_min_trans_size > trans_size)
4374
796k
        {
4375
796k
            i4_min_trans_size = trans_size;
4376
796k
        }
4377
1.53M
        ps_tu_prms++;
4378
1.53M
    }
4379
4380
789k
    if(ps_ctxt->i1_cu_qp_delta_enable)
4381
207k
    {
4382
207k
        ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, i4_min_trans_size, 0);
4383
207k
    }
4384
4385
789k
    if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
4386
0
    {
4387
0
        ps_ctxt->i8_cl_ssd_lambda_qf =
4388
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
4389
0
             100.0f);
4390
0
        ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
4391
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
4392
0
             (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
4393
0
    }
4394
4395
789k
    u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
4396
574k
                             (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
4397
75.8k
                             CONVERT_SSDS_TO_SPATIAL_DOMAIN;
4398
4399
789k
    if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
4400
0
    {
4401
0
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
4402
0
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
4403
0
    }
4404
4405
789k
    if(!u1_compute_spatial_ssd)
4406
713k
    {
4407
713k
        ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
4408
713k
        ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
4409
713k
    }
4410
75.8k
    else
4411
75.8k
    {
4412
75.8k
        ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1;
4413
75.8k
    }
4414
4415
789k
    ps_tu_prms = &s_tu_prms[0];
4416
4417
789k
    ASSERT(num_tu_in_cu <= 256);
4418
4419
    /* RDOPT copy States :  TU init (best until prev TU) to current */
4420
789k
    memcpy(
4421
789k
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4422
789k
             .s_cabac_ctxt.au1_ctxt_models[0],
4423
789k
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
4424
789k
        IHEVC_CAB_COEFFX_PREFIX);
4425
4426
2.23M
    for(ctr = 0; ctr < num_tu_in_cu; ctr++)
4427
1.52M
    {
4428
1.52M
        WORD32 curr_bytes;
4429
1.52M
        WORD32 tx_size;
4430
1.52M
        WORD32 cbf, zero_col, zero_row;
4431
1.52M
        LWORD64 rdopt_cost;
4432
1.52M
        UWORD8 u1_is_recon_available;
4433
4434
1.52M
        WORD32 curr_pos_x;
4435
1.52M
        WORD32 curr_pos_y;
4436
1.52M
        nbr_4x4_t *ps_cur_nbr_4x4;
4437
1.52M
        UWORD8 *pu1_cur_pred;
4438
1.52M
        UWORD8 *pu1_cur_src;
4439
1.52M
        UWORD8 *pu1_cur_recon;
4440
1.52M
        WORD16 *pi2_cur_deq_data;
4441
1.52M
        UWORD32 u4_tu_sad;
4442
1.52M
        WORD32 tu_bits;
4443
4444
1.52M
        WORD32 i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
4445
4446
1.52M
        trans_size = ps_tu_prms->u1_tu_size;
4447
        /* get the current pos x and pos y in pixels */
4448
1.52M
        curr_pos_x = ps_tu_prms->u1_x_off;  //((cu_size >> 2) * pu1_tu_posx[ctr]);
4449
1.52M
        curr_pos_y = ps_tu_prms->u1_y_off;  //((cu_size >> 2) * pu1_tu_posy[ctr]);
4450
4451
1.52M
        num_4x4_in_tu = trans_size >> 2;
4452
4453
#if FORCE_8x8_TFR
4454
        if(cu_size == 64)
4455
        {
4456
            curr_pos_x = ((cu_size >> 3) * pu1_tu_posx[ctr]);
4457
            curr_pos_y = ((cu_size >> 3) * pu1_tu_posy[ctr]);
4458
        }
4459
#endif
4460
4461
        /* increment the pointers to start of current TU  */
4462
1.52M
        pu1_cur_src = ((UWORD8 *)pv_src + curr_pos_x);
4463
1.52M
        pu1_cur_src += (curr_pos_y * src_strd);
4464
1.52M
        pu1_cur_pred = (pu1_pred + curr_pos_x);
4465
1.52M
        pu1_cur_pred += (curr_pos_y * pred_stride);
4466
1.52M
        pi2_cur_deq_data = pi2_deq_data + curr_pos_x;
4467
1.52M
        pi2_cur_deq_data += (curr_pos_y * cu_size);
4468
1.52M
        pu1_cur_recon = ((UWORD8 *)ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0]) +
4469
1.52M
                        curr_pos_x + curr_pos_y * i4_recon_stride;
4470
4471
1.52M
        ps_cur_nbr_4x4 = (ps_nbr_4x4 + (curr_pos_x >> 2));
4472
1.52M
        ps_cur_nbr_4x4 += ((curr_pos_y >> 2) * num_4x4_in_cu);
4473
4474
        /* RDOPT copy States :  TU init (best until prev TU) to current */
4475
1.52M
        COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4476
1.52M
            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4477
1.52M
                    .s_cabac_ctxt.au1_ctxt_models[0] +
4478
1.52M
                IHEVC_CAB_COEFFX_PREFIX,
4479
1.52M
            &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4480
1.52M
            IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4481
4482
1.52M
        i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
4483
1.52M
        i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
4484
4485
        /* 2 multi-dimensional arrays, based on trans size, of rounding factors to be added here */
4486
        /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
4487
        /* Currently the complete array will contain only single value*/
4488
        /*The rounding factor is calculated with the formula
4489
        Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
4490
        rounding factor = (1 - DeadZone Val)
4491
4492
        Assumption: Cabac states of All the sub-blocks in the TU are considered independent
4493
        */
4494
1.52M
        if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0))
4495
0
        {
4496
0
            double i4_lamda_modifier;
4497
4498
0
            if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
4499
0
            {
4500
0
                i4_lamda_modifier = ps_ctxt->i4_lamda_modifier *
4501
0
                                    CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
4502
0
            }
4503
0
            else
4504
0
            {
4505
0
                i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
4506
0
            }
4507
0
            if(ps_ctxt->i4_use_const_lamda_modifier)
4508
0
            {
4509
0
                if(ISLICE == ps_ctxt->i1_slice_type)
4510
0
                {
4511
0
                    i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
4512
0
                }
4513
0
                else
4514
0
                {
4515
0
                    i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
4516
0
                }
4517
0
            }
4518
0
            ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
4519
0
                &ps_ctxt->i4_quant_round_tu[0][0];
4520
0
            ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
4521
0
                &ps_ctxt->i4_quant_round_tu[1][0];
4522
4523
0
            memset(
4524
0
                ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
4525
0
                0,
4526
0
                trans_size * trans_size * sizeof(WORD32));
4527
0
            memset(
4528
0
                ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
4529
0
                0,
4530
0
                trans_size * trans_size * sizeof(WORD32));
4531
4532
0
            ihevce_quant_rounding_factor_gen(
4533
0
                trans_size,
4534
0
                1,
4535
0
                &ps_ctxt->s_rdopt_entropy_ctxt,
4536
0
                ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
4537
0
                ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
4538
0
                i4_lamda_modifier,
4539
0
                1);
4540
0
        }
4541
1.52M
        else
4542
1.52M
        {
4543
1.52M
            ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
4544
1.52M
                ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3];
4545
1.52M
            ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
4546
1.52M
                ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3];
4547
1.52M
        }
4548
4549
        /* call T Q IT IQ and recon function */
4550
1.52M
        cbf = ihevce_t_q_iq_ssd_scan_fxn(
4551
1.52M
            ps_ctxt,
4552
1.52M
            pu1_cur_pred,
4553
1.52M
            pred_stride,
4554
1.52M
            pu1_cur_src,
4555
1.52M
            src_strd,
4556
1.52M
            pi2_cur_deq_data,
4557
1.52M
            cu_size,
4558
1.52M
            pu1_cur_recon,
4559
1.52M
            i4_recon_stride,
4560
1.52M
            pu1_ecd_data,
4561
1.52M
            pu1_csbf_buf,
4562
1.52M
            csbf_strd,
4563
1.52M
            trans_size,
4564
1.52M
            recon_func_mode,
4565
1.52M
            &rdopt_cost,
4566
1.52M
            &curr_bytes,
4567
1.52M
            &tu_bits,
4568
1.52M
            &u4_tu_sad,
4569
1.52M
            &zero_col,
4570
1.52M
            &zero_row,
4571
1.52M
            &u1_is_recon_available,
4572
1.52M
            i4_perform_rdoq,
4573
1.52M
            i4_perform_sbh,
4574
1.52M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
4575
1.52M
            i4_alpha_stim_multiplier,
4576
1.52M
            u1_is_cu_noisy,
4577
1.52M
#endif
4578
1.52M
            u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
4579
1.52M
            ps_ctxt->u1_use_early_cbf_data ? ps_tu_prms->i4_early_cbf : 1);
4580
4581
#if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
4582
        if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
4583
        {
4584
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
4585
            rdopt_cost = ihevce_inject_stim_into_distortion(
4586
                pu1_cur_src,
4587
                src_strd,
4588
                pu1_cur_pred,
4589
                pred_stride,
4590
                rdopt_cost,
4591
                i4_alpha_stim_multiplier,
4592
                trans_size,
4593
                0,
4594
                ps_ctxt->u1_enable_psyRDOPT,
4595
                NULL_PLANE);
4596
#else
4597
            if(u1_compute_spatial_ssd && u1_is_recon_available)
4598
            {
4599
                rdopt_cost = ihevce_inject_stim_into_distortion(
4600
                    pu1_cur_src,
4601
                    src_strd,
4602
                    pu1_cur_recon,
4603
                    i4_recon_stride,
4604
                    rdopt_cost,
4605
                    i4_alpha_stim_multiplier,
4606
                    trans_size,
4607
                    0,
4608
                    NULL_PLANE);
4609
            }
4610
            else
4611
            {
4612
                rdopt_cost = ihevce_inject_stim_into_distortion(
4613
                    pu1_cur_src,
4614
                    src_strd,
4615
                    pu1_cur_pred,
4616
                    pred_stride,
4617
                    rdopt_cost,
4618
                    i4_alpha_stim_multiplier,
4619
                    trans_size,
4620
                    0,
4621
                    ps_ctxt->u1_enable_psyRDOPT,
4622
                    NULL_PLANE);
4623
            }
4624
#endif
4625
        }
4626
#endif
4627
4628
1.52M
        if(u1_compute_spatial_ssd && u1_is_recon_available)
4629
141k
        {
4630
141k
            ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = 0;
4631
141k
        }
4632
1.38M
        else
4633
1.38M
        {
4634
1.38M
            ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
4635
1.38M
        }
4636
4637
        /* accumulate the TU sad into cu sad */
4638
1.52M
        ps_final_prms->u4_cu_sad += u4_tu_sad;
4639
4640
        /* accumulate the TU bits into cu bits */
4641
1.52M
        cu_bits += tu_bits;
4642
4643
        /* inter cu is coded if any of the tu is coded in it */
4644
1.52M
        ps_final_prms->u1_is_cu_coded |= cbf;
4645
4646
        /* call the entropy function to get the bits */
4647
        /* add that to rd opt cost(SSD)              */
4648
4649
        /* update the bytes */
4650
1.52M
        ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
4651
1.52M
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = curr_bytes;
4652
        /* update the zero_row and col info for the final mode */
4653
1.52M
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col = zero_col;
4654
1.52M
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row = zero_row;
4655
4656
        /* update the bytes */
4657
1.52M
        ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
4658
4659
        /* update the total bytes cons */
4660
1.52M
        ecd_data_bytes_cons += curr_bytes;
4661
1.52M
        pu1_ecd_data += curr_bytes;
4662
4663
        /* RDOPT copy States :  New updated after curr TU to TU init */
4664
1.52M
        if(0 != cbf)
4665
249k
        {
4666
            /* update to new state only if CBF is non zero */
4667
249k
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4668
249k
                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4669
249k
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4670
249k
                        .s_cabac_ctxt.au1_ctxt_models[0] +
4671
249k
                    IHEVC_CAB_COEFFX_PREFIX,
4672
249k
                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4673
249k
        }
4674
4675
        /* by default chroma present is set to 1*/
4676
1.52M
        chrm_present_flag = 1;
4677
1.52M
        if(4 == trans_size)
4678
457k
        {
4679
            /* if tusize is 4x4 then only first luma 4x4 will have chroma*/
4680
457k
            if(0 != chrm_ctr)
4681
343k
            {
4682
343k
                chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE;
4683
343k
            }
4684
4685
            /* increment the chrm ctr unconditionally */
4686
457k
            chrm_ctr++;
4687
4688
            /* after ctr reached 4 reset it */
4689
457k
            if(4 == chrm_ctr)
4690
114k
            {
4691
114k
                chrm_ctr = 0;
4692
114k
            }
4693
457k
        }
4694
4695
1.52M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = cbf;
4696
1.52M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
4697
1.52M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
4698
1.52M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
4699
1.52M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
4700
1.52M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag;
4701
1.52M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp;
4702
1.52M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0;
4703
1.52M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0;
4704
1.52M
        GETRANGE(tx_size, trans_size);
4705
1.52M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
4706
1.52M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + (curr_pos_x >> 2);
4707
1.52M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + (curr_pos_y >> 2);
4708
4709
        /* replicate the nbr 4x4 structure for all 4x4 blocks in the current TU */
4710
1.52M
        ps_cur_nbr_4x4->b1_y_cbf = cbf;
4711
        /*copy the cu qp. This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/
4712
1.52M
        ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
4713
4714
        /* Qp and cbf are stored for the all 4x4 in TU */
4715
1.52M
        {
4716
1.52M
            WORD32 i, j;
4717
1.52M
            nbr_4x4_t *ps_tmp_4x4;
4718
1.52M
            ps_tmp_4x4 = ps_cur_nbr_4x4;
4719
4720
6.86M
            for(i = 0; i < num_4x4_in_tu; i++)
4721
5.33M
            {
4722
34.7M
                for(j = 0; j < num_4x4_in_tu; j++)
4723
29.4M
                {
4724
29.4M
                    ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp;
4725
29.4M
                    ps_tmp_4x4[j].b1_y_cbf = cbf;
4726
29.4M
                }
4727
                /* row level update*/
4728
5.33M
                ps_tmp_4x4 += num_4x4_in_cu;
4729
5.33M
            }
4730
1.52M
        }
4731
4732
1.52M
#if RDOPT_ENABLE
4733
        /* compute the rdopt cost */
4734
1.52M
        rdopt_cost +=
4735
1.52M
            COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
4736
1.52M
#endif
4737
        /* accumulate the costs */
4738
1.52M
        total_rdopt_cost += rdopt_cost;
4739
4740
1.52M
        ps_tu_prms++;
4741
4742
1.52M
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
4743
1.52M
        {
4744
            /* Early exit : If the current running cost exceeds
4745
            the prev. best mode cost, break */
4746
1.52M
            if(total_rdopt_cost > prev_best_rdopt_cost)
4747
79.2k
            {
4748
79.2k
                return (total_rdopt_cost);
4749
79.2k
            }
4750
1.52M
        }
4751
1.52M
    }
4752
4753
    /* Modify the cost function for this CU. */
4754
    /* loop in for 8x8 blocks */
4755
709k
    if(ps_ctxt->u1_enable_psyRDOPT)
4756
0
    {
4757
0
        UWORD8 *pu1_recon_cu;
4758
0
        WORD32 recon_stride;
4759
0
        WORD32 curr_pos_x;
4760
0
        WORD32 curr_pos_y;
4761
0
        WORD32 start_index;
4762
0
        WORD32 num_horz_cu_in_ctb;
4763
0
        WORD32 had_block_size;
4764
4765
        /* TODO: sreenivasa ctb size has to be used appropriately */
4766
0
        had_block_size = 8;
4767
0
        num_horz_cu_in_ctb = 64 / had_block_size;
4768
4769
0
        curr_pos_x = cu_pos_x << 2; /* pel units */
4770
0
        curr_pos_y = cu_pos_y << 2; /* pel units */
4771
0
        recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
4772
0
        pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore
4773
0
                            .apv_luma_recon_bufs[0]);  // already pointing to the current CU recon
4774
        //+ \curr_pos_x + curr_pos_y * recon_stride;
4775
4776
        /* start index to index the source satd of curr cu in the current ctb */
4777
0
        start_index =
4778
0
            (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
4779
4780
0
        {
4781
0
            total_rdopt_cost += ihevce_psy_rd_cost(
4782
0
                ps_ctxt->ai4_source_satd_8x8,
4783
0
                pu1_recon_cu,
4784
0
                recon_stride,
4785
0
                1,  //howz stride
4786
0
                cu_size,
4787
0
                0,  // pic type
4788
0
                0,  //layer id
4789
0
                ps_ctxt->i4_satd_lamda,  // lambda
4790
0
                start_index,
4791
0
                ps_ctxt->u1_is_input_data_hbd,
4792
0
                ps_ctxt->u4_psy_strength,
4793
0
                &ps_ctxt->s_cmn_opt_func);  // 8 bit
4794
0
        }
4795
0
    }
4796
4797
    /* store the num TUs*/
4798
709k
    ps_final_prms->u2_num_tus_in_cu = num_tu_in_cu;
4799
4800
    /* update the bytes consumed */
4801
709k
    ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
4802
4803
    /* store the current cu size to final prms */
4804
709k
    ps_final_prms->u1_cu_size = cu_size;
4805
4806
    /* cu bits will be having luma residual bits till this point    */
4807
    /* if zero_cbf eval is disabled then cu bits will be zero       */
4808
709k
    ps_final_prms->u4_cu_luma_res_bits = cu_bits;
4809
4810
    /* ------------- Chroma processing -------------- */
4811
    /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEED preset */
4812
709k
    if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
4813
316k
    {
4814
316k
        LWORD64 chrm_rdopt_cost;
4815
316k
        WORD32 chrm_rdopt_tu_bits;
4816
4817
        /* Store the current RDOPT cost to enable early exit in chrom_prcs */
4818
316k
        ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
4819
4820
316k
        chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
4821
316k
            ps_ctxt,
4822
316k
            curr_buf_idx,
4823
316k
            0, /* TU mode : Don't care in Inter patrh */
4824
316k
            ps_chrm_cu_buf_prms->pu1_curr_src,
4825
316k
            ps_chrm_cu_buf_prms->i4_chrm_src_stride,
4826
316k
            ps_chrm_cu_buf_prms->pu1_cu_left,
4827
316k
            ps_chrm_cu_buf_prms->pu1_cu_top,
4828
316k
            ps_chrm_cu_buf_prms->pu1_cu_top_left,
4829
316k
            ps_chrm_cu_buf_prms->i4_cu_left_stride,
4830
316k
            (cu_pos_x >> 1),
4831
316k
            (cu_pos_y >> 1),
4832
316k
            &chrm_rdopt_tu_bits,
4833
316k
            i4_alpha_stim_multiplier,
4834
316k
            u1_is_cu_noisy);
4835
4836
316k
#if WEIGH_CHROMA_COST
4837
316k
        chrm_rdopt_cost = (LWORD64)(
4838
316k
            (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
4839
316k
             (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
4840
316k
            CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
4841
316k
#endif
4842
4843
316k
#if CHROMA_RDOPT_ENABLE
4844
316k
        total_rdopt_cost += chrm_rdopt_cost;
4845
316k
#endif
4846
316k
        cu_bits += chrm_rdopt_tu_bits;
4847
4848
        /* during chroma evaluation if skip decision was over written     */
4849
        /* then the current skip candidate is set to a non skip candidate */
4850
316k
        ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag;
4851
4852
        /* cu bits for chroma residual if chroma rdopt is on       */
4853
        /* if zero_cbf eval is disabled then cu bits will be zero  */
4854
316k
        ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
4855
4856
316k
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
4857
316k
        {
4858
            /* Early exit : If the current running cost exceeds
4859
            the prev. best mode cost, break */
4860
316k
            if(total_rdopt_cost > prev_best_rdopt_cost)
4861
19.6k
            {
4862
19.6k
                return (total_rdopt_cost);
4863
19.6k
            }
4864
316k
        }
4865
316k
    }
4866
392k
    else
4867
392k
    {}
4868
4869
690k
#if SHRINK_INTER_TUTREE
4870
    /* ------------- Quadtree TU split  optimization ------------  */
4871
690k
    if(ps_final_prms->u1_is_cu_coded)
4872
108k
    {
4873
108k
        ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree(
4874
108k
            &ps_final_prms->as_tu_enc_loop[0],
4875
108k
            &ps_final_prms->as_tu_enc_loop_temp_prms[0],
4876
108k
            &ps_final_prms->s_recon_datastore,
4877
108k
            num_tu_in_cu,
4878
108k
            (ps_ctxt->u1_chroma_array_type == 2));
4879
108k
    }
4880
690k
#endif
4881
4882
    /* RDOPT copy States :  Best after all luma TUs (and chroma,if enabled)to current */
4883
690k
    COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4884
690k
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4885
690k
                .s_cabac_ctxt.au1_ctxt_models[0] +
4886
690k
            IHEVC_CAB_COEFFX_PREFIX,
4887
690k
        &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4888
690k
        IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4889
4890
    /* -------- Bit estimate for RD opt -------------- */
4891
690k
    {
4892
690k
        nbr_avail_flags_t s_nbr;
4893
        /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
4894
690k
        WORD32 cbf_bits, header_bits;
4895
4896
        /* get the neighbour availability flags for current cu  */
4897
690k
        ihevce_get_only_nbr_flag(
4898
690k
            &s_nbr,
4899
690k
            ps_ctxt->pu1_ctb_nbr_map,
4900
690k
            ps_ctxt->i4_nbr_map_strd,
4901
690k
            cu_pos_x,
4902
690k
            cu_pos_y,
4903
690k
            (cu_size >> 2),
4904
690k
            (cu_size >> 2));
4905
4906
        /* call the entropy rdo encode to get the bit estimate for current cu */
4907
690k
        header_bits = ihevce_entropy_rdo_encode_cu(
4908
690k
            &ps_ctxt->s_rdopt_entropy_ctxt,
4909
690k
            ps_final_prms,
4910
690k
            (cu_pos_x >> 1), /*  back to 8x8 pel units   */
4911
690k
            (cu_pos_y >> 1), /*  back to 8x8 pel units   */
4912
690k
            cu_size,
4913
690k
            ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
4914
690k
                                           : s_nbr.u1_top_avail,
4915
690k
            s_nbr.u1_left_avail,
4916
690k
            &ps_final_prms->pu1_cu_coeffs[0],
4917
690k
            &cbf_bits);
4918
4919
690k
        cu_bits += header_bits;
4920
4921
        /* cbf bits are excluded from header bits, instead considered as texture bits */
4922
        /* incase if zero cbf eval is disabled then texture bits gets added here */
4923
690k
        ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
4924
690k
        ps_final_prms->u4_cu_cbf_bits = cbf_bits;
4925
4926
690k
#if RDOPT_ENABLE
4927
        /* add the cost of coding the header bits */
4928
690k
        total_rdopt_cost +=
4929
690k
            COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
4930
4931
690k
#if ENABLE_INTER_ZCU_COST
4932
        /* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */
4933
690k
        if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level))
4934
108k
        {
4935
108k
            LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost;
4936
4937
108k
            WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) &&
4938
87.2k
                                      (1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag);
4939
4940
108k
            cab_ctxt_t *ps_cab_ctxt =
4941
108k
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt;
4942
4943
            /* Read header bits generated after ihevce_entropy_rdo_encode_cu() call */
4944
108k
            UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12;
4945
4946
            /* account for coding qt_root_cbf = 0 */
4947
            /* First subtract cost for coding as 1 (part of header bits) and then add cost for coding as 0 */
4948
108k
            u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0];
4949
108k
            if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1])
4950
189
                u4_cu_hdr_bits_q12 = 0;
4951
108k
            else
4952
108k
                u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1];
4953
4954
            /* add the cost of coding the header bits */
4955
108k
            i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30(
4956
108k
                u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */,
4957
108k
                ps_ctxt->i8_cl_ssd_lambda_qf,
4958
108k
                (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
4959
4960
108k
            if(ps_ctxt->u1_enable_psyRDOPT)
4961
0
            {
4962
0
                i8_cu_not_coded_cost = total_rdopt_cost + 1;
4963
0
            }
4964
4965
            /* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */
4966
108k
            if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu))
4967
2.14k
            {
4968
2.14k
                WORD32 tx_size;
4969
4970
                /* force cu as not coded and update the cost */
4971
2.14k
                ps_final_prms->u1_is_cu_coded = 0;
4972
2.14k
                ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
4973
2.14k
                ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
4974
4975
2.14k
                total_rdopt_cost = i8_cu_not_coded_cost;
4976
4977
                /* reset num TUs to 1 unless cu size is 64 */
4978
2.14k
                ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1;
4979
2.14k
                trans_size = (64 == cu_size) ? 32 : cu_size;
4980
2.14k
                GETRANGE(tx_size, trans_size);
4981
4982
                /* reset the bytes consumed */
4983
2.14k
                ps_final_prms->i4_num_bytes_ecd_data = 0;
4984
4985
                /* reset texture related bits and roll back header bits*/
4986
2.14k
                ps_final_prms->u4_cu_cbf_bits = 0;
4987
2.14k
                ps_final_prms->u4_cu_luma_res_bits = 0;
4988
2.14k
                ps_final_prms->u4_cu_chroma_res_bits = 0;
4989
2.14k
                ps_final_prms->u4_cu_hdr_bits =
4990
2.14k
                    (u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q;
4991
4992
                /* update cabac model with qtroot cbf = 0 decision */
4993
2.14k
                ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] =
4994
2.14k
                    gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1];
4995
4996
                /* restore untouched cabac models for, tusplit, cbfs, texture etc */
4997
2.14k
                memcpy(
4998
2.14k
                    &ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM],
4999
2.14k
                    &au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM],
5000
2.14k
                    (IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM));
5001
5002
                /* mark all tus as not coded for final eval */
5003
6.49k
                for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++)
5004
4.34k
                {
5005
4.34k
                    WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0;
5006
4.34k
                    WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0;
5007
5008
4.34k
                    nbr_4x4_t *ps_cur_nbr_4x4 =
5009
4.34k
                        ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu);
5010
5011
4.34k
                    num_4x4_in_tu = trans_size >> 2;
5012
5013
4.34k
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0;
5014
4.34k
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0;
5015
4.34k
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0;
5016
5017
4.34k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0;
5018
4.34k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
5019
4.34k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
5020
5021
4.34k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
5022
4.34k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
5023
5024
4.34k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
5025
4.34k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x;
5026
4.34k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y;
5027
5028
                    /* reset cbf for the all 4x4 in TU */
5029
4.34k
                    {
5030
4.34k
                        WORD32 i, j;
5031
4.34k
                        nbr_4x4_t *ps_tmp_4x4;
5032
4.34k
                        ps_tmp_4x4 = ps_cur_nbr_4x4;
5033
5034
34.6k
                        for(i = 0; i < num_4x4_in_tu; i++)
5035
30.3k
                        {
5036
258k
                            for(j = 0; j < num_4x4_in_tu; j++)
5037
228k
                            {
5038
228k
                                ps_tmp_4x4[j].b1_y_cbf = 0;
5039
228k
                            }
5040
                            /* row level update*/
5041
30.3k
                            ps_tmp_4x4 += num_4x4_in_cu;
5042
30.3k
                        }
5043
4.34k
                    }
5044
4.34k
                }
5045
2.14k
            }
5046
108k
        }
5047
690k
#endif /* ENABLE_INTER_ZCU_COST */
5048
5049
690k
#endif /* RDOPT_ENABLE */
5050
690k
    }
5051
5052
690k
    return (total_rdopt_cost);
5053
709k
}
5054
5055
#if ENABLE_RDO_BASED_TU_RECURSION
5056
LWORD64 ihevce_inter_tu_tree_selector_and_rdopt_cost_computer(
5057
    ihevce_enc_loop_ctxt_t *ps_ctxt,
5058
    enc_loop_cu_prms_t *ps_cu_prms,
5059
    void *pv_src,
5060
    WORD32 cu_size,
5061
    WORD32 cu_pos_x,
5062
    WORD32 cu_pos_y,
5063
    WORD32 curr_buf_idx,
5064
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
5065
    cu_inter_cand_t *ps_inter_cand,
5066
    cu_analyse_t *ps_cu_analyse,
5067
    WORD32 i4_alpha_stim_multiplier)
5068
2.65M
{
5069
2.65M
    tu_tree_node_t as_tu_nodes[256 + 64 + 16 + 4 + 1];
5070
2.65M
    buffer_data_for_tu_t s_buffer_data_for_tu;
5071
2.65M
    enc_loop_cu_final_prms_t *ps_final_prms;
5072
2.65M
    nbr_4x4_t *ps_nbr_4x4;
5073
5074
2.65M
    WORD32 num_split_flags = 1;
5075
2.65M
    UWORD8 u1_tu_size;
5076
2.65M
    UWORD8 *pu1_pred;
5077
2.65M
    UWORD8 *pu1_ecd_data;
5078
2.65M
    WORD16 *pi2_deq_data;
5079
2.65M
    UWORD8 *pu1_csbf_buf;
5080
2.65M
    UWORD8 *pu1_tu_sz_sft;
5081
2.65M
    UWORD8 *pu1_tu_posx;
5082
2.65M
    UWORD8 *pu1_tu_posy;
5083
2.65M
    LWORD64 total_rdopt_cost;
5084
2.65M
    WORD32 ctr;
5085
2.65M
    WORD32 chrm_ctr;
5086
2.65M
    WORD32 pred_stride;
5087
2.65M
    WORD32 recon_stride;
5088
2.65M
    WORD32 trans_size = ps_cu_analyse->u1_cu_size;
5089
2.65M
    WORD32 csbf_strd;
5090
2.65M
    WORD32 ecd_data_bytes_cons;
5091
2.65M
    WORD32 num_4x4_in_cu;
5092
2.65M
    WORD32 num_4x4_in_tu;
5093
2.65M
    WORD32 recon_func_mode;
5094
2.65M
    WORD32 cu_bits;
5095
2.65M
    UWORD8 u1_compute_spatial_ssd;
5096
    /* backup copy of cabac states for restoration if zero cu residue rdo wins later */
5097
2.65M
    UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
5098
5099
2.65M
    WORD32 i4_min_trans_size = 256;
5100
2.65M
    LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
5101
2.65M
    WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
5102
    /* model for no residue syntax qt root cbf flag */
5103
2.65M
    UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX];
5104
2.65M
    UWORD8 u1_skip_tu_sz_sft = 0;
5105
2.65M
    UWORD8 u1_skip_tu_posx = 0;
5106
2.65M
    UWORD8 u1_skip_tu_posy = 0;
5107
2.65M
    UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
5108
5109
2.65M
    ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
5110
2.65M
    ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
5111
2.65M
    pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
5112
2.65M
    pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
5113
2.65M
    csbf_strd = ps_ctxt->i4_cu_csbf_strd;
5114
2.65M
    pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
5115
2.65M
    pred_stride = ps_inter_cand->i4_pred_data_stride;
5116
2.65M
    recon_stride = cu_size;
5117
2.65M
    pu1_pred = ps_inter_cand->pu1_pred_data;
5118
2.65M
    chrm_ctr = 0;
5119
2.65M
    ecd_data_bytes_cons = 0;
5120
2.65M
    total_rdopt_cost = 0;
5121
2.65M
    num_4x4_in_cu = cu_size >> 2;
5122
2.65M
    recon_func_mode = PRED_MODE_INTER;
5123
2.65M
    cu_bits = 0;
5124
5125
    /* get the 4x4 level position of current cu */
5126
2.65M
    cu_pos_x = cu_pos_x << 1;
5127
2.65M
    cu_pos_y = cu_pos_y << 1;
5128
5129
2.65M
    ps_final_prms->u1_is_cu_coded = 0;
5130
2.65M
    ps_final_prms->u4_cu_sad = 0;
5131
5132
    /* populate the coeffs scan idx */
5133
2.65M
    ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
5134
5135
2.65M
#if ENABLE_INTER_ZCU_COST
5136
    /* reset cu not coded cost */
5137
2.65M
    ps_ctxt->i8_cu_not_coded_cost = 0;
5138
5139
    /* backup copy of cabac states for restoration if zero cu residue rdo wins later */
5140
2.65M
    memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END);
5141
2.65M
#endif
5142
5143
2.65M
    if(ps_cu_analyse->u1_cu_size == 64)
5144
34.4k
    {
5145
34.4k
        num_split_flags = 4;
5146
34.4k
        u1_tu_size = 32;
5147
34.4k
    }
5148
2.62M
    else
5149
2.62M
    {
5150
2.62M
        num_split_flags = 1;
5151
2.62M
        u1_tu_size = ps_cu_analyse->u1_cu_size;
5152
2.62M
    }
5153
5154
2.65M
    if(1 == ps_final_prms->u1_skip_flag)
5155
787k
    {
5156
787k
        if(64 == cu_size)
5157
9.15k
        {
5158
            /* TU = CU/2 is set but no transform is evaluated */
5159
9.15k
            pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
5160
9.15k
            pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
5161
9.15k
            pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
5162
9.15k
        }
5163
777k
        else
5164
777k
        {
5165
            /* TU = CU is set but no transform is evaluated */
5166
777k
            pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
5167
777k
            pu1_tu_posx = &u1_skip_tu_posx;
5168
777k
            pu1_tu_posy = &u1_skip_tu_posy;
5169
777k
        }
5170
5171
787k
        recon_func_mode = PRED_MODE_SKIP;
5172
787k
    }
5173
    /* check for PU part mode being AMP or No AMP */
5174
1.86M
    else if(ps_final_prms->u1_part_mode < SIZE_2NxnU)
5175
1.36M
    {
5176
1.36M
        if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64))
5177
1.27M
        {
5178
            /* TU= CU is evaluated 2Nx2N inter case */
5179
1.27M
            pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
5180
1.27M
            pu1_tu_posx = &u1_skip_tu_posx;
5181
1.27M
            pu1_tu_posy = &u1_skip_tu_posy;
5182
1.27M
        }
5183
84.0k
        else
5184
84.0k
        {
5185
            /* currently TU= CU/2 is evaluated for all inter case */
5186
84.0k
            pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
5187
84.0k
            pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
5188
84.0k
            pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
5189
84.0k
        }
5190
1.36M
    }
5191
505k
    else
5192
505k
    {
5193
        /* for AMP cases one level of TU recursion is done  */
5194
        /* based on orientation of the partitions           */
5195
505k
        pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0];
5196
505k
        pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
5197
505k
        pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
5198
505k
    }
5199
5200
2.65M
    i4_min_trans_size = 4;
5201
5202
2.65M
    if(ps_ctxt->i1_cu_qp_delta_enable)
5203
1.33M
    {
5204
1.33M
        ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, i4_min_trans_size, 0);
5205
1.33M
    }
5206
5207
2.65M
    if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
5208
0
    {
5209
0
        ps_ctxt->i8_cl_ssd_lambda_qf =
5210
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
5211
0
             100.0f);
5212
0
        ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
5213
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
5214
0
             (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
5215
0
    }
5216
5217
2.65M
    u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
5218
1.69M
                             (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
5219
1.69M
                             CONVERT_SSDS_TO_SPATIAL_DOMAIN;
5220
5221
2.65M
    if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
5222
0
    {
5223
0
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
5224
0
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
5225
0
    }
5226
5227
2.65M
    if(!u1_compute_spatial_ssd)
5228
955k
    {
5229
955k
        ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
5230
955k
        ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
5231
955k
    }
5232
1.69M
    else
5233
1.69M
    {
5234
1.69M
        ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1;
5235
5236
1.69M
        if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))
5237
0
        {
5238
0
            ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 1;
5239
0
        }
5240
1.69M
    }
5241
5242
    /* RDOPT copy States :  TU init (best until prev TU) to current */
5243
2.65M
    memcpy(
5244
2.65M
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5245
2.65M
             .s_cabac_ctxt.au1_ctxt_models[0],
5246
2.65M
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
5247
2.65M
        IHEVC_CAB_COEFFX_PREFIX);
5248
5249
2.65M
    ihevce_tu_tree_init(
5250
2.65M
        as_tu_nodes,
5251
2.65M
        cu_size,
5252
2.65M
        (cu_size == 64) ? !ps_inter_cand->b1_skip_flag : 0,
5253
2.65M
        ps_inter_cand->b1_skip_flag ? 0 : ps_ctxt->u1_max_inter_tr_depth,
5254
2.65M
        INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
5255
2.65M
        ps_ctxt->u1_chroma_array_type == 2);
5256
5257
2.65M
    if(!ps_inter_cand->b1_skip_flag && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
5258
0
    {
5259
0
        ihevce_tuSplitArray_to_tuTree_mapper(
5260
0
            as_tu_nodes,
5261
0
            ps_inter_cand->ai4_tu_split_flag,
5262
0
            cu_size,
5263
0
            cu_size,
5264
0
            MAX(MIN_TU_SIZE, (cu_size >> ps_ctxt->u1_max_inter_tr_depth)),
5265
0
            MIN(MAX_TU_SIZE, cu_size),
5266
0
            ps_inter_cand->b1_skip_flag);
5267
0
    }
5268
5269
2.65M
    ASSERT(ihevce_tu_tree_coverage_in_cu(as_tu_nodes) == cu_size * cu_size);
5270
5271
2.65M
#if ENABLE_INTER_ZCU_COST
5272
2.65M
    ps_ctxt->i8_cu_not_coded_cost = 0;
5273
2.65M
#endif
5274
5275
2.65M
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_src = pv_src;
5276
2.65M
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_pred = pu1_pred;
5277
2.65M
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_recon =
5278
2.65M
        ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0];
5279
2.65M
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_src_stride = src_strd;
5280
2.65M
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_pred_stride = pred_stride;
5281
2.65M
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_recon_stride =
5282
2.65M
        ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
5283
2.65M
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_src = ps_chrm_cu_buf_prms->pu1_curr_src;
5284
2.65M
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred =
5285
2.65M
        ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
5286
2.65M
        curr_buf_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) + ((ps_ctxt->u1_chroma_array_type == 2) *
5287
2.65M
                                                              (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
5288
2.65M
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_recon =
5289
2.65M
        ps_final_prms->s_recon_datastore.apv_chroma_recon_bufs[0];
5290
2.65M
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_src_stride =
5291
2.65M
        ps_chrm_cu_buf_prms->i4_chrm_src_stride;
5292
2.65M
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride =
5293
2.65M
        ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
5294
2.65M
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_recon_stride =
5295
2.65M
        ps_final_prms->s_recon_datastore.i4_chromaRecon_stride;
5296
2.65M
    s_buffer_data_for_tu.ps_nbr_data_buf = ps_nbr_4x4;
5297
2.65M
    s_buffer_data_for_tu.pi2_deq_data = pi2_deq_data;
5298
2.65M
    s_buffer_data_for_tu.pi2_deq_data_chroma =
5299
2.65M
        pi2_deq_data + ps_final_prms->i4_chrm_deq_coeff_strt_idx;
5300
2.65M
    s_buffer_data_for_tu.i4_nbr_data_buf_stride = num_4x4_in_cu;
5301
2.65M
    s_buffer_data_for_tu.i4_deq_data_stride = cu_size;
5302
2.65M
    s_buffer_data_for_tu.i4_deq_data_stride_chroma = cu_size;
5303
2.65M
    s_buffer_data_for_tu.ppu1_ecd = &pu1_ecd_data;
5304
5305
2.65M
    if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))
5306
0
    {
5307
0
        UWORD8 i;
5308
5309
0
        UWORD8 *pu1_pred = (UWORD8 *)s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred;
5310
5311
0
        for(i = 0; i < (!!ps_inter_cand->b3_part_size) + 1; i++)
5312
0
        {
5313
0
            pu_t *ps_pu;
5314
5315
0
            WORD32 inter_pu_wd;
5316
0
            WORD32 inter_pu_ht;
5317
5318
0
            ps_pu = ps_inter_cand->as_inter_pu + i;
5319
5320
0
            inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
5321
0
            inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
5322
0
            inter_pu_ht <<= (ps_ctxt->u1_chroma_array_type == 2);
5323
0
            ihevce_chroma_inter_pred_pu(
5324
0
                &ps_ctxt->s_mc_ctxt,
5325
0
                ps_pu,
5326
0
                pu1_pred,
5327
0
                s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride);
5328
0
            if(!!ps_inter_cand->b3_part_size)
5329
0
            {
5330
                /* 2Nx__ partition case */
5331
0
                if(inter_pu_wd == cu_size)
5332
0
                {
5333
0
                    pu1_pred +=
5334
0
                        (inter_pu_ht *
5335
0
                         s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride);
5336
0
                }
5337
5338
                /* __x2N partition case */
5339
0
                if(inter_pu_ht == (cu_size >> !(ps_ctxt->u1_chroma_array_type == 2)))
5340
0
                {
5341
0
                    pu1_pred += inter_pu_wd;
5342
0
                }
5343
0
            }
5344
0
        }
5345
0
    }
5346
5347
#if !ENABLE_TOP_DOWN_TU_RECURSION
5348
    total_rdopt_cost = ihevce_tu_tree_selector(
5349
        ps_ctxt,
5350
        as_tu_nodes,
5351
        &s_buffer_data_for_tu,
5352
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5353
             .s_cabac_ctxt.au1_ctxt_models[0],
5354
        recon_func_mode,
5355
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
5356
        i4_alpha_stim_multiplier,
5357
        u1_is_cu_noisy,
5358
#endif
5359
        0,
5360
        ps_ctxt->u1_max_inter_tr_depth,
5361
        ps_inter_cand->b3_part_size,
5362
        u1_compute_spatial_ssd);
5363
#else
5364
2.65M
    total_rdopt_cost = ihevce_topDown_tu_tree_selector(
5365
2.65M
        ps_ctxt,
5366
2.65M
        as_tu_nodes,
5367
2.65M
        &s_buffer_data_for_tu,
5368
2.65M
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5369
2.65M
             .s_cabac_ctxt.au1_ctxt_models[0],
5370
2.65M
        recon_func_mode,
5371
2.65M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
5372
2.65M
        i4_alpha_stim_multiplier,
5373
2.65M
        u1_is_cu_noisy,
5374
2.65M
#endif
5375
2.65M
        0,
5376
2.65M
        ps_ctxt->u1_max_inter_tr_depth,
5377
2.65M
        ps_inter_cand->b3_part_size,
5378
2.65M
        INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
5379
2.65M
        u1_compute_spatial_ssd);
5380
2.65M
#endif
5381
5382
2.65M
    ps_final_prms->u2_num_tus_in_cu = 0;
5383
2.65M
    ps_final_prms->u4_cu_luma_res_bits = 0;
5384
2.65M
    ps_final_prms->u4_cu_sad = 0;
5385
2.65M
    total_rdopt_cost = 0;
5386
2.65M
    ecd_data_bytes_cons = 0;
5387
2.65M
    cu_bits = 0;
5388
2.65M
#if ENABLE_INTER_ZCU_COST
5389
2.65M
    ps_ctxt->i8_cu_not_coded_cost = 0;
5390
2.65M
#endif
5391
2.65M
    ps_final_prms->u1_is_cu_coded = 0;
5392
2.65M
    ps_final_prms->u1_cu_size = cu_size;
5393
5394
2.65M
    ihevce_tu_selector_debriefer(
5395
2.65M
        as_tu_nodes,
5396
2.65M
        ps_final_prms,
5397
2.65M
        &total_rdopt_cost,
5398
2.65M
#if ENABLE_INTER_ZCU_COST
5399
2.65M
        &ps_ctxt->i8_cu_not_coded_cost,
5400
2.65M
#endif
5401
2.65M
        &ecd_data_bytes_cons,
5402
2.65M
        &cu_bits,
5403
2.65M
        &ps_final_prms->u2_num_tus_in_cu,
5404
2.65M
        ps_ctxt->i4_cu_qp,
5405
2.65M
        cu_pos_x * 4,
5406
2.65M
        cu_pos_y * 4,
5407
2.65M
        INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
5408
2.65M
        (ps_ctxt->u1_chroma_array_type == 2),
5409
2.65M
        POS_TL);
5410
5411
2.65M
    if(!(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)))
5412
2.65M
    {
5413
2.65M
        ps_final_prms->i4_chrm_cu_coeff_strt_idx = ecd_data_bytes_cons;
5414
2.65M
    }
5415
5416
    /* Modify the cost function for this CU. */
5417
    /* loop in for 8x8 blocks */
5418
2.65M
    if(ps_ctxt->u1_enable_psyRDOPT)
5419
0
    {
5420
0
        UWORD8 *pu1_recon_cu;
5421
0
        WORD32 recon_stride;
5422
0
        WORD32 curr_pos_x;
5423
0
        WORD32 curr_pos_y;
5424
0
        WORD32 start_index;
5425
0
        WORD32 num_horz_cu_in_ctb;
5426
0
        WORD32 had_block_size;
5427
5428
        /* tODO: sreenivasa ctb size has to be used appropriately */
5429
0
        had_block_size = 8;
5430
0
        num_horz_cu_in_ctb = 64 / had_block_size;
5431
5432
0
        curr_pos_x = cu_pos_x << 2; /* pel units */
5433
0
        curr_pos_y = cu_pos_y << 2; /* pel units */
5434
0
        recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
5435
0
        pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore
5436
0
                            .apv_luma_recon_bufs[0]);  // already pointing to the current CU recon
5437
        //+ \curr_pos_x + curr_pos_y * recon_stride;
5438
5439
        /* start index to index the source satd of curr cu in the current ctb */
5440
0
        start_index =
5441
0
            (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
5442
5443
0
        {
5444
0
            total_rdopt_cost += ihevce_psy_rd_cost(
5445
0
                ps_ctxt->ai4_source_satd_8x8,
5446
0
                pu1_recon_cu,
5447
0
                recon_stride,
5448
0
                1,  //howz stride
5449
0
                cu_size,
5450
0
                0,  // pic type
5451
0
                0,  //layer id
5452
0
                ps_ctxt->i4_satd_lamda,  // lambda
5453
0
                start_index,
5454
0
                ps_ctxt->u1_is_input_data_hbd,
5455
0
                ps_ctxt->u4_psy_strength,
5456
0
                &ps_ctxt->s_cmn_opt_func);  // 8 bit
5457
0
        }
5458
0
    }
5459
5460
2.65M
    ps_final_prms->u1_chroma_intra_pred_mode = 4;
5461
5462
    /* update the bytes consumed */
5463
2.65M
    ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
5464
5465
    /* store the current cu size to final prms */
5466
2.65M
    ps_final_prms->u1_cu_size = cu_size;
5467
    /* ------------- Chroma processing -------------- */
5468
    /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEED preset */
5469
2.65M
    if(ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt &&
5470
2.65M
       !(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)))
5471
2.65M
    {
5472
2.65M
        LWORD64 chrm_rdopt_cost;
5473
2.65M
        WORD32 chrm_rdopt_tu_bits;
5474
5475
        /* Store the current RDOPT cost to enable early exit in chrom_prcs */
5476
2.65M
        ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
5477
5478
2.65M
        chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
5479
2.65M
            ps_ctxt,
5480
2.65M
            curr_buf_idx,
5481
2.65M
            0, /* TU mode : Don't care in Inter patrh */
5482
2.65M
            ps_chrm_cu_buf_prms->pu1_curr_src,
5483
2.65M
            ps_chrm_cu_buf_prms->i4_chrm_src_stride,
5484
2.65M
            ps_chrm_cu_buf_prms->pu1_cu_left,
5485
2.65M
            ps_chrm_cu_buf_prms->pu1_cu_top,
5486
2.65M
            ps_chrm_cu_buf_prms->pu1_cu_top_left,
5487
2.65M
            ps_chrm_cu_buf_prms->i4_cu_left_stride,
5488
2.65M
            (cu_pos_x >> 1),
5489
2.65M
            (cu_pos_y >> 1),
5490
2.65M
            &chrm_rdopt_tu_bits,
5491
2.65M
            i4_alpha_stim_multiplier,
5492
2.65M
            u1_is_cu_noisy);
5493
5494
2.65M
#if WEIGH_CHROMA_COST
5495
2.65M
        chrm_rdopt_cost = (LWORD64)(
5496
2.65M
            (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
5497
2.65M
             (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
5498
2.65M
            CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
5499
2.65M
#endif
5500
5501
2.65M
#if CHROMA_RDOPT_ENABLE
5502
2.65M
        total_rdopt_cost += chrm_rdopt_cost;
5503
2.65M
#endif
5504
2.65M
        cu_bits += chrm_rdopt_tu_bits;
5505
5506
        /* during chroma evaluation if skip decision was over written     */
5507
        /* then the current skip candidate is set to a non skip candidate */
5508
2.65M
        ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag;
5509
5510
        /* cu bits for chroma residual if chroma rdopt is on       */
5511
        /* if zero_cbf eval is disabled then cu bits will be zero  */
5512
2.65M
        ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
5513
5514
2.65M
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
5515
2.65M
        {
5516
            /* Early exit : If the current running cost exceeds
5517
            the prev. best mode cost, break */
5518
2.65M
            if(total_rdopt_cost > prev_best_rdopt_cost)
5519
334k
            {
5520
334k
                return (total_rdopt_cost);
5521
334k
            }
5522
2.65M
        }
5523
2.65M
    }
5524
0
    else
5525
0
    {}
5526
5527
2.32M
#if SHRINK_INTER_TUTREE
5528
    /* ------------- Quadtree TU split  optimization ------------  */
5529
2.32M
    if(ps_final_prms->u1_is_cu_coded)
5530
299k
    {
5531
299k
        ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree(
5532
299k
            &ps_final_prms->as_tu_enc_loop[0],
5533
299k
            &ps_final_prms->as_tu_enc_loop_temp_prms[0],
5534
299k
            &ps_final_prms->s_recon_datastore,
5535
299k
            ps_final_prms->u2_num_tus_in_cu,
5536
299k
            (ps_ctxt->u1_chroma_array_type == 2));
5537
299k
    }
5538
2.32M
#endif
5539
5540
    /* RDOPT copy States :  Best after all luma TUs (and chroma,if enabled)to current */
5541
2.32M
    COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
5542
2.32M
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5543
2.32M
                .s_cabac_ctxt.au1_ctxt_models[0] +
5544
2.32M
            IHEVC_CAB_COEFFX_PREFIX,
5545
2.32M
        &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
5546
2.32M
        IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
5547
5548
    /* -------- Bit estimate for RD opt -------------- */
5549
2.32M
    {
5550
2.32M
        nbr_avail_flags_t s_nbr;
5551
        /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
5552
2.32M
        WORD32 cbf_bits, header_bits;
5553
5554
        /* get the neighbour availability flags for current cu  */
5555
2.32M
        ihevce_get_only_nbr_flag(
5556
2.32M
            &s_nbr,
5557
2.32M
            ps_ctxt->pu1_ctb_nbr_map,
5558
2.32M
            ps_ctxt->i4_nbr_map_strd,
5559
2.32M
            cu_pos_x,
5560
2.32M
            cu_pos_y,
5561
2.32M
            (cu_size >> 2),
5562
2.32M
            (cu_size >> 2));
5563
5564
        /* call the entropy rdo encode to get the bit estimate for current cu */
5565
2.32M
        header_bits = ihevce_entropy_rdo_encode_cu(
5566
2.32M
            &ps_ctxt->s_rdopt_entropy_ctxt,
5567
2.32M
            ps_final_prms,
5568
2.32M
            (cu_pos_x >> 1), /*  back to 8x8 pel units   */
5569
2.32M
            (cu_pos_y >> 1), /*  back to 8x8 pel units   */
5570
2.32M
            cu_size,
5571
2.32M
            ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
5572
2.32M
                                           : s_nbr.u1_top_avail,
5573
2.32M
            s_nbr.u1_left_avail,
5574
2.32M
            &ps_final_prms->pu1_cu_coeffs[0],
5575
2.32M
            &cbf_bits);
5576
5577
2.32M
        cu_bits += header_bits;
5578
5579
        /* cbf bits are excluded from header bits, instead considered as texture bits */
5580
        /* incase if zero cbf eval is disabled then texture bits gets added here */
5581
2.32M
        ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
5582
2.32M
        ps_final_prms->u4_cu_cbf_bits = cbf_bits;
5583
5584
2.32M
#if RDOPT_ENABLE
5585
        /* add the cost of coding the header bits */
5586
2.32M
        total_rdopt_cost +=
5587
2.32M
            COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
5588
5589
2.32M
#if ENABLE_INTER_ZCU_COST
5590
        /* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */
5591
2.32M
        if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level))
5592
299k
        {
5593
299k
            LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost;
5594
5595
299k
            WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) &&
5596
171k
                                      (1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag);
5597
5598
299k
            cab_ctxt_t *ps_cab_ctxt =
5599
299k
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt;
5600
5601
            /* Read header bits generated after ihevce_entropy_rdo_encode_cu() call */
5602
299k
            UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12;
5603
5604
            /* account for coding qt_root_cbf = 0 */
5605
            /* First subtract cost for coding as 1 (part of header bits) and then add cost for coding as 0 */
5606
299k
            u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0];
5607
299k
            if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1])
5608
477
                u4_cu_hdr_bits_q12 = 0;
5609
298k
            else
5610
298k
                u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1];
5611
5612
            /* add the cost of coding the header bits */
5613
299k
            i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30(
5614
299k
                u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */,
5615
299k
                ps_ctxt->i8_cl_ssd_lambda_qf,
5616
299k
                (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
5617
5618
299k
            if(ps_ctxt->u1_enable_psyRDOPT)
5619
0
            {
5620
0
                i8_cu_not_coded_cost = total_rdopt_cost + 1;
5621
0
            }
5622
5623
            /* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */
5624
299k
            if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu))
5625
3.75k
            {
5626
3.75k
                WORD32 tx_size;
5627
5628
                /* force cu as not coded and update the cost */
5629
3.75k
                ps_final_prms->u1_is_cu_coded = 0;
5630
3.75k
                ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
5631
3.75k
                ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
5632
5633
3.75k
                total_rdopt_cost = i8_cu_not_coded_cost;
5634
5635
                /* reset num TUs to 1 unless cu size is 64 (then 4) */
5636
3.75k
                ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1;
5637
3.75k
                trans_size = (64 == cu_size) ? 32 : cu_size;
5638
3.75k
                GETRANGE(tx_size, trans_size);
5639
5640
                /* reset the bytes consumed */
5641
3.75k
                ps_final_prms->i4_num_bytes_ecd_data = 0;
5642
5643
                /* reset texture related bits and roll back header bits*/
5644
3.75k
                ps_final_prms->u4_cu_cbf_bits = 0;
5645
3.75k
                ps_final_prms->u4_cu_luma_res_bits = 0;
5646
3.75k
                ps_final_prms->u4_cu_chroma_res_bits = 0;
5647
3.75k
                ps_final_prms->u4_cu_hdr_bits =
5648
3.75k
                    (u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q;
5649
5650
                /* update cabac model with qtroot cbf = 0 decision */
5651
3.75k
                ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] =
5652
3.75k
                    gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1];
5653
5654
                /* restore untouched cabac models for, tusplit, cbfs, texture etc */
5655
3.75k
                memcpy(
5656
3.75k
                    &ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM],
5657
3.75k
                    &au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM],
5658
3.75k
                    (IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM));
5659
5660
                /* mark all tus as not coded for final eval */
5661
9.35k
                for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++)
5662
5.60k
                {
5663
5.60k
                    WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0;
5664
5.60k
                    WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0;
5665
5666
5.60k
                    nbr_4x4_t *ps_cur_nbr_4x4 =
5667
5.60k
                        ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu);
5668
5669
5.60k
                    num_4x4_in_tu = trans_size >> 2;
5670
5671
5.60k
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0;
5672
5.60k
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0;
5673
5.60k
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0;
5674
5675
5.60k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0;
5676
5.60k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
5677
5.60k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
5678
5679
5.60k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
5680
5.60k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
5681
5682
5.60k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
5683
5.60k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x;
5684
5.60k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y;
5685
5686
                    /* reset cbf for the all 4x4 in TU */
5687
5.60k
                    {
5688
5.60k
                        WORD32 i, j;
5689
5.60k
                        nbr_4x4_t *ps_tmp_4x4;
5690
5.60k
                        ps_tmp_4x4 = ps_cur_nbr_4x4;
5691
5692
42.1k
                        for(i = 0; i < num_4x4_in_tu; i++)
5693
36.5k
                        {
5694
302k
                            for(j = 0; j < num_4x4_in_tu; j++)
5695
265k
                            {
5696
265k
                                ps_tmp_4x4[j].b1_y_cbf = 0;
5697
265k
                            }
5698
                            /* row level update*/
5699
36.5k
                            ps_tmp_4x4 += num_4x4_in_cu;
5700
36.5k
                        }
5701
5.60k
                    }
5702
5.60k
                }
5703
3.75k
            }
5704
299k
        }
5705
2.32M
#endif /* ENABLE_INTER_ZCU_COST */
5706
5707
2.32M
#endif /* RDOPT_ENABLE */
5708
2.32M
    }
5709
5710
2.32M
    return (total_rdopt_cost);
5711
2.65M
}
5712
#endif
5713
5714
/*!
5715
******************************************************************************
5716
* \if Function name : ihevce_inter_rdopt_cu_mc_mvp \endif
5717
*
5718
* \brief
5719
*    Inter Coding unit funtion which performs MC and MVP calc for RD opt mode
5720
*
5721
* \param[in] ps_ctxt       enc_loop module ctxt pointer
5722
* \param[in] ps_inter_cand pointer to inter candidate structure
5723
* \param[in] cu_size         Current CU size
5724
* \param[in] cu_pos_x        cu position x w.r.t to ctb
5725
* \param[in] cu_pos_y        cu position y w.r.t to ctb
5726
* \param[in] ps_left_nbr_4x4 Left neighbour 4x4 structure pointer
5727
* \param[in] ps_top_nbr_4x4  top neighbour 4x4 structure pointer
5728
* \param[in] ps_topleft_nbr_4x4  top left neighbour 4x4 structure pointer
5729
* \param[in] nbr_4x4_left_strd  left neighbour 4x4 buffer stride
5730
* \param[in] curr_buf_idx Current Buffer index
5731
*
5732
* \return
5733
*    Rdopt cost
5734
*
5735
* \author
5736
*  Ittiam
5737
*
5738
*****************************************************************************
5739
*/
5740
LWORD64 ihevce_inter_rdopt_cu_mc_mvp(
5741
    ihevce_enc_loop_ctxt_t *ps_ctxt,
5742
    cu_inter_cand_t *ps_inter_cand,
5743
    WORD32 cu_size,
5744
    WORD32 cu_pos_x,
5745
    WORD32 cu_pos_y,
5746
    nbr_4x4_t *ps_left_nbr_4x4,
5747
    nbr_4x4_t *ps_top_nbr_4x4,
5748
    nbr_4x4_t *ps_topleft_nbr_4x4,
5749
    WORD32 nbr_4x4_left_strd,
5750
    WORD32 curr_buf_idx)
5751
3.44M
{
5752
    /* local variables */
5753
3.44M
    enc_loop_cu_final_prms_t *ps_final_prms;
5754
3.44M
    nbr_avail_flags_t s_nbr;
5755
3.44M
    nbr_4x4_t *ps_nbr_4x4;
5756
5757
3.44M
    UWORD8 au1_is_top_used[2][MAX_MVP_LIST_CAND];
5758
3.44M
    UWORD8 *pu1_pred;
5759
3.44M
    WORD32 rdopt_cost;
5760
3.44M
    WORD32 ctr;
5761
3.44M
    WORD32 num_cu_part;
5762
3.44M
    WORD32 inter_pu_wd;
5763
3.44M
    WORD32 inter_pu_ht;
5764
3.44M
    WORD32 pred_stride;
5765
5766
    /* get the pointers based on curbuf idx */
5767
3.44M
    ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
5768
3.44M
    ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
5769
3.44M
    pu1_pred = ps_inter_cand->pu1_pred_data;
5770
5771
3.44M
    pred_stride = ps_inter_cand->i4_pred_data_stride;
5772
5773
    /* store the partition mode in final prms */
5774
3.44M
    ps_final_prms->u1_part_mode = ps_inter_cand->b3_part_size;
5775
5776
    /* since encoder does not support NXN part type */
5777
    /* num parts can be either 1 or 2 only          */
5778
3.44M
    ASSERT(SIZE_NxN != ps_inter_cand->b3_part_size);
5779
5780
3.44M
    num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1;
5781
5782
    /* get the 4x4 level position of current cu */
5783
3.44M
    cu_pos_x = cu_pos_x << 1;
5784
3.44M
    cu_pos_y = cu_pos_y << 1;
5785
5786
    /* populate cu level params */
5787
3.44M
    ps_final_prms->u1_intra_flag = PRED_MODE_INTER;
5788
3.44M
    ps_final_prms->u2_num_pus_in_cu = num_cu_part;
5789
5790
    /* run a loop over all the partitons in cu */
5791
7.53M
    for(ctr = 0; ctr < num_cu_part; ctr++)
5792
4.08M
    {
5793
4.08M
        pu_mv_t as_pred_mv[MAX_MVP_LIST_CAND];
5794
4.08M
        pu_t *ps_pu;
5795
4.08M
        WORD32 skip_or_merge_flag;
5796
4.08M
        UWORD8 u1_use_mvp_from_top_row;
5797
5798
4.08M
        ps_pu = &ps_inter_cand->as_inter_pu[ctr];
5799
5800
        /* IF AMP then each partitions can have diff wd ht */
5801
4.08M
        inter_pu_wd = (ps_pu->b4_wd + 1) << 2;
5802
4.08M
        inter_pu_ht = (ps_pu->b4_ht + 1) << 2;
5803
5804
        /* populate reference pic buf id for bs compute */
5805
5806
        /* L0 */
5807
4.08M
        if(-1 != ps_pu->mv.i1_l0_ref_idx)
5808
3.85M
        {
5809
3.85M
            ps_pu->mv.i1_l0_ref_pic_buf_id =
5810
3.85M
                ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx]->i4_buf_id;
5811
3.85M
        }
5812
5813
        /* L1 */
5814
4.08M
        if(-1 != ps_pu->mv.i1_l1_ref_idx)
5815
1.40M
        {
5816
1.40M
            ps_pu->mv.i1_l1_ref_pic_buf_id =
5817
1.40M
                ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx]->i4_buf_id;
5818
1.40M
        }
5819
5820
        /* SKIP or merge check for every part */
5821
4.08M
        skip_or_merge_flag = ps_inter_cand->b1_skip_flag | ps_pu->b1_merge_flag;
5822
5823
        /* ----------- MV Prediction ----------------- */
5824
4.08M
        if(0 == skip_or_merge_flag)
5825
987k
        {
5826
            /* get the neighbour availability flags */
5827
987k
            ihevce_get_only_nbr_flag(
5828
987k
                &s_nbr,
5829
987k
                ps_ctxt->pu1_ctb_nbr_map,
5830
987k
                ps_ctxt->i4_nbr_map_strd,
5831
987k
                cu_pos_x,
5832
987k
                cu_pos_y,
5833
987k
                inter_pu_wd >> 2,
5834
987k
                inter_pu_ht >> 2);
5835
5836
987k
            if(ps_ctxt->u1_disable_intra_eval && DISABLE_TOP_SYNC && (ps_pu->b4_pos_y == 0))
5837
0
            {
5838
0
                u1_use_mvp_from_top_row = 0;
5839
0
            }
5840
987k
            else
5841
987k
            {
5842
987k
                u1_use_mvp_from_top_row = 1;
5843
987k
            }
5844
5845
987k
            if(!u1_use_mvp_from_top_row)
5846
0
            {
5847
0
                if(s_nbr.u1_top_avail || s_nbr.u1_top_lt_avail || s_nbr.u1_top_rt_avail)
5848
0
                {
5849
0
                    if(!s_nbr.u1_left_avail && !s_nbr.u1_bot_lt_avail)
5850
0
                    {
5851
0
                        WORD32 curr_cu_pos_in_row, cu_top_right_offset, cu_top_right_dep_pos;
5852
5853
                        /* Ensure Top Right Sync */
5854
0
                        if(!ps_ctxt->u1_use_top_at_ctb_boundary)
5855
0
                        {
5856
0
                            curr_cu_pos_in_row =
5857
0
                                ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x + (cu_pos_x << 2);
5858
5859
0
                            if(ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y == 0)
5860
0
                            {
5861
                                /* No wait for 1st row */
5862
0
                                cu_top_right_offset = -(MAX_CTB_SIZE);
5863
0
                                {
5864
0
                                    ihevce_tile_params_t *ps_col_tile_params =
5865
0
                                        ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
5866
0
                                         ps_ctxt->i4_tile_col_idx);
5867
5868
                                    /* No wait for 1st row */
5869
0
                                    cu_top_right_offset =
5870
0
                                        -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
5871
0
                                }
5872
0
                                cu_top_right_dep_pos = 0;
5873
0
                            }
5874
0
                            else
5875
0
                            {
5876
0
                                cu_top_right_offset = (cu_size) + 4;
5877
0
                                cu_top_right_dep_pos =
5878
0
                                    (ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y >> 6) - 1;
5879
0
                            }
5880
5881
0
                            ihevce_dmgr_chk_row_row_sync(
5882
0
                                ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
5883
0
                                curr_cu_pos_in_row,
5884
0
                                cu_top_right_offset,
5885
0
                                cu_top_right_dep_pos,
5886
0
                                ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
5887
0
                                ps_ctxt->thrd_id);
5888
0
                        }
5889
5890
0
                        u1_use_mvp_from_top_row = 1;
5891
0
                    }
5892
0
                    else
5893
0
                    {
5894
0
                        s_nbr.u1_top_avail = 0;
5895
0
                        s_nbr.u1_top_lt_avail = 0;
5896
0
                        s_nbr.u1_top_rt_avail = 0;
5897
0
                    }
5898
0
                }
5899
0
                else
5900
0
                {
5901
0
                    u1_use_mvp_from_top_row = 1;
5902
0
                }
5903
0
            }
5904
            /* Call the MV prediction module to get MVP */
5905
987k
            ihevce_mv_pred(
5906
987k
                &ps_ctxt->s_mv_pred_ctxt,
5907
987k
                ps_top_nbr_4x4,
5908
987k
                ps_left_nbr_4x4,
5909
987k
                ps_topleft_nbr_4x4,
5910
987k
                nbr_4x4_left_strd,
5911
987k
                &s_nbr,
5912
987k
                NULL, /* colocated MV */
5913
987k
                ps_pu,
5914
987k
                &as_pred_mv[0],
5915
987k
                au1_is_top_used);
5916
987k
        }
5917
5918
        /* store the nbr 4x4 structure */
5919
4.08M
        ps_nbr_4x4->b1_skip_flag = ps_inter_cand->b1_skip_flag;
5920
4.08M
        ps_nbr_4x4->b1_intra_flag = 0;
5921
4.08M
        ps_nbr_4x4->b1_pred_l0_flag = 0;
5922
4.08M
        ps_nbr_4x4->b1_pred_l1_flag = 0;
5923
5924
        /* DC is default mode for inter cu, required for intra mode signalling */
5925
4.08M
        ps_nbr_4x4->b6_luma_intra_mode = 1;
5926
5927
        /* copy the motion vectors to neighbour structure */
5928
4.08M
        ps_nbr_4x4->mv = ps_pu->mv;
5929
5930
        /* copy the PU to final out pu */
5931
4.08M
        ps_final_prms->as_pu_enc_loop[ctr] = *ps_pu;
5932
5933
        /* copy the PU to chroma */
5934
4.08M
        ps_final_prms->as_pu_chrm_proc[ctr] = *ps_pu;
5935
5936
        /* store the skip flag to final prms */
5937
4.08M
        ps_final_prms->u1_skip_flag = ps_inter_cand->b1_skip_flag;
5938
5939
        /* MVP index & MVD calc is gated on skip/merge flag */
5940
4.08M
        if(0 == skip_or_merge_flag)
5941
987k
        {
5942
            /* calculate the MVDs and popluate the MVP idx for L0 */
5943
987k
            if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode))
5944
896k
            {
5945
896k
                WORD32 idx0_cost, idx1_cost;
5946
5947
                /* calculate the ABS mvd for cand 0 */
5948
896k
                idx0_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[0].s_l0_mv.i2_mvx);
5949
896k
                idx0_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[0].s_l0_mv.i2_mvy);
5950
5951
                /* calculate the ABS mvd for cand 1 */
5952
896k
                if(u1_use_mvp_from_top_row)
5953
896k
                {
5954
896k
                    idx1_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[1].s_l0_mv.i2_mvx);
5955
896k
                    idx1_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[1].s_l0_mv.i2_mvy);
5956
896k
                }
5957
0
                else
5958
0
                {
5959
0
                    idx1_cost = INT_MAX;
5960
0
                }
5961
5962
                /* based on the least cost choose the mvp idx */
5963
896k
                if(idx0_cost <= idx1_cost)
5964
610k
                {
5965
610k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -=
5966
610k
                        as_pred_mv[0].s_l0_mv.i2_mvx;
5967
610k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -=
5968
610k
                        as_pred_mv[0].s_l0_mv.i2_mvy;
5969
5970
610k
                    ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 0;
5971
610k
                }
5972
286k
                else
5973
286k
                {
5974
286k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -=
5975
286k
                        as_pred_mv[1].s_l0_mv.i2_mvx;
5976
286k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -=
5977
286k
                        as_pred_mv[1].s_l0_mv.i2_mvy;
5978
5979
286k
                    ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 1;
5980
286k
                }
5981
5982
                /* set the pred l0 flag for neighbour storage */
5983
896k
                ps_nbr_4x4->b1_pred_l0_flag = 1;
5984
896k
            }
5985
            /* calculate the MVDs and popluate the MVP idx for L1 */
5986
987k
            if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode))
5987
123k
            {
5988
123k
                WORD32 idx0_cost, idx1_cost;
5989
5990
                /* calculate the ABS mvd for cand 0 */
5991
123k
                idx0_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[0].s_l1_mv.i2_mvx);
5992
123k
                idx0_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[0].s_l1_mv.i2_mvy);
5993
5994
                /* calculate the ABS mvd for cand 1 */
5995
123k
                if(u1_use_mvp_from_top_row)
5996
123k
                {
5997
123k
                    idx1_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[1].s_l1_mv.i2_mvx);
5998
123k
                    idx1_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[1].s_l1_mv.i2_mvy);
5999
123k
                }
6000
0
                else
6001
0
                {
6002
0
                    idx1_cost = INT_MAX;
6003
0
                }
6004
6005
                /* based on the least cost choose the mvp idx */
6006
123k
                if(idx0_cost <= idx1_cost)
6007
83.0k
                {
6008
83.0k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -=
6009
83.0k
                        as_pred_mv[0].s_l1_mv.i2_mvx;
6010
83.0k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -=
6011
83.0k
                        as_pred_mv[0].s_l1_mv.i2_mvy;
6012
6013
83.0k
                    ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 0;
6014
83.0k
                }
6015
40.3k
                else
6016
40.3k
                {
6017
40.3k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -=
6018
40.3k
                        as_pred_mv[1].s_l1_mv.i2_mvx;
6019
40.3k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -=
6020
40.3k
                        as_pred_mv[1].s_l1_mv.i2_mvy;
6021
6022
40.3k
                    ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 1;
6023
40.3k
                }
6024
6025
                /* set the pred l1 flag for neighbour storage */
6026
123k
                ps_nbr_4x4->b1_pred_l1_flag = 1;
6027
123k
            }
6028
6029
            /* set the merge flag to 0 */
6030
987k
            ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = 0;
6031
987k
            ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = 0;
6032
987k
        }
6033
3.09M
        else
6034
3.09M
        {
6035
            /* copy the merge index from candidate */
6036
3.09M
            ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = ps_pu->b1_merge_flag;
6037
6038
3.09M
            ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = ps_pu->b3_merge_idx;
6039
6040
3.09M
            if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode))
6041
2.95M
            {
6042
                /* set the pred l0 flag for neighbour storage */
6043
2.95M
                ps_nbr_4x4->b1_pred_l0_flag = 1;
6044
2.95M
            }
6045
6046
            /* calculate the MVDs and popluate the MVP idx for L1 */
6047
3.09M
            if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode))
6048
565k
            {
6049
                /* set the pred l1 flag for neighbour storage */
6050
565k
                ps_nbr_4x4->b1_pred_l1_flag = 1;
6051
565k
            }
6052
3.09M
        }
6053
6054
        /* RD opt cost computation is part of cu_ntu func hence here it is set to 0 */
6055
4.08M
        rdopt_cost = 0;
6056
6057
        /* copy the MV to colocated Mv structure */
6058
4.08M
        ps_final_prms->as_col_pu_enc_loop[ctr].s_l0_mv = ps_pu->mv.s_l0_mv;
6059
4.08M
        ps_final_prms->as_col_pu_enc_loop[ctr].s_l1_mv = ps_pu->mv.s_l1_mv;
6060
4.08M
        ps_final_prms->as_col_pu_enc_loop[ctr].i1_l0_ref_idx = ps_pu->mv.i1_l0_ref_idx;
6061
4.08M
        ps_final_prms->as_col_pu_enc_loop[ctr].i1_l1_ref_idx = ps_pu->mv.i1_l1_ref_idx;
6062
4.08M
        ps_final_prms->as_col_pu_enc_loop[ctr].b2_pred_mode = ps_pu->b2_pred_mode;
6063
4.08M
        ps_final_prms->as_col_pu_enc_loop[ctr].b1_intra_flag = 0;
6064
6065
        /* replicate neighbour 4x4 strcuture for entire partition */
6066
4.08M
        {
6067
4.08M
            WORD32 i, j;
6068
4.08M
            nbr_4x4_t *ps_tmp_4x4;
6069
6070
4.08M
            ps_tmp_4x4 = ps_nbr_4x4;
6071
6072
19.7M
            for(i = 0; i < (inter_pu_ht >> 2); i++)
6073
15.6M
            {
6074
102M
                for(j = 0; j < (inter_pu_wd >> 2); j++)
6075
86.9M
                {
6076
86.9M
                    ps_tmp_4x4[j] = *ps_nbr_4x4;
6077
86.9M
                }
6078
                /* row level update*/
6079
15.6M
                ps_tmp_4x4 += (cu_size >> 2);
6080
15.6M
            }
6081
4.08M
        }
6082
        /* set the neighbour map to 1 */
6083
4.08M
        ihevce_set_inter_nbr_map(
6084
4.08M
            ps_ctxt->pu1_ctb_nbr_map,
6085
4.08M
            ps_ctxt->i4_nbr_map_strd,
6086
4.08M
            cu_pos_x,
6087
4.08M
            cu_pos_y,
6088
4.08M
            (inter_pu_wd >> 2),
6089
4.08M
            (inter_pu_ht >> 2),
6090
4.08M
            1);
6091
        /* ----------- Motion Compensation for Luma ----------- */
6092
#if !ENABLE_MIXED_INTER_MODE_EVAL
6093
        {
6094
            IV_API_CALL_STATUS_T valid_mv_cand;
6095
6096
            /*If the inter candidate is neither merge cand nor skip cand
6097
            then calculate the mc.*/
6098
            if(0 == skip_or_merge_flag || (ps_ctxt->u1_high_speed_cu_dec_on))
6099
            {
6100
                valid_mv_cand =
6101
                    ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 0);
6102
6103
                /* assert if the MC is given a valid mv candidate */
6104
                ASSERT(valid_mv_cand == IV_SUCCESS);
6105
            }
6106
        }
6107
#endif
6108
4.08M
        if((2 == num_cu_part) && (0 == ctr))
6109
643k
        {
6110
            /* 2Nx__ partion case */
6111
643k
            if(inter_pu_wd == cu_size)
6112
525k
            {
6113
525k
                cu_pos_y += (inter_pu_ht >> 2);
6114
525k
                pu1_pred += (inter_pu_ht * pred_stride);
6115
525k
                ps_nbr_4x4 += (inter_pu_ht >> 2) * (cu_size >> 2);
6116
525k
                ps_left_nbr_4x4 += (inter_pu_ht >> 2) * nbr_4x4_left_strd;
6117
525k
                ps_top_nbr_4x4 = ps_nbr_4x4 - (cu_size >> 2);
6118
525k
                ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - nbr_4x4_left_strd;
6119
525k
            }
6120
6121
            /* __x2N partion case */
6122
643k
            if(inter_pu_ht == cu_size)
6123
117k
            {
6124
117k
                cu_pos_x += (inter_pu_wd >> 2);
6125
117k
                pu1_pred += inter_pu_wd;
6126
117k
                ps_nbr_4x4 += (inter_pu_wd >> 2);
6127
117k
                ps_left_nbr_4x4 = ps_nbr_4x4 - 1;
6128
117k
                ps_top_nbr_4x4 += (inter_pu_wd >> 2);
6129
117k
                ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1;
6130
117k
                nbr_4x4_left_strd = (cu_size >> 2);
6131
117k
            }
6132
643k
        }
6133
4.08M
    }
6134
6135
3.44M
    return (rdopt_cost);
6136
3.44M
}
6137
6138
/*!
6139
******************************************************************************
6140
* \if Function name : ihevce_intra_chroma_pred_mode_selector \endif
6141
*
6142
* \brief
6143
*    Coding unit processing function for chroma special modes (Non-Luma modes)
6144
*
6145
* \param[in] ps_ctxt       enc_loop module ctxt pointer
6146
* \param[in] ps_chrm_cu_buf_prms    ctxt having chroma related prms
6147
* \param[in] ps_cu_analyse      pointer to cu analyse
6148
* \param[in] rd_opt_curr_idx    index in the array of RDopt params
6149
* \param[in] tu_mode            TU_EQ_CU or other case
6150
*
6151
* \return
6152
*    Stores the best SATD mode, it's RDOPT cost, CABAC state, TU bits
6153
*
6154
* \author
6155
*  Ittiam
6156
*
6157
*****************************************************************************
6158
*/
6159
UWORD8 ihevce_distortion_based_intra_chroma_mode_selector(
6160
    cu_analyse_t *ps_cu_analyse,
6161
    ihevc_intra_pred_chroma_ref_substitution_ft *pf_ref_substitution,
6162
    pf_intra_pred *ppf_chroma_ip,
6163
    pf_res_trans_luma_had_chroma *ppf_resd_trns_had,
6164
    UWORD8 *pu1_src,
6165
    WORD32 i4_src_stride,
6166
    UWORD8 *pu1_pred,
6167
    WORD32 i4_pred_stride,
6168
    UWORD8 *pu1_ctb_nbr_map,
6169
    WORD32 i4_nbr_map_strd,
6170
    UWORD8 *pu1_ref_sub_out,
6171
    WORD32 i4_alpha_stim_multiplier,
6172
    UWORD8 u1_is_cu_noisy,
6173
    UWORD8 u1_trans_size,
6174
    UWORD8 u1_trans_idx,
6175
    UWORD8 u1_num_tus_in_cu,
6176
    UWORD8 u1_num_4x4_luma_blks_in_tu,
6177
    UWORD8 u1_enable_psyRDOPT,
6178
    UWORD8 u1_is_422)
6179
1.95M
{
6180
1.95M
    UWORD8 u1_chrm_mode;
6181
1.95M
    UWORD8 ctr;
6182
1.95M
    WORD32 i4_subtu_idx;
6183
6184
1.95M
    WORD32 i = 0;
6185
1.95M
    UWORD8 u1_chrm_modes[4] = { 0, 1, 10, 26 };
6186
1.95M
    WORD32 i4_satd_had[4] = { 0 };
6187
1.95M
    WORD32 i4_best_satd_had = INT_MAX;
6188
1.95M
    UWORD8 u1_cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1);
6189
1.95M
    UWORD8 u1_cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1);
6190
1.95M
    WORD32 i4_num_sub_tus = u1_is_422 + 1;
6191
1.95M
    UWORD8 u1_best_chrm_mode = 0;
6192
6193
    /* Get the best satd among all possible modes */
6194
9.76M
    for(i = 0; i < 4; i++)
6195
7.81M
    {
6196
7.81M
        WORD32 left_strd = i4_src_stride;
6197
6198
7.81M
        u1_chrm_mode = (u1_is_422 == 1) ? gau1_chroma422_intra_angle_mapping[u1_chrm_modes[i]]
6199
7.81M
                                        : u1_chrm_modes[i];
6200
6201
        /* loop based on num tus in a cu */
6202
20.2M
        for(ctr = 0; ctr < u1_num_tus_in_cu; ctr++)
6203
12.4M
        {
6204
12.4M
            WORD32 luma_nbr_flags;
6205
12.4M
            WORD32 chrm_pred_func_idx;
6206
6207
12.4M
            WORD32 i4_trans_size_m2 = u1_trans_size << 1;
6208
12.4M
            UWORD8 *pu1_tu_src = pu1_src + ((ctr & 1) * i4_trans_size_m2) +
6209
12.4M
                                 (((ctr > 1) * u1_trans_size * i4_src_stride) << u1_is_422);
6210
12.4M
            UWORD8 *pu1_tu_pred = pu1_pred + ((ctr & 1) * i4_trans_size_m2) +
6211
12.4M
                                  (((ctr > 1) * u1_trans_size * i4_pred_stride) << u1_is_422);
6212
12.4M
            WORD32 i4_curr_tu_pos_x = u1_cu_pos_x + ((ctr & 1) * u1_num_4x4_luma_blks_in_tu);
6213
12.4M
            WORD32 i4_curr_tu_pos_y = u1_cu_pos_y + ((ctr > 1) * u1_num_4x4_luma_blks_in_tu);
6214
6215
12.4M
            luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
6216
12.4M
                pu1_ctb_nbr_map,
6217
12.4M
                i4_nbr_map_strd,
6218
12.4M
                i4_curr_tu_pos_x,
6219
12.4M
                i4_curr_tu_pos_y,
6220
12.4M
                u1_num_4x4_luma_blks_in_tu,
6221
12.4M
                u1_num_4x4_luma_blks_in_tu);
6222
6223
24.8M
            for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++)
6224
12.4M
            {
6225
12.4M
                WORD32 nbr_flags;
6226
6227
12.4M
                UWORD8 *pu1_cur_src =
6228
12.4M
                    pu1_tu_src + ((i4_subtu_idx == 1) * u1_trans_size * i4_src_stride);
6229
12.4M
                UWORD8 *pu1_cur_pred =
6230
12.4M
                    pu1_tu_pred + ((i4_subtu_idx == 1) * u1_trans_size * i4_pred_stride);
6231
12.4M
                UWORD8 *pu1_left = pu1_cur_src - 2;
6232
12.4M
                UWORD8 *pu1_top = pu1_cur_src - i4_src_stride;
6233
12.4M
                UWORD8 *pu1_top_left = pu1_top - 2;
6234
6235
12.4M
                nbr_flags = ihevce_get_intra_chroma_tu_nbr(
6236
12.4M
                    luma_nbr_flags, i4_subtu_idx, u1_trans_size, u1_is_422);
6237
6238
                /* call the chroma reference array substitution */
6239
12.4M
                pf_ref_substitution(
6240
12.4M
                    pu1_top_left,
6241
12.4M
                    pu1_top,
6242
12.4M
                    pu1_left,
6243
12.4M
                    left_strd,
6244
12.4M
                    u1_trans_size,
6245
12.4M
                    nbr_flags,
6246
12.4M
                    pu1_ref_sub_out,
6247
12.4M
                    1);
6248
6249
                /* use the look up to get the function idx */
6250
12.4M
                chrm_pred_func_idx = g_i4_ip_funcs[u1_chrm_mode];
6251
6252
                /* call the intra prediction function */
6253
12.4M
                ppf_chroma_ip[chrm_pred_func_idx](
6254
12.4M
                    pu1_ref_sub_out, 1, pu1_cur_pred, i4_pred_stride, u1_trans_size, u1_chrm_mode);
6255
6256
12.4M
                if(!u1_is_cu_noisy || !i4_alpha_stim_multiplier)
6257
12.4M
                {
6258
                    /* compute Hadamard-transform satd : Cb */
6259
12.4M
                    i4_satd_had[i] += ppf_resd_trns_had[u1_trans_idx - 1](
6260
12.4M
                        pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0);
6261
6262
                    /* compute Hadamard-transform satd : Cr */
6263
12.4M
                    i4_satd_had[i] += ppf_resd_trns_had[u1_trans_idx - 1](
6264
12.4M
                        pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0);
6265
12.4M
                }
6266
0
                else
6267
0
                {
6268
0
                    WORD32 i4_satd;
6269
6270
                    /* compute Hadamard-transform satd : Cb */
6271
0
                    i4_satd = ppf_resd_trns_had[u1_trans_idx - 1](
6272
0
                        pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0);
6273
6274
0
                    i4_satd = ihevce_inject_stim_into_distortion(
6275
0
                        pu1_cur_src,
6276
0
                        i4_src_stride,
6277
0
                        pu1_cur_pred,
6278
0
                        i4_pred_stride,
6279
0
                        i4_satd,
6280
0
                        i4_alpha_stim_multiplier,
6281
0
                        u1_trans_size,
6282
0
                        0,
6283
0
                        u1_enable_psyRDOPT,
6284
0
                        U_PLANE);
6285
6286
0
                    i4_satd_had[i] += i4_satd;
6287
6288
                    /* compute Hadamard-transform satd : Cr */
6289
0
                    i4_satd = ppf_resd_trns_had[u1_trans_idx - 1](
6290
0
                        pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0);
6291
6292
0
                    i4_satd = ihevce_inject_stim_into_distortion(
6293
0
                        pu1_cur_src,
6294
0
                        i4_src_stride,
6295
0
                        pu1_cur_pred,
6296
0
                        i4_pred_stride,
6297
0
                        i4_satd,
6298
0
                        i4_alpha_stim_multiplier,
6299
0
                        u1_trans_size,
6300
0
                        0,
6301
0
                        u1_enable_psyRDOPT,
6302
0
                        V_PLANE);
6303
6304
0
                    i4_satd_had[i] += i4_satd;
6305
0
                }
6306
12.4M
            }
6307
6308
            /* set the neighbour map to 1 */
6309
12.4M
            ihevce_set_nbr_map(
6310
12.4M
                pu1_ctb_nbr_map,
6311
12.4M
                i4_nbr_map_strd,
6312
12.4M
                i4_curr_tu_pos_x,
6313
12.4M
                i4_curr_tu_pos_y,
6314
12.4M
                u1_num_4x4_luma_blks_in_tu,
6315
12.4M
                1);
6316
12.4M
        }
6317
6318
        /* set the neighbour map to 0 */
6319
7.81M
        ihevce_set_nbr_map(
6320
7.81M
            pu1_ctb_nbr_map,
6321
7.81M
            i4_nbr_map_strd,
6322
7.81M
            (ps_cu_analyse->b3_cu_pos_x << 1),
6323
7.81M
            (ps_cu_analyse->b3_cu_pos_y << 1),
6324
7.81M
            (ps_cu_analyse->u1_cu_size >> 2),
6325
7.81M
            0);
6326
6327
        /* Get the least SATD and corresponding mode */
6328
7.81M
        if(i4_best_satd_had > i4_satd_had[i])
6329
2.30M
        {
6330
2.30M
            i4_best_satd_had = i4_satd_had[i];
6331
2.30M
            u1_best_chrm_mode = u1_chrm_mode;
6332
2.30M
        }
6333
7.81M
    }
6334
6335
1.95M
    return u1_best_chrm_mode;
6336
1.95M
}
6337
6338
void ihevce_intra_chroma_pred_mode_selector(
6339
    ihevce_enc_loop_ctxt_t *ps_ctxt,
6340
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
6341
    cu_analyse_t *ps_cu_analyse,
6342
    WORD32 rd_opt_curr_idx,
6343
    WORD32 tu_mode,
6344
    WORD32 i4_alpha_stim_multiplier,
6345
    UWORD8 u1_is_cu_noisy)
6346
1.95M
{
6347
1.95M
    chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt;
6348
6349
1.95M
    ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
6350
6351
1.95M
    UWORD8 *pu1_pred;
6352
1.95M
    WORD32 trans_size;
6353
1.95M
    WORD32 num_tus_in_cu;
6354
1.95M
    WORD32 pred_strd;
6355
1.95M
    WORD32 ctr;
6356
1.95M
    WORD32 i4_subtu_idx;
6357
1.95M
    WORD32 i4_num_sub_tus;
6358
1.95M
    WORD32 trans_idx;
6359
1.95M
    WORD32 scan_idx;
6360
1.95M
    WORD32 num_4x4_luma_in_tu;
6361
1.95M
    WORD32 cu_pos_x;
6362
1.95M
    WORD32 cu_pos_y;
6363
6364
1.95M
    recon_datastore_t *aps_recon_datastore[2] = { &ps_ctxt->as_cu_prms[0].s_recon_datastore,
6365
1.95M
                                                  &ps_ctxt->as_cu_prms[1].s_recon_datastore };
6366
6367
1.95M
    LWORD64 chrm_cod_cost = 0;
6368
1.95M
    WORD32 chrm_tu_bits = 0;
6369
1.95M
    WORD32 best_chrm_mode = DM_CHROMA_IDX;
6370
1.95M
    UWORD8 *pu1_chrm_src = ps_chrm_cu_buf_prms->pu1_curr_src;
6371
1.95M
    WORD32 chrm_src_stride = ps_chrm_cu_buf_prms->i4_chrm_src_stride;
6372
1.95M
    UWORD8 *pu1_cu_left = ps_chrm_cu_buf_prms->pu1_cu_left;
6373
1.95M
    UWORD8 *pu1_cu_top = ps_chrm_cu_buf_prms->pu1_cu_top;
6374
1.95M
    UWORD8 *pu1_cu_top_left = ps_chrm_cu_buf_prms->pu1_cu_top_left;
6375
1.95M
    WORD32 cu_left_stride = ps_chrm_cu_buf_prms->i4_cu_left_stride;
6376
1.95M
    WORD32 cu_size = ps_cu_analyse->u1_cu_size;
6377
1.95M
    WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
6378
1.95M
    WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
6379
1.95M
    UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
6380
6381
1.95M
    ihevc_intra_pred_chroma_ref_substitution_fptr =
6382
1.95M
        ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
6383
1.95M
    i4_num_sub_tus = (u1_is_422 == 1) + 1;
6384
6385
#if DISABLE_RDOQ_INTRA
6386
    i4_perform_rdoq = 0;
6387
#endif
6388
6389
1.95M
    if(TU_EQ_CU == tu_mode)
6390
1.56M
    {
6391
1.56M
        num_tus_in_cu = 1;
6392
1.56M
        trans_size = cu_size >> 1;
6393
1.56M
        num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/
6394
1.56M
        ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode];
6395
1.56M
    }
6396
385k
    else
6397
385k
    {
6398
385k
        num_tus_in_cu = 4;
6399
385k
        trans_size = cu_size >> 2;
6400
385k
        num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/
6401
6402
        /* For 8x8 CU only one TU */
6403
385k
        if(MIN_TU_SIZE > trans_size)
6404
0
        {
6405
0
            trans_size = MIN_TU_SIZE;
6406
0
            num_tus_in_cu = 1;
6407
            /* chroma nbr avail. is derived based on luma.
6408
            for 4x4 chrm use 8x8 luma's size */
6409
0
            num_4x4_luma_in_tu = num_4x4_luma_in_tu << 1;
6410
0
        }
6411
6412
385k
        ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode];
6413
385k
    }
6414
6415
    /* Can't be TU_EQ_SUBCU case */
6416
1.95M
    ASSERT(TU_EQ_SUBCU != tu_mode);
6417
6418
    /* translate the transform size to index */
6419
1.95M
    trans_idx = trans_size >> 2;
6420
6421
1.95M
    pu1_pred = (UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data;
6422
6423
1.95M
    pred_strd = ps_chr_intra_satd_ctxt->i4_pred_stride;
6424
6425
    /* for 16x16 cases */
6426
1.95M
    if(16 == trans_size)
6427
319k
    {
6428
319k
        trans_idx = 3;
6429
319k
    }
6430
6431
1.95M
    best_chrm_mode = ihevce_distortion_based_intra_chroma_mode_selector(
6432
1.95M
        ps_cu_analyse,
6433
1.95M
        ihevc_intra_pred_chroma_ref_substitution_fptr,
6434
1.95M
        ps_ctxt->apf_chrm_ip,
6435
1.95M
        ps_ctxt->apf_chrm_resd_trns_had,
6436
1.95M
        pu1_chrm_src,
6437
1.95M
        chrm_src_stride,
6438
1.95M
        pu1_pred,
6439
1.95M
        pred_strd,
6440
1.95M
        ps_ctxt->pu1_ctb_nbr_map,
6441
1.95M
        ps_ctxt->i4_nbr_map_strd,
6442
1.95M
        (UWORD8 *)ps_ctxt->pv_ref_sub_out,
6443
1.95M
        i4_alpha_stim_multiplier,
6444
1.95M
        u1_is_cu_noisy,
6445
1.95M
        trans_size,
6446
1.95M
        trans_idx,
6447
1.95M
        num_tus_in_cu,
6448
1.95M
        num_4x4_luma_in_tu,
6449
1.95M
        ps_ctxt->u1_enable_psyRDOPT,
6450
1.95M
        u1_is_422);
6451
6452
    /* Store the best chroma mode */
6453
1.95M
    ps_chr_intra_satd_ctxt->u1_best_cr_mode = best_chrm_mode;
6454
6455
    /* evaluate RDOPT cost for the Best mode */
6456
1.95M
    {
6457
1.95M
        WORD32 i4_subtu_pos_x;
6458
1.95M
        WORD32 i4_subtu_pos_y;
6459
1.95M
        UWORD8 u1_compute_spatial_ssd;
6460
6461
1.95M
        WORD32 ai4_total_bytes_offset_cb[2] = { 0, 0 };
6462
1.95M
        WORD32 ai4_total_bytes_offset_cr[2] = { 0, 0 };
6463
        /* State for prefix bin of chroma intra pred mode before CU encode */
6464
1.95M
        UWORD8 u1_chroma_intra_mode_prefix_state =
6465
1.95M
            ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_CHROMA_PRED_MODE];
6466
1.95M
        WORD32 luma_trans_size = trans_size << 1;
6467
1.95M
        WORD32 calc_recon = 0;
6468
1.95M
        UWORD8 *pu1_left = pu1_cu_left;
6469
1.95M
        UWORD8 *pu1_top = pu1_cu_top;
6470
1.95M
        UWORD8 *pu1_top_left = pu1_cu_top_left;
6471
1.95M
        WORD32 left_strd = cu_left_stride;
6472
6473
1.95M
        if(ps_ctxt->i1_cu_qp_delta_enable)
6474
906k
        {
6475
906k
            ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, luma_trans_size, 1);
6476
906k
        }
6477
6478
1.95M
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
6479
1.23M
                                 (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
6480
1.23M
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
6481
6482
1.95M
        if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
6483
0
        {
6484
0
            u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
6485
0
                                     CONVERT_SSDS_TO_SPATIAL_DOMAIN;
6486
0
        }
6487
6488
        /* get the 4x4 level position of current cu */
6489
1.95M
        cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1);
6490
1.95M
        cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1);
6491
6492
1.95M
        calc_recon = !u1_compute_spatial_ssd && ((4 == num_tus_in_cu) || (u1_is_422 == 1));
6493
6494
1.95M
        if(calc_recon || u1_compute_spatial_ssd)
6495
1.35M
        {
6496
1.35M
            aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1;
6497
1.35M
            aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1;
6498
1.35M
        }
6499
595k
        else
6500
595k
        {
6501
595k
            aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0;
6502
595k
            aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0;
6503
595k
        }
6504
6505
        /* loop based on num tus in a cu */
6506
5.06M
        for(ctr = 0; ctr < num_tus_in_cu; ctr++)
6507
3.10M
        {
6508
3.10M
            WORD16 *pi2_cur_deq_data_cb;
6509
3.10M
            WORD16 *pi2_cur_deq_data_cr;
6510
6511
3.10M
            WORD32 deq_data_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride;
6512
3.10M
            WORD32 luma_nbr_flags = 0;
6513
6514
3.10M
            luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
6515
3.10M
                ps_ctxt->pu1_ctb_nbr_map,
6516
3.10M
                ps_ctxt->i4_nbr_map_strd,
6517
3.10M
                (ctr & 1) * (luma_trans_size >> 2) + cu_pos_x,
6518
3.10M
                (ctr > 1) * (luma_trans_size >> 2) + cu_pos_y,
6519
3.10M
                (luma_trans_size >> 2),
6520
3.10M
                (luma_trans_size >> 2));
6521
6522
6.21M
            for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++)
6523
3.10M
            {
6524
3.10M
                WORD32 cbf, num_bytes;
6525
3.10M
                LWORD64 trans_ssd_u, trans_ssd_v;
6526
3.10M
                UWORD8 u1_is_recon_available;
6527
6528
3.10M
                WORD32 trans_size_m2 = trans_size << 1;
6529
3.10M
                UWORD8 *pu1_cur_src = pu1_chrm_src + ((ctr & 1) * trans_size_m2) +
6530
3.10M
                                      (((ctr > 1) * trans_size * chrm_src_stride) << u1_is_422) +
6531
3.10M
                                      (i4_subtu_idx * trans_size * chrm_src_stride);
6532
3.10M
                UWORD8 *pu1_cur_pred = pu1_pred + ((ctr & 1) * trans_size_m2) +
6533
3.10M
                                       (((ctr > 1) * trans_size * pred_strd) << u1_is_422) +
6534
3.10M
                                       (i4_subtu_idx * trans_size * pred_strd);
6535
3.10M
                WORD32 i4_recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride;
6536
3.10M
                UWORD8 *pu1_cur_recon = ((UWORD8 *)aps_recon_datastore[0]
6537
3.10M
                                             ->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)]) +
6538
3.10M
                                        ((ctr & 1) * trans_size_m2) +
6539
3.10M
                                        (((ctr > 1) * trans_size * i4_recon_stride) << u1_is_422) +
6540
3.10M
                                        (i4_subtu_idx * trans_size * i4_recon_stride);
6541
6542
                /* Use Chroma coeff/iq buf of the cur. intra cand. Not rememb.
6543
                chroma coeff/iq for high quality intra SATD special modes. Will
6544
                be over written by coeff of luma mode in chroma_rdopt call */
6545
3.10M
                UWORD8 *pu1_ecd_data_cb =
6546
3.10M
                    &ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0];
6547
3.10M
                UWORD8 *pu1_ecd_data_cr =
6548
3.10M
                    &ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0];
6549
6550
3.10M
                WORD32 chrm_pred_func_idx = 0;
6551
3.10M
                LWORD64 curr_cb_cod_cost = 0;
6552
3.10M
                LWORD64 curr_cr_cod_cost = 0;
6553
3.10M
                WORD32 nbr_flags = 0;
6554
6555
3.10M
                i4_subtu_pos_x = (((ctr & 1) * trans_size_m2) >> 2);
6556
3.10M
                i4_subtu_pos_y = (((ctr > 1) * trans_size) >> (!u1_is_422 + 1)) +
6557
3.10M
                                 ((i4_subtu_idx * trans_size) >> 2);
6558
3.10M
                pi2_cur_deq_data_cb = &ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] +
6559
3.10M
                                      ((ctr & 1) * trans_size) +
6560
3.10M
                                      (((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) +
6561
3.10M
                                      (i4_subtu_idx * trans_size * deq_data_strd);
6562
3.10M
                pi2_cur_deq_data_cr = &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] +
6563
3.10M
                                      ((ctr & 1) * trans_size) +
6564
3.10M
                                      (((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) +
6565
3.10M
                                      (i4_subtu_idx * trans_size * deq_data_strd);
6566
6567
                /* left cu boundary */
6568
3.10M
                if(0 == i4_subtu_pos_x)
6569
2.33M
                {
6570
2.33M
                    left_strd = cu_left_stride;
6571
2.33M
                    pu1_left = pu1_cu_left + (i4_subtu_pos_y << 2) * left_strd;
6572
2.33M
                }
6573
770k
                else
6574
770k
                {
6575
770k
                    pu1_left = pu1_cur_recon - 2;
6576
770k
                    left_strd = i4_recon_stride;
6577
770k
                }
6578
6579
                /* top cu boundary */
6580
3.10M
                if(0 == i4_subtu_pos_y)
6581
2.33M
                {
6582
2.33M
                    pu1_top = pu1_cu_top + (i4_subtu_pos_x << 2);
6583
2.33M
                }
6584
770k
                else
6585
770k
                {
6586
770k
                    pu1_top = pu1_cur_recon - i4_recon_stride;
6587
770k
                }
6588
6589
                /* by default top left is set to cu top left */
6590
3.10M
                pu1_top_left = pu1_cu_top_left;
6591
6592
                /* top left based on position */
6593
3.10M
                if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x))
6594
385k
                {
6595
385k
                    pu1_top_left = pu1_left - left_strd;
6596
385k
                }
6597
2.72M
                else if(0 != i4_subtu_pos_x)
6598
770k
                {
6599
770k
                    pu1_top_left = pu1_top - 2;
6600
770k
                }
6601
6602
                /* populate the coeffs scan idx */
6603
3.10M
                scan_idx = SCAN_DIAG_UPRIGHT;
6604
6605
                /* RDOPT copy States :  TU init (best until prev TU) to current */
6606
3.10M
                COPY_CABAC_STATES(
6607
3.10M
                    &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6608
3.10M
                         .s_cabac_ctxt.au1_ctxt_models[0],
6609
3.10M
                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6610
3.10M
                    IHEVC_CAB_CTXT_END);
6611
6612
                /* for 4x4 transforms the scan is chosen based on the intra pred mode */
6613
3.10M
                if(4 == trans_size)
6614
1.46M
                {
6615
                    /* for modes from 22 upto 30 horizontal scan is used */
6616
1.46M
                    if((best_chrm_mode > 21) && (best_chrm_mode < 31))
6617
44.1k
                    {
6618
44.1k
                        scan_idx = SCAN_HORZ;
6619
44.1k
                    }
6620
                    /* for modes from 6 up to 14 vertical scan is used */
6621
1.42M
                    else if((best_chrm_mode > 5) && (best_chrm_mode < 15))
6622
106k
                    {
6623
106k
                        scan_idx = SCAN_VERT;
6624
106k
                    }
6625
1.46M
                }
6626
6627
3.10M
                nbr_flags = ihevce_get_intra_chroma_tu_nbr(
6628
3.10M
                    luma_nbr_flags, i4_subtu_idx, trans_size, u1_is_422);
6629
6630
                /* call the chroma reference array substitution */
6631
3.10M
                ihevc_intra_pred_chroma_ref_substitution_fptr(
6632
3.10M
                    pu1_top_left,
6633
3.10M
                    pu1_top,
6634
3.10M
                    pu1_left,
6635
3.10M
                    left_strd,
6636
3.10M
                    trans_size,
6637
3.10M
                    nbr_flags,
6638
3.10M
                    (UWORD8 *)ps_ctxt->pv_ref_sub_out,
6639
3.10M
                    1);
6640
6641
                /* use the look up to get the function idx */
6642
3.10M
                chrm_pred_func_idx = g_i4_ip_funcs[best_chrm_mode];
6643
6644
                /* call the intra prediction function */
6645
3.10M
                ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
6646
3.10M
                    (UWORD8 *)ps_ctxt->pv_ref_sub_out,
6647
3.10M
                    1,
6648
3.10M
                    pu1_cur_pred,
6649
3.10M
                    pred_strd,
6650
3.10M
                    trans_size,
6651
3.10M
                    best_chrm_mode);
6652
6653
                /* UPLANE RDOPT Loop */
6654
3.10M
                {
6655
3.10M
                    WORD32 tu_bits;
6656
6657
3.10M
                    cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
6658
3.10M
                        ps_ctxt,
6659
3.10M
                        pu1_cur_pred,
6660
3.10M
                        pred_strd,
6661
3.10M
                        pu1_cur_src,
6662
3.10M
                        chrm_src_stride,
6663
3.10M
                        pi2_cur_deq_data_cb,
6664
3.10M
                        deq_data_strd,
6665
3.10M
                        pu1_cur_recon,
6666
3.10M
                        i4_recon_stride,
6667
3.10M
                        pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx],
6668
3.10M
                        ps_ctxt->au1_cu_csbf,
6669
3.10M
                        ps_ctxt->i4_cu_csbf_strd,
6670
3.10M
                        trans_size,
6671
3.10M
                        scan_idx,
6672
3.10M
                        1,
6673
3.10M
                        &num_bytes,
6674
3.10M
                        &tu_bits,
6675
3.10M
                        &ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr],
6676
3.10M
                        &ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr],
6677
3.10M
                        &u1_is_recon_available,
6678
3.10M
                        i4_perform_sbh,
6679
3.10M
                        i4_perform_rdoq,
6680
3.10M
                        &trans_ssd_u,
6681
3.10M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
6682
3.10M
                        i4_alpha_stim_multiplier,
6683
3.10M
                        u1_is_cu_noisy,
6684
3.10M
#endif
6685
3.10M
                        0,
6686
3.10M
                        u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
6687
3.10M
                        U_PLANE);
6688
6689
#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL
6690
                    if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
6691
                    {
6692
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
6693
                        trans_ssd_u = ihevce_inject_stim_into_distortion(
6694
                            pu1_cur_src,
6695
                            chrm_src_stride,
6696
                            pu1_cur_pred,
6697
                            pred_strd,
6698
                            trans_ssd_u,
6699
                            i4_alpha_stim_multiplier,
6700
                            trans_size,
6701
                            0,
6702
                            ps_ctxt->u1_enable_psyRDOPT,
6703
                            U_PLANE);
6704
#else
6705
                        if(u1_compute_spatial_ssd && u1_is_recon_available)
6706
                        {
6707
                            trans_ssd_u = ihevce_inject_stim_into_distortion(
6708
                                pu1_cur_src,
6709
                                chrm_src_stride,
6710
                                pu1_cur_recon,
6711
                                i4_recon_stride,
6712
                                trans_ssd_u,
6713
                                i4_alpha_stim_multiplier,
6714
                                trans_size,
6715
                                0,
6716
                                ps_ctxt->u1_enable_psyRDOPT,
6717
                                U_PLANE);
6718
                        }
6719
                        else
6720
                        {
6721
                            trans_ssd_u = ihevce_inject_stim_into_distortion(
6722
                                pu1_cur_src,
6723
                                chrm_src_stride,
6724
                                pu1_cur_pred,
6725
                                pred_strd,
6726
                                trans_ssd_u,
6727
                                i4_alpha_stim_multiplier,
6728
                                trans_size,
6729
                                0,
6730
                                ps_ctxt->u1_enable_psyRDOPT,
6731
                                U_PLANE);
6732
                        }
6733
#endif
6734
                    }
6735
#endif
6736
6737
                    /* RDOPT copy States :  New updated after curr TU to TU init */
6738
3.10M
                    if(0 != cbf)
6739
342k
                    {
6740
342k
                        memcpy(
6741
342k
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6742
342k
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6743
342k
                                 .s_cabac_ctxt.au1_ctxt_models[0],
6744
342k
                            IHEVC_CAB_CTXT_END);
6745
342k
                    }
6746
                    /* RDOPT copy States :  Restoring back the Cb init state to Cr */
6747
2.76M
                    else
6748
2.76M
                    {
6749
2.76M
                        memcpy(
6750
2.76M
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6751
2.76M
                                 .s_cabac_ctxt.au1_ctxt_models[0],
6752
2.76M
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6753
2.76M
                            IHEVC_CAB_CTXT_END);
6754
2.76M
                    }
6755
6756
3.10M
                    if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd))
6757
482k
                    {
6758
482k
                        ihevce_chroma_it_recon_fxn(
6759
482k
                            ps_ctxt,
6760
482k
                            pi2_cur_deq_data_cb,
6761
482k
                            deq_data_strd,
6762
482k
                            pu1_cur_pred,
6763
482k
                            pred_strd,
6764
482k
                            pu1_cur_recon,
6765
482k
                            i4_recon_stride,
6766
482k
                            (pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx]),
6767
482k
                            trans_size,
6768
482k
                            cbf,
6769
482k
                            ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr],
6770
482k
                            ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr],
6771
482k
                            U_PLANE);
6772
482k
                    }
6773
6774
3.10M
                    ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr] = cbf;
6775
3.10M
                    curr_cb_cod_cost =
6776
3.10M
                        trans_ssd_u +
6777
3.10M
                        COMPUTE_RATE_COST_CLIP30(
6778
3.10M
                            tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
6779
3.10M
                    chrm_tu_bits += tu_bits;
6780
3.10M
                    ai4_total_bytes_offset_cb[i4_subtu_idx] += num_bytes;
6781
3.10M
                    ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr] =
6782
3.10M
                        num_bytes;
6783
3.10M
                }
6784
6785
                /* VPLANE RDOPT Loop */
6786
3.10M
                {
6787
3.10M
                    WORD32 tu_bits;
6788
6789
3.10M
                    cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
6790
3.10M
                        ps_ctxt,
6791
3.10M
                        pu1_cur_pred,
6792
3.10M
                        pred_strd,
6793
3.10M
                        pu1_cur_src,
6794
3.10M
                        chrm_src_stride,
6795
3.10M
                        pi2_cur_deq_data_cr,
6796
3.10M
                        deq_data_strd,
6797
3.10M
                        pu1_cur_recon,
6798
3.10M
                        i4_recon_stride,
6799
3.10M
                        pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx],
6800
3.10M
                        ps_ctxt->au1_cu_csbf,
6801
3.10M
                        ps_ctxt->i4_cu_csbf_strd,
6802
3.10M
                        trans_size,
6803
3.10M
                        scan_idx,
6804
3.10M
                        1,
6805
3.10M
                        &num_bytes,
6806
3.10M
                        &tu_bits,
6807
3.10M
                        &ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr],
6808
3.10M
                        &ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr],
6809
3.10M
                        &u1_is_recon_available,
6810
3.10M
                        i4_perform_sbh,
6811
3.10M
                        i4_perform_rdoq,
6812
3.10M
                        &trans_ssd_v,
6813
3.10M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
6814
3.10M
                        i4_alpha_stim_multiplier,
6815
3.10M
                        u1_is_cu_noisy,
6816
3.10M
#endif
6817
3.10M
                        0,
6818
3.10M
                        u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
6819
3.10M
                        V_PLANE);
6820
6821
#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL
6822
                    if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
6823
                    {
6824
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
6825
                        trans_ssd_v = ihevce_inject_stim_into_distortion(
6826
                            pu1_cur_src,
6827
                            chrm_src_stride,
6828
                            pu1_cur_pred,
6829
                            pred_strd,
6830
                            trans_ssd_v,
6831
                            i4_alpha_stim_multiplier,
6832
                            trans_size,
6833
                            0,
6834
                            ps_ctxt->u1_enable_psyRDOPT,
6835
                            V_PLANE);
6836
#else
6837
                        if(u1_compute_spatial_ssd && u1_is_recon_available)
6838
                        {
6839
                            trans_ssd_v = ihevce_inject_stim_into_distortion(
6840
                                pu1_cur_src,
6841
                                chrm_src_stride,
6842
                                pu1_cur_recon,
6843
                                i4_recon_stride,
6844
                                trans_ssd_v,
6845
                                i4_alpha_stim_multiplier,
6846
                                trans_size,
6847
                                0,
6848
                                ps_ctxt->u1_enable_psyRDOPT,
6849
                                V_PLANE);
6850
                        }
6851
                        else
6852
                        {
6853
                            trans_ssd_v = ihevce_inject_stim_into_distortion(
6854
                                pu1_cur_src,
6855
                                chrm_src_stride,
6856
                                pu1_cur_pred,
6857
                                pred_strd,
6858
                                trans_ssd_v,
6859
                                i4_alpha_stim_multiplier,
6860
                                trans_size,
6861
                                0,
6862
                                ps_ctxt->u1_enable_psyRDOPT,
6863
                                V_PLANE);
6864
                        }
6865
#endif
6866
                    }
6867
#endif
6868
6869
                    /* RDOPT copy States :  New updated after curr TU to TU init */
6870
3.10M
                    if(0 != cbf)
6871
330k
                    {
6872
330k
                        COPY_CABAC_STATES(
6873
330k
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6874
330k
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6875
330k
                                 .s_cabac_ctxt.au1_ctxt_models[0],
6876
330k
                            IHEVC_CAB_CTXT_END);
6877
330k
                    }
6878
                    /* RDOPT copy States :  Restoring back the Cb init state to Cr */
6879
2.77M
                    else
6880
2.77M
                    {
6881
2.77M
                        COPY_CABAC_STATES(
6882
2.77M
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6883
2.77M
                                 .s_cabac_ctxt.au1_ctxt_models[0],
6884
2.77M
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6885
2.77M
                            IHEVC_CAB_CTXT_END);
6886
2.77M
                    }
6887
6888
3.10M
                    if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd))
6889
482k
                    {
6890
482k
                        ihevce_chroma_it_recon_fxn(
6891
482k
                            ps_ctxt,
6892
482k
                            pi2_cur_deq_data_cr,
6893
482k
                            deq_data_strd,
6894
482k
                            pu1_cur_pred,
6895
482k
                            pred_strd,
6896
482k
                            pu1_cur_recon,
6897
482k
                            i4_recon_stride,
6898
482k
                            (pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx]),
6899
482k
                            trans_size,
6900
482k
                            cbf,
6901
482k
                            ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr],
6902
482k
                            ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr],
6903
482k
                            V_PLANE);
6904
482k
                    }
6905
6906
3.10M
                    ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr] = cbf;
6907
3.10M
                    curr_cr_cod_cost =
6908
3.10M
                        trans_ssd_v +
6909
3.10M
                        COMPUTE_RATE_COST_CLIP30(
6910
3.10M
                            tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
6911
3.10M
                    chrm_tu_bits += tu_bits;
6912
3.10M
                    ai4_total_bytes_offset_cr[i4_subtu_idx] += num_bytes;
6913
3.10M
                    ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr] =
6914
3.10M
                        num_bytes;
6915
3.10M
                }
6916
6917
3.10M
                chrm_cod_cost += curr_cb_cod_cost;
6918
3.10M
                chrm_cod_cost += curr_cr_cod_cost;
6919
3.10M
            }
6920
6921
            /* set the neighbour map to 1 */
6922
3.10M
            ihevce_set_nbr_map(
6923
3.10M
                ps_ctxt->pu1_ctb_nbr_map,
6924
3.10M
                ps_ctxt->i4_nbr_map_strd,
6925
3.10M
                (ctr & 1) * (luma_trans_size >> 2) + cu_pos_x,
6926
3.10M
                (ctr > 1) * (luma_trans_size >> 2) + cu_pos_y,
6927
3.10M
                (luma_trans_size >> 2),
6928
3.10M
                1);
6929
3.10M
        }
6930
6931
        /* set the neighbour map to 0 */
6932
1.95M
        ihevce_set_nbr_map(
6933
1.95M
            ps_ctxt->pu1_ctb_nbr_map,
6934
1.95M
            ps_ctxt->i4_nbr_map_strd,
6935
1.95M
            (ps_cu_analyse->b3_cu_pos_x << 1),
6936
1.95M
            (ps_cu_analyse->b3_cu_pos_y << 1),
6937
1.95M
            (ps_cu_analyse->u1_cu_size >> 2),
6938
1.95M
            0);
6939
6940
        /* Account for coding b3_chroma_intra_pred_mode prefix and suffix bins */
6941
        /* This is done by adding the bits for signalling chroma mode (0-3)    */
6942
        /* and subtracting the bits for chroma mode same as luma mode (4)      */
6943
1.95M
#if CHROMA_RDOPT_ENABLE
6944
1.95M
        {
6945
            /* Estimate bits to encode prefix bin as 1 for b3_chroma_intra_pred_mode */
6946
1.95M
            WORD32 bits_frac_1 =
6947
1.95M
                gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 1];
6948
6949
1.95M
            WORD32 bits_for_mode_0to3 = (2 << CABAC_FRAC_BITS_Q) + bits_frac_1;
6950
6951
            /* Estimate bits to encode prefix bin as 0 for b3_chroma_intra_pred_mode */
6952
1.95M
            WORD32 bits_for_mode4 =
6953
1.95M
                gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 0];
6954
6955
            /* accumulate into final rd cost for chroma */
6956
1.95M
            ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode = COMPUTE_RATE_COST_CLIP30(
6957
1.95M
                (bits_for_mode_0to3 - bits_for_mode4),
6958
1.95M
                ps_ctxt->i8_cl_ssd_lambda_chroma_qf,
6959
1.95M
                (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
6960
6961
1.95M
            chrm_cod_cost += ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode;
6962
1.95M
        }
6963
1.95M
#endif
6964
6965
1.95M
        if(ps_ctxt->u1_enable_psyRDOPT)
6966
0
        {
6967
0
            UWORD8 *pu1_recon_cu;
6968
0
            WORD32 recon_stride;
6969
0
            WORD32 curr_pos_x;
6970
0
            WORD32 curr_pos_y;
6971
0
            WORD32 start_index;
6972
0
            WORD32 num_horz_cu_in_ctb;
6973
0
            WORD32 had_block_size;
6974
6975
            /* TODO: sreenivasa ctb size has to be used appropriately */
6976
0
            had_block_size = 8;
6977
0
            num_horz_cu_in_ctb = 2 * 64 / had_block_size;
6978
0
            curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
6979
0
            curr_pos_y = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
6980
0
            recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride;
6981
0
            pu1_recon_cu =
6982
0
                aps_recon_datastore[0]->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)];  //
6983
6984
            /* start index to index the source satd of curr cu in the current ctb */
6985
0
            start_index = 2 * (curr_pos_x / had_block_size) +
6986
0
                          (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
6987
6988
0
            {
6989
0
                chrm_cod_cost += ihevce_psy_rd_cost_croma(
6990
0
                    ps_ctxt->ai4_source_chroma_satd,
6991
0
                    pu1_recon_cu,
6992
0
                    recon_stride,
6993
0
                    1,  //
6994
0
                    cu_size,
6995
0
                    0,  // pic type
6996
0
                    0,  //layer id
6997
0
                    ps_ctxt->i4_satd_lamda,  // lambda
6998
0
                    start_index,
6999
0
                    ps_ctxt->u1_is_input_data_hbd,  // 8 bit
7000
0
                    ps_ctxt->u1_chroma_array_type,
7001
0
                    &ps_ctxt->s_cmn_opt_func
7002
7003
0
                );  // chroma subsampling 420
7004
0
            }
7005
0
        }
7006
7007
1.95M
        ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt = chrm_cod_cost;
7008
1.95M
        ps_chr_intra_satd_ctxt->i4_chrm_tu_bits = chrm_tu_bits;
7009
7010
1.95M
        memcpy(
7011
1.95M
            &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0],
7012
1.95M
            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7013
1.95M
            IHEVC_CAB_CTXT_END);
7014
1.95M
    }
7015
1.95M
}
7016
7017
/*!
7018
******************************************************************************
7019
* \if Function name : ihevce_chroma_cu_prcs_rdopt \endif
7020
*
7021
* \brief
7022
*    Coding unit processing function for chroma
7023
*
7024
* \param[in] ps_ctxt    enc_loop module ctxt pointer
7025
* \param[in] rd_opt_curr_idx index in the array of RDopt params
7026
* \param[in] func_proc_mode TU_EQ_CU or other case
7027
* \param[in] pu1_chrm_src  pointer to source data buffer
7028
* \param[in] chrm_src_stride   source buffer stride
7029
* \param[in] pu1_cu_left pointer to left recon data buffer
7030
* \param[in] pu1_cu_top  pointer to top recon data buffer
7031
* \param[in] pu1_cu_top_left pointer to top left recon data buffer
7032
* \param[in] cu_left_stride left recon buffer stride
7033
* \param[in] cu_pos_x position x of current CU in CTB
7034
* \param[in] cu_pos_y position y of current CU in CTB
7035
* \param[out] pi4_chrm_tu_bits pointer to store the total chroma bits
7036
*
7037
* \return
7038
*    Chroma coding cost (Cb and Cr included)
7039
*
7040
* \author
7041
*  Ittiam
7042
*
7043
*****************************************************************************
7044
*/
7045
LWORD64 ihevce_chroma_cu_prcs_rdopt(
7046
    ihevce_enc_loop_ctxt_t *ps_ctxt,
7047
    WORD32 rd_opt_curr_idx,
7048
    WORD32 func_proc_mode,
7049
    UWORD8 *pu1_chrm_src,
7050
    WORD32 chrm_src_stride,
7051
    UWORD8 *pu1_cu_left,
7052
    UWORD8 *pu1_cu_top,
7053
    UWORD8 *pu1_cu_top_left,
7054
    WORD32 cu_left_stride,
7055
    WORD32 cu_pos_x,
7056
    WORD32 cu_pos_y,
7057
    WORD32 *pi4_chrm_tu_bits,
7058
    WORD32 i4_alpha_stim_multiplier,
7059
    UWORD8 u1_is_cu_noisy)
7060
9.34M
{
7061
9.34M
    tu_enc_loop_out_t *ps_tu;
7062
9.34M
    tu_enc_loop_temp_prms_t *ps_tu_temp_prms;
7063
7064
9.34M
    ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
7065
7066
9.34M
    UWORD8 *pu1_pred;
7067
9.34M
    UWORD8 *pu1_recon;
7068
9.34M
    WORD32 i4_recon_stride;
7069
9.34M
    WORD32 cu_size, trans_size = 0;
7070
9.34M
    WORD32 pred_strd;
7071
9.34M
    WORD32 ctr, i4_subtu_idx;
7072
9.34M
    WORD32 scan_idx;
7073
9.34M
    WORD32 u1_is_cu_coded_old;
7074
9.34M
    WORD32 init_bytes_offset;
7075
7076
9.34M
    enc_loop_cu_final_prms_t *ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_curr_idx];
7077
9.34M
    recon_datastore_t *ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore;
7078
7079
9.34M
    WORD32 total_bytes_offset = 0;
7080
9.34M
    LWORD64 chrm_cod_cost = 0;
7081
9.34M
    WORD32 chrm_tu_bits = 0;
7082
9.34M
    WORD32 chrm_pred_mode = DM_CHROMA_IDX, luma_pred_mode = 35;
7083
9.34M
    LWORD64 i8_ssd_cb = 0;
7084
9.34M
    WORD32 i4_bits_cb = 0;
7085
9.34M
    LWORD64 i8_ssd_cr = 0;
7086
9.34M
    WORD32 i4_bits_cr = 0;
7087
9.34M
    UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
7088
9.34M
    UWORD8 u1_num_tus =
7089
        /* NumChromaTU's = 1, if TUSize = 4 and CUSize = 8 */
7090
9.34M
        (!ps_best_cu_prms->as_tu_enc_loop[0].s_tu.b3_size && ps_best_cu_prms->u1_intra_flag)
7091
9.34M
            ? 1
7092
9.34M
            : ps_best_cu_prms->u2_num_tus_in_cu;
7093
9.34M
    UWORD8 u1_num_subtus_in_tu = u1_is_422 + 1;
7094
9.34M
    UWORD8 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
7095
5.85M
                                    (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
7096
5.30M
                                    CONVERT_SSDS_TO_SPATIAL_DOMAIN;
7097
    /* Get the RDOPT cost of the best CU mode for early_exit */
7098
9.34M
    LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!rd_opt_curr_idx].i8_best_rdopt_cost;
7099
    /* Get the current running RDOPT (Luma RDOPT) for early_exit */
7100
9.34M
    LWORD64 curr_rdopt_cost = ps_ctxt->as_cu_prms[rd_opt_curr_idx].i8_curr_rdopt_cost;
7101
9.34M
    WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
7102
9.34M
    WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
7103
7104
9.34M
    ihevc_intra_pred_chroma_ref_substitution_fptr =
7105
9.34M
        ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
7106
7107
9.34M
    if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
7108
0
    {
7109
0
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
7110
0
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
7111
0
    }
7112
7113
    /* Store the init bytes offset from luma */
7114
9.34M
    init_bytes_offset = ps_best_cu_prms->i4_num_bytes_ecd_data;
7115
7116
    /* Unused pred buffer in merge_skip_pred_data_t structure is used as
7117
    Chroma pred storage buf. for final_recon function.
7118
    The buffer is split into two and used as a ping-pong buffer */
7119
9.34M
    pu1_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
7120
9.34M
               rd_opt_curr_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) +
7121
9.34M
                                  (u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
7122
7123
9.34M
    pred_strd = ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
7124
7125
9.34M
    pu1_recon = (UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs[0];
7126
9.34M
    i4_recon_stride = ps_recon_datastore->i4_chromaRecon_stride;
7127
9.34M
    cu_size = ps_best_cu_prms->u1_cu_size;
7128
9.34M
    chrm_tu_bits = 0;
7129
7130
    /* get the first TU pointer */
7131
9.34M
    ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
7132
    /* get the first TU enc_loop temp prms pointer */
7133
9.34M
    ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
7134
7135
9.34M
    if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag)
7136
6.37M
    {
7137
        /* Mode signalled by intra prediction for luma */
7138
6.37M
        luma_pred_mode = ps_best_cu_prms->au1_intra_pred_mode[0];
7139
7140
#if DISABLE_RDOQ_INTRA
7141
        i4_perform_rdoq = 0;
7142
#endif
7143
6.37M
    }
7144
7145
2.97M
    else
7146
2.97M
    {
7147
2.97M
        UWORD8 *pu1_pred_org = pu1_pred;
7148
7149
        /* ------ Motion Compensation for Chroma -------- */
7150
6.58M
        for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++)
7151
3.61M
        {
7152
3.61M
            pu_t *ps_pu;
7153
3.61M
            WORD32 inter_pu_wd;
7154
3.61M
            WORD32 inter_pu_ht;
7155
7156
3.61M
            ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr];
7157
7158
3.61M
            inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
7159
3.61M
            inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
7160
3.61M
            inter_pu_ht <<= u1_is_422;
7161
7162
3.61M
            ihevce_chroma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_strd);
7163
7164
3.61M
            if(2 == ps_best_cu_prms->u2_num_pus_in_cu)
7165
1.27M
            {
7166
                /* 2Nx__ partion case */
7167
1.27M
                if(inter_pu_wd == cu_size)
7168
1.04M
                {
7169
1.04M
                    pu1_pred += (inter_pu_ht * pred_strd);
7170
1.04M
                }
7171
7172
                /* __x2N partion case */
7173
1.27M
                if(inter_pu_ht == (cu_size >> (u1_is_422 == 0)))
7174
231k
                {
7175
231k
                    pu1_pred += inter_pu_wd;
7176
231k
                }
7177
1.27M
            }
7178
3.61M
        }
7179
7180
        /* restore the pred pointer to start for transform loop */
7181
2.97M
        pu1_pred = pu1_pred_org;
7182
2.97M
    }
7183
7184
    /* Used to store back only the luma based info. if SATD based chroma
7185
    mode also comes */
7186
9.34M
    u1_is_cu_coded_old = ps_best_cu_prms->u1_is_cu_coded;
7187
7188
    /* evaluate chroma candidates (same as luma) and
7189
    if INTRA & HIGH_QUALITY compare with best SATD mode */
7190
9.34M
    {
7191
9.34M
        WORD32 calc_recon = 0, deq_data_strd;
7192
9.34M
        WORD16 *pi2_deq_data;
7193
9.34M
        UWORD8 *pu1_ecd_data;
7194
9.34M
        UWORD8 u1_is_mode_eq_chroma_satd_mode = 0;
7195
7196
9.34M
        pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0];
7197
9.34M
        pi2_deq_data += ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx;
7198
9.34M
        deq_data_strd = cu_size;
7199
        /* update ecd buffer for storing coeff. */
7200
9.34M
        pu1_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0];
7201
9.34M
        pu1_ecd_data += init_bytes_offset;
7202
        /* store chroma starting index */
7203
9.34M
        ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx = init_bytes_offset;
7204
7205
        /* get the first TU pointer */
7206
9.34M
        ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
7207
9.34M
        ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
7208
7209
        /* Reset total_bytes_offset for each candidate */
7210
9.34M
        chrm_pred_mode = (u1_is_422 == 1) ? gau1_chroma422_intra_angle_mapping[luma_pred_mode]
7211
9.34M
                                          : luma_pred_mode;
7212
7213
9.34M
        total_bytes_offset = 0;
7214
7215
9.34M
        if(TU_EQ_SUBCU == func_proc_mode)
7216
361k
        {
7217
361k
            func_proc_mode = TU_EQ_CU_DIV2;
7218
361k
        }
7219
7220
        /* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and
7221
        TU_EQ_CU_DIV2 and  TU_EQ_SUBCU case */
7222
9.34M
        if(8 == cu_size)
7223
3.54M
        {
7224
3.54M
            func_proc_mode = TU_EQ_CU;
7225
3.54M
        }
7226
7227
        /* loop based on num tus in a cu */
7228
9.34M
        if(!ps_best_cu_prms->u1_intra_flag || !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd ||
7229
5.59M
           (ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd &&
7230
5.59M
            (chrm_pred_mode !=
7231
5.59M
             ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode].u1_best_cr_mode)))
7232
7.68M
        {
7233
            /* loop based on num tus in a cu */
7234
18.4M
            for(ctr = 0; ctr < u1_num_tus; ctr++)
7235
11.7M
            {
7236
11.7M
                WORD32 num_bytes = 0;
7237
11.7M
                LWORD64 curr_cb_cod_cost = 0;
7238
11.7M
                LWORD64 curr_cr_cod_cost = 0;
7239
11.7M
                WORD32 chrm_pred_func_idx = 0;
7240
11.7M
                UWORD8 u1_is_early_exit_condition_satisfied = 0;
7241
7242
                /* Default cb and cr offset initialization for b3_chroma_intra_mode_idx=7  */
7243
                /* FIX for TU tree shrinkage caused by ecd data copies in final mode recon */
7244
11.7M
                ps_tu->s_tu.b1_cb_cbf = ps_tu->s_tu.b1_cr_cbf = 0;
7245
11.7M
                ps_tu->s_tu.b1_cb_cbf_subtu1 = ps_tu->s_tu.b1_cr_cbf_subtu1 = 0;
7246
11.7M
                ps_tu->ai4_cb_coeff_offset[0] = total_bytes_offset + init_bytes_offset;
7247
11.7M
                ps_tu->ai4_cr_coeff_offset[0] = total_bytes_offset + init_bytes_offset;
7248
11.7M
                ps_tu->ai4_cb_coeff_offset[1] = total_bytes_offset + init_bytes_offset;
7249
11.7M
                ps_tu->ai4_cr_coeff_offset[1] = total_bytes_offset + init_bytes_offset;
7250
11.7M
                ps_tu_temp_prms->ai2_cb_bytes_consumed[0] = 0;
7251
11.7M
                ps_tu_temp_prms->ai2_cr_bytes_consumed[0] = 0;
7252
11.7M
                ps_tu_temp_prms->ai2_cb_bytes_consumed[1] = 0;
7253
11.7M
                ps_tu_temp_prms->ai2_cr_bytes_consumed[1] = 0;
7254
7255
                /* TU level inits */
7256
                /* check if chroma present flag is set */
7257
11.7M
                if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
7258
11.0M
                {
7259
                    /* RDOPT copy States :  TU init (best until prev TU) to current */
7260
11.0M
                    COPY_CABAC_STATES(
7261
11.0M
                        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
7262
11.0M
                             .s_cabac_ctxt.au1_ctxt_models[0],
7263
11.0M
                        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7264
11.0M
                        IHEVC_CAB_CTXT_END);
7265
7266
                    /* get the current transform size */
7267
11.0M
                    trans_size = ps_tu->s_tu.b3_size;
7268
11.0M
                    trans_size = (1 << (trans_size + 1)); /* in chroma units */
7269
7270
                    /* since 2x2 transform is not allowed for chroma*/
7271
11.0M
                    if(2 == trans_size)
7272
780k
                    {
7273
780k
                        trans_size = 4;
7274
780k
                    }
7275
11.0M
                }
7276
7277
22.5M
                for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++)
7278
11.7M
                {
7279
11.7M
                    WORD32 cbf;
7280
11.7M
                    UWORD8 u1_is_recon_available;
7281
7282
11.7M
                    WORD32 nbr_flags = 0;
7283
11.7M
                    WORD32 zero_cols = 0;
7284
11.7M
                    WORD32 zero_rows = 0;
7285
7286
                    /* check if chroma present flag is set */
7287
11.7M
                    if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
7288
11.0M
                    {
7289
11.0M
                        UWORD8 *pu1_cur_pred;
7290
11.0M
                        UWORD8 *pu1_cur_recon;
7291
11.0M
                        UWORD8 *pu1_cur_src;
7292
11.0M
                        WORD16 *pi2_cur_deq_data;
7293
11.0M
                        WORD32 curr_pos_x, curr_pos_y;
7294
11.0M
                        LWORD64 trans_ssd_u, trans_ssd_v;
7295
7296
                        /* get the current sub-tu posx and posy w.r.t to cu */
7297
11.0M
                        curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3);
7298
11.0M
                        curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) +
7299
11.0M
                                     (i4_subtu_idx * trans_size);
7300
7301
                        /* 420sp case only vertical height will be half */
7302
11.0M
                        if(u1_is_422 == 0)
7303
11.0M
                        {
7304
11.0M
                            curr_pos_y >>= 1;
7305
11.0M
                        }
7306
7307
                        /* increment the pointers to start of current Sub-TU */
7308
11.0M
                        pu1_cur_recon = (pu1_recon + curr_pos_x);
7309
11.0M
                        pu1_cur_recon += (curr_pos_y * i4_recon_stride);
7310
11.0M
                        pu1_cur_src = (pu1_chrm_src + curr_pos_x);
7311
11.0M
                        pu1_cur_src += (curr_pos_y * chrm_src_stride);
7312
11.0M
                        pu1_cur_pred = (pu1_pred + curr_pos_x);
7313
11.0M
                        pu1_cur_pred += (curr_pos_y * pred_strd);
7314
11.0M
                        pi2_cur_deq_data = pi2_deq_data + curr_pos_x;
7315
11.0M
                        pi2_cur_deq_data += (curr_pos_y * deq_data_strd);
7316
7317
                        /* populate the coeffs scan idx */
7318
11.0M
                        scan_idx = SCAN_DIAG_UPRIGHT;
7319
7320
                        /* perform intra prediction only for Intra case */
7321
11.0M
                        if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag)
7322
7.26M
                        {
7323
7.26M
                            UWORD8 *pu1_top_left;
7324
7.26M
                            UWORD8 *pu1_top;
7325
7.26M
                            UWORD8 *pu1_left;
7326
7.26M
                            WORD32 left_strd;
7327
7328
7.26M
                            calc_recon = !u1_compute_spatial_ssd &&
7329
3.35M
                                         ((4 == u1_num_tus) || (u1_is_422 == 1)) &&
7330
1.47M
                                         (((u1_num_tus == 1) && (0 == i4_subtu_idx)) ||
7331
1.47M
                                          ((ctr == 3) && (0 == i4_subtu_idx) && (u1_is_422 == 1)) ||
7332
1.47M
                                          ((u1_num_tus == 4) && (ctr < 3)));
7333
7334
                            /* left cu boundary */
7335
7.26M
                            if(0 == curr_pos_x)
7336
5.56M
                            {
7337
5.56M
                                pu1_left = pu1_cu_left + curr_pos_y * cu_left_stride;
7338
5.56M
                                left_strd = cu_left_stride;
7339
5.56M
                            }
7340
1.69M
                            else
7341
1.69M
                            {
7342
1.69M
                                pu1_left = pu1_cur_recon - 2;
7343
1.69M
                                left_strd = i4_recon_stride;
7344
1.69M
                            }
7345
7346
                            /* top cu boundary */
7347
7.26M
                            if(0 == curr_pos_y)
7348
5.57M
                            {
7349
5.57M
                                pu1_top = pu1_cu_top + curr_pos_x;
7350
5.57M
                            }
7351
1.68M
                            else
7352
1.68M
                            {
7353
1.68M
                                pu1_top = pu1_cur_recon - i4_recon_stride;
7354
1.68M
                            }
7355
7356
                            /* by default top left is set to cu top left */
7357
7.26M
                            pu1_top_left = pu1_cu_top_left;
7358
7359
                            /* top left based on position */
7360
7.26M
                            if((0 != curr_pos_y) && (0 == curr_pos_x))
7361
850k
                            {
7362
850k
                                pu1_top_left = pu1_left - cu_left_stride;
7363
850k
                            }
7364
6.40M
                            else if(0 != curr_pos_x)
7365
1.69M
                            {
7366
1.69M
                                pu1_top_left = pu1_top - 2;
7367
1.69M
                            }
7368
7369
                            /* for 4x4 transforms based on intra pred mode scan is choosen*/
7370
7.26M
                            if(4 == trans_size)
7371
3.87M
                            {
7372
                                /* for modes from 22 upto 30 horizontal scan is used */
7373
3.87M
                                if((chrm_pred_mode > 21) && (chrm_pred_mode < 31))
7374
1.33M
                                {
7375
1.33M
                                    scan_idx = SCAN_HORZ;
7376
1.33M
                                }
7377
                                /* for modes from 6 upto 14 vertical scan is used */
7378
2.54M
                                else if((chrm_pred_mode > 5) && (chrm_pred_mode < 15))
7379
685k
                                {
7380
685k
                                    scan_idx = SCAN_VERT;
7381
685k
                                }
7382
3.87M
                            }
7383
7384
7.26M
                            nbr_flags = ihevce_get_intra_chroma_tu_nbr(
7385
7.26M
                                ps_best_cu_prms->au4_nbr_flags[ctr],
7386
7.26M
                                i4_subtu_idx,
7387
7.26M
                                trans_size,
7388
7.26M
                                u1_is_422);
7389
7390
                            /* call the chroma reference array substitution */
7391
7.26M
                            ihevc_intra_pred_chroma_ref_substitution_fptr(
7392
7.26M
                                pu1_top_left,
7393
7.26M
                                pu1_top,
7394
7.26M
                                pu1_left,
7395
7.26M
                                left_strd,
7396
7.26M
                                trans_size,
7397
7.26M
                                nbr_flags,
7398
7.26M
                                (UWORD8 *)ps_ctxt->pv_ref_sub_out,
7399
7.26M
                                1);
7400
7401
                            /* use the look up to get the function idx */
7402
7.26M
                            chrm_pred_func_idx = g_i4_ip_funcs[chrm_pred_mode];
7403
7404
                            /* call the intra prediction function */
7405
7.26M
                            ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
7406
7.26M
                                (UWORD8 *)ps_ctxt->pv_ref_sub_out,
7407
7.26M
                                1,
7408
7.26M
                                pu1_cur_pred,
7409
7.26M
                                pred_strd,
7410
7.26M
                                trans_size,
7411
7.26M
                                chrm_pred_mode);
7412
7.26M
                        }
7413
7414
11.0M
                        if(!ctr && !i4_subtu_idx && (u1_compute_spatial_ssd || calc_recon))
7415
4.60M
                        {
7416
4.60M
                            ps_recon_datastore->au1_is_chromaRecon_available[0] =
7417
4.60M
                                !ps_best_cu_prms->u1_skip_flag;
7418
4.60M
                        }
7419
6.41M
                        else if(!ctr && !i4_subtu_idx)
7420
3.08M
                        {
7421
3.08M
                            ps_recon_datastore->au1_is_chromaRecon_available[0] = 0;
7422
3.08M
                        }
7423
                        /************************************************************/
7424
                        /* recon loop is done for all cases including skip cu       */
7425
                        /* This is because skipping chroma residual based on luma   */
7426
                        /* skip decision can lead to chroma artifacts               */
7427
                        /************************************************************/
7428
                        /************************************************************/
7429
                        /*In the high quality and medium speed modes, wherein chroma*/
7430
                        /*and luma costs are included in the total cost calculation */
7431
                        /*the cost is just a ssd cost, and not that obtained through*/
7432
                        /*iq_it path                                                */
7433
                        /************************************************************/
7434
11.0M
                        if(ps_best_cu_prms->u1_skip_flag == 0)
7435
10.1M
                        {
7436
10.1M
                            WORD32 tu_bits;
7437
7438
10.1M
                            cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
7439
10.1M
                                ps_ctxt,
7440
10.1M
                                pu1_cur_pred,
7441
10.1M
                                pred_strd,
7442
10.1M
                                pu1_cur_src,
7443
10.1M
                                chrm_src_stride,
7444
10.1M
                                pi2_cur_deq_data,
7445
10.1M
                                deq_data_strd,
7446
10.1M
                                pu1_cur_recon,
7447
10.1M
                                i4_recon_stride,
7448
10.1M
                                pu1_ecd_data + total_bytes_offset,
7449
10.1M
                                ps_ctxt->au1_cu_csbf,
7450
10.1M
                                ps_ctxt->i4_cu_csbf_strd,
7451
10.1M
                                trans_size,
7452
10.1M
                                scan_idx,
7453
10.1M
                                PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag,
7454
10.1M
                                &num_bytes,
7455
10.1M
                                &tu_bits,
7456
10.1M
                                &zero_cols,
7457
10.1M
                                &zero_rows,
7458
10.1M
                                &u1_is_recon_available,
7459
10.1M
                                i4_perform_sbh,
7460
10.1M
                                i4_perform_rdoq,
7461
10.1M
                                &trans_ssd_u,
7462
10.1M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7463
10.1M
                                i4_alpha_stim_multiplier,
7464
10.1M
                                u1_is_cu_noisy,
7465
10.1M
#endif
7466
10.1M
                                ps_best_cu_prms->u1_skip_flag,
7467
10.1M
                                u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
7468
10.1M
                                U_PLANE);
7469
7470
10.1M
                            if(u1_compute_spatial_ssd && u1_is_recon_available)
7471
5.59M
                            {
7472
5.59M
                                ps_recon_datastore
7473
5.59M
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7474
5.59M
                                                                        [i4_subtu_idx] = 0;
7475
5.59M
                            }
7476
4.54M
                            else
7477
4.54M
                            {
7478
4.54M
                                ps_recon_datastore
7479
4.54M
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7480
4.54M
                                                                        [i4_subtu_idx] = UCHAR_MAX;
7481
4.54M
                            }
7482
7483
#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7484
                            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7485
                            {
7486
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
7487
                                trans_ssd_u = ihevce_inject_stim_into_distortion(
7488
                                    pu1_cur_src,
7489
                                    chrm_src_stride,
7490
                                    pu1_cur_pred,
7491
                                    pred_strd,
7492
                                    trans_ssd_u,
7493
                                    i4_alpha_stim_multiplier,
7494
                                    trans_size,
7495
                                    0,
7496
                                    ps_ctxt->u1_enable_psyRDOPT,
7497
                                    U_PLANE);
7498
#else
7499
                                if(u1_compute_spatial_ssd && u1_is_recon_available)
7500
                                {
7501
                                    trans_ssd_u = ihevce_inject_stim_into_distortion(
7502
                                        pu1_cur_src,
7503
                                        chrm_src_stride,
7504
                                        pu1_cur_recon,
7505
                                        i4_recon_stride,
7506
                                        trans_ssd_u,
7507
                                        i4_alpha_stim_multiplier,
7508
                                        trans_size,
7509
                                        0,
7510
                                        ps_ctxt->u1_enable_psyRDOPT,
7511
                                        U_PLANE);
7512
                                }
7513
                                else
7514
                                {
7515
                                    trans_ssd_u = ihevce_inject_stim_into_distortion(
7516
                                        pu1_cur_src,
7517
                                        chrm_src_stride,
7518
                                        pu1_cur_pred,
7519
                                        pred_strd,
7520
                                        trans_ssd_u,
7521
                                        i4_alpha_stim_multiplier,
7522
                                        trans_size,
7523
                                        0,
7524
                                        ps_ctxt->u1_enable_psyRDOPT,
7525
                                        U_PLANE);
7526
                                }
7527
#endif
7528
                            }
7529
#endif
7530
7531
10.1M
                            curr_cb_cod_cost =
7532
10.1M
                                trans_ssd_u +
7533
10.1M
                                COMPUTE_RATE_COST_CLIP30(
7534
10.1M
                                    tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
7535
7536
10.1M
                            chrm_tu_bits += tu_bits;
7537
10.1M
                            i4_bits_cb += tu_bits;
7538
7539
                            /* RDOPT copy States :  New updated after curr TU to TU init */
7540
10.1M
                            if(0 != cbf)
7541
1.65M
                            {
7542
1.65M
                                COPY_CABAC_STATES(
7543
1.65M
                                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7544
1.65M
                                    &ps_ctxt->s_rdopt_entropy_ctxt
7545
1.65M
                                         .as_cu_entropy_ctxt[rd_opt_curr_idx]
7546
1.65M
                                         .s_cabac_ctxt.au1_ctxt_models[0],
7547
1.65M
                                    IHEVC_CAB_CTXT_END);
7548
1.65M
                            }
7549
                            /* RDOPT copy States :  Restoring back the Cb init state to Cr */
7550
8.48M
                            else
7551
8.48M
                            {
7552
8.48M
                                COPY_CABAC_STATES(
7553
8.48M
                                    &ps_ctxt->s_rdopt_entropy_ctxt
7554
8.48M
                                         .as_cu_entropy_ctxt[rd_opt_curr_idx]
7555
8.48M
                                         .s_cabac_ctxt.au1_ctxt_models[0],
7556
8.48M
                                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7557
8.48M
                                    IHEVC_CAB_CTXT_END);
7558
8.48M
                            }
7559
7560
                            /* If Intra and TU=CU/2, need recon for next TUs */
7561
10.1M
                            if(calc_recon)
7562
1.12M
                            {
7563
1.12M
                                ihevce_chroma_it_recon_fxn(
7564
1.12M
                                    ps_ctxt,
7565
1.12M
                                    pi2_cur_deq_data,
7566
1.12M
                                    deq_data_strd,
7567
1.12M
                                    pu1_cur_pred,
7568
1.12M
                                    pred_strd,
7569
1.12M
                                    pu1_cur_recon,
7570
1.12M
                                    i4_recon_stride,
7571
1.12M
                                    (pu1_ecd_data + total_bytes_offset),
7572
1.12M
                                    trans_size,
7573
1.12M
                                    cbf,
7574
1.12M
                                    zero_cols,
7575
1.12M
                                    zero_rows,
7576
1.12M
                                    U_PLANE);
7577
7578
1.12M
                                ps_recon_datastore
7579
1.12M
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7580
1.12M
                                                                        [i4_subtu_idx] = 0;
7581
1.12M
                            }
7582
9.01M
                            else
7583
9.01M
                            {
7584
9.01M
                                ps_recon_datastore
7585
9.01M
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7586
9.01M
                                                                        [i4_subtu_idx] = UCHAR_MAX;
7587
9.01M
                            }
7588
10.1M
                        }
7589
882k
                        else
7590
882k
                        {
7591
                            /* num bytes is set to 0 */
7592
882k
                            num_bytes = 0;
7593
7594
                            /* cbf is returned as 0 */
7595
882k
                            cbf = 0;
7596
7597
882k
                            curr_cb_cod_cost = trans_ssd_u =
7598
7599
882k
                                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
7600
882k
                                    pu1_cur_pred,
7601
882k
                                    pu1_cur_src,
7602
882k
                                    pred_strd,
7603
882k
                                    chrm_src_stride,
7604
882k
                                    trans_size,
7605
882k
                                    trans_size,
7606
882k
                                    U_PLANE);
7607
7608
882k
                            if(u1_compute_spatial_ssd)
7609
525k
                            {
7610
                                /* buffer copy from pred to recon */
7611
7612
525k
                                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
7613
525k
                                    pu1_cur_pred,
7614
525k
                                    pred_strd,
7615
525k
                                    pu1_cur_recon,
7616
525k
                                    i4_recon_stride,
7617
525k
                                    trans_size,
7618
525k
                                    trans_size,
7619
525k
                                    U_PLANE);
7620
7621
525k
                                ps_recon_datastore
7622
525k
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7623
525k
                                                                        [i4_subtu_idx] = 0;
7624
525k
                            }
7625
7626
882k
                            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7627
0
                            {
7628
0
                                trans_ssd_u = ihevce_inject_stim_into_distortion(
7629
0
                                    pu1_cur_src,
7630
0
                                    chrm_src_stride,
7631
0
                                    pu1_cur_pred,
7632
0
                                    pred_strd,
7633
0
                                    trans_ssd_u,
7634
0
                                    i4_alpha_stim_multiplier,
7635
0
                                    trans_size,
7636
0
                                    0,
7637
0
                                    ps_ctxt->u1_enable_psyRDOPT,
7638
0
                                    U_PLANE);
7639
0
                            }
7640
7641
882k
#if ENABLE_INTER_ZCU_COST
7642
#if !WEIGH_CHROMA_COST
7643
                            /* cbf = 0, accumulate cu not coded cost */
7644
                            ps_ctxt->i8_cu_not_coded_cost += curr_cb_cod_cost;
7645
#else
7646
                            /* cbf = 0, accumulate cu not coded cost */
7647
7648
882k
                            ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
7649
882k
                                (curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7650
882k
                                 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7651
882k
                                CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7652
882k
#endif
7653
882k
#endif
7654
882k
                        }
7655
7656
#if !WEIGH_CHROMA_COST
7657
                        curr_rdopt_cost += curr_cb_cod_cost;
7658
#else
7659
11.0M
                        curr_rdopt_cost +=
7660
11.0M
                            ((curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7661
11.0M
                              (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7662
11.0M
                             CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7663
11.0M
#endif
7664
11.0M
                        chrm_cod_cost += curr_cb_cod_cost;
7665
11.0M
                        i8_ssd_cb += trans_ssd_u;
7666
7667
11.0M
                        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
7668
11.0M
                        {
7669
                            /* Early exit : If the current running cost exceeds
7670
                            the prev. best mode cost, break */
7671
11.0M
                            if(curr_rdopt_cost > prev_best_rdopt_cost)
7672
522k
                            {
7673
522k
                                u1_is_early_exit_condition_satisfied = 1;
7674
522k
                                break;
7675
522k
                            }
7676
11.0M
                        }
7677
7678
                        /* inter cu is coded if any of the tu is coded in it */
7679
10.4M
                        ps_best_cu_prms->u1_is_cu_coded |= cbf;
7680
7681
                        /* update CB related params */
7682
10.4M
                        ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] =
7683
10.4M
                            total_bytes_offset + init_bytes_offset;
7684
7685
10.4M
                        if(0 == i4_subtu_idx)
7686
10.4M
                        {
7687
10.4M
                            ps_tu->s_tu.b1_cb_cbf = cbf;
7688
10.4M
                        }
7689
0
                        else
7690
0
                        {
7691
0
                            ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf;
7692
0
                        }
7693
7694
10.4M
                        total_bytes_offset += num_bytes;
7695
7696
10.4M
                        ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] = zero_cols;
7697
10.4M
                        ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] = zero_rows;
7698
10.4M
                        ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes;
7699
7700
                        /* recon loop is done for non skip cases */
7701
10.4M
                        if(ps_best_cu_prms->u1_skip_flag == 0)
7702
9.76M
                        {
7703
9.76M
                            WORD32 tu_bits;
7704
7705
9.76M
                            cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
7706
9.76M
                                ps_ctxt,
7707
9.76M
                                pu1_cur_pred,
7708
9.76M
                                pred_strd,
7709
9.76M
                                pu1_cur_src,
7710
9.76M
                                chrm_src_stride,
7711
9.76M
                                pi2_cur_deq_data + trans_size,
7712
9.76M
                                deq_data_strd,
7713
9.76M
                                pu1_cur_recon,
7714
9.76M
                                i4_recon_stride,
7715
9.76M
                                pu1_ecd_data + total_bytes_offset,
7716
9.76M
                                ps_ctxt->au1_cu_csbf,
7717
9.76M
                                ps_ctxt->i4_cu_csbf_strd,
7718
9.76M
                                trans_size,
7719
9.76M
                                scan_idx,
7720
9.76M
                                PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag,
7721
9.76M
                                &num_bytes,
7722
9.76M
                                &tu_bits,
7723
9.76M
                                &zero_cols,
7724
9.76M
                                &zero_rows,
7725
9.76M
                                &u1_is_recon_available,
7726
9.76M
                                i4_perform_sbh,
7727
9.76M
                                i4_perform_rdoq,
7728
9.76M
                                &trans_ssd_v,
7729
9.76M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7730
9.76M
                                i4_alpha_stim_multiplier,
7731
9.76M
                                u1_is_cu_noisy,
7732
9.76M
#endif
7733
9.76M
                                ps_best_cu_prms->u1_skip_flag,
7734
9.76M
                                u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
7735
9.76M
                                V_PLANE);
7736
7737
9.76M
                            if(u1_compute_spatial_ssd && u1_is_recon_available)
7738
5.47M
                            {
7739
5.47M
                                ps_recon_datastore
7740
5.47M
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7741
5.47M
                                                                        [i4_subtu_idx] = 0;
7742
5.47M
                            }
7743
4.28M
                            else
7744
4.28M
                            {
7745
4.28M
                                ps_recon_datastore
7746
4.28M
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7747
4.28M
                                                                        [i4_subtu_idx] = UCHAR_MAX;
7748
4.28M
                            }
7749
7750
#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7751
                            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7752
                            {
7753
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
7754
                                trans_ssd_v = ihevce_inject_stim_into_distortion(
7755
                                    pu1_cur_src,
7756
                                    chrm_src_stride,
7757
                                    pu1_cur_pred,
7758
                                    pred_strd,
7759
                                    trans_ssd_v,
7760
                                    i4_alpha_stim_multiplier,
7761
                                    trans_size,
7762
                                    0,
7763
                                    ps_ctxt->u1_enable_psyRDOPT,
7764
                                    V_PLANE);
7765
#else
7766
                                if(u1_compute_spatial_ssd && u1_is_recon_available)
7767
                                {
7768
                                    trans_ssd_v = ihevce_inject_stim_into_distortion(
7769
                                        pu1_cur_src,
7770
                                        chrm_src_stride,
7771
                                        pu1_cur_recon,
7772
                                        i4_recon_stride,
7773
                                        trans_ssd_v,
7774
                                        i4_alpha_stim_multiplier,
7775
                                        trans_size,
7776
                                        0,
7777
                                        ps_ctxt->u1_enable_psyRDOPT,
7778
                                        V_PLANE);
7779
                                }
7780
                                else
7781
                                {
7782
                                    trans_ssd_v = ihevce_inject_stim_into_distortion(
7783
                                        pu1_cur_src,
7784
                                        chrm_src_stride,
7785
                                        pu1_cur_pred,
7786
                                        pred_strd,
7787
                                        trans_ssd_v,
7788
                                        i4_alpha_stim_multiplier,
7789
                                        trans_size,
7790
                                        0,
7791
                                        ps_ctxt->u1_enable_psyRDOPT,
7792
                                        V_PLANE);
7793
                                }
7794
#endif
7795
                            }
7796
#endif
7797
7798
9.76M
                            curr_cr_cod_cost =
7799
9.76M
                                trans_ssd_v +
7800
9.76M
                                COMPUTE_RATE_COST_CLIP30(
7801
9.76M
                                    tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
7802
9.76M
                            chrm_tu_bits += tu_bits;
7803
9.76M
                            i4_bits_cr += tu_bits;
7804
7805
                            /* RDOPT copy States :  New updated after curr TU to TU init */
7806
9.76M
                            if(0 != cbf)
7807
1.38M
                            {
7808
1.38M
                                COPY_CABAC_STATES(
7809
1.38M
                                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7810
1.38M
                                    &ps_ctxt->s_rdopt_entropy_ctxt
7811
1.38M
                                         .as_cu_entropy_ctxt[rd_opt_curr_idx]
7812
1.38M
                                         .s_cabac_ctxt.au1_ctxt_models[0],
7813
1.38M
                                    IHEVC_CAB_CTXT_END);
7814
1.38M
                            }
7815
                            /* RDOPT copy States :  Restoring back the Cb init state to Cr */
7816
8.37M
                            else
7817
8.37M
                            {
7818
8.37M
                                COPY_CABAC_STATES(
7819
8.37M
                                    &ps_ctxt->s_rdopt_entropy_ctxt
7820
8.37M
                                         .as_cu_entropy_ctxt[rd_opt_curr_idx]
7821
8.37M
                                         .s_cabac_ctxt.au1_ctxt_models[0],
7822
8.37M
                                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7823
8.37M
                                    IHEVC_CAB_CTXT_END);
7824
8.37M
                            }
7825
7826
                            /* If Intra and TU=CU/2, need recon for next TUs */
7827
9.76M
                            if(calc_recon)
7828
1.10M
                            {
7829
1.10M
                                ihevce_chroma_it_recon_fxn(
7830
1.10M
                                    ps_ctxt,
7831
1.10M
                                    (pi2_cur_deq_data + trans_size),
7832
1.10M
                                    deq_data_strd,
7833
1.10M
                                    pu1_cur_pred,
7834
1.10M
                                    pred_strd,
7835
1.10M
                                    pu1_cur_recon,
7836
1.10M
                                    i4_recon_stride,
7837
1.10M
                                    (pu1_ecd_data + total_bytes_offset),
7838
1.10M
                                    trans_size,
7839
1.10M
                                    cbf,
7840
1.10M
                                    zero_cols,
7841
1.10M
                                    zero_rows,
7842
1.10M
                                    V_PLANE);
7843
7844
1.10M
                                ps_recon_datastore
7845
1.10M
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7846
1.10M
                                                                        [i4_subtu_idx] = 0;
7847
1.10M
                            }
7848
8.66M
                            else
7849
8.66M
                            {
7850
8.66M
                                ps_recon_datastore
7851
8.66M
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7852
8.66M
                                                                        [i4_subtu_idx] = UCHAR_MAX;
7853
8.66M
                            }
7854
9.76M
                        }
7855
735k
                        else
7856
735k
                        {
7857
                            /* num bytes is set to 0 */
7858
735k
                            num_bytes = 0;
7859
7860
                            /* cbf is returned as 0 */
7861
735k
                            cbf = 0;
7862
7863
735k
                            curr_cr_cod_cost = trans_ssd_v =
7864
7865
735k
                                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
7866
735k
                                    pu1_cur_pred,
7867
735k
                                    pu1_cur_src,
7868
735k
                                    pred_strd,
7869
735k
                                    chrm_src_stride,
7870
735k
                                    trans_size,
7871
735k
                                    trans_size,
7872
735k
                                    V_PLANE);
7873
7874
735k
                            if(u1_compute_spatial_ssd)
7875
447k
                            {
7876
                                /* buffer copy from pred to recon */
7877
447k
                                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
7878
447k
                                    pu1_cur_pred,
7879
447k
                                    pred_strd,
7880
447k
                                    pu1_cur_recon,
7881
447k
                                    i4_recon_stride,
7882
447k
                                    trans_size,
7883
447k
                                    trans_size,
7884
447k
                                    V_PLANE);
7885
7886
447k
                                ps_recon_datastore
7887
447k
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7888
447k
                                                                        [i4_subtu_idx] = 0;
7889
447k
                            }
7890
7891
735k
                            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7892
0
                            {
7893
0
                                trans_ssd_v = ihevce_inject_stim_into_distortion(
7894
0
                                    pu1_cur_src,
7895
0
                                    chrm_src_stride,
7896
0
                                    pu1_cur_pred,
7897
0
                                    pred_strd,
7898
0
                                    trans_ssd_v,
7899
0
                                    i4_alpha_stim_multiplier,
7900
0
                                    trans_size,
7901
0
                                    0,
7902
0
                                    ps_ctxt->u1_enable_psyRDOPT,
7903
0
                                    V_PLANE);
7904
0
                            }
7905
7906
735k
#if ENABLE_INTER_ZCU_COST
7907
#if !WEIGH_CHROMA_COST
7908
                            /* cbf = 0, accumulate cu not coded cost */
7909
                            ps_ctxt->i8_cu_not_coded_cost += curr_cr_cod_cost;
7910
#else
7911
                            /* cbf = 0, accumulate cu not coded cost */
7912
7913
735k
                            ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
7914
735k
                                (curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7915
735k
                                 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7916
735k
                                CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7917
735k
#endif
7918
735k
#endif
7919
735k
                        }
7920
7921
#if !WEIGH_CHROMA_COST
7922
                        curr_rdopt_cost += curr_cr_cod_cost;
7923
#else
7924
10.4M
                        curr_rdopt_cost +=
7925
10.4M
                            ((curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7926
10.4M
                              (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7927
10.4M
                             CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7928
10.4M
#endif
7929
7930
10.4M
                        chrm_cod_cost += curr_cr_cod_cost;
7931
10.4M
                        i8_ssd_cr += trans_ssd_v;
7932
7933
10.4M
                        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
7934
10.4M
                        {
7935
                            /* Early exit : If the current running cost exceeds
7936
                            the prev. best mode cost, break */
7937
10.4M
                            if(curr_rdopt_cost > prev_best_rdopt_cost)
7938
396k
                            {
7939
396k
                                u1_is_early_exit_condition_satisfied = 1;
7940
396k
                                break;
7941
396k
                            }
7942
10.4M
                        }
7943
7944
                        /* inter cu is coded if any of the tu is coded in it */
7945
10.1M
                        ps_best_cu_prms->u1_is_cu_coded |= cbf;
7946
7947
                        /* update CR related params */
7948
10.1M
                        ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] =
7949
10.1M
                            total_bytes_offset + init_bytes_offset;
7950
7951
10.1M
                        if(0 == i4_subtu_idx)
7952
10.1M
                        {
7953
10.1M
                            ps_tu->s_tu.b1_cr_cbf = cbf;
7954
10.1M
                        }
7955
0
                        else
7956
0
                        {
7957
0
                            ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf;
7958
0
                        }
7959
7960
10.1M
                        total_bytes_offset += num_bytes;
7961
7962
10.1M
                        ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] = zero_cols;
7963
10.1M
                        ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] = zero_rows;
7964
10.1M
                        ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes;
7965
10.1M
                    }
7966
700k
                    else
7967
700k
                    {
7968
700k
                        ps_recon_datastore
7969
700k
                            ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx] =
7970
700k
                            UCHAR_MAX;
7971
700k
                        ps_recon_datastore
7972
700k
                            ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx] =
7973
700k
                            UCHAR_MAX;
7974
700k
                    }
7975
11.7M
                }
7976
7977
11.7M
                if(u1_is_early_exit_condition_satisfied)
7978
918k
                {
7979
918k
                    break;
7980
918k
                }
7981
7982
                /* loop increments */
7983
10.8M
                ps_tu++;
7984
10.8M
                ps_tu_temp_prms++;
7985
10.8M
            }
7986
7987
            /* Signal as luma mode. HIGH_QUALITY may update it */
7988
7.68M
            ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
7989
7990
            /* modify the cost chrm_cod_cost */
7991
7.68M
            if(ps_ctxt->u1_enable_psyRDOPT)
7992
0
            {
7993
0
                UWORD8 *pu1_recon_cu;
7994
0
                WORD32 recon_stride;
7995
0
                WORD32 curr_pos_x;
7996
0
                WORD32 curr_pos_y;
7997
0
                WORD32 start_index;
7998
0
                WORD32 num_horz_cu_in_ctb;
7999
0
                WORD32 had_block_size;
8000
                /* TODO: sreenivasa ctb size has to be used appropriately */
8001
0
                had_block_size = 8;
8002
0
                num_horz_cu_in_ctb = 2 * 64 / had_block_size;
8003
8004
0
                curr_pos_x = cu_pos_x << 3; /* pel units */
8005
0
                curr_pos_y = cu_pos_y << 3; /* pel units */
8006
0
                recon_stride = i4_recon_stride;
8007
0
                pu1_recon_cu = pu1_recon;
8008
8009
                /* start index to index the source satd of curr cu in the current ctb */
8010
0
                start_index = 2 * (curr_pos_x / had_block_size) +
8011
0
                              (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
8012
8013
0
                {
8014
0
                    chrm_cod_cost += ihevce_psy_rd_cost_croma(
8015
0
                        ps_ctxt->ai4_source_chroma_satd,
8016
0
                        pu1_recon,
8017
0
                        recon_stride,
8018
0
                        1,  //
8019
0
                        cu_size,
8020
0
                        0,  // pic type
8021
0
                        0,  //layer id
8022
0
                        ps_ctxt->i4_satd_lamda,  // lambda
8023
0
                        start_index,
8024
0
                        ps_ctxt->u1_is_input_data_hbd,  // 8 bit
8025
0
                        ps_ctxt->u1_chroma_array_type,
8026
0
                        &ps_ctxt->s_cmn_opt_func
8027
8028
0
                    );  // chroma subsampling 420
8029
0
                }
8030
0
            }
8031
7.68M
        }
8032
1.66M
        else
8033
1.66M
        {
8034
1.66M
            u1_is_mode_eq_chroma_satd_mode = 1;
8035
1.66M
            chrm_cod_cost = MAX_COST_64;
8036
1.66M
        }
8037
8038
        /* If Intra Block and preset is HIGH QUALITY, then compare with best SATD mode */
8039
9.34M
        if((PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag) &&
8040
6.37M
           (1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd))
8041
5.59M
        {
8042
5.59M
            if(64 == cu_size)
8043
119k
            {
8044
119k
                ASSERT(TU_EQ_CU != func_proc_mode);
8045
119k
            }
8046
8047
5.59M
            if(ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode]
8048
5.59M
                   .i8_chroma_best_rdopt < chrm_cod_cost)
8049
2.02M
            {
8050
2.02M
                UWORD8 *pu1_src;
8051
2.02M
                UWORD8 *pu1_ecd_data_src_cb;
8052
2.02M
                UWORD8 *pu1_ecd_data_src_cr;
8053
8054
2.02M
                chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt =
8055
2.02M
                    &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode];
8056
8057
2.02M
                UWORD8 *pu1_dst = &ps_ctxt->au1_rdopt_init_ctxt_models[0];
8058
2.02M
                WORD32 ai4_ecd_data_cb_offset[2] = { 0, 0 };
8059
2.02M
                WORD32 ai4_ecd_data_cr_offset[2] = { 0, 0 };
8060
8061
2.02M
                pu1_src = &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0];
8062
2.02M
                chrm_cod_cost = ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt;
8063
2.02M
                chrm_pred_mode = ps_chr_intra_satd_ctxt->u1_best_cr_mode;
8064
2.02M
                chrm_tu_bits = ps_chr_intra_satd_ctxt->i4_chrm_tu_bits;
8065
8066
2.02M
                if(u1_is_mode_eq_chroma_satd_mode)
8067
1.66M
                {
8068
1.66M
                    chrm_cod_cost -= ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode;
8069
1.66M
                }
8070
8071
                /* Resetting total_bytes_offset to 0 */
8072
2.02M
                total_bytes_offset = 0;
8073
8074
                /* Update the CABAC state corresponding to chroma only */
8075
                /* Chroma Cbf */
8076
2.02M
                memcpy(pu1_dst + IHEVC_CAB_CBCR_IDX, pu1_src + IHEVC_CAB_CBCR_IDX, 2);
8077
                /* Chroma transform skip */
8078
2.02M
                memcpy(pu1_dst + IHEVC_CAB_TFM_SKIP12, pu1_src + IHEVC_CAB_TFM_SKIP12, 1);
8079
                /* Chroma last coeff x prefix */
8080
2.02M
                memcpy(
8081
2.02M
                    pu1_dst + IHEVC_CAB_COEFFX_PREFIX + 15,
8082
2.02M
                    pu1_src + IHEVC_CAB_COEFFX_PREFIX + 15,
8083
2.02M
                    3);
8084
                /* Chroma last coeff y prefix */
8085
2.02M
                memcpy(
8086
2.02M
                    pu1_dst + IHEVC_CAB_COEFFY_PREFIX + 15,
8087
2.02M
                    pu1_src + IHEVC_CAB_COEFFY_PREFIX + 15,
8088
2.02M
                    3);
8089
                /* Chroma csbf */
8090
2.02M
                memcpy(
8091
2.02M
                    pu1_dst + IHEVC_CAB_CODED_SUBLK_IDX + 2,
8092
2.02M
                    pu1_src + IHEVC_CAB_CODED_SUBLK_IDX + 2,
8093
2.02M
                    2);
8094
                /* Chroma sig coeff flags */
8095
2.02M
                memcpy(
8096
2.02M
                    pu1_dst + IHEVC_CAB_COEFF_FLAG + 27, pu1_src + IHEVC_CAB_COEFF_FLAG + 27, 15);
8097
                /* Chroma absgt1 flags */
8098
2.02M
                memcpy(
8099
2.02M
                    pu1_dst + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16,
8100
2.02M
                    pu1_src + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16,
8101
2.02M
                    8);
8102
                /* Chroma absgt2 flags */
8103
2.02M
                memcpy(
8104
2.02M
                    pu1_dst + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4,
8105
2.02M
                    pu1_src + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4,
8106
2.02M
                    2);
8107
8108
2.02M
                ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
8109
2.02M
                ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
8110
8111
                /* update to luma decision as we update chroma in final mode */
8112
2.02M
                ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded_old;
8113
8114
5.16M
                for(ctr = 0; ctr < u1_num_tus; ctr++)
8115
3.13M
                {
8116
6.26M
                    for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++)
8117
3.13M
                    {
8118
3.13M
                        WORD32 cbf;
8119
3.13M
                        WORD32 num_bytes;
8120
8121
3.13M
                        pu1_ecd_data_src_cb =
8122
3.13M
                            &ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0];
8123
3.13M
                        pu1_ecd_data_src_cr =
8124
3.13M
                            &ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0];
8125
8126
                        /* check if chroma present flag is set */
8127
3.13M
                        if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
8128
3.13M
                        {
8129
3.13M
                            UWORD8 *pu1_cur_pred_dest;
8130
3.13M
                            UWORD8 *pu1_cur_pred_src;
8131
3.13M
                            WORD32 pred_src_strd;
8132
3.13M
                            WORD16 *pi2_cur_deq_data_dest;
8133
3.13M
                            WORD16 *pi2_cur_deq_data_src_cb;
8134
3.13M
                            WORD16 *pi2_cur_deq_data_src_cr;
8135
3.13M
                            WORD32 deq_src_strd;
8136
8137
3.13M
                            WORD32 curr_pos_x, curr_pos_y;
8138
8139
3.13M
                            trans_size = ps_tu->s_tu.b3_size;
8140
3.13M
                            trans_size = (1 << (trans_size + 1)); /* in chroma units */
8141
8142
                            /*Deriving stride values*/
8143
3.13M
                            pred_src_strd = ps_chr_intra_satd_ctxt->i4_pred_stride;
8144
3.13M
                            deq_src_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride;
8145
8146
                            /* since 2x2 transform is not allowed for chroma*/
8147
3.13M
                            if(2 == trans_size)
8148
194k
                            {
8149
194k
                                trans_size = 4;
8150
194k
                            }
8151
8152
                            /* get the current tu posx and posy w.r.t. the cu */
8153
3.13M
                            curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3);
8154
3.13M
                            curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) +
8155
3.13M
                                         (i4_subtu_idx * trans_size);
8156
8157
                            /* 420sp case only vertical height will be half */
8158
3.13M
                            if(0 == u1_is_422)
8159
3.13M
                            {
8160
3.13M
                                curr_pos_y >>= 1;
8161
3.13M
                            }
8162
8163
                            /* increment the pointers to start of current TU  */
8164
3.13M
                            pu1_cur_pred_src =
8165
3.13M
                                ((UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data + curr_pos_x);
8166
3.13M
                            pu1_cur_pred_src += (curr_pos_y * pred_src_strd);
8167
3.13M
                            pu1_cur_pred_dest = (pu1_pred + curr_pos_x);
8168
3.13M
                            pu1_cur_pred_dest += (curr_pos_y * pred_strd);
8169
8170
3.13M
                            pi2_cur_deq_data_src_cb =
8171
3.13M
                                &ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] + (curr_pos_x >> 1);
8172
3.13M
                            pi2_cur_deq_data_src_cr =
8173
3.13M
                                &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] + (curr_pos_x >> 1);
8174
3.13M
                            pi2_cur_deq_data_src_cb += (curr_pos_y * deq_src_strd);
8175
3.13M
                            pi2_cur_deq_data_src_cr += (curr_pos_y * deq_src_strd);
8176
3.13M
                            pi2_cur_deq_data_dest = pi2_deq_data + curr_pos_x;
8177
3.13M
                            pi2_cur_deq_data_dest += (curr_pos_y * deq_data_strd);
8178
8179
                            /*Overwriting deq data with that belonging to the winning special mode
8180
                            (luma mode !=  chroma mode)
8181
                            ihevce_copy_2d takes source and dest arguments as UWORD8 *. We have to
8182
                            correspondingly manipulate to copy WORD16 data*/
8183
8184
3.13M
                            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
8185
3.13M
                                (UWORD8 *)pi2_cur_deq_data_dest,
8186
3.13M
                                (deq_data_strd << 1),
8187
3.13M
                                (UWORD8 *)pi2_cur_deq_data_src_cb,
8188
3.13M
                                (deq_src_strd << 1),
8189
3.13M
                                (trans_size << 1),
8190
3.13M
                                trans_size);
8191
8192
3.13M
                            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
8193
3.13M
                                (UWORD8 *)(pi2_cur_deq_data_dest + trans_size),
8194
3.13M
                                (deq_data_strd << 1),
8195
3.13M
                                (UWORD8 *)pi2_cur_deq_data_src_cr,
8196
3.13M
                                (deq_src_strd << 1),
8197
3.13M
                                (trans_size << 1),
8198
3.13M
                                trans_size);
8199
8200
                            /*Overwriting pred data with that belonging to the winning special mode
8201
                            (luma mode !=  chroma mode)*/
8202
8203
3.13M
                            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
8204
3.13M
                                pu1_cur_pred_dest,
8205
3.13M
                                pred_strd,
8206
3.13M
                                pu1_cur_pred_src,
8207
3.13M
                                pred_src_strd,
8208
3.13M
                                (trans_size << 1),
8209
3.13M
                                trans_size);
8210
8211
3.13M
                            num_bytes = ps_chr_intra_satd_ctxt
8212
3.13M
                                            ->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr];
8213
3.13M
                            cbf = ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr];
8214
                            /* inter cu is coded if any of the tu is coded in it */
8215
3.13M
                            ps_best_cu_prms->u1_is_cu_coded |= cbf;
8216
8217
                            /* update CB related params */
8218
3.13M
                            ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] =
8219
3.13M
                                total_bytes_offset + init_bytes_offset;
8220
8221
3.13M
                            if(0 == i4_subtu_idx)
8222
3.13M
                            {
8223
3.13M
                                ps_tu->s_tu.b1_cb_cbf = cbf;
8224
3.13M
                            }
8225
0
                            else
8226
0
                            {
8227
0
                                ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf;
8228
0
                            }
8229
8230
                            /*Overwriting the cb ecd data corresponding to the special mode*/
8231
3.13M
                            if(0 != num_bytes)
8232
465k
                            {
8233
465k
                                memcpy(
8234
465k
                                    (pu1_ecd_data + total_bytes_offset),
8235
465k
                                    pu1_ecd_data_src_cb + ai4_ecd_data_cb_offset[i4_subtu_idx],
8236
465k
                                    num_bytes);
8237
465k
                            }
8238
8239
3.13M
                            total_bytes_offset += num_bytes;
8240
3.13M
                            ai4_ecd_data_cb_offset[i4_subtu_idx] += num_bytes;
8241
3.13M
                            ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes;
8242
8243
3.13M
                            num_bytes = ps_chr_intra_satd_ctxt
8244
3.13M
                                            ->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr];
8245
3.13M
                            cbf = ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr];
8246
                            /* inter cu is coded if any of the tu is coded in it */
8247
3.13M
                            ps_best_cu_prms->u1_is_cu_coded |= cbf;
8248
8249
                            /*Overwriting the cr ecd data corresponding to the special mode*/
8250
3.13M
                            if(0 != num_bytes)
8251
438k
                            {
8252
438k
                                memcpy(
8253
438k
                                    (pu1_ecd_data + total_bytes_offset),
8254
438k
                                    pu1_ecd_data_src_cr + ai4_ecd_data_cr_offset[i4_subtu_idx],
8255
438k
                                    num_bytes);
8256
438k
                            }
8257
8258
                            /* update CR related params */
8259
3.13M
                            ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] =
8260
3.13M
                                total_bytes_offset + init_bytes_offset;
8261
8262
3.13M
                            if(0 == i4_subtu_idx)
8263
3.13M
                            {
8264
3.13M
                                ps_tu->s_tu.b1_cr_cbf = cbf;
8265
3.13M
                            }
8266
0
                            else
8267
0
                            {
8268
0
                                ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf;
8269
0
                            }
8270
8271
3.13M
                            total_bytes_offset += num_bytes;
8272
3.13M
                            ai4_ecd_data_cr_offset[i4_subtu_idx] += num_bytes;
8273
8274
                            /*Updating zero rows and zero cols*/
8275
3.13M
                            ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] =
8276
3.13M
                                ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr];
8277
3.13M
                            ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] =
8278
3.13M
                                ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr];
8279
3.13M
                            ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] =
8280
3.13M
                                ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr];
8281
3.13M
                            ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] =
8282
3.13M
                                ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr];
8283
8284
3.13M
                            ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes;
8285
8286
3.13M
                            if((u1_num_tus > 1) &&
8287
1.47M
                               ps_recon_datastore->au1_is_chromaRecon_available[2])
8288
1.47M
                            {
8289
1.47M
                                ps_recon_datastore
8290
1.47M
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
8291
1.47M
                                                                        [i4_subtu_idx] = 2;
8292
1.47M
                                ps_recon_datastore
8293
1.47M
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
8294
1.47M
                                                                        [i4_subtu_idx] = 2;
8295
1.47M
                            }
8296
1.65M
                            else if(
8297
1.65M
                                (1 == u1_num_tus) &&
8298
1.65M
                                ps_recon_datastore->au1_is_chromaRecon_available[1])
8299
974k
                            {
8300
974k
                                ps_recon_datastore
8301
974k
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
8302
974k
                                                                        [i4_subtu_idx] = 1;
8303
974k
                                ps_recon_datastore
8304
974k
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
8305
974k
                                                                        [i4_subtu_idx] = 1;
8306
974k
                            }
8307
681k
                            else
8308
681k
                            {
8309
681k
                                ps_recon_datastore
8310
681k
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
8311
681k
                                                                        [i4_subtu_idx] = UCHAR_MAX;
8312
681k
                                ps_recon_datastore
8313
681k
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
8314
681k
                                                                        [i4_subtu_idx] = UCHAR_MAX;
8315
681k
                            }
8316
3.13M
                        }
8317
3.13M
                    }
8318
8319
                    /* loop increments */
8320
3.13M
                    ps_tu++;
8321
3.13M
                    ps_tu_temp_prms++;
8322
3.13M
                }
8323
2.02M
            }
8324
8325
5.59M
            if(!u1_is_422)
8326
5.59M
            {
8327
5.59M
                if(chrm_pred_mode == luma_pred_mode)
8328
5.23M
                {
8329
5.23M
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
8330
5.23M
                }
8331
364k
                else if(chrm_pred_mode == 0)
8332
64.0k
                {
8333
64.0k
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 0;
8334
64.0k
                }
8335
300k
                else if(chrm_pred_mode == 1)
8336
93.1k
                {
8337
93.1k
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 3;
8338
93.1k
                }
8339
206k
                else if(chrm_pred_mode == 10)
8340
157k
                {
8341
157k
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 2;
8342
157k
                }
8343
48.9k
                else if(chrm_pred_mode == 26)
8344
48.9k
                {
8345
48.9k
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 1;
8346
48.9k
                }
8347
0
                else
8348
0
                {
8349
0
                    ASSERT(0); /*Should not come here*/
8350
0
                }
8351
5.59M
            }
8352
0
            else
8353
0
            {
8354
0
                if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[luma_pred_mode])
8355
0
                {
8356
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
8357
0
                }
8358
0
                else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[0])
8359
0
                {
8360
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 0;
8361
0
                }
8362
0
                else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[1])
8363
0
                {
8364
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 3;
8365
0
                }
8366
0
                else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[10])
8367
0
                {
8368
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 2;
8369
0
                }
8370
0
                else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[26])
8371
0
                {
8372
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 1;
8373
0
                }
8374
0
                else
8375
0
                {
8376
0
                    ASSERT(0); /*Should not come here*/
8377
0
                }
8378
0
            }
8379
5.59M
        }
8380
8381
        /* Store the actual chroma mode */
8382
9.34M
        ps_best_cu_prms->u1_chroma_intra_pred_actual_mode = chrm_pred_mode;
8383
9.34M
    }
8384
8385
    /* update the total bytes produced */
8386
9.34M
    ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes_offset + init_bytes_offset;
8387
8388
    /* store the final chrm bits accumulated */
8389
9.34M
    *pi4_chrm_tu_bits = chrm_tu_bits;
8390
8391
9.34M
    return (chrm_cod_cost);
8392
9.34M
}
8393
8394
/*!
8395
******************************************************************************
8396
* \if Function name : ihevce_final_rdopt_mode_prcs \endif
8397
*
8398
* \brief
8399
*    Final RDOPT mode process function. Performs Recon computation for the
8400
*    final mode. Re-use or Compute pred, iq-data, coeff based on the flags.
8401
*
8402
* \param[in] ps_ctxt : pointer to enc_loop module
8403
* \param[in] ps_prms : pointer to struct containing requisite parameters
8404
*
8405
* \return
8406
*    None
8407
*
8408
* \author
8409
*  Ittiam
8410
*
8411
*****************************************************************************
8412
*/
8413
void ihevce_final_rdopt_mode_prcs(
8414
    ihevce_enc_loop_ctxt_t *ps_ctxt, final_mode_process_prms_t *ps_prms)
8415
2.37M
{
8416
2.37M
    enc_loop_cu_final_prms_t *ps_best_cu_prms;
8417
2.37M
    tu_enc_loop_out_t *ps_tu_enc_loop;
8418
2.37M
    tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms;
8419
2.37M
    nbr_avail_flags_t s_nbr;
8420
2.37M
    recon_datastore_t *ps_recon_datastore;
8421
8422
2.37M
    ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
8423
2.37M
    ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
8424
2.37M
    ihevc_intra_pred_ref_filtering_ft *ihevc_intra_pred_ref_filtering_fptr;
8425
8426
2.37M
    WORD32 num_tu_in_cu;
8427
2.37M
    LWORD64 rd_opt_cost;
8428
2.37M
    WORD32 ctr;
8429
2.37M
    WORD32 i4_subtu_idx;
8430
2.37M
    WORD32 cu_size;
8431
2.37M
    WORD32 cu_pos_x, cu_pos_y;
8432
2.37M
    WORD32 chrm_present_flag = 1;
8433
2.37M
    WORD32 num_bytes, total_bytes = 0;
8434
2.37M
    WORD32 chrm_ctr = 0;
8435
2.37M
    WORD32 u1_is_cu_coded;
8436
2.37M
    UWORD8 *pu1_old_ecd_data;
8437
2.37M
    UWORD8 *pu1_chrm_old_ecd_data;
8438
2.37M
    UWORD8 *pu1_cur_pred;
8439
2.37M
    WORD16 *pi2_deq_data;
8440
2.37M
    WORD16 *pi2_chrm_deq_data;
8441
2.37M
    WORD16 *pi2_cur_deq_data;
8442
2.37M
    WORD16 *pi2_cur_deq_data_chrm;
8443
2.37M
    UWORD8 *pu1_cur_luma_recon;
8444
2.37M
    UWORD8 *pu1_cur_chroma_recon;
8445
2.37M
    UWORD8 *pu1_cur_src;
8446
2.37M
    UWORD8 *pu1_cur_src_chrm;
8447
2.37M
    UWORD8 *pu1_cur_pred_chrm;
8448
2.37M
    UWORD8 *pu1_intra_pred_mode;
8449
2.37M
    UWORD32 *pu4_nbr_flags;
8450
2.37M
    LWORD64 i8_ssd;
8451
8452
2.37M
    cu_nbr_prms_t *ps_cu_nbr_prms = ps_prms->ps_cu_nbr_prms;
8453
2.37M
    cu_inter_cand_t *ps_best_inter_cand = ps_prms->ps_best_inter_cand;
8454
2.37M
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms = ps_prms->ps_chrm_cu_buf_prms;
8455
8456
2.37M
    WORD32 packed_pred_mode = ps_prms->packed_pred_mode;
8457
2.37M
    WORD32 rd_opt_best_idx = ps_prms->rd_opt_best_idx;
8458
2.37M
    UWORD8 *pu1_src = (UWORD8 *)ps_prms->pv_src;
8459
2.37M
    WORD32 src_strd = ps_prms->src_strd;
8460
2.37M
    UWORD8 *pu1_pred = (UWORD8 *)ps_prms->pv_pred;
8461
2.37M
    WORD32 pred_strd = ps_prms->pred_strd;
8462
2.37M
    UWORD8 *pu1_pred_chrm = (UWORD8 *)ps_prms->pv_pred_chrm;
8463
2.37M
    WORD32 pred_chrm_strd = ps_prms->pred_chrm_strd;
8464
2.37M
    UWORD8 *pu1_final_ecd_data = ps_prms->pu1_final_ecd_data;
8465
2.37M
    UWORD8 *pu1_csbf_buf = ps_prms->pu1_csbf_buf;
8466
2.37M
    WORD32 csbf_strd = ps_prms->csbf_strd;
8467
2.37M
    UWORD8 *pu1_luma_recon = (UWORD8 *)ps_prms->pv_luma_recon;
8468
2.37M
    WORD32 recon_luma_strd = ps_prms->recon_luma_strd;
8469
2.37M
    UWORD8 *pu1_chrm_recon = (UWORD8 *)ps_prms->pv_chrm_recon;
8470
2.37M
    WORD32 recon_chrma_strd = ps_prms->recon_chrma_strd;
8471
2.37M
    UWORD8 u1_cu_pos_x = ps_prms->u1_cu_pos_x;
8472
2.37M
    UWORD8 u1_cu_pos_y = ps_prms->u1_cu_pos_y;
8473
2.37M
    UWORD8 u1_cu_size = ps_prms->u1_cu_size;
8474
2.37M
    WORD8 i1_cu_qp = ps_prms->i1_cu_qp;
8475
2.37M
    UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
8476
2.37M
    UWORD8 u1_num_subtus = (u1_is_422 == 1) + 1;
8477
    /* Get the Chroma pointer and parameters */
8478
2.37M
    UWORD8 *pu1_src_chrm = ps_chrm_cu_buf_prms->pu1_curr_src;
8479
2.37M
    WORD32 src_chrm_strd = ps_chrm_cu_buf_prms->i4_chrm_src_stride;
8480
2.37M
    UWORD8 u1_compute_spatial_ssd_luma = 0;
8481
2.37M
    UWORD8 u1_compute_spatial_ssd_chroma = 0;
8482
    /* Get the pointer for function selector */
8483
2.37M
    ihevc_intra_pred_luma_ref_substitution_fptr =
8484
2.37M
        ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
8485
8486
2.37M
    ihevc_intra_pred_ref_filtering_fptr =
8487
2.37M
        ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr;
8488
8489
2.37M
    ihevc_intra_pred_chroma_ref_substitution_fptr =
8490
2.37M
        ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
8491
8492
    /* Get the best CU parameters */
8493
2.37M
    ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
8494
2.37M
    num_tu_in_cu = ps_best_cu_prms->u2_num_tus_in_cu;
8495
2.37M
    cu_size = ps_best_cu_prms->u1_cu_size;
8496
2.37M
    cu_pos_x = u1_cu_pos_x;
8497
2.37M
    cu_pos_y = u1_cu_pos_y;
8498
2.37M
    pu1_intra_pred_mode = &ps_best_cu_prms->au1_intra_pred_mode[0];
8499
2.37M
    pu4_nbr_flags = &ps_best_cu_prms->au4_nbr_flags[0];
8500
2.37M
    ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore;
8501
8502
    /* get the first TU pointer */
8503
2.37M
    ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0];
8504
    /* get the first TU only enc_loop prms pointer */
8505
2.37M
    ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
8506
    /*modify quant related param in ctxt based on current cu qp*/
8507
2.37M
    if((ps_ctxt->i1_cu_qp_delta_enable))
8508
934k
    {
8509
        /*recompute quant related param at every cu level*/
8510
934k
        ihevce_compute_quant_rel_param(ps_ctxt, i1_cu_qp);
8511
8512
        /* get frame level lambda params */
8513
934k
        ihevce_get_cl_cu_lambda_prms(
8514
934k
            ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? i1_cu_qp : ps_ctxt->i4_frame_qp);
8515
934k
    }
8516
8517
2.37M
    ps_best_cu_prms->i8_cu_ssd = 0;
8518
2.37M
    ps_best_cu_prms->u4_cu_open_intra_sad = 0;
8519
8520
    /* For skip case : Set TU_size = CU_size and make cbf = 0
8521
    so that same TU loop can be used for all modes */
8522
2.37M
    if(PRED_MODE_SKIP == packed_pred_mode)
8523
217k
    {
8524
436k
        for(ctr = 0; ctr < num_tu_in_cu; ctr++)
8525
219k
        {
8526
219k
            ps_tu_enc_loop->s_tu.b1_y_cbf = 0;
8527
8528
219k
            ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = 0;
8529
8530
219k
            ps_tu_enc_loop++;
8531
219k
            ps_tu_enc_loop_temp_prms++;
8532
219k
        }
8533
8534
        /* go back to the first TU pointer */
8535
217k
        ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0];
8536
217k
        ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
8537
217k
    }
8538
    /**   For inter case, pred calculation is outside the loop     **/
8539
2.37M
    if(PRED_MODE_INTRA != packed_pred_mode)
8540
1.06M
    {
8541
        /**------------- Compute pred data if required --------------**/
8542
1.06M
        if((1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data))
8543
0
        {
8544
0
            nbr_4x4_t *ps_topleft_nbr_4x4;
8545
0
            nbr_4x4_t *ps_left_nbr_4x4;
8546
0
            nbr_4x4_t *ps_top_nbr_4x4;
8547
0
            WORD32 nbr_4x4_left_strd;
8548
8549
0
            ps_best_inter_cand->pu1_pred_data = pu1_pred;
8550
0
            ps_best_inter_cand->i4_pred_data_stride = pred_strd;
8551
8552
            /* Get the CU nbr information */
8553
0
            ps_topleft_nbr_4x4 = ps_cu_nbr_prms->ps_topleft_nbr_4x4;
8554
0
            ps_left_nbr_4x4 = ps_cu_nbr_prms->ps_left_nbr_4x4;
8555
0
            ps_top_nbr_4x4 = ps_cu_nbr_prms->ps_top_nbr_4x4;
8556
0
            nbr_4x4_left_strd = ps_cu_nbr_prms->nbr_4x4_left_strd;
8557
8558
            /* MVP ,MVD calc and Motion compensation */
8559
0
            rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)(
8560
0
                ps_ctxt,
8561
0
                ps_best_inter_cand,
8562
0
                u1_cu_size,
8563
0
                cu_pos_x,
8564
0
                cu_pos_y,
8565
0
                ps_left_nbr_4x4,
8566
0
                ps_top_nbr_4x4,
8567
0
                ps_topleft_nbr_4x4,
8568
0
                nbr_4x4_left_strd,
8569
0
                rd_opt_best_idx);
8570
0
        }
8571
8572
        /** ------ Motion Compensation for Chroma -------- **/
8573
1.06M
        if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data)
8574
232k
        {
8575
232k
            UWORD8 *pu1_cur_pred;
8576
232k
            pu1_cur_pred = pu1_pred_chrm;
8577
8578
            /* run a loop over all the partitions in cu */
8579
468k
            for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++)
8580
236k
            {
8581
236k
                pu_t *ps_pu;
8582
236k
                WORD32 inter_pu_wd, inter_pu_ht;
8583
8584
236k
                ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr];
8585
8586
                /* IF AMP then each partitions can have diff wd ht */
8587
236k
                inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
8588
236k
                inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
8589
236k
                inter_pu_ht <<= u1_is_422;
8590
                /* chroma mc func */
8591
236k
                ihevce_chroma_inter_pred_pu(
8592
236k
                    &ps_ctxt->s_mc_ctxt, ps_pu, pu1_cur_pred, pred_chrm_strd);
8593
236k
                if(2 == ps_best_cu_prms->u2_num_pus_in_cu)
8594
6.74k
                {
8595
                    /* 2Nx__ partition case */
8596
6.74k
                    if(inter_pu_wd == ps_best_cu_prms->u1_cu_size)
8597
3.10k
                    {
8598
3.10k
                        pu1_cur_pred += (inter_pu_ht * pred_chrm_strd);
8599
3.10k
                    }
8600
                    /* __x2N partition case */
8601
6.74k
                    if(inter_pu_ht == (ps_best_cu_prms->u1_cu_size >> (u1_is_422 == 0)))
8602
3.63k
                    {
8603
3.63k
                        pu1_cur_pred += inter_pu_wd;
8604
3.63k
                    }
8605
6.74k
                }
8606
236k
            }
8607
232k
        }
8608
1.06M
    }
8609
2.37M
    pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0];
8610
2.37M
    pi2_chrm_deq_data =
8611
2.37M
        &ps_best_cu_prms->pi2_cu_deq_coeffs[0] + ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx;
8612
2.37M
    pu1_old_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0];
8613
2.37M
    pu1_chrm_old_ecd_data =
8614
2.37M
        &ps_best_cu_prms->pu1_cu_coeffs[0] + ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx;
8615
8616
    /* default value for cu coded flag */
8617
2.37M
    u1_is_cu_coded = 0;
8618
8619
    /* If we are re-computing coeff, set sad to 0 and start accumulating */
8620
    /* else use the best cand. sad from RDOPT stage                    */
8621
2.37M
    if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data)
8622
0
    {
8623
        /*init of sad of CU accumulated over all TU*/
8624
0
        ps_best_cu_prms->u4_cu_sad = 0;
8625
8626
        /* reset the luma residual bits */
8627
0
        ps_best_cu_prms->u4_cu_luma_res_bits = 0;
8628
0
    }
8629
8630
2.37M
    if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data)
8631
544k
    {
8632
        /* reset the chroma residual bits */
8633
544k
        ps_best_cu_prms->u4_cu_chroma_res_bits = 0;
8634
544k
    }
8635
8636
2.37M
    if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data) ||
8637
2.37M
       (1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data))
8638
544k
    {
8639
        /*Header bits have to be reevaluated if luma or chroma reevaluation is done, as
8640
        the quantized coefficients might be changed.
8641
        We are copying only those states which correspond to the header from the cabac state
8642
        of the previous CU, because the header is going to be recomputed for this condition*/
8643
544k
        ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1;
8644
544k
        memcpy(
8645
544k
            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
8646
544k
            &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
8647
544k
            IHEVC_CAB_COEFFX_PREFIX);
8648
8649
544k
        if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data))
8650
0
        {
8651
0
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
8652
0
                (&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX),
8653
0
                (&ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0] +
8654
0
                 IHEVC_CAB_COEFFX_PREFIX),
8655
0
                (IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX));
8656
0
        }
8657
544k
        else
8658
544k
        {
8659
544k
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
8660
544k
                (&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX),
8661
544k
                (&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
8662
544k
                      .s_cabac_ctxt.au1_ctxt_models[0] +
8663
544k
                 IHEVC_CAB_COEFFX_PREFIX),
8664
544k
                (IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX));
8665
544k
        }
8666
544k
        ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_best_idx;
8667
544k
    }
8668
1.83M
    else
8669
1.83M
    {
8670
1.83M
        ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0;
8671
1.83M
    }
8672
8673
    /* Zero cbf tool is disabled for intra CUs */
8674
2.37M
    if(PRED_MODE_INTRA == packed_pred_mode)
8675
1.30M
    {
8676
#if ENABLE_ZERO_CBF_IN_INTRA
8677
        ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
8678
#else
8679
1.30M
        ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
8680
1.30M
#endif
8681
1.30M
    }
8682
1.06M
    else
8683
1.06M
    {
8684
#if DISABLE_ZERO_ZBF_IN_INTER
8685
        ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
8686
#else
8687
1.06M
        ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
8688
1.06M
#endif
8689
1.06M
    }
8690
8691
    /** Loop for all tu blocks in current cu and do reconstruction **/
8692
6.66M
    for(ctr = 0; ctr < num_tu_in_cu; ctr++)
8693
4.28M
    {
8694
4.28M
        tu_t *ps_tu;
8695
4.28M
        WORD32 trans_size, num_4x4_in_tu;
8696
4.28M
        WORD32 cbf, zero_rows, zero_cols;
8697
4.28M
        WORD32 cu_pos_x_in_4x4, cu_pos_y_in_4x4;
8698
4.28M
        WORD32 cu_pos_x_in_pix, cu_pos_y_in_pix;
8699
4.28M
        WORD32 luma_pred_mode, chroma_pred_mode = 0;
8700
4.28M
        UWORD8 au1_is_recon_available[2];
8701
8702
4.28M
        ps_tu = &(ps_tu_enc_loop->s_tu); /* Points to the TU property ctxt */
8703
8704
4.28M
        u1_compute_spatial_ssd_luma = 0;
8705
4.28M
        u1_compute_spatial_ssd_chroma = 0;
8706
8707
4.28M
        trans_size = 1 << (ps_tu->b3_size + 2);
8708
4.28M
        num_4x4_in_tu = (trans_size >> 2);
8709
4.28M
        cu_pos_x_in_4x4 = ps_tu->b4_pos_x;
8710
4.28M
        cu_pos_y_in_4x4 = ps_tu->b4_pos_y;
8711
8712
        /* populate the coeffs scan idx */
8713
4.28M
        ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
8714
8715
        /* get the current pos x and pos y in pixels */
8716
4.28M
        cu_pos_x_in_pix = (cu_pos_x_in_4x4 << 2) - (cu_pos_x << 3);
8717
4.28M
        cu_pos_y_in_pix = (cu_pos_y_in_4x4 << 2) - (cu_pos_y << 3);
8718
8719
        /* Update pointers based on the location */
8720
4.28M
        pu1_cur_src = pu1_src + cu_pos_x_in_pix;
8721
4.28M
        pu1_cur_src += (cu_pos_y_in_pix * src_strd);
8722
4.28M
        pu1_cur_pred = pu1_pred + cu_pos_x_in_pix;
8723
4.28M
        pu1_cur_pred += (cu_pos_y_in_pix * pred_strd);
8724
8725
4.28M
        pu1_cur_luma_recon = pu1_luma_recon + cu_pos_x_in_pix;
8726
4.28M
        pu1_cur_luma_recon += (cu_pos_y_in_pix * recon_luma_strd);
8727
8728
4.28M
        pi2_cur_deq_data = pi2_deq_data + cu_pos_x_in_pix;
8729
4.28M
        pi2_cur_deq_data += cu_pos_y_in_pix * cu_size;
8730
8731
4.28M
        pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix;
8732
4.28M
        pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) +
8733
4.28M
                            (u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd));
8734
8735
4.28M
        pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix;
8736
4.28M
        pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) +
8737
4.28M
                             (u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd));
8738
8739
4.28M
        pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix;
8740
4.28M
        pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) +
8741
4.28M
                                (u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd));
8742
8743
4.28M
        pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix;
8744
4.28M
        pi2_cur_deq_data_chrm +=
8745
4.28M
            ((cu_pos_y_in_pix >> 1) * cu_size) + (u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size));
8746
8747
        /* if transform size is 4x4 then only first luma 4x4 will have chroma */
8748
4.28M
        chrm_present_flag = 1; /* by default chroma present is set to 1*/
8749
8750
4.28M
        if(4 == trans_size)
8751
1.49M
        {
8752
            /* if tusize is 4x4 then only first luma 4x4 will have chroma*/
8753
1.49M
            if(0 != chrm_ctr)
8754
1.12M
            {
8755
1.12M
                chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE;
8756
1.12M
            }
8757
8758
            /* increment the chrm ctr unconditionally */
8759
1.49M
            chrm_ctr++;
8760
            /* after ctr reached 4 reset it */
8761
1.49M
            if(4 == chrm_ctr)
8762
374k
            {
8763
374k
                chrm_ctr = 0;
8764
374k
            }
8765
1.49M
        }
8766
8767
        /**------------- Compute pred data if required --------------**/
8768
4.28M
        if(PRED_MODE_INTRA == packed_pred_mode) /* Inter pred calc. is done outside loop */
8769
2.37M
        {
8770
            /* Get the pred mode for scan idx calculation, even if pred is not required */
8771
2.37M
            luma_pred_mode = *pu1_intra_pred_mode;
8772
8773
2.37M
            if((ps_ctxt->i4_rc_pass == 1) ||
8774
2.37M
               (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data))
8775
0
            {
8776
0
                WORD32 nbr_flags;
8777
0
                WORD32 luma_pred_func_idx;
8778
0
                UWORD8 *pu1_left;
8779
0
                UWORD8 *pu1_top;
8780
0
                UWORD8 *pu1_top_left;
8781
0
                WORD32 left_strd;
8782
8783
                /* left cu boundary */
8784
0
                if(0 == cu_pos_x_in_pix)
8785
0
                {
8786
0
                    left_strd = ps_cu_nbr_prms->cu_left_stride;
8787
0
                    pu1_left = ps_cu_nbr_prms->pu1_cu_left + cu_pos_y_in_pix * left_strd;
8788
0
                }
8789
0
                else
8790
0
                {
8791
0
                    pu1_left = pu1_cur_luma_recon - 1;
8792
0
                    left_strd = recon_luma_strd;
8793
0
                }
8794
8795
                /* top cu boundary */
8796
0
                if(0 == cu_pos_y_in_pix)
8797
0
                {
8798
0
                    pu1_top = ps_cu_nbr_prms->pu1_cu_top + cu_pos_x_in_pix;
8799
0
                }
8800
0
                else
8801
0
                {
8802
0
                    pu1_top = pu1_cur_luma_recon - recon_luma_strd;
8803
0
                }
8804
8805
                /* by default top left is set to cu top left */
8806
0
                pu1_top_left = ps_cu_nbr_prms->pu1_cu_top_left;
8807
8808
                /* top left based on position */
8809
0
                if((0 != cu_pos_y_in_pix) && (0 == cu_pos_x_in_pix))
8810
0
                {
8811
0
                    pu1_top_left = pu1_left - left_strd;
8812
0
                }
8813
0
                else if(0 != cu_pos_x_in_pix)
8814
0
                {
8815
0
                    pu1_top_left = pu1_top - 1;
8816
0
                }
8817
8818
                /* get the neighbour availability flags */
8819
0
                nbr_flags = ihevce_get_nbr_intra(
8820
0
                    &s_nbr,
8821
0
                    ps_ctxt->pu1_ctb_nbr_map,
8822
0
                    ps_ctxt->i4_nbr_map_strd,
8823
0
                    cu_pos_x_in_4x4,
8824
0
                    cu_pos_y_in_4x4,
8825
0
                    num_4x4_in_tu);
8826
8827
0
                if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data)
8828
0
                {
8829
                    /* copy the nbr flags for chroma reuse */
8830
0
                    if(4 != trans_size)
8831
0
                    {
8832
0
                        *pu4_nbr_flags = nbr_flags;
8833
0
                    }
8834
0
                    else if(1 == chrm_present_flag)
8835
0
                    {
8836
                        /* compute the avail flags assuming luma trans is 8x8 */
8837
                        /* get the neighbour availability flags */
8838
0
                        *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
8839
0
                            ps_ctxt->pu1_ctb_nbr_map,
8840
0
                            ps_ctxt->i4_nbr_map_strd,
8841
0
                            cu_pos_x_in_4x4,
8842
0
                            cu_pos_y_in_4x4,
8843
0
                            (num_4x4_in_tu << 1),
8844
0
                            (num_4x4_in_tu << 1));
8845
0
                    }
8846
8847
                    /* call reference array substitution */
8848
0
                    ihevc_intra_pred_luma_ref_substitution_fptr(
8849
0
                        pu1_top_left,
8850
0
                        pu1_top,
8851
0
                        pu1_left,
8852
0
                        left_strd,
8853
0
                        trans_size,
8854
0
                        nbr_flags,
8855
0
                        (UWORD8 *)ps_ctxt->pv_ref_sub_out,
8856
0
                        1);
8857
8858
                    /* call reference filtering */
8859
0
                    ihevc_intra_pred_ref_filtering_fptr(
8860
0
                        (UWORD8 *)ps_ctxt->pv_ref_sub_out,
8861
0
                        trans_size,
8862
0
                        (UWORD8 *)ps_ctxt->pv_ref_filt_out,
8863
0
                        luma_pred_mode,
8864
0
                        ps_ctxt->i1_strong_intra_smoothing_enable_flag);
8865
8866
                    /* use the look up to get the function idx */
8867
0
                    luma_pred_func_idx = g_i4_ip_funcs[luma_pred_mode];
8868
8869
                    /* call the intra prediction function */
8870
0
                    ps_ctxt->apf_lum_ip[luma_pred_func_idx](
8871
0
                        (UWORD8 *)ps_ctxt->pv_ref_filt_out,
8872
0
                        1,
8873
0
                        pu1_cur_pred,
8874
0
                        pred_strd,
8875
0
                        trans_size,
8876
0
                        luma_pred_mode);
8877
0
                }
8878
0
            }
8879
2.37M
            else if(
8880
2.37M
                (1 == chrm_present_flag) &&
8881
1.63M
                (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data))
8882
401k
            {
8883
401k
                WORD32 temp_num_4x4_in_tu = num_4x4_in_tu;
8884
8885
401k
                if(4 == trans_size) /* compute the avail flags assuming luma trans is 8x8 */
8886
71.7k
                {
8887
71.7k
                    temp_num_4x4_in_tu = num_4x4_in_tu << 1;
8888
71.7k
                }
8889
8890
401k
                *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
8891
401k
                    ps_ctxt->pu1_ctb_nbr_map,
8892
401k
                    ps_ctxt->i4_nbr_map_strd,
8893
401k
                    cu_pos_x_in_4x4,
8894
401k
                    cu_pos_y_in_4x4,
8895
401k
                    temp_num_4x4_in_tu,
8896
401k
                    temp_num_4x4_in_tu);
8897
401k
            }
8898
8899
            /* Get the pred mode for scan idx calculation, even if pred is not required */
8900
2.37M
            chroma_pred_mode = ps_best_cu_prms->u1_chroma_intra_pred_actual_mode;
8901
2.37M
        }
8902
8903
4.28M
        if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data)
8904
0
        {
8905
0
            WORD32 temp_bits;
8906
0
            LWORD64 temp_cost;
8907
0
            UWORD32 u4_tu_sad;
8908
0
            WORD32 perform_sbh, perform_rdoq;
8909
8910
0
            if(PRED_MODE_INTRA == packed_pred_mode)
8911
0
            {
8912
                /* for luma 4x4 and 8x8 transforms, the scan is chosen based on intra pred mode */
8913
0
                if(trans_size < 16)
8914
0
                {
8915
                    /* for modes from 22 upto 30 horizontal scan is used */
8916
0
                    if((luma_pred_mode > 21) && (luma_pred_mode < 31))
8917
0
                    {
8918
0
                        ps_ctxt->i4_scan_idx = SCAN_HORZ;
8919
0
                    }
8920
                    /* for modes from 6 upto 14 vertical scan is used */
8921
0
                    else if((luma_pred_mode > 5) && (luma_pred_mode < 15))
8922
0
                    {
8923
0
                        ps_ctxt->i4_scan_idx = SCAN_VERT;
8924
0
                    }
8925
0
                }
8926
0
            }
8927
8928
            /* RDOPT copy States :  TU init (best until prev TU) to current */
8929
0
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
8930
0
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
8931
0
                        .s_cabac_ctxt.au1_ctxt_models[0] +
8932
0
                    IHEVC_CAB_COEFFX_PREFIX,
8933
0
                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
8934
0
                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
8935
8936
0
            if(ps_prms->u1_recompute_sbh_and_rdoq)
8937
0
            {
8938
0
                perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH);
8939
0
                perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ);
8940
0
            }
8941
0
            else
8942
0
            {
8943
                /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
8944
0
                perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh;
8945
                /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
8946
                we would have to do RDOQ again.*/
8947
0
                perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq;
8948
0
            }
8949
8950
#if DISABLE_RDOQ_INTRA
8951
            if(PRED_MODE_INTRA == packed_pred_mode)
8952
            {
8953
                perform_rdoq = 0;
8954
            }
8955
#endif
8956
            /*If BEST candidate RDOQ is enabled, either no coef level rdoq or CU level rdoq has to be enabled
8957
            so that all candidates and best candidate are quantized with same rounding factor  */
8958
0
            if(1 == perform_rdoq)
8959
0
            {
8960
0
                ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING);
8961
0
            }
8962
8963
0
            cbf = ihevce_t_q_iq_ssd_scan_fxn(
8964
0
                ps_ctxt,
8965
0
                pu1_cur_pred,
8966
0
                pred_strd,
8967
0
                pu1_cur_src,
8968
0
                src_strd,
8969
0
                pi2_cur_deq_data,
8970
0
                cu_size, /*deq_data stride is cu_size*/
8971
0
                pu1_cur_luma_recon,
8972
0
                recon_luma_strd,
8973
0
                pu1_final_ecd_data,
8974
0
                pu1_csbf_buf,
8975
0
                csbf_strd,
8976
0
                trans_size,
8977
0
                packed_pred_mode,
8978
0
                &temp_cost,
8979
0
                &num_bytes,
8980
0
                &temp_bits,
8981
0
                &u4_tu_sad,
8982
0
                &zero_cols,
8983
0
                &zero_rows,
8984
0
                &au1_is_recon_available[0],
8985
0
                perform_rdoq,  //(BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level),
8986
0
                perform_sbh,
8987
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
8988
0
                !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
8989
0
                                       : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
8990
0
                                          (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
8991
0
                                             100.0,
8992
0
                ps_prms->u1_is_cu_noisy,
8993
0
#endif
8994
0
                u1_compute_spatial_ssd_luma ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
8995
0
                1 /*early cbf*/
8996
0
            );  //(BEST_CAND_SBH == ps_ctxt->i4_sbh_level));
8997
8998
            /* Accumulate luma residual bits */
8999
0
            ps_best_cu_prms->u4_cu_luma_res_bits += temp_bits;
9000
9001
            /* RDOPT copy States :  New updated after curr TU to TU init */
9002
0
            if(0 != cbf)
9003
0
            {
9004
                /* update to new state only if CBF is non zero */
9005
0
                COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9006
0
                    &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9007
0
                    &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9008
0
                            .s_cabac_ctxt.au1_ctxt_models[0] +
9009
0
                        IHEVC_CAB_COEFFX_PREFIX,
9010
0
                    IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9011
0
            }
9012
9013
            /* accumulate the TU sad into cu sad */
9014
0
            ps_best_cu_prms->u4_cu_sad += u4_tu_sad;
9015
0
            ps_tu->b1_y_cbf = cbf;
9016
0
            ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = num_bytes;
9017
9018
            /* If somebody updates cbf (RDOQ or SBH), update in nbr str. for BS */
9019
0
            if((ps_prms->u1_will_cabac_state_change) && (!ps_prms->u1_is_first_pass))
9020
0
            {
9021
0
                WORD32 num_4x4_in_cu = u1_cu_size >> 2;
9022
0
                nbr_4x4_t *ps_cur_nbr_4x4 = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0];
9023
0
                ps_cur_nbr_4x4 = (ps_cur_nbr_4x4 + (cu_pos_x_in_pix >> 2));
9024
0
                ps_cur_nbr_4x4 += ((cu_pos_y_in_pix >> 2) * num_4x4_in_cu);
9025
                /* replicate the nbr 4x4 structure for all 4x4 blocks of the current TU */
9026
0
                ps_cur_nbr_4x4->b1_y_cbf = cbf;
9027
                /*copy the cu qp. This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/
9028
0
                ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
9029
                /* Qp and cbf are stored for the all 4x4 in TU */
9030
0
                {
9031
0
                    WORD32 i, j;
9032
0
                    nbr_4x4_t *ps_tmp_4x4;
9033
0
                    ps_tmp_4x4 = ps_cur_nbr_4x4;
9034
9035
0
                    for(i = 0; i < num_4x4_in_tu; i++)
9036
0
                    {
9037
0
                        for(j = 0; j < num_4x4_in_tu; j++)
9038
0
                        {
9039
0
                            ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp;
9040
0
                            ps_tmp_4x4[j].b1_y_cbf = cbf;
9041
0
                        }
9042
                        /* row level update*/
9043
0
                        ps_tmp_4x4 += num_4x4_in_cu;
9044
0
                    }
9045
0
                }
9046
0
            }
9047
0
        }
9048
4.28M
        else
9049
4.28M
        {
9050
4.28M
            zero_cols = ps_tu_enc_loop_temp_prms->u4_luma_zero_col;
9051
4.28M
            zero_rows = ps_tu_enc_loop_temp_prms->u4_luma_zero_row;
9052
9053
4.28M
            if(ps_prms->u1_will_cabac_state_change)
9054
4.28M
            {
9055
4.28M
                num_bytes = ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed;
9056
4.28M
            }
9057
0
            else
9058
0
            {
9059
0
                num_bytes = 0;
9060
0
            }
9061
9062
            /* copy luma ecd data to final buffer */
9063
4.28M
            memcpy(pu1_final_ecd_data, pu1_old_ecd_data, num_bytes);
9064
9065
4.28M
            pu1_old_ecd_data += num_bytes;
9066
9067
4.28M
            au1_is_recon_available[0] = 0;
9068
4.28M
        }
9069
9070
        /**-------- Compute Recon data (Do IT & Recon) : Luma  -----------**/
9071
4.28M
        if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9072
4.26M
           (!u1_compute_spatial_ssd_luma ||
9073
0
            (!au1_is_recon_available[0] && u1_compute_spatial_ssd_luma)))
9074
4.26M
        {
9075
4.26M
            if(!ps_recon_datastore->u1_is_lumaRecon_available ||
9076
2.61M
               (ps_recon_datastore->u1_is_lumaRecon_available &&
9077
2.61M
                (UCHAR_MAX == ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr])))
9078
1.91M
            {
9079
1.91M
                ihevce_it_recon_fxn(
9080
1.91M
                    ps_ctxt,
9081
1.91M
                    pi2_cur_deq_data,
9082
1.91M
                    cu_size,
9083
1.91M
                    pu1_cur_pred,
9084
1.91M
                    pred_strd,
9085
1.91M
                    pu1_cur_luma_recon,
9086
1.91M
                    recon_luma_strd,
9087
1.91M
                    pu1_final_ecd_data,
9088
1.91M
                    trans_size,
9089
1.91M
                    packed_pred_mode,
9090
1.91M
                    ps_tu->b1_y_cbf,
9091
1.91M
                    zero_cols,
9092
1.91M
                    zero_rows);
9093
1.91M
            }
9094
2.34M
            else if(
9095
2.34M
                ps_recon_datastore->u1_is_lumaRecon_available &&
9096
2.34M
                (UCHAR_MAX != ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]))
9097
2.34M
            {
9098
2.34M
                UWORD8 *pu1_recon_src =
9099
2.34M
                    ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
9100
2.34M
                         [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]]) +
9101
2.34M
                    cu_pos_x_in_pix + cu_pos_y_in_pix * ps_recon_datastore->i4_lumaRecon_stride;
9102
9103
2.34M
                ps_ctxt->s_cmn_opt_func.pf_copy_2d(
9104
2.34M
                    pu1_cur_luma_recon,
9105
2.34M
                    recon_luma_strd,
9106
2.34M
                    pu1_recon_src,
9107
2.34M
                    ps_recon_datastore->i4_lumaRecon_stride,
9108
2.34M
                    trans_size,
9109
2.34M
                    trans_size);
9110
2.34M
            }
9111
4.26M
        }
9112
9113
4.28M
        if(ps_prms->u1_will_cabac_state_change)
9114
4.28M
        {
9115
4.28M
            ps_tu_enc_loop->i4_luma_coeff_offset = total_bytes;
9116
4.28M
        }
9117
9118
4.28M
        pu1_final_ecd_data += num_bytes;
9119
        /* update total bytes consumed */
9120
4.28M
        total_bytes += num_bytes;
9121
9122
4.28M
        u1_is_cu_coded |= ps_tu->b1_y_cbf;
9123
9124
        /***************** Compute T,Q,IQ,IT & Recon for Chroma ********************/
9125
4.28M
        if(1 == chrm_present_flag)
9126
3.16M
        {
9127
3.16M
            pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix;
9128
3.16M
            pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) +
9129
3.16M
                                (u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd));
9130
9131
3.16M
            pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix;
9132
3.16M
            pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) +
9133
3.16M
                                 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd));
9134
9135
3.16M
            pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix;
9136
3.16M
            pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) +
9137
3.16M
                                    (u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd));
9138
9139
3.16M
            pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix;
9140
3.16M
            pi2_cur_deq_data_chrm += ((cu_pos_y_in_pix >> 1) * cu_size) +
9141
3.16M
                                     (u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size));
9142
9143
3.16M
            if(INCLUDE_CHROMA_DURING_TU_RECURSION &&
9144
0
               (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0) &&
9145
0
               (PRED_MODE_INTRA != packed_pred_mode))
9146
0
            {
9147
0
                WORD32 i4_num_bytes;
9148
0
                UWORD8 *pu1_chroma_pred;
9149
0
                UWORD8 *pu1_chroma_recon;
9150
0
                WORD16 *pi2_chroma_deq;
9151
0
                UWORD32 u4_zero_col;
9152
0
                UWORD32 u4_zero_row;
9153
9154
0
                for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
9155
0
                {
9156
0
                    WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
9157
0
                    WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
9158
0
                    WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
9159
9160
0
                    if(0 == u1_is_422)
9161
0
                    {
9162
0
                        i4_subtu_pos_y >>= 1;
9163
0
                    }
9164
9165
0
                    pu1_chroma_pred =
9166
0
                        pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
9167
0
                    pu1_chroma_recon = pu1_cur_chroma_recon +
9168
0
                                       (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
9169
0
                    pi2_chroma_deq =
9170
0
                        pi2_cur_deq_data_chrm + (i4_subtu_idx * chroma_trans_size * cu_size);
9171
9172
0
                    u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx];
9173
0
                    u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx];
9174
9175
0
                    if(ps_prms->u1_will_cabac_state_change)
9176
0
                    {
9177
0
                        i4_num_bytes =
9178
0
                            ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx];
9179
0
                    }
9180
0
                    else
9181
0
                    {
9182
0
                        i4_num_bytes = 0;
9183
0
                    }
9184
9185
0
                    memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes);
9186
9187
0
                    pu1_old_ecd_data += i4_num_bytes;
9188
9189
0
                    au1_is_recon_available[U_PLANE] = 0;
9190
9191
0
                    if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9192
0
                       (!u1_compute_spatial_ssd_chroma ||
9193
0
                        (!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma)))
9194
0
                    {
9195
0
                        if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9196
0
                           (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9197
0
                            (UCHAR_MAX ==
9198
0
                             ps_recon_datastore
9199
0
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])))
9200
0
                        {
9201
0
                            ihevce_chroma_it_recon_fxn(
9202
0
                                ps_ctxt,
9203
0
                                pi2_chroma_deq,
9204
0
                                cu_size,
9205
0
                                pu1_chroma_pred,
9206
0
                                pred_chrm_strd,
9207
0
                                pu1_chroma_recon,
9208
0
                                recon_chrma_strd,
9209
0
                                pu1_final_ecd_data,
9210
0
                                chroma_trans_size,
9211
0
                                (i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1,
9212
0
                                u4_zero_col,
9213
0
                                u4_zero_row,
9214
0
                                U_PLANE);
9215
0
                        }
9216
0
                        else if(
9217
0
                            ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9218
0
                            (UCHAR_MAX !=
9219
0
                             ps_recon_datastore
9220
0
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))
9221
0
                        {
9222
0
                            UWORD8 *pu1_recon_src =
9223
0
                                ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9224
0
                                     [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9225
0
                                          [U_PLANE][ctr][i4_subtu_idx]]) +
9226
0
                                i4_subtu_pos_x +
9227
0
                                i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9228
9229
0
                            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9230
0
                                pu1_recon_src,
9231
0
                                ps_recon_datastore->i4_lumaRecon_stride,
9232
0
                                pu1_chroma_recon,
9233
0
                                recon_chrma_strd,
9234
0
                                chroma_trans_size,
9235
0
                                chroma_trans_size,
9236
0
                                U_PLANE);
9237
0
                        }
9238
0
                    }
9239
9240
0
                    u1_is_cu_coded |=
9241
0
                        ((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf);
9242
9243
0
                    pu1_final_ecd_data += i4_num_bytes;
9244
0
                    total_bytes += i4_num_bytes;
9245
0
                }
9246
9247
0
                for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
9248
0
                {
9249
0
                    WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
9250
0
                    WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
9251
0
                    WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
9252
9253
0
                    if(0 == u1_is_422)
9254
0
                    {
9255
0
                        i4_subtu_pos_y >>= 1;
9256
0
                    }
9257
9258
0
                    pu1_chroma_pred =
9259
0
                        pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
9260
0
                    pu1_chroma_recon = pu1_cur_chroma_recon +
9261
0
                                       (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
9262
0
                    pi2_chroma_deq = pi2_cur_deq_data_chrm +
9263
0
                                     (i4_subtu_idx * chroma_trans_size * cu_size) +
9264
0
                                     chroma_trans_size;
9265
9266
0
                    u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx];
9267
0
                    u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx];
9268
9269
0
                    if(ps_prms->u1_will_cabac_state_change)
9270
0
                    {
9271
0
                        i4_num_bytes =
9272
0
                            ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx];
9273
0
                    }
9274
0
                    else
9275
0
                    {
9276
0
                        i4_num_bytes = 0;
9277
0
                    }
9278
9279
0
                    memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes);
9280
9281
0
                    pu1_old_ecd_data += i4_num_bytes;
9282
9283
0
                    au1_is_recon_available[V_PLANE] = 0;
9284
9285
0
                    if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9286
0
                       (!u1_compute_spatial_ssd_chroma ||
9287
0
                        (!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma)))
9288
0
                    {
9289
0
                        if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9290
0
                           (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9291
0
                            (UCHAR_MAX ==
9292
0
                             ps_recon_datastore
9293
0
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])))
9294
0
                        {
9295
0
                            ihevce_chroma_it_recon_fxn(
9296
0
                                ps_ctxt,
9297
0
                                pi2_chroma_deq,
9298
0
                                cu_size,
9299
0
                                pu1_chroma_pred,
9300
0
                                pred_chrm_strd,
9301
0
                                pu1_chroma_recon,
9302
0
                                recon_chrma_strd,
9303
0
                                pu1_final_ecd_data,
9304
0
                                chroma_trans_size,
9305
0
                                (i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1,
9306
0
                                u4_zero_col,
9307
0
                                u4_zero_row,
9308
0
                                V_PLANE);
9309
0
                        }
9310
0
                        else if(
9311
0
                            ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9312
0
                            (UCHAR_MAX !=
9313
0
                             ps_recon_datastore
9314
0
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))
9315
0
                        {
9316
0
                            UWORD8 *pu1_recon_src =
9317
0
                                ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9318
0
                                     [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9319
0
                                          [V_PLANE][ctr][i4_subtu_idx]]) +
9320
0
                                i4_subtu_pos_x +
9321
0
                                i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9322
9323
0
                            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9324
0
                                pu1_recon_src,
9325
0
                                ps_recon_datastore->i4_lumaRecon_stride,
9326
0
                                pu1_chroma_recon,
9327
0
                                recon_chrma_strd,
9328
0
                                chroma_trans_size,
9329
0
                                chroma_trans_size,
9330
0
                                V_PLANE);
9331
0
                        }
9332
0
                    }
9333
9334
0
                    u1_is_cu_coded |=
9335
0
                        ((1 == i4_subtu_idx) ? ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf);
9336
9337
0
                    pu1_final_ecd_data += i4_num_bytes;
9338
0
                    total_bytes += i4_num_bytes;
9339
0
                }
9340
0
            }
9341
3.16M
            else
9342
3.16M
            {
9343
3.16M
                WORD32 cb_zero_col, cb_zero_row, cr_zero_col, cr_zero_row;
9344
9345
6.33M
                for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
9346
3.16M
                {
9347
3.16M
                    WORD32 cb_cbf, cr_cbf;
9348
3.16M
                    WORD32 cb_num_bytes, cr_num_bytes;
9349
9350
3.16M
                    WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
9351
9352
3.16M
                    WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
9353
3.16M
                    WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
9354
9355
3.16M
                    if(0 == u1_is_422)
9356
3.16M
                    {
9357
3.16M
                        i4_subtu_pos_y >>= 1;
9358
3.16M
                    }
9359
9360
3.16M
                    pu1_cur_src_chrm += (i4_subtu_idx * chroma_trans_size * src_chrm_strd);
9361
3.16M
                    pu1_cur_pred_chrm += (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
9362
3.16M
                    pu1_cur_chroma_recon += (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
9363
3.16M
                    pi2_cur_deq_data_chrm += (i4_subtu_idx * chroma_trans_size * cu_size);
9364
9365
3.16M
                    if((PRED_MODE_INTRA == packed_pred_mode) &&
9366
1.63M
                       (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data))
9367
401k
                    {
9368
401k
                        WORD32 nbr_flags, left_strd_chrm, chrm_pred_func_idx;
9369
401k
                        UWORD8 *pu1_left_chrm;
9370
401k
                        UWORD8 *pu1_top_chrm;
9371
401k
                        UWORD8 *pu1_top_left_chrm;
9372
9373
401k
                        nbr_flags = ihevce_get_intra_chroma_tu_nbr(
9374
401k
                            *pu4_nbr_flags, i4_subtu_idx, chroma_trans_size, u1_is_422);
9375
9376
                        /* left cu boundary */
9377
401k
                        if(0 == i4_subtu_pos_x)
9378
341k
                        {
9379
341k
                            left_strd_chrm = ps_chrm_cu_buf_prms->i4_cu_left_stride;
9380
341k
                            pu1_left_chrm =
9381
341k
                                ps_chrm_cu_buf_prms->pu1_cu_left + i4_subtu_pos_y * left_strd_chrm;
9382
341k
                        }
9383
59.3k
                        else
9384
59.3k
                        {
9385
59.3k
                            pu1_left_chrm = pu1_cur_chroma_recon - 2;
9386
59.3k
                            left_strd_chrm = recon_chrma_strd;
9387
59.3k
                        }
9388
9389
                        /* top cu boundary */
9390
401k
                        if(0 == i4_subtu_pos_y)
9391
341k
                        {
9392
341k
                            pu1_top_chrm = ps_chrm_cu_buf_prms->pu1_cu_top + i4_subtu_pos_x;
9393
341k
                        }
9394
59.3k
                        else
9395
59.3k
                        {
9396
59.3k
                            pu1_top_chrm = pu1_cur_chroma_recon - recon_chrma_strd;
9397
59.3k
                        }
9398
9399
                        /* by default top left is set to cu top left */
9400
401k
                        pu1_top_left_chrm = ps_chrm_cu_buf_prms->pu1_cu_top_left;
9401
9402
                        /* top left based on position */
9403
401k
                        if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x))
9404
29.6k
                        {
9405
29.6k
                            pu1_top_left_chrm = pu1_left_chrm - left_strd_chrm;
9406
29.6k
                        }
9407
371k
                        else if(0 != i4_subtu_pos_x)
9408
59.3k
                        {
9409
59.3k
                            pu1_top_left_chrm = pu1_top_chrm - 2;
9410
59.3k
                        }
9411
9412
                        /* call the chroma reference array substitution */
9413
401k
                        ihevc_intra_pred_chroma_ref_substitution_fptr(
9414
401k
                            pu1_top_left_chrm,
9415
401k
                            pu1_top_chrm,
9416
401k
                            pu1_left_chrm,
9417
401k
                            left_strd_chrm,
9418
401k
                            chroma_trans_size,
9419
401k
                            nbr_flags,
9420
401k
                            (UWORD8 *)ps_ctxt->pv_ref_sub_out,
9421
401k
                            1);
9422
9423
                        /* use the look up to get the function idx */
9424
401k
                        chrm_pred_func_idx = g_i4_ip_funcs[chroma_pred_mode];
9425
9426
                        /* call the intra prediction function */
9427
401k
                        ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
9428
401k
                            (UWORD8 *)ps_ctxt->pv_ref_sub_out,
9429
401k
                            1,
9430
401k
                            pu1_cur_pred_chrm,
9431
401k
                            pred_chrm_strd,
9432
401k
                            chroma_trans_size,
9433
401k
                            chroma_pred_mode);
9434
401k
                    }
9435
9436
                    /**---------- Compute iq&coeff data if required : Chroma ------------**/
9437
3.16M
                    if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data)
9438
799k
                    {
9439
799k
                        WORD32 perform_sbh, perform_rdoq, temp_bits;
9440
9441
799k
                        if(ps_prms->u1_recompute_sbh_and_rdoq)
9442
0
                        {
9443
0
                            perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH);
9444
0
                            perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ);
9445
0
                        }
9446
799k
                        else
9447
799k
                        {
9448
                            /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
9449
799k
                            perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh;
9450
                            /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
9451
                        we would have to do RDOQ again.*/
9452
799k
                            perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq;
9453
799k
                        }
9454
9455
                        /* populate the coeffs scan idx */
9456
799k
                        ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
9457
9458
799k
                        if(PRED_MODE_INTRA == packed_pred_mode)
9459
401k
                        {
9460
                            /* for 4x4 transforms, the scan is chosen based on the intra pred mode */
9461
401k
                            if(4 == chroma_trans_size)
9462
217k
                            {
9463
                                /* for modes from 22 upto 30 horizontal scan is used */
9464
217k
                                if((chroma_pred_mode > 21) && (chroma_pred_mode < 31))
9465
22.6k
                                {
9466
22.6k
                                    ps_ctxt->i4_scan_idx = SCAN_HORZ;
9467
22.6k
                                }
9468
                                /* for modes from 6 upto 14 vertical scan is used */
9469
194k
                                else if((chroma_pred_mode > 5) && (chroma_pred_mode < 15))
9470
70.4k
                                {
9471
70.4k
                                    ps_ctxt->i4_scan_idx = SCAN_VERT;
9472
70.4k
                                }
9473
217k
                            }
9474
401k
                        }
9475
9476
#if DISABLE_RDOQ_INTRA
9477
                        if(PRED_MODE_INTRA == packed_pred_mode)
9478
                        {
9479
                            perform_rdoq = 0;
9480
                        }
9481
#endif
9482
9483
                        /* RDOPT copy States :  TU init (best until prev TU) to current */
9484
799k
                        COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9485
799k
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9486
799k
                                    .s_cabac_ctxt.au1_ctxt_models[0] +
9487
799k
                                IHEVC_CAB_COEFFX_PREFIX,
9488
799k
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9489
799k
                            IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9490
9491
799k
                        ASSERT(rd_opt_best_idx == ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx);
9492
                        /*If BEST candidate RDOQ is enabled, either no coef level rdoq or CU level rdoq has to be enabled
9493
                    so that all candidates and best candidate are quantized with same rounding factor  */
9494
799k
                        if(1 == perform_rdoq)
9495
0
                        {
9496
0
                            ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING);
9497
0
                        }
9498
9499
799k
                        if(!ps_best_cu_prms->u1_skip_flag ||
9500
4.30k
                           !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
9501
799k
                        {
9502
                            /* Cb */
9503
799k
                            cb_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
9504
799k
                                ps_ctxt,
9505
799k
                                pu1_cur_pred_chrm,
9506
799k
                                pred_chrm_strd,
9507
799k
                                pu1_cur_src_chrm,
9508
799k
                                src_chrm_strd,
9509
799k
                                pi2_cur_deq_data_chrm,
9510
799k
                                cu_size,
9511
799k
                                pu1_chrm_recon,
9512
799k
                                recon_chrma_strd,
9513
799k
                                pu1_final_ecd_data,
9514
799k
                                pu1_csbf_buf,
9515
799k
                                csbf_strd,
9516
799k
                                chroma_trans_size,
9517
799k
                                ps_ctxt->i4_scan_idx,
9518
799k
                                (PRED_MODE_INTRA == packed_pred_mode),
9519
799k
                                &cb_num_bytes,
9520
799k
                                &temp_bits,
9521
799k
                                &cb_zero_col,
9522
799k
                                &cb_zero_row,
9523
799k
                                &au1_is_recon_available[U_PLANE],
9524
799k
                                perform_sbh,
9525
799k
                                perform_rdoq,
9526
799k
                                &i8_ssd,
9527
799k
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
9528
799k
                                !ps_ctxt->u1_is_refPic
9529
799k
                                    ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
9530
799k
                                    : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
9531
764k
                                       (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
9532
764k
                                          100.0,
9533
799k
                                ps_prms->u1_is_cu_noisy,
9534
799k
#endif
9535
799k
                                ps_best_cu_prms->u1_skip_flag &&
9536
4.30k
                                    ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt,
9537
799k
                                u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD
9538
799k
                                                              : FREQUENCY_DOMAIN_SSD,
9539
799k
                                U_PLANE);
9540
799k
                        }
9541
0
                        else
9542
0
                        {
9543
0
                            cb_cbf = 0;
9544
0
                            temp_bits = 0;
9545
0
                            cb_num_bytes = 0;
9546
0
                            au1_is_recon_available[U_PLANE] = 0;
9547
0
                            cb_zero_col = 0;
9548
0
                            cb_zero_row = 0;
9549
0
                        }
9550
9551
                        /* Accumulate chroma residual bits */
9552
799k
                        ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits;
9553
9554
                        /* RDOPT copy States :  New updated after curr TU to TU init */
9555
799k
                        if(0 != cb_cbf)
9556
209k
                        {
9557
209k
                            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9558
209k
                                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9559
209k
                                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9560
209k
                                        .s_cabac_ctxt.au1_ctxt_models[0] +
9561
209k
                                    IHEVC_CAB_COEFFX_PREFIX,
9562
209k
                                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9563
209k
                        }
9564
                        /* RDOPT copy States :  Restoring back the Cb init state to Cr */
9565
589k
                        else
9566
589k
                        {
9567
589k
                            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9568
589k
                                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9569
589k
                                        .s_cabac_ctxt.au1_ctxt_models[0] +
9570
589k
                                    IHEVC_CAB_COEFFX_PREFIX,
9571
589k
                                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9572
589k
                                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9573
589k
                        }
9574
9575
799k
                        if(!ps_best_cu_prms->u1_skip_flag ||
9576
4.30k
                           !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
9577
799k
                        {
9578
                            /* Cr */
9579
799k
                            cr_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
9580
799k
                                ps_ctxt,
9581
799k
                                pu1_cur_pred_chrm,
9582
799k
                                pred_chrm_strd,
9583
799k
                                pu1_cur_src_chrm,
9584
799k
                                src_chrm_strd,
9585
799k
                                pi2_cur_deq_data_chrm + chroma_trans_size,
9586
799k
                                cu_size,
9587
799k
                                pu1_chrm_recon,
9588
799k
                                recon_chrma_strd,
9589
799k
                                pu1_final_ecd_data + cb_num_bytes,
9590
799k
                                pu1_csbf_buf,
9591
799k
                                csbf_strd,
9592
799k
                                chroma_trans_size,
9593
799k
                                ps_ctxt->i4_scan_idx,
9594
799k
                                (PRED_MODE_INTRA == packed_pred_mode),
9595
799k
                                &cr_num_bytes,
9596
799k
                                &temp_bits,
9597
799k
                                &cr_zero_col,
9598
799k
                                &cr_zero_row,
9599
799k
                                &au1_is_recon_available[V_PLANE],
9600
799k
                                perform_sbh,
9601
799k
                                perform_rdoq,
9602
799k
                                &i8_ssd,
9603
799k
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
9604
799k
                                !ps_ctxt->u1_is_refPic
9605
799k
                                    ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
9606
799k
                                    : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
9607
764k
                                       (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
9608
764k
                                          100.0,
9609
799k
                                ps_prms->u1_is_cu_noisy,
9610
799k
#endif
9611
799k
                                ps_best_cu_prms->u1_skip_flag &&
9612
4.30k
                                    ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt,
9613
799k
                                u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD
9614
799k
                                                              : FREQUENCY_DOMAIN_SSD,
9615
799k
                                V_PLANE);
9616
799k
                        }
9617
0
                        else
9618
0
                        {
9619
0
                            cr_cbf = 0;
9620
0
                            temp_bits = 0;
9621
0
                            cr_num_bytes = 0;
9622
0
                            au1_is_recon_available[V_PLANE] = 0;
9623
0
                            cr_zero_col = 0;
9624
0
                            cr_zero_row = 0;
9625
0
                        }
9626
9627
                        /* Accumulate chroma residual bits */
9628
799k
                        ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits;
9629
9630
                        /* RDOPT copy States :  New updated after curr TU to TU init */
9631
799k
                        if(0 != cr_cbf)
9632
205k
                        {
9633
205k
                            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9634
205k
                                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9635
205k
                                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9636
205k
                                        .s_cabac_ctxt.au1_ctxt_models[0] +
9637
205k
                                    IHEVC_CAB_COEFFX_PREFIX,
9638
205k
                                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9639
205k
                        }
9640
9641
799k
                        if(0 == i4_subtu_idx)
9642
799k
                        {
9643
799k
                            ps_tu->b1_cb_cbf = cb_cbf;
9644
799k
                            ps_tu->b1_cr_cbf = cr_cbf;
9645
799k
                        }
9646
0
                        else
9647
0
                        {
9648
0
                            ps_tu->b1_cb_cbf_subtu1 = cb_cbf;
9649
0
                            ps_tu->b1_cr_cbf_subtu1 = cr_cbf;
9650
0
                        }
9651
799k
                    }
9652
2.36M
                    else
9653
2.36M
                    {
9654
2.36M
                        cb_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx];
9655
2.36M
                        cb_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx];
9656
2.36M
                        cr_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx];
9657
2.36M
                        cr_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx];
9658
9659
2.36M
                        if(ps_prms->u1_will_cabac_state_change)
9660
2.36M
                        {
9661
2.36M
                            cb_num_bytes =
9662
2.36M
                                ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx];
9663
2.36M
                        }
9664
0
                        else
9665
0
                        {
9666
0
                            cb_num_bytes = 0;
9667
0
                        }
9668
9669
2.36M
                        if(ps_prms->u1_will_cabac_state_change)
9670
2.36M
                        {
9671
2.36M
                            cr_num_bytes =
9672
2.36M
                                ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx];
9673
2.36M
                        }
9674
0
                        else
9675
0
                        {
9676
0
                            cr_num_bytes = 0;
9677
0
                        }
9678
9679
                        /* copy cb ecd data to final buffer */
9680
2.36M
                        memcpy(pu1_final_ecd_data, pu1_chrm_old_ecd_data, cb_num_bytes);
9681
9682
2.36M
                        pu1_chrm_old_ecd_data += cb_num_bytes;
9683
9684
                        /* copy cr ecd data to final buffer (placed right after the cb data) */
9685
2.36M
                        memcpy(
9686
2.36M
                            (pu1_final_ecd_data + cb_num_bytes),
9687
2.36M
                            pu1_chrm_old_ecd_data,
9688
2.36M
                            cr_num_bytes);
9689
9690
2.36M
                        pu1_chrm_old_ecd_data += cr_num_bytes;
9691
9692
2.36M
                        au1_is_recon_available[U_PLANE] = 0;
9693
2.36M
                        au1_is_recon_available[V_PLANE] = 0;
9694
2.36M
                    }
9695
9696
                    /**-------- Compute Recon data (Do IT & Recon) : Chroma  -----------**/
9697
3.16M
                    if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9698
3.14M
                       (!u1_compute_spatial_ssd_chroma ||
9699
0
                        (!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma)))
9700
3.14M
                    {
9701
3.14M
                        if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9702
956k
                           (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9703
956k
                            (UCHAR_MAX ==
9704
956k
                             ps_recon_datastore
9705
956k
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])))
9706
2.98M
                        {
9707
2.98M
                            ihevce_chroma_it_recon_fxn(
9708
2.98M
                                ps_ctxt,
9709
2.98M
                                pi2_cur_deq_data_chrm,
9710
2.98M
                                cu_size,
9711
2.98M
                                pu1_cur_pred_chrm,
9712
2.98M
                                pred_chrm_strd,
9713
2.98M
                                pu1_cur_chroma_recon,
9714
2.98M
                                recon_chrma_strd,
9715
2.98M
                                pu1_final_ecd_data,
9716
2.98M
                                chroma_trans_size,
9717
2.98M
                                (i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1,
9718
2.98M
                                cb_zero_col,
9719
2.98M
                                cb_zero_row,
9720
2.98M
                                U_PLANE);
9721
2.98M
                        }
9722
162k
                        else if(
9723
162k
                            ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9724
162k
                            (UCHAR_MAX !=
9725
162k
                             ps_recon_datastore
9726
162k
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))
9727
162k
                        {
9728
162k
                            UWORD8 *pu1_recon_src =
9729
162k
                                ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9730
162k
                                     [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9731
162k
                                          [U_PLANE][ctr][i4_subtu_idx]]) +
9732
162k
                                i4_subtu_pos_x +
9733
162k
                                i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9734
9735
162k
                            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9736
162k
                                pu1_recon_src,
9737
162k
                                ps_recon_datastore->i4_lumaRecon_stride,
9738
162k
                                pu1_cur_chroma_recon,
9739
162k
                                recon_chrma_strd,
9740
162k
                                chroma_trans_size,
9741
162k
                                chroma_trans_size,
9742
162k
                                U_PLANE);
9743
162k
                        }
9744
3.14M
                    }
9745
9746
3.16M
                    u1_is_cu_coded |=
9747
3.16M
                        ((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf);
9748
9749
3.16M
                    if(ps_prms->u1_will_cabac_state_change)
9750
3.16M
                    {
9751
3.16M
                        ps_tu_enc_loop->ai4_cb_coeff_offset[i4_subtu_idx] = total_bytes;
9752
3.16M
                    }
9753
9754
3.16M
                    pu1_final_ecd_data += cb_num_bytes;
9755
                    /* update total bytes consumed */
9756
3.16M
                    total_bytes += cb_num_bytes;
9757
9758
3.16M
                    if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9759
3.14M
                       (!u1_compute_spatial_ssd_chroma ||
9760
0
                        (!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma)))
9761
3.14M
                    {
9762
3.14M
                        if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9763
956k
                           (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9764
956k
                            (UCHAR_MAX ==
9765
956k
                             ps_recon_datastore
9766
956k
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])))
9767
2.98M
                        {
9768
2.98M
                            ihevce_chroma_it_recon_fxn(
9769
2.98M
                                ps_ctxt,
9770
2.98M
                                pi2_cur_deq_data_chrm + chroma_trans_size,
9771
2.98M
                                cu_size,
9772
2.98M
                                pu1_cur_pred_chrm,
9773
2.98M
                                pred_chrm_strd,
9774
2.98M
                                pu1_cur_chroma_recon,
9775
2.98M
                                recon_chrma_strd,
9776
2.98M
                                pu1_final_ecd_data,
9777
2.98M
                                chroma_trans_size,
9778
2.98M
                                (i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1,
9779
2.98M
                                cr_zero_col,
9780
2.98M
                                cr_zero_row,
9781
2.98M
                                V_PLANE);
9782
2.98M
                        }
9783
162k
                        else if(
9784
162k
                            ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9785
162k
                            (UCHAR_MAX !=
9786
162k
                             ps_recon_datastore
9787
162k
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))
9788
162k
                        {
9789
162k
                            UWORD8 *pu1_recon_src =
9790
162k
                                ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9791
162k
                                     [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9792
162k
                                          [V_PLANE][ctr][i4_subtu_idx]]) +
9793
162k
                                i4_subtu_pos_x +
9794
162k
                                i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9795
9796
162k
                            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9797
162k
                                pu1_recon_src,
9798
162k
                                ps_recon_datastore->i4_lumaRecon_stride,
9799
162k
                                pu1_cur_chroma_recon,
9800
162k
                                recon_chrma_strd,
9801
162k
                                chroma_trans_size,
9802
162k
                                chroma_trans_size,
9803
162k
                                V_PLANE);
9804
162k
                        }
9805
3.14M
                    }
9806
9807
3.16M
                    u1_is_cu_coded |=
9808
3.16M
                        ((1 == i4_subtu_idx) ? ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf);
9809
9810
3.16M
                    if(ps_prms->u1_will_cabac_state_change)
9811
3.16M
                    {
9812
3.16M
                        ps_tu_enc_loop->ai4_cr_coeff_offset[i4_subtu_idx] = total_bytes;
9813
3.16M
                    }
9814
9815
3.16M
                    pu1_final_ecd_data += cr_num_bytes;
9816
                    /* update total bytes consumed */
9817
3.16M
                    total_bytes += cr_num_bytes;
9818
3.16M
                }
9819
3.16M
            }
9820
3.16M
        }
9821
1.12M
        else
9822
1.12M
        {
9823
1.12M
            ps_tu_enc_loop->ai4_cb_coeff_offset[0] = total_bytes;
9824
1.12M
            ps_tu_enc_loop->ai4_cr_coeff_offset[0] = total_bytes;
9825
1.12M
            ps_tu_enc_loop->ai4_cb_coeff_offset[1] = total_bytes;
9826
1.12M
            ps_tu_enc_loop->ai4_cr_coeff_offset[1] = total_bytes;
9827
1.12M
            ps_tu->b1_cb_cbf = 0;
9828
1.12M
            ps_tu->b1_cr_cbf = 0;
9829
1.12M
            ps_tu->b1_cb_cbf_subtu1 = 0;
9830
1.12M
            ps_tu->b1_cr_cbf_subtu1 = 0;
9831
1.12M
        }
9832
9833
        /* Update to next TU */
9834
4.28M
        ps_tu_enc_loop++;
9835
4.28M
        ps_tu_enc_loop_temp_prms++;
9836
9837
4.28M
        pu4_nbr_flags++;
9838
4.28M
        pu1_intra_pred_mode++;
9839
9840
        /* Do not set the nbr map for the last TU in the CU */
9841
4.28M
        if((num_tu_in_cu - 1) != ctr)
9842
1.91M
        {
9843
            /* set the neighbour map to 1 */
9844
1.91M
            ihevce_set_nbr_map(
9845
1.91M
                ps_ctxt->pu1_ctb_nbr_map,
9846
1.91M
                ps_ctxt->i4_nbr_map_strd,
9847
1.91M
                cu_pos_x_in_4x4,
9848
1.91M
                cu_pos_y_in_4x4,
9849
1.91M
                (trans_size >> 2),
9850
1.91M
                1);
9851
1.91M
        }
9852
4.28M
    }
9853
9854
2.37M
    if(ps_prms->u1_will_cabac_state_change)
9855
2.37M
    {
9856
2.37M
        ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded;
9857
9858
        /* Modify skip flag, if luma is skipped & Chroma is coded */
9859
2.37M
        if((1 == u1_is_cu_coded) && (PRED_MODE_SKIP == packed_pred_mode))
9860
1.65k
        {
9861
1.65k
            ps_best_cu_prms->u1_skip_flag = 0;
9862
1.65k
        }
9863
2.37M
    }
9864
9865
    /* during chroma evaluation if skip decision was overwritten      */
9866
    /* then the current skip candidate is set to a non skip candidate */
9867
2.37M
    if(PRED_MODE_INTRA != packed_pred_mode)
9868
1.06M
    {
9869
1.06M
        ps_best_inter_cand->b1_skip_flag = ps_best_cu_prms->u1_skip_flag;
9870
1.06M
    }
9871
9872
    /**------------- Compute header data if required --------------**/
9873
2.37M
    if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data)
9874
544k
    {
9875
544k
        WORD32 cbf_bits;
9876
544k
        WORD32 cu_bits;
9877
544k
        WORD32 unit_4x4_size = cu_size >> 2;
9878
9879
        /*Restoring the running reference into the best rdopt_ctxt cabac states which will then
9880
        be copied as the base reference for the next cu
9881
        Assumption : We are ensuring that the u1_eval_header_data flag is set to 1 only if either
9882
        luma and chroma are being reevaluated*/
9883
544k
        COPY_CABAC_STATES(
9884
544k
            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9885
544k
                 .s_cabac_ctxt.au1_ctxt_models[0],
9886
544k
            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
9887
544k
            IHEVC_CAB_CTXT_END);
9888
9889
        /* get the neighbour availability flags for current cu  */
9890
544k
        ihevce_get_only_nbr_flag(
9891
544k
            &s_nbr,
9892
544k
            ps_ctxt->pu1_ctb_nbr_map,
9893
544k
            ps_ctxt->i4_nbr_map_strd,
9894
544k
            (cu_pos_x << 1),
9895
544k
            (cu_pos_y << 1),
9896
544k
            unit_4x4_size,
9897
544k
            unit_4x4_size);
9898
9899
544k
        cu_bits = ihevce_entropy_rdo_encode_cu(
9900
544k
            &ps_ctxt->s_rdopt_entropy_ctxt,
9901
544k
            ps_best_cu_prms,
9902
544k
            cu_pos_x,
9903
544k
            cu_pos_y,
9904
544k
            cu_size,
9905
544k
            ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
9906
544k
                                           : s_nbr.u1_top_avail,
9907
544k
            s_nbr.u1_left_avail,
9908
544k
            (pu1_final_ecd_data - total_bytes),
9909
544k
            &cbf_bits);
9910
9911
        /* cbf bits are excluded from header bits, instead considered as texture bits */
9912
544k
        ps_best_cu_prms->u4_cu_hdr_bits = cu_bits - cbf_bits;
9913
544k
        ps_best_cu_prms->u4_cu_cbf_bits = cbf_bits;
9914
544k
    }
9915
9916
2.37M
    if(ps_prms->u1_will_cabac_state_change)
9917
2.37M
    {
9918
2.37M
        ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes;
9919
2.37M
    }
9920
2.37M
}
9921
9922
/*!
9923
******************************************************************************
9924
* \if Function name : ihevce_set_eval_flags \endif
9925
*
9926
* \brief
9927
*    Function which decides which eval flags have to be set based on present
9928
*    and RDOQ conditions
9929
*
9930
* \param[in] ps_ctxt : encoder ctxt pointer
9931
* \param[in] ps_enc_loop_bestprms : pointer to final cu params (enc_loop_cu_final_prms_t)
9932
*
9933
* \return
9934
*    None
9935
*
9936
* \author
9937
*  Ittiam
9938
*
9939
*****************************************************************************
9940
*/
9941
void ihevce_set_eval_flags(
9942
    ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_final_prms_t *ps_enc_loop_bestprms)
9943
2.37M
{
9944
2.37M
    WORD32 count = 0;
9945
9946
2.37M
    ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0;
9947
9948
2.37M
    ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data =
9949
2.37M
        !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
9950
9951
2.37M
    if(ps_ctxt->u1_disable_intra_eval && (!(ps_ctxt->i4_deblk_pad_hpel_cur_pic & 0x1)))
9952
13.3k
    {
9953
13.3k
        ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 0;
9954
13.3k
    }
9955
2.36M
    else
9956
2.36M
    {
9957
2.36M
        ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1;
9958
2.36M
    }
9959
9960
2.37M
    if((1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq) ||
9961
2.37M
       (1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh))
9962
0
    {
9963
        /* When rdoq is enabled only for the best candidate, in case of in Intra nTU
9964
        RDOQ might have altered the coeffs of the neighbour CU. As a result, the pred
9965
        for the current CU will change. Therefore, we need to reevaluate the pred data*/
9966
0
        if((ps_enc_loop_bestprms->u2_num_tus_in_cu > 1) &&
9967
0
           (ps_enc_loop_bestprms->u1_intra_flag == 1))
9968
0
        {
9969
0
            ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 1;
9970
0
            ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data = 1;
9971
0
        }
9972
0
        if(ps_enc_loop_bestprms->u1_skip_flag == 1)
9973
0
        {
9974
0
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
9975
0
            {
9976
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
9977
0
                    .b1_eval_luma_iq_and_coeff_data = 0;
9978
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
9979
0
                    .b1_eval_chroma_iq_and_coeff_data = 0;
9980
0
            }
9981
0
        }
9982
0
        else
9983
0
        {
9984
0
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
9985
0
            {
9986
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
9987
0
                    .b1_eval_luma_iq_and_coeff_data = 1;
9988
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
9989
0
                    .b1_eval_chroma_iq_and_coeff_data = 1;
9990
0
            }
9991
0
        }
9992
0
    }
9993
2.37M
    else
9994
2.37M
    {
9995
2.37M
        switch(ps_ctxt->i4_quality_preset)
9996
2.37M
        {
9997
1.49M
        case IHEVCE_QUALITY_P0:
9998
1.61M
        case IHEVCE_QUALITY_P2:
9999
1.83M
        case IHEVCE_QUALITY_P3:
10000
1.83M
        {
10001
4.97M
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
10002
3.14M
            {
10003
3.14M
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10004
3.14M
                    .b1_eval_luma_iq_and_coeff_data = 0;
10005
3.14M
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10006
3.14M
                    .b1_eval_chroma_iq_and_coeff_data =
10007
3.14M
                    !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10008
3.14M
            }
10009
10010
1.83M
            break;
10011
1.61M
        }
10012
132k
        case IHEVCE_QUALITY_P4:
10013
242k
        case IHEVCE_QUALITY_P5:
10014
242k
        {
10015
777k
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
10016
535k
            {
10017
535k
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10018
535k
                    .b1_eval_luma_iq_and_coeff_data = 0;
10019
535k
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10020
535k
                    .b1_eval_chroma_iq_and_coeff_data =
10021
535k
                    !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10022
535k
            }
10023
10024
242k
            break;
10025
132k
        }
10026
302k
        case IHEVCE_QUALITY_P6:
10027
302k
        {
10028
909k
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
10029
607k
            {
10030
607k
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10031
607k
                    .b1_eval_luma_iq_and_coeff_data = 0;
10032
607k
#if !ENABLE_CHROMA_TRACKING_OF_LUMA_CBF_IN_XS25
10033
607k
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10034
607k
                    .b1_eval_chroma_iq_and_coeff_data =
10035
607k
                    !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10036
#else
10037
                if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_temporal_layer_id > 1) &&
10038
                   (ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b3_size >= 2))
10039
                {
10040
                    ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10041
                        .b1_eval_chroma_iq_and_coeff_data =
10042
                        ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b1_y_cbf;
10043
                }
10044
                else
10045
                {
10046
                    ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10047
                        .b1_eval_chroma_iq_and_coeff_data =
10048
                        !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10049
                }
10050
#endif
10051
607k
            }
10052
10053
302k
            break;
10054
132k
        }
10055
0
        default:
10056
0
        {
10057
0
            break;
10058
132k
        }
10059
2.37M
        }
10060
2.37M
    }
10061
10062
    /* Not recomputing Luma pred-data and header data for any preset now */
10063
2.37M
    ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1;
10064
2.37M
}
10065
10066
/**
10067
******************************************************************************
10068
*
10069
*  @brief Shrink's TU tree of inter CUs by merging redundnant child nodes
10070
*         (not coded children) into a parent node(not coded).
10071
*
10072
*  @par   Description
10073
*         This is required post RDO evaluation as TU decisions are
10074
*         pre-determined(pre RDO) based on recursive SATD,
10075
*         while the quad children TU's can be skipped during RDO
10076
*
10077
*         The shrink process is applied iteratively till there are no
10078
*         more modes to shrink
10079
*
10080
*  @param[inout]   ps_tu_enc_loop
10081
*       pointer to tu enc loop params of inter cu
10082
*
10083
*  @param[inout]   ps_tu_enc_loop_temp_prms
10084
*       pointer to temp tu enc loop params of inter cu
10085
*
10086
*  @param[in]   num_tu_in_cu
10087
*       number of tus in cu
10088
*
10089
*  @return      modified number of tus in cu
10090
*
10091
******************************************************************************
10092
*/
10093
WORD32 ihevce_shrink_inter_tu_tree(
10094
    tu_enc_loop_out_t *ps_tu_enc_loop,
10095
    tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms,
10096
    recon_datastore_t *ps_recon_datastore,
10097
    WORD32 num_tu_in_cu,
10098
    UWORD8 u1_is_422)
10099
408k
{
10100
408k
    WORD32 recurse = 1;
10101
408k
    WORD32 ctr;
10102
10103
    /* ------------- Quadtree TU Split Transform flag optimization ------------  */
10104
    /* Post RDO, if all 4 child nodes are not coded the overheads of split TU    */
10105
    /* flags and cbf flags are saved by merging to parent node and marking       */
10106
    /* parent TU as not coded                                                    */
10107
    /*                                                                           */
10108
    /*                               ParentTUSplit=1                             */
10109
    /*                                      |                                    */
10110
    /*       ---------------------------------------------------------           */
10111
    /*       |C0(Not coded) | C1(Not coded) | C2(Not coded) | C3(Not coded)      */
10112
    /*                                     ||                                    */
10113
    /*                                     \/                                    */
10114
    /*                                                                           */
10115
    /*                              ParentTUSplit=0 (Not Coded)                  */
10116
    /*                                                                           */
10117
    /* ------------- Quadtree TU Split Transform flag optimization ------------  */
10118
522k
    while((num_tu_in_cu > 4) && recurse)
10119
114k
    {
10120
114k
        recurse = 0;
10121
10122
        /* Validate inter CU */
10123
        //ASSERT(ps_tu_enc_loop[0].s_tu.s_tu.b1_intra_flag == 0); /*b1_intra_flag no longer a member of tu structure */
10124
10125
        /* loop for all tu blocks in current cu */
10126
778k
        for(ctr = 0; ctr < num_tu_in_cu;)
10127
664k
        {
10128
            /* Get current tu posx, posy and size */
10129
664k
            WORD32 curr_pos_x = ps_tu_enc_loop[ctr].s_tu.b4_pos_x << 2;
10130
664k
            WORD32 curr_pos_y = ps_tu_enc_loop[ctr].s_tu.b4_pos_y << 2;
10131
            /* +1 is for parents size */
10132
664k
            WORD32 parent_tu_size = 1 << (ps_tu_enc_loop[ctr].s_tu.b3_size + 2 + 1);
10133
10134
            /* eval merge if leaf nodes reached i.e all child tus are of same size and first tu pos is same as parent pos */
10135
664k
            WORD32 eval_merge = ((curr_pos_x & (parent_tu_size - 1)) == 0);
10136
664k
            eval_merge &= ((curr_pos_y & (parent_tu_size - 1)) == 0);
10137
10138
            /* As TUs are published in encode order (Z SCAN),                      */
10139
            /* Four consecutive TUS of same size implies we have hit leaf nodes.   */
10140
664k
            if(((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 1].s_tu.b3_size)) &&
10141
469k
               ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 2].s_tu.b3_size)) &&
10142
326k
               ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 3].s_tu.b3_size)) &&
10143
218k
               eval_merge)
10144
187k
            {
10145
187k
                WORD32 merge_parent = 1;
10146
10147
                /* If any leaf noded is coded, it cannot be merged to parent */
10148
187k
                if((ps_tu_enc_loop[ctr].s_tu.b1_y_cbf) || (ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf) ||
10149
18.5k
                   (ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf) ||
10150
10151
16.0k
                   (ps_tu_enc_loop[ctr + 1].s_tu.b1_y_cbf) ||
10152
12.4k
                   (ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf) ||
10153
12.4k
                   (ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf) ||
10154
10155
12.3k
                   (ps_tu_enc_loop[ctr + 2].s_tu.b1_y_cbf) ||
10156
8.48k
                   (ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf) ||
10157
8.43k
                   (ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf) ||
10158
10159
8.38k
                   (ps_tu_enc_loop[ctr + 3].s_tu.b1_y_cbf) ||
10160
7.18k
                   (ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf) ||
10161
7.17k
                   (ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf))
10162
180k
                {
10163
180k
                    merge_parent = 0;
10164
180k
                }
10165
10166
187k
                if(u1_is_422)
10167
0
                {
10168
0
                    if((ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1) ||
10169
0
                       (ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1) ||
10170
10171
0
                       (ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf_subtu1) ||
10172
0
                       (ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf_subtu1) ||
10173
10174
0
                       (ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf_subtu1) ||
10175
0
                       (ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf_subtu1) ||
10176
10177
0
                       (ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf_subtu1) ||
10178
0
                       (ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf_subtu1))
10179
0
                    {
10180
0
                        merge_parent = 0;
10181
0
                    }
10182
0
                }
10183
10184
187k
                if(merge_parent)
10185
7.16k
                {
10186
                    /* Merge all the children (ctr,ctr+1,ctr+2,ctr+3) to parent (ctr) */
10187
10188
7.16k
                    if(ps_recon_datastore->u1_is_lumaRecon_available)
10189
167
                    {
10190
167
                        ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
10191
10192
167
                        memmove(
10193
167
                            &ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 1],
10194
167
                            &ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 4],
10195
167
                            (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10196
167
                    }
10197
10198
7.16k
                    if(ps_recon_datastore->au1_is_chromaRecon_available[0])
10199
167
                    {
10200
167
                        ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][0] =
10201
167
                            UCHAR_MAX;
10202
167
                        ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][0] =
10203
167
                            UCHAR_MAX;
10204
10205
167
                        memmove(
10206
167
                            &ps_recon_datastore
10207
167
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][0],
10208
167
                            &ps_recon_datastore
10209
167
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][0],
10210
167
                            (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10211
10212
167
                        memmove(
10213
167
                            &ps_recon_datastore
10214
167
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][0],
10215
167
                            &ps_recon_datastore
10216
167
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][0],
10217
167
                            (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10218
10219
167
                        if(u1_is_422)
10220
0
                        {
10221
0
                            ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][1] =
10222
0
                                UCHAR_MAX;
10223
0
                            ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][1] =
10224
0
                                UCHAR_MAX;
10225
10226
0
                            memmove(
10227
0
                                &ps_recon_datastore
10228
0
                                     ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][1],
10229
0
                                &ps_recon_datastore
10230
0
                                     ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][1],
10231
0
                                (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10232
10233
0
                            memmove(
10234
0
                                &ps_recon_datastore
10235
0
                                     ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][1],
10236
0
                                &ps_recon_datastore
10237
0
                                     ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][1],
10238
0
                                (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10239
0
                        }
10240
167
                    }
10241
10242
                    /* Parent node size is one more than that of child */
10243
7.16k
                    ps_tu_enc_loop[ctr].s_tu.b3_size++;
10244
10245
7.16k
                    ctr++;
10246
10247
                    /* move the subsequent TUs to next element */
10248
7.16k
                    ASSERT(num_tu_in_cu >= (ctr + 3));
10249
7.16k
                    memmove(
10250
7.16k
                        (void *)(ps_tu_enc_loop + ctr),
10251
7.16k
                        (void *)(ps_tu_enc_loop + ctr + 3),
10252
7.16k
                        (num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_out_t));
10253
10254
                    /* Also memmove the temp TU params */
10255
7.16k
                    memmove(
10256
7.16k
                        (void *)(ps_tu_enc_loop_temp_prms + ctr),
10257
7.16k
                        (void *)(ps_tu_enc_loop_temp_prms + ctr + 3),
10258
7.16k
                        (num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_temp_prms_t));
10259
10260
                    /* Number of TUs in CU are now less by 3 */
10261
7.16k
                    num_tu_in_cu -= 3;
10262
10263
                    /* Recurse again as new parent also be can be merged later */
10264
7.16k
                    recurse = 1;
10265
7.16k
                }
10266
180k
                else
10267
180k
                {
10268
                    /* Go to next set of leaf nodes */
10269
180k
                    ctr += 4;
10270
180k
                }
10271
187k
            }
10272
476k
            else
10273
476k
            {
10274
476k
                ctr++;
10275
476k
            }
10276
664k
        }
10277
114k
    }
10278
10279
    /* return the modified num TUs*/
10280
408k
    ASSERT(num_tu_in_cu > 0);
10281
408k
    return (num_tu_in_cu);
10282
408k
}
10283
10284
/**
******************************************************************************
*
*  @brief Extends an intra mode candidate list with the +/-1 neighbours of
*         its first MAX_INTRA_CU_CANDIDATES entries, followed by
*         INTRA_PLANAR and INTRA_DC, skipping modes already marked in the
*         hash table.
*
*  @param[inout]   pu1_mode_array
*       candidate mode list; new modes are appended starting at index
*       u1_num_ipe_modes (the caller must provide room for them)
*
*  @param[inout]   pu1_hash_table
*       per-mode "already present" markers, indexed by mode; set to 1 for
*       every mode appended here
*
*  @param[in]   u1_num_ipe_modes
*       number of modes in the list on entry
*
*  @return      number of modes in the list after the update
*
******************************************************************************
*/
UWORD8 ihevce_intra_mode_nxn_hash_updater(
    UWORD8 *pu1_mode_array, UWORD8 *pu1_hash_table, UWORD8 u1_num_ipe_modes)
{
    WORD32 cand_idx;
    WORD32 i4_nbr_mode;

    for(cand_idx = 0; cand_idx < MAX_INTRA_CU_CANDIDATES; cand_idx++)
    {
        /* Entries of 35 and above are not expanded */
        if(pu1_mode_array[cand_idx] >= 35)
        {
            continue;
        }

        /* Lower neighbour (mode - 1); mode 0 has none */
        if(0 != pu1_mode_array[cand_idx])
        {
            i4_nbr_mode = pu1_mode_array[cand_idx] - 1;

            if(0 == pu1_hash_table[i4_nbr_mode])
            {
                pu1_hash_table[i4_nbr_mode] = 1;
                pu1_mode_array[u1_num_ipe_modes++] = i4_nbr_mode;
            }
        }

        /* Upper neighbour (mode + 1); mode 34 has none.                    */
        /* The candidate is deliberately read from the array again rather   */
        /* than cached, since the append above writes into the same array   */
        if(34 != pu1_mode_array[cand_idx])
        {
            i4_nbr_mode = pu1_mode_array[cand_idx] + 1;

            if(0 == pu1_hash_table[i4_nbr_mode])
            {
                pu1_hash_table[i4_nbr_mode] = 1;
                pu1_mode_array[u1_num_ipe_modes++] = i4_nbr_mode;
            }
        }
    }

    /* Planar and DC are always made part of the list */
    if(0 == pu1_hash_table[INTRA_PLANAR])
    {
        pu1_hash_table[INTRA_PLANAR] = 1;
        pu1_mode_array[u1_num_ipe_modes++] = INTRA_PLANAR;
    }

    if(0 == pu1_hash_table[INTRA_DC])
    {
        pu1_hash_table[INTRA_DC] = 1;
        pu1_mode_array[u1_num_ipe_modes++] = INTRA_DC;
    }

    return u1_num_ipe_modes;
}
10336
10337
#if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
/**
******************************************************************************
*
*  @brief Runs the recursive-TU SATD function matching the CU size on the
*         candidate's pred data and returns the resulting SATD. The TU split
*         flags are written to ps_cu_data->ai4_tu_split_flag.
*
*  @param[inout]   ps_cu_data
*       inter candidate; supplies pred data / stride and receives the split
*       flags
*
*  @param[in]   ps_func_selector
*       function selector forwarded to the SATD function
*
*  @param[in]   pi2_scratch_mem
*       scratch memory handed to the SATD function as working memory
*
*  @param[in]   pu1_inp, i4_inp_stride
*       source block and its stride
*
*  @param[in]   i4_lambda, u1_lambda_q_shift
*       lambda and its q-shift, forwarded to the SATD function
*
*  @param[in]   u1_cu_size
*       CU size; selects the SATD function
*
*  @param[in]   u1_max_tr_depth
*       maximum transform depth (capped to 1 for 64x64 CUs)
*
*  @return      value returned by the selected SATD function
*
******************************************************************************
*/
WORD32 ihevce_determine_tu_tree_distribution(
    cu_inter_cand_t *ps_cu_data,
    me_func_selector_t *ps_func_selector,
    WORD16 *pi2_scratch_mem,
    UWORD8 *pu1_inp,
    WORD32 i4_inp_stride,
    WORD32 i4_lambda,
    UWORD8 u1_lambda_q_shift,
    UWORD8 u1_cu_size,
    UWORD8 u1_max_tr_depth)
{
    err_prms_t s_err_prms;
    PF_SAD_FXN_TU_REC apf_tu_rec_satd[4];
    WORD32 i4_satd;

    /* One recursive-TU SATD function per CU size */
    apf_tu_rec_satd[CU_8x8] = hme_evalsatd_pt_pu_8x8_tu_rec;
    apf_tu_rec_satd[CU_16x16] = hme_evalsatd_pt_pu_16x16_tu_rec;
    apf_tu_rec_satd[CU_32x32] = hme_evalsatd_pt_pu_32x32_tu_rec;
    apf_tu_rec_satd[CU_64x64] = hme_evalsatd_pt_pu_64x64_tu_rec;

    /* Source and reference (pred) buffers */
    s_err_prms.pu1_inp = pu1_inp;
    s_err_prms.i4_inp_stride = i4_inp_stride;
    s_err_prms.pu1_ref = ps_cu_data->pu1_pred_data;
    s_err_prms.i4_ref_stride = ps_cu_data->i4_pred_data_stride;

    /* Outputs and working memory */
    s_err_prms.pi4_sad_grid = &i4_satd;
    s_err_prms.pi4_tu_split_flags = ps_cu_data->ai4_tu_split_flag;
    s_err_prms.pu1_wkg_mem = (UWORD8 *)pi2_scratch_mem;

    /* A 64x64 CU is limited to a transform depth of at most 1 */
    s_err_prms.u1_max_tr_depth = (64 == u1_cu_size) ? MIN(1, u1_max_tr_depth) : u1_max_tr_depth;

    i4_satd = apf_tu_rec_satd[hme_get_range(u1_cu_size) - 4](
        &s_err_prms, i4_lambda, u1_lambda_q_shift, 0, ps_func_selector);

    /* With zero transform depth, a CU below 64x64 whose part size is non  */
    /* zero gets its first split flag forced to 1                          */
    if((u1_cu_size != 64) && (0 == u1_max_tr_depth) && (ps_cu_data->b3_part_size != 0))
    {
        ps_cu_data->ai4_tu_split_flag[0] = 1;
    }

    return i4_satd;
}
#endif
10388
10389
void ihevce_populate_nbr_4x4_with_pu_data(
10390
    nbr_4x4_t *ps_nbr_4x4, pu_t *ps_pu, WORD32 i4_nbr_buf_stride)
10391
554k
{
10392
554k
    WORD32 i, j;
10393
10394
554k
    nbr_4x4_t *ps_tmp_4x4 = ps_nbr_4x4;
10395
10396
554k
    WORD32 ht = (ps_pu->b4_ht + 1);
10397
554k
    WORD32 wd = (ps_pu->b4_wd + 1);
10398
10399
554k
    ps_nbr_4x4->b1_intra_flag = 0;
10400
554k
    ps_nbr_4x4->b1_pred_l0_flag = !(ps_pu->b2_pred_mode & 1);
10401
554k
    ps_nbr_4x4->b1_pred_l1_flag = (ps_pu->b2_pred_mode > PRED_L0);
10402
554k
    ps_nbr_4x4->mv = ps_pu->mv;
10403
10404
1.70M
    for(i = 0; i < ht; i++)
10405
1.15M
    {
10406
6.62M
        for(j = 0; j < wd; j++)
10407
5.46M
        {
10408
5.46M
            ps_tmp_4x4[j] = *ps_nbr_4x4;
10409
5.46M
        }
10410
10411
1.15M
        ps_tmp_4x4 += i4_nbr_buf_stride;
10412
1.15M
    }
10413
554k
}
10414
10415
void ihevce_call_luma_inter_pred_rdopt_pass1(
10416
    ihevce_enc_loop_ctxt_t *ps_ctxt, cu_inter_cand_t *ps_inter_cand, WORD32 cu_size)
10417
0
{
10418
0
    pu_t *ps_pu;
10419
0
    UWORD8 *pu1_pred;
10420
0
    WORD32 pred_stride, ctr, num_cu_part, skip_or_merge_flag = 0;
10421
0
    WORD32 inter_pu_wd, inter_pu_ht;
10422
10423
0
    pu1_pred = ps_inter_cand->pu1_pred_data_scr;
10424
0
    pred_stride = ps_inter_cand->i4_pred_data_stride;
10425
0
    num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1;
10426
10427
0
    for(ctr = 0; ctr < num_cu_part; ctr++)
10428
0
    {
10429
0
        ps_pu = &ps_inter_cand->as_inter_pu[ctr];
10430
10431
        /* IF AMP then each partitions can have diff wd ht */
10432
0
        inter_pu_wd = (ps_pu->b4_wd + 1) << 2;
10433
0
        inter_pu_ht = (ps_pu->b4_ht + 1) << 2;
10434
10435
0
        skip_or_merge_flag = ps_inter_cand->b1_skip_flag | ps_pu->b1_merge_flag;
10436
        //if(0 == skip_or_merge_flag)
10437
0
        {
10438
0
            ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 1);
10439
0
        }
10440
0
        if((2 == num_cu_part) && (0 == ctr))
10441
0
        {
10442
            /* 2Nx__ partion case */
10443
0
            if(inter_pu_wd == cu_size)
10444
0
            {
10445
0
                pu1_pred += (inter_pu_ht * pred_stride);
10446
0
            }
10447
10448
            /* __x2N partion case */
10449
0
            if(inter_pu_ht == cu_size)
10450
0
            {
10451
0
                pu1_pred += inter_pu_wd;
10452
0
            }
10453
0
        }
10454
0
    }
10455
0
}
10456
10457
LWORD64 ihevce_it_recon_ssd(
10458
    ihevce_enc_loop_ctxt_t *ps_ctxt,
10459
    UWORD8 *pu1_src,
10460
    WORD32 i4_src_strd,
10461
    UWORD8 *pu1_pred,
10462
    WORD32 i4_pred_strd,
10463
    WORD16 *pi2_deq_data,
10464
    WORD32 i4_deq_data_strd,
10465
    UWORD8 *pu1_recon,
10466
    WORD32 i4_recon_stride,
10467
    UWORD8 *pu1_ecd_data,
10468
    UWORD8 u1_trans_size,
10469
    UWORD8 u1_pred_mode,
10470
    WORD32 i4_cbf,
10471
    WORD32 i4_zero_col,
10472
    WORD32 i4_zero_row,
10473
    CHROMA_PLANE_ID_T e_chroma_plane)
10474
27.0M
{
10475
27.0M
    if(NULL_PLANE == e_chroma_plane)
10476
11.8M
    {
10477
11.8M
        ihevce_it_recon_fxn(
10478
11.8M
            ps_ctxt,
10479
11.8M
            pi2_deq_data,
10480
11.8M
            i4_deq_data_strd,
10481
11.8M
            pu1_pred,
10482
11.8M
            i4_pred_strd,
10483
11.8M
            pu1_recon,
10484
11.8M
            i4_recon_stride,
10485
11.8M
            pu1_ecd_data,
10486
11.8M
            u1_trans_size,
10487
11.8M
            u1_pred_mode,
10488
11.8M
            i4_cbf,
10489
11.8M
            i4_zero_col,
10490
11.8M
            i4_zero_row);
10491
10492
11.8M
        return ps_ctxt->s_cmn_opt_func.pf_ssd_calculator(
10493
11.8M
            pu1_recon, pu1_src, i4_recon_stride, i4_src_strd, u1_trans_size, u1_trans_size,
10494
11.8M
            e_chroma_plane);
10495
11.8M
    }
10496
15.1M
    else
10497
15.1M
    {
10498
15.1M
        ihevce_chroma_it_recon_fxn(
10499
15.1M
            ps_ctxt,
10500
15.1M
            pi2_deq_data,
10501
15.1M
            i4_deq_data_strd,
10502
15.1M
            pu1_pred,
10503
15.1M
            i4_pred_strd,
10504
15.1M
            pu1_recon,
10505
15.1M
            i4_recon_stride,
10506
15.1M
            pu1_ecd_data,
10507
15.1M
            u1_trans_size,
10508
15.1M
            i4_cbf,
10509
15.1M
            i4_zero_col,
10510
15.1M
            i4_zero_row,
10511
15.1M
            e_chroma_plane);
10512
10513
15.1M
        return ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
10514
15.1M
            pu1_recon,
10515
15.1M
            pu1_src,
10516
15.1M
            i4_recon_stride,
10517
15.1M
            i4_src_strd,
10518
15.1M
            u1_trans_size,
10519
15.1M
            u1_trans_size,
10520
15.1M
            e_chroma_plane);
10521
15.1M
    }
10522
27.0M
}
10523
10524
/*!
10525
******************************************************************************
10526
* \if Function name : ihevce_chroma_t_q_iq_ssd_scan_fxn \endif
10527
*
10528
* \brief
10529
*    Transform unit level (Chroma) enc_loop function
10530
*
10531
* \param[in] ps_ctxt    enc_loop module ctxt pointer
10532
* \param[in] pu1_pred       pointer to predicted data buffer
10533
* \param[in] pred_strd      predicted buffer stride
10534
* \param[in] pu1_src    pointer to source data buffer
10535
* \param[in] src_strd   source buffer stride
10536
* \param[in] pi2_deq_data   pointer to store iq data
10537
* \param[in] deq_data_strd  iq data buffer stride
10538
* \param[out] pu1_ecd_data  pointer coeff output buffer (input to ent cod)
10539
* \param[out] pu1_csbf_buf  pointer to store the csbf for all 4x4 in a current
10540
*                           block
10541
* \param[out] csbf_strd     csbf buffer stride
10542
* \param[in] trans_size     transform size (4, 8, 16)
10543
* \param[in] intra_flag     0:Inter/Skip 1:Intra
10544
* \param[out] pi4_coeff_off pointer to store the number of bytes produced in
10545
*                           coeff buffer
10546
*                           the current TU in RDopt Mode
10547
* \param[out] pi4_zero_col  pointer to store the zero_col info for the TU
10548
* \param[out] pi4_zero_row  pointer to store the zero_row info for the TU
10549
*
10550
* \return
10551
*    CBF of the current block
10552
*
10553
* \author
10554
*  Ittiam
10555
*
10556
*****************************************************************************
10557
*/
10558
WORD32 ihevce_chroma_t_q_iq_ssd_scan_fxn(
10559
    ihevce_enc_loop_ctxt_t *ps_ctxt,
10560
    UWORD8 *pu1_pred,
10561
    WORD32 pred_strd,
10562
    UWORD8 *pu1_src,
10563
    WORD32 src_strd,
10564
    WORD16 *pi2_deq_data,
10565
    WORD32 deq_data_strd,
10566
    UWORD8 *pu1_recon,
10567
    WORD32 i4_recon_stride,
10568
    UWORD8 *pu1_ecd_data,
10569
    UWORD8 *pu1_csbf_buf,
10570
    WORD32 csbf_strd,
10571
    WORD32 trans_size,
10572
    WORD32 i4_scan_idx,
10573
    WORD32 intra_flag,
10574
    WORD32 *pi4_coeff_off,
10575
    WORD32 *pi4_tu_bits,
10576
    WORD32 *pi4_zero_col,
10577
    WORD32 *pi4_zero_row,
10578
    UWORD8 *pu1_is_recon_available,
10579
    WORD32 i4_perform_sbh,
10580
    WORD32 i4_perform_rdoq,
10581
    LWORD64 *pi8_cost,
10582
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
10583
    WORD32 i4_alpha_stim_multiplier,
10584
    UWORD8 u1_is_cu_noisy,
10585
#endif
10586
    UWORD8 u1_is_skip,
10587
    SSD_TYPE_T e_ssd_type,
10588
    CHROMA_PLANE_ID_T e_chroma_plane)
10589
27.7M
{
10590
27.7M
    WORD32 trans_idx, cbf, u4_blk_sad;
10591
27.7M
    WORD16 *pi2_quant_coeffs;
10592
27.7M
    WORD16 *pi2_trans_values;
10593
27.7M
    WORD32 quant_scale_mat_offset;
10594
27.7M
    WORD32 *pi4_trans_scratch;
10595
27.7M
    WORD32 *pi4_subBlock2csbfId_map = NULL;
10596
10597
#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
10598
    WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i;
10599
#endif
10600
10601
27.7M
    rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt;
10602
10603
27.7M
    WORD32 i4_perform_zcbf = (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE) ||
10604
21.2M
                             (!intra_flag && ENABLE_INTER_ZCU_COST);
10605
27.7M
    WORD32 i4_perform_coeff_level_rdoq =
10606
27.7M
        (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING) &&
10607
23.1M
        (ps_ctxt->i4_chroma_quant_rounding_level == CHROMA_QUANT_ROUNDING);
10608
10609
27.7M
    ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE));
10610
27.7M
    ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW);
10611
10612
27.7M
    *pi4_coeff_off = 0;
10613
27.7M
    *pi4_tu_bits = 0;
10614
27.7M
    pu1_is_recon_available[0] = 0;
10615
10616
27.7M
    pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0];
10617
27.7M
    pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
10618
27.7M
    pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2);
10619
10620
27.7M
    if(2 == trans_size)
10621
0
    {
10622
0
        trans_size = 4;
10623
0
    }
10624
10625
    /* translate the transform size to index */
10626
27.7M
    trans_idx = trans_size >> 2;
10627
10628
27.7M
    if(16 == trans_size)
10629
3.93M
    {
10630
3.93M
        trans_idx = 3;
10631
3.93M
    }
10632
10633
27.7M
    if(u1_is_skip)
10634
0
    {
10635
0
        pi8_cost[0] = ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
10636
0
            pu1_pred,
10637
0
            pu1_src,
10638
0
            pred_strd,
10639
0
            src_strd,
10640
0
            trans_size,
10641
0
            trans_size,
10642
0
            e_chroma_plane);
10643
10644
0
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
10645
0
        {
10646
            /* buffer copy fromp pred to recon */
10647
0
            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
10648
0
                pu1_pred,
10649
0
                pred_strd,
10650
0
                pu1_recon,
10651
0
                i4_recon_stride,
10652
0
                trans_size,
10653
0
                trans_size,
10654
0
                e_chroma_plane);
10655
10656
0
            pu1_is_recon_available[0] = 1;
10657
0
        }
10658
10659
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
10660
0
        if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
10661
0
        {
10662
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
10663
0
                pu1_src,
10664
0
                src_strd,
10665
0
                pu1_pred,
10666
0
                pred_strd,
10667
0
                pi8_cost[0],
10668
0
                i4_alpha_stim_multiplier,
10669
0
                trans_size,
10670
0
                0,
10671
0
                ps_ctxt->u1_enable_psyRDOPT,
10672
0
                e_chroma_plane);
10673
0
        }
10674
0
#endif
10675
10676
0
#if ENABLE_INTER_ZCU_COST
10677
#if !WEIGH_CHROMA_COST
10678
        /* cbf = 0, accumulate cu not coded cost */
10679
        ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
10680
#else
10681
0
        ps_ctxt->i8_cu_not_coded_cost += (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor +
10682
0
                                          (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
10683
0
                                         CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT;
10684
0
#endif
10685
0
#endif
10686
10687
0
        return 0;
10688
0
    }
10689
10690
27.7M
    if(intra_flag == 1)
10691
21.2M
    {
10692
21.2M
        quant_scale_mat_offset = 0;
10693
10694
#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
10695
        ai4_quant_rounding_factors[0][0] =
10696
            MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3);
10697
10698
        for(i = 0; i < trans_size * trans_size; i++)
10699
        {
10700
            ai4_quant_rounding_factors[1][i] =
10701
                MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3][i],
10702
                    (1 << QUANT_ROUND_FACTOR_Q) / 3);
10703
            ai4_quant_rounding_factors[2][i] =
10704
                MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3][i],
10705
                    (1 << QUANT_ROUND_FACTOR_Q) / 3);
10706
        }
10707
#endif
10708
21.2M
    }
10709
6.43M
    else
10710
6.43M
    {
10711
6.43M
        quant_scale_mat_offset = NUM_TRANS_TYPES;
10712
6.43M
    }
10713
10714
27.7M
    switch(trans_size)
10715
27.7M
    {
10716
13.9M
    case 4:
10717
13.9M
    {
10718
13.9M
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU;
10719
10720
13.9M
        break;
10721
0
    }
10722
9.83M
    case 8:
10723
9.83M
    {
10724
9.83M
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU;
10725
10726
9.83M
        break;
10727
0
    }
10728
3.93M
    case 16:
10729
3.93M
    {
10730
3.93M
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU;
10731
10732
3.93M
        break;
10733
0
    }
10734
0
    case 32:
10735
0
    {
10736
0
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU;
10737
10738
0
        break;
10739
0
    }
10740
27.7M
    }
10741
10742
    /* ---------- call residue and transform block ------- */
10743
27.7M
    u4_blk_sad = ps_ctxt->apf_chrm_resd_trns[trans_idx - 1](
10744
27.7M
        pu1_src,
10745
27.7M
        pu1_pred,
10746
27.7M
        pi4_trans_scratch,
10747
27.7M
        pi2_trans_values,
10748
27.7M
        src_strd,
10749
27.7M
        pred_strd,
10750
27.7M
        trans_size,
10751
27.7M
        e_chroma_plane);
10752
27.7M
    (void)u4_blk_sad;
10753
    /* -------- calculate SSD calculation in Transform Domain ------ */
10754
10755
27.7M
    cbf = ps_ctxt->apf_quant_iquant_ssd
10756
27.7M
              [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2]
10757
10758
27.7M
          (pi2_trans_values,
10759
27.7M
           ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset],
10760
27.7M
           pi2_quant_coeffs,
10761
27.7M
           pi2_deq_data,
10762
27.7M
           trans_size,
10763
27.7M
           ps_ctxt->i4_chrm_cu_qp_div6,
10764
27.7M
           ps_ctxt->i4_chrm_cu_qp_mod6,
10765
27.7M
#if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
10766
27.7M
           ps_ctxt->i4_quant_rnd_factor[intra_flag],
10767
27.7M
           ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3],
10768
27.7M
           ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3],
10769
#else
10770
           intra_flag ? ai4_quant_rounding_factors[0][0] : ps_ctxt->i4_quant_rnd_factor[intra_flag],
10771
           intra_flag ? ai4_quant_rounding_factors[1]
10772
                      : ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3],
10773
           intra_flag ? ai4_quant_rounding_factors[2]
10774
                      : ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3],
10775
#endif
10776
27.7M
           trans_size,
10777
27.7M
           trans_size,
10778
27.7M
           deq_data_strd,
10779
27.7M
           pu1_csbf_buf,
10780
27.7M
           csbf_strd,
10781
27.7M
           pi4_zero_col,
10782
27.7M
           pi4_zero_row,
10783
27.7M
           ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset],
10784
27.7M
           pi8_cost);
10785
10786
27.7M
    if(e_ssd_type != FREQUENCY_DOMAIN_SSD)
10787
15.1M
    {
10788
15.1M
        pi8_cost[0] = UINT_MAX;
10789
15.1M
    }
10790
10791
27.7M
    if(0 != cbf)
10792
4.22M
    {
10793
4.22M
        if(i4_perform_sbh || i4_perform_rdoq)
10794
3.05M
        {
10795
3.05M
            ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd;
10796
3.05M
            ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size;
10797
10798
3.05M
            ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_chrm_cu_qp_div6;
10799
3.05M
            ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_chrm_cu_qp_mod6;
10800
3.05M
            ps_rdoq_sbh_ctxt->i4_scan_idx = i4_scan_idx;
10801
3.05M
            ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
10802
3.05M
            ps_rdoq_sbh_ctxt->i4_trans_size = trans_size;
10803
10804
3.05M
            ps_rdoq_sbh_ctxt->pi2_dequant_coeff =
10805
3.05M
                ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset];
10806
3.05M
            ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data;
10807
3.05M
            ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs;
10808
3.05M
            ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values;
10809
3.05M
            ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf;
10810
3.05M
            ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map;
10811
10812
3.05M
            if((!i4_perform_rdoq))
10813
1.51M
            {
10814
1.51M
                ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
10815
10816
1.51M
                pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
10817
1.51M
            }
10818
3.05M
        }
10819
10820
        /* ------- call coeffs scan function ------- */
10821
4.22M
        *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
10822
4.22M
            pi2_quant_coeffs,
10823
4.22M
            pi4_subBlock2csbfId_map,
10824
4.22M
            i4_scan_idx,
10825
4.22M
            trans_size,
10826
4.22M
            pu1_ecd_data,
10827
4.22M
            pu1_csbf_buf,
10828
4.22M
            csbf_strd);
10829
4.22M
    }
10830
10831
    /*  Normalize Cost. Note : trans_idx, not (trans_idx-1) */
10832
27.7M
    pi8_cost[0] >>= ga_trans_shift[trans_idx];
10833
10834
27.7M
#if RDOPT_ZERO_CBF_ENABLE
10835
27.7M
    if((0 != cbf))
10836
4.22M
    {
10837
4.22M
        WORD32 tu_bits;
10838
4.22M
        LWORD64 zero_cbf_cost_u, curr_cb_cod_cost;
10839
10840
4.22M
        zero_cbf_cost_u = 0;
10841
10842
        /* Populating the fields of rdoq_ctxt structure */
10843
4.22M
        if(i4_perform_rdoq)
10844
1.54M
        {
10845
            //memset(ps_rdoq_sbh_ctxt,0,sizeof(rdoq_sbh_ctxt_t));
10846
            /* transform size to log2transform size */
10847
1.54M
            GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size);
10848
1.54M
            ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1;
10849
10850
1.54M
            ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_chroma_qf;
10851
1.54M
            ps_rdoq_sbh_ctxt->i4_is_luma = 0;
10852
1.54M
            ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx];
10853
1.54M
            ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td =
10854
1.54M
                (1 << (ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td - 1));
10855
1.54M
            ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0;
10856
1.54M
            ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col;
10857
1.54M
            ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row;
10858
1.54M
        }
10859
2.68M
        else if(i4_perform_zcbf)
10860
868k
        {
10861
            /* cost of zero cbf encoding */
10862
868k
            zero_cbf_cost_u =
10863
10864
868k
                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
10865
868k
                    pu1_pred,
10866
868k
                    pu1_src,
10867
868k
                    pred_strd,
10868
868k
                    src_strd,
10869
868k
                    trans_size,
10870
868k
                    trans_size,
10871
868k
                    e_chroma_plane);
10872
868k
        }
10873
10874
        /************************************************************************/
10875
        /* call the entropy rdo encode to get the bit estimate for current tu   */
10876
        /* note that tu includes only residual coding bits and does not include */
10877
        /* tu split, cbf and qp delta encoding bits for a TU                    */
10878
        /************************************************************************/
10879
4.22M
        if(i4_perform_rdoq)
10880
1.54M
        {
10881
1.54M
            tu_bits = ihevce_entropy_rdo_encode_tu_rdoq(
10882
1.54M
                &ps_ctxt->s_rdopt_entropy_ctxt,
10883
1.54M
                pu1_ecd_data,
10884
1.54M
                trans_size,
10885
1.54M
                0,
10886
1.54M
                ps_rdoq_sbh_ctxt,
10887
1.54M
                pi8_cost,
10888
1.54M
                &zero_cbf_cost_u,
10889
1.54M
                0);
10890
            //Currently, we are not accounting for sign bit in RDOPT bits calculation when RDOQ is turned on
10891
10892
1.54M
            if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0)
10893
73.0k
            {
10894
73.0k
                cbf = 0;
10895
10896
                /* num bytes is set to 0 */
10897
73.0k
                *pi4_coeff_off = 0;
10898
73.0k
            }
10899
10900
1.54M
            (*pi4_tu_bits) += tu_bits;
10901
10902
1.54M
            if((i4_perform_sbh) && (0 != cbf))
10903
1.46M
            {
10904
1.46M
                ps_rdoq_sbh_ctxt->i8_ssd_cost = pi8_cost[0];
10905
10906
1.46M
                ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
10907
10908
1.46M
                pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
10909
1.46M
            }
10910
10911
            /*Add round value before normalizing*/
10912
1.54M
            pi8_cost[0] += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td;
10913
1.54M
            pi8_cost[0] >>= ga_trans_shift[trans_idx];
10914
10915
1.54M
            if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1)
10916
1.46M
            {
10917
1.46M
                *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
10918
1.46M
                    pi2_quant_coeffs,
10919
1.46M
                    pi4_subBlock2csbfId_map,
10920
1.46M
                    i4_scan_idx,
10921
1.46M
                    trans_size,
10922
1.46M
                    pu1_ecd_data,
10923
1.46M
                    ps_rdoq_sbh_ctxt->pu1_csbf_buf,
10924
1.46M
                    csbf_strd);
10925
1.46M
            }
10926
1.54M
        }
10927
2.68M
        else
10928
2.68M
        {
10929
            /************************************************************************/
10930
            /* call the entropy rdo encode to get the bit estimate for current tu   */
10931
            /* note that tu includes only residual coding bits and does not include */
10932
            /* tu split, cbf and qp delta encoding bits for a TU                    */
10933
            /************************************************************************/
10934
2.68M
            tu_bits = ihevce_entropy_rdo_encode_tu(
10935
2.68M
                &ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 0, i4_perform_sbh);
10936
10937
2.68M
            (*pi4_tu_bits) += tu_bits;
10938
2.68M
        }
10939
10940
4.22M
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
10941
1.38M
        {
10942
1.38M
            pi8_cost[0] = ihevce_it_recon_ssd(
10943
1.38M
                ps_ctxt,
10944
1.38M
                pu1_src,
10945
1.38M
                src_strd,
10946
1.38M
                pu1_pred,
10947
1.38M
                pred_strd,
10948
1.38M
                pi2_deq_data,
10949
1.38M
                deq_data_strd,
10950
1.38M
                pu1_recon,
10951
1.38M
                i4_recon_stride,
10952
1.38M
                pu1_ecd_data,
10953
1.38M
                trans_size,
10954
1.38M
                PRED_MODE_INTRA,
10955
1.38M
                cbf,
10956
1.38M
                pi4_zero_col[0],
10957
1.38M
                pi4_zero_row[0],
10958
1.38M
                e_chroma_plane);
10959
10960
1.38M
            pu1_is_recon_available[0] = 1;
10961
1.38M
        }
10962
10963
4.22M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
10964
4.22M
        if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
10965
0
        {
10966
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
10967
0
                pu1_src,
10968
0
                src_strd,
10969
0
                pu1_recon,
10970
0
                i4_recon_stride,
10971
0
                pi8_cost[0],
10972
0
                i4_alpha_stim_multiplier,
10973
0
                trans_size,
10974
0
                0,
10975
0
                ps_ctxt->u1_enable_psyRDOPT,
10976
0
                e_chroma_plane);
10977
0
        }
10978
4.22M
        else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
10979
0
        {
10980
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
10981
0
                pu1_src,
10982
0
                src_strd,
10983
0
                pu1_pred,
10984
0
                pred_strd,
10985
0
                pi8_cost[0],
10986
0
                i4_alpha_stim_multiplier,
10987
0
                trans_size,
10988
0
                0,
10989
0
                ps_ctxt->u1_enable_psyRDOPT,
10990
0
                e_chroma_plane);
10991
0
        }
10992
4.22M
#endif
10993
10994
4.22M
        curr_cb_cod_cost = pi8_cost[0];
10995
10996
        /* add the SSD cost to bits estimate given by ECD */
10997
4.22M
        curr_cb_cod_cost +=
10998
4.22M
            COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
10999
11000
4.22M
        if(i4_perform_zcbf)
11001
1.40M
        {
11002
1.40M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
11003
1.40M
            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
11004
0
            {
11005
0
                zero_cbf_cost_u = ihevce_inject_stim_into_distortion(
11006
0
                    pu1_src,
11007
0
                    src_strd,
11008
0
                    pu1_pred,
11009
0
                    pred_strd,
11010
0
                    zero_cbf_cost_u,
11011
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
11012
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
11013
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
11014
0
                                                 100.0,
11015
0
                    trans_size,
11016
0
                    0,
11017
0
                    ps_ctxt->u1_enable_psyRDOPT,
11018
0
                    e_chroma_plane);
11019
0
            }
11020
1.40M
#endif
11021
            /* force the tu as zero cbf if zero_cbf_cost is lower */
11022
1.40M
            if(zero_cbf_cost_u < curr_cb_cod_cost)
11023
22.0k
            {
11024
22.0k
                *pi4_coeff_off = 0;
11025
22.0k
                cbf = 0;
11026
22.0k
                (*pi4_tu_bits) = 0;
11027
22.0k
                pi8_cost[0] = zero_cbf_cost_u;
11028
11029
22.0k
                pu1_is_recon_available[0] = 0;
11030
11031
22.0k
                if(e_ssd_type == SPATIAL_DOMAIN_SSD)
11032
10.4k
                {
11033
10.4k
                    ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
11034
10.4k
                        pu1_pred,
11035
10.4k
                        pred_strd,
11036
10.4k
                        pu1_recon,
11037
10.4k
                        i4_recon_stride,
11038
10.4k
                        trans_size,
11039
10.4k
                        trans_size,
11040
10.4k
                        e_chroma_plane);
11041
11042
10.4k
                    pu1_is_recon_available[0] = 1;
11043
10.4k
                }
11044
22.0k
            }
11045
11046
1.40M
#if ENABLE_INTER_ZCU_COST
11047
1.40M
            if(!intra_flag)
11048
1.40M
            {
11049
#if !WEIGH_CHROMA_COST
11050
                ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost_u;
11051
#else
11052
1.40M
                ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
11053
1.40M
                    (zero_cbf_cost_u * ps_ctxt->u4_chroma_cost_weighing_factor +
11054
1.40M
                     (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
11055
1.40M
                    CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
11056
1.40M
#endif
11057
1.40M
            }
11058
1.40M
#endif
11059
1.40M
        }
11060
4.22M
    }
11061
23.4M
    else
11062
23.4M
    {
11063
23.4M
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
11064
13.7M
        {
11065
13.7M
            pi8_cost[0] = ihevce_it_recon_ssd(
11066
13.7M
                ps_ctxt,
11067
13.7M
                pu1_src,
11068
13.7M
                src_strd,
11069
13.7M
                pu1_pred,
11070
13.7M
                pred_strd,
11071
13.7M
                pi2_deq_data,
11072
13.7M
                deq_data_strd,
11073
13.7M
                pu1_recon,
11074
13.7M
                i4_recon_stride,
11075
13.7M
                pu1_ecd_data,
11076
13.7M
                trans_size,
11077
13.7M
                PRED_MODE_INTRA,
11078
13.7M
                cbf,
11079
13.7M
                pi4_zero_col[0],
11080
13.7M
                pi4_zero_row[0],
11081
13.7M
                e_chroma_plane);
11082
11083
13.7M
            pu1_is_recon_available[0] = 1;
11084
13.7M
        }
11085
11086
23.4M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
11087
23.4M
        if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
11088
0
        {
11089
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
11090
0
                pu1_src,
11091
0
                src_strd,
11092
0
                pu1_recon,
11093
0
                i4_recon_stride,
11094
0
                pi8_cost[0],
11095
0
                !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
11096
0
                                       : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
11097
0
                                          (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
11098
0
                                             100.0,
11099
0
                trans_size,
11100
0
                0,
11101
0
                ps_ctxt->u1_enable_psyRDOPT,
11102
0
                e_chroma_plane);
11103
0
        }
11104
23.4M
        else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
11105
0
        {
11106
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
11107
0
                pu1_src,
11108
0
                src_strd,
11109
0
                pu1_pred,
11110
0
                pred_strd,
11111
0
                pi8_cost[0],
11112
0
                !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
11113
0
                                       : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
11114
0
                                          (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
11115
0
                                             100.0,
11116
0
                trans_size,
11117
0
                0,
11118
0
                ps_ctxt->u1_enable_psyRDOPT,
11119
0
                e_chroma_plane);
11120
0
        }
11121
23.4M
#endif
11122
11123
23.4M
#if ENABLE_INTER_ZCU_COST
11124
23.4M
        if(!intra_flag)
11125
5.02M
        {
11126
#if !WEIGH_CHROMA_COST
11127
            /* cbf = 0, accumulate cu not coded cost */
11128
            ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
11129
#else
11130
            /* cbf = 0, accumulate cu not coded cost */
11131
11132
5.02M
            ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
11133
5.02M
                (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor +
11134
5.02M
                 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
11135
5.02M
                CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
11136
5.02M
#endif
11137
5.02M
        }
11138
23.4M
#endif
11139
23.4M
    }
11140
27.7M
#endif /* RDOPT_ZERO_CBF_ENABLE */
11141
11142
27.7M
    return (cbf);
11143
27.7M
}