Coverage Report

Created: 2025-08-03 06:13

/src/libhevc/encoder/ihevce_enc_loop_utils.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
21
/*!
22
******************************************************************************
23
* \file ihevce_enc_loop_utils.c
24
*
25
* \brief
26
*    This file contains utility functions of Encode loop
27
*
28
* \date
29
*    18/09/2012
30
*
31
* \author
32
*    Ittiam
33
*
34
*
35
* List of Functions
36
*
37
*
38
******************************************************************************
39
*/
40
41
/*****************************************************************************/
42
/* File Includes                                                             */
43
/*****************************************************************************/
44
/* System include files */
45
#include <stdio.h>
46
#include <string.h>
47
#include <stdlib.h>
48
#include <assert.h>
49
#include <stdarg.h>
50
#include <math.h>
51
#include <limits.h>
52
53
/* User include files */
54
#include "ihevc_typedefs.h"
55
#include "itt_video_api.h"
56
#include "ihevce_api.h"
57
58
#include "rc_cntrl_param.h"
59
#include "rc_frame_info_collector.h"
60
#include "rc_look_ahead_params.h"
61
62
#include "ihevc_defs.h"
63
#include "ihevc_macros.h"
64
#include "ihevc_debug.h"
65
#include "ihevc_structs.h"
66
#include "ihevc_platform_macros.h"
67
#include "ihevc_deblk.h"
68
#include "ihevc_itrans_recon.h"
69
#include "ihevc_chroma_itrans_recon.h"
70
#include "ihevc_chroma_intra_pred.h"
71
#include "ihevc_intra_pred.h"
72
#include "ihevc_inter_pred.h"
73
#include "ihevc_mem_fns.h"
74
#include "ihevc_padding.h"
75
#include "ihevc_weighted_pred.h"
76
#include "ihevc_sao.h"
77
#include "ihevc_resi_trans.h"
78
#include "ihevc_quant_iquant_ssd.h"
79
#include "ihevc_cabac_tables.h"
80
#include "ihevc_common_tables.h"
81
82
#include "ihevce_defs.h"
83
#include "ihevce_hle_interface.h"
84
#include "ihevce_lap_enc_structs.h"
85
#include "ihevce_multi_thrd_structs.h"
86
#include "ihevce_multi_thrd_funcs.h"
87
#include "ihevce_me_common_defs.h"
88
#include "ihevce_had_satd.h"
89
#include "ihevce_error_codes.h"
90
#include "ihevce_bitstream.h"
91
#include "ihevce_cabac.h"
92
#include "ihevce_rdoq_macros.h"
93
#include "ihevce_function_selector.h"
94
#include "ihevce_enc_structs.h"
95
#include "ihevce_entropy_structs.h"
96
#include "ihevce_cmn_utils_instr_set_router.h"
97
#include "ihevce_ipe_instr_set_router.h"
98
#include "ihevce_decomp_pre_intra_structs.h"
99
#include "ihevce_decomp_pre_intra_pass.h"
100
#include "ihevce_enc_loop_structs.h"
101
#include "ihevce_nbr_avail.h"
102
#include "ihevce_enc_loop_utils.h"
103
#include "ihevce_sub_pic_rc.h"
104
#include "ihevce_global_tables.h"
105
#include "ihevce_bs_compute_ctb.h"
106
#include "ihevce_cabac_rdo.h"
107
#include "ihevce_deblk.h"
108
#include "ihevce_frame_process.h"
109
#include "ihevce_rc_enc_structs.h"
110
#include "hme_datatype.h"
111
#include "hme_interface.h"
112
#include "hme_common_defs.h"
113
#include "hme_defs.h"
114
#include "hme_common_utils.h"
115
#include "ihevce_me_instr_set_router.h"
116
#include "ihevce_enc_subpel_gen.h"
117
#include "ihevce_inter_pred.h"
118
#include "ihevce_mv_pred.h"
119
#include "ihevce_mv_pred_merge.h"
120
#include "ihevce_enc_loop_inter_mode_sifter.h"
121
#include "ihevce_enc_cu_recursion.h"
122
#include "ihevce_enc_loop_pass.h"
123
#include "ihevce_common_utils.h"
124
#include "ihevce_dep_mngr_interface.h"
125
#include "ihevce_sao.h"
126
#include "ihevce_tile_interface.h"
127
#include "ihevce_profile.h"
128
#include "ihevce_stasino_helpers.h"
129
#include "ihevce_tu_tree_selector.h"
130
131
/*****************************************************************************/
132
/* Globals                                                                   */
133
/*****************************************************************************/
134
135
extern UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2];
136
extern const UWORD8 gu1_hevce_scan4x4[3][16];
137
extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc[4][16];
138
extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_tr4[16];
139
extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_00[16];
140
141
/*****************************************************************************/
142
/* Constant Macros                                                           */
143
/*****************************************************************************/
144
#define ENABLE_ZERO_CBF 1
145
#define DISABLE_RDOQ_INTRA 0
146
147
/*****************************************************************************/
148
/* Function Definitions                                                      */
149
/*****************************************************************************/
150
void *ihevce_tu_tree_update(
151
    tu_prms_t *ps_tu_prms,
152
    WORD32 *pnum_tu_in_cu,
153
    WORD32 depth,
154
    WORD32 tu_split_flag,
155
    WORD32 tu_early_cbf,
156
    WORD32 i4_x_off,
157
    WORD32 i4_y_off)
158
2.08M
{
159
    //WORD32 tu_split_flag = p_tu_split_flag[0];
160
2.08M
    WORD32 p_tu_split_flag[4];
161
2.08M
    WORD32 p_tu_early_cbf[4];
162
163
2.08M
    WORD32 tu_size = ps_tu_prms->u1_tu_size;
164
165
2.08M
    if(((tu_size >> depth) >= 16) && (tu_split_flag & 0x1))
166
162k
    {
167
162k
        if((tu_size >> depth) == 32)
168
34.5k
        {
169
            /* Get the individual TU split flags */
170
34.5k
            p_tu_split_flag[0] = (tu_split_flag >> 16) & 0x1F;
171
34.5k
            p_tu_split_flag[1] = (tu_split_flag >> 11) & 0x1F;
172
34.5k
            p_tu_split_flag[2] = (tu_split_flag >> 6) & 0x1F;
173
34.5k
            p_tu_split_flag[3] = (tu_split_flag >> 1) & 0x1F;
174
175
            /* Get the early CBF flags */
176
34.5k
            p_tu_early_cbf[0] = (tu_early_cbf >> 16) & 0x1F;
177
34.5k
            p_tu_early_cbf[1] = (tu_early_cbf >> 11) & 0x1F;
178
34.5k
            p_tu_early_cbf[2] = (tu_early_cbf >> 6) & 0x1F;
179
34.5k
            p_tu_early_cbf[3] = (tu_early_cbf >> 1) & 0x1F;
180
34.5k
        }
181
127k
        else
182
127k
        {
183
            /* Get the individual TU split flags */
184
127k
            p_tu_split_flag[0] = ((tu_split_flag >> 4) & 0x1);
185
127k
            p_tu_split_flag[1] = ((tu_split_flag >> 3) & 0x1);
186
127k
            p_tu_split_flag[2] = ((tu_split_flag >> 2) & 0x1);
187
127k
            p_tu_split_flag[3] = ((tu_split_flag >> 1) & 0x1);
188
189
            /* Get the early CBF flags */
190
127k
            p_tu_early_cbf[0] = ((tu_early_cbf >> 4) & 0x1);
191
127k
            p_tu_early_cbf[1] = ((tu_early_cbf >> 3) & 0x1);
192
127k
            p_tu_early_cbf[2] = ((tu_early_cbf >> 2) & 0x1);
193
127k
            p_tu_early_cbf[3] = ((tu_early_cbf >> 1) & 0x1);
194
127k
        }
195
196
162k
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
197
162k
            ps_tu_prms,
198
162k
            pnum_tu_in_cu,
199
162k
            depth + 1,
200
162k
            p_tu_split_flag[0],
201
162k
            p_tu_early_cbf[0],
202
162k
            i4_x_off,
203
162k
            i4_y_off);
204
205
162k
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
206
162k
            ps_tu_prms,
207
162k
            pnum_tu_in_cu,
208
162k
            depth + 1,
209
162k
            p_tu_split_flag[1],
210
162k
            p_tu_early_cbf[1],
211
162k
            (i4_x_off + (tu_size >> (depth + 1))),
212
162k
            i4_y_off);
213
214
162k
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
215
162k
            ps_tu_prms,
216
162k
            pnum_tu_in_cu,
217
162k
            depth + 1,
218
162k
            p_tu_split_flag[2],
219
162k
            p_tu_early_cbf[2],
220
162k
            i4_x_off,
221
162k
            (i4_y_off + (tu_size >> (depth + 1))));
222
223
162k
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
224
162k
            ps_tu_prms,
225
162k
            pnum_tu_in_cu,
226
162k
            depth + 1,
227
162k
            p_tu_split_flag[3],
228
162k
            p_tu_early_cbf[3],
229
162k
            (i4_x_off + (tu_size >> (depth + 1))),
230
162k
            (i4_y_off + (tu_size >> (depth + 1))));
231
162k
    }
232
1.91M
    else
233
1.91M
    {
234
1.91M
        if(tu_split_flag & 0x1)
235
136k
        {
236
            /* This piece of code will be entered for the 8x8, if it is split
237
            Update the 4 child TU's accordingly. */
238
239
136k
            (*pnum_tu_in_cu) += 4;
240
241
            /* TL TU update */
242
136k
            ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
243
244
136k
            ps_tu_prms->u1_x_off = i4_x_off;
245
246
136k
            ps_tu_prms->u1_y_off = i4_y_off;
247
248
            /* Early CBF is not done for 4x4 transforms */
249
136k
            ps_tu_prms->i4_early_cbf = 1;
250
251
136k
            ps_tu_prms++;
252
253
            /* TR TU update */
254
136k
            ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
255
256
136k
            ps_tu_prms->u1_x_off = i4_x_off + (tu_size >> (depth + 1));
257
258
136k
            ps_tu_prms->u1_y_off = i4_y_off;
259
260
            /* Early CBF is not done for 4x4 transforms */
261
136k
            ps_tu_prms->i4_early_cbf = 1;
262
263
136k
            ps_tu_prms++;
264
265
            /* BL TU update */
266
136k
            ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
267
268
136k
            ps_tu_prms->u1_x_off = i4_x_off;
269
270
136k
            ps_tu_prms->u1_y_off = i4_y_off + (tu_size >> (depth + 1));
271
272
            /* Early CBF is not done for 4x4 transforms */
273
136k
            ps_tu_prms->i4_early_cbf = 1;
274
275
136k
            ps_tu_prms++;
276
277
            /* BR TU update */
278
136k
            ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
279
280
136k
            ps_tu_prms->u1_x_off = i4_x_off + (tu_size >> (depth + 1));
281
282
136k
            ps_tu_prms->u1_y_off = i4_y_off + (tu_size >> (depth + 1));
283
284
            /* Early CBF is not done for 4x4 transforms */
285
136k
            ps_tu_prms->i4_early_cbf = 1;
286
136k
        }
287
1.78M
        else
288
1.78M
        {
289
            /* Update the TU params */
290
1.78M
            ps_tu_prms->u1_tu_size = tu_size >> depth;
291
292
1.78M
            ps_tu_prms->u1_x_off = i4_x_off;
293
294
1.78M
            ps_tu_prms->u1_y_off = i4_y_off;
295
296
1.78M
            (*pnum_tu_in_cu)++;
297
298
            /* Early CBF update for current TU */
299
1.78M
            ps_tu_prms->i4_early_cbf = tu_early_cbf & 0x1;
300
1.78M
        }
301
1.91M
        if((*pnum_tu_in_cu) < MAX_TU_IN_CTB)
302
1.91M
        {
303
1.91M
            ps_tu_prms++;
304
305
1.91M
            ps_tu_prms->u1_tu_size = tu_size;
306
1.91M
        }
307
1.91M
    }
308
309
2.08M
    return ps_tu_prms;
310
2.08M
}
311
312
/*!
313
******************************************************************************
314
* \if Function name : ihevce_compute_quant_rel_param \endif
315
*
316
* \brief
317
*    This function updates quantization related parameters like qp_mod_6 etc in
318
*       context according to new qp
319
*
320
* \date
321
*    08/01/2013
322
*
323
* \author
324
*    Ittiam
325
*
326
* \return
327
*
328
* List of Functions
329
*
330
*
331
******************************************************************************
332
*/
333
void ihevce_compute_quant_rel_param(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD8 i1_cu_qp)
334
8.08M
{
335
8.08M
    WORD32 i4_div_factor;
336
337
8.08M
    ps_ctxt->i4_chrm_cu_qp =
338
8.08M
        (ps_ctxt->u1_chroma_array_type == 2)
339
8.08M
            ? MIN(i1_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51)
340
8.08M
            : gai1_ihevc_chroma_qp_scale[i1_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET];
341
8.08M
    ps_ctxt->i4_cu_qp_div6 = (i1_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
342
8.08M
    i4_div_factor = (i1_cu_qp + 3) / 6;
343
8.08M
    i4_div_factor = CLIP3(i4_div_factor, 3, 6);
344
8.08M
    ps_ctxt->i4_cu_qp_mod6 = (i1_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
345
8.08M
    ps_ctxt->i4_chrm_cu_qp_div6 = (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
346
8.08M
    ps_ctxt->i4_chrm_cu_qp_mod6 = (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
347
348
8.08M
#define INTER_RND_QP_BY_6
349
8.08M
#ifdef INTER_RND_QP_BY_6
350
    /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
351
8.08M
    {
352
8.08M
        ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] =
353
8.08M
            (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)6) + 0.5f);
354
8.08M
    }
355
#else
356
    /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
357
    ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3;
358
#endif
359
360
8.08M
    if(ISLICE == ps_ctxt->i1_slice_type)
361
3.24M
    {
362
        /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
363
3.24M
        ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
364
3.24M
            (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)3) + 0.5f);
365
3.24M
    }
366
4.83M
    else
367
4.83M
    {
368
4.83M
        if(0) /*TRAQO_EXT_ENABLE_ONE_THIRD_RND*/
369
0
        {
370
            /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
371
0
            ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
372
0
                (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)3) + 0.5f);
373
0
        }
374
4.83M
        else
375
4.83M
        {
376
            /* quant factor without RDOQ is 1/6th of shift for intra in inter pic */
377
4.83M
            ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
378
4.83M
                ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER];
379
            /* (1 << QUANT_ROUND_FACTOR_Q) / 6; */
380
4.83M
        }
381
4.83M
    }
382
8.08M
}
383
384
/*!
385
******************************************************************************
386
* \if Function name : ihevce_populate_cl_cu_lambda_prms \endif
387
*
388
* \brief
389
*    Function whihc calculates the Lambda params for current picture
390
*
391
* \param[in] ps_enc_ctxt : encoder ctxt pointer
392
* \param[in] ps_cur_pic_ctxt : current pic ctxt
393
* \param[in] i4_cur_frame_qp : current pic QP
394
* \param[in] first_field : is first field flag
395
* \param[in] i4_temporal_lyr_id : Current picture layer id
396
*
397
* \return
398
*    None
399
*
400
* \author
401
*  Ittiam
402
*
403
*****************************************************************************
404
*/
405
void ihevce_populate_cl_cu_lambda_prms(
406
    ihevce_enc_loop_ctxt_t *ps_ctxt,
407
    frm_lambda_ctxt_t *ps_frm_lamda,
408
    WORD32 i4_slice_type,
409
    WORD32 i4_temporal_lyr_id,
410
    WORD32 i4_lambda_type)
411
150k
{
412
150k
    WORD32 i4_curr_cu_qp, i4_curr_cu_qp_offset;
413
150k
    double lambda_modifier;
414
150k
    double lambda_uv_modifier;
415
150k
    double lambda;
416
150k
    double lambda_uv;
417
418
150k
    WORD32 i4_qp_bdoffset = 6 * (ps_ctxt->u1_bit_depth - 8);
419
420
    /*Populate lamda modifier */
421
150k
    ps_ctxt->i4_lamda_modifier = ps_frm_lamda->lambda_modifier;
422
150k
    ps_ctxt->i4_uv_lamda_modifier = ps_frm_lamda->lambda_uv_modifier;
423
150k
    ps_ctxt->i4_temporal_layer_id = i4_temporal_lyr_id;
424
425
150k
    for(i4_curr_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
426
7.83M
        i4_curr_cu_qp <= ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
427
7.68M
        i4_curr_cu_qp++)
428
7.68M
    {
429
7.68M
        WORD32 chroma_qp = (ps_ctxt->i4_chroma_format == IV_YUV_422SP_UV)
430
7.68M
                               ? MIN(i4_curr_cu_qp, 51)
431
7.68M
                               : gai1_ihevc_chroma_qp_scale[i4_curr_cu_qp + MAX_QP_BD_OFFSET];
432
433
7.68M
        i4_curr_cu_qp_offset = i4_curr_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
434
435
7.68M
        lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
436
7.68M
        lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
437
438
7.68M
        if((BSLICE == i4_slice_type) && (i4_temporal_lyr_id))
439
1.17M
        {
440
1.17M
            lambda_modifier = ps_frm_lamda->lambda_modifier *
441
1.17M
                              CLIP3((((double)(i4_curr_cu_qp - 12)) / 6.0), 2.00, 4.00);
442
1.17M
            lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier *
443
1.17M
                                 CLIP3((((double)(chroma_qp - 12)) / 6.0), 2.00, 4.00);
444
1.17M
        }
445
6.50M
        else
446
6.50M
        {
447
6.50M
            lambda_modifier = ps_frm_lamda->lambda_modifier;
448
6.50M
            lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier;
449
6.50M
        }
450
7.68M
        if(ps_ctxt->i4_use_const_lamda_modifier)
451
0
        {
452
0
            if(ISLICE == ps_ctxt->i1_slice_type)
453
0
            {
454
0
                lambda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
455
0
                lambda_uv_modifier = ps_ctxt->f_i_pic_lamda_modifier;
456
0
            }
457
0
            else
458
0
            {
459
0
                lambda_modifier = CONST_LAMDA_MOD_VAL;
460
0
                lambda_uv_modifier = CONST_LAMDA_MOD_VAL;
461
0
            }
462
0
        }
463
7.68M
        switch(i4_lambda_type)
464
7.68M
        {
465
0
        case 0:
466
0
        {
467
0
            i4_qp_bdoffset = 0;
468
469
0
            lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
470
0
            lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
471
472
0
            lambda *= lambda_modifier;
473
0
            lambda_uv *= lambda_uv_modifier;
474
475
0
            ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
476
0
                (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
477
478
0
            ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
479
0
                (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
480
481
0
            ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
482
0
                (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
483
0
            if(ps_ctxt->i4_use_const_lamda_modifier)
484
0
            {
485
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
486
0
                    (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
487
0
            }
488
0
            else
489
0
            {
490
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
491
0
                    (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
492
0
            }
493
494
0
            ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
495
0
                (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
496
497
0
            ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
498
0
                ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset];
499
500
0
            ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
501
0
                ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset];
502
503
0
            ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
504
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset];
505
506
0
            ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
507
0
                ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset];
508
509
0
            break;
510
0
        }
511
0
        case 1:
512
0
        {
513
0
            lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
514
0
            lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
515
516
0
            lambda *= lambda_modifier;
517
0
            lambda_uv *= lambda_uv_modifier;
518
519
0
            ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
520
0
                (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
521
522
0
            ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
523
0
                (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
524
525
0
            ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
526
0
                (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
527
0
            if(ps_ctxt->i4_use_const_lamda_modifier)
528
0
            {
529
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
530
0
                    (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
531
0
            }
532
0
            else
533
0
            {
534
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
535
0
                    (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
536
0
            }
537
0
            ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
538
0
                (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
539
540
0
            ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
541
0
                ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset];
542
543
0
            ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
544
0
                ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset];
545
546
0
            ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
547
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset];
548
549
0
            ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
550
0
                ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset];
551
552
0
            break;
553
0
        }
554
7.68M
        case 2:
555
7.68M
        {
556
7.68M
            lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
557
7.68M
            lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
558
559
7.68M
            lambda *= lambda_modifier;
560
7.68M
            lambda_uv *= lambda_uv_modifier;
561
562
7.68M
            ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
563
7.68M
                (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
564
565
7.68M
            ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
566
7.68M
                (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
567
568
7.68M
            ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
569
7.68M
                (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
570
571
7.68M
            if(ps_ctxt->i4_use_const_lamda_modifier)
572
0
            {
573
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
574
0
                    (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
575
0
            }
576
7.68M
            else
577
7.68M
            {
578
7.68M
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
579
7.68M
                    (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
580
7.68M
            }
581
7.68M
            ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
582
7.68M
                (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
583
584
            /* lambda corresponding to 8- bit, for metrics based on 8- bit ( Example 8bit SAD in encloop)*/
585
7.68M
            lambda = pow(2.0, (((double)(i4_curr_cu_qp - 12)) / 3.0));
586
7.68M
            lambda_uv = pow(2.0, (((double)(chroma_qp - 12)) / 3.0));
587
588
7.68M
            lambda *= lambda_modifier;
589
7.68M
            lambda_uv *= lambda_uv_modifier;
590
591
7.68M
            ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
592
7.68M
                (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
593
594
7.68M
            ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
595
7.68M
                (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
596
597
7.68M
            ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
598
7.68M
                (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
599
7.68M
            if(ps_ctxt->i4_use_const_lamda_modifier)
600
0
            {
601
0
                ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
602
0
                    (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
603
0
            }
604
7.68M
            else
605
7.68M
            {
606
7.68M
                ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
607
7.68M
                    (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
608
7.68M
            }
609
610
7.68M
            ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
611
7.68M
                (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
612
613
7.68M
            break;
614
0
        }
615
0
        default:
616
0
        {
617
            /* Intended to be a barren wasteland! */
618
0
            ASSERT(0);
619
0
        }
620
7.68M
        }
621
7.68M
    }
622
150k
}
623
624
/*!
625
******************************************************************************
626
* \if Function name : ihevce_get_cl_cu_lambda_prms \endif
627
*
628
* \brief
629
*    Function whihc calculates the Lambda params for current picture
630
*
631
* \param[in] ps_enc_ctxt : encoder ctxt pointer
632
* \param[in] ps_cur_pic_ctxt : current pic ctxt
633
* \param[in] i4_cur_frame_qp : current pic QP
634
* \param[in] first_field : is first field flag
635
* \param[in] i4_temporal_lyr_id : Current picture layer id
636
*
637
* \return
638
*    None
639
*
640
* \author
641
*  Ittiam
642
*
643
*****************************************************************************
644
*/
645
void ihevce_get_cl_cu_lambda_prms(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 i4_cur_cu_qp)
646
8.08M
{
647
8.08M
    WORD32 chroma_qp = (ps_ctxt->u1_chroma_array_type == 2)
648
8.08M
                           ? MIN(i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51)
649
8.08M
                           : gai1_ihevc_chroma_qp_scale
650
8.08M
                                 [i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET];
651
652
    /* closed loop ssd lambda is same as final lambda */
653
8.08M
    ps_ctxt->i8_cl_ssd_lambda_qf =
654
8.08M
        ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
655
8.08M
    ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
656
8.08M
        ps_ctxt
657
8.08M
            ->i8_cl_ssd_lambda_chroma_qf_array[chroma_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
658
8.08M
    ps_ctxt->u4_chroma_cost_weighing_factor =
659
8.08M
        ps_ctxt->au4_chroma_cost_weighing_factor_array
660
8.08M
            [chroma_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
661
    /* --- Initialized the lambda for SATD computations --- */
662
    /* --- 0.95 is the multiplication factor as per HM --- */
663
    /* --- 1.9 is the multiplication factor for Hadamard Transform --- */
664
8.08M
    ps_ctxt->i4_satd_lamda =
665
8.08M
        ps_ctxt->i4_satd_lamda_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
666
8.08M
    ps_ctxt->i4_sad_lamda =
667
8.08M
        ps_ctxt->i4_sad_type2_lamda_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
668
8.08M
}
669
670
/*!
671
******************************************************************************
672
* \if Function name : ihevce_update_pred_qp \endif
673
*
674
* \brief
675
*    Computes pred qp for the given CU
676
*
677
* \param[in]
678
*
679
* \return
680
*
681
*
682
* \author
683
*  Ittiam
684
*
685
*****************************************************************************
686
*/
687
void ihevce_update_pred_qp(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 cu_pos_x, WORD32 cu_pos_y)
688
2.92M
{
689
2.92M
    WORD32 i4_pred_qp = 0x7FFFFFFF;
690
2.92M
    WORD32 i4_top, i4_left;
691
2.92M
    if(cu_pos_x == 0 && cu_pos_y == 0) /*CTB start*/
692
365k
    {
693
365k
        i4_pred_qp = ps_ctxt->i4_prev_QP;
694
365k
    }
695
2.55M
    else
696
2.55M
    {
697
2.55M
        if(cu_pos_y == 0) /*CTB boundary*/
698
595k
        {
699
595k
            i4_top = ps_ctxt->i4_prev_QP;
700
595k
        }
701
1.96M
        else /*within CTB*/
702
1.96M
        {
703
1.96M
            i4_top = ps_ctxt->ai4_qp_qg[(cu_pos_y - 1) * 8 + (cu_pos_x)];
704
1.96M
        }
705
2.55M
        if(cu_pos_x == 0) /*CTB boundary*/
706
610k
        {
707
610k
            i4_left = ps_ctxt->i4_prev_QP;
708
610k
        }
709
1.94M
        else /*within CTB*/
710
1.94M
        {
711
1.94M
            i4_left = ps_ctxt->ai4_qp_qg[(cu_pos_y)*8 + (cu_pos_x - 1)];
712
1.94M
        }
713
2.55M
        i4_pred_qp = (i4_left + i4_top + 1) >> 1;
714
2.55M
    }
715
2.92M
    ps_ctxt->i4_pred_qp = i4_pred_qp;
716
2.92M
    return;
717
2.92M
}
718
/*!
719
******************************************************************************
720
* \if Function name : ihevce_compute_cu_level_QP \endif
721
*
722
* \brief
723
*    Computes cu level QP with Traqo,Spatial Mod and In-frame RC
724
*
725
* \param[in]
726
*
727
* \return
728
*
729
*
730
* \author
731
*  Ittiam
732
*
733
*****************************************************************************
734
*/
735
void ihevce_compute_cu_level_QP(
736
    ihevce_enc_loop_ctxt_t *ps_ctxt,
737
    WORD32 i4_activity_for_qp,
738
    WORD32 i4_activity_for_lamda,
739
    WORD32 i4_reduce_qp)
740
7.04M
{
741
    /*modify quant related param in ctxt based on current cu qp*/
742
7.04M
    WORD32 i4_input_QP = ps_ctxt->i4_frame_mod_qp;
743
7.04M
    WORD32 cu_qp = i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
744
745
7.04M
    WORD32 i4_max_qp_allowed;
746
7.04M
    WORD32 i4_min_qp_allowed;
747
7.04M
    WORD32 i4_pred_qp;
748
749
7.04M
    i4_pred_qp = ps_ctxt->i4_pred_qp;
750
751
7.04M
    if(ps_ctxt->i4_sub_pic_level_rc)
752
0
    {
753
0
        i4_max_qp_allowed = (i4_pred_qp + (25 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 2)));
754
0
        i4_min_qp_allowed = (i4_pred_qp - (26 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 2)));
755
0
    }
756
7.04M
    else
757
7.04M
    {
758
7.04M
        i4_max_qp_allowed = (i4_input_QP + (7 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 4)));
759
7.04M
        i4_min_qp_allowed = (i4_input_QP - (18 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 4)));
760
7.04M
    }
761
7.04M
    if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6))
762
0
        return;
763
764
#if LAMDA_BASED_ON_QUANT
765
    i4_activity_for_lamda = i4_activity_for_qp;
766
#endif
767
768
7.04M
    if(i4_activity_for_qp != -1)
769
7.04M
    {
770
7.04M
        cu_qp = (ps_ctxt->ps_rc_quant_ctxt
771
7.04M
                     ->pi4_qp_to_qscale[i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]);
772
7.04M
        if(ps_ctxt->i4_qp_mod)
773
7.04M
        {
774
            /*Recompute the Qp as per enc thread's frame level Qp*/
775
7.04M
            ASSERT(i4_activity_for_qp > 0);
776
7.04M
            cu_qp = ((cu_qp * i4_activity_for_qp) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
777
7.04M
                    QP_LEVEL_MOD_ACT_FACTOR;
778
7.04M
        }
779
780
        // To avoid access of uninitialised Qscale to qp conversion table
781
7.04M
        if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale)
782
211k
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale;
783
6.82M
        else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale)
784
0
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale;
785
786
7.04M
        cu_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[cu_qp];
787
788
7.04M
        if((1 == i4_reduce_qp) && (cu_qp > 1))
789
0
            cu_qp--;
790
791
        /*CLIP the delta to obey standard allowed QP variation of (-26 + offset/2) to (25 + offset/2)*/
792
7.04M
        if(cu_qp > i4_max_qp_allowed)
793
0
            cu_qp = i4_max_qp_allowed;
794
7.04M
        else if(cu_qp < i4_min_qp_allowed)
795
0
            cu_qp = i4_min_qp_allowed;
796
797
        /* CLIP to maintain Qp between user configured and min and max Qp values*/
798
7.04M
        if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp)
799
0
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
800
7.04M
        else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp)
801
600k
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
802
803
        /*cu qp must be populated in cu_analyse_t struct*/
804
7.04M
        ps_ctxt->i4_cu_qp = cu_qp;
805
        /*recompute quant related param at every cu level*/
806
7.04M
        ihevce_compute_quant_rel_param(ps_ctxt, cu_qp);
807
7.04M
    }
808
809
    /*Decoupling qp and lamda calculation */
810
7.04M
    if(i4_activity_for_lamda != -1)
811
7.04M
    {
812
7.04M
        cu_qp = (ps_ctxt->ps_rc_quant_ctxt
813
7.04M
                     ->pi4_qp_to_qscale[i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]);
814
815
7.04M
        if(ps_ctxt->i4_qp_mod)
816
7.04M
        {
817
7.04M
#if MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON
818
            /*Recompute the Qp as per enc thread's frame level Qp*/
819
7.04M
            ASSERT(i4_activity_for_lamda > 0);
820
7.04M
            cu_qp = ((cu_qp * i4_activity_for_lamda) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
821
7.04M
                    QP_LEVEL_MOD_ACT_FACTOR;
822
7.04M
#endif
823
7.04M
        }
824
7.04M
        if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale)
825
122k
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale;
826
6.91M
        else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale)
827
0
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale;
828
829
7.04M
        cu_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[cu_qp];
830
831
        /*CLIP the delta to obey standard allowed QP variation of (-26 + offset/2) to (25 + offset/2)*/
832
7.04M
        if(cu_qp > i4_max_qp_allowed)
833
0
            cu_qp = i4_max_qp_allowed;
834
7.04M
        else if(cu_qp < i4_min_qp_allowed)
835
0
            cu_qp = i4_min_qp_allowed;
836
837
        /* CLIP to maintain Qp between user configured and min and max Qp values*/
838
7.04M
        if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp)
839
0
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
840
7.04M
        else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp)
841
1.16M
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
842
        /* get frame level lambda params */
843
7.04M
        ihevce_get_cl_cu_lambda_prms(
844
7.04M
            ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? cu_qp : ps_ctxt->i4_frame_qp);
845
7.04M
    }
846
7.04M
}
847
848
void ihevce_update_cu_level_qp_lamda(
849
    ihevce_enc_loop_ctxt_t *ps_ctxt, cu_analyse_t *ps_cu_analyse, WORD32 trans_size, WORD32 is_intra)
850
7.04M
{
851
7.04M
    WORD32 i4_act_counter = 0, i4_act_counter_lamda = 0;
852
853
7.04M
    if(ps_cu_analyse->u1_cu_size == 64)
854
166k
    {
855
166k
        ASSERT((trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4));
856
166k
        i4_act_counter = (trans_size == 16) + 2 * ((trans_size == 8) || (trans_size == 4));
857
166k
        i4_act_counter_lamda = 3;
858
166k
    }
859
6.87M
    else if(ps_cu_analyse->u1_cu_size == 32)
860
1.27M
    {
861
1.27M
        ASSERT((trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4));
862
1.27M
        i4_act_counter = (trans_size == 16) + 2 * ((trans_size == 8) || (trans_size == 4));
863
1.27M
        i4_act_counter_lamda = 0;
864
1.27M
    }
865
5.60M
    else if(ps_cu_analyse->u1_cu_size == 16)
866
3.06M
    {
867
3.06M
        ASSERT((trans_size == 16) || (trans_size == 8) || (trans_size == 4));
868
3.06M
        i4_act_counter = (trans_size == 8) || (trans_size == 4);
869
3.06M
        i4_act_counter_lamda = 0;
870
3.06M
    }
871
2.53M
    else if(ps_cu_analyse->u1_cu_size == 8)
872
2.53M
    {
873
2.53M
        ASSERT((trans_size == 8) || (trans_size == 4));
874
2.53M
        i4_act_counter = 1;
875
2.53M
        i4_act_counter_lamda = 0;
876
2.53M
    }
877
0
    else
878
0
    {
879
0
        ASSERT(0);
880
0
    }
881
882
7.04M
    if(ps_ctxt->i4_use_ctb_level_lamda)
883
0
    {
884
0
        ihevce_compute_cu_level_QP(
885
0
            ps_ctxt, ps_cu_analyse->i4_act_factor[i4_act_counter][is_intra], -1, 0);
886
0
    }
887
7.04M
    else
888
7.04M
    {
889
7.04M
        ihevce_compute_cu_level_QP(
890
7.04M
            ps_ctxt,
891
7.04M
            ps_cu_analyse->i4_act_factor[i4_act_counter][is_intra],
892
7.04M
            ps_cu_analyse->i4_act_factor[i4_act_counter_lamda][is_intra],
893
7.04M
            0);
894
7.04M
    }
895
896
7.04M
    ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp;
897
7.04M
}
898
899
/**
900
*******************************************************************************
901
* \if Function name : ihevce_scan_coeffs \endif
902
*
903
* @brief * Computes the coeff buffer for a coded TU for entropy coding
904
*
905
* @par   Description
906
* Computes the coeff buffer for a coded TU for entropy coding
907
*
908
* \param[in] pi2_quan_coeffs Quantized coefficient context
909
*
910
* \param[in] scan_idx Scan index specifying the scan order
911
*
912
* \param[in] trans_size Transform unit size
913
*
914
* \param[inout] pu1_out_data output coeff buffer for a coded TU for entropy coding
915
*
916
* \param[in] pu1_csbf_buf csb flag buffer
917
*
918
* @returns num_bytes
919
* Number of bytes written to pu1_out_data
920
*
921
* @remarks
922
*
923
* \author
924
*  Ittiam
925
*
926
*******************************************************************************
927
*/
928
929
WORD32 ihevce_scan_coeffs(
930
    WORD16 *pi2_quant_coeffs,
931
    WORD32 *pi4_subBlock2csbfId_map,
932
    WORD32 scan_idx,
933
    WORD32 trans_size,
934
    UWORD8 *pu1_out_data,
935
    UWORD8 *pu1_csbf_buf,
936
    WORD32 i4_csbf_stride)
937
21.1M
{
938
21.1M
    WORD32 i, trans_unit_idx, num_gt1_flag;
939
21.1M
    UWORD16 u2_csbf0flags;
940
21.1M
    WORD32 num_bytes = 0;
941
21.1M
    UWORD8 *pu1_trans_table;
942
21.1M
    UWORD8 *pu1_csb_table;
943
21.1M
    WORD32 shift_value, mask_value;
944
21.1M
    UWORD16 u2_sig_coeff_abs_gt0_flags = 0, u2_sig_coeff_abs_gt1_flags = 0;
945
21.1M
    UWORD16 u2_sign_flags;
946
21.1M
    UWORD16 u2_abs_coeff_remaining[16];
947
21.1M
    WORD32 blk_row, blk_col;
948
949
21.1M
    UWORD8 *pu1_out_data_header;
950
21.1M
    UWORD16 *pu2_out_data_coeff;
951
952
21.1M
    WORD32 x_pos, y_pos;
953
21.1M
    WORD32 quant_coeff;
954
955
21.1M
    WORD32 num_gt0_flag;
956
21.1M
    (void)i4_csbf_stride;
957
21.1M
    pu1_out_data_header = pu1_out_data;
958
    /* Need only last 3 bits, rest are reserved for debugging and making */
959
    /* WORD alignment */
960
21.1M
    u2_csbf0flags = 0xBAD0;
961
962
    /* Select proper order for your transform unit and csb based on scan_idx*/
963
    /* and the trans_size */
964
965
    /* scan order inside a csb */
966
21.1M
    pu1_csb_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
967
    /* GETRANGE will give the log_2 of trans_size to shift_value */
968
21.1M
    GETRANGE(shift_value, trans_size);
969
21.1M
    shift_value = shift_value - 3; /* for finding. row no. from scan index */
970
21.1M
    mask_value = (trans_size / 4) - 1; /*for finding the col. no. from scan index*/
971
21.1M
    switch(trans_size)
972
21.1M
    {
973
481k
    case 32:
974
481k
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_8x8[scan_idx][0]);
975
481k
        break;
976
1.63M
    case 16:
977
1.63M
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
978
1.63M
        break;
979
3.94M
    case 8:
980
3.94M
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_2x2[scan_idx][0]);
981
3.94M
        break;
982
15.0M
    case 4:
983
15.0M
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_1x1[0]);
984
15.0M
        break;
985
0
    default:
986
0
        DBG_PRINTF("Invalid Trans Size\n");
987
0
        return -1;
988
0
        break;
989
21.1M
    }
990
991
    /*go through each csb in the scan order for first non-zero coded sub-block*/
992
45.9M
    for(trans_unit_idx = (trans_size * trans_size / 16) - 1; trans_unit_idx >= 0; trans_unit_idx--)
993
45.9M
    {
994
        /* check for the first csb flag in our scan order */
995
45.9M
        if(pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]])
996
21.1M
        {
997
21.1M
            UWORD8 u1_last_x, u1_last_y;
998
            /* row of csb */
999
21.1M
            blk_row = pu1_trans_table[trans_unit_idx] >> shift_value;
1000
            /* col of csb */
1001
21.1M
            blk_col = pu1_trans_table[trans_unit_idx] & mask_value;
1002
1003
            /*check for the 1st non-0 values inside the csb in our scan order*/
1004
82.5M
            for(i = 15; i >= 0; i--)
1005
82.5M
            {
1006
82.5M
                x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
1007
82.5M
                y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
1008
1009
82.5M
                quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
1010
1011
82.5M
                if(quant_coeff != 0)
1012
21.1M
                    break;
1013
82.5M
            }
1014
1015
21.1M
            ASSERT(i >= 0);
1016
1017
21.1M
            u1_last_x = x_pos;
1018
21.1M
            u1_last_y = y_pos;
1019
1020
            /* storing last_x and last_y */
1021
21.1M
            *pu1_out_data_header = u1_last_x;
1022
21.1M
            pu1_out_data_header++;
1023
21.1M
            num_bytes++;
1024
21.1M
            *pu1_out_data_header = u1_last_y;
1025
21.1M
            pu1_out_data_header++;
1026
21.1M
            num_bytes++;
1027
1028
            /* storing the scan order */
1029
21.1M
            *pu1_out_data_header = scan_idx;
1030
21.1M
            pu1_out_data_header++;
1031
21.1M
            num_bytes++;
1032
            /* storing last_sub_block pos. in scan order count */
1033
21.1M
            *pu1_out_data_header = trans_unit_idx;
1034
21.1M
            pu1_out_data_header++;
1035
21.1M
            num_bytes++;
1036
1037
            /*stored the first 4 bytes, now all are word16. So word16 pointer*/
1038
21.1M
            pu2_out_data_coeff = (UWORD16 *)pu1_out_data_header;
1039
1040
            /* u2_csbf0flags word */
1041
21.1M
            u2_csbf0flags = 0xBAD0 | 1; /*since right&bottom csbf is 0*/
1042
            /* storing u2_csbf0flags word */
1043
21.1M
            *pu2_out_data_coeff = u2_csbf0flags;
1044
21.1M
            pu2_out_data_coeff++;
1045
21.1M
            num_bytes += 2;
1046
1047
21.1M
            num_gt0_flag = 1;
1048
21.1M
            num_gt1_flag = 0;
1049
21.1M
            u2_sign_flags = 0;
1050
1051
            /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
1052
21.1M
            u2_sig_coeff_abs_gt0_flags = u2_sig_coeff_abs_gt0_flags | (1 << i);
1053
21.1M
            if(abs(quant_coeff) > 1)
1054
11.7M
            {
1055
                /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
1056
11.7M
                u2_sig_coeff_abs_gt1_flags = u2_sig_coeff_abs_gt1_flags | (1 << i);
1057
                /* update u2_abs_coeff_remaining */
1058
11.7M
                u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
1059
1060
11.7M
                num_gt1_flag++;
1061
11.7M
            }
1062
1063
21.1M
            if(quant_coeff < 0)
1064
11.0M
            {
1065
                /* set the i th bit of u2_sign_flags */
1066
11.0M
                u2_sign_flags = u2_sign_flags | (1 << i);
1067
11.0M
            }
1068
1069
            /* Test remaining elements in our scan order */
1070
            /* Can optimize further by CLZ macro */
1071
277M
            for(i = i - 1; i >= 0; i--)
1072
255M
            {
1073
255M
                x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
1074
255M
                y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
1075
1076
255M
                quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
1077
1078
255M
                if(quant_coeff != 0)
1079
210M
                {
1080
                    /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
1081
210M
                    u2_sig_coeff_abs_gt0_flags |= (1 << i);
1082
1083
210M
                    if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE))
1084
177M
                    {
1085
                        /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
1086
177M
                        u2_sig_coeff_abs_gt1_flags |= (1 << i);
1087
1088
                        /* update u2_abs_coeff_remaining */
1089
177M
                        u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
1090
1091
177M
                        num_gt1_flag++; /*n0. of Ones in sig_coeff_abs_gt1_flag*/
1092
177M
                    }
1093
1094
210M
                    if(quant_coeff < 0)
1095
106M
                    {
1096
                        /* set the i th bit of u2_sign_flags */
1097
106M
                        u2_sign_flags |= (1 << i);
1098
106M
                    }
1099
1100
210M
                    num_gt0_flag++;
1101
210M
                }
1102
255M
            }
1103
1104
            /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */
1105
21.1M
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags;
1106
21.1M
            pu2_out_data_coeff++;
1107
21.1M
            num_bytes += 2;
1108
            /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */
1109
21.1M
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags;
1110
21.1M
            pu2_out_data_coeff++;
1111
21.1M
            num_bytes += 2;
1112
            /* storing u2_sign_flags 2 bytes */
1113
21.1M
            *pu2_out_data_coeff = u2_sign_flags;
1114
21.1M
            pu2_out_data_coeff++;
1115
21.1M
            num_bytes += 2;
1116
1117
            /* Store the u2_abs_coeff_remaining[] */
1118
209M
            for(i = 0; i < num_gt1_flag; i++)
1119
188M
            {
1120
                /* storing u2_abs_coeff_remaining[i] 2 bytes */
1121
188M
                *pu2_out_data_coeff = u2_abs_coeff_remaining[i];
1122
188M
                pu2_out_data_coeff++;
1123
188M
                num_bytes += 2;
1124
188M
            }
1125
1126
21.1M
            break; /*We just need this loop for finding 1st non-zero csb only*/
1127
21.1M
        }
1128
45.9M
    }
1129
1130
    /* go through remaining csb in the scan order */
1131
63.0M
    for(trans_unit_idx = trans_unit_idx - 1; trans_unit_idx >= 0; trans_unit_idx--)
1132
41.8M
    {
1133
41.8M
        blk_row = pu1_trans_table[trans_unit_idx] >> shift_value; /*row of csb*/
1134
41.8M
        blk_col = pu1_trans_table[trans_unit_idx] & mask_value; /*col of csb*/
1135
1136
        /* u2_csbf0flags word */
1137
41.8M
        u2_csbf0flags = 0xBAD0 | /* assuming csbf_buf has only 0 or 1 values */
1138
41.8M
                        (pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]]);
1139
1140
        /********************************************************************/
1141
        /* Minor hack: As per HEVC spec csbf in not signalled in stream for */
1142
        /* block0, instead sig coeff map is directly signalled. This is     */
1143
        /* taken care by forcing csbf for block0 to be 1 even if it is 0    */
1144
        /********************************************************************/
1145
41.8M
        if(0 == trans_unit_idx)
1146
5.16M
        {
1147
5.16M
            u2_csbf0flags |= 1;
1148
5.16M
        }
1149
1150
41.8M
        if((blk_col + 1 < trans_size / 4)) /* checking right boundary */
1151
35.1M
        {
1152
35.1M
            if(pu1_csbf_buf[pi4_subBlock2csbfId_map[blk_row * trans_size / 4 + blk_col + 1]])
1153
28.3M
            {
1154
                /* set the 2nd bit of u2_csbf0flags for right csbf */
1155
28.3M
                u2_csbf0flags = u2_csbf0flags | (1 << 1);
1156
28.3M
            }
1157
35.1M
        }
1158
41.8M
        if((blk_row + 1 < trans_size / 4)) /* checking bottom oundary */
1159
34.5M
        {
1160
34.5M
            if(pu1_csbf_buf[pi4_subBlock2csbfId_map[(blk_row + 1) * trans_size / 4 + blk_col]])
1161
28.9M
            {
1162
                /* set the 3rd bit of u2_csbf0flags  for bottom csbf */
1163
28.9M
                u2_csbf0flags = u2_csbf0flags | (1 << 2);
1164
28.9M
            }
1165
34.5M
        }
1166
1167
        /* storing u2_csbf0flags word */
1168
41.8M
        *pu2_out_data_coeff = u2_csbf0flags;
1169
41.8M
        pu2_out_data_coeff++;
1170
41.8M
        num_bytes += 2;
1171
1172
        /* check for the csb flag in our scan order */
1173
41.8M
        if(u2_csbf0flags & 0x1)
1174
36.9M
        {
1175
36.9M
            u2_sig_coeff_abs_gt0_flags = 0;
1176
36.9M
            u2_sig_coeff_abs_gt1_flags = 0;
1177
36.9M
            u2_sign_flags = 0;
1178
1179
36.9M
            num_gt0_flag = 0;
1180
36.9M
            num_gt1_flag = 0;
1181
            /* check for the non-0 values inside the csb in our scan order */
1182
            /* Can optimize further by CLZ macro */
1183
628M
            for(i = 15; i >= 0; i--)
1184
591M
            {
1185
591M
                x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
1186
591M
                y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
1187
1188
591M
                quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
1189
1190
591M
                if(quant_coeff != 0)
1191
463M
                {
1192
                    /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
1193
463M
                    u2_sig_coeff_abs_gt0_flags |= (1 << i);
1194
1195
463M
                    if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE))
1196
384M
                    {
1197
                        /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
1198
384M
                        u2_sig_coeff_abs_gt1_flags |= (1 << i);
1199
1200
                        /* update u2_abs_coeff_remaining */
1201
384M
                        u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
1202
1203
384M
                        num_gt1_flag++;
1204
384M
                    }
1205
1206
463M
                    if(quant_coeff < 0)
1207
238M
                    {
1208
                        /* set the i th bit of u2_sign_flags */
1209
238M
                        u2_sign_flags = u2_sign_flags | (1 << i);
1210
238M
                    }
1211
1212
463M
                    num_gt0_flag++;
1213
463M
                }
1214
591M
            }
1215
1216
            /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */
1217
36.9M
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags;
1218
36.9M
            pu2_out_data_coeff++;
1219
36.9M
            num_bytes += 2;
1220
1221
            /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */
1222
36.9M
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags;
1223
36.9M
            pu2_out_data_coeff++;
1224
36.9M
            num_bytes += 2;
1225
1226
            /* storing u2_sign_flags 2 bytes */
1227
36.9M
            *pu2_out_data_coeff = u2_sign_flags;
1228
36.9M
            pu2_out_data_coeff++;
1229
36.9M
            num_bytes += 2;
1230
1231
            /* Store the u2_abs_coeff_remaining[] */
1232
421M
            for(i = 0; i < num_gt1_flag; i++)
1233
384M
            {
1234
                /* storing u2_abs_coeff_remaining[i] 2 bytes */
1235
384M
                *pu2_out_data_coeff = u2_abs_coeff_remaining[i];
1236
384M
                pu2_out_data_coeff++;
1237
384M
                num_bytes += 2;
1238
384M
            }
1239
36.9M
        }
1240
41.8M
    }
1241
1242
21.1M
    return num_bytes; /* Return the number of bytes written to out_data */
1243
21.1M
}
1244
1245
/**
1246
*******************************************************************************
1247
* \if Function name : ihevce_populate_intra_pred_mode \endif
1248
*
1249
* \brief * populates intra pred modes,b2_mpm_idx,b1_prev_intra_luma_pred_flag &
1250
* b5_rem_intra_pred_mode for a CU based on nieghbouring CUs,
1251
*
1252
* \par   Description
1253
* Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
1254
* for a CU
1255
*
1256
* \param[in] top_intra_mode Top intra mode
1257
* \param[in] left_intra_mode Left intra mode
1258
* \param[in] available_top Top availability flag
1259
* \param[in] available_left Left availability flag
1260
* \param[in] cu_pos_y CU 'y' position
1261
* \param[in] ps_cand_mode_list pointer to populate candidate list
1262
*
1263
* \returns none
1264
*
1265
* \author
1266
*  Ittiam
1267
*
1268
*******************************************************************************
1269
*/
1270
1271
void ihevce_populate_intra_pred_mode(
1272
    WORD32 top_intra_mode,
1273
    WORD32 left_intra_mode,
1274
    WORD32 available_top,
1275
    WORD32 available_left,
1276
    WORD32 cu_pos_y,
1277
    WORD32 *ps_cand_mode_list)
1278
1.66M
{
1279
    /* local variables */
1280
1.66M
    WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top;
1281
1282
    /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */
1283
    /* N = top */
1284
1.66M
    if(0 == available_top)
1285
207k
    {
1286
207k
        cand_intra_pred_mode_top = INTRA_DC;
1287
207k
    }
1288
    /* for neighbour != INTRA, setting DC is done outside */
1289
1.46M
    else if(0 == cu_pos_y) /* It's on the CTB boundary */
1290
68.8k
    {
1291
68.8k
        cand_intra_pred_mode_top = INTRA_DC;
1292
68.8k
    }
1293
1.39M
    else
1294
1.39M
    {
1295
1.39M
        cand_intra_pred_mode_top = top_intra_mode;
1296
1.39M
    }
1297
1298
    /* N = left */
1299
1.66M
    if(0 == available_left)
1300
211k
    {
1301
211k
        cand_intra_pred_mode_left = INTRA_DC;
1302
211k
    }
1303
    /* for neighbour != INTRA, setting DC is done outside */
1304
1.45M
    else
1305
1.45M
    {
1306
1.45M
        cand_intra_pred_mode_left = left_intra_mode;
1307
1.45M
    }
1308
1309
    /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */
1310
1.66M
    if(cand_intra_pred_mode_left == cand_intra_pred_mode_top)
1311
579k
    {
1312
579k
        if(cand_intra_pred_mode_left < 2)
1313
405k
        {
1314
405k
            ps_cand_mode_list[0] = INTRA_PLANAR;
1315
405k
            ps_cand_mode_list[1] = INTRA_DC;
1316
405k
            ps_cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */
1317
405k
        }
1318
174k
        else
1319
174k
        {
1320
174k
            ps_cand_mode_list[0] = cand_intra_pred_mode_left;
1321
174k
            ps_cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32);
1322
174k
            ps_cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32);
1323
174k
        }
1324
579k
    }
1325
1.08M
    else
1326
1.08M
    {
1327
1.08M
        ps_cand_mode_list[0] = cand_intra_pred_mode_left;
1328
1.08M
        ps_cand_mode_list[1] = cand_intra_pred_mode_top;
1329
1330
1.08M
        if((cand_intra_pred_mode_left != INTRA_PLANAR) &&
1331
1.08M
           (cand_intra_pred_mode_top != INTRA_PLANAR))
1332
616k
        {
1333
616k
            ps_cand_mode_list[2] = INTRA_PLANAR;
1334
616k
        }
1335
473k
        else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC))
1336
179k
        {
1337
179k
            ps_cand_mode_list[2] = INTRA_DC;
1338
179k
        }
1339
294k
        else
1340
294k
        {
1341
294k
            ps_cand_mode_list[2] = INTRA_ANGULAR(26);
1342
294k
        }
1343
1.08M
    }
1344
1.66M
}
1345
/**
1346
*******************************************************************************
1347
* \if Function name : ihevce_intra_pred_mode_signaling \endif
1348
*
1349
* \brief * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx &
1350
* b5_rem_intra_pred_mode for a CU
1351
*
1352
* \par   Description
1353
* Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
1354
* for a CU
1355
*
1356
* \param[in] ps_nbr_top Top neighbour context
1357
* \param[in] ps_nbr_left Left neighbour context
1358
* \param[in] available_top Top availability flag
1359
* \param[in] available_left Left availability flag
1360
* \param[in] cu_pos_y CU 'y' position
1361
* \param[in] luma_intra_pred_mode_current the intra_pred_mode of current block
1362
* \param[inout] ps_intra_pred_mode_current
1363
* Pointer to structure having b1_prev_intra_luma_pred_flag, b2_mpm_idx and
1364
* b5_rem_intra_pred_mode
1365
*
1366
* \returns none
1367
*
1368
* \author
1369
*  Ittiam
1370
*
1371
*******************************************************************************
1372
*/
1373
1374
void ihevce_intra_pred_mode_signaling(
1375
    WORD32 top_intra_mode,
1376
    WORD32 left_intra_mode,
1377
    WORD32 available_top,
1378
    WORD32 available_left,
1379
    WORD32 cu_pos_y,
1380
    WORD32 luma_intra_pred_mode_current,
1381
    intra_prev_rem_flags_t *ps_intra_pred_mode_current)
1382
27.0M
{
1383
    /* local variables */
1384
27.0M
    WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top;
1385
27.0M
    WORD32 cand_mode_list[3];
1386
1387
27.0M
    ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0;
1388
27.0M
    ps_intra_pred_mode_current->b2_mpm_idx = 0;  // for safety purpose
1389
27.0M
    ps_intra_pred_mode_current->b5_rem_intra_pred_mode = 0;
1390
1391
    /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */
1392
    /* N = top */
1393
27.0M
    if(0 == available_top)
1394
3.51M
    {
1395
3.51M
        cand_intra_pred_mode_top = INTRA_DC;
1396
3.51M
    }
1397
    /* for neighbour != INTRA, setting DC is done outside */
1398
23.5M
    else if(0 == cu_pos_y) /* It's on the CTB boundary */
1399
1.69M
    {
1400
1.69M
        cand_intra_pred_mode_top = INTRA_DC;
1401
1.69M
    }
1402
21.8M
    else
1403
21.8M
    {
1404
21.8M
        cand_intra_pred_mode_top = top_intra_mode;
1405
21.8M
    }
1406
1407
    /* N = left */
1408
27.0M
    if(0 == available_left)
1409
3.11M
    {
1410
3.11M
        cand_intra_pred_mode_left = INTRA_DC;
1411
3.11M
    }
1412
    /* for neighbour != INTRA, setting DC is done outside */
1413
23.9M
    else
1414
23.9M
    {
1415
23.9M
        cand_intra_pred_mode_left = left_intra_mode;
1416
23.9M
    }
1417
1418
    /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */
1419
27.0M
    if(cand_intra_pred_mode_left == cand_intra_pred_mode_top)
1420
12.5M
    {
1421
12.5M
        if(cand_intra_pred_mode_left < 2)
1422
9.66M
        {
1423
9.66M
            cand_mode_list[0] = INTRA_PLANAR;
1424
9.66M
            cand_mode_list[1] = INTRA_DC;
1425
9.66M
            cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */
1426
9.66M
        }
1427
2.92M
        else
1428
2.92M
        {
1429
2.92M
            cand_mode_list[0] = cand_intra_pred_mode_left;
1430
2.92M
            cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32);
1431
2.92M
            cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32);
1432
2.92M
        }
1433
12.5M
    }
1434
14.4M
    else
1435
14.4M
    {
1436
14.4M
        cand_mode_list[0] = cand_intra_pred_mode_left;
1437
14.4M
        cand_mode_list[1] = cand_intra_pred_mode_top;
1438
1439
14.4M
        if((cand_intra_pred_mode_left != INTRA_PLANAR) &&
1440
14.4M
           (cand_intra_pred_mode_top != INTRA_PLANAR))
1441
6.73M
        {
1442
6.73M
            cand_mode_list[2] = INTRA_PLANAR;
1443
6.73M
        }
1444
7.72M
        else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC))
1445
2.53M
        {
1446
2.53M
            cand_mode_list[2] = INTRA_DC;
1447
2.53M
        }
1448
5.18M
        else
1449
5.18M
        {
1450
5.18M
            cand_mode_list[2] = INTRA_ANGULAR(26);
1451
5.18M
        }
1452
14.4M
    }
1453
1454
    /* Signal Generation */
1455
1456
    /* Flag & mpm_index generation */
1457
27.0M
    if(cand_mode_list[0] == luma_intra_pred_mode_current)
1458
8.96M
    {
1459
8.96M
        ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
1460
8.96M
        ps_intra_pred_mode_current->b2_mpm_idx = 0;
1461
8.96M
    }
1462
18.0M
    else if(cand_mode_list[1] == luma_intra_pred_mode_current)
1463
7.41M
    {
1464
7.41M
        ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
1465
7.41M
        ps_intra_pred_mode_current->b2_mpm_idx = 1;
1466
7.41M
    }
1467
10.6M
    else if(cand_mode_list[2] == luma_intra_pred_mode_current)
1468
3.79M
    {
1469
3.79M
        ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
1470
3.79M
        ps_intra_pred_mode_current->b2_mpm_idx = 2;
1471
3.79M
    }
1472
    /* Flag & b5_rem_intra_pred_mode generation */
1473
6.86M
    else
1474
6.86M
    {
1475
6.86M
        WORD32 rem_mode;
1476
1477
6.86M
        ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0;
1478
1479
        /* sorting cand_mode_list */
1480
6.86M
        if(cand_mode_list[0] > cand_mode_list[1])
1481
3.07M
        {
1482
3.07M
            SWAP(cand_mode_list[0], cand_mode_list[1]);
1483
3.07M
        }
1484
6.86M
        if(cand_mode_list[0] > cand_mode_list[2])
1485
2.77M
        {
1486
2.77M
            SWAP(cand_mode_list[0], cand_mode_list[2]);
1487
2.77M
        }
1488
6.86M
        if(cand_mode_list[1] > cand_mode_list[2])
1489
3.62M
        {
1490
3.62M
            SWAP(cand_mode_list[1], cand_mode_list[2]);
1491
3.62M
        }
1492
1493
6.86M
        rem_mode = luma_intra_pred_mode_current;
1494
1495
6.86M
        if((rem_mode) >= cand_mode_list[2])
1496
2.30M
        {
1497
2.30M
            (rem_mode)--;
1498
2.30M
        }
1499
6.86M
        if((rem_mode) >= cand_mode_list[1])
1500
5.72M
        {
1501
5.72M
            (rem_mode)--;
1502
5.72M
        }
1503
6.86M
        if((rem_mode) >= cand_mode_list[0])
1504
6.29M
        {
1505
6.29M
            (rem_mode)--;
1506
6.29M
        }
1507
6.86M
        ps_intra_pred_mode_current->b5_rem_intra_pred_mode = rem_mode;
1508
6.86M
    }
1509
27.0M
}
1510
1511
void ihevce_quant_rounding_factor_gen(
1512
    WORD32 i4_trans_size,
1513
    WORD32 is_luma,
1514
    rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt,
1515
    WORD32 *pi4_quant_round_0_1,
1516
    WORD32 *pi4_quant_round_1_2,
1517
    double i4_lamda_modifier,
1518
    UWORD8 i4_is_tu_level_quant_rounding)
1519
8.35M
{
1520
    //WORD32 i4_scan_idx = ps_ctxt->i4_scan_idx;
1521
8.35M
    UWORD8 *pu1_ctxt_model;
1522
8.35M
    WORD32 scan_pos;
1523
8.35M
    WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag    */
1524
8.35M
    WORD32 abs_gt1_base_ctxt;
1525
8.35M
    WORD32 log2_tr_size, i;
1526
8.35M
    UWORD16 u4_bits_estimated_r0, u4_bits_estimated_r1, u4_bits_estimated_r2;
1527
8.35M
    UWORD16 u4_bits_estimated_r1_temp;
1528
8.35M
    WORD32 j = 0;
1529
8.35M
    WORD32 k = 0;
1530
8.35M
    WORD32 temp2;
1531
1532
8.35M
    double i4_lamda_mod = i4_lamda_modifier * pow(2.0, (-8.0 / 3.0));
1533
8.35M
    LWORD64 lamda_mod = (LWORD64)(i4_lamda_mod * (1 << LAMDA_Q_SHIFT_FACT));
1534
    /* transform size to log2transform size */
1535
8.35M
    GETRANGE(log2_tr_size, i4_trans_size);
1536
8.35M
    log2_tr_size -= 1;
1537
1538
8.35M
    if(1 == i4_is_tu_level_quant_rounding)
1539
0
    {
1540
0
        entropy_context_t *ps_cur_tu_entropy;
1541
0
        cab_ctxt_t *ps_cabac;
1542
0
        WORD32 curr_buf_idx = ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
1543
0
        ps_cur_tu_entropy = &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[curr_buf_idx];
1544
1545
0
        ps_cabac = &ps_cur_tu_entropy->s_cabac_ctxt;
1546
1547
0
        pu1_ctxt_model = &ps_cabac->au1_ctxt_models[0];
1548
0
    }
1549
8.35M
    else
1550
8.35M
    {
1551
8.35M
        pu1_ctxt_model = &ps_rdopt_entropy_ctxt->au1_init_cabac_ctxt_states[0];
1552
8.35M
    }
1553
    /*If transform size is 4x4, then only one sub-block*/
1554
8.35M
    if(is_luma)
1555
5.08M
    {
1556
5.08M
        sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
1557
5.08M
        abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG;
1558
1559
5.08M
        if(3 == log2_tr_size)
1560
1.81M
        {
1561
            /* 8x8 transform size */
1562
            /* Assuming diagnol scan idx for now */
1563
1.81M
            sig_coeff_base_ctxt += 9;
1564
1.81M
        }
1565
3.26M
        else if(3 < log2_tr_size)
1566
1.45M
        {
1567
            /* larger transform sizes */
1568
1.45M
            sig_coeff_base_ctxt += 21;
1569
1.45M
        }
1570
5.08M
    }
1571
3.26M
    else
1572
3.26M
    {
1573
        /* chroma context initializations */
1574
3.26M
        sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
1575
3.26M
        abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16;
1576
1577
3.26M
        if(3 == log2_tr_size)
1578
1.08M
        {
1579
            /* 8x8 transform size */
1580
1.08M
            sig_coeff_base_ctxt += 9;
1581
1.08M
        }
1582
2.17M
        else if(3 < log2_tr_size)
1583
363k
        {
1584
            /* larger transform sizes */
1585
363k
            sig_coeff_base_ctxt += 12;
1586
363k
        }
1587
3.26M
    }
1588
1589
    /*Transform size of 4x4 will have only a single CSB */
1590
    /* derive the context inc as per section 9.3.3.1.4 */
1591
1592
8.35M
    if(2 == log2_tr_size)
1593
3.63M
    {
1594
3.63M
        UWORD8 sig_ctxinc;
1595
3.63M
        WORD32 state_mps;
1596
3.63M
        WORD32 gt1_ctxt = 0;
1597
3.63M
        WORD32 ctxt_set = 0;
1598
3.63M
        WORD32 ctxt_idx = 0;
1599
1600
        /* context set based on luma subblock pos */
1601
1602
        /* Encodet the abs level gt1 bins */
1603
        /* Currently calculating trade off between mps(2) and mps(1)*/
1604
        /* The estimation has to be further done for mps(11) and mps(111)*/
1605
        /*ctxt_set = 0 as transform 4x4 has only one csb with DC */
1606
        /* gt1_ctxt = 0 for the co-ef value to be 2 */
1607
1608
3.63M
        ctxt_set = gt1_ctxt = 0;
1609
3.63M
        ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1610
1611
3.63M
        state_mps = pu1_ctxt_model[ctxt_idx];
1612
1613
3.63M
        u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
1614
1615
3.63M
        u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1616
1617
3.63M
        QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1_temp, lamda_mod);
1618
61.7M
        for(scan_pos = 0; scan_pos < 16; scan_pos++)
1619
58.0M
        {
1620
58.0M
            *(pi4_quant_round_1_2 + scan_pos) = temp2;
1621
58.0M
        }
1622
1623
61.7M
        for(scan_pos = 0; scan_pos < 16; scan_pos++)
1624
58.0M
        {
1625
            //UWORD8 nbr_csbf = 1;
1626
            /* derive the x,y pos */
1627
58.0M
            UWORD8 y_pos_x_pos = scan_pos;  //gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
1628
1629
            /* 4x4 transform size increment uses lookup */
1630
58.0M
            sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];
1631
1632
            /*Get the mps state based on ctxt modes */
1633
58.0M
            state_mps = pu1_ctxt_model[sig_ctxinc + sig_coeff_base_ctxt];
1634
1635
            /* Bits taken to encode sig co-ef flag as 0 */
1636
58.0M
            u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1637
1638
            /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1639
            //
1640
58.0M
            u4_bits_estimated_r1 =
1641
58.0M
                (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
1642
1643
            /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1644
58.0M
            u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
1645
1646
58.0M
            QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
1647
58.0M
            *(pi4_quant_round_0_1 + scan_pos) = temp2;
1648
58.0M
        }
1649
3.63M
    }
1650
4.72M
    else
1651
4.72M
    {
1652
4.72M
        UWORD8 *pu1_hevce_sigcoeff_ctxtinc;
1653
4.72M
        WORD32 is_nbr_csb_state_mps;
1654
1655
4.72M
        WORD32 state_mps;
1656
4.72M
        WORD32 gt1_ctxt = 0;
1657
4.72M
        WORD32 ctxt_set = 0;
1658
4.72M
        WORD32 ctxt_idx;
1659
        /*1to2 rounding factor is same for all sub blocks except for sub-block = 0*/
1660
        /*Hence will write all the sub-block with i >=1 coeff, and then overwrite for i = 0*/
1661
1662
        /*ctxt_set = 0 DC subblock, the previous state did not have 2
1663
        ctxt_set = 1 DC subblock, the previous state did have >= 2
1664
        ctxt_set = 2 AC subblock, the previous state did not have 2
1665
        ctxt_set = 3 AC subblock, the previous state did have >= 2*/
1666
4.72M
        i = 1;
1667
4.72M
        ctxt_set = (i && is_luma) ? 2 : 0;
1668
1669
4.72M
        ctxt_set++;
1670
1671
        /*0th position indicates the probability of 2 */
1672
        /*1th position indicates the probability of 1 */
1673
        /*2th position indicates the probability of 11 */
1674
        /*3th position indicates the probability of 111 */
1675
1676
4.72M
        gt1_ctxt = 0;
1677
4.72M
        ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1678
1679
4.72M
        state_mps = pu1_ctxt_model[ctxt_idx];
1680
1681
4.72M
        u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
1682
1683
4.72M
        u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1684
4.72M
        QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod);
1685
1686
934M
        for(scan_pos = 0; scan_pos < (16 * (i4_trans_size * i4_trans_size >> 4)); scan_pos++)
1687
929M
        {
1688
929M
            *(pi4_quant_round_1_2 + scan_pos) = temp2;
1689
929M
        }
1690
1691
4.72M
        i = 0;
1692
4.72M
        ctxt_set = (i && is_luma) ? 2 : 0;
1693
4.72M
        ctxt_set++;
1694
1695
        /*0th position indicates the probability of 2 */
1696
        /*1th position indicates the probability of 1 */
1697
        /*2th position indicates the probability of 11 */
1698
        /*3th position indicates the probability of 111 */
1699
1700
4.72M
        gt1_ctxt = 0;
1701
4.72M
        ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1702
1703
4.72M
        state_mps = pu1_ctxt_model[ctxt_idx];
1704
1705
4.72M
        u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
1706
1707
4.72M
        u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1708
4.72M
        QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod);
1709
1710
80.2M
        for(scan_pos = 0; scan_pos < 16; scan_pos++)
1711
75.5M
        {
1712
75.5M
            *(pi4_quant_round_1_2 + ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2;
1713
75.5M
        }
1714
1715
4.72M
        {
1716
4.72M
            WORD32 ctxt_idx;
1717
1718
4.72M
            WORD32 nbr_csbf_0, nbr_csbf_1;
1719
4.72M
            WORD32 state_mps_0, state_mps_1;
1720
4.72M
            ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX;
1721
4.72M
            ctxt_idx += is_luma ? 0 : 2;
1722
1723
            /* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */
1724
            /* if neibhor not available, ctxt idx = 0*/
1725
4.72M
            nbr_csbf_0 = 0;
1726
4.72M
            ctxt_idx += nbr_csbf_0 ? 1 : 0;
1727
4.72M
            state_mps_0 = pu1_ctxt_model[ctxt_idx];
1728
1729
4.72M
            nbr_csbf_1 = 1;
1730
4.72M
            ctxt_idx += nbr_csbf_1 ? 1 : 0;
1731
4.72M
            state_mps_1 = pu1_ctxt_model[ctxt_idx];
1732
1733
4.72M
            is_nbr_csb_state_mps = ((state_mps_0 % 2) == 1) && ((state_mps_1 % 2) == 1);
1734
4.72M
        }
1735
1736
4.72M
        if(1 == is_nbr_csb_state_mps)
1737
955k
        {
1738
13.8M
            for(i = 0; i < (i4_trans_size * i4_trans_size >> 4); i++)
1739
12.9M
            {
1740
12.9M
                UWORD8 sig_ctxinc;
1741
12.9M
                WORD32 state_mps;
1742
12.9M
                WORD32 gt1_ctxt = 0;
1743
12.9M
                WORD32 ctxt_set = 0;
1744
1745
12.9M
                WORD32 ctxt_idx;
1746
1747
                /*Check if the cabac states had previous nbr available */
1748
1749
12.9M
                if(i == 0)
1750
955k
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[3][0];
1751
11.9M
                else if(i < (i4_trans_size >> 2))
1752
2.07M
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[1][0];
1753
9.87M
                else if((i % (i4_trans_size >> 2)) == 0)
1754
2.07M
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[2][0];
1755
7.79M
                else
1756
7.79M
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0];
1757
1758
12.9M
                if(((i % (i4_trans_size >> 2)) == 0) && (i != 0))
1759
2.07M
                    k++;
1760
1761
12.9M
                j = ((i4_trans_size * 4) * k) + ((i % (i4_trans_size >> 2)) * 4);
1762
                /*ctxt_set = 0 DC subblock, the previous state did not have 2
1763
                ctxt_set = 1 DC subblock, the previous state did have >= 2
1764
                ctxt_set = 2 AC subblock, the previous state did not have 2
1765
                ctxt_set = 3 AC subblock, the previous state did have >= 2*/
1766
1767
12.9M
                ctxt_set = (i && is_luma) ? 2 : 0;
1768
1769
                /* gt1_ctxt = 1 for the co-ef value to be 1 */
1770
12.9M
                gt1_ctxt = 0;
1771
12.9M
                ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1772
1773
12.9M
                state_mps = pu1_ctxt_model[ctxt_idx];
1774
1775
                /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1776
12.9M
                u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1777
1778
219M
                for(scan_pos = 0; scan_pos < 16; scan_pos++)
1779
206M
                {
1780
206M
                    UWORD8 y_pos_x_pos;
1781
1782
206M
                    if(scan_pos || i)
1783
205M
                    {
1784
205M
                        y_pos_x_pos = scan_pos;  // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
1785
                        /* ctxt for AC coeff depends on curpos and neigbour csbf */
1786
205M
                        sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
1787
1788
                        /* based on luma subblock pos */
1789
205M
                        sig_ctxinc += (i && is_luma) ? 3 : 0;
1790
1791
205M
                        sig_ctxinc += sig_coeff_base_ctxt;
1792
205M
                    }
1793
955k
                    else
1794
955k
                    {
1795
                        /*MAM : both scan pos and i 0 impies the DC coef of 1st block only */
1796
                        /* DC coeff has fixed context for luma and chroma */
1797
955k
                        sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
1798
955k
                    }
1799
1800
                    /*Get the mps state based on ctxt modes */
1801
206M
                    state_mps = pu1_ctxt_model[sig_ctxinc];
1802
1803
                    /* Bits taken to encode sig co-ef flag as 0 */
1804
206M
                    u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1805
1806
206M
                    u4_bits_estimated_r1 =
1807
206M
                        (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
1808
1809
                    /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1810
206M
                    u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
1811
206M
                    {
1812
206M
                        QUANT_ROUND_FACTOR(
1813
206M
                            temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
1814
206M
                        *(pi4_quant_round_0_1 +
1815
206M
                          ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size)) + j) = temp2;
1816
206M
                    }
1817
206M
                }
1818
12.9M
            }
1819
955k
        }
1820
3.76M
        else
1821
3.76M
        {
1822
            /*If Both nbr csbfs are 0, then all the coef in sub-blocks will have same value except for 1st subblock,
1823
            Hence will write the same value to all sub block, and overwrite for the 1st one */
1824
3.76M
            i = 1;
1825
3.76M
            {
1826
3.76M
                UWORD8 sig_ctxinc;
1827
3.76M
                UWORD8 y_pos_x_pos;
1828
3.76M
                WORD32 quant_rounding_0_1;
1829
1830
3.76M
                pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc_00[0];
1831
1832
3.76M
                scan_pos = 0;
1833
3.76M
                y_pos_x_pos = scan_pos;  // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
1834
                /* ctxt for AC coeff depends on curpos and neigbour csbf */
1835
3.76M
                sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
1836
1837
                /* based on luma subblock pos */
1838
3.76M
                sig_ctxinc += (is_luma) ? 3 : 0;
1839
1840
3.76M
                sig_ctxinc += sig_coeff_base_ctxt;
1841
1842
                /*Get the mps state based on ctxt modes */
1843
3.76M
                state_mps = pu1_ctxt_model[sig_ctxinc];
1844
1845
                /* Bits taken to encode sig co-ef flag as 0 */
1846
3.76M
                u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1847
1848
3.76M
                u4_bits_estimated_r1 =
1849
3.76M
                    (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
1850
1851
                /*ctxt_set = 0 DC subblock, the previous state did not have 2
1852
                ctxt_set = 1 DC subblock, the previous state did have >= 2
1853
                ctxt_set = 2 AC subblock, the previous state did not have 2
1854
                ctxt_set = 3 AC subblock, the previous state did have >= 2*/
1855
1856
3.76M
                ctxt_set = (i && is_luma) ? 2 : 0;
1857
1858
                /* gt1_ctxt = 1 for the co-ef value to be 1 */
1859
3.76M
                gt1_ctxt = 0;
1860
3.76M
                ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1861
1862
3.76M
                state_mps = pu1_ctxt_model[ctxt_idx];
1863
1864
                /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1865
3.76M
                u4_bits_estimated_r1 += gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1866
1867
3.76M
                QUANT_ROUND_FACTOR(
1868
3.76M
                    quant_rounding_0_1, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
1869
1870
727M
                for(scan_pos = 0; scan_pos < (16 * (i4_trans_size * i4_trans_size >> 4));
1871
723M
                    scan_pos++)
1872
723M
                {
1873
723M
                    *(pi4_quant_round_0_1 + scan_pos) = quant_rounding_0_1;
1874
723M
                }
1875
3.76M
            }
1876
1877
            /*First Subblock*/
1878
3.76M
            i = 0;
1879
1880
3.76M
            {
1881
3.76M
                UWORD8 sig_ctxinc;
1882
3.76M
                WORD32 state_mps;
1883
3.76M
                WORD32 gt1_ctxt = 0;
1884
3.76M
                WORD32 ctxt_set = 0;
1885
1886
3.76M
                WORD32 ctxt_idx;
1887
1888
                /*Check if the cabac states had previous nbr available */
1889
1890
3.76M
                {
1891
3.76M
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0];
1892
1893
                    /*ctxt_set = 0 DC subblock, the previous state did not have 2
1894
                    ctxt_set = 1 DC subblock, the previous state did have >= 2
1895
                    ctxt_set = 2 AC subblock, the previous state did not have 2
1896
                    ctxt_set = 3 AC subblock, the previous state did have >= 2*/
1897
3.76M
                    ctxt_set = (i && is_luma) ? 2 : 0;
1898
1899
                    /* gt1_ctxt = 1 for the co-ef value to be 1 */
1900
3.76M
                    gt1_ctxt = 0;
1901
3.76M
                    ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1902
1903
3.76M
                    state_mps = pu1_ctxt_model[ctxt_idx];
1904
1905
                    /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1906
3.76M
                    u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1907
1908
64.0M
                    for(scan_pos = 0; scan_pos < 16; scan_pos++)
1909
60.2M
                    {
1910
60.2M
                        UWORD8 y_pos_x_pos;
1911
1912
60.2M
                        if(scan_pos)
1913
56.4M
                        {
1914
56.4M
                            y_pos_x_pos = scan_pos;  // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
1915
                            /* ctxt for AC coeff depends on curpos and neigbour csbf */
1916
56.4M
                            sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
1917
1918
                            /* based on luma subblock pos */
1919
56.4M
                            sig_ctxinc += (i && is_luma) ? 3 : 0;
1920
1921
56.4M
                            sig_ctxinc += sig_coeff_base_ctxt;
1922
56.4M
                        }
1923
3.76M
                        else
1924
3.76M
                        {
1925
                            /*MAM : both scan pos and i 0 impies the DC coef of 1st block only */
1926
                            /* DC coeff has fixed context for luma and chroma */
1927
3.76M
                            sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
1928
3.76M
                        }
1929
1930
                        /*Get the mps state based on ctxt modes */
1931
60.2M
                        state_mps = pu1_ctxt_model[sig_ctxinc];
1932
1933
                        /* Bits taken to encode sig co-ef flag as 0 */
1934
60.2M
                        u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1935
1936
60.2M
                        u4_bits_estimated_r1 =
1937
60.2M
                            (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
1938
1939
                        /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1940
60.2M
                        u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
1941
60.2M
                        {
1942
60.2M
                            QUANT_ROUND_FACTOR(
1943
60.2M
                                temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
1944
60.2M
                            *(pi4_quant_round_0_1 +
1945
60.2M
                              ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2;
1946
60.2M
                        }
1947
60.2M
                    }
1948
3.76M
                }
1949
3.76M
            }
1950
3.76M
        }
1951
4.72M
    }
1952
8.35M
    return;
1953
8.35M
}
1954
1955
/*!
1956
******************************************************************************
1957
* \if Function name : ihevce_t_q_iq_ssd_scan_fxn \endif
1958
*
1959
* \brief
1960
*    Transform unit level (Luma) enc_loop function
1961
*
1962
* \param[in] ps_ctxt    enc_loop module ctxt pointer
1963
* \param[in] pu1_pred   pointer to predicted data buffer
1964
* \param[in] pred_strd  predicted buffer stride
1965
* \param[in] pu1_src    pointer to source data buffer
1966
* \param[in] src_strd   source buffer stride
1967
* \param[in] pi2_deq_data   pointer to store iq data
1968
* \param[in] deq_data_strd  iq data buffer stride
1969
* \param[out] pu1_ecd_data pointer to coeff output buffer (input to entropy coding)
1970
* \param[out] pu1_csbf_buf  pointer to store the csbf for all 4x4 in a current
1971
*                           block
1972
* \param[out] csbf_strd  csbf buffer stride
1973
* \param[in] trans_size transform size (4, 8, 16,32)
1974
* \param[in] packed_pred_mode   0:Inter 1:Intra 2:Skip
1975
* \param[out] pi8_cost      pointer to store the cost
1976
* \param[out] pi4_coeff_off pointer to store the number of bytes produced in
1977
*                           coeff buffer
1978
* \param[out] pi4_tu_bits   pointer to store the best TU bits required to encode
1979
*                           the current TU in RDopt Mode
1980
* \param[out] pu4_blk_sad   pointer to store the block sad for RC
1981
* \param[out] pi4_zero_col  pointer to store the zero_col info for the TU
1982
* \param[out] pi4_zero_row  pointer to store the zero_row info for the TU
1983
* \param[in]  i4_perform_rdoq Indicates if RDOQ should be performed or not
1984
* \param[in]  i4_perform_sbh Indicates if SBH should be performed or not
1985
*
1986
* \return
1987
*    CBF of the current block
1988
*
1989
* \author
1990
*  Ittiam
1991
*
1992
*****************************************************************************
1993
*/
1994
1995
WORD32 ihevce_t_q_iq_ssd_scan_fxn(
1996
    ihevce_enc_loop_ctxt_t *ps_ctxt,
1997
    UWORD8 *pu1_pred,
1998
    WORD32 pred_strd,
1999
    UWORD8 *pu1_src,
2000
    WORD32 src_strd,
2001
    WORD16 *pi2_deq_data,
2002
    WORD32 deq_data_strd,
2003
    UWORD8 *pu1_recon,
2004
    WORD32 i4_recon_stride,
2005
    UWORD8 *pu1_ecd_data,
2006
    UWORD8 *pu1_csbf_buf,
2007
    WORD32 csbf_strd,
2008
    WORD32 trans_size,
2009
    WORD32 packed_pred_mode,
2010
    LWORD64 *pi8_cost,
2011
    WORD32 *pi4_coeff_off,
2012
    WORD32 *pi4_tu_bits,
2013
    UWORD32 *pu4_blk_sad,
2014
    WORD32 *pi4_zero_col,
2015
    WORD32 *pi4_zero_row,
2016
    UWORD8 *pu1_is_recon_available,
2017
    WORD32 i4_perform_rdoq,
2018
    WORD32 i4_perform_sbh,
2019
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2020
    WORD32 i4_alpha_stim_multiplier,
2021
    UWORD8 u1_is_cu_noisy,
2022
#endif
2023
    SSD_TYPE_T e_ssd_type,
2024
    WORD32 early_cbf)
2025
34.5M
{
2026
34.5M
    WORD32 cbf = 0;
2027
34.5M
    WORD32 trans_idx;
2028
34.5M
    WORD32 quant_scale_mat_offset;
2029
34.5M
    WORD32 *pi4_trans_scratch;
2030
34.5M
    WORD16 *pi2_trans_values;
2031
34.5M
    WORD16 *pi2_quant_coeffs;
2032
34.5M
    WORD32 *pi4_subBlock2csbfId_map = NULL;
2033
2034
#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
2035
    WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i;
2036
#endif
2037
2038
34.5M
    rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt;
2039
2040
34.5M
    WORD32 i4_perform_zcbf = (ENABLE_INTER_ZCU_COST && (PRED_MODE_INTRA != packed_pred_mode)) ||
2041
34.5M
                             (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE);
2042
34.5M
    WORD32 i4_perform_coeff_level_rdoq = (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING);
2043
34.5M
    WORD8 intra_flag = 0;
2044
34.5M
    ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW);
2045
2046
34.5M
    *pi4_tu_bits = 0;
2047
34.5M
    *pi4_coeff_off = 0;
2048
34.5M
    pu1_is_recon_available[0] = 0;
2049
2050
34.5M
    if((PRED_MODE_SKIP == packed_pred_mode) || (0 == early_cbf))
2051
1.79M
    {
2052
1.79M
        if(e_ssd_type != NULL_TYPE)
2053
1.79M
        {
2054
            /* SSD cost is stored to the pointer */
2055
1.79M
            pi8_cost[0] =
2056
2057
1.79M
                ps_ctxt->s_cmn_opt_func.pf_ssd_and_sad_calculator(
2058
1.79M
                    pu1_pred, pred_strd, pu1_src, src_strd, trans_size, pu4_blk_sad);
2059
2060
1.79M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2061
1.79M
            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
2062
0
            {
2063
0
                pi8_cost[0] = ihevce_inject_stim_into_distortion(
2064
0
                    pu1_src,
2065
0
                    src_strd,
2066
0
                    pu1_pred,
2067
0
                    pred_strd,
2068
0
                    pi8_cost[0],
2069
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2070
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2071
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2072
0
                                                 100.0,
2073
0
                    trans_size,
2074
0
                    0,
2075
0
                    ps_ctxt->u1_enable_psyRDOPT,
2076
0
                    NULL_PLANE);
2077
0
            }
2078
1.79M
#endif
2079
2080
            /* copy pred to recon for skip mode */
2081
1.79M
            if(SPATIAL_DOMAIN_SSD == e_ssd_type)
2082
666k
            {
2083
666k
                ps_ctxt->s_cmn_opt_func.pf_copy_2d(
2084
666k
                    pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size);
2085
666k
                pu1_is_recon_available[0] = 1;
2086
666k
            }
2087
1.12M
            else
2088
1.12M
            {
2089
1.12M
                pu1_is_recon_available[0] = 0;
2090
1.12M
            }
2091
2092
1.79M
#if ENABLE_INTER_ZCU_COST
2093
1.79M
            ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
2094
1.79M
#endif
2095
1.79M
        }
2096
0
        else
2097
0
        {
2098
0
            pi8_cost[0] = UINT_MAX;
2099
0
        }
2100
2101
        /* cbf is returned as 0 */
2102
1.79M
        return (0);
2103
1.79M
    }
2104
2105
    /* derive context variables */
2106
32.7M
    pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0];
2107
32.7M
    pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
2108
32.7M
    pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2);
2109
2110
    /* translate the transform size to index for 4x4 and 8x8 */
2111
32.7M
    trans_idx = trans_size >> 2;
2112
2113
32.7M
    if(PRED_MODE_INTRA == packed_pred_mode)
2114
27.0M
    {
2115
27.0M
        quant_scale_mat_offset = 0;
2116
27.0M
        intra_flag = 1;
2117
#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
2118
        ai4_quant_rounding_factors[0][0] =
2119
            MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3);
2120
2121
        for(i = 0; i < trans_size * trans_size; i++)
2122
        {
2123
            ai4_quant_rounding_factors[1][i] =
2124
                MAX(ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3][i],
2125
                    (1 << QUANT_ROUND_FACTOR_Q) / 3);
2126
            ai4_quant_rounding_factors[2][i] =
2127
                MAX(ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3][i],
2128
                    (1 << QUANT_ROUND_FACTOR_Q) / 3);
2129
        }
2130
#endif
2131
27.0M
    }
2132
5.69M
    else
2133
5.69M
    {
2134
5.69M
        quant_scale_mat_offset = NUM_TRANS_TYPES;
2135
5.69M
    }
2136
    /* for intra 4x4 DST transform should be used */
2137
32.7M
    if((1 == trans_idx) && (1 == intra_flag))
2138
12.6M
    {
2139
12.6M
        trans_idx = 0;
2140
12.6M
    }
2141
    /* for 16x16 cases */
2142
20.0M
    else if(16 == trans_size)
2143
7.00M
    {
2144
7.00M
        trans_idx = 3;
2145
7.00M
    }
2146
    /* for 32x32 cases */
2147
13.0M
    else if(32 == trans_size)
2148
2.67M
    {
2149
2.67M
        trans_idx = 4;
2150
2.67M
    }
2151
2152
32.7M
    switch(trans_size)
2153
32.7M
    {
2154
14.2M
    case 4:
2155
14.2M
    {
2156
14.2M
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU;
2157
2158
14.2M
        break;
2159
0
    }
2160
8.77M
    case 8:
2161
8.77M
    {
2162
8.77M
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU;
2163
2164
8.77M
        break;
2165
0
    }
2166
7.00M
    case 16:
2167
7.00M
    {
2168
7.00M
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU;
2169
2170
7.00M
        break;
2171
0
    }
2172
2.67M
    case 32:
2173
2.67M
    {
2174
2.67M
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU;
2175
2176
2.67M
        break;
2177
0
    }
2178
32.7M
    }
2179
2180
    /* Do not call the FT and Quant functions if early_cbf is 0 */
2181
32.7M
    if(1 == early_cbf)
2182
32.7M
    {
2183
        /* ---------- call residue and transform block ------- */
2184
32.7M
        *pu4_blk_sad = ps_ctxt->apf_resd_trns[trans_idx](
2185
32.7M
            pu1_src,
2186
32.7M
            pu1_pred,
2187
32.7M
            pi4_trans_scratch,
2188
32.7M
            pi2_trans_values,
2189
32.7M
            src_strd,
2190
32.7M
            pred_strd,
2191
32.7M
            trans_size,
2192
32.7M
            NULL_PLANE);
2193
2194
32.7M
        cbf = ps_ctxt->apf_quant_iquant_ssd
2195
32.7M
                  [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2](
2196
32.7M
                      pi2_trans_values,
2197
32.7M
                      ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset],
2198
32.7M
                      pi2_quant_coeffs,
2199
32.7M
                      pi2_deq_data,
2200
32.7M
                      trans_size,
2201
32.7M
                      ps_ctxt->i4_cu_qp_div6,
2202
32.7M
                      ps_ctxt->i4_cu_qp_mod6,
2203
32.7M
#if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
2204
32.7M
                      ps_ctxt->i4_quant_rnd_factor[intra_flag],
2205
32.7M
                      ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
2206
32.7M
                      ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
2207
#else
2208
                      intra_flag ? ai4_quant_rounding_factors[0][0]
2209
                                 : ps_ctxt->i4_quant_rnd_factor[intra_flag],
2210
                      intra_flag ? ai4_quant_rounding_factors[1]
2211
                                 : ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
2212
                      intra_flag ? ai4_quant_rounding_factors[2]
2213
                                 : ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
2214
#endif
2215
32.7M
                      trans_size,
2216
32.7M
                      trans_size,
2217
32.7M
                      deq_data_strd,
2218
32.7M
                      pu1_csbf_buf,
2219
32.7M
                      csbf_strd,
2220
32.7M
                      pi4_zero_col,
2221
32.7M
                      pi4_zero_row,
2222
32.7M
                      ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset],
2223
32.7M
                      pi8_cost);
2224
2225
32.7M
        if(e_ssd_type != FREQUENCY_DOMAIN_SSD)
2226
13.9M
        {
2227
13.9M
            pi8_cost[0] = UINT_MAX;
2228
13.9M
        }
2229
32.7M
    }
2230
2231
32.7M
    if(0 != cbf)
2232
10.3M
    {
2233
10.3M
        if(i4_perform_sbh || i4_perform_rdoq)
2234
7.30M
        {
2235
7.30M
            ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd;
2236
7.30M
            ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size;
2237
7.30M
            ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map;
2238
2239
7.30M
            ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_cu_qp_div6;
2240
7.30M
            ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_cu_qp_mod6;
2241
7.30M
            ps_rdoq_sbh_ctxt->i4_scan_idx = ps_ctxt->i4_scan_idx;
2242
7.30M
            ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
2243
7.30M
            ps_rdoq_sbh_ctxt->i4_trans_size = trans_size;
2244
2245
7.30M
            ps_rdoq_sbh_ctxt->pi2_dequant_coeff =
2246
7.30M
                ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset];
2247
7.30M
            ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data;
2248
7.30M
            ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs;
2249
7.30M
            ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values;
2250
7.30M
            ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf;
2251
2252
            /* ------- call coeffs scan function ------- */
2253
7.30M
            if((!i4_perform_rdoq))
2254
2.80M
            {
2255
2.80M
                ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
2256
2257
2.80M
                pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
2258
2.80M
            }
2259
7.30M
        }
2260
2261
10.3M
        *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
2262
10.3M
            pi2_quant_coeffs,
2263
10.3M
            pi4_subBlock2csbfId_map,
2264
10.3M
            ps_ctxt->i4_scan_idx,
2265
10.3M
            trans_size,
2266
10.3M
            pu1_ecd_data,
2267
10.3M
            pu1_csbf_buf,
2268
10.3M
            csbf_strd);
2269
10.3M
    }
2270
32.7M
    *pi8_cost >>= ga_trans_shift[trans_idx];
2271
2272
32.7M
#if RDOPT_ZERO_CBF_ENABLE
2273
    /* compare null cbf cost with encode tu rd-cost */
2274
32.7M
    if(cbf != 0)
2275
10.3M
    {
2276
10.3M
        WORD32 tu_bits;
2277
10.3M
        LWORD64 tu_rd_cost;
2278
2279
10.3M
        LWORD64 zero_cbf_cost = 0;
2280
2281
        /* Populating the fields of rdoq_ctxt structure */
2282
10.3M
        if(i4_perform_rdoq)
2283
4.50M
        {
2284
            /* transform size to log2transform size */
2285
4.50M
            GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size);
2286
4.50M
            ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1;
2287
4.50M
            ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_qf;
2288
4.50M
            ps_rdoq_sbh_ctxt->i4_is_luma = 1;
2289
4.50M
            ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx];
2290
4.50M
            ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td =
2291
4.50M
                (1 << ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td) / 2;
2292
4.50M
            ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0;
2293
4.50M
            ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col;
2294
4.50M
            ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row;
2295
4.50M
        }
2296
5.84M
        else if(i4_perform_zcbf)
2297
934k
        {
2298
934k
            zero_cbf_cost =
2299
2300
934k
                ps_ctxt->s_cmn_opt_func.pf_ssd_calculator(
2301
934k
                    pu1_src, pu1_pred, src_strd, pred_strd, trans_size, trans_size, NULL_PLANE);
2302
934k
        }
2303
2304
        /************************************************************************/
2305
        /* call the entropy rdo encode to get the bit estimate for current tu   */
2306
        /* note that tu includes only residual coding bits and does not include */
2307
        /* tu split, cbf and qp delta encoding bits for a TU                    */
2308
        /************************************************************************/
2309
10.3M
        if(i4_perform_rdoq)
2310
4.50M
        {
2311
4.50M
            tu_bits = ihevce_entropy_rdo_encode_tu_rdoq(
2312
4.50M
                &ps_ctxt->s_rdopt_entropy_ctxt,
2313
4.50M
                (pu1_ecd_data),
2314
4.50M
                trans_size,
2315
4.50M
                1,
2316
4.50M
                ps_rdoq_sbh_ctxt,
2317
4.50M
                pi8_cost,
2318
4.50M
                &zero_cbf_cost,
2319
4.50M
                0);
2320
2321
4.50M
            if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0)
2322
141k
            {
2323
141k
                cbf = 0;
2324
141k
                *pi4_coeff_off = 0;
2325
141k
            }
2326
2327
4.50M
            if((i4_perform_sbh) && (0 != cbf))
2328
4.36M
            {
2329
4.36M
                ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
2330
4.36M
                ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
2331
4.36M
                *pi8_cost = ps_rdoq_sbh_ctxt->i8_ssd_cost;
2332
4.36M
            }
2333
2334
            /*Add round value before normalizing*/
2335
4.50M
            *pi8_cost += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td;
2336
4.50M
            *pi8_cost >>= ga_trans_shift[trans_idx];
2337
2338
4.50M
            if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1)
2339
4.36M
            {
2340
4.36M
                pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
2341
4.36M
                *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
2342
4.36M
                    pi2_quant_coeffs,
2343
4.36M
                    pi4_subBlock2csbfId_map,
2344
4.36M
                    ps_ctxt->i4_scan_idx,
2345
4.36M
                    trans_size,
2346
4.36M
                    pu1_ecd_data,
2347
4.36M
                    pu1_csbf_buf,
2348
4.36M
                    csbf_strd);
2349
4.36M
            }
2350
4.50M
        }
2351
5.84M
        else
2352
5.84M
        {
2353
5.84M
            tu_bits = ihevce_entropy_rdo_encode_tu(
2354
5.84M
                &ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 1, i4_perform_sbh);
2355
5.84M
        }
2356
2357
10.3M
        *pi4_tu_bits = tu_bits;
2358
2359
10.3M
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
2360
2.90M
        {
2361
2.90M
            *pi8_cost = ihevce_it_recon_ssd(
2362
2.90M
                ps_ctxt,
2363
2.90M
                pu1_src,
2364
2.90M
                src_strd,
2365
2.90M
                pu1_pred,
2366
2.90M
                pred_strd,
2367
2.90M
                pi2_deq_data,
2368
2.90M
                deq_data_strd,
2369
2.90M
                pu1_recon,
2370
2.90M
                i4_recon_stride,
2371
2.90M
                pu1_ecd_data,
2372
2.90M
                trans_size,
2373
2.90M
                packed_pred_mode,
2374
2.90M
                cbf,
2375
2.90M
                *pi4_zero_col,
2376
2.90M
                *pi4_zero_row,
2377
2.90M
                NULL_PLANE);
2378
2379
2.90M
            pu1_is_recon_available[0] = 1;
2380
2.90M
        }
2381
2382
10.3M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2383
10.3M
        if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2384
0
        {
2385
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
2386
0
                pu1_src,
2387
0
                src_strd,
2388
0
                pu1_recon,
2389
0
                i4_recon_stride,
2390
0
                pi8_cost[0],
2391
0
                i4_alpha_stim_multiplier,
2392
0
                trans_size,
2393
0
                0,
2394
0
                ps_ctxt->u1_enable_psyRDOPT,
2395
0
                NULL_PLANE);
2396
0
        }
2397
10.3M
        else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2398
0
        {
2399
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
2400
0
                pu1_src,
2401
0
                src_strd,
2402
0
                pu1_pred,
2403
0
                pred_strd,
2404
0
                pi8_cost[0],
2405
0
                i4_alpha_stim_multiplier,
2406
0
                trans_size,
2407
0
                0,
2408
0
                ps_ctxt->u1_enable_psyRDOPT,
2409
0
                NULL_PLANE);
2410
0
        }
2411
10.3M
#endif
2412
2413
        /* add the SSD cost to bits estimate given by ECD */
2414
10.3M
        tu_rd_cost = *pi8_cost + COMPUTE_RATE_COST_CLIP30(
2415
10.3M
                                     tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
2416
2417
10.3M
        if(i4_perform_zcbf)
2418
1.82M
        {
2419
1.82M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2420
1.82M
            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
2421
0
            {
2422
0
                zero_cbf_cost = ihevce_inject_stim_into_distortion(
2423
0
                    pu1_src,
2424
0
                    src_strd,
2425
0
                    pu1_pred,
2426
0
                    pred_strd,
2427
0
                    zero_cbf_cost,
2428
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2429
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2430
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2431
0
                                                 100.0,
2432
0
                    trans_size,
2433
0
                    0,
2434
0
                    ps_ctxt->u1_enable_psyRDOPT,
2435
0
                    NULL_PLANE);
2436
0
            }
2437
1.82M
#endif
2438
2439
            /* force the tu as zero cbf if zero_cbf_cost is lower */
2440
1.82M
            if(zero_cbf_cost < tu_rd_cost)
2441
46.0k
            {
2442
                /* num bytes is set to 0 */
2443
46.0k
                *pi4_coeff_off = 0;
2444
2445
                /* cbf is returned as 0 */
2446
46.0k
                cbf = 0;
2447
2448
                /* cost is returned as 0 cbf cost */
2449
46.0k
                *pi8_cost = zero_cbf_cost;
2450
2451
                /* TU bits is set to 0 */
2452
46.0k
                *pi4_tu_bits = 0;
2453
46.0k
                pu1_is_recon_available[0] = 0;
2454
2455
46.0k
                if(SPATIAL_DOMAIN_SSD == e_ssd_type)
2456
8.72k
                {
2457
                    /* copy pred to recon for zcbf mode */
2458
2459
8.72k
                    ps_ctxt->s_cmn_opt_func.pf_copy_2d(
2460
8.72k
                        pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size);
2461
2462
8.72k
                    pu1_is_recon_available[0] = 1;
2463
8.72k
                }
2464
46.0k
            }
2465
            /* accumulate cu not coded cost with zcbf cost */
2466
1.82M
#if ENABLE_INTER_ZCU_COST
2467
1.82M
            ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost;
2468
1.82M
#endif
2469
1.82M
        }
2470
10.3M
    }
2471
22.3M
    else
2472
22.3M
    {
2473
        /* cbf = 0, accumulate cu not coded cost */
2474
22.3M
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
2475
11.0M
        {
2476
11.0M
            *pi8_cost = ihevce_it_recon_ssd(
2477
11.0M
                ps_ctxt,
2478
11.0M
                pu1_src,
2479
11.0M
                src_strd,
2480
11.0M
                pu1_pred,
2481
11.0M
                pred_strd,
2482
11.0M
                pi2_deq_data,
2483
11.0M
                deq_data_strd,
2484
11.0M
                pu1_recon,
2485
11.0M
                i4_recon_stride,
2486
11.0M
                pu1_ecd_data,
2487
11.0M
                trans_size,
2488
11.0M
                packed_pred_mode,
2489
11.0M
                cbf,
2490
11.0M
                *pi4_zero_col,
2491
11.0M
                *pi4_zero_row,
2492
11.0M
                NULL_PLANE);
2493
2494
11.0M
            pu1_is_recon_available[0] = 1;
2495
11.0M
        }
2496
2497
22.3M
#if ENABLE_INTER_ZCU_COST
2498
22.3M
        {
2499
22.3M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2500
22.3M
            if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2501
0
            {
2502
0
                pi8_cost[0] = ihevce_inject_stim_into_distortion(
2503
0
                    pu1_src,
2504
0
                    src_strd,
2505
0
                    pu1_recon,
2506
0
                    i4_recon_stride,
2507
0
                    pi8_cost[0],
2508
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2509
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2510
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2511
0
                                                 100.0,
2512
0
                    trans_size,
2513
0
                    0,
2514
0
                    ps_ctxt->u1_enable_psyRDOPT,
2515
0
                    NULL_PLANE);
2516
0
            }
2517
22.3M
            else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2518
0
            {
2519
0
                pi8_cost[0] = ihevce_inject_stim_into_distortion(
2520
0
                    pu1_src,
2521
0
                    src_strd,
2522
0
                    pu1_pred,
2523
0
                    pred_strd,
2524
0
                    pi8_cost[0],
2525
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2526
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2527
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2528
0
                                                 100.0,
2529
0
                    trans_size,
2530
0
                    0,
2531
0
                    ps_ctxt->u1_enable_psyRDOPT,
2532
0
                    NULL_PLANE);
2533
0
            }
2534
22.3M
#endif
2535
2536
22.3M
            ps_ctxt->i8_cu_not_coded_cost += *pi8_cost;
2537
22.3M
        }
2538
22.3M
#endif /* ENABLE_INTER_ZCU_COST */
2539
22.3M
    }
2540
32.7M
#endif
2541
2542
32.7M
    return (cbf);
2543
32.7M
}
2544
2545
/*!
2546
******************************************************************************
2547
* \if Function name : ihevce_it_recon_fxn \endif
2548
*
2549
* \brief
2550
*    Transform unit level (Luma) IT Recon function
2551
*
2552
* \param[in] ps_ctxt        enc_loop module ctxt pointer
2553
* \param[in] pi2_deq_data   pointer to iq data
2554
* \param[in] deq_data_strd  iq data buffer stride
2555
* \param[in] pu1_pred       pointer to predicted data buffer
2556
* \param[in] pred_strd      predicted buffer stride
2557
* \param[in] pu1_recon      pointer to recon buffer
2558
* \param[in] recon_strd     recon buffer stride
2559
* \param[out] pu1_ecd_data  pointer coeff output buffer (input to ent cod)
2560
* \param[in] trans_size     transform size (4, 8, 16,32)
2561
* \param[in] packed_pred_mode   0:Inter 1:Intra 2:Skip
2562
* \param[in] cbf            CBF of the current block
2563
* \param[in] zero_cols      zero_cols of the current block
2564
* \param[in] zero_rows      zero_rows of the current block
2565
*
2566
* \return
2567
*
2568
* \author
2569
*  Ittiam
2570
*
2571
*****************************************************************************
2572
*/
2573
2574
void ihevce_it_recon_fxn(
2575
    ihevce_enc_loop_ctxt_t *ps_ctxt,
2576
    WORD16 *pi2_deq_data,
2577
    WORD32 deq_dat_strd,
2578
    UWORD8 *pu1_pred,
2579
    WORD32 pred_strd,
2580
    UWORD8 *pu1_recon,
2581
    WORD32 recon_strd,
2582
    UWORD8 *pu1_ecd_data,
2583
    WORD32 trans_size,
2584
    WORD32 packed_pred_mode,
2585
    WORD32 cbf,
2586
    WORD32 zero_cols,
2587
    WORD32 zero_rows)
2588
22.4M
{
2589
22.4M
    WORD32 dc_add_flag = 0;
2590
22.4M
    WORD32 trans_idx;
2591
2592
    /* translate the transform size to index for 4x4 and 8x8 */
2593
22.4M
    trans_idx = trans_size >> 2;
2594
2595
    /* if SKIP mode needs to be evaluated the pred is copied to recon */
2596
22.4M
    if(PRED_MODE_SKIP == packed_pred_mode)
2597
217k
    {
2598
217k
        UWORD8 *pu1_curr_recon, *pu1_curr_pred;
2599
2600
217k
        pu1_curr_pred = pu1_pred;
2601
217k
        pu1_curr_recon = pu1_recon;
2602
2603
        /* 2D copy of data */
2604
2605
217k
        ps_ctxt->s_cmn_opt_func.pf_2d_square_copy(
2606
217k
            pu1_curr_recon, recon_strd, pu1_curr_pred, pred_strd, trans_size, sizeof(UWORD8));
2607
2608
217k
        return;
2609
217k
    }
2610
2611
    /* for intra 4x4 DST transform should be used */
2612
22.2M
    if((1 == trans_idx) && (PRED_MODE_INTRA == packed_pred_mode))
2613
7.00M
    {
2614
7.00M
        trans_idx = 0;
2615
7.00M
    }
2616
    /* for 16x16 cases */
2617
15.2M
    else if(16 == trans_size)
2618
5.39M
    {
2619
5.39M
        trans_idx = 3;
2620
5.39M
    }
2621
    /* for 32x32 cases */
2622
9.82M
    else if(32 == trans_size)
2623
2.09M
    {
2624
2.09M
        trans_idx = 4;
2625
2.09M
    }
2626
2627
    /*if (lastx == 0 && lasty == 0) , ie only 1 coefficient */
2628
22.2M
    if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1]))
2629
4.50M
    {
2630
4.50M
        dc_add_flag = 1;
2631
4.50M
    }
2632
2633
22.2M
    if(0 == cbf)
2634
17.0M
    {
2635
        /* buffer copy */
2636
17.0M
        ps_ctxt->s_cmn_opt_func.pf_2d_square_copy(
2637
17.0M
            pu1_recon, recon_strd, pu1_pred, pred_strd, trans_size, 1);
2638
17.0M
    }
2639
5.18M
    else if((1 == dc_add_flag) && (0 != trans_idx))
2640
164k
    {
2641
        /* dc add */
2642
164k
        ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc(
2643
164k
            pu1_pred,
2644
164k
            pred_strd,
2645
164k
            pu1_recon,
2646
164k
            recon_strd,
2647
164k
            trans_size,
2648
164k
            pi2_deq_data[0],
2649
164k
            NULL_PLANE /* luma */
2650
164k
        );
2651
164k
    }
2652
5.02M
    else
2653
5.02M
    {
2654
5.02M
        ps_ctxt->apf_it_recon[trans_idx](
2655
5.02M
            pi2_deq_data,
2656
5.02M
            &ps_ctxt->ai2_scratch[0],
2657
5.02M
            pu1_pred,
2658
5.02M
            pu1_recon,
2659
5.02M
            deq_dat_strd,
2660
5.02M
            pred_strd,
2661
5.02M
            recon_strd,
2662
5.02M
            zero_cols,
2663
5.02M
            zero_rows);
2664
5.02M
    }
2665
22.2M
}
2666
2667
/*!
2668
******************************************************************************
2669
* \if Function name : ihevce_chroma_it_recon_fxn \endif
2670
*
2671
* \brief
2672
*    Transform unit level (Chroma) IT Recon function
2673
*
2674
* \param[in] ps_ctxt        enc_loop module ctxt pointer
2675
* \param[in] pi2_deq_data   pointer to iq data
2676
* \param[in] deq_data_strd  iq data buffer stride
2677
* \param[in] pu1_pred       pointer to predicted data buffer
2678
* \param[in] pred_strd      predicted buffer stride
2679
* \param[in] pu1_recon      pointer to recon buffer
2680
* \param[in] recon_strd     recon buffer stride
2681
* \param[out] pu1_ecd_data  pointer coeff output buffer (input to ent cod)
2682
* \param[in] trans_size     transform size (4, 8, 16)
2683
* \param[in] cbf            CBF of the current block
2684
* \param[in] zero_cols      zero_cols of the current block
2685
* \param[in] zero_rows      zero_rows of the current block
2686
*
2687
* \return
2688
*
2689
* \author
2690
*  Ittiam
2691
*
2692
*****************************************************************************
2693
*/
2694
2695
void ihevce_chroma_it_recon_fxn(
2696
    ihevce_enc_loop_ctxt_t *ps_ctxt,
2697
    WORD16 *pi2_deq_data,
2698
    WORD32 deq_dat_strd,
2699
    UWORD8 *pu1_pred,
2700
    WORD32 pred_strd,
2701
    UWORD8 *pu1_recon,
2702
    WORD32 recon_strd,
2703
    UWORD8 *pu1_ecd_data,
2704
    WORD32 trans_size,
2705
    WORD32 cbf,
2706
    WORD32 zero_cols,
2707
    WORD32 zero_rows,
2708
    CHROMA_PLANE_ID_T e_chroma_plane)
2709
28.5M
{
2710
28.5M
    WORD32 trans_idx;
2711
2712
28.5M
    ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE));
2713
2714
    /* since 2x2 transform is not allowed for chroma*/
2715
28.5M
    if(2 == trans_size)
2716
0
    {
2717
0
        trans_size = 4;
2718
0
    }
2719
2720
    /* translate the transform size to index */
2721
28.5M
    trans_idx = trans_size >> 2;
2722
2723
    /* for 16x16 cases */
2724
28.5M
    if(16 == trans_size)
2725
4.56M
    {
2726
4.56M
        trans_idx = 3;
2727
4.56M
    }
2728
2729
28.5M
    if(0 == cbf)
2730
25.4M
    {
2731
        /* buffer copy */
2732
25.4M
        ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
2733
25.4M
            pu1_pred, pred_strd, pu1_recon, recon_strd, trans_size, trans_size, e_chroma_plane);
2734
25.4M
    }
2735
3.15M
    else if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1]))
2736
275k
    {
2737
        /* dc add */
2738
275k
        ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc(
2739
275k
            pu1_pred,
2740
275k
            pred_strd,
2741
275k
            pu1_recon,
2742
275k
            recon_strd,
2743
275k
            trans_size,
2744
275k
            pi2_deq_data[0],
2745
275k
            e_chroma_plane /* chroma plane */
2746
275k
        );
2747
275k
    }
2748
2.87M
    else
2749
2.87M
    {
2750
2.87M
        ps_ctxt->apf_chrm_it_recon[trans_idx - 1](
2751
2.87M
            pi2_deq_data,
2752
2.87M
            &ps_ctxt->ai2_scratch[0],
2753
2.87M
            pu1_pred + (WORD32)e_chroma_plane,
2754
2.87M
            pu1_recon + (WORD32)e_chroma_plane,
2755
2.87M
            deq_dat_strd,
2756
2.87M
            pred_strd,
2757
2.87M
            recon_strd,
2758
2.87M
            zero_cols,
2759
2.87M
            zero_rows);
2760
2.87M
    }
2761
28.5M
}
2762
2763
/**
2764
*******************************************************************************
2765
* \if Function name : ihevce_mpm_idx_based_filter_RDOPT_cand \endif
2766
*
2767
* \brief * Filters the RDOPT candidates based on mpm_idx
2768
*
2769
* \par   Description
2770
* Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
2771
* for a CU
2772
*
2773
* \param[in] ps_ctxt : ptr to enc loop context
2774
* \param[in] ps_cu_analyse : ptr to CU analyse structure
2775
* \param[in] ps_top_nbr_4x4 top 4x4 neighbour pointer
2776
* \param[in] ps_left_nbr_4x4 left 4x4 neighbour pointer
2777
* \param[in] pu1_luma_mode luma mode
2778
*
2779
* \returns none
2780
*
2781
* \author
2782
*  Ittiam
2783
*
2784
*******************************************************************************
2785
*/
2786
2787
void ihevce_mpm_idx_based_filter_RDOPT_cand(
2788
    ihevce_enc_loop_ctxt_t *ps_ctxt,
2789
    cu_analyse_t *ps_cu_analyse,
2790
    nbr_4x4_t *ps_left_nbr_4x4,
2791
    nbr_4x4_t *ps_top_nbr_4x4,
2792
    UWORD8 *pu1_luma_mode,
2793
    UWORD8 *pu1_eval_mark)
2794
389k
{
2795
389k
    WORD32 cu_pos_x;
2796
389k
    WORD32 cu_pos_y;
2797
389k
    nbr_avail_flags_t s_nbr;
2798
389k
    WORD32 trans_size;
2799
389k
    WORD32 au4_cand_mode_list[3];
2800
389k
    WORD32 nbr_flags;
2801
389k
    UWORD8 *pu1_intra_luma_modes;
2802
389k
    WORD32 rdopt_cand_ctr = 0;
2803
389k
    UWORD8 *pu1_luma_eval_mark;
2804
2805
389k
    cu_pos_x = ps_cu_analyse->b3_cu_pos_x << 1;
2806
389k
    cu_pos_y = ps_cu_analyse->b3_cu_pos_y << 1;
2807
389k
    trans_size = ps_cu_analyse->u1_cu_size;
2808
2809
    /* get the neighbour availability flags */
2810
389k
    nbr_flags = ihevce_get_nbr_intra(
2811
389k
        &s_nbr,
2812
389k
        ps_ctxt->pu1_ctb_nbr_map,
2813
389k
        ps_ctxt->i4_nbr_map_strd,
2814
389k
        cu_pos_x,
2815
389k
        cu_pos_y,
2816
389k
        trans_size >> 2);
2817
389k
    (void)nbr_flags;
2818
    /*Call the fun to populate luma intra pred mode fro TU=CU and use the same list fro
2819
    *TU=CU/2 also since the modes are same in both the cases.
2820
    */
2821
389k
    ihevce_populate_intra_pred_mode(
2822
389k
        ps_top_nbr_4x4->b6_luma_intra_mode,
2823
389k
        ps_left_nbr_4x4->b6_luma_intra_mode,
2824
389k
        s_nbr.u1_top_avail,
2825
389k
        s_nbr.u1_left_avail,
2826
389k
        cu_pos_y,
2827
389k
        &au4_cand_mode_list[0]);
2828
2829
    /*Loop through all the RDOPT candidates of TU=CU and TU=CU/2 and check if the current RDOPT
2830
    *cand is present in a4_cand_mode_list, If yes set eval flag to 1 else set it to zero
2831
    */
2832
2833
389k
    pu1_intra_luma_modes = pu1_luma_mode;
2834
389k
    pu1_luma_eval_mark = pu1_eval_mark;
2835
2836
1.35M
    while(pu1_intra_luma_modes[rdopt_cand_ctr] != 255)
2837
966k
    {
2838
966k
        WORD32 i;
2839
966k
        WORD32 found_flag = 0;
2840
2841
        /*1st candidate of TU=CU list and TU=CU/2 list must go through RDOPT stage
2842
        *irrespective of whether the cand is present in the mpm idx list or not
2843
        */
2844
966k
        if(rdopt_cand_ctr == 0)
2845
341k
        {
2846
341k
            rdopt_cand_ctr++;
2847
341k
            continue;
2848
341k
        }
2849
2850
1.88M
        for(i = 0; i < 3; i++)
2851
1.56M
        {
2852
1.56M
            if(pu1_intra_luma_modes[rdopt_cand_ctr] == au4_cand_mode_list[i])
2853
311k
            {
2854
311k
                found_flag = 1;
2855
311k
                break;
2856
311k
            }
2857
1.56M
        }
2858
2859
624k
        if(found_flag == 0)
2860
313k
        {
2861
313k
            pu1_luma_eval_mark[rdopt_cand_ctr] = 0;
2862
313k
        }
2863
2864
624k
        rdopt_cand_ctr++;
2865
624k
    }
2866
389k
}
2867
2868
/*!
2869
******************************************************************************
2870
* \if Function name : ihevce_intra_rdopt_cu_ntu \endif
2871
*
2872
* \brief
2873
*    Intra Coding unit function for RD opt mode
2874
*
2875
* \param[in] ps_ctxt    enc_loop module ctxt pointer
2876
* \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure
2877
* \param[in] pu1_luma_mode : pointer to luma mode
2878
* \param[in] ps_cu_analyse  pointer to cu analyse pointer
2879
* \param[in] pu1_src    pointer to source data buffer
2880
* \param[in] src_strd   source buffer stride
2881
* \param[in] pu1_cu_left pointer to left recon data buffer
2882
* \param[in] pu1_cu_top  pointer to top recon data buffer
2883
* \param[in] pu1_cu_top_left pointer to top left recon data buffer
2884
* \param[in] ps_left_nbr_4x4 : left 4x4 neighbour pointer
2885
* \param[in] ps_top_nbr_4x4 : top 4x4 neighbour pointer
2886
* \param[in] nbr_4x4_left_strd left nbr4x4 stride
2887
* \param[in] cu_left_stride left recon buffer stride
2888
* \param[in] curr_buf_idx RD opt buffer index for current usage
2889
* \param[in] func_proc_mode : function processing mode @sa TU_SIZE_WRT_CU_T
2890
*
2891
* \return
2892
*    RDopt cost
2893
*
2894
* \author
2895
*  Ittiam
2896
*
2897
*****************************************************************************
2898
*/
2899
LWORD64 ihevce_intra_rdopt_cu_ntu(
2900
    ihevce_enc_loop_ctxt_t *ps_ctxt,
2901
    enc_loop_cu_prms_t *ps_cu_prms,
2902
    void *pv_pred_org,
2903
    WORD32 pred_strd_org,
2904
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
2905
    UWORD8 *pu1_luma_mode,
2906
    cu_analyse_t *ps_cu_analyse,
2907
    void *pv_curr_src,
2908
    void *pv_cu_left,
2909
    void *pv_cu_top,
2910
    void *pv_cu_top_left,
2911
    nbr_4x4_t *ps_left_nbr_4x4,
2912
    nbr_4x4_t *ps_top_nbr_4x4,
2913
    WORD32 nbr_4x4_left_strd,
2914
    WORD32 cu_left_stride,
2915
    WORD32 curr_buf_idx,
2916
    WORD32 func_proc_mode,
2917
    WORD32 i4_alpha_stim_multiplier)
2918
9.69M
{
2919
9.69M
    enc_loop_cu_final_prms_t *ps_final_prms;
2920
9.69M
    nbr_avail_flags_t s_nbr;
2921
9.69M
    nbr_4x4_t *ps_nbr_4x4;
2922
9.69M
    nbr_4x4_t *ps_tmp_lt_4x4;
2923
9.69M
    recon_datastore_t *ps_recon_datastore;
2924
2925
9.69M
    ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
2926
2927
9.69M
    UWORD32 *pu4_nbr_flags;
2928
9.69M
    UWORD8 *pu1_intra_pred_mode;
2929
9.69M
    WORD32 cu_pos_x;
2930
9.69M
    WORD32 cu_pos_y;
2931
9.69M
    WORD32 trans_size = 0;
2932
9.69M
    UWORD8 *pu1_left;
2933
9.69M
    UWORD8 *pu1_top;
2934
9.69M
    UWORD8 *pu1_top_left;
2935
9.69M
    UWORD8 *pu1_recon;
2936
9.69M
    UWORD8 *pu1_csbf_buf;
2937
9.69M
    UWORD8 *pu1_ecd_data;
2938
9.69M
    WORD16 *pi2_deq_data;
2939
9.69M
    WORD32 deq_data_strd;
2940
9.69M
    LWORD64 total_rdopt_cost;
2941
9.69M
    WORD32 ctr;
2942
9.69M
    WORD32 left_strd;
2943
9.69M
    WORD32 i4_recon_stride;
2944
9.69M
    WORD32 csbf_strd;
2945
9.69M
    WORD32 ecd_data_bytes_cons;
2946
9.69M
    WORD32 num_4x4_in_tu;
2947
9.69M
    WORD32 num_4x4_in_cu;
2948
9.69M
    WORD32 chrm_present_flag;
2949
9.69M
    WORD32 tx_size;
2950
9.69M
    WORD32 cu_bits;
2951
9.69M
    WORD32 num_cu_parts = 0;
2952
9.69M
    WORD32 num_cands = 0;
2953
9.69M
    WORD32 cu_pos_x_8pelunits;
2954
9.69M
    WORD32 cu_pos_y_8pelunits;
2955
9.69M
    WORD32 i4_perform_rdoq;
2956
9.69M
    WORD32 i4_perform_sbh;
2957
9.69M
    UWORD8 u1_compute_spatial_ssd;
2958
9.69M
    UWORD8 u1_compute_recon;
2959
9.69M
    UWORD8 au1_intra_nxn_rdopt_ctxt_models[2][IHEVC_CAB_CTXT_END];
2960
2961
9.69M
    UWORD16 u2_num_tus_in_cu = 0;
2962
9.69M
    WORD32 is_sub_pu_in_hq = 0;
2963
    /* Get the RDOPT cost of the best CU mode for early_exit */
2964
9.69M
    LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
2965
    /* cabac context of prev intra luma pred flag */
2966
9.69M
    UWORD8 u1_prev_flag_cabac_ctxt =
2967
9.69M
        ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_INTRA_LUMA_PRED_FLAG];
2968
9.69M
    WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
2969
2970
9.69M
    UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY;
2971
2972
9.69M
    total_rdopt_cost = 0;
2973
9.69M
    ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
2974
9.69M
    ps_recon_datastore = &ps_final_prms->s_recon_datastore;
2975
9.69M
    i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
2976
9.69M
    csbf_strd = ps_ctxt->i4_cu_csbf_strd;
2977
9.69M
    pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
2978
9.69M
    pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
2979
9.69M
    pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
2980
9.69M
    deq_data_strd = ps_cu_analyse->u1_cu_size; /* deq_data stride is cu size */
2981
9.69M
    ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
2982
9.69M
    ps_tmp_lt_4x4 = ps_left_nbr_4x4;
2983
9.69M
    pu4_nbr_flags = &ps_final_prms->au4_nbr_flags[0];
2984
9.69M
    pu1_intra_pred_mode = &ps_final_prms->au1_intra_pred_mode[0];
2985
9.69M
    cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
2986
9.69M
    cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
2987
9.69M
    cu_pos_x_8pelunits = cu_pos_x;
2988
9.69M
    cu_pos_y_8pelunits = cu_pos_y;
2989
2990
    /* reset cu not coded cost */
2991
9.69M
    ps_ctxt->i8_cu_not_coded_cost = 0;
2992
2993
    /* based on the processing mode */
2994
9.69M
    if(TU_EQ_CU == func_proc_mode)
2995
6.55M
    {
2996
6.55M
        ps_final_prms->u1_part_mode = SIZE_2Nx2N;
2997
6.55M
        trans_size = ps_cu_analyse->u1_cu_size;
2998
6.55M
        num_cu_parts = 1;
2999
6.55M
        num_cands = 1;
3000
6.55M
        u2_num_tus_in_cu = 1;
3001
6.55M
    }
3002
3.14M
    else if(TU_EQ_CU_DIV2 == func_proc_mode)
3003
2.61M
    {
3004
2.61M
        ps_final_prms->u1_part_mode = SIZE_2Nx2N;
3005
2.61M
        trans_size = ps_cu_analyse->u1_cu_size >> 1;
3006
2.61M
        num_cu_parts = 4;
3007
2.61M
        num_cands = 1;
3008
2.61M
        u2_num_tus_in_cu = 4;
3009
2.61M
    }
3010
530k
    else if(TU_EQ_SUBCU == func_proc_mode)
3011
530k
    {
3012
530k
        ps_final_prms->u1_part_mode = SIZE_NxN;
3013
530k
        trans_size = ps_cu_analyse->u1_cu_size >> 1;
3014
530k
        num_cu_parts = 4;
3015
        /*In HQ for TU = SUBPU, all 35 modes used for RDOPT instead of 3 modes */
3016
530k
        if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset)
3017
324k
        {
3018
324k
            if(ps_ctxt->i1_slice_type != BSLICE)
3019
295k
            {
3020
295k
                num_cands = (4 * MAX_INTRA_CU_CANDIDATES) + 2;
3021
295k
            }
3022
28.8k
            else
3023
28.8k
            {
3024
28.8k
                num_cands = (2 * MAX_INTRA_CU_CANDIDATES);
3025
28.8k
            }
3026
324k
        }
3027
206k
        else
3028
206k
        {
3029
206k
            num_cands = MAX_INTRA_CU_CANDIDATES;
3030
206k
        }
3031
530k
        u2_num_tus_in_cu = 4;
3032
530k
    }
3033
0
    else
3034
0
    {
3035
        /* should not enter here */
3036
0
        ASSERT(0);
3037
0
    }
3038
3039
9.69M
    if(ps_ctxt->i1_cu_qp_delta_enable)
3040
3.85M
    {
3041
3.85M
        ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, trans_size, 1);
3042
3.85M
    }
3043
3044
9.69M
    if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
3045
0
    {
3046
0
        ps_ctxt->i8_cl_ssd_lambda_qf =
3047
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
3048
0
             100.0f);
3049
0
        ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
3050
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
3051
0
             (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
3052
0
    }
3053
3054
9.69M
    u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
3055
9.69M
                             (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
3056
9.69M
                             CONVERT_SSDS_TO_SPATIAL_DOMAIN;
3057
3058
9.69M
    if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
3059
0
    {
3060
0
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
3061
0
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
3062
0
    }
3063
3064
    /* populate the neighbours */
3065
9.69M
    pu1_left = (UWORD8 *)pv_cu_left;
3066
9.69M
    pu1_top = (UWORD8 *)pv_cu_top;
3067
9.69M
    pu1_top_left = (UWORD8 *)pv_cu_top_left;
3068
9.69M
    left_strd = cu_left_stride;
3069
9.69M
    num_4x4_in_tu = (trans_size >> 2);
3070
9.69M
    num_4x4_in_cu = (ps_cu_analyse->u1_cu_size >> 2);
3071
9.69M
    chrm_present_flag = 1;
3072
9.69M
    ecd_data_bytes_cons = 0;
3073
9.69M
    cu_bits = 0;
3074
3075
    /* get the 4x4 level position of current cu */
3076
9.69M
    cu_pos_x = cu_pos_x << 1;
3077
9.69M
    cu_pos_y = cu_pos_y << 1;
3078
3079
    /* populate cu level params knowing that current is intra */
3080
9.69M
    ps_final_prms->u1_skip_flag = 0;
3081
9.69M
    ps_final_prms->u1_intra_flag = PRED_MODE_INTRA;
3082
9.69M
    ps_final_prms->u2_num_pus_in_cu = 1;
3083
    /*init the is_cu_coded flag*/
3084
9.69M
    ps_final_prms->u1_is_cu_coded = 0;
3085
9.69M
    ps_final_prms->u4_cu_sad = 0;
3086
3087
9.69M
    ps_final_prms->as_pu_enc_loop[0].b1_intra_flag = PRED_MODE_INTRA;
3088
9.69M
    ps_final_prms->as_pu_enc_loop[0].b4_wd = (trans_size >> 1) - 1;
3089
9.69M
    ps_final_prms->as_pu_enc_loop[0].b4_ht = (trans_size >> 1) - 1;
3090
9.69M
    ps_final_prms->as_pu_enc_loop[0].b4_pos_x = cu_pos_x;
3091
9.69M
    ps_final_prms->as_pu_enc_loop[0].b4_pos_y = cu_pos_y;
3092
9.69M
    ps_final_prms->as_pu_enc_loop[0].b1_merge_flag = 0;
3093
3094
9.69M
    ps_final_prms->as_col_pu_enc_loop[0].b1_intra_flag = 1;
3095
3096
    /*copy qp directly as intra cant be skip*/
3097
9.69M
    ps_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
3098
9.69M
    ps_nbr_4x4->mv.s_l0_mv.i2_mvx = 0;
3099
9.69M
    ps_nbr_4x4->mv.s_l0_mv.i2_mvy = 0;
3100
9.69M
    ps_nbr_4x4->mv.s_l1_mv.i2_mvx = 0;
3101
9.69M
    ps_nbr_4x4->mv.s_l1_mv.i2_mvy = 0;
3102
9.69M
    ps_nbr_4x4->mv.i1_l0_ref_pic_buf_id = -1;
3103
9.69M
    ps_nbr_4x4->mv.i1_l1_ref_pic_buf_id = -1;
3104
9.69M
    ps_nbr_4x4->mv.i1_l0_ref_idx = -1;
3105
9.69M
    ps_nbr_4x4->mv.i1_l1_ref_idx = -1;
3106
3107
    /* RDOPT copy States :  TU init (best until prev TU) to current */
3108
9.69M
    memcpy(
3109
9.69M
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
3110
9.69M
             .s_cabac_ctxt.au1_ctxt_models[0],
3111
9.69M
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
3112
9.69M
        IHEVC_CAB_COEFFX_PREFIX);
3113
3114
    /* RDOPT copy States :update to init state if 0 cbf */
3115
9.69M
    memcpy(
3116
9.69M
        &au1_intra_nxn_rdopt_ctxt_models[0][0],
3117
9.69M
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
3118
9.69M
        IHEVC_CAB_COEFFX_PREFIX);
3119
9.69M
    memcpy(
3120
9.69M
        &au1_intra_nxn_rdopt_ctxt_models[1][0],
3121
9.69M
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
3122
9.69M
        IHEVC_CAB_COEFFX_PREFIX);
3123
3124
    /* loop for all partitions in CU  blocks */
3125
27.5M
    for(ctr = 0; ctr < num_cu_parts; ctr++)
3126
18.7M
    {
3127
18.7M
        UWORD8 *pu1_curr_mode;
3128
18.7M
        WORD32 cand_ctr;
3129
18.7M
        WORD32 nbr_flags;
3130
3131
        /* for NxN case to track the best mode       */
3132
        /* for other cases zeroth index will be used */
3133
18.7M
        intra_prev_rem_flags_t as_intra_prev_rem[2];
3134
18.7M
        LWORD64 ai8_cand_rdopt_cost[2];
3135
18.7M
        UWORD32 au4_tu_sad[2];
3136
18.7M
        WORD32 ai4_tu_bits[2];
3137
18.7M
        WORD32 ai4_cbf[2];
3138
18.7M
        WORD32 ai4_curr_bytes[2];
3139
18.7M
        WORD32 ai4_zero_col[2];
3140
18.7M
        WORD32 ai4_zero_row[2];
3141
        /* To store the pred, coeff and dequant for TU_EQ_SUBCU case (since mul.
3142
        cand. are there) ping-pong buffer to store the best and current */
3143
18.7M
        UWORD8 au1_cur_pred_data[2][MIN_TU_SIZE * MIN_TU_SIZE];
3144
18.7M
        UWORD8 au1_intra_coeffs[2][MAX_SCAN_COEFFS_BYTES_4x4];
3145
18.7M
        WORD16 ai2_intra_deq_coeffs[2][MIN_TU_SIZE * MIN_TU_SIZE];
3146
        /* Context models stored for RDopt store and restore purpose */
3147
3148
18.7M
        UWORD8 au1_recon_availability[2];
3149
3150
18.7M
        WORD32 best_cand_idx = 0;
3151
18.7M
        LWORD64 best_cand_cost = MAX_COST_64;
3152
        /* counters to toggle b/w best and current */
3153
18.7M
        WORD32 best_intra_buf_idx = 1;
3154
18.7M
        WORD32 curr_intra_buf_idx = 0;
3155
3156
        /* copy the mode pointer to be used in inner loop */
3157
18.7M
        pu1_curr_mode = pu1_luma_mode;
3158
3159
        /* get the neighbour availability flags */
3160
18.7M
        nbr_flags = ihevce_get_nbr_intra(
3161
18.7M
            &s_nbr,
3162
18.7M
            ps_ctxt->pu1_ctb_nbr_map,
3163
18.7M
            ps_ctxt->i4_nbr_map_strd,
3164
18.7M
            cu_pos_x,
3165
18.7M
            cu_pos_y,
3166
18.7M
            num_4x4_in_tu);
3167
3168
        /* copy the nbr flags for chroma reuse */
3169
18.7M
        if(4 != trans_size)
3170
14.4M
        {
3171
14.4M
            *pu4_nbr_flags = nbr_flags;
3172
14.4M
        }
3173
4.33M
        else if(1 == chrm_present_flag)
3174
1.10M
        {
3175
            /* compute the avail flags assuming luma trans is 8x8 */
3176
            /* get the neighbour availability flags */
3177
1.10M
            *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
3178
1.10M
                ps_ctxt->pu1_ctb_nbr_map,
3179
1.10M
                ps_ctxt->i4_nbr_map_strd,
3180
1.10M
                cu_pos_x,
3181
1.10M
                cu_pos_y,
3182
1.10M
                (num_4x4_in_tu << 1),
3183
1.10M
                (num_4x4_in_tu << 1));
3184
1.10M
        }
3185
3186
18.7M
        u1_compute_recon = !u1_compute_spatial_ssd && ((num_cu_parts > 1) && (ctr < 3));
3187
3188
18.7M
        if(!ctr && (u1_compute_spatial_ssd || u1_compute_recon))
3189
6.52M
        {
3190
6.52M
            ps_recon_datastore->u1_is_lumaRecon_available = 1;
3191
6.52M
        }
3192
12.2M
        else if(!ctr)
3193
3.16M
        {
3194
3.16M
            ps_recon_datastore->u1_is_lumaRecon_available = 0;
3195
3.16M
        }
3196
3197
18.7M
        ihevc_intra_pred_luma_ref_substitution_fptr =
3198
18.7M
            ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
3199
3200
        /* call reference array substitution */
3201
18.7M
        ihevc_intra_pred_luma_ref_substitution_fptr(
3202
18.7M
            pu1_top_left,
3203
18.7M
            pu1_top,
3204
18.7M
            pu1_left,
3205
18.7M
            left_strd,
3206
18.7M
            trans_size,
3207
18.7M
            nbr_flags,
3208
18.7M
            (UWORD8 *)ps_ctxt->pv_ref_sub_out,
3209
18.7M
            1);
3210
3211
        /* Intra Mode gating based on MPM cand list and encoder quality preset */
3212
18.7M
        if((ps_ctxt->i1_slice_type != ISLICE) && (TU_EQ_SUBCU == func_proc_mode) &&
3213
18.7M
           (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
3214
154k
        {
3215
154k
            ihevce_mpm_idx_based_filter_RDOPT_cand(
3216
154k
                ps_ctxt,
3217
154k
                ps_cu_analyse,
3218
154k
                ps_left_nbr_4x4,
3219
154k
                ps_top_nbr_4x4,
3220
154k
                pu1_luma_mode,
3221
154k
                &ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][0]);
3222
154k
        }
3223
3224
18.7M
        if((TU_EQ_SUBCU == func_proc_mode) && (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
3225
18.7M
           (ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr] >= MAX_INTRA_CU_CANDIDATES))
3226
1.28M
        {
3227
1.28M
            WORD32 ai4_mpm_mode_list[3];
3228
1.28M
            WORD32 i;
3229
3230
1.28M
            WORD32 i4_curr_index = ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr];
3231
3232
1.28M
            ihevce_populate_intra_pred_mode(
3233
1.28M
                ps_top_nbr_4x4->b6_luma_intra_mode,
3234
1.28M
                ps_tmp_lt_4x4->b6_luma_intra_mode,
3235
1.28M
                s_nbr.u1_top_avail,
3236
1.28M
                s_nbr.u1_left_avail,
3237
1.28M
                cu_pos_y,
3238
1.28M
                &ai4_mpm_mode_list[0]);
3239
3240
5.12M
            for(i = 0; i < 3; i++)
3241
3.84M
            {
3242
3.84M
                if(ps_cu_analyse->s_cu_intra_cand
3243
3.84M
                       .au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] == 0)
3244
685k
                {
3245
685k
                    ASSERT(ai4_mpm_mode_list[i] < 35);
3246
3247
685k
                    ps_cu_analyse->s_cu_intra_cand
3248
685k
                        .au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] = 1;
3249
685k
                    pu1_luma_mode[i4_curr_index] = ai4_mpm_mode_list[i];
3250
685k
                    ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr]++;
3251
685k
                    i4_curr_index++;
3252
685k
                }
3253
3.84M
            }
3254
3255
1.28M
            pu1_luma_mode[i4_curr_index] = 255;
3256
1.28M
        }
3257
3258
        /* loop over candidates for each partition */
3259
45.9M
        for(cand_ctr = 0; cand_ctr < num_cands; cand_ctr++)
3260
28.7M
        {
3261
28.7M
            WORD32 curr_pred_mode;
3262
28.7M
            WORD32 bits = 0;
3263
28.7M
            LWORD64 curr_cost;
3264
28.7M
            WORD32 luma_pred_func_idx;
3265
28.7M
            UWORD8 *pu1_curr_ecd_data;
3266
28.7M
            WORD16 *pi2_curr_deq_data;
3267
28.7M
            WORD32 curr_deq_data_strd;
3268
28.7M
            WORD32 pred_strd;
3269
28.7M
            UWORD8 *pu1_pred;
3270
3271
            /* if NXN case the recon and ecd data is stored in temp buffers */
3272
28.7M
            if(TU_EQ_SUBCU == func_proc_mode)
3273
12.0M
            {
3274
12.0M
                pu1_pred = &au1_cur_pred_data[curr_intra_buf_idx][0];
3275
12.0M
                pred_strd = trans_size;
3276
12.0M
                pu1_curr_ecd_data = &au1_intra_coeffs[curr_intra_buf_idx][0];
3277
12.0M
                pi2_curr_deq_data = &ai2_intra_deq_coeffs[curr_intra_buf_idx][0];
3278
12.0M
                curr_deq_data_strd = trans_size;
3279
3280
12.0M
                ASSERT(trans_size == MIN_TU_SIZE);
3281
12.0M
            }
3282
16.6M
            else
3283
16.6M
            {
3284
16.6M
                pu1_pred = (UWORD8 *)pv_pred_org;
3285
16.6M
                pred_strd = pred_strd_org;
3286
16.6M
                pu1_curr_ecd_data = pu1_ecd_data;
3287
16.6M
                pi2_curr_deq_data = pi2_deq_data;
3288
16.6M
                curr_deq_data_strd = deq_data_strd;
3289
16.6M
            }
3290
3291
28.7M
            pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[curr_intra_buf_idx]) +
3292
28.7M
                        (ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride;
3293
3294
28.7M
            if(is_sub_pu_in_hq == 1)
3295
0
            {
3296
0
                curr_pred_mode = cand_ctr;
3297
0
            }
3298
28.7M
            else
3299
28.7M
            {
3300
28.7M
                curr_pred_mode = pu1_curr_mode[cand_ctr];
3301
28.7M
            }
3302
3303
            /* If the candidate mode is 255, then break */
3304
28.7M
            if(255 == curr_pred_mode)
3305
1.50M
            {
3306
1.50M
                break;
3307
1.50M
            }
3308
27.2M
            else if(250 == curr_pred_mode)
3309
0
            {
3310
0
                continue;
3311
0
            }
3312
3313
            /* check if this mode needs to be evaluated or not. For 2nx2n cases, this   */
3314
            /* function will be called once per candidate, so this check has been done  */
3315
            /* outside this function call. For NxN case, this function will be called   */
3316
            /* only once, and all the candidates will be evaluated here.                */
3317
27.2M
            if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3)
3318
8.41M
            {
3319
8.41M
                if((TU_EQ_SUBCU == func_proc_mode) &&
3320
8.41M
                   (0 == ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][cand_ctr]))
3321
167k
                {
3322
167k
                    continue;
3323
167k
                }
3324
8.41M
            }
3325
3326
            /* call reference filtering */
3327
27.0M
            ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr(
3328
27.0M
                (UWORD8 *)ps_ctxt->pv_ref_sub_out,
3329
27.0M
                trans_size,
3330
27.0M
                (UWORD8 *)ps_ctxt->pv_ref_filt_out,
3331
27.0M
                curr_pred_mode,
3332
27.0M
                ps_ctxt->i1_strong_intra_smoothing_enable_flag);
3333
3334
            /* use the look up to get the function idx */
3335
27.0M
            luma_pred_func_idx = g_i4_ip_funcs[curr_pred_mode];
3336
3337
            /* call the intra prediction function */
3338
27.0M
            ps_ctxt->apf_lum_ip[luma_pred_func_idx](
3339
27.0M
                (UWORD8 *)ps_ctxt->pv_ref_filt_out,
3340
27.0M
                1,
3341
27.0M
                pu1_pred,
3342
27.0M
                pred_strd,
3343
27.0M
                trans_size,
3344
27.0M
                curr_pred_mode);
3345
3346
            /* populate the coeffs scan idx */
3347
27.0M
            ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
3348
3349
            /* for luma 4x4 and 8x8 transforms, the scan is chosen based on intra pred mode */
3350
27.0M
            if(trans_size < 16)
3351
19.6M
            {
3352
                /* for modes from 22 upto 30 horizontal scan is used */
3353
19.6M
                if((curr_pred_mode > 21) && (curr_pred_mode < 31))
3354
5.48M
                {
3355
5.48M
                    ps_ctxt->i4_scan_idx = SCAN_HORZ;
3356
5.48M
                }
3357
                /* for modes from 6 upto 14 vertical scan is used */
3358
14.2M
                else if((curr_pred_mode > 5) && (curr_pred_mode < 15))
3359
3.91M
                {
3360
3.91M
                    ps_ctxt->i4_scan_idx = SCAN_VERT;
3361
3.91M
                }
3362
19.6M
            }
3363
3364
            /* RDOPT copy States :  TU init (best until prev TU) to current */
3365
27.0M
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3366
27.0M
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
3367
27.0M
                        .s_cabac_ctxt.au1_ctxt_models[0] +
3368
27.0M
                    IHEVC_CAB_COEFFX_PREFIX,
3369
27.0M
                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
3370
27.0M
                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3371
3372
27.0M
            i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
3373
27.0M
            i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
3374
3375
#if DISABLE_RDOQ_INTRA
3376
            i4_perform_rdoq = 0;
3377
#endif
3378
3379
            /*2 Multi-dimensional array based on trans size of rounding factor to be added here */
3380
            /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
3381
            /* Currently the complete array will contain only single value*/
3382
            /*The rounding factor is calculated with the formula
3383
            Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
3384
            rounding factor = (1 - DeadZone Val)
3385
3386
            Assumption: Cabac states of All the sub-blocks in the TU are considered independent
3387
            */
3388
27.0M
            if((ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING))
3389
18.8M
            {
3390
18.8M
                if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0))
3391
0
                {
3392
0
                    double i4_lamda_modifier;
3393
3394
0
                    if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
3395
0
                    {
3396
0
                        i4_lamda_modifier =
3397
0
                            ps_ctxt->i4_lamda_modifier *
3398
0
                            CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
3399
0
                    }
3400
0
                    else
3401
0
                    {
3402
0
                        i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
3403
0
                    }
3404
0
                    if(ps_ctxt->i4_use_const_lamda_modifier)
3405
0
                    {
3406
0
                        if(ISLICE == ps_ctxt->i1_slice_type)
3407
0
                        {
3408
0
                            i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
3409
0
                        }
3410
0
                        else
3411
0
                        {
3412
0
                            i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
3413
0
                        }
3414
0
                    }
3415
3416
0
                    ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
3417
0
                        &ps_ctxt->i4_quant_round_tu[0][0];
3418
0
                    ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
3419
0
                        &ps_ctxt->i4_quant_round_tu[1][0];
3420
3421
0
                    memset(
3422
0
                        ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
3423
0
                        0,
3424
0
                        trans_size * trans_size * sizeof(WORD32));
3425
0
                    memset(
3426
0
                        ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
3427
0
                        0,
3428
0
                        trans_size * trans_size * sizeof(WORD32));
3429
3430
0
                    ihevce_quant_rounding_factor_gen(
3431
0
                        trans_size,
3432
0
                        1,
3433
0
                        &ps_ctxt->s_rdopt_entropy_ctxt,
3434
0
                        ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
3435
0
                        ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
3436
0
                        i4_lamda_modifier,
3437
0
                        1);
3438
0
                }
3439
18.8M
                else
3440
18.8M
                {
3441
18.8M
                    ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
3442
18.8M
                        ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3];
3443
18.8M
                    ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
3444
18.8M
                        ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3];
3445
18.8M
                }
3446
18.8M
            }
3447
3448
            /* call T Q IT IQ and recon function */
3449
27.0M
            ai4_cbf[curr_intra_buf_idx] = ihevce_t_q_iq_ssd_scan_fxn(
3450
27.0M
                ps_ctxt,
3451
27.0M
                pu1_pred,
3452
27.0M
                pred_strd,
3453
27.0M
                (UWORD8 *)pv_curr_src,
3454
27.0M
                src_strd,
3455
27.0M
                pi2_curr_deq_data,
3456
27.0M
                curr_deq_data_strd,
3457
27.0M
                pu1_recon,
3458
27.0M
                i4_recon_stride,
3459
27.0M
                pu1_curr_ecd_data,
3460
27.0M
                pu1_csbf_buf,
3461
27.0M
                csbf_strd,
3462
27.0M
                trans_size,
3463
27.0M
                PRED_MODE_INTRA,
3464
27.0M
                &ai8_cand_rdopt_cost[curr_intra_buf_idx],
3465
27.0M
                &ai4_curr_bytes[curr_intra_buf_idx],
3466
27.0M
                &ai4_tu_bits[curr_intra_buf_idx],
3467
27.0M
                &au4_tu_sad[curr_intra_buf_idx],
3468
27.0M
                &ai4_zero_col[curr_intra_buf_idx],
3469
27.0M
                &ai4_zero_row[curr_intra_buf_idx],
3470
27.0M
                &au1_recon_availability[curr_intra_buf_idx],
3471
27.0M
                i4_perform_rdoq,
3472
27.0M
                i4_perform_sbh,
3473
27.0M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
3474
27.0M
                i4_alpha_stim_multiplier,
3475
27.0M
                u1_is_cu_noisy,
3476
27.0M
#endif
3477
27.0M
                u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
3478
27.0M
                1 /*early_cbf */
3479
27.0M
            );
3480
3481
#if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
3482
            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
3483
            {
3484
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
3485
                ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
3486
                    pv_curr_src,
3487
                    src_strd,
3488
                    pu1_pred,
3489
                    pred_strd,
3490
                    ai8_cand_rdopt_cost[curr_intra_buf_idx],
3491
                    i4_alpha_stim_multiplier,
3492
                    trans_size,
3493
                    0,
3494
                    ps_ctxt->u1_enable_psyRDOPT,
3495
                    NULL_PLANE);
3496
#else
3497
                if(u1_compute_spatial_ssd && au1_recon_availability[curr_intra_buf_idx])
3498
                {
3499
                    ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
3500
                        pv_curr_src,
3501
                        src_strd,
3502
                        pu1_recon,
3503
                        i4_recon_stride,
3504
                        ai8_cand_rdopt_cost[curr_intra_buf_idx],
3505
                        i4_alpha_stim_multiplier,
3506
                        trans_size,
3507
                        0,
3508
                        ps_ctxt->u1_enable_psyRDOPT,
3509
                        NULL_PLANE);
3510
                }
3511
                else
3512
                {
3513
                    ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
3514
                        pv_curr_src,
3515
                        src_strd,
3516
                        pu1_pred,
3517
                        pred_strd,
3518
                        ai8_cand_rdopt_cost[curr_intra_buf_idx],
3519
                        i4_alpha_stim_multiplier,
3520
                        trans_size,
3521
                        0,
3522
                        ps_ctxt->u1_enable_psyRDOPT,
3523
                        NULL_PLANE);
3524
                }
3525
#endif
3526
            }
3527
#endif
3528
3529
27.0M
            if(TU_EQ_SUBCU == func_proc_mode)
3530
10.4M
            {
3531
10.4M
                ASSERT(ai4_curr_bytes[curr_intra_buf_idx] < MAX_SCAN_COEFFS_BYTES_4x4);
3532
10.4M
            }
3533
3534
            /* based on CBF/No CBF copy the corresponding state */
3535
27.0M
            if(0 == ai4_cbf[curr_intra_buf_idx])
3536
18.6M
            {
3537
                /* RDOPT copy States :update to init state if 0 cbf */
3538
18.6M
                COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3539
18.6M
                    &au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] +
3540
18.6M
                        IHEVC_CAB_COEFFX_PREFIX,
3541
18.6M
                    &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
3542
18.6M
                    IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3543
18.6M
            }
3544
8.41M
            else
3545
8.41M
            {
3546
                /* RDOPT copy States :update to new state only if CBF is non zero */
3547
8.41M
                COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3548
8.41M
                    &au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] +
3549
8.41M
                        IHEVC_CAB_COEFFX_PREFIX,
3550
8.41M
                    &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
3551
8.41M
                            .s_cabac_ctxt.au1_ctxt_models[0] +
3552
8.41M
                        IHEVC_CAB_COEFFX_PREFIX,
3553
8.41M
                    IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3554
8.41M
            }
3555
3556
            /* call the function which perform intra mode prediction */
3557
27.0M
            ihevce_intra_pred_mode_signaling(
3558
27.0M
                ps_top_nbr_4x4->b6_luma_intra_mode,
3559
27.0M
                ps_tmp_lt_4x4->b6_luma_intra_mode,
3560
27.0M
                s_nbr.u1_top_avail,
3561
27.0M
                s_nbr.u1_left_avail,
3562
27.0M
                cu_pos_y,
3563
27.0M
                curr_pred_mode,
3564
27.0M
                &as_intra_prev_rem[curr_intra_buf_idx]);
3565
            /******************************************************************/
3566
            /* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN
3567
            The bits for these are evaluated for every RDO mode of current subcu
3568
            as they can significantly contribute to RDO cost.  Note that these
3569
            bits are not accounted for here (ai8_cand_rdopt_cost) as they
3570
            are accounted for in encode_cu call later */
3571
3572
            /******************************************************************/
3573
            /* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN
3574
            The bits for these are evaluated for every RDO mode of current subcu
3575
            as they can significantly contribute to RDO cost.  Note that these
3576
            bits are not accounted for here (ai8_cand_rdopt_cost) as they
3577
            are accounted for in encode_cu call later */
3578
3579
            /* Estimate bits to encode prev rem flag  for NXN mode */
3580
27.0M
            {
3581
27.0M
                WORD32 bits_frac = gau2_ihevce_cabac_bin_to_bits
3582
27.0M
                    [u1_prev_flag_cabac_ctxt ^
3583
27.0M
                     as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag];
3584
3585
                /* rounding the fractional bits to nearest integer */
3586
27.0M
                bits = ((bits_frac + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q);
3587
27.0M
            }
3588
3589
            /* based on prev flag all the mpmidx bits and rem bits */
3590
27.0M
            if(1 == as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag)
3591
20.1M
            {
3592
                /* mpm_idx */
3593
20.1M
                bits += as_intra_prev_rem[curr_intra_buf_idx].b2_mpm_idx ? 2 : 1;
3594
20.1M
            }
3595
6.86M
            else
3596
6.86M
            {
3597
                /* rem intra mode */
3598
6.86M
                bits += 5;
3599
6.86M
            }
3600
3601
27.0M
            bits += ai4_tu_bits[curr_intra_buf_idx];
3602
3603
            /* compute the total cost for current candidate */
3604
27.0M
            curr_cost = ai8_cand_rdopt_cost[curr_intra_buf_idx];
3605
3606
            /* get the final ssd cost */
3607
27.0M
            curr_cost +=
3608
27.0M
                COMPUTE_RATE_COST_CLIP30(bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
3609
3610
            /* check of the best candidate cost */
3611
27.0M
            if(curr_cost < best_cand_cost)
3612
19.9M
            {
3613
19.9M
                best_cand_cost = curr_cost;
3614
19.9M
                best_cand_idx = cand_ctr;
3615
19.9M
                best_intra_buf_idx = curr_intra_buf_idx;
3616
19.9M
                curr_intra_buf_idx = !curr_intra_buf_idx;
3617
19.9M
            }
3618
27.0M
        }
3619
3620
        /***************    For TU_EQ_SUBCU case    *****************/
3621
        /* Copy the pred for best cand. to the final pred array     */
3622
        /* Copy the iq-coeff for best cand. to the final array      */
3623
        /* copy the best coeffs data to final buffer                */
3624
18.7M
        if(TU_EQ_SUBCU == func_proc_mode)
3625
2.09M
        {
3626
            /* Copy the pred for best cand. to the final pred array */
3627
3628
2.09M
            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
3629
2.09M
                (UWORD8 *)pv_pred_org,
3630
2.09M
                pred_strd_org,
3631
2.09M
                &au1_cur_pred_data[best_intra_buf_idx][0],
3632
2.09M
                trans_size,
3633
2.09M
                trans_size,
3634
2.09M
                trans_size);
3635
3636
            /* Copy the deq-coeff for best cand. to the final array */
3637
3638
2.09M
            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
3639
2.09M
                (UWORD8 *)pi2_deq_data,
3640
2.09M
                deq_data_strd << 1,
3641
2.09M
                (UWORD8 *)&ai2_intra_deq_coeffs[best_intra_buf_idx][0],
3642
2.09M
                trans_size << 1,
3643
2.09M
                trans_size << 1,
3644
2.09M
                trans_size);
3645
            /* copy the coeffs to final cu ecd bytes buffer */
3646
2.09M
            memcpy(
3647
2.09M
                pu1_ecd_data,
3648
2.09M
                &au1_intra_coeffs[best_intra_buf_idx][0],
3649
2.09M
                ai4_curr_bytes[best_intra_buf_idx]);
3650
3651
2.09M
            pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[best_intra_buf_idx]) +
3652
2.09M
                        (ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride;
3653
2.09M
        }
3654
3655
        /*----------   Calculate Recon for the best INTRA mode     ---------*/
3656
        /* TU_EQ_CU case : No need for recon, otherwise recon is required   */
3657
        /* Compute recon only for the best mode for TU_EQ_SUBCU case        */
3658
18.7M
        if(u1_compute_recon)
3659
6.07M
        {
3660
6.07M
            ihevce_it_recon_fxn(
3661
6.07M
                ps_ctxt,
3662
6.07M
                pi2_deq_data,
3663
6.07M
                deq_data_strd,
3664
6.07M
                (UWORD8 *)pv_pred_org,
3665
6.07M
                pred_strd_org,
3666
6.07M
                pu1_recon,
3667
6.07M
                i4_recon_stride,
3668
6.07M
                pu1_ecd_data,
3669
6.07M
                trans_size,
3670
6.07M
                PRED_MODE_INTRA,
3671
6.07M
                ai4_cbf[best_intra_buf_idx],
3672
6.07M
                ai4_zero_col[best_intra_buf_idx],
3673
6.07M
                ai4_zero_row[best_intra_buf_idx]);
3674
3675
6.07M
            ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx;
3676
6.07M
        }
3677
12.6M
        else if(u1_compute_spatial_ssd && au1_recon_availability[best_intra_buf_idx])
3678
7.56M
        {
3679
7.56M
            ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx;
3680
7.56M
        }
3681
5.10M
        else
3682
5.10M
        {
3683
5.10M
            ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
3684
5.10M
        }
3685
3686
        /* RDOPT copy States :update to best modes state */
3687
18.7M
        COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3688
18.7M
            &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
3689
18.7M
            &au1_intra_nxn_rdopt_ctxt_models[best_intra_buf_idx][0] + IHEVC_CAB_COEFFX_PREFIX,
3690
18.7M
            IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3691
3692
        /* copy the prev,mpm_idx and rem modes from best cand */
3693
18.7M
        ps_final_prms->as_intra_prev_rem[ctr] = as_intra_prev_rem[best_intra_buf_idx];
3694
3695
        /* update the cabac context of prev intra pred mode flag */
3696
18.7M
        u1_prev_flag_cabac_ctxt = gau1_ihevc_next_state
3697
18.7M
            [(u1_prev_flag_cabac_ctxt << 1) |
3698
18.7M
             as_intra_prev_rem[best_intra_buf_idx].b1_prev_intra_luma_pred_flag];
3699
3700
        /* accumulate the TU bits into cu bits */
3701
18.7M
        cu_bits += ai4_tu_bits[best_intra_buf_idx];
3702
3703
        /* copy the intra pred mode for chroma reuse */
3704
18.7M
        if(is_sub_pu_in_hq == 0)
3705
18.7M
        {
3706
18.7M
            *pu1_intra_pred_mode = pu1_curr_mode[best_cand_idx];
3707
18.7M
        }
3708
0
        else
3709
0
        {
3710
0
            *pu1_intra_pred_mode = best_cand_idx;
3711
0
        }
3712
3713
        /* Store luma mode as chroma mode. If chroma prcs happens, and
3714
        if a diff. mode wins, it should update this!! */
3715
18.7M
        if(1 == chrm_present_flag)
3716
15.5M
        {
3717
15.5M
            if(is_sub_pu_in_hq == 0)
3718
15.5M
            {
3719
15.5M
                ps_final_prms->u1_chroma_intra_pred_actual_mode =
3720
15.5M
                    ((ps_ctxt->u1_chroma_array_type == 2)
3721
15.5M
                         ? gau1_chroma422_intra_angle_mapping[pu1_curr_mode[best_cand_idx]]
3722
15.5M
                         : pu1_curr_mode[best_cand_idx]);
3723
15.5M
            }
3724
0
            else
3725
0
            {
3726
0
                ps_final_prms->u1_chroma_intra_pred_actual_mode =
3727
0
                    ((ps_ctxt->u1_chroma_array_type == 2)
3728
0
                         ? gau1_chroma422_intra_angle_mapping[best_cand_idx]
3729
0
                         : best_cand_idx);
3730
0
            }
3731
3732
15.5M
            ps_final_prms->u1_chroma_intra_pred_mode = 4;
3733
15.5M
        }
3734
3735
        /*remember the cbf flag to replicate qp for 4x4 neighbour*/
3736
18.7M
        ps_final_prms->u1_is_cu_coded |= ai4_cbf[best_intra_buf_idx];
3737
3738
        /*accumulate ssd over all TU of intra CU*/
3739
18.7M
        ps_final_prms->u4_cu_sad += au4_tu_sad[best_intra_buf_idx];
3740
3741
        /* update the bytes */
3742
18.7M
        ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
3743
18.7M
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed =
3744
18.7M
            ai4_curr_bytes[best_intra_buf_idx];
3745
        /* update the zero_row and col info for the final mode */
3746
18.7M
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col =
3747
18.7M
            ai4_zero_col[best_intra_buf_idx];
3748
18.7M
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row =
3749
18.7M
            ai4_zero_row[best_intra_buf_idx];
3750
3751
18.7M
        ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
3752
3753
        /* update the total bytes cons */
3754
18.7M
        ecd_data_bytes_cons += ai4_curr_bytes[best_intra_buf_idx];
3755
18.7M
        pu1_ecd_data += ai4_curr_bytes[best_intra_buf_idx];
3756
3757
18.7M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = ai4_cbf[best_intra_buf_idx];
3758
18.7M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
3759
18.7M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
3760
18.7M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
3761
18.7M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
3762
18.7M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag;
3763
18.7M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp;
3764
18.7M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0;
3765
18.7M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0;
3766
18.7M
        GETRANGE(tx_size, trans_size);
3767
18.7M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
3768
18.7M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x;
3769
18.7M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y;
3770
3771
        /* repiclate the nbr 4x4 structure for all 4x4 blocks current TU */
3772
18.7M
        ps_nbr_4x4->b1_skip_flag = 0;
3773
18.7M
        ps_nbr_4x4->b1_intra_flag = 1;
3774
18.7M
        ps_nbr_4x4->b1_pred_l0_flag = 0;
3775
18.7M
        ps_nbr_4x4->b1_pred_l1_flag = 0;
3776
3777
18.7M
        if(is_sub_pu_in_hq == 0)
3778
18.7M
        {
3779
18.7M
            ps_nbr_4x4->b6_luma_intra_mode = pu1_curr_mode[best_cand_idx];
3780
18.7M
        }
3781
0
        else
3782
0
        {
3783
0
            ps_nbr_4x4->b6_luma_intra_mode = best_cand_idx;
3784
0
        }
3785
3786
18.7M
        ps_nbr_4x4->b1_y_cbf = ai4_cbf[best_intra_buf_idx];
3787
3788
        /* since tu size can be less than cusize, replication is done with strd */
3789
18.7M
        {
3790
18.7M
            WORD32 i, j;
3791
18.7M
            nbr_4x4_t *ps_tmp_4x4;
3792
3793
18.7M
            ps_tmp_4x4 = ps_nbr_4x4;
3794
3795
74.6M
            for(i = 0; i < num_4x4_in_tu; i++)
3796
55.9M
            {
3797
302M
                for(j = 0; j < num_4x4_in_tu; j++)
3798
246M
                {
3799
246M
                    ps_tmp_4x4[j] = *ps_nbr_4x4;
3800
246M
                }
3801
                /* row level update*/
3802
55.9M
                ps_tmp_4x4 += num_4x4_in_cu;
3803
55.9M
            }
3804
18.7M
        }
3805
3806
18.7M
        if(TU_EQ_SUBCU == func_proc_mode)
3807
2.09M
        {
3808
2.09M
            pu1_luma_mode += ((MAX_INTRA_CU_CANDIDATES * 4) + 2 + 1);
3809
2.09M
        }
3810
3811
18.7M
        if((num_cu_parts > 1) && (ctr < 3))
3812
9.22M
        {
3813
            /* set the neighbour map to 1 */
3814
9.22M
            ihevce_set_nbr_map(
3815
9.22M
                ps_ctxt->pu1_ctb_nbr_map,
3816
9.22M
                ps_ctxt->i4_nbr_map_strd,
3817
9.22M
                cu_pos_x,
3818
9.22M
                cu_pos_y,
3819
9.22M
                trans_size >> 2,
3820
9.22M
                1);
3821
3822
            /* block level updates block number (1 & 3 )*/
3823
9.22M
            pv_curr_src = (UWORD8 *)pv_curr_src + trans_size;
3824
9.22M
            pv_pred_org = (UWORD8 *)pv_pred_org + trans_size;
3825
9.22M
            pi2_deq_data += trans_size;
3826
3827
9.22M
            switch(ctr)
3828
9.22M
            {
3829
3.14M
            case 0:
3830
3.14M
            {
3831
3.14M
                pu1_left = pu1_recon + trans_size - 1;
3832
3.14M
                pu1_top += trans_size;
3833
3.14M
                pu1_top_left = pu1_top - 1;
3834
3.14M
                left_strd = i4_recon_stride;
3835
3836
3.14M
                break;
3837
0
            }
3838
3.06M
            case 1:
3839
3.06M
            {
3840
3.06M
                ASSERT(
3841
3.06M
                    (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 0) ||
3842
3.06M
                    (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 1));
3843
3844
                /* Since the 'lumaRefSubstitution' function expects both Top and */
3845
                /* TopRight recon pixels to be present in the same buffer */
3846
3.06M
                if(ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] !=
3847
3.06M
                   ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1])
3848
143k
                {
3849
143k
                    UWORD8 *pu1_src =
3850
143k
                        ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3851
143k
                             [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) +
3852
143k
                        trans_size;
3853
143k
                    UWORD8 *pu1_dst =
3854
143k
                        ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3855
143k
                             [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) +
3856
143k
                        trans_size;
3857
3858
143k
                    ps_ctxt->s_cmn_opt_func.pf_copy_2d(
3859
143k
                        pu1_dst, i4_recon_stride, pu1_src, i4_recon_stride, trans_size, trans_size);
3860
3861
143k
                    ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] =
3862
143k
                        ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0];
3863
143k
                }
3864
3865
3.06M
                pu1_left = (UWORD8 *)pv_cu_left + trans_size * cu_left_stride;
3866
3.06M
                pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3867
3.06M
                               [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) +
3868
3.06M
                          (trans_size - 1) * i4_recon_stride;
3869
3.06M
                pu1_top_left = pu1_left - cu_left_stride;
3870
3.06M
                left_strd = cu_left_stride;
3871
3872
3.06M
                break;
3873
3.06M
            }
3874
3.02M
            case 2:
3875
3.02M
            {
3876
3.02M
                ASSERT(
3877
3.02M
                    (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 0) ||
3878
3.02M
                    (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 1));
3879
3880
3.02M
                pu1_left = pu1_recon + trans_size - 1;
3881
3.02M
                pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3882
3.02M
                               [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) +
3883
3.02M
                          (trans_size - 1) * i4_recon_stride + trans_size;
3884
3.02M
                pu1_top_left = pu1_top - 1;
3885
3.02M
                left_strd = i4_recon_stride;
3886
3887
3.02M
                break;
3888
3.02M
            }
3889
9.22M
            }
3890
3891
9.22M
            pu1_csbf_buf += num_4x4_in_tu;
3892
9.22M
            cu_pos_x += num_4x4_in_tu;
3893
9.22M
            ps_nbr_4x4 += num_4x4_in_tu;
3894
9.22M
            ps_top_nbr_4x4 += num_4x4_in_tu;
3895
9.22M
            ps_tmp_lt_4x4 = ps_nbr_4x4 - 1;
3896
3897
9.22M
            pu1_intra_pred_mode++;
3898
3899
            /* after 2 blocks increment the pointers to bottom blocks */
3900
9.22M
            if(1 == ctr)
3901
3.06M
            {
3902
3.06M
                pv_curr_src = (UWORD8 *)pv_curr_src - (trans_size << 1);
3903
3.06M
                pv_curr_src = (UWORD8 *)pv_curr_src + (trans_size * src_strd);
3904
3905
3.06M
                pv_pred_org = (UWORD8 *)pv_pred_org - (trans_size << 1);
3906
3.06M
                pv_pred_org = (UWORD8 *)pv_pred_org + (trans_size * pred_strd_org);
3907
3.06M
                pi2_deq_data -= (trans_size << 1);
3908
3.06M
                pi2_deq_data += (trans_size * deq_data_strd);
3909
3910
3.06M
                pu1_csbf_buf -= (num_4x4_in_tu << 1);
3911
3.06M
                pu1_csbf_buf += (num_4x4_in_tu * csbf_strd);
3912
3913
3.06M
                ps_nbr_4x4 -= (num_4x4_in_tu << 1);
3914
3.06M
                ps_nbr_4x4 += (num_4x4_in_tu * num_4x4_in_cu);
3915
3.06M
                ps_top_nbr_4x4 = ps_nbr_4x4 - num_4x4_in_cu;
3916
3.06M
                ps_tmp_lt_4x4 = ps_left_nbr_4x4 + (num_4x4_in_tu * nbr_4x4_left_strd);
3917
3918
                /* decrement pos x to start */
3919
3.06M
                cu_pos_x -= (num_4x4_in_tu << 1);
3920
3.06M
                cu_pos_y += num_4x4_in_tu;
3921
3.06M
            }
3922
9.22M
        }
3923
3924
18.7M
#if RDOPT_ENABLE
3925
        /* compute the RDOPT cost for the current TU */
3926
18.7M
        ai8_cand_rdopt_cost[best_intra_buf_idx] += COMPUTE_RATE_COST_CLIP30(
3927
18.7M
            ai4_tu_bits[best_intra_buf_idx], ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
3928
18.7M
#endif
3929
3930
        /* accumulate the costs */
3931
18.7M
        total_rdopt_cost += ai8_cand_rdopt_cost[best_intra_buf_idx];
3932
3933
18.7M
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
3934
18.7M
        {
3935
            /* Early exit : If the current running cost exceeds
3936
            the prev. best mode cost, break */
3937
18.7M
            if(total_rdopt_cost > prev_best_rdopt_cost)
3938
912k
            {
3939
912k
                return (total_rdopt_cost);
3940
912k
            }
3941
18.7M
        }
3942
3943
        /* if transfrom size is 4x4 then only first luma 4x4 will have chroma*/
3944
17.8M
        chrm_present_flag = (4 != trans_size) ? 1 : INTRA_PRED_CHROMA_IDX_NONE;
3945
3946
17.8M
        pu4_nbr_flags++;
3947
17.8M
    }
3948
    /* Modify the cost function for this CU. */
3949
    /* loop in for 8x8 blocks */
3950
8.78M
    if(ps_ctxt->u1_enable_psyRDOPT)
3951
0
    {
3952
0
        UWORD8 *pu1_recon_cu;
3953
0
        WORD32 recon_stride;
3954
0
        WORD32 curr_pos_x;
3955
0
        WORD32 curr_pos_y;
3956
0
        WORD32 start_index;
3957
0
        WORD32 num_horz_cu_in_ctb;
3958
0
        WORD32 cu_size;
3959
0
        WORD32 had_block_size;
3960
3961
        /* tODO: sreenivasa ctb size has to be used appropriately */
3962
0
        had_block_size = 8;
3963
0
        cu_size = ps_cu_analyse->u1_cu_size; /* todo */
3964
0
        num_horz_cu_in_ctb = 64 / had_block_size;
3965
3966
0
        curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
3967
0
        curr_pos_y = ps_cu_analyse->b3_cu_pos_y << 3; /* pel units */
3968
0
        recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
3969
0
        pu1_recon_cu =
3970
0
            ((UWORD8 *)ps_final_prms->s_recon_datastore
3971
0
                 .apv_luma_recon_bufs[ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]);
3972
        /* + \  curr_pos_x + curr_pos_y * recon_stride; */
3973
3974
        /* start index to index the source satd of curr cu int he current ctb*/
3975
0
        start_index =
3976
0
            (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
3977
3978
0
        {
3979
0
            total_rdopt_cost += ihevce_psy_rd_cost(
3980
0
                ps_ctxt->ai4_source_satd_8x8,
3981
0
                pu1_recon_cu,
3982
0
                recon_stride,
3983
0
                1,  //
3984
0
                cu_size,
3985
0
                0,  // pic type
3986
0
                0,  //layer id
3987
0
                ps_ctxt->i4_satd_lamda,  // lambda
3988
0
                start_index,
3989
0
                ps_ctxt->u1_is_input_data_hbd,
3990
0
                ps_ctxt->u4_psy_strength,
3991
0
                &ps_ctxt->s_cmn_opt_func
3992
3993
0
            );  // 8 bit
3994
0
        }
3995
0
    }
3996
3997
#if !FORCE_INTRA_TU_DEPTH_TO_0  //RATIONALISE_NUM_RDO_MODES_IN_PQ_AND_HQ
3998
8.78M
    if(TU_EQ_SUBCU == func_proc_mode)
3999
510k
    {
4000
510k
        UWORD8 au1_tu_eq_cu_div2_modes[4];
4001
510k
        UWORD8 au1_freq_of_mode[4];
4002
4003
510k
        WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D(
4004
510k
            ps_final_prms->au1_intra_pred_mode, au1_tu_eq_cu_div2_modes, au1_freq_of_mode, 4);
4005
4006
510k
        if(1 == i4_num_clusters)
4007
130k
        {
4008
130k
            ps_final_prms->u2_num_pus_in_cu = 1;
4009
130k
            ps_final_prms->u1_part_mode = SIZE_2Nx2N;
4010
130k
        }
4011
510k
    }
4012
8.78M
#endif
4013
4014
    /* store the num TUs*/
4015
8.78M
    ps_final_prms->u2_num_tus_in_cu = u2_num_tus_in_cu;
4016
4017
    /* update the bytes consumed */
4018
8.78M
    ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
4019
4020
    /* store the current cu size to final prms */
4021
8.78M
    ps_final_prms->u1_cu_size = ps_cu_analyse->u1_cu_size;
4022
4023
    /* cu bits will be having luma residual bits till this point    */
4024
    /* if zero_cbf eval is disabled then cu bits will be zero       */
4025
8.78M
    ps_final_prms->u4_cu_luma_res_bits = cu_bits;
4026
4027
    /* ------------- Chroma processing -------------- */
4028
    /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEDD preset*/
4029
8.78M
    if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
4030
7.06M
    {
4031
7.06M
        LWORD64 chrm_rdopt_cost;
4032
7.06M
        WORD32 chrm_rdopt_tu_bits;
4033
4034
        /* Store the current RDOPT cost to enable early exit in chrom_prcs */
4035
7.06M
        ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
4036
4037
7.06M
        chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
4038
7.06M
            ps_ctxt,
4039
7.06M
            curr_buf_idx,
4040
7.06M
            func_proc_mode,
4041
7.06M
            ps_chrm_cu_buf_prms->pu1_curr_src,
4042
7.06M
            ps_chrm_cu_buf_prms->i4_chrm_src_stride,
4043
7.06M
            ps_chrm_cu_buf_prms->pu1_cu_left,
4044
7.06M
            ps_chrm_cu_buf_prms->pu1_cu_top,
4045
7.06M
            ps_chrm_cu_buf_prms->pu1_cu_top_left,
4046
7.06M
            ps_chrm_cu_buf_prms->i4_cu_left_stride,
4047
7.06M
            cu_pos_x_8pelunits,
4048
7.06M
            cu_pos_y_8pelunits,
4049
7.06M
            &chrm_rdopt_tu_bits,
4050
7.06M
            i4_alpha_stim_multiplier,
4051
7.06M
            u1_is_cu_noisy);
4052
4053
7.06M
#if WEIGH_CHROMA_COST
4054
7.06M
        chrm_rdopt_cost = (LWORD64)(
4055
7.06M
            (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
4056
7.06M
             (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
4057
7.06M
            CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
4058
7.06M
#endif
4059
4060
7.06M
#if CHROMA_RDOPT_ENABLE
4061
7.06M
        total_rdopt_cost += chrm_rdopt_cost;
4062
7.06M
#endif
4063
7.06M
        cu_bits += chrm_rdopt_tu_bits;
4064
4065
        /* cu bits for chroma residual if chroma rdopt is on       */
4066
        /* if zero_cbf eval is disabled then cu bits will be zero  */
4067
7.06M
        ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
4068
4069
7.06M
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
4070
7.06M
        {
4071
            /* Early exit : If the current running cost exceeds
4072
            the prev. best mode cost, break */
4073
7.06M
            if(total_rdopt_cost > prev_best_rdopt_cost)
4074
609k
            {
4075
609k
                return (total_rdopt_cost);
4076
609k
            }
4077
7.06M
        }
4078
7.06M
    }
4079
1.71M
    else
4080
1.71M
    {}
4081
4082
    /* RDOPT copy States :  Best after all luma TUs to current */
4083
8.17M
    COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4084
8.17M
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4085
8.17M
                .s_cabac_ctxt.au1_ctxt_models[0] +
4086
8.17M
            IHEVC_CAB_COEFFX_PREFIX,
4087
8.17M
        &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4088
8.17M
        IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4089
4090
    /* get the neighbour availability flags for current cu  */
4091
8.17M
    ihevce_get_only_nbr_flag(
4092
8.17M
        &s_nbr,
4093
8.17M
        ps_ctxt->pu1_ctb_nbr_map,
4094
8.17M
        ps_ctxt->i4_nbr_map_strd,
4095
8.17M
        (cu_pos_x_8pelunits << 1),
4096
8.17M
        (cu_pos_y_8pelunits << 1),
4097
8.17M
        (trans_size << 1),
4098
8.17M
        (trans_size << 1));
4099
4100
    /* call the entropy rdo encode to get the bit estimate for current cu */
4101
    /*if ZERO_CBF eval is enabled then this function will return only CU header bits */
4102
8.17M
    {
4103
        /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
4104
8.17M
        WORD32 cbf_bits, header_bits;
4105
4106
8.17M
        header_bits = ihevce_entropy_rdo_encode_cu(
4107
8.17M
            &ps_ctxt->s_rdopt_entropy_ctxt,
4108
8.17M
            ps_final_prms,
4109
8.17M
            cu_pos_x_8pelunits,
4110
8.17M
            cu_pos_y_8pelunits,
4111
8.17M
            ps_cu_analyse->u1_cu_size,
4112
8.17M
            s_nbr.u1_top_avail,
4113
8.17M
            s_nbr.u1_left_avail,
4114
8.17M
            &ps_final_prms->pu1_cu_coeffs[0],
4115
8.17M
            &cbf_bits);
4116
4117
8.17M
        cu_bits += header_bits;
4118
4119
        /* cbf bits are excluded from header bits, instead considered as texture bits */
4120
        /* incase if zero cbf eval is disabled then texture bits gets added here */
4121
8.17M
        ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
4122
8.17M
        ps_final_prms->u4_cu_cbf_bits = cbf_bits;
4123
4124
8.17M
#if RDOPT_ENABLE
4125
        /* add the cost of coding the cu bits */
4126
8.17M
        total_rdopt_cost +=
4127
8.17M
            COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
4128
8.17M
#endif
4129
8.17M
    }
4130
8.17M
    return (total_rdopt_cost);
4131
8.78M
}
4132
/*!
4133
******************************************************************************
4134
* \if Function name : ihevce_inter_rdopt_cu_ntu \endif
4135
*
4136
* \brief
4137
*    Inter Coding unit function which performs the TQ IT IQ recon for luma
4138
*
4139
* \param[in] ps_ctxt       enc_loop module ctxt pointer
4140
* \param[in] ps_inter_cand pointer to inter candidate structure
4141
* \param[in] pv_src        pointer to source data buffer
4142
* \param[in] cu_size       Current CU size
4143
* \param[in] cu_pos_x      cu position x w.r.t. ctb
4144
* \param[in] cu_pos_y      cu position y w.r.t. ctb
4145
* \param[in] src_strd      source buffer stride
4146
* \param[in] curr_buf_idx  buffer index for current output storage
4147
* \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure
4148
*
4149
* \return
4150
*    Rdopt cost
4151
*
4152
* \author
4153
*  Ittiam
4154
*
4155
*****************************************************************************
4156
*/
4157
LWORD64 ihevce_inter_rdopt_cu_ntu(
4158
    ihevce_enc_loop_ctxt_t *ps_ctxt,
4159
    enc_loop_cu_prms_t *ps_cu_prms,
4160
    void *pv_src,
4161
    WORD32 cu_size,
4162
    WORD32 cu_pos_x,
4163
    WORD32 cu_pos_y,
4164
    WORD32 curr_buf_idx,
4165
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
4166
    cu_inter_cand_t *ps_inter_cand,
4167
    cu_analyse_t *ps_cu_analyse,
4168
    WORD32 i4_alpha_stim_multiplier)
4169
1.31M
{
4170
1.31M
    enc_loop_cu_final_prms_t *ps_final_prms;
4171
1.31M
    nbr_4x4_t *ps_nbr_4x4;
4172
1.31M
    tu_prms_t s_tu_prms[64 * 4];
4173
1.31M
    tu_prms_t *ps_tu_prms;
4174
4175
1.31M
    WORD32 i4_perform_rdoq;
4176
1.31M
    WORD32 i4_perform_sbh;
4177
1.31M
    WORD32 ai4_tu_split_flags[4];
4178
1.31M
    WORD32 ai4_tu_early_cbf[4];
4179
1.31M
    WORD32 num_split_flags = 1;
4180
1.31M
    WORD32 i;
4181
1.31M
    UWORD8 u1_tu_size;
4182
1.31M
    UWORD8 *pu1_pred;
4183
1.31M
    UWORD8 *pu1_ecd_data;
4184
1.31M
    WORD16 *pi2_deq_data;
4185
1.31M
    UWORD8 *pu1_csbf_buf;
4186
1.31M
    UWORD8 *pu1_tu_sz_sft;
4187
1.31M
    UWORD8 *pu1_tu_posx;
4188
1.31M
    UWORD8 *pu1_tu_posy;
4189
1.31M
    LWORD64 total_rdopt_cost;
4190
1.31M
    WORD32 ctr;
4191
1.31M
    WORD32 chrm_ctr;
4192
1.31M
    WORD32 num_tu_in_cu = 0;
4193
1.31M
    WORD32 pred_stride;
4194
1.31M
    WORD32 recon_stride;
4195
1.31M
    WORD32 trans_size = ps_cu_analyse->u1_cu_size;
4196
1.31M
    WORD32 csbf_strd;
4197
1.31M
    WORD32 chrm_present_flag;
4198
1.31M
    WORD32 ecd_data_bytes_cons;
4199
1.31M
    WORD32 num_4x4_in_cu;
4200
1.31M
    WORD32 num_4x4_in_tu;
4201
1.31M
    WORD32 recon_func_mode;
4202
1.31M
    WORD32 cu_bits;
4203
1.31M
    UWORD8 u1_compute_spatial_ssd;
4204
4205
    /* min_trans_size is initialized to some huge number than usual TU sizes */
4206
1.31M
    WORD32 i4_min_trans_size = 256;
4207
    /* Get the RDOPT cost of the best CU mode for early_exit */
4208
1.31M
    LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
4209
1.31M
    WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
4210
4211
    /* model for no residue syntax qt root cbf flag */
4212
1.31M
    UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX];
4213
4214
    /* backup copy of cabac states for restoration if zero cu reside rdo wins later */
4215
1.31M
    UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
4216
4217
    /* for skip cases tables are not reqquired */
4218
1.31M
    UWORD8 u1_skip_tu_sz_sft = 0;
4219
1.31M
    UWORD8 u1_skip_tu_posx = 0;
4220
1.31M
    UWORD8 u1_skip_tu_posy = 0;
4221
1.31M
    UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
4222
4223
    /* get the pointers based on curbuf idx */
4224
1.31M
    ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
4225
1.31M
    ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
4226
1.31M
    pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
4227
1.31M
    pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
4228
1.31M
    csbf_strd = ps_ctxt->i4_cu_csbf_strd;
4229
1.31M
    pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
4230
4231
1.31M
    pred_stride = ps_inter_cand->i4_pred_data_stride;
4232
1.31M
    recon_stride = cu_size;
4233
1.31M
    pu1_pred = ps_inter_cand->pu1_pred_data;
4234
1.31M
    chrm_ctr = 0;
4235
1.31M
    ecd_data_bytes_cons = 0;
4236
1.31M
    total_rdopt_cost = 0;
4237
1.31M
    num_4x4_in_cu = cu_size >> 2;
4238
1.31M
    recon_func_mode = PRED_MODE_INTER;
4239
1.31M
    cu_bits = 0;
4240
4241
    /* get the 4x4 level postion of current cu */
4242
1.31M
    cu_pos_x = cu_pos_x << 1;
4243
1.31M
    cu_pos_y = cu_pos_y << 1;
4244
4245
    /* default value for cu coded flag */
4246
1.31M
    ps_final_prms->u1_is_cu_coded = 0;
4247
4248
    /*init of ssd of CU accuumulated over all TU*/
4249
1.31M
    ps_final_prms->u4_cu_sad = 0;
4250
4251
    /* populate the coeffs scan idx */
4252
1.31M
    ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
4253
4254
1.31M
#if ENABLE_INTER_ZCU_COST
4255
    /* reset cu not coded cost */
4256
1.31M
    ps_ctxt->i8_cu_not_coded_cost = 0;
4257
4258
    /* backup copy of cabac states for restoration if zero cu reside rdo wins later */
4259
1.31M
    memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END);
4260
1.31M
#endif
4261
4262
1.31M
    if(ps_cu_analyse->u1_cu_size == 64)
4263
39.5k
    {
4264
39.5k
        num_split_flags = 4;
4265
39.5k
        u1_tu_size = 32;
4266
39.5k
    }
4267
1.27M
    else
4268
1.27M
    {
4269
1.27M
        num_split_flags = 1;
4270
1.27M
        u1_tu_size = ps_cu_analyse->u1_cu_size;
4271
1.27M
    }
4272
4273
    /* ckeck for skip mode */
4274
1.31M
    if(1 == ps_final_prms->u1_skip_flag)
4275
449k
    {
4276
449k
        if(64 == cu_size)
4277
13.6k
        {
4278
            /* TU = CU/2 is set but no trnaform is evaluated  */
4279
13.6k
            num_tu_in_cu = 4;
4280
13.6k
            pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
4281
13.6k
            pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
4282
13.6k
            pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
4283
13.6k
        }
4284
435k
        else
4285
435k
        {
4286
            /* TU = CU is set but no trnaform is evaluated  */
4287
435k
            num_tu_in_cu = 1;
4288
435k
            pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
4289
435k
            pu1_tu_posx = &u1_skip_tu_posx;
4290
435k
            pu1_tu_posy = &u1_skip_tu_posy;
4291
435k
        }
4292
4293
449k
        recon_func_mode = PRED_MODE_SKIP;
4294
449k
    }
4295
    /* check for PU part mode being AMP or No AMP */
4296
863k
    else if(ps_final_prms->u1_part_mode < SIZE_2NxnU)
4297
771k
    {
4298
771k
        if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64))
4299
746k
        {
4300
            /* TU= CU is evaluated 2Nx2N inter case */
4301
746k
            num_tu_in_cu = 1;
4302
746k
            pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
4303
746k
            pu1_tu_posx = &u1_skip_tu_posx;
4304
746k
            pu1_tu_posy = &u1_skip_tu_posy;
4305
746k
        }
4306
25.4k
        else
4307
25.4k
        {
4308
            /* currently TU= CU/2 is evaluated for all inter case */
4309
25.4k
            num_tu_in_cu = 4;
4310
25.4k
            pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
4311
25.4k
            pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
4312
25.4k
            pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
4313
25.4k
        }
4314
771k
    }
4315
91.5k
    else
4316
91.5k
    {
4317
        /* for AMP cases one level of TU recurssion is done */
4318
        /* based on oreintation of the partitions           */
4319
91.5k
        num_tu_in_cu = 10;
4320
91.5k
        pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0];
4321
91.5k
        pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
4322
91.5k
        pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
4323
91.5k
    }
4324
4325
1.31M
    ps_tu_prms = &s_tu_prms[0];
4326
1.31M
    num_tu_in_cu = 0;
4327
4328
2.74M
    for(i = 0; i < num_split_flags; i++)
4329
1.43M
    {
4330
1.43M
        WORD32 i4_x_off = 0, i4_y_off = 0;
4331
4332
1.43M
        if(i == 1 || i == 3)
4333
79.0k
        {
4334
79.0k
            i4_x_off = 32;
4335
79.0k
        }
4336
4337
1.43M
        if(i == 2 || i == 3)
4338
79.0k
        {
4339
79.0k
            i4_y_off = 32;
4340
79.0k
        }
4341
4342
1.43M
        if(1 == ps_final_prms->u1_skip_flag)
4343
490k
        {
4344
490k
            ai4_tu_split_flags[0] = 0;
4345
490k
            ps_inter_cand->ai4_tu_split_flag[i] = 0;
4346
4347
490k
            ai4_tu_early_cbf[0] = 0;
4348
490k
        }
4349
940k
        else
4350
940k
        {
4351
940k
            ai4_tu_split_flags[0] = ps_inter_cand->ai4_tu_split_flag[i];
4352
940k
            ai4_tu_early_cbf[0] = ps_inter_cand->ai4_tu_early_cbf[i];
4353
940k
        }
4354
4355
1.43M
        ps_tu_prms->u1_tu_size = u1_tu_size;
4356
4357
1.43M
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
4358
1.43M
            ps_tu_prms,
4359
1.43M
            &num_tu_in_cu,
4360
1.43M
            0,
4361
1.43M
            ai4_tu_split_flags[0],
4362
1.43M
            ai4_tu_early_cbf[0],
4363
1.43M
            i4_x_off,
4364
1.43M
            i4_y_off);
4365
1.43M
    }
4366
4367
    /* loop for all tu blocks in current cu */
4368
1.31M
    ps_tu_prms = &s_tu_prms[0];
4369
3.63M
    for(ctr = 0; ctr < num_tu_in_cu; ctr++)
4370
2.32M
    {
4371
2.32M
        trans_size = ps_tu_prms->u1_tu_size;
4372
4373
2.32M
        if(i4_min_trans_size > trans_size)
4374
1.32M
        {
4375
1.32M
            i4_min_trans_size = trans_size;
4376
1.32M
        }
4377
2.32M
        ps_tu_prms++;
4378
2.32M
    }
4379
4380
1.31M
    if(ps_ctxt->i1_cu_qp_delta_enable)
4381
332k
    {
4382
332k
        ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, i4_min_trans_size, 0);
4383
332k
    }
4384
4385
1.31M
    if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
4386
0
    {
4387
0
        ps_ctxt->i8_cl_ssd_lambda_qf =
4388
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
4389
0
             100.0f);
4390
0
        ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
4391
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
4392
0
             (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
4393
0
    }
4394
4395
1.31M
    u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
4396
1.31M
                             (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
4397
1.31M
                             CONVERT_SSDS_TO_SPATIAL_DOMAIN;
4398
4399
1.31M
    if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
4400
0
    {
4401
0
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
4402
0
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
4403
0
    }
4404
4405
1.31M
    if(!u1_compute_spatial_ssd)
4406
1.15M
    {
4407
1.15M
        ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
4408
1.15M
        ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
4409
1.15M
    }
4410
155k
    else
4411
155k
    {
4412
155k
        ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1;
4413
155k
    }
4414
4415
1.31M
    ps_tu_prms = &s_tu_prms[0];
4416
4417
1.31M
    ASSERT(num_tu_in_cu <= 256);
4418
4419
    /* RDOPT copy States :  TU init (best until prev TU) to current */
4420
1.31M
    memcpy(
4421
1.31M
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4422
1.31M
             .s_cabac_ctxt.au1_ctxt_models[0],
4423
1.31M
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
4424
1.31M
        IHEVC_CAB_COEFFX_PREFIX);
4425
4426
3.50M
    for(ctr = 0; ctr < num_tu_in_cu; ctr++)
4427
2.30M
    {
4428
2.30M
        WORD32 curr_bytes;
4429
2.30M
        WORD32 tx_size;
4430
2.30M
        WORD32 cbf, zero_col, zero_row;
4431
2.30M
        LWORD64 rdopt_cost;
4432
2.30M
        UWORD8 u1_is_recon_available;
4433
4434
2.30M
        WORD32 curr_pos_x;
4435
2.30M
        WORD32 curr_pos_y;
4436
2.30M
        nbr_4x4_t *ps_cur_nbr_4x4;
4437
2.30M
        UWORD8 *pu1_cur_pred;
4438
2.30M
        UWORD8 *pu1_cur_src;
4439
2.30M
        UWORD8 *pu1_cur_recon;
4440
2.30M
        WORD16 *pi2_cur_deq_data;
4441
2.30M
        UWORD32 u4_tu_sad;
4442
2.30M
        WORD32 tu_bits;
4443
4444
2.30M
        WORD32 i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
4445
4446
2.30M
        trans_size = ps_tu_prms->u1_tu_size;
4447
        /* get the current pos x and pos y in pixels */
4448
2.30M
        curr_pos_x = ps_tu_prms->u1_x_off;  //((cu_size >> 2) * pu1_tu_posx[ctr]);
4449
2.30M
        curr_pos_y = ps_tu_prms->u1_y_off;  //((cu_size >> 2) * pu1_tu_posy[ctr]);
4450
4451
2.30M
        num_4x4_in_tu = trans_size >> 2;
4452
4453
#if FORCE_8x8_TFR
4454
        if(cu_size == 64)
4455
        {
4456
            curr_pos_x = ((cu_size >> 3) * pu1_tu_posx[ctr]);
4457
            curr_pos_y = ((cu_size >> 3) * pu1_tu_posy[ctr]);
4458
        }
4459
#endif
4460
4461
        /* increment the pointers to start of current TU  */
4462
2.30M
        pu1_cur_src = ((UWORD8 *)pv_src + curr_pos_x);
4463
2.30M
        pu1_cur_src += (curr_pos_y * src_strd);
4464
2.30M
        pu1_cur_pred = (pu1_pred + curr_pos_x);
4465
2.30M
        pu1_cur_pred += (curr_pos_y * pred_stride);
4466
2.30M
        pi2_cur_deq_data = pi2_deq_data + curr_pos_x;
4467
2.30M
        pi2_cur_deq_data += (curr_pos_y * cu_size);
4468
2.30M
        pu1_cur_recon = ((UWORD8 *)ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0]) +
4469
2.30M
                        curr_pos_x + curr_pos_y * i4_recon_stride;
4470
4471
2.30M
        ps_cur_nbr_4x4 = (ps_nbr_4x4 + (curr_pos_x >> 2));
4472
2.30M
        ps_cur_nbr_4x4 += ((curr_pos_y >> 2) * num_4x4_in_cu);
4473
4474
        /* RDOPT copy States :  TU init (best until prev TU) to current */
4475
2.30M
        COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4476
2.30M
            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4477
2.30M
                    .s_cabac_ctxt.au1_ctxt_models[0] +
4478
2.30M
                IHEVC_CAB_COEFFX_PREFIX,
4479
2.30M
            &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4480
2.30M
            IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4481
4482
2.30M
        i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
4483
2.30M
        i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
4484
4485
        /* 2 Multi-dimensional arrays based on trans size of rounding factor to be added here */
4486
        /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
4487
        /* Currently the complete array will contain only single value*/
4488
        /*The rounding factor is calculated with the formula
4489
        Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
4490
        rounding factor = (1 - DeadZone Val)
4491
4492
        Assumption: Cabac states of All the sub-blocks in the TU are considered independent
4493
        */
4494
2.30M
        if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0))
4495
0
        {
4496
0
            double i4_lamda_modifier;
4497
4498
0
            if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
4499
0
            {
4500
0
                i4_lamda_modifier = ps_ctxt->i4_lamda_modifier *
4501
0
                                    CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
4502
0
            }
4503
0
            else
4504
0
            {
4505
0
                i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
4506
0
            }
4507
0
            if(ps_ctxt->i4_use_const_lamda_modifier)
4508
0
            {
4509
0
                if(ISLICE == ps_ctxt->i1_slice_type)
4510
0
                {
4511
0
                    i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
4512
0
                }
4513
0
                else
4514
0
                {
4515
0
                    i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
4516
0
                }
4517
0
            }
4518
0
            ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
4519
0
                &ps_ctxt->i4_quant_round_tu[0][0];
4520
0
            ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
4521
0
                &ps_ctxt->i4_quant_round_tu[1][0];
4522
4523
0
            memset(
4524
0
                ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
4525
0
                0,
4526
0
                trans_size * trans_size * sizeof(WORD32));
4527
0
            memset(
4528
0
                ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
4529
0
                0,
4530
0
                trans_size * trans_size * sizeof(WORD32));
4531
4532
0
            ihevce_quant_rounding_factor_gen(
4533
0
                trans_size,
4534
0
                1,
4535
0
                &ps_ctxt->s_rdopt_entropy_ctxt,
4536
0
                ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
4537
0
                ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
4538
0
                i4_lamda_modifier,
4539
0
                1);
4540
0
        }
4541
2.30M
        else
4542
2.30M
        {
4543
2.30M
            ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
4544
2.30M
                ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3];
4545
2.30M
            ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
4546
2.30M
                ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3];
4547
2.30M
        }
4548
4549
        /* call T Q IT IQ and recon function */
4550
2.30M
        cbf = ihevce_t_q_iq_ssd_scan_fxn(
4551
2.30M
            ps_ctxt,
4552
2.30M
            pu1_cur_pred,
4553
2.30M
            pred_stride,
4554
2.30M
            pu1_cur_src,
4555
2.30M
            src_strd,
4556
2.30M
            pi2_cur_deq_data,
4557
2.30M
            cu_size,
4558
2.30M
            pu1_cur_recon,
4559
2.30M
            i4_recon_stride,
4560
2.30M
            pu1_ecd_data,
4561
2.30M
            pu1_csbf_buf,
4562
2.30M
            csbf_strd,
4563
2.30M
            trans_size,
4564
2.30M
            recon_func_mode,
4565
2.30M
            &rdopt_cost,
4566
2.30M
            &curr_bytes,
4567
2.30M
            &tu_bits,
4568
2.30M
            &u4_tu_sad,
4569
2.30M
            &zero_col,
4570
2.30M
            &zero_row,
4571
2.30M
            &u1_is_recon_available,
4572
2.30M
            i4_perform_rdoq,
4573
2.30M
            i4_perform_sbh,
4574
2.30M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
4575
2.30M
            i4_alpha_stim_multiplier,
4576
2.30M
            u1_is_cu_noisy,
4577
2.30M
#endif
4578
2.30M
            u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
4579
2.30M
            ps_ctxt->u1_use_early_cbf_data ? ps_tu_prms->i4_early_cbf : 1);
4580
4581
#if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
4582
        if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
4583
        {
4584
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
4585
            rdopt_cost = ihevce_inject_stim_into_distortion(
4586
                pu1_cur_src,
4587
                src_strd,
4588
                pu1_cur_pred,
4589
                pred_stride,
4590
                rdopt_cost,
4591
                i4_alpha_stim_multiplier,
4592
                trans_size,
4593
                0,
4594
                ps_ctxt->u1_enable_psyRDOPT,
4595
                NULL_PLANE);
4596
#else
4597
            if(u1_compute_spatial_ssd && u1_is_recon_available)
4598
            {
4599
                rdopt_cost = ihevce_inject_stim_into_distortion(
4600
                    pu1_cur_src,
4601
                    src_strd,
4602
                    pu1_cur_recon,
4603
                    i4_recon_stride,
4604
                    rdopt_cost,
4605
                    i4_alpha_stim_multiplier,
4606
                    trans_size,
4607
                    0,
4608
                    NULL_PLANE);
4609
            }
4610
            else
4611
            {
4612
                rdopt_cost = ihevce_inject_stim_into_distortion(
4613
                    pu1_cur_src,
4614
                    src_strd,
4615
                    pu1_cur_pred,
4616
                    pred_stride,
4617
                    rdopt_cost,
4618
                    i4_alpha_stim_multiplier,
4619
                    trans_size,
4620
                    0,
4621
                    ps_ctxt->u1_enable_psyRDOPT,
4622
                    NULL_PLANE);
4623
            }
4624
#endif
4625
        }
4626
#endif
4627
4628
2.30M
        if(u1_compute_spatial_ssd && u1_is_recon_available)
4629
279k
        {
4630
279k
            ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = 0;
4631
279k
        }
4632
2.02M
        else
4633
2.02M
        {
4634
2.02M
            ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
4635
2.02M
        }
4636
4637
        /* accumulate the TU sad into cu sad */
4638
2.30M
        ps_final_prms->u4_cu_sad += u4_tu_sad;
4639
4640
        /* accumulate the TU bits into cu bits */
4641
2.30M
        cu_bits += tu_bits;
4642
4643
        /* inter cu is coded if any of the tu is coded in it */
4644
2.30M
        ps_final_prms->u1_is_cu_coded |= cbf;
4645
4646
        /* call the entropy function to get the bits */
4647
        /* add that to rd opt cost(SSD)              */
4648
4649
        /* update the bytes */
4650
2.30M
        ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
4651
2.30M
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = curr_bytes;
4652
        /* update the zero_row and col info for the final mode */
4653
2.30M
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col = zero_col;
4654
2.30M
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row = zero_row;
4655
4656
        /* update the bytes */
4657
2.30M
        ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
4658
4659
        /* update the total bytes cons */
4660
2.30M
        ecd_data_bytes_cons += curr_bytes;
4661
2.30M
        pu1_ecd_data += curr_bytes;
4662
4663
        /* RDOPT copy States :  New updated after curr TU to TU init */
4664
2.30M
        if(0 != cbf)
4665
324k
        {
4666
            /* update to new state only if CBF is non zero */
4667
324k
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4668
324k
                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4669
324k
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4670
324k
                        .s_cabac_ctxt.au1_ctxt_models[0] +
4671
324k
                    IHEVC_CAB_COEFFX_PREFIX,
4672
324k
                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4673
324k
        }
4674
4675
        /* by default chroma present is set to 1*/
4676
2.30M
        chrm_present_flag = 1;
4677
2.30M
        if(4 == trans_size)
4678
543k
        {
4679
            /* if tusize is 4x4 then only first luma 4x4 will have chroma*/
4680
543k
            if(0 != chrm_ctr)
4681
407k
            {
4682
407k
                chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE;
4683
407k
            }
4684
4685
            /* increment the chrm ctr unconditionally */
4686
543k
            chrm_ctr++;
4687
4688
            /* after ctr reached 4 reset it */
4689
543k
            if(4 == chrm_ctr)
4690
135k
            {
4691
135k
                chrm_ctr = 0;
4692
135k
            }
4693
543k
        }
4694
4695
2.30M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = cbf;
4696
2.30M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
4697
2.30M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
4698
2.30M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
4699
2.30M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
4700
2.30M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag;
4701
2.30M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp;
4702
2.30M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0;
4703
2.30M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0;
4704
2.30M
        GETRANGE(tx_size, trans_size);
4705
2.30M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
4706
2.30M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + (curr_pos_x >> 2);
4707
2.30M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + (curr_pos_y >> 2);
4708
4709
        /* replicate the nbr 4x4 structure for all 4x4 blocks of the current TU */
4710
2.30M
        ps_cur_nbr_4x4->b1_y_cbf = cbf;
4711
        /*copy the cu qp. This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/
4712
2.30M
        ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
4713
4714
        /* Qp and cbf are stored for the all 4x4 in TU */
4715
2.30M
        {
4716
2.30M
            WORD32 i, j;
4717
2.30M
            nbr_4x4_t *ps_tmp_4x4;
4718
2.30M
            ps_tmp_4x4 = ps_cur_nbr_4x4;
4719
4720
11.1M
            for(i = 0; i < num_4x4_in_tu; i++)
4721
8.84M
            {
4722
59.5M
                for(j = 0; j < num_4x4_in_tu; j++)
4723
50.6M
                {
4724
50.6M
                    ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp;
4725
50.6M
                    ps_tmp_4x4[j].b1_y_cbf = cbf;
4726
50.6M
                }
4727
                /* row level update*/
4728
8.84M
                ps_tmp_4x4 += num_4x4_in_cu;
4729
8.84M
            }
4730
2.30M
        }
4731
4732
2.30M
#if RDOPT_ENABLE
4733
        /* compute the rdopt cost */
4734
2.30M
        rdopt_cost +=
4735
2.30M
            COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
4736
2.30M
#endif
4737
        /* accumulate the costs */
4738
2.30M
        total_rdopt_cost += rdopt_cost;
4739
4740
2.30M
        ps_tu_prms++;
4741
4742
2.30M
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
4743
2.30M
        {
4744
            /* Early exit : If the current running cost exceeds
4745
            the prev. best mode cost, break */
4746
2.30M
            if(total_rdopt_cost > prev_best_rdopt_cost)
4747
113k
            {
4748
113k
                return (total_rdopt_cost);
4749
113k
            }
4750
2.30M
        }
4751
2.30M
    }
4752
4753
    /* Modify the cost function for this CU. */
4754
    /* loop in for 8x8 blocks */
4755
1.19M
    if(ps_ctxt->u1_enable_psyRDOPT)
4756
0
    {
4757
0
        UWORD8 *pu1_recon_cu;
4758
0
        WORD32 recon_stride;
4759
0
        WORD32 curr_pos_x;
4760
0
        WORD32 curr_pos_y;
4761
0
        WORD32 start_index;
4762
0
        WORD32 num_horz_cu_in_ctb;
4763
0
        WORD32 had_block_size;
4764
4765
        /* tODO: sreenivasa ctb size has to be used appropriately */
4766
0
        had_block_size = 8;
4767
0
        num_horz_cu_in_ctb = 64 / had_block_size;
4768
4769
0
        curr_pos_x = cu_pos_x << 2; /* pel units */
4770
0
        curr_pos_y = cu_pos_y << 2; /* pel units */
4771
0
        recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
4772
0
        pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore
4773
0
                            .apv_luma_recon_bufs[0]);  // already pointing to the current CU recon
4774
        //+ \curr_pos_x + curr_pos_y * recon_stride;
4775
4776
        /* start index to index the source satd of curr cu in the current ctb */
4777
0
        start_index =
4778
0
            (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
4779
4780
0
        {
4781
0
            total_rdopt_cost += ihevce_psy_rd_cost(
4782
0
                ps_ctxt->ai4_source_satd_8x8,
4783
0
                pu1_recon_cu,
4784
0
                recon_stride,
4785
0
                1,  //howz stride
4786
0
                cu_size,
4787
0
                0,  // pic type
4788
0
                0,  //layer id
4789
0
                ps_ctxt->i4_satd_lamda,  // lambda
4790
0
                start_index,
4791
0
                ps_ctxt->u1_is_input_data_hbd,
4792
0
                ps_ctxt->u4_psy_strength,
4793
0
                &ps_ctxt->s_cmn_opt_func);  // 8 bit
4794
0
        }
4795
0
    }
4796
4797
    /* store the num TUs*/
4798
1.19M
    ps_final_prms->u2_num_tus_in_cu = num_tu_in_cu;
4799
4800
    /* update the bytes consumed */
4801
1.19M
    ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
4802
4803
    /* store the current cu size to final prms */
4804
1.19M
    ps_final_prms->u1_cu_size = cu_size;
4805
4806
    /* cu bits will be having luma residual bits till this point    */
4807
    /* if zero_cbf eval is disabled then cu bits will be zero       */
4808
1.19M
    ps_final_prms->u4_cu_luma_res_bits = cu_bits;
4809
4810
    /* ------------- Chroma processing -------------- */
4811
    /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEED preset */
4812
1.19M
    if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
4813
488k
    {
4814
488k
        LWORD64 chrm_rdopt_cost;
4815
488k
        WORD32 chrm_rdopt_tu_bits;
4816
4817
        /* Store the current RDOPT cost to enable early exit in chrom_prcs */
4818
488k
        ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
4819
4820
488k
        chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
4821
488k
            ps_ctxt,
4822
488k
            curr_buf_idx,
4823
488k
            0, /* TU mode : Don't care in Inter patrh */
4824
488k
            ps_chrm_cu_buf_prms->pu1_curr_src,
4825
488k
            ps_chrm_cu_buf_prms->i4_chrm_src_stride,
4826
488k
            ps_chrm_cu_buf_prms->pu1_cu_left,
4827
488k
            ps_chrm_cu_buf_prms->pu1_cu_top,
4828
488k
            ps_chrm_cu_buf_prms->pu1_cu_top_left,
4829
488k
            ps_chrm_cu_buf_prms->i4_cu_left_stride,
4830
488k
            (cu_pos_x >> 1),
4831
488k
            (cu_pos_y >> 1),
4832
488k
            &chrm_rdopt_tu_bits,
4833
488k
            i4_alpha_stim_multiplier,
4834
488k
            u1_is_cu_noisy);
4835
4836
488k
#if WEIGH_CHROMA_COST
4837
488k
        chrm_rdopt_cost = (LWORD64)(
4838
488k
            (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
4839
488k
             (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
4840
488k
            CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
4841
488k
#endif
4842
4843
488k
#if CHROMA_RDOPT_ENABLE
4844
488k
        total_rdopt_cost += chrm_rdopt_cost;
4845
488k
#endif
4846
488k
        cu_bits += chrm_rdopt_tu_bits;
4847
4848
        /* during chroma evaluation if skip decision was over written     */
4849
        /* then the current skip candidate is set to a non skip candidate */
4850
488k
        ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag;
4851
4852
        /* cu bits for chroma residual if chroma rdopt is on       */
4853
        /* if zero_cbf eval is disabled then cu bits will be zero  */
4854
488k
        ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
4855
4856
488k
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
4857
488k
        {
4858
            /* Early exit : If the current running cost exceeds
4859
            the prev. best mode cost, break */
4860
488k
            if(total_rdopt_cost > prev_best_rdopt_cost)
4861
25.8k
            {
4862
25.8k
                return (total_rdopt_cost);
4863
25.8k
            }
4864
488k
        }
4865
488k
    }
4866
710k
    else
4867
710k
    {}
4868
4869
1.17M
#if SHRINK_INTER_TUTREE
4870
    /* ------------- Quadtree TU split  optimization ------------  */
4871
1.17M
    if(ps_final_prms->u1_is_cu_coded)
4872
152k
    {
4873
152k
        ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree(
4874
152k
            &ps_final_prms->as_tu_enc_loop[0],
4875
152k
            &ps_final_prms->as_tu_enc_loop_temp_prms[0],
4876
152k
            &ps_final_prms->s_recon_datastore,
4877
152k
            num_tu_in_cu,
4878
152k
            (ps_ctxt->u1_chroma_array_type == 2));
4879
152k
    }
4880
1.17M
#endif
4881
4882
    /* RDOPT copy States :  Best after all luma TUs (and chroma,if enabled)to current */
4883
1.17M
    COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4884
1.17M
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4885
1.17M
                .s_cabac_ctxt.au1_ctxt_models[0] +
4886
1.17M
            IHEVC_CAB_COEFFX_PREFIX,
4887
1.17M
        &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4888
1.17M
        IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4889
4890
    /* -------- Bit estimate for RD opt -------------- */
4891
1.17M
    {
4892
1.17M
        nbr_avail_flags_t s_nbr;
4893
        /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
4894
1.17M
        WORD32 cbf_bits, header_bits;
4895
4896
        /* get the neighbour availability flags for current cu  */
4897
1.17M
        ihevce_get_only_nbr_flag(
4898
1.17M
            &s_nbr,
4899
1.17M
            ps_ctxt->pu1_ctb_nbr_map,
4900
1.17M
            ps_ctxt->i4_nbr_map_strd,
4901
1.17M
            cu_pos_x,
4902
1.17M
            cu_pos_y,
4903
1.17M
            (cu_size >> 2),
4904
1.17M
            (cu_size >> 2));
4905
4906
        /* call the entropy rdo encode to get the bit estimate for current cu */
4907
1.17M
        header_bits = ihevce_entropy_rdo_encode_cu(
4908
1.17M
            &ps_ctxt->s_rdopt_entropy_ctxt,
4909
1.17M
            ps_final_prms,
4910
1.17M
            (cu_pos_x >> 1), /*  back to 8x8 pel units   */
4911
1.17M
            (cu_pos_y >> 1), /*  back to 8x8 pel units   */
4912
1.17M
            cu_size,
4913
1.17M
            ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
4914
1.17M
                                           : s_nbr.u1_top_avail,
4915
1.17M
            s_nbr.u1_left_avail,
4916
1.17M
            &ps_final_prms->pu1_cu_coeffs[0],
4917
1.17M
            &cbf_bits);
4918
4919
1.17M
        cu_bits += header_bits;
4920
4921
        /* cbf bits are excluded from header bits, instead considered as texture bits */
4922
        /* incase if zero cbf eval is disabled then texture bits gets added here */
4923
1.17M
        ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
4924
1.17M
        ps_final_prms->u4_cu_cbf_bits = cbf_bits;
4925
4926
1.17M
#if RDOPT_ENABLE
4927
        /* add the cost of coding the header bits */
4928
1.17M
        total_rdopt_cost +=
4929
1.17M
            COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
4930
4931
1.17M
#if ENABLE_INTER_ZCU_COST
4932
        /* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */
4933
1.17M
        if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level))
4934
152k
        {
4935
152k
            LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost;
4936
4937
152k
            WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) &&
4938
152k
                                      (1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag);
4939
4940
152k
            cab_ctxt_t *ps_cab_ctxt =
4941
152k
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt;
4942
4943
            /* Read header bits generated after ihevce_entropy_rdo_encode_cu() call */
4944
152k
            UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12;
4945
4946
            /* account for coding qt_root_cbf = 0 */
4947
            /* First subtract cost for coding as 1 (part of header bits) and then add cost for coding as 0 */
4948
152k
            u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0];
4949
152k
            if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1])
4950
252
                u4_cu_hdr_bits_q12 = 0;
4951
151k
            else
4952
151k
                u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1];
4953
4954
            /* add the cost of coding the header bits */
4955
152k
            i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30(
4956
152k
                u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */,
4957
152k
                ps_ctxt->i8_cl_ssd_lambda_qf,
4958
152k
                (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
4959
4960
152k
            if(ps_ctxt->u1_enable_psyRDOPT)
4961
0
            {
4962
0
                i8_cu_not_coded_cost = total_rdopt_cost + 1;
4963
0
            }
4964
4965
            /* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */
4966
152k
            if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu))
4967
2.92k
            {
4968
2.92k
                WORD32 tx_size;
4969
4970
                /* force cu as not coded and update the cost */
4971
2.92k
                ps_final_prms->u1_is_cu_coded = 0;
4972
2.92k
                ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
4973
2.92k
                ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
4974
4975
2.92k
                total_rdopt_cost = i8_cu_not_coded_cost;
4976
4977
                /* reset num TUs to 1 unless cu size is 64 */
4978
2.92k
                ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1;
4979
2.92k
                trans_size = (64 == cu_size) ? 32 : cu_size;
4980
2.92k
                GETRANGE(tx_size, trans_size);
4981
4982
                /* reset the bytes consumed */
4983
2.92k
                ps_final_prms->i4_num_bytes_ecd_data = 0;
4984
4985
                /* reset texture related bits and roll back header bits*/
4986
2.92k
                ps_final_prms->u4_cu_cbf_bits = 0;
4987
2.92k
                ps_final_prms->u4_cu_luma_res_bits = 0;
4988
2.92k
                ps_final_prms->u4_cu_chroma_res_bits = 0;
4989
2.92k
                ps_final_prms->u4_cu_hdr_bits =
4990
2.92k
                    (u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q;
4991
4992
                /* update cabac model with qtroot cbf = 0 decision */
4993
2.92k
                ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] =
4994
2.92k
                    gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1];
4995
4996
                /* restore untouched cabac models for, tusplit, cbfs, texture etc */
4997
2.92k
                memcpy(
4998
2.92k
                    &ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM],
4999
2.92k
                    &au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM],
5000
2.92k
                    (IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM));
5001
5002
                /* mark all tus as not coded for final eval */
5003
8.69k
                for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++)
5004
5.76k
                {
5005
5.76k
                    WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0;
5006
5.76k
                    WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0;
5007
5008
5.76k
                    nbr_4x4_t *ps_cur_nbr_4x4 =
5009
5.76k
                        ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu);
5010
5011
5.76k
                    num_4x4_in_tu = trans_size >> 2;
5012
5013
5.76k
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0;
5014
5.76k
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0;
5015
5.76k
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0;
5016
5017
5.76k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0;
5018
5.76k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
5019
5.76k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
5020
5021
5.76k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
5022
5.76k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
5023
5024
5.76k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
5025
5.76k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x;
5026
5.76k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y;
5027
5028
                    /* reset cbf for the all 4x4 in TU */
5029
5.76k
                    {
5030
5.76k
                        WORD32 i, j;
5031
5.76k
                        nbr_4x4_t *ps_tmp_4x4;
5032
5.76k
                        ps_tmp_4x4 = ps_cur_nbr_4x4;
5033
5034
45.2k
                        for(i = 0; i < num_4x4_in_tu; i++)
5035
39.5k
                        {
5036
333k
                            for(j = 0; j < num_4x4_in_tu; j++)
5037
294k
                            {
5038
294k
                                ps_tmp_4x4[j].b1_y_cbf = 0;
5039
294k
                            }
5040
                            /* row level update*/
5041
39.5k
                            ps_tmp_4x4 += num_4x4_in_cu;
5042
39.5k
                        }
5043
5.76k
                    }
5044
5.76k
                }
5045
2.92k
            }
5046
152k
        }
5047
1.17M
#endif /* ENABLE_INTER_ZCU_COST */
5048
5049
1.17M
#endif /* RDOPT_ENABLE */
5050
1.17M
    }
5051
5052
1.17M
    return (total_rdopt_cost);
5053
1.19M
}
5054
5055
#if ENABLE_RDO_BASED_TU_RECURSION
5056
LWORD64 ihevce_inter_tu_tree_selector_and_rdopt_cost_computer(
5057
    ihevce_enc_loop_ctxt_t *ps_ctxt,
5058
    enc_loop_cu_prms_t *ps_cu_prms,
5059
    void *pv_src,
5060
    WORD32 cu_size,
5061
    WORD32 cu_pos_x,
5062
    WORD32 cu_pos_y,
5063
    WORD32 curr_buf_idx,
5064
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
5065
    cu_inter_cand_t *ps_inter_cand,
5066
    cu_analyse_t *ps_cu_analyse,
5067
    WORD32 i4_alpha_stim_multiplier)
5068
3.05M
{
5069
3.05M
    tu_tree_node_t as_tu_nodes[256 + 64 + 16 + 4 + 1];
5070
3.05M
    buffer_data_for_tu_t s_buffer_data_for_tu;
5071
3.05M
    enc_loop_cu_final_prms_t *ps_final_prms;
5072
3.05M
    nbr_4x4_t *ps_nbr_4x4;
5073
5074
3.05M
    WORD32 num_split_flags = 1;
5075
3.05M
    UWORD8 u1_tu_size;
5076
3.05M
    UWORD8 *pu1_pred;
5077
3.05M
    UWORD8 *pu1_ecd_data;
5078
3.05M
    WORD16 *pi2_deq_data;
5079
3.05M
    UWORD8 *pu1_csbf_buf;
5080
3.05M
    UWORD8 *pu1_tu_sz_sft;
5081
3.05M
    UWORD8 *pu1_tu_posx;
5082
3.05M
    UWORD8 *pu1_tu_posy;
5083
3.05M
    LWORD64 total_rdopt_cost;
5084
3.05M
    WORD32 ctr;
5085
3.05M
    WORD32 chrm_ctr;
5086
3.05M
    WORD32 pred_stride;
5087
3.05M
    WORD32 recon_stride;
5088
3.05M
    WORD32 trans_size = ps_cu_analyse->u1_cu_size;
5089
3.05M
    WORD32 csbf_strd;
5090
3.05M
    WORD32 ecd_data_bytes_cons;
5091
3.05M
    WORD32 num_4x4_in_cu;
5092
3.05M
    WORD32 num_4x4_in_tu;
5093
3.05M
    WORD32 recon_func_mode;
5094
3.05M
    WORD32 cu_bits;
5095
3.05M
    UWORD8 u1_compute_spatial_ssd;
5096
    /* backup copy of cabac states for restoration if zero cu residue rdo wins later */
5097
3.05M
    UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
5098
5099
3.05M
    WORD32 i4_min_trans_size = 256;
5100
3.05M
    LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
5101
3.05M
    WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
5102
    /* model for no residue syntax qt root cbf flag */
5103
3.05M
    UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX];
5104
3.05M
    UWORD8 u1_skip_tu_sz_sft = 0;
5105
3.05M
    UWORD8 u1_skip_tu_posx = 0;
5106
3.05M
    UWORD8 u1_skip_tu_posy = 0;
5107
3.05M
    UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
5108
5109
3.05M
    ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
5110
3.05M
    ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
5111
3.05M
    pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
5112
3.05M
    pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
5113
3.05M
    csbf_strd = ps_ctxt->i4_cu_csbf_strd;
5114
3.05M
    pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
5115
3.05M
    pred_stride = ps_inter_cand->i4_pred_data_stride;
5116
3.05M
    recon_stride = cu_size;
5117
3.05M
    pu1_pred = ps_inter_cand->pu1_pred_data;
5118
3.05M
    chrm_ctr = 0;
5119
3.05M
    ecd_data_bytes_cons = 0;
5120
3.05M
    total_rdopt_cost = 0;
5121
3.05M
    num_4x4_in_cu = cu_size >> 2;
5122
3.05M
    recon_func_mode = PRED_MODE_INTER;
5123
3.05M
    cu_bits = 0;
5124
5125
    /* get the 4x4 level position of current cu */
5126
3.05M
    cu_pos_x = cu_pos_x << 1;
5127
3.05M
    cu_pos_y = cu_pos_y << 1;
5128
5129
3.05M
    ps_final_prms->u1_is_cu_coded = 0;
5130
3.05M
    ps_final_prms->u4_cu_sad = 0;
5131
5132
    /* populate the coeffs scan idx */
5133
3.05M
    ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
5134
5135
3.05M
#if ENABLE_INTER_ZCU_COST
5136
    /* reset cu not coded cost */
5137
3.05M
    ps_ctxt->i8_cu_not_coded_cost = 0;
5138
5139
    /* backup copy of cabac states for restoration if zero cu reside rdo wins later */
5140
3.05M
    memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END);
5141
3.05M
#endif
5142
5143
3.05M
    if(ps_cu_analyse->u1_cu_size == 64)
5144
38.4k
    {
5145
38.4k
        num_split_flags = 4;
5146
38.4k
        u1_tu_size = 32;
5147
38.4k
    }
5148
3.01M
    else
5149
3.01M
    {
5150
3.01M
        num_split_flags = 1;
5151
3.01M
        u1_tu_size = ps_cu_analyse->u1_cu_size;
5152
3.01M
    }
5153
5154
3.05M
    if(1 == ps_final_prms->u1_skip_flag)
5155
908k
    {
5156
908k
        if(64 == cu_size)
5157
10.3k
        {
5158
            /* TU = CU/2 is set but no trnaform is evaluated  */
5159
10.3k
            pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
5160
10.3k
            pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
5161
10.3k
            pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
5162
10.3k
        }
5163
898k
        else
5164
898k
        {
5165
            /* TU = CU is set but no trnaform is evaluated  */
5166
898k
            pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
5167
898k
            pu1_tu_posx = &u1_skip_tu_posx;
5168
898k
            pu1_tu_posy = &u1_skip_tu_posy;
5169
898k
        }
5170
5171
908k
        recon_func_mode = PRED_MODE_SKIP;
5172
908k
    }
5173
    /* check for PU part mode being AMP or No AMP */
5174
2.14M
    else if(ps_final_prms->u1_part_mode < SIZE_2NxnU)
5175
1.56M
    {
5176
1.56M
        if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64))
5177
1.47M
        {
5178
            /* TU= CU is evaluated 2Nx2N inter case */
5179
1.47M
            pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
5180
1.47M
            pu1_tu_posx = &u1_skip_tu_posx;
5181
1.47M
            pu1_tu_posy = &u1_skip_tu_posy;
5182
1.47M
        }
5183
90.2k
        else
5184
90.2k
        {
5185
            /* currently TU= CU/2 is evaluated for all inter case */
5186
90.2k
            pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
5187
90.2k
            pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
5188
90.2k
            pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
5189
90.2k
        }
5190
1.56M
    }
5191
583k
    else
5192
583k
    {
5193
        /* for AMP cases one level of TU recurssion is done */
5194
        /* based on oreintation of the partitions           */
5195
583k
        pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0];
5196
583k
        pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
5197
583k
        pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
5198
583k
    }
5199
5200
3.05M
    i4_min_trans_size = 4;
5201
5202
3.05M
    if(ps_ctxt->i1_cu_qp_delta_enable)
5203
1.37M
    {
5204
1.37M
        ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, i4_min_trans_size, 0);
5205
1.37M
    }
5206
5207
3.05M
    if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
5208
0
    {
5209
0
        ps_ctxt->i8_cl_ssd_lambda_qf =
5210
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
5211
0
             100.0f);
5212
0
        ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
5213
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
5214
0
             (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
5215
0
    }
5216
5217
3.05M
    u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
5218
3.05M
                             (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
5219
3.05M
                             CONVERT_SSDS_TO_SPATIAL_DOMAIN;
5220
5221
3.05M
    if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
5222
0
    {
5223
0
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
5224
0
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
5225
0
    }
5226
5227
3.05M
    if(!u1_compute_spatial_ssd)
5228
981k
    {
5229
981k
        ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
5230
981k
        ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
5231
981k
    }
5232
2.07M
    else
5233
2.07M
    {
5234
2.07M
        ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1;
5235
5236
2.07M
        if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))
5237
0
        {
5238
0
            ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 1;
5239
0
        }
5240
2.07M
    }
5241
5242
    /* RDOPT copy States :  TU init (best until prev TU) to current */
5243
3.05M
    memcpy(
5244
3.05M
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5245
3.05M
             .s_cabac_ctxt.au1_ctxt_models[0],
5246
3.05M
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
5247
3.05M
        IHEVC_CAB_COEFFX_PREFIX);
5248
5249
3.05M
    ihevce_tu_tree_init(
5250
3.05M
        as_tu_nodes,
5251
3.05M
        cu_size,
5252
3.05M
        (cu_size == 64) ? !ps_inter_cand->b1_skip_flag : 0,
5253
3.05M
        ps_inter_cand->b1_skip_flag ? 0 : ps_ctxt->u1_max_inter_tr_depth,
5254
3.05M
        INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
5255
3.05M
        ps_ctxt->u1_chroma_array_type == 2);
5256
5257
3.05M
    if(!ps_inter_cand->b1_skip_flag && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
5258
0
    {
5259
0
        ihevce_tuSplitArray_to_tuTree_mapper(
5260
0
            as_tu_nodes,
5261
0
            ps_inter_cand->ai4_tu_split_flag,
5262
0
            cu_size,
5263
0
            cu_size,
5264
0
            MAX(MIN_TU_SIZE, (cu_size >> ps_ctxt->u1_max_inter_tr_depth)),
5265
0
            MIN(MAX_TU_SIZE, cu_size),
5266
0
            ps_inter_cand->b1_skip_flag);
5267
0
    }
5268
5269
3.05M
    ASSERT(ihevce_tu_tree_coverage_in_cu(as_tu_nodes) == cu_size * cu_size);
5270
5271
3.05M
#if ENABLE_INTER_ZCU_COST
5272
3.05M
    ps_ctxt->i8_cu_not_coded_cost = 0;
5273
3.05M
#endif
5274
5275
3.05M
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_src = pv_src;
5276
3.05M
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_pred = pu1_pred;
5277
3.05M
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_recon =
5278
3.05M
        ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0];
5279
3.05M
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_src_stride = src_strd;
5280
3.05M
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_pred_stride = pred_stride;
5281
3.05M
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_recon_stride =
5282
3.05M
        ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
5283
3.05M
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_src = ps_chrm_cu_buf_prms->pu1_curr_src;
5284
3.05M
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred =
5285
3.05M
        ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
5286
3.05M
        curr_buf_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) + ((ps_ctxt->u1_chroma_array_type == 2) *
5287
3.05M
                                                              (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
5288
3.05M
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_recon =
5289
3.05M
        ps_final_prms->s_recon_datastore.apv_chroma_recon_bufs[0];
5290
3.05M
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_src_stride =
5291
3.05M
        ps_chrm_cu_buf_prms->i4_chrm_src_stride;
5292
3.05M
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride =
5293
3.05M
        ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
5294
3.05M
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_recon_stride =
5295
3.05M
        ps_final_prms->s_recon_datastore.i4_chromaRecon_stride;
5296
3.05M
    s_buffer_data_for_tu.ps_nbr_data_buf = ps_nbr_4x4;
5297
3.05M
    s_buffer_data_for_tu.pi2_deq_data = pi2_deq_data;
5298
3.05M
    s_buffer_data_for_tu.pi2_deq_data_chroma =
5299
3.05M
        pi2_deq_data + ps_final_prms->i4_chrm_deq_coeff_strt_idx;
5300
3.05M
    s_buffer_data_for_tu.i4_nbr_data_buf_stride = num_4x4_in_cu;
5301
3.05M
    s_buffer_data_for_tu.i4_deq_data_stride = cu_size;
5302
3.05M
    s_buffer_data_for_tu.i4_deq_data_stride_chroma = cu_size;
5303
3.05M
    s_buffer_data_for_tu.ppu1_ecd = &pu1_ecd_data;
5304
5305
3.05M
    if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))
5306
0
    {
5307
0
        UWORD8 i;
5308
5309
0
        UWORD8 *pu1_pred = (UWORD8 *)s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred;
5310
5311
0
        for(i = 0; i < (!!ps_inter_cand->b3_part_size) + 1; i++)
5312
0
        {
5313
0
            pu_t *ps_pu;
5314
5315
0
            WORD32 inter_pu_wd;
5316
0
            WORD32 inter_pu_ht;
5317
5318
0
            ps_pu = ps_inter_cand->as_inter_pu + i;
5319
5320
0
            inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
5321
0
            inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
5322
0
            inter_pu_ht <<= (ps_ctxt->u1_chroma_array_type == 2);
5323
0
            ihevce_chroma_inter_pred_pu(
5324
0
                &ps_ctxt->s_mc_ctxt,
5325
0
                ps_pu,
5326
0
                pu1_pred,
5327
0
                s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride);
5328
0
            if(!!ps_inter_cand->b3_part_size)
5329
0
            {
5330
                /* 2Nx__ partion case */
5331
0
                if(inter_pu_wd == cu_size)
5332
0
                {
5333
0
                    pu1_pred +=
5334
0
                        (inter_pu_ht *
5335
0
                         s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride);
5336
0
                }
5337
5338
                /* __x2N partion case */
5339
0
                if(inter_pu_ht == (cu_size >> !(ps_ctxt->u1_chroma_array_type == 2)))
5340
0
                {
5341
0
                    pu1_pred += inter_pu_wd;
5342
0
                }
5343
0
            }
5344
0
        }
5345
0
    }
5346
5347
#if !ENABLE_TOP_DOWN_TU_RECURSION
5348
    total_rdopt_cost = ihevce_tu_tree_selector(
5349
        ps_ctxt,
5350
        as_tu_nodes,
5351
        &s_buffer_data_for_tu,
5352
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5353
             .s_cabac_ctxt.au1_ctxt_models[0],
5354
        recon_func_mode,
5355
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
5356
        i4_alpha_stim_multiplier,
5357
        u1_is_cu_noisy,
5358
#endif
5359
        0,
5360
        ps_ctxt->u1_max_inter_tr_depth,
5361
        ps_inter_cand->b3_part_size,
5362
        u1_compute_spatial_ssd);
5363
#else
5364
3.05M
    total_rdopt_cost = ihevce_topDown_tu_tree_selector(
5365
3.05M
        ps_ctxt,
5366
3.05M
        as_tu_nodes,
5367
3.05M
        &s_buffer_data_for_tu,
5368
3.05M
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5369
3.05M
             .s_cabac_ctxt.au1_ctxt_models[0],
5370
3.05M
        recon_func_mode,
5371
3.05M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
5372
3.05M
        i4_alpha_stim_multiplier,
5373
3.05M
        u1_is_cu_noisy,
5374
3.05M
#endif
5375
3.05M
        0,
5376
3.05M
        ps_ctxt->u1_max_inter_tr_depth,
5377
3.05M
        ps_inter_cand->b3_part_size,
5378
3.05M
        INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
5379
3.05M
        u1_compute_spatial_ssd);
5380
3.05M
#endif
5381
5382
3.05M
    ps_final_prms->u2_num_tus_in_cu = 0;
5383
3.05M
    ps_final_prms->u4_cu_luma_res_bits = 0;
5384
3.05M
    ps_final_prms->u4_cu_sad = 0;
5385
3.05M
    total_rdopt_cost = 0;
5386
3.05M
    ecd_data_bytes_cons = 0;
5387
3.05M
    cu_bits = 0;
5388
3.05M
#if ENABLE_INTER_ZCU_COST
5389
3.05M
    ps_ctxt->i8_cu_not_coded_cost = 0;
5390
3.05M
#endif
5391
3.05M
    ps_final_prms->u1_is_cu_coded = 0;
5392
3.05M
    ps_final_prms->u1_cu_size = cu_size;
5393
5394
3.05M
    ihevce_tu_selector_debriefer(
5395
3.05M
        as_tu_nodes,
5396
3.05M
        ps_final_prms,
5397
3.05M
        &total_rdopt_cost,
5398
3.05M
#if ENABLE_INTER_ZCU_COST
5399
3.05M
        &ps_ctxt->i8_cu_not_coded_cost,
5400
3.05M
#endif
5401
3.05M
        &ecd_data_bytes_cons,
5402
3.05M
        &cu_bits,
5403
3.05M
        &ps_final_prms->u2_num_tus_in_cu,
5404
3.05M
        ps_ctxt->i4_cu_qp,
5405
3.05M
        cu_pos_x * 4,
5406
3.05M
        cu_pos_y * 4,
5407
3.05M
        INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
5408
3.05M
        (ps_ctxt->u1_chroma_array_type == 2),
5409
3.05M
        POS_TL);
5410
5411
3.05M
    if(!(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)))
5412
3.05M
    {
5413
3.05M
        ps_final_prms->i4_chrm_cu_coeff_strt_idx = ecd_data_bytes_cons;
5414
3.05M
    }
5415
5416
    /* Modify the cost function for this CU. */
5417
    /* loop in for 8x8 blocks */
5418
3.05M
    if(ps_ctxt->u1_enable_psyRDOPT)
5419
0
    {
5420
0
        UWORD8 *pu1_recon_cu;
5421
0
        WORD32 recon_stride;
5422
0
        WORD32 curr_pos_x;
5423
0
        WORD32 curr_pos_y;
5424
0
        WORD32 start_index;
5425
0
        WORD32 num_horz_cu_in_ctb;
5426
0
        WORD32 had_block_size;
5427
5428
        /* tODO: sreenivasa ctb size has to be used appropriately */
5429
0
        had_block_size = 8;
5430
0
        num_horz_cu_in_ctb = 64 / had_block_size;
5431
5432
0
        curr_pos_x = cu_pos_x << 2; /* pel units */
5433
0
        curr_pos_y = cu_pos_y << 2; /* pel units */
5434
0
        recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
5435
0
        pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore
5436
0
                            .apv_luma_recon_bufs[0]);  // already pointing to the current CU recon
5437
        //+ \curr_pos_x + curr_pos_y * recon_stride;
5438
5439
        /* start index to index the source satd of curr cu int he current ctb*/
5440
0
        start_index =
5441
0
            (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
5442
5443
0
        {
5444
0
            total_rdopt_cost += ihevce_psy_rd_cost(
5445
0
                ps_ctxt->ai4_source_satd_8x8,
5446
0
                pu1_recon_cu,
5447
0
                recon_stride,
5448
0
                1,  //howz stride
5449
0
                cu_size,
5450
0
                0,  // pic type
5451
0
                0,  //layer id
5452
0
                ps_ctxt->i4_satd_lamda,  // lambda
5453
0
                start_index,
5454
0
                ps_ctxt->u1_is_input_data_hbd,
5455
0
                ps_ctxt->u4_psy_strength,
5456
0
                &ps_ctxt->s_cmn_opt_func);  // 8 bit
5457
0
        }
5458
0
    }
5459
5460
3.05M
    ps_final_prms->u1_chroma_intra_pred_mode = 4;
5461
5462
    /* update the bytes consumed */
5463
3.05M
    ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
5464
5465
    /* store the current cu size to final prms */
5466
3.05M
    ps_final_prms->u1_cu_size = cu_size;
5467
    /* ------------- Chroma processing -------------- */
5468
    /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEDD preset*/
5469
3.05M
    if(ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt &&
5470
3.05M
       !(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)))
5471
3.05M
    {
5472
3.05M
        LWORD64 chrm_rdopt_cost;
5473
3.05M
        WORD32 chrm_rdopt_tu_bits;
5474
5475
        /* Store the current RDOPT cost to enable early exit in chrom_prcs */
5476
3.05M
        ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
5477
5478
3.05M
        chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
5479
3.05M
            ps_ctxt,
5480
3.05M
            curr_buf_idx,
5481
3.05M
            0, /* TU mode : Don't care in Inter patrh */
5482
3.05M
            ps_chrm_cu_buf_prms->pu1_curr_src,
5483
3.05M
            ps_chrm_cu_buf_prms->i4_chrm_src_stride,
5484
3.05M
            ps_chrm_cu_buf_prms->pu1_cu_left,
5485
3.05M
            ps_chrm_cu_buf_prms->pu1_cu_top,
5486
3.05M
            ps_chrm_cu_buf_prms->pu1_cu_top_left,
5487
3.05M
            ps_chrm_cu_buf_prms->i4_cu_left_stride,
5488
3.05M
            (cu_pos_x >> 1),
5489
3.05M
            (cu_pos_y >> 1),
5490
3.05M
            &chrm_rdopt_tu_bits,
5491
3.05M
            i4_alpha_stim_multiplier,
5492
3.05M
            u1_is_cu_noisy);
5493
5494
3.05M
#if WEIGH_CHROMA_COST
5495
3.05M
        chrm_rdopt_cost = (LWORD64)(
5496
3.05M
            (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
5497
3.05M
             (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
5498
3.05M
            CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
5499
3.05M
#endif
5500
5501
3.05M
#if CHROMA_RDOPT_ENABLE
5502
3.05M
        total_rdopt_cost += chrm_rdopt_cost;
5503
3.05M
#endif
5504
3.05M
        cu_bits += chrm_rdopt_tu_bits;
5505
5506
        /* during chroma evaluation if skip decision was over written     */
5507
        /* then the current skip candidate is set to a non skip candidate */
5508
3.05M
        ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag;
5509
5510
        /* cu bits for chroma residual if chroma rdopt is on       */
5511
        /* if zero_cbf eval is disabled then cu bits will be zero  */
5512
3.05M
        ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
5513
5514
3.05M
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
5515
3.05M
        {
5516
            /* Early exit : If the current running cost exceeds
5517
            the prev. best mode cost, break */
5518
3.05M
            if(total_rdopt_cost > prev_best_rdopt_cost)
5519
363k
            {
5520
363k
                return (total_rdopt_cost);
5521
363k
            }
5522
3.05M
        }
5523
3.05M
    }
5524
0
    else
5525
0
    {}
5526
5527
2.69M
#if SHRINK_INTER_TUTREE
5528
    /* ------------- Quadtree TU split  optimization ------------  */
5529
2.69M
    if(ps_final_prms->u1_is_cu_coded)
5530
323k
    {
5531
323k
        ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree(
5532
323k
            &ps_final_prms->as_tu_enc_loop[0],
5533
323k
            &ps_final_prms->as_tu_enc_loop_temp_prms[0],
5534
323k
            &ps_final_prms->s_recon_datastore,
5535
323k
            ps_final_prms->u2_num_tus_in_cu,
5536
323k
            (ps_ctxt->u1_chroma_array_type == 2));
5537
323k
    }
5538
2.69M
#endif
5539
5540
    /* RDOPT copy States :  Best after all luma TUs (and chroma,if enabled)to current */
5541
2.69M
    COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
5542
2.69M
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5543
2.69M
                .s_cabac_ctxt.au1_ctxt_models[0] +
5544
2.69M
            IHEVC_CAB_COEFFX_PREFIX,
5545
2.69M
        &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
5546
2.69M
        IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
5547
5548
    /* -------- Bit estimate for RD opt -------------- */
5549
2.69M
    {
5550
2.69M
        nbr_avail_flags_t s_nbr;
5551
        /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
5552
2.69M
        WORD32 cbf_bits, header_bits;
5553
5554
        /* get the neighbour availability flags for current cu  */
5555
2.69M
        ihevce_get_only_nbr_flag(
5556
2.69M
            &s_nbr,
5557
2.69M
            ps_ctxt->pu1_ctb_nbr_map,
5558
2.69M
            ps_ctxt->i4_nbr_map_strd,
5559
2.69M
            cu_pos_x,
5560
2.69M
            cu_pos_y,
5561
2.69M
            (cu_size >> 2),
5562
2.69M
            (cu_size >> 2));
5563
5564
        /* call the entropy rdo encode to get the bit estimate for current cu */
5565
2.69M
        header_bits = ihevce_entropy_rdo_encode_cu(
5566
2.69M
            &ps_ctxt->s_rdopt_entropy_ctxt,
5567
2.69M
            ps_final_prms,
5568
2.69M
            (cu_pos_x >> 1), /*  back to 8x8 pel units   */
5569
2.69M
            (cu_pos_y >> 1), /*  back to 8x8 pel units   */
5570
2.69M
            cu_size,
5571
2.69M
            ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
5572
2.69M
                                           : s_nbr.u1_top_avail,
5573
2.69M
            s_nbr.u1_left_avail,
5574
2.69M
            &ps_final_prms->pu1_cu_coeffs[0],
5575
2.69M
            &cbf_bits);
5576
5577
2.69M
        cu_bits += header_bits;
5578
5579
        /* cbf bits are excluded from header bits, instead considered as texture bits */
5580
        /* incase if zero cbf eval is disabled then texture bits gets added here */
5581
2.69M
        ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
5582
2.69M
        ps_final_prms->u4_cu_cbf_bits = cbf_bits;
5583
5584
2.69M
#if RDOPT_ENABLE
5585
        /* add the cost of coding the header bits */
5586
2.69M
        total_rdopt_cost +=
5587
2.69M
            COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
5588
5589
2.69M
#if ENABLE_INTER_ZCU_COST
5590
        /* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */
5591
2.69M
        if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level))
5592
323k
        {
5593
323k
            LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost;
5594
5595
323k
            WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) &&
5596
323k
                                      (1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag);
5597
5598
323k
            cab_ctxt_t *ps_cab_ctxt =
5599
323k
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt;
5600
5601
            /* Read header bits generatated after ihevce_entropy_rdo_encode_cu() call  */
5602
323k
            UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12;
5603
5604
            /* account for coding qt_root_cbf = 0 */
5605
            /* First subtract cost for coding as 1 (part of header bits) and then add cost for coding as 0 */
5606
323k
            u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0];
5607
323k
            if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1])
5608
588
                u4_cu_hdr_bits_q12 = 0;
5609
323k
            else
5610
323k
                u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1];
5611
5612
            /* add the cost of coding the header bits */
5613
323k
            i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30(
5614
323k
                u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */,
5615
323k
                ps_ctxt->i8_cl_ssd_lambda_qf,
5616
323k
                (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
5617
5618
323k
            if(ps_ctxt->u1_enable_psyRDOPT)
5619
0
            {
5620
0
                i8_cu_not_coded_cost = total_rdopt_cost + 1;
5621
0
            }
5622
5623
            /* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */
5624
323k
            if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu))
5625
4.07k
            {
5626
4.07k
                WORD32 tx_size;
5627
5628
                /* force cu as not coded and update the cost */
5629
4.07k
                ps_final_prms->u1_is_cu_coded = 0;
5630
4.07k
                ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
5631
4.07k
                ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
5632
5633
4.07k
                total_rdopt_cost = i8_cu_not_coded_cost;
5634
5635
                /* reset num TUs to 1 unless cu size id 64 */
5636
4.07k
                ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1;
5637
4.07k
                trans_size = (64 == cu_size) ? 32 : cu_size;
5638
4.07k
                GETRANGE(tx_size, trans_size);
5639
5640
                /* reset the bytes consumed */
5641
4.07k
                ps_final_prms->i4_num_bytes_ecd_data = 0;
5642
5643
                /* reset texture related bits and roll back header bits*/
5644
4.07k
                ps_final_prms->u4_cu_cbf_bits = 0;
5645
4.07k
                ps_final_prms->u4_cu_luma_res_bits = 0;
5646
4.07k
                ps_final_prms->u4_cu_chroma_res_bits = 0;
5647
4.07k
                ps_final_prms->u4_cu_hdr_bits =
5648
4.07k
                    (u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q;
5649
5650
                /* update cabac model with qtroot cbf = 0 decision */
5651
4.07k
                ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] =
5652
4.07k
                    gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1];
5653
5654
                /* restore untouched cabac models for, tusplit, cbfs, texture etc */
5655
4.07k
                memcpy(
5656
4.07k
                    &ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM],
5657
4.07k
                    &au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM],
5658
4.07k
                    (IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM));
5659
5660
                /* mark all tus as not coded for final eval */
5661
10.2k
                for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++)
5662
6.16k
                {
5663
6.16k
                    WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0;
5664
6.16k
                    WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0;
5665
5666
6.16k
                    nbr_4x4_t *ps_cur_nbr_4x4 =
5667
6.16k
                        ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu);
5668
5669
6.16k
                    num_4x4_in_tu = trans_size >> 2;
5670
5671
6.16k
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0;
5672
6.16k
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0;
5673
6.16k
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0;
5674
5675
6.16k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0;
5676
6.16k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
5677
6.16k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
5678
5679
6.16k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
5680
6.16k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
5681
5682
6.16k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
5683
6.16k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x;
5684
6.16k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y;
5685
5686
                    /* reset cbf for the all 4x4 in TU */
5687
6.16k
                    {
5688
6.16k
                        WORD32 i, j;
5689
6.16k
                        nbr_4x4_t *ps_tmp_4x4;
5690
6.16k
                        ps_tmp_4x4 = ps_cur_nbr_4x4;
5691
5692
46.0k
                        for(i = 0; i < num_4x4_in_tu; i++)
5693
39.9k
                        {
5694
329k
                            for(j = 0; j < num_4x4_in_tu; j++)
5695
290k
                            {
5696
290k
                                ps_tmp_4x4[j].b1_y_cbf = 0;
5697
290k
                            }
5698
                            /* row level update*/
5699
39.9k
                            ps_tmp_4x4 += num_4x4_in_cu;
5700
39.9k
                        }
5701
6.16k
                    }
5702
6.16k
                }
5703
4.07k
            }
5704
323k
        }
5705
2.69M
#endif /* ENABLE_INTER_ZCU_COST */
5706
5707
2.69M
#endif /* RDOPT_ENABLE */
5708
2.69M
    }
5709
5710
2.69M
    return (total_rdopt_cost);
5711
3.05M
}
5712
#endif
5713
5714
/*!
5715
******************************************************************************
5716
* \if Function name : ihevce_inter_rdopt_cu_mc_mvp \endif
5717
*
5718
* \brief
5719
*    Inter Coding unit funtion which performs MC and MVP calc for RD opt mode
5720
*
5721
* \param[in] ps_ctxt       enc_loop module ctxt pointer
5722
* \param[in] ps_inter_cand pointer to inter candidate structure
5723
* \param[in] cu_size         Current CU size
5724
* \param[in] cu_pos_x        cu position x w.r.t to ctb
5725
* \param[in] cu_pos_y        cu position y w.r.t to ctb
5726
* \param[in] ps_left_nbr_4x4 Left neighbour 4x4 structure pointer
5727
* \param[in] ps_top_nbr_4x4  top neighbour 4x4 structure pointer
5728
* \param[in] ps_topleft_nbr_4x4  top left neighbour 4x4 structure pointer
5729
* \param[in] nbr_4x4_left_strd  left neighbour 4x4 buffer stride
5730
* \param[in] curr_buf_idx Current Buffer index
5731
*
5732
* \return
5733
*    Rdopt cost
5734
*
5735
* \author
5736
*  Ittiam
5737
*
5738
*****************************************************************************
5739
*/
5740
LWORD64 ihevce_inter_rdopt_cu_mc_mvp(
5741
    ihevce_enc_loop_ctxt_t *ps_ctxt,
5742
    cu_inter_cand_t *ps_inter_cand,
5743
    WORD32 cu_size,
5744
    WORD32 cu_pos_x,
5745
    WORD32 cu_pos_y,
5746
    nbr_4x4_t *ps_left_nbr_4x4,
5747
    nbr_4x4_t *ps_top_nbr_4x4,
5748
    nbr_4x4_t *ps_topleft_nbr_4x4,
5749
    WORD32 nbr_4x4_left_strd,
5750
    WORD32 curr_buf_idx)
5751
4.37M
{
5752
    /* local variables */
5753
4.37M
    enc_loop_cu_final_prms_t *ps_final_prms;
5754
4.37M
    nbr_avail_flags_t s_nbr;
5755
4.37M
    nbr_4x4_t *ps_nbr_4x4;
5756
5757
4.37M
    UWORD8 au1_is_top_used[2][MAX_MVP_LIST_CAND];
5758
4.37M
    UWORD8 *pu1_pred;
5759
4.37M
    WORD32 rdopt_cost;
5760
4.37M
    WORD32 ctr;
5761
4.37M
    WORD32 num_cu_part;
5762
4.37M
    WORD32 inter_pu_wd;
5763
4.37M
    WORD32 inter_pu_ht;
5764
4.37M
    WORD32 pred_stride;
5765
5766
    /* get the pointers based on curbuf idx */
5767
4.37M
    ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
5768
4.37M
    ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
5769
4.37M
    pu1_pred = ps_inter_cand->pu1_pred_data;
5770
5771
4.37M
    pred_stride = ps_inter_cand->i4_pred_data_stride;
5772
5773
    /* store the partition mode in final prms */
5774
4.37M
    ps_final_prms->u1_part_mode = ps_inter_cand->b3_part_size;
5775
5776
    /* since encoder does not support NXN part type */
5777
    /* num parts can be either 1 or 2 only          */
5778
4.37M
    ASSERT(SIZE_NxN != ps_inter_cand->b3_part_size);
5779
5780
4.37M
    num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1;
5781
5782
    /* get the 4x4 level position of current cu */
5783
4.37M
    cu_pos_x = cu_pos_x << 1;
5784
4.37M
    cu_pos_y = cu_pos_y << 1;
5785
5786
    /* populate cu level params */
5787
4.37M
    ps_final_prms->u1_intra_flag = PRED_MODE_INTER;
5788
4.37M
    ps_final_prms->u2_num_pus_in_cu = num_cu_part;
5789
5790
    /* run a loop over all the partitons in cu */
5791
9.50M
    for(ctr = 0; ctr < num_cu_part; ctr++)
5792
5.13M
    {
5793
5.13M
        pu_mv_t as_pred_mv[MAX_MVP_LIST_CAND];
5794
5.13M
        pu_t *ps_pu;
5795
5.13M
        WORD32 skip_or_merge_flag;
5796
5.13M
        UWORD8 u1_use_mvp_from_top_row;
5797
5798
5.13M
        ps_pu = &ps_inter_cand->as_inter_pu[ctr];
5799
5800
        /* IF AMP then each partitions can have diff wd ht */
5801
5.13M
        inter_pu_wd = (ps_pu->b4_wd + 1) << 2;
5802
5.13M
        inter_pu_ht = (ps_pu->b4_ht + 1) << 2;
5803
5804
        /* populate reference pic buf id for bs compute */
5805
5806
        /* L0 */
5807
5.13M
        if(-1 != ps_pu->mv.i1_l0_ref_idx)
5808
4.82M
        {
5809
4.82M
            ps_pu->mv.i1_l0_ref_pic_buf_id =
5810
4.82M
                ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx]->i4_buf_id;
5811
4.82M
        }
5812
5813
        /* L1 */
5814
5.13M
        if(-1 != ps_pu->mv.i1_l1_ref_idx)
5815
1.89M
        {
5816
1.89M
            ps_pu->mv.i1_l1_ref_pic_buf_id =
5817
1.89M
                ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx]->i4_buf_id;
5818
1.89M
        }
5819
5820
        /* SKIP or merge check for every part */
5821
5.13M
        skip_or_merge_flag = ps_inter_cand->b1_skip_flag | ps_pu->b1_merge_flag;
5822
5823
        /* ----------- MV Prediction ----------------- */
5824
5.13M
        if(0 == skip_or_merge_flag)
5825
1.19M
        {
5826
            /* get the neighbour availability flags */
5827
1.19M
            ihevce_get_only_nbr_flag(
5828
1.19M
                &s_nbr,
5829
1.19M
                ps_ctxt->pu1_ctb_nbr_map,
5830
1.19M
                ps_ctxt->i4_nbr_map_strd,
5831
1.19M
                cu_pos_x,
5832
1.19M
                cu_pos_y,
5833
1.19M
                inter_pu_wd >> 2,
5834
1.19M
                inter_pu_ht >> 2);
5835
5836
1.19M
            if(ps_ctxt->u1_disable_intra_eval && DISABLE_TOP_SYNC && (ps_pu->b4_pos_y == 0))
5837
0
            {
5838
0
                u1_use_mvp_from_top_row = 0;
5839
0
            }
5840
1.19M
            else
5841
1.19M
            {
5842
1.19M
                u1_use_mvp_from_top_row = 1;
5843
1.19M
            }
5844
5845
1.19M
            if(!u1_use_mvp_from_top_row)
5846
0
            {
5847
0
                if(s_nbr.u1_top_avail || s_nbr.u1_top_lt_avail || s_nbr.u1_top_rt_avail)
5848
0
                {
5849
0
                    if(!s_nbr.u1_left_avail && !s_nbr.u1_bot_lt_avail)
5850
0
                    {
5851
0
                        WORD32 curr_cu_pos_in_row, cu_top_right_offset, cu_top_right_dep_pos;
5852
5853
                        /* Ensure Top Right Sync */
5854
0
                        if(!ps_ctxt->u1_use_top_at_ctb_boundary)
5855
0
                        {
5856
0
                            curr_cu_pos_in_row =
5857
0
                                ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x + (cu_pos_x << 2);
5858
5859
0
                            if(ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y == 0)
5860
0
                            {
5861
                                /* No wait for 1st row */
5862
0
                                cu_top_right_offset = -(MAX_CTB_SIZE);
5863
0
                                {
5864
0
                                    ihevce_tile_params_t *ps_col_tile_params =
5865
0
                                        ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
5866
0
                                         ps_ctxt->i4_tile_col_idx);
5867
5868
                                    /* No wait for 1st row */
5869
0
                                    cu_top_right_offset =
5870
0
                                        -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
5871
0
                                }
5872
0
                                cu_top_right_dep_pos = 0;
5873
0
                            }
5874
0
                            else
5875
0
                            {
5876
0
                                cu_top_right_offset = (cu_size) + 4;
5877
0
                                cu_top_right_dep_pos =
5878
0
                                    (ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y >> 6) - 1;
5879
0
                            }
5880
5881
0
                            ihevce_dmgr_chk_row_row_sync(
5882
0
                                ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
5883
0
                                curr_cu_pos_in_row,
5884
0
                                cu_top_right_offset,
5885
0
                                cu_top_right_dep_pos,
5886
0
                                ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
5887
0
                                ps_ctxt->thrd_id);
5888
0
                        }
5889
5890
0
                        u1_use_mvp_from_top_row = 1;
5891
0
                    }
5892
0
                    else
5893
0
                    {
5894
0
                        s_nbr.u1_top_avail = 0;
5895
0
                        s_nbr.u1_top_lt_avail = 0;
5896
0
                        s_nbr.u1_top_rt_avail = 0;
5897
0
                    }
5898
0
                }
5899
0
                else
5900
0
                {
5901
0
                    u1_use_mvp_from_top_row = 1;
5902
0
                }
5903
0
            }
5904
            /* Call the MV prediction module to get MVP */
5905
1.19M
            ihevce_mv_pred(
5906
1.19M
                &ps_ctxt->s_mv_pred_ctxt,
5907
1.19M
                ps_top_nbr_4x4,
5908
1.19M
                ps_left_nbr_4x4,
5909
1.19M
                ps_topleft_nbr_4x4,
5910
1.19M
                nbr_4x4_left_strd,
5911
1.19M
                &s_nbr,
5912
1.19M
                NULL, /* colocated MV */
5913
1.19M
                ps_pu,
5914
1.19M
                &as_pred_mv[0],
5915
1.19M
                au1_is_top_used);
5916
1.19M
        }
5917
5918
        /* store the nbr 4x4 structure */
5919
5.13M
        ps_nbr_4x4->b1_skip_flag = ps_inter_cand->b1_skip_flag;
5920
5.13M
        ps_nbr_4x4->b1_intra_flag = 0;
5921
5.13M
        ps_nbr_4x4->b1_pred_l0_flag = 0;
5922
5.13M
        ps_nbr_4x4->b1_pred_l1_flag = 0;
5923
5924
        /* DC is default mode for inter cu, required for intra mode signalling */
5925
5.13M
        ps_nbr_4x4->b6_luma_intra_mode = 1;
5926
5927
        /* copy the motion vectors to neighbour structure */
5928
5.13M
        ps_nbr_4x4->mv = ps_pu->mv;
5929
5930
        /* copy the PU to final out pu */
5931
5.13M
        ps_final_prms->as_pu_enc_loop[ctr] = *ps_pu;
5932
5933
        /* copy the PU to chroma */
5934
5.13M
        ps_final_prms->as_pu_chrm_proc[ctr] = *ps_pu;
5935
5936
        /* store the skip flag to final prms */
5937
5.13M
        ps_final_prms->u1_skip_flag = ps_inter_cand->b1_skip_flag;
5938
5939
        /* MVP index & MVD calc is gated on skip/merge flag */
5940
5.13M
        if(0 == skip_or_merge_flag)
5941
1.19M
        {
5942
            /* calculate the MVDs and popluate the MVP idx for L0 */
5943
1.19M
            if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode))
5944
1.07M
            {
5945
1.07M
                WORD32 idx0_cost, idx1_cost;
5946
5947
                /* calculate the ABS mvd for cand 0 */
5948
1.07M
                idx0_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[0].s_l0_mv.i2_mvx);
5949
1.07M
                idx0_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[0].s_l0_mv.i2_mvy);
5950
5951
                /* calculate the ABS mvd for cand 1 */
5952
1.07M
                if(u1_use_mvp_from_top_row)
5953
1.07M
                {
5954
1.07M
                    idx1_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[1].s_l0_mv.i2_mvx);
5955
1.07M
                    idx1_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[1].s_l0_mv.i2_mvy);
5956
1.07M
                }
5957
0
                else
5958
0
                {
5959
0
                    idx1_cost = INT_MAX;
5960
0
                }
5961
5962
                /* based on the least cost choose the mvp idx */
5963
1.07M
                if(idx0_cost <= idx1_cost)
5964
729k
                {
5965
729k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -=
5966
729k
                        as_pred_mv[0].s_l0_mv.i2_mvx;
5967
729k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -=
5968
729k
                        as_pred_mv[0].s_l0_mv.i2_mvy;
5969
5970
729k
                    ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 0;
5971
729k
                }
5972
350k
                else
5973
350k
                {
5974
350k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -=
5975
350k
                        as_pred_mv[1].s_l0_mv.i2_mvx;
5976
350k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -=
5977
350k
                        as_pred_mv[1].s_l0_mv.i2_mvy;
5978
5979
350k
                    ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 1;
5980
350k
                }
5981
5982
                /* set the pred l0 flag for neighbour storage */
5983
1.07M
                ps_nbr_4x4->b1_pred_l0_flag = 1;
5984
1.07M
            }
5985
            /* calculate the MVDs and popluate the MVP idx for L1 */
5986
1.19M
            if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode))
5987
159k
            {
5988
159k
                WORD32 idx0_cost, idx1_cost;
5989
5990
                /* calculate the ABS mvd for cand 0 */
5991
159k
                idx0_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[0].s_l1_mv.i2_mvx);
5992
159k
                idx0_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[0].s_l1_mv.i2_mvy);
5993
5994
                /* calculate the ABS mvd for cand 1 */
5995
159k
                if(u1_use_mvp_from_top_row)
5996
159k
                {
5997
159k
                    idx1_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[1].s_l1_mv.i2_mvx);
5998
159k
                    idx1_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[1].s_l1_mv.i2_mvy);
5999
159k
                }
6000
0
                else
6001
0
                {
6002
0
                    idx1_cost = INT_MAX;
6003
0
                }
6004
6005
                /* based on the least cost choose the mvp idx */
6006
159k
                if(idx0_cost <= idx1_cost)
6007
106k
                {
6008
106k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -=
6009
106k
                        as_pred_mv[0].s_l1_mv.i2_mvx;
6010
106k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -=
6011
106k
                        as_pred_mv[0].s_l1_mv.i2_mvy;
6012
6013
106k
                    ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 0;
6014
106k
                }
6015
52.3k
                else
6016
52.3k
                {
6017
52.3k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -=
6018
52.3k
                        as_pred_mv[1].s_l1_mv.i2_mvx;
6019
52.3k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -=
6020
52.3k
                        as_pred_mv[1].s_l1_mv.i2_mvy;
6021
6022
52.3k
                    ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 1;
6023
52.3k
                }
6024
6025
                /* set the pred l1 flag for neighbour storage */
6026
159k
                ps_nbr_4x4->b1_pred_l1_flag = 1;
6027
159k
            }
6028
6029
            /* set the merge flag to 0 */
6030
1.19M
            ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = 0;
6031
1.19M
            ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = 0;
6032
1.19M
        }
6033
3.93M
        else
6034
3.93M
        {
6035
            /* copy the merge index from candidate */
6036
3.93M
            ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = ps_pu->b1_merge_flag;
6037
6038
3.93M
            ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = ps_pu->b3_merge_idx;
6039
6040
3.93M
            if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode))
6041
3.74M
            {
6042
                /* set the pred l0 flag for neighbour storage */
6043
3.74M
                ps_nbr_4x4->b1_pred_l0_flag = 1;
6044
3.74M
            }
6045
6046
            /* calculate the MVDs and popluate the MVP idx for L1 */
6047
3.93M
            if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode))
6048
764k
            {
6049
                /* set the pred l1 flag for neighbour storage */
6050
764k
                ps_nbr_4x4->b1_pred_l1_flag = 1;
6051
764k
            }
6052
3.93M
        }
6053
6054
        /* RD opt cost computation is part of cu_ntu func hence here it is set to 0 */
6055
5.13M
        rdopt_cost = 0;
6056
6057
        /* copy the MV to colocated Mv structure */
6058
5.13M
        ps_final_prms->as_col_pu_enc_loop[ctr].s_l0_mv = ps_pu->mv.s_l0_mv;
6059
5.13M
        ps_final_prms->as_col_pu_enc_loop[ctr].s_l1_mv = ps_pu->mv.s_l1_mv;
6060
5.13M
        ps_final_prms->as_col_pu_enc_loop[ctr].i1_l0_ref_idx = ps_pu->mv.i1_l0_ref_idx;
6061
5.13M
        ps_final_prms->as_col_pu_enc_loop[ctr].i1_l1_ref_idx = ps_pu->mv.i1_l1_ref_idx;
6062
5.13M
        ps_final_prms->as_col_pu_enc_loop[ctr].b2_pred_mode = ps_pu->b2_pred_mode;
6063
5.13M
        ps_final_prms->as_col_pu_enc_loop[ctr].b1_intra_flag = 0;
6064
6065
        /* replicate neighbour 4x4 strcuture for entire partition */
6066
5.13M
        {
6067
5.13M
            WORD32 i, j;
6068
5.13M
            nbr_4x4_t *ps_tmp_4x4;
6069
6070
5.13M
            ps_tmp_4x4 = ps_nbr_4x4;
6071
6072
25.5M
            for(i = 0; i < (inter_pu_ht >> 2); i++)
6073
20.4M
            {
6074
137M
                for(j = 0; j < (inter_pu_wd >> 2); j++)
6075
116M
                {
6076
116M
                    ps_tmp_4x4[j] = *ps_nbr_4x4;
6077
116M
                }
6078
                /* row level update*/
6079
20.4M
                ps_tmp_4x4 += (cu_size >> 2);
6080
20.4M
            }
6081
5.13M
        }
6082
        /* set the neighbour map to 1 */
6083
5.13M
        ihevce_set_inter_nbr_map(
6084
5.13M
            ps_ctxt->pu1_ctb_nbr_map,
6085
5.13M
            ps_ctxt->i4_nbr_map_strd,
6086
5.13M
            cu_pos_x,
6087
5.13M
            cu_pos_y,
6088
5.13M
            (inter_pu_wd >> 2),
6089
5.13M
            (inter_pu_ht >> 2),
6090
5.13M
            1);
6091
        /* ----------- Motion Compensation for Luma ----------- */
6092
#if !ENABLE_MIXED_INTER_MODE_EVAL
6093
        {
6094
            IV_API_CALL_STATUS_T valid_mv_cand;
6095
6096
            /*If the inter candidate is neither merge cand nor skip cand
6097
            then calculate the mc.*/
6098
            if(0 == skip_or_merge_flag || (ps_ctxt->u1_high_speed_cu_dec_on))
6099
            {
6100
                valid_mv_cand =
6101
                    ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 0);
6102
6103
                /* assert if the MC is given a valid mv candidate */
6104
                ASSERT(valid_mv_cand == IV_SUCCESS);
6105
            }
6106
        }
6107
#endif
6108
5.13M
        if((2 == num_cu_part) && (0 == ctr))
6109
764k
        {
6110
            /* 2Nx__ partion case */
6111
764k
            if(inter_pu_wd == cu_size)
6112
632k
            {
6113
632k
                cu_pos_y += (inter_pu_ht >> 2);
6114
632k
                pu1_pred += (inter_pu_ht * pred_stride);
6115
632k
                ps_nbr_4x4 += (inter_pu_ht >> 2) * (cu_size >> 2);
6116
632k
                ps_left_nbr_4x4 += (inter_pu_ht >> 2) * nbr_4x4_left_strd;
6117
632k
                ps_top_nbr_4x4 = ps_nbr_4x4 - (cu_size >> 2);
6118
632k
                ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - nbr_4x4_left_strd;
6119
632k
            }
6120
6121
            /* __x2N partion case */
6122
764k
            if(inter_pu_ht == cu_size)
6123
131k
            {
6124
131k
                cu_pos_x += (inter_pu_wd >> 2);
6125
131k
                pu1_pred += inter_pu_wd;
6126
131k
                ps_nbr_4x4 += (inter_pu_wd >> 2);
6127
131k
                ps_left_nbr_4x4 = ps_nbr_4x4 - 1;
6128
131k
                ps_top_nbr_4x4 += (inter_pu_wd >> 2);
6129
131k
                ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1;
6130
131k
                nbr_4x4_left_strd = (cu_size >> 2);
6131
131k
            }
6132
764k
        }
6133
5.13M
    }
6134
6135
4.37M
    return (rdopt_cost);
6136
4.37M
}
6137
6138
/*!
6139
******************************************************************************
6140
* \if Function name : ihevce_intra_chroma_pred_mode_selector \endif
6141
*
6142
* \brief
6143
*    Coding unit processing function for chroma special modes (Non-Luma modes)
6144
*
6145
* \param[in] ps_ctxt       enc_loop module ctxt pointer
6146
* \param[in] ps_chrm_cu_buf_prms    ctxt having chroma related prms
6147
* \param[in] ps_cu_analyse      pointer to cu analyse
6148
* \param[in] rd_opt_curr_idx    index in the array of RDopt params
6149
* \param[in] tu_mode            TU_EQ_CU or other case
6150
*
6151
* \return
6152
*    Stores the best SATD mode, its RDOPT cost, CABAC state, TU bits
6153
*
6154
* \author
6155
*  Ittiam
6156
*
6157
*****************************************************************************
6158
*/
6159
UWORD8 ihevce_distortion_based_intra_chroma_mode_selector(
6160
    cu_analyse_t *ps_cu_analyse,
6161
    ihevc_intra_pred_chroma_ref_substitution_ft *pf_ref_substitution,
6162
    pf_intra_pred *ppf_chroma_ip,
6163
    pf_res_trans_luma_had_chroma *ppf_resd_trns_had,
6164
    UWORD8 *pu1_src,
6165
    WORD32 i4_src_stride,
6166
    UWORD8 *pu1_pred,
6167
    WORD32 i4_pred_stride,
6168
    UWORD8 *pu1_ctb_nbr_map,
6169
    WORD32 i4_nbr_map_strd,
6170
    UWORD8 *pu1_ref_sub_out,
6171
    WORD32 i4_alpha_stim_multiplier,
6172
    UWORD8 u1_is_cu_noisy,
6173
    UWORD8 u1_trans_size,
6174
    UWORD8 u1_trans_idx,
6175
    UWORD8 u1_num_tus_in_cu,
6176
    UWORD8 u1_num_4x4_luma_blks_in_tu,
6177
    UWORD8 u1_enable_psyRDOPT,
6178
    UWORD8 u1_is_422)
6179
2.19M
{
6180
2.19M
    UWORD8 u1_chrm_mode;
6181
2.19M
    UWORD8 ctr;
6182
2.19M
    WORD32 i4_subtu_idx;
6183
6184
2.19M
    WORD32 i = 0;
6185
2.19M
    UWORD8 u1_chrm_modes[4] = { 0, 1, 10, 26 };
6186
2.19M
    WORD32 i4_satd_had[4] = { 0 };
6187
2.19M
    WORD32 i4_best_satd_had = INT_MAX;
6188
2.19M
    UWORD8 u1_cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1);
6189
2.19M
    UWORD8 u1_cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1);
6190
2.19M
    WORD32 i4_num_sub_tus = u1_is_422 + 1;
6191
2.19M
    UWORD8 u1_best_chrm_mode = 0;
6192
6193
    /* Get the best satd among all possible modes */
6194
10.9M
    for(i = 0; i < 4; i++)
6195
8.77M
    {
6196
8.77M
        WORD32 left_strd = i4_src_stride;
6197
6198
8.77M
        u1_chrm_mode = (u1_is_422 == 1) ? gau1_chroma422_intra_angle_mapping[u1_chrm_modes[i]]
6199
8.77M
                                        : u1_chrm_modes[i];
6200
6201
        /* loop based on num tus in a cu */
6202
22.7M
        for(ctr = 0; ctr < u1_num_tus_in_cu; ctr++)
6203
13.9M
        {
6204
13.9M
            WORD32 luma_nbr_flags;
6205
13.9M
            WORD32 chrm_pred_func_idx;
6206
6207
13.9M
            WORD32 i4_trans_size_m2 = u1_trans_size << 1;
6208
13.9M
            UWORD8 *pu1_tu_src = pu1_src + ((ctr & 1) * i4_trans_size_m2) +
6209
13.9M
                                 (((ctr > 1) * u1_trans_size * i4_src_stride) << u1_is_422);
6210
13.9M
            UWORD8 *pu1_tu_pred = pu1_pred + ((ctr & 1) * i4_trans_size_m2) +
6211
13.9M
                                  (((ctr > 1) * u1_trans_size * i4_pred_stride) << u1_is_422);
6212
13.9M
            WORD32 i4_curr_tu_pos_x = u1_cu_pos_x + ((ctr & 1) * u1_num_4x4_luma_blks_in_tu);
6213
13.9M
            WORD32 i4_curr_tu_pos_y = u1_cu_pos_y + ((ctr > 1) * u1_num_4x4_luma_blks_in_tu);
6214
6215
13.9M
            luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
6216
13.9M
                pu1_ctb_nbr_map,
6217
13.9M
                i4_nbr_map_strd,
6218
13.9M
                i4_curr_tu_pos_x,
6219
13.9M
                i4_curr_tu_pos_y,
6220
13.9M
                u1_num_4x4_luma_blks_in_tu,
6221
13.9M
                u1_num_4x4_luma_blks_in_tu);
6222
6223
27.9M
            for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++)
6224
13.9M
            {
6225
13.9M
                WORD32 nbr_flags;
6226
6227
13.9M
                UWORD8 *pu1_cur_src =
6228
13.9M
                    pu1_tu_src + ((i4_subtu_idx == 1) * u1_trans_size * i4_src_stride);
6229
13.9M
                UWORD8 *pu1_cur_pred =
6230
13.9M
                    pu1_tu_pred + ((i4_subtu_idx == 1) * u1_trans_size * i4_pred_stride);
6231
13.9M
                UWORD8 *pu1_left = pu1_cur_src - 2;
6232
13.9M
                UWORD8 *pu1_top = pu1_cur_src - i4_src_stride;
6233
13.9M
                UWORD8 *pu1_top_left = pu1_top - 2;
6234
6235
13.9M
                nbr_flags = ihevce_get_intra_chroma_tu_nbr(
6236
13.9M
                    luma_nbr_flags, i4_subtu_idx, u1_trans_size, u1_is_422);
6237
6238
                /* call the chroma reference array substitution */
6239
13.9M
                pf_ref_substitution(
6240
13.9M
                    pu1_top_left,
6241
13.9M
                    pu1_top,
6242
13.9M
                    pu1_left,
6243
13.9M
                    left_strd,
6244
13.9M
                    u1_trans_size,
6245
13.9M
                    nbr_flags,
6246
13.9M
                    pu1_ref_sub_out,
6247
13.9M
                    1);
6248
6249
                /* use the look up to get the function idx */
6250
13.9M
                chrm_pred_func_idx = g_i4_ip_funcs[u1_chrm_mode];
6251
6252
                /* call the intra prediction function */
6253
13.9M
                ppf_chroma_ip[chrm_pred_func_idx](
6254
13.9M
                    pu1_ref_sub_out, 1, pu1_cur_pred, i4_pred_stride, u1_trans_size, u1_chrm_mode);
6255
6256
13.9M
                if(!u1_is_cu_noisy || !i4_alpha_stim_multiplier)
6257
13.9M
                {
6258
                    /* compute Hadamard-transform satd : Cb */
6259
13.9M
                    i4_satd_had[i] += ppf_resd_trns_had[u1_trans_idx - 1](
6260
13.9M
                        pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0);
6261
6262
                    /* compute Hadamard-transform satd : Cr */
6263
13.9M
                    i4_satd_had[i] += ppf_resd_trns_had[u1_trans_idx - 1](
6264
13.9M
                        pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0);
6265
13.9M
                }
6266
0
                else
6267
0
                {
6268
0
                    WORD32 i4_satd;
6269
6270
                    /* compute Hadamard-transform satd : Cb */
6271
0
                    i4_satd = ppf_resd_trns_had[u1_trans_idx - 1](
6272
0
                        pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0);
6273
6274
0
                    i4_satd = ihevce_inject_stim_into_distortion(
6275
0
                        pu1_cur_src,
6276
0
                        i4_src_stride,
6277
0
                        pu1_cur_pred,
6278
0
                        i4_pred_stride,
6279
0
                        i4_satd,
6280
0
                        i4_alpha_stim_multiplier,
6281
0
                        u1_trans_size,
6282
0
                        0,
6283
0
                        u1_enable_psyRDOPT,
6284
0
                        U_PLANE);
6285
6286
0
                    i4_satd_had[i] += i4_satd;
6287
6288
                    /* compute Hadamard-transform satd : Cr */
6289
0
                    i4_satd = ppf_resd_trns_had[u1_trans_idx - 1](
6290
0
                        pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0);
6291
6292
0
                    i4_satd = ihevce_inject_stim_into_distortion(
6293
0
                        pu1_cur_src,
6294
0
                        i4_src_stride,
6295
0
                        pu1_cur_pred,
6296
0
                        i4_pred_stride,
6297
0
                        i4_satd,
6298
0
                        i4_alpha_stim_multiplier,
6299
0
                        u1_trans_size,
6300
0
                        0,
6301
0
                        u1_enable_psyRDOPT,
6302
0
                        V_PLANE);
6303
6304
0
                    i4_satd_had[i] += i4_satd;
6305
0
                }
6306
13.9M
            }
6307
6308
            /* set the neighbour map to 1 */
6309
13.9M
            ihevce_set_nbr_map(
6310
13.9M
                pu1_ctb_nbr_map,
6311
13.9M
                i4_nbr_map_strd,
6312
13.9M
                i4_curr_tu_pos_x,
6313
13.9M
                i4_curr_tu_pos_y,
6314
13.9M
                u1_num_4x4_luma_blks_in_tu,
6315
13.9M
                1);
6316
13.9M
        }
6317
6318
        /* set the neighbour map to 0 */
6319
8.77M
        ihevce_set_nbr_map(
6320
8.77M
            pu1_ctb_nbr_map,
6321
8.77M
            i4_nbr_map_strd,
6322
8.77M
            (ps_cu_analyse->b3_cu_pos_x << 1),
6323
8.77M
            (ps_cu_analyse->b3_cu_pos_y << 1),
6324
8.77M
            (ps_cu_analyse->u1_cu_size >> 2),
6325
8.77M
            0);
6326
6327
        /* Get the least SATD and corresponding mode */
6328
8.77M
        if(i4_best_satd_had > i4_satd_had[i])
6329
2.56M
        {
6330
2.56M
            i4_best_satd_had = i4_satd_had[i];
6331
2.56M
            u1_best_chrm_mode = u1_chrm_mode;
6332
2.56M
        }
6333
8.77M
    }
6334
6335
2.19M
    return u1_best_chrm_mode;
6336
2.19M
}
6337
6338
void ihevce_intra_chroma_pred_mode_selector(
6339
    ihevce_enc_loop_ctxt_t *ps_ctxt,
6340
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
6341
    cu_analyse_t *ps_cu_analyse,
6342
    WORD32 rd_opt_curr_idx,
6343
    WORD32 tu_mode,
6344
    WORD32 i4_alpha_stim_multiplier,
6345
    UWORD8 u1_is_cu_noisy)
6346
2.19M
{
6347
2.19M
    chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt;
6348
6349
2.19M
    ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
6350
6351
2.19M
    UWORD8 *pu1_pred;
6352
2.19M
    WORD32 trans_size;
6353
2.19M
    WORD32 num_tus_in_cu;
6354
2.19M
    WORD32 pred_strd;
6355
2.19M
    WORD32 ctr;
6356
2.19M
    WORD32 i4_subtu_idx;
6357
2.19M
    WORD32 i4_num_sub_tus;
6358
2.19M
    WORD32 trans_idx;
6359
2.19M
    WORD32 scan_idx;
6360
2.19M
    WORD32 num_4x4_luma_in_tu;
6361
2.19M
    WORD32 cu_pos_x;
6362
2.19M
    WORD32 cu_pos_y;
6363
6364
2.19M
    recon_datastore_t *aps_recon_datastore[2] = { &ps_ctxt->as_cu_prms[0].s_recon_datastore,
6365
2.19M
                                                  &ps_ctxt->as_cu_prms[1].s_recon_datastore };
6366
6367
2.19M
    LWORD64 chrm_cod_cost = 0;
6368
2.19M
    WORD32 chrm_tu_bits = 0;
6369
2.19M
    WORD32 best_chrm_mode = DM_CHROMA_IDX;
6370
2.19M
    UWORD8 *pu1_chrm_src = ps_chrm_cu_buf_prms->pu1_curr_src;
6371
2.19M
    WORD32 chrm_src_stride = ps_chrm_cu_buf_prms->i4_chrm_src_stride;
6372
2.19M
    UWORD8 *pu1_cu_left = ps_chrm_cu_buf_prms->pu1_cu_left;
6373
2.19M
    UWORD8 *pu1_cu_top = ps_chrm_cu_buf_prms->pu1_cu_top;
6374
2.19M
    UWORD8 *pu1_cu_top_left = ps_chrm_cu_buf_prms->pu1_cu_top_left;
6375
2.19M
    WORD32 cu_left_stride = ps_chrm_cu_buf_prms->i4_cu_left_stride;
6376
2.19M
    WORD32 cu_size = ps_cu_analyse->u1_cu_size;
6377
2.19M
    WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
6378
2.19M
    WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
6379
2.19M
    UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
6380
6381
2.19M
    ihevc_intra_pred_chroma_ref_substitution_fptr =
6382
2.19M
        ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
6383
2.19M
    i4_num_sub_tus = (u1_is_422 == 1) + 1;
6384
6385
#if DISABLE_RDOQ_INTRA
6386
    i4_perform_rdoq = 0;
6387
#endif
6388
6389
2.19M
    if(TU_EQ_CU == tu_mode)
6390
1.75M
    {
6391
1.75M
        num_tus_in_cu = 1;
6392
1.75M
        trans_size = cu_size >> 1;
6393
1.75M
        num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/
6394
1.75M
        ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode];
6395
1.75M
    }
6396
434k
    else
6397
434k
    {
6398
434k
        num_tus_in_cu = 4;
6399
434k
        trans_size = cu_size >> 2;
6400
434k
        num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/
6401
6402
        /* For 8x8 CU only one TU */
6403
434k
        if(MIN_TU_SIZE > trans_size)
6404
0
        {
6405
0
            trans_size = MIN_TU_SIZE;
6406
0
            num_tus_in_cu = 1;
6407
            /* chroma nbr avail. is derived based on luma.
6408
            for 4x4 chrm use 8x8 luma's size */
6409
0
            num_4x4_luma_in_tu = num_4x4_luma_in_tu << 1;
6410
0
        }
6411
6412
434k
        ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode];
6413
434k
    }
6414
6415
    /* Can't be TU_EQ_SUBCU case */
6416
2.19M
    ASSERT(TU_EQ_SUBCU != tu_mode);
6417
6418
    /* translate the transform size to index */
6419
2.19M
    trans_idx = trans_size >> 2;
6420
6421
2.19M
    pu1_pred = (UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data;
6422
6423
2.19M
    pred_strd = ps_chr_intra_satd_ctxt->i4_pred_stride;
6424
6425
    /* for 16x16 cases */
6426
2.19M
    if(16 == trans_size)
6427
363k
    {
6428
363k
        trans_idx = 3;
6429
363k
    }
6430
6431
2.19M
    best_chrm_mode = ihevce_distortion_based_intra_chroma_mode_selector(
6432
2.19M
        ps_cu_analyse,
6433
2.19M
        ihevc_intra_pred_chroma_ref_substitution_fptr,
6434
2.19M
        ps_ctxt->apf_chrm_ip,
6435
2.19M
        ps_ctxt->apf_chrm_resd_trns_had,
6436
2.19M
        pu1_chrm_src,
6437
2.19M
        chrm_src_stride,
6438
2.19M
        pu1_pred,
6439
2.19M
        pred_strd,
6440
2.19M
        ps_ctxt->pu1_ctb_nbr_map,
6441
2.19M
        ps_ctxt->i4_nbr_map_strd,
6442
2.19M
        (UWORD8 *)ps_ctxt->pv_ref_sub_out,
6443
2.19M
        i4_alpha_stim_multiplier,
6444
2.19M
        u1_is_cu_noisy,
6445
2.19M
        trans_size,
6446
2.19M
        trans_idx,
6447
2.19M
        num_tus_in_cu,
6448
2.19M
        num_4x4_luma_in_tu,
6449
2.19M
        ps_ctxt->u1_enable_psyRDOPT,
6450
2.19M
        u1_is_422);
6451
6452
    /* Store the best chroma mode */
6453
2.19M
    ps_chr_intra_satd_ctxt->u1_best_cr_mode = best_chrm_mode;
6454
6455
    /* evaluate RDOPT cost for the Best mode */
6456
2.19M
    {
6457
2.19M
        WORD32 i4_subtu_pos_x;
6458
2.19M
        WORD32 i4_subtu_pos_y;
6459
2.19M
        UWORD8 u1_compute_spatial_ssd;
6460
6461
2.19M
        WORD32 ai4_total_bytes_offset_cb[2] = { 0, 0 };
6462
2.19M
        WORD32 ai4_total_bytes_offset_cr[2] = { 0, 0 };
6463
        /* State for prefix bin of chroma intra pred mode before CU encode */
6464
2.19M
        UWORD8 u1_chroma_intra_mode_prefix_state =
6465
2.19M
            ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_CHROMA_PRED_MODE];
6466
2.19M
        WORD32 luma_trans_size = trans_size << 1;
6467
2.19M
        WORD32 calc_recon = 0;
6468
2.19M
        UWORD8 *pu1_left = pu1_cu_left;
6469
2.19M
        UWORD8 *pu1_top = pu1_cu_top;
6470
2.19M
        UWORD8 *pu1_top_left = pu1_cu_top_left;
6471
2.19M
        WORD32 left_strd = cu_left_stride;
6472
6473
2.19M
        if(ps_ctxt->i1_cu_qp_delta_enable)
6474
938k
        {
6475
938k
            ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, luma_trans_size, 1);
6476
938k
        }
6477
6478
2.19M
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
6479
2.19M
                                 (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
6480
2.19M
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
6481
6482
2.19M
        if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
6483
0
        {
6484
0
            u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
6485
0
                                     CONVERT_SSDS_TO_SPATIAL_DOMAIN;
6486
0
        }
6487
6488
        /* get the 4x4 level position of current cu */
6489
2.19M
        cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1);
6490
2.19M
        cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1);
6491
6492
2.19M
        calc_recon = !u1_compute_spatial_ssd && ((4 == num_tus_in_cu) || (u1_is_422 == 1));
6493
6494
2.19M
        if(calc_recon || u1_compute_spatial_ssd)
6495
1.57M
        {
6496
1.57M
            aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1;
6497
1.57M
            aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1;
6498
1.57M
        }
6499
617k
        else
6500
617k
        {
6501
617k
            aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0;
6502
617k
            aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0;
6503
617k
        }
6504
6505
        /* loop based on num tus in a cu */
6506
5.69M
        for(ctr = 0; ctr < num_tus_in_cu; ctr++)
6507
3.49M
        {
6508
3.49M
            WORD16 *pi2_cur_deq_data_cb;
6509
3.49M
            WORD16 *pi2_cur_deq_data_cr;
6510
6511
3.49M
            WORD32 deq_data_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride;
6512
3.49M
            WORD32 luma_nbr_flags = 0;
6513
6514
3.49M
            luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
6515
3.49M
                ps_ctxt->pu1_ctb_nbr_map,
6516
3.49M
                ps_ctxt->i4_nbr_map_strd,
6517
3.49M
                (ctr & 1) * (luma_trans_size >> 2) + cu_pos_x,
6518
3.49M
                (ctr > 1) * (luma_trans_size >> 2) + cu_pos_y,
6519
3.49M
                (luma_trans_size >> 2),
6520
3.49M
                (luma_trans_size >> 2));
6521
6522
6.99M
            for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++)
6523
3.49M
            {
6524
3.49M
                WORD32 cbf, num_bytes;
6525
3.49M
                LWORD64 trans_ssd_u, trans_ssd_v;
6526
3.49M
                UWORD8 u1_is_recon_available;
6527
6528
3.49M
                WORD32 trans_size_m2 = trans_size << 1;
6529
3.49M
                UWORD8 *pu1_cur_src = pu1_chrm_src + ((ctr & 1) * trans_size_m2) +
6530
3.49M
                                      (((ctr > 1) * trans_size * chrm_src_stride) << u1_is_422) +
6531
3.49M
                                      (i4_subtu_idx * trans_size * chrm_src_stride);
6532
3.49M
                UWORD8 *pu1_cur_pred = pu1_pred + ((ctr & 1) * trans_size_m2) +
6533
3.49M
                                       (((ctr > 1) * trans_size * pred_strd) << u1_is_422) +
6534
3.49M
                                       (i4_subtu_idx * trans_size * pred_strd);
6535
3.49M
                WORD32 i4_recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride;
6536
3.49M
                UWORD8 *pu1_cur_recon = ((UWORD8 *)aps_recon_datastore[0]
6537
3.49M
                                             ->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)]) +
6538
3.49M
                                        ((ctr & 1) * trans_size_m2) +
6539
3.49M
                                        (((ctr > 1) * trans_size * i4_recon_stride) << u1_is_422) +
6540
3.49M
                                        (i4_subtu_idx * trans_size * i4_recon_stride);
6541
6542
                /* Use Chroma coeff/iq buf of the cur. intra cand. Not rememb.
6543
                chroma coeff/iq for high quality intra SATD special modes. Will
6544
                be over written by coeff of luma mode in chroma_rdopt call */
6545
3.49M
                UWORD8 *pu1_ecd_data_cb =
6546
3.49M
                    &ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0];
6547
3.49M
                UWORD8 *pu1_ecd_data_cr =
6548
3.49M
                    &ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0];
6549
6550
3.49M
                WORD32 chrm_pred_func_idx = 0;
6551
3.49M
                LWORD64 curr_cb_cod_cost = 0;
6552
3.49M
                LWORD64 curr_cr_cod_cost = 0;
6553
3.49M
                WORD32 nbr_flags = 0;
6554
6555
3.49M
                i4_subtu_pos_x = (((ctr & 1) * trans_size_m2) >> 2);
6556
3.49M
                i4_subtu_pos_y = (((ctr > 1) * trans_size) >> (!u1_is_422 + 1)) +
6557
3.49M
                                 ((i4_subtu_idx * trans_size) >> 2);
6558
3.49M
                pi2_cur_deq_data_cb = &ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] +
6559
3.49M
                                      ((ctr & 1) * trans_size) +
6560
3.49M
                                      (((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) +
6561
3.49M
                                      (i4_subtu_idx * trans_size * deq_data_strd);
6562
3.49M
                pi2_cur_deq_data_cr = &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] +
6563
3.49M
                                      ((ctr & 1) * trans_size) +
6564
3.49M
                                      (((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) +
6565
3.49M
                                      (i4_subtu_idx * trans_size * deq_data_strd);
6566
6567
                /* left cu boundary */
6568
3.49M
                if(0 == i4_subtu_pos_x)
6569
2.62M
                {
6570
2.62M
                    left_strd = cu_left_stride;
6571
2.62M
                    pu1_left = pu1_cu_left + (i4_subtu_pos_y << 2) * left_strd;
6572
2.62M
                }
6573
869k
                else
6574
869k
                {
6575
869k
                    pu1_left = pu1_cur_recon - 2;
6576
869k
                    left_strd = i4_recon_stride;
6577
869k
                }
6578
6579
                /* top cu boundary */
6580
3.49M
                if(0 == i4_subtu_pos_y)
6581
2.62M
                {
6582
2.62M
                    pu1_top = pu1_cu_top + (i4_subtu_pos_x << 2);
6583
2.62M
                }
6584
869k
                else
6585
869k
                {
6586
869k
                    pu1_top = pu1_cur_recon - i4_recon_stride;
6587
869k
                }
6588
6589
                /* by default top left is set to cu top left */
6590
3.49M
                pu1_top_left = pu1_cu_top_left;
6591
6592
                /* top left based on position */
6593
3.49M
                if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x))
6594
434k
                {
6595
434k
                    pu1_top_left = pu1_left - left_strd;
6596
434k
                }
6597
3.06M
                else if(0 != i4_subtu_pos_x)
6598
869k
                {
6599
869k
                    pu1_top_left = pu1_top - 2;
6600
869k
                }
6601
6602
                /* populate the coeffs scan idx */
6603
3.49M
                scan_idx = SCAN_DIAG_UPRIGHT;
6604
6605
                /* RDOPT copy States :  TU init (best until prev TU) to current */
6606
3.49M
                COPY_CABAC_STATES(
6607
3.49M
                    &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6608
3.49M
                         .s_cabac_ctxt.au1_ctxt_models[0],
6609
3.49M
                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6610
3.49M
                    IHEVC_CAB_CTXT_END);
6611
6612
                /* for 4x4 transforms the scan is chosen based on the intra pred mode */
6613
3.49M
                if(4 == trans_size)
6614
1.63M
                {
6615
                    /* for modes from 22 upto 30 horizontal scan is used */
6616
1.63M
                    if((best_chrm_mode > 21) && (best_chrm_mode < 31))
6617
52.1k
                    {
6618
52.1k
                        scan_idx = SCAN_HORZ;
6619
52.1k
                    }
6620
                    /* for modes from 6 upto 14 vertical scan is used */
6621
1.58M
                    else if((best_chrm_mode > 5) && (best_chrm_mode < 15))
6622
109k
                    {
6623
109k
                        scan_idx = SCAN_VERT;
6624
109k
                    }
6625
1.63M
                }
6626
6627
3.49M
                nbr_flags = ihevce_get_intra_chroma_tu_nbr(
6628
3.49M
                    luma_nbr_flags, i4_subtu_idx, trans_size, u1_is_422);
6629
6630
                /* call the chroma reference array substitution */
6631
3.49M
                ihevc_intra_pred_chroma_ref_substitution_fptr(
6632
3.49M
                    pu1_top_left,
6633
3.49M
                    pu1_top,
6634
3.49M
                    pu1_left,
6635
3.49M
                    left_strd,
6636
3.49M
                    trans_size,
6637
3.49M
                    nbr_flags,
6638
3.49M
                    (UWORD8 *)ps_ctxt->pv_ref_sub_out,
6639
3.49M
                    1);
6640
6641
                /* use the look up to get the function idx */
6642
3.49M
                chrm_pred_func_idx = g_i4_ip_funcs[best_chrm_mode];
6643
6644
                /* call the intra prediction function */
6645
3.49M
                ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
6646
3.49M
                    (UWORD8 *)ps_ctxt->pv_ref_sub_out,
6647
3.49M
                    1,
6648
3.49M
                    pu1_cur_pred,
6649
3.49M
                    pred_strd,
6650
3.49M
                    trans_size,
6651
3.49M
                    best_chrm_mode);
6652
6653
                /* UPLANE RDOPT Loop */
6654
3.49M
                {
6655
3.49M
                    WORD32 tu_bits;
6656
6657
3.49M
                    cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
6658
3.49M
                        ps_ctxt,
6659
3.49M
                        pu1_cur_pred,
6660
3.49M
                        pred_strd,
6661
3.49M
                        pu1_cur_src,
6662
3.49M
                        chrm_src_stride,
6663
3.49M
                        pi2_cur_deq_data_cb,
6664
3.49M
                        deq_data_strd,
6665
3.49M
                        pu1_cur_recon,
6666
3.49M
                        i4_recon_stride,
6667
3.49M
                        pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx],
6668
3.49M
                        ps_ctxt->au1_cu_csbf,
6669
3.49M
                        ps_ctxt->i4_cu_csbf_strd,
6670
3.49M
                        trans_size,
6671
3.49M
                        scan_idx,
6672
3.49M
                        1,
6673
3.49M
                        &num_bytes,
6674
3.49M
                        &tu_bits,
6675
3.49M
                        &ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr],
6676
3.49M
                        &ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr],
6677
3.49M
                        &u1_is_recon_available,
6678
3.49M
                        i4_perform_sbh,
6679
3.49M
                        i4_perform_rdoq,
6680
3.49M
                        &trans_ssd_u,
6681
3.49M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
6682
3.49M
                        i4_alpha_stim_multiplier,
6683
3.49M
                        u1_is_cu_noisy,
6684
3.49M
#endif
6685
3.49M
                        0,
6686
3.49M
                        u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
6687
3.49M
                        U_PLANE);
6688
6689
#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL
6690
                    if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
6691
                    {
6692
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
6693
                        trans_ssd_u = ihevce_inject_stim_into_distortion(
6694
                            pu1_cur_src,
6695
                            chrm_src_stride,
6696
                            pu1_cur_pred,
6697
                            pred_strd,
6698
                            trans_ssd_u,
6699
                            i4_alpha_stim_multiplier,
6700
                            trans_size,
6701
                            0,
6702
                            ps_ctxt->u1_enable_psyRDOPT,
6703
                            U_PLANE);
6704
#else
6705
                        if(u1_compute_spatial_ssd && u1_is_recon_available)
6706
                        {
6707
                            trans_ssd_u = ihevce_inject_stim_into_distortion(
6708
                                pu1_cur_src,
6709
                                chrm_src_stride,
6710
                                pu1_cur_recon,
6711
                                i4_recon_stride,
6712
                                trans_ssd_u,
6713
                                i4_alpha_stim_multiplier,
6714
                                trans_size,
6715
                                0,
6716
                                ps_ctxt->u1_enable_psyRDOPT,
6717
                                U_PLANE);
6718
                        }
6719
                        else
6720
                        {
6721
                            trans_ssd_u = ihevce_inject_stim_into_distortion(
6722
                                pu1_cur_src,
6723
                                chrm_src_stride,
6724
                                pu1_cur_pred,
6725
                                pred_strd,
6726
                                trans_ssd_u,
6727
                                i4_alpha_stim_multiplier,
6728
                                trans_size,
6729
                                0,
6730
                                ps_ctxt->u1_enable_psyRDOPT,
6731
                                U_PLANE);
6732
                        }
6733
#endif
6734
                    }
6735
#endif
6736
6737
                    /* RDOPT copy States :  New updated after curr TU to TU init */
6738
3.49M
                    if(0 != cbf)
6739
371k
                    {
6740
371k
                        memcpy(
6741
371k
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6742
371k
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6743
371k
                                 .s_cabac_ctxt.au1_ctxt_models[0],
6744
371k
                            IHEVC_CAB_CTXT_END);
6745
371k
                    }
6746
                    /* RDOPT copy States :  Restoring back the Cb init state to Cr */
6747
3.12M
                    else
6748
3.12M
                    {
6749
3.12M
                        memcpy(
6750
3.12M
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6751
3.12M
                                 .s_cabac_ctxt.au1_ctxt_models[0],
6752
3.12M
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6753
3.12M
                            IHEVC_CAB_CTXT_END);
6754
3.12M
                    }
6755
6756
3.49M
                    if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd))
6757
533k
                    {
6758
533k
                        ihevce_chroma_it_recon_fxn(
6759
533k
                            ps_ctxt,
6760
533k
                            pi2_cur_deq_data_cb,
6761
533k
                            deq_data_strd,
6762
533k
                            pu1_cur_pred,
6763
533k
                            pred_strd,
6764
533k
                            pu1_cur_recon,
6765
533k
                            i4_recon_stride,
6766
533k
                            (pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx]),
6767
533k
                            trans_size,
6768
533k
                            cbf,
6769
533k
                            ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr],
6770
533k
                            ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr],
6771
533k
                            U_PLANE);
6772
533k
                    }
6773
6774
3.49M
                    ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr] = cbf;
6775
3.49M
                    curr_cb_cod_cost =
6776
3.49M
                        trans_ssd_u +
6777
3.49M
                        COMPUTE_RATE_COST_CLIP30(
6778
3.49M
                            tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
6779
3.49M
                    chrm_tu_bits += tu_bits;
6780
3.49M
                    ai4_total_bytes_offset_cb[i4_subtu_idx] += num_bytes;
6781
3.49M
                    ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr] =
6782
3.49M
                        num_bytes;
6783
3.49M
                }
6784
6785
                /* VPLANE RDOPT Loop */
6786
3.49M
                {
6787
3.49M
                    WORD32 tu_bits;
6788
6789
3.49M
                    cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
6790
3.49M
                        ps_ctxt,
6791
3.49M
                        pu1_cur_pred,
6792
3.49M
                        pred_strd,
6793
3.49M
                        pu1_cur_src,
6794
3.49M
                        chrm_src_stride,
6795
3.49M
                        pi2_cur_deq_data_cr,
6796
3.49M
                        deq_data_strd,
6797
3.49M
                        pu1_cur_recon,
6798
3.49M
                        i4_recon_stride,
6799
3.49M
                        pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx],
6800
3.49M
                        ps_ctxt->au1_cu_csbf,
6801
3.49M
                        ps_ctxt->i4_cu_csbf_strd,
6802
3.49M
                        trans_size,
6803
3.49M
                        scan_idx,
6804
3.49M
                        1,
6805
3.49M
                        &num_bytes,
6806
3.49M
                        &tu_bits,
6807
3.49M
                        &ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr],
6808
3.49M
                        &ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr],
6809
3.49M
                        &u1_is_recon_available,
6810
3.49M
                        i4_perform_sbh,
6811
3.49M
                        i4_perform_rdoq,
6812
3.49M
                        &trans_ssd_v,
6813
3.49M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
6814
3.49M
                        i4_alpha_stim_multiplier,
6815
3.49M
                        u1_is_cu_noisy,
6816
3.49M
#endif
6817
3.49M
                        0,
6818
3.49M
                        u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
6819
3.49M
                        V_PLANE);
6820
6821
#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL
6822
                    if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
6823
                    {
6824
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
6825
                        trans_ssd_v = ihevce_inject_stim_into_distortion(
6826
                            pu1_cur_src,
6827
                            chrm_src_stride,
6828
                            pu1_cur_pred,
6829
                            pred_strd,
6830
                            trans_ssd_v,
6831
                            i4_alpha_stim_multiplier,
6832
                            trans_size,
6833
                            0,
6834
                            ps_ctxt->u1_enable_psyRDOPT,
6835
                            V_PLANE);
6836
#else
6837
                        if(u1_compute_spatial_ssd && u1_is_recon_available)
6838
                        {
6839
                            trans_ssd_v = ihevce_inject_stim_into_distortion(
6840
                                pu1_cur_src,
6841
                                chrm_src_stride,
6842
                                pu1_cur_recon,
6843
                                i4_recon_stride,
6844
                                trans_ssd_v,
6845
                                i4_alpha_stim_multiplier,
6846
                                trans_size,
6847
                                0,
6848
                                ps_ctxt->u1_enable_psyRDOPT,
6849
                                V_PLANE);
6850
                        }
6851
                        else
6852
                        {
6853
                            trans_ssd_v = ihevce_inject_stim_into_distortion(
6854
                                pu1_cur_src,
6855
                                chrm_src_stride,
6856
                                pu1_cur_pred,
6857
                                pred_strd,
6858
                                trans_ssd_v,
6859
                                i4_alpha_stim_multiplier,
6860
                                trans_size,
6861
                                0,
6862
                                ps_ctxt->u1_enable_psyRDOPT,
6863
                                V_PLANE);
6864
                        }
6865
#endif
6866
                    }
6867
#endif
6868
6869
                    /* RDOPT copy States :  New updated after curr TU to TU init */
6870
3.49M
                    if(0 != cbf)
6871
360k
                    {
6872
360k
                        COPY_CABAC_STATES(
6873
360k
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6874
360k
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6875
360k
                                 .s_cabac_ctxt.au1_ctxt_models[0],
6876
360k
                            IHEVC_CAB_CTXT_END);
6877
360k
                    }
6878
                    /* RDOPT copy States :  Restoring back the Cb init state to Cr */
6879
3.13M
                    else
6880
3.13M
                    {
6881
3.13M
                        COPY_CABAC_STATES(
6882
3.13M
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6883
3.13M
                                 .s_cabac_ctxt.au1_ctxt_models[0],
6884
3.13M
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6885
3.13M
                            IHEVC_CAB_CTXT_END);
6886
3.13M
                    }
6887
6888
3.49M
                    if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd))
6889
533k
                    {
6890
533k
                        ihevce_chroma_it_recon_fxn(
6891
533k
                            ps_ctxt,
6892
533k
                            pi2_cur_deq_data_cr,
6893
533k
                            deq_data_strd,
6894
533k
                            pu1_cur_pred,
6895
533k
                            pred_strd,
6896
533k
                            pu1_cur_recon,
6897
533k
                            i4_recon_stride,
6898
533k
                            (pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx]),
6899
533k
                            trans_size,
6900
533k
                            cbf,
6901
533k
                            ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr],
6902
533k
                            ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr],
6903
533k
                            V_PLANE);
6904
533k
                    }
6905
6906
3.49M
                    ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr] = cbf;
6907
3.49M
                    curr_cr_cod_cost =
6908
3.49M
                        trans_ssd_v +
6909
3.49M
                        COMPUTE_RATE_COST_CLIP30(
6910
3.49M
                            tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
6911
3.49M
                    chrm_tu_bits += tu_bits;
6912
3.49M
                    ai4_total_bytes_offset_cr[i4_subtu_idx] += num_bytes;
6913
3.49M
                    ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr] =
6914
3.49M
                        num_bytes;
6915
3.49M
                }
6916
6917
3.49M
                chrm_cod_cost += curr_cb_cod_cost;
6918
3.49M
                chrm_cod_cost += curr_cr_cod_cost;
6919
3.49M
            }
6920
6921
            /* set the neighbour map to 1 */
6922
3.49M
            ihevce_set_nbr_map(
6923
3.49M
                ps_ctxt->pu1_ctb_nbr_map,
6924
3.49M
                ps_ctxt->i4_nbr_map_strd,
6925
3.49M
                (ctr & 1) * (luma_trans_size >> 2) + cu_pos_x,
6926
3.49M
                (ctr > 1) * (luma_trans_size >> 2) + cu_pos_y,
6927
3.49M
                (luma_trans_size >> 2),
6928
3.49M
                1);
6929
3.49M
        }
6930
6931
        /* set the neighbour map to 0 */
6932
2.19M
        ihevce_set_nbr_map(
6933
2.19M
            ps_ctxt->pu1_ctb_nbr_map,
6934
2.19M
            ps_ctxt->i4_nbr_map_strd,
6935
2.19M
            (ps_cu_analyse->b3_cu_pos_x << 1),
6936
2.19M
            (ps_cu_analyse->b3_cu_pos_y << 1),
6937
2.19M
            (ps_cu_analyse->u1_cu_size >> 2),
6938
2.19M
            0);
6939
6940
        /* Account for coding b3_chroma_intra_pred_mode prefix and suffix bins */
6941
        /* This is done by adding the bits for signalling chroma mode (0-3)    */
6942
        /* and subtracting the bits for chroma mode same as luma mode (4)      */
6943
2.19M
#if CHROMA_RDOPT_ENABLE
6944
2.19M
        {
6945
            /* Estimate bits to encode prefix bin as 1 for b3_chroma_intra_pred_mode */
6946
2.19M
            WORD32 bits_frac_1 =
6947
2.19M
                gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 1];
6948
6949
2.19M
            WORD32 bits_for_mode_0to3 = (2 << CABAC_FRAC_BITS_Q) + bits_frac_1;
6950
6951
            /* Estimate bits to encode prefix bin as 0 for b3_chroma_intra_pred_mode */
6952
2.19M
            WORD32 bits_for_mode4 =
6953
2.19M
                gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 0];
6954
6955
            /* accumulate into final rd cost for chroma */
6956
2.19M
            ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode = COMPUTE_RATE_COST_CLIP30(
6957
2.19M
                (bits_for_mode_0to3 - bits_for_mode4),
6958
2.19M
                ps_ctxt->i8_cl_ssd_lambda_chroma_qf,
6959
2.19M
                (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
6960
6961
2.19M
            chrm_cod_cost += ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode;
6962
2.19M
        }
6963
2.19M
#endif
6964
6965
2.19M
        if(ps_ctxt->u1_enable_psyRDOPT)
6966
0
        {
6967
0
            UWORD8 *pu1_recon_cu;
6968
0
            WORD32 recon_stride;
6969
0
            WORD32 curr_pos_x;
6970
0
            WORD32 curr_pos_y;
6971
0
            WORD32 start_index;
6972
0
            WORD32 num_horz_cu_in_ctb;
6973
0
            WORD32 had_block_size;
6974
6975
            /* tODO: sreenivasa ctb size has to be used appropriately */
6976
0
            had_block_size = 8;
6977
0
            num_horz_cu_in_ctb = 2 * 64 / had_block_size;
6978
0
            curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
6979
0
            curr_pos_y = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
6980
0
            recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride;
6981
0
            pu1_recon_cu =
6982
0
                aps_recon_datastore[0]->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)];  //
6983
6984
            /* start index to index the source satd of curr cu in the current ctb */
6985
0
            start_index = 2 * (curr_pos_x / had_block_size) +
6986
0
                          (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
6987
6988
0
            {
6989
0
                chrm_cod_cost += ihevce_psy_rd_cost_croma(
6990
0
                    ps_ctxt->ai4_source_chroma_satd,
6991
0
                    pu1_recon_cu,
6992
0
                    recon_stride,
6993
0
                    1,  //
6994
0
                    cu_size,
6995
0
                    0,  // pic type
6996
0
                    0,  //layer id
6997
0
                    ps_ctxt->i4_satd_lamda,  // lambda
6998
0
                    start_index,
6999
0
                    ps_ctxt->u1_is_input_data_hbd,  // 8 bit
7000
0
                    ps_ctxt->u1_chroma_array_type,
7001
0
                    &ps_ctxt->s_cmn_opt_func
7002
7003
0
                );  // chroma subsampling 420
7004
0
            }
7005
0
        }
7006
7007
2.19M
        ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt = chrm_cod_cost;
7008
2.19M
        ps_chr_intra_satd_ctxt->i4_chrm_tu_bits = chrm_tu_bits;
7009
7010
2.19M
        memcpy(
7011
2.19M
            &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0],
7012
2.19M
            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7013
2.19M
            IHEVC_CAB_CTXT_END);
7014
2.19M
    }
7015
2.19M
}
7016
7017
/*!
7018
******************************************************************************
7019
* \if Function name : ihevce_chroma_cu_prcs_rdopt \endif
7020
*
7021
* \brief
7022
*    Coding unit processing function for chroma
7023
*
7024
* \param[in] ps_ctxt    enc_loop module ctxt pointer
7025
* \param[in] rd_opt_curr_idx index in the array of RDopt params
7026
* \param[in] func_proc_mode TU_EQ_CU or other case
7027
* \param[in] pu1_chrm_src  pointer to source data buffer
7028
* \param[in] chrm_src_stride   source buffer stride
7029
* \param[in] pu1_cu_left pointer to left recon data buffer
7030
* \param[in] pu1_cu_top  pointer to top recon data buffer
7031
* \param[in] pu1_cu_top_left pointer to top left recon data buffer
7032
* \param[in] cu_left_stride left recon buffer stride
7033
* \param[in] cu_pos_x position x of current CU in CTB
7034
* \param[in] cu_pos_y position y of current CU in CTB
7035
* \param[out] pi4_chrm_tu_bits pointer to store the total chroma bits
7036
*
7037
* \return
7038
*    Chroma coding cost (Cb and Cr included)
7039
*
7040
* \author
7041
*  Ittiam
7042
*
7043
*****************************************************************************
7044
*/
7045
LWORD64 ihevce_chroma_cu_prcs_rdopt(
7046
    ihevce_enc_loop_ctxt_t *ps_ctxt,
7047
    WORD32 rd_opt_curr_idx,
7048
    WORD32 func_proc_mode,
7049
    UWORD8 *pu1_chrm_src,
7050
    WORD32 chrm_src_stride,
7051
    UWORD8 *pu1_cu_left,
7052
    UWORD8 *pu1_cu_top,
7053
    UWORD8 *pu1_cu_top_left,
7054
    WORD32 cu_left_stride,
7055
    WORD32 cu_pos_x,
7056
    WORD32 cu_pos_y,
7057
    WORD32 *pi4_chrm_tu_bits,
7058
    WORD32 i4_alpha_stim_multiplier,
7059
    UWORD8 u1_is_cu_noisy)
7060
10.6M
{
7061
10.6M
    tu_enc_loop_out_t *ps_tu;
7062
10.6M
    tu_enc_loop_temp_prms_t *ps_tu_temp_prms;
7063
7064
10.6M
    ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
7065
7066
10.6M
    UWORD8 *pu1_pred;
7067
10.6M
    UWORD8 *pu1_recon;
7068
10.6M
    WORD32 i4_recon_stride;
7069
10.6M
    WORD32 cu_size, trans_size = 0;
7070
10.6M
    WORD32 pred_strd;
7071
10.6M
    WORD32 ctr, i4_subtu_idx;
7072
10.6M
    WORD32 scan_idx;
7073
10.6M
    WORD32 u1_is_cu_coded_old;
7074
10.6M
    WORD32 init_bytes_offset;
7075
7076
10.6M
    enc_loop_cu_final_prms_t *ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_curr_idx];
7077
10.6M
    recon_datastore_t *ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore;
7078
7079
10.6M
    WORD32 total_bytes_offset = 0;
7080
10.6M
    LWORD64 chrm_cod_cost = 0;
7081
10.6M
    WORD32 chrm_tu_bits = 0;
7082
10.6M
    WORD32 chrm_pred_mode = DM_CHROMA_IDX, luma_pred_mode = 35;
7083
10.6M
    LWORD64 i8_ssd_cb = 0;
7084
10.6M
    WORD32 i4_bits_cb = 0;
7085
10.6M
    LWORD64 i8_ssd_cr = 0;
7086
10.6M
    WORD32 i4_bits_cr = 0;
7087
10.6M
    UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
7088
10.6M
    UWORD8 u1_num_tus =
7089
        /* NumChromaTU's = 1, if TUSize = 4 and CUSize = 8 */
7090
10.6M
        (!ps_best_cu_prms->as_tu_enc_loop[0].s_tu.b3_size && ps_best_cu_prms->u1_intra_flag)
7091
10.6M
            ? 1
7092
10.6M
            : ps_best_cu_prms->u2_num_tus_in_cu;
7093
10.6M
    UWORD8 u1_num_subtus_in_tu = u1_is_422 + 1;
7094
10.6M
    UWORD8 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
7095
10.6M
                                    (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
7096
10.6M
                                    CONVERT_SSDS_TO_SPATIAL_DOMAIN;
7097
    /* Get the RDOPT cost of the best CU mode for early_exit */
7098
10.6M
    LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!rd_opt_curr_idx].i8_best_rdopt_cost;
7099
    /* Get the current running RDOPT (Luma RDOPT) for early_exit */
7100
10.6M
    LWORD64 curr_rdopt_cost = ps_ctxt->as_cu_prms[rd_opt_curr_idx].i8_curr_rdopt_cost;
7101
10.6M
    WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
7102
10.6M
    WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
7103
7104
10.6M
    ihevc_intra_pred_chroma_ref_substitution_fptr =
7105
10.6M
        ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
7106
7107
10.6M
    if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
7108
0
    {
7109
0
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
7110
0
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
7111
0
    }
7112
7113
    /* Store the init bytes offset from luma */
7114
10.6M
    init_bytes_offset = ps_best_cu_prms->i4_num_bytes_ecd_data;
7115
7116
    /* Unused pred buffer in merge_skip_pred_data_t structure is used as
7117
    Chroma pred storage buf. for final_recon function.
7118
    The buffer is split into two and used as a ping-pong buffer */
7119
10.6M
    pu1_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
7120
10.6M
               rd_opt_curr_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) +
7121
10.6M
                                  (u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
7122
7123
10.6M
    pred_strd = ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
7124
7125
10.6M
    pu1_recon = (UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs[0];
7126
10.6M
    i4_recon_stride = ps_recon_datastore->i4_chromaRecon_stride;
7127
10.6M
    cu_size = ps_best_cu_prms->u1_cu_size;
7128
10.6M
    chrm_tu_bits = 0;
7129
7130
    /* get the first TU pointer */
7131
10.6M
    ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
7132
    /* get the first TU enc_loop temp prms pointer */
7133
10.6M
    ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
7134
7135
10.6M
    if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag)
7136
7.06M
    {
7137
        /* Mode signalled by intra prediction for luma */
7138
7.06M
        luma_pred_mode = ps_best_cu_prms->au1_intra_pred_mode[0];
7139
7140
#if DISABLE_RDOQ_INTRA
7141
        i4_perform_rdoq = 0;
7142
#endif
7143
7.06M
    }
7144
7145
3.54M
    else
7146
3.54M
    {
7147
3.54M
        UWORD8 *pu1_pred_org = pu1_pred;
7148
7149
        /* ------ Motion Compensation for Chroma -------- */
7150
7.85M
        for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++)
7151
4.30M
        {
7152
4.30M
            pu_t *ps_pu;
7153
4.30M
            WORD32 inter_pu_wd;
7154
4.30M
            WORD32 inter_pu_ht;
7155
7156
4.30M
            ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr];
7157
7158
4.30M
            inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
7159
4.30M
            inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
7160
4.30M
            inter_pu_ht <<= u1_is_422;
7161
7162
4.30M
            ihevce_chroma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_strd);
7163
7164
4.30M
            if(2 == ps_best_cu_prms->u2_num_pus_in_cu)
7165
1.51M
            {
7166
                /* 2Nx__ partion case */
7167
1.51M
                if(inter_pu_wd == cu_size)
7168
1.25M
                {
7169
1.25M
                    pu1_pred += (inter_pu_ht * pred_strd);
7170
1.25M
                }
7171
7172
                /* __x2N partion case */
7173
1.51M
                if(inter_pu_ht == (cu_size >> (u1_is_422 == 0)))
7174
257k
                {
7175
257k
                    pu1_pred += inter_pu_wd;
7176
257k
                }
7177
1.51M
            }
7178
4.30M
        }
7179
7180
        /* restore the pred pointer to start for transform loop */
7181
3.54M
        pu1_pred = pu1_pred_org;
7182
3.54M
    }
7183
7184
    /* Used to store back only the luma based info. if SATD based chorma
7185
    mode also comes */
7186
10.6M
    u1_is_cu_coded_old = ps_best_cu_prms->u1_is_cu_coded;
7187
7188
    /* evaluate chroma candidates (same as luma) and
7189
    if INTRA & HIGH_QUALITY compare with best SATD mode */
7190
10.6M
    {
7191
10.6M
        WORD32 calc_recon = 0, deq_data_strd;
7192
10.6M
        WORD16 *pi2_deq_data;
7193
10.6M
        UWORD8 *pu1_ecd_data;
7194
10.6M
        UWORD8 u1_is_mode_eq_chroma_satd_mode = 0;
7195
7196
10.6M
        pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0];
7197
10.6M
        pi2_deq_data += ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx;
7198
10.6M
        deq_data_strd = cu_size;
7199
        /* update ecd buffer for storing coeff. */
7200
10.6M
        pu1_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0];
7201
10.6M
        pu1_ecd_data += init_bytes_offset;
7202
        /* store chroma starting index */
7203
10.6M
        ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx = init_bytes_offset;
7204
7205
        /* get the first TU pointer */
7206
10.6M
        ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
7207
10.6M
        ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
7208
7209
        /* Reset total_bytes_offset for each candidate */
7210
10.6M
        chrm_pred_mode = (u1_is_422 == 1) ? gau1_chroma422_intra_angle_mapping[luma_pred_mode]
7211
10.6M
                                          : luma_pred_mode;
7212
7213
10.6M
        total_bytes_offset = 0;
7214
7215
10.6M
        if(TU_EQ_SUBCU == func_proc_mode)
7216
386k
        {
7217
386k
            func_proc_mode = TU_EQ_CU_DIV2;
7218
386k
        }
7219
7220
        /* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and
7221
        TU_EQ_CU_DIV2 and  TU_EQ_SUBCU case */
7222
10.6M
        if(8 == cu_size)
7223
3.92M
        {
7224
3.92M
            func_proc_mode = TU_EQ_CU;
7225
3.92M
        }
7226
7227
        /* loop based on num tus in a cu */
7228
10.6M
        if(!ps_best_cu_prms->u1_intra_flag || !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd ||
7229
10.6M
           (ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd &&
7230
6.25M
            (chrm_pred_mode !=
7231
6.25M
             ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode].u1_best_cr_mode)))
7232
8.74M
        {
7233
            /* loop based on num tus in a cu */
7234
21.0M
            for(ctr = 0; ctr < u1_num_tus; ctr++)
7235
13.3M
            {
7236
13.3M
                WORD32 num_bytes = 0;
7237
13.3M
                LWORD64 curr_cb_cod_cost = 0;
7238
13.3M
                LWORD64 curr_cr_cod_cost = 0;
7239
13.3M
                WORD32 chrm_pred_func_idx = 0;
7240
13.3M
                UWORD8 u1_is_early_exit_condition_satisfied = 0;
7241
7242
                /* Default cb and cr offset initializatio for b3_chroma_intra_mode_idx=7   */
7243
                /* FIX for TU tree shrinkage caused by ecd data copies in final mode recon */
7244
13.3M
                ps_tu->s_tu.b1_cb_cbf = ps_tu->s_tu.b1_cr_cbf = 0;
7245
13.3M
                ps_tu->s_tu.b1_cb_cbf_subtu1 = ps_tu->s_tu.b1_cr_cbf_subtu1 = 0;
7246
13.3M
                ps_tu->ai4_cb_coeff_offset[0] = total_bytes_offset + init_bytes_offset;
7247
13.3M
                ps_tu->ai4_cr_coeff_offset[0] = total_bytes_offset + init_bytes_offset;
7248
13.3M
                ps_tu->ai4_cb_coeff_offset[1] = total_bytes_offset + init_bytes_offset;
7249
13.3M
                ps_tu->ai4_cr_coeff_offset[1] = total_bytes_offset + init_bytes_offset;
7250
13.3M
                ps_tu_temp_prms->ai2_cb_bytes_consumed[0] = 0;
7251
13.3M
                ps_tu_temp_prms->ai2_cr_bytes_consumed[0] = 0;
7252
13.3M
                ps_tu_temp_prms->ai2_cb_bytes_consumed[1] = 0;
7253
13.3M
                ps_tu_temp_prms->ai2_cr_bytes_consumed[1] = 0;
7254
7255
                /* TU level inits */
7256
                /* check if chroma present flag is set */
7257
13.3M
                if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
7258
12.4M
                {
7259
                    /* RDOPT copy States :  TU init (best until prev TU) to current */
7260
12.4M
                    COPY_CABAC_STATES(
7261
12.4M
                        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
7262
12.4M
                             .s_cabac_ctxt.au1_ctxt_models[0],
7263
12.4M
                        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7264
12.4M
                        IHEVC_CAB_CTXT_END);
7265
7266
                    /* get the current transform size */
7267
12.4M
                    trans_size = ps_tu->s_tu.b3_size;
7268
12.4M
                    trans_size = (1 << (trans_size + 1)); /* in chroma units */
7269
7270
                    /* since 2x2 transform is not allowed for chroma*/
7271
12.4M
                    if(2 == trans_size)
7272
860k
                    {
7273
860k
                        trans_size = 4;
7274
860k
                    }
7275
12.4M
                }
7276
7277
25.6M
                for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++)
7278
13.3M
                {
7279
13.3M
                    WORD32 cbf;
7280
13.3M
                    UWORD8 u1_is_recon_available;
7281
7282
13.3M
                    WORD32 nbr_flags = 0;
7283
13.3M
                    WORD32 zero_cols = 0;
7284
13.3M
                    WORD32 zero_rows = 0;
7285
7286
                    /* check if chroma present flag is set */
7287
13.3M
                    if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
7288
12.4M
                    {
7289
12.4M
                        UWORD8 *pu1_cur_pred;
7290
12.4M
                        UWORD8 *pu1_cur_recon;
7291
12.4M
                        UWORD8 *pu1_cur_src;
7292
12.4M
                        WORD16 *pi2_cur_deq_data;
7293
12.4M
                        WORD32 curr_pos_x, curr_pos_y;
7294
12.4M
                        LWORD64 trans_ssd_u, trans_ssd_v;
7295
7296
                        /* get the current sub-tu posx and posy w.r.t to cu */
7297
12.4M
                        curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3);
7298
12.4M
                        curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) +
7299
12.4M
                                     (i4_subtu_idx * trans_size);
7300
7301
                        /* 420sp case only vertical height will be half */
7302
12.4M
                        if(u1_is_422 == 0)
7303
12.4M
                        {
7304
12.4M
                            curr_pos_y >>= 1;
7305
12.4M
                        }
7306
7307
                        /* increment the pointers to start of current Sub-TU */
7308
12.4M
                        pu1_cur_recon = (pu1_recon + curr_pos_x);
7309
12.4M
                        pu1_cur_recon += (curr_pos_y * i4_recon_stride);
7310
12.4M
                        pu1_cur_src = (pu1_chrm_src + curr_pos_x);
7311
12.4M
                        pu1_cur_src += (curr_pos_y * chrm_src_stride);
7312
12.4M
                        pu1_cur_pred = (pu1_pred + curr_pos_x);
7313
12.4M
                        pu1_cur_pred += (curr_pos_y * pred_strd);
7314
12.4M
                        pi2_cur_deq_data = pi2_deq_data + curr_pos_x;
7315
12.4M
                        pi2_cur_deq_data += (curr_pos_y * deq_data_strd);
7316
7317
                        /* populate the coeffs scan idx */
7318
12.4M
                        scan_idx = SCAN_DIAG_UPRIGHT;
7319
7320
                        /* perform intra prediction only for Intra case */
7321
12.4M
                        if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag)
7322
8.01M
                        {
7323
8.01M
                            UWORD8 *pu1_top_left;
7324
8.01M
                            UWORD8 *pu1_top;
7325
8.01M
                            UWORD8 *pu1_left;
7326
8.01M
                            WORD32 left_strd;
7327
7328
8.01M
                            calc_recon = !u1_compute_spatial_ssd &&
7329
8.01M
                                         ((4 == u1_num_tus) || (u1_is_422 == 1)) &&
7330
8.01M
                                         (((u1_num_tus == 1) && (0 == i4_subtu_idx)) ||
7331
1.57M
                                          ((ctr == 3) && (0 == i4_subtu_idx) && (u1_is_422 == 1)) ||
7332
1.57M
                                          ((u1_num_tus == 4) && (ctr < 3)));
7333
7334
                            /* left cu boundary */
7335
8.01M
                            if(0 == curr_pos_x)
7336
6.13M
                            {
7337
6.13M
                                pu1_left = pu1_cu_left + curr_pos_y * cu_left_stride;
7338
6.13M
                                left_strd = cu_left_stride;
7339
6.13M
                            }
7340
1.87M
                            else
7341
1.87M
                            {
7342
1.87M
                                pu1_left = pu1_cur_recon - 2;
7343
1.87M
                                left_strd = i4_recon_stride;
7344
1.87M
                            }
7345
7346
                            /* top cu boundary */
7347
8.01M
                            if(0 == curr_pos_y)
7348
6.14M
                            {
7349
6.14M
                                pu1_top = pu1_cu_top + curr_pos_x;
7350
6.14M
                            }
7351
1.86M
                            else
7352
1.86M
                            {
7353
1.86M
                                pu1_top = pu1_cur_recon - i4_recon_stride;
7354
1.86M
                            }
7355
7356
                            /* by default top left is set to cu top left */
7357
8.01M
                            pu1_top_left = pu1_cu_top_left;
7358
7359
                            /* top left based on position */
7360
8.01M
                            if((0 != curr_pos_y) && (0 == curr_pos_x))
7361
941k
                            {
7362
941k
                                pu1_top_left = pu1_left - cu_left_stride;
7363
941k
                            }
7364
7.06M
                            else if(0 != curr_pos_x)
7365
1.87M
                            {
7366
1.87M
                                pu1_top_left = pu1_top - 2;
7367
1.87M
                            }
7368
7369
                            /* for 4x4 transforms based on intra pred mode scan is choosen*/
7370
8.01M
                            if(4 == trans_size)
7371
4.22M
                            {
7372
                                /* for modes from 22 upto 30 horizontal scan is used */
7373
4.22M
                                if((chrm_pred_mode > 21) && (chrm_pred_mode < 31))
7374
1.48M
                                {
7375
1.48M
                                    scan_idx = SCAN_HORZ;
7376
1.48M
                                }
7377
                                /* for modes from 6 upto 14 vertical scan is used */
7378
2.73M
                                else if((chrm_pred_mode > 5) && (chrm_pred_mode < 15))
7379
716k
                                {
7380
716k
                                    scan_idx = SCAN_VERT;
7381
716k
                                }
7382
4.22M
                            }
7383
7384
8.01M
                            nbr_flags = ihevce_get_intra_chroma_tu_nbr(
7385
8.01M
                                ps_best_cu_prms->au4_nbr_flags[ctr],
7386
8.01M
                                i4_subtu_idx,
7387
8.01M
                                trans_size,
7388
8.01M
                                u1_is_422);
7389
7390
                            /* call the chroma reference array substitution */
7391
8.01M
                            ihevc_intra_pred_chroma_ref_substitution_fptr(
7392
8.01M
                                pu1_top_left,
7393
8.01M
                                pu1_top,
7394
8.01M
                                pu1_left,
7395
8.01M
                                left_strd,
7396
8.01M
                                trans_size,
7397
8.01M
                                nbr_flags,
7398
8.01M
                                (UWORD8 *)ps_ctxt->pv_ref_sub_out,
7399
8.01M
                                1);
7400
7401
                            /* use the look up to get the function idx */
7402
8.01M
                            chrm_pred_func_idx = g_i4_ip_funcs[chrm_pred_mode];
7403
7404
                            /* call the intra prediction function */
7405
8.01M
                            ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
7406
8.01M
                                (UWORD8 *)ps_ctxt->pv_ref_sub_out,
7407
8.01M
                                1,
7408
8.01M
                                pu1_cur_pred,
7409
8.01M
                                pred_strd,
7410
8.01M
                                trans_size,
7411
8.01M
                                chrm_pred_mode);
7412
8.01M
                        }
7413
7414
12.4M
                        if(!ctr && !i4_subtu_idx && (u1_compute_spatial_ssd || calc_recon))
7415
5.47M
                        {
7416
5.47M
                            ps_recon_datastore->au1_is_chromaRecon_available[0] =
7417
5.47M
                                !ps_best_cu_prms->u1_skip_flag;
7418
5.47M
                        }
7419
7.00M
                        else if(!ctr && !i4_subtu_idx)
7420
3.26M
                        {
7421
3.26M
                            ps_recon_datastore->au1_is_chromaRecon_available[0] = 0;
7422
3.26M
                        }
7423
                        /************************************************************/
7424
                        /* recon loop is done for all cases including skip cu       */
7425
                        /* This is because skipping chroma reisdual based on luma   */
7426
                        /* skip decision can lead to chroma artifacts               */
7427
                        /************************************************************/
7428
                        /************************************************************/
7429
                        /*In the high quality and medium speed modes, wherein chroma*/
7430
                        /*and luma costs are included in the total cost calculation */
7431
                        /*the cost is just a ssd cost, and not that obtained through*/
7432
                        /*iq_it path                                                */
7433
                        /************************************************************/
7434
12.4M
                        if(ps_best_cu_prms->u1_skip_flag == 0)
7435
11.4M
                        {
7436
11.4M
                            WORD32 tu_bits;
7437
7438
11.4M
                            cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
7439
11.4M
                                ps_ctxt,
7440
11.4M
                                pu1_cur_pred,
7441
11.4M
                                pred_strd,
7442
11.4M
                                pu1_cur_src,
7443
11.4M
                                chrm_src_stride,
7444
11.4M
                                pi2_cur_deq_data,
7445
11.4M
                                deq_data_strd,
7446
11.4M
                                pu1_cur_recon,
7447
11.4M
                                i4_recon_stride,
7448
11.4M
                                pu1_ecd_data + total_bytes_offset,
7449
11.4M
                                ps_ctxt->au1_cu_csbf,
7450
11.4M
                                ps_ctxt->i4_cu_csbf_strd,
7451
11.4M
                                trans_size,
7452
11.4M
                                scan_idx,
7453
11.4M
                                PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag,
7454
11.4M
                                &num_bytes,
7455
11.4M
                                &tu_bits,
7456
11.4M
                                &zero_cols,
7457
11.4M
                                &zero_rows,
7458
11.4M
                                &u1_is_recon_available,
7459
11.4M
                                i4_perform_sbh,
7460
11.4M
                                i4_perform_rdoq,
7461
11.4M
                                &trans_ssd_u,
7462
11.4M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7463
11.4M
                                i4_alpha_stim_multiplier,
7464
11.4M
                                u1_is_cu_noisy,
7465
11.4M
#endif
7466
11.4M
                                ps_best_cu_prms->u1_skip_flag,
7467
11.4M
                                u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
7468
11.4M
                                U_PLANE);
7469
7470
11.4M
                            if(u1_compute_spatial_ssd && u1_is_recon_available)
7471
6.60M
                            {
7472
6.60M
                                ps_recon_datastore
7473
6.60M
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7474
6.60M
                                                                        [i4_subtu_idx] = 0;
7475
6.60M
                            }
7476
4.82M
                            else
7477
4.82M
                            {
7478
4.82M
                                ps_recon_datastore
7479
4.82M
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7480
4.82M
                                                                        [i4_subtu_idx] = UCHAR_MAX;
7481
4.82M
                            }
7482
7483
#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7484
                            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7485
                            {
7486
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
7487
                                trans_ssd_u = ihevce_inject_stim_into_distortion(
7488
                                    pu1_cur_src,
7489
                                    chrm_src_stride,
7490
                                    pu1_cur_pred,
7491
                                    pred_strd,
7492
                                    trans_ssd_u,
7493
                                    i4_alpha_stim_multiplier,
7494
                                    trans_size,
7495
                                    0,
7496
                                    ps_ctxt->u1_enable_psyRDOPT,
7497
                                    U_PLANE);
7498
#else
7499
                                if(u1_compute_spatial_ssd && u1_is_recon_available)
7500
                                {
7501
                                    trans_ssd_u = ihevce_inject_stim_into_distortion(
7502
                                        pu1_cur_src,
7503
                                        chrm_src_stride,
7504
                                        pu1_cur_recon,
7505
                                        i4_recon_stride,
7506
                                        trans_ssd_u,
7507
                                        i4_alpha_stim_multiplier,
7508
                                        trans_size,
7509
                                        0,
7510
                                        ps_ctxt->u1_enable_psyRDOPT,
7511
                                        U_PLANE);
7512
                                }
7513
                                else
7514
                                {
7515
                                    trans_ssd_u = ihevce_inject_stim_into_distortion(
7516
                                        pu1_cur_src,
7517
                                        chrm_src_stride,
7518
                                        pu1_cur_pred,
7519
                                        pred_strd,
7520
                                        trans_ssd_u,
7521
                                        i4_alpha_stim_multiplier,
7522
                                        trans_size,
7523
                                        0,
7524
                                        ps_ctxt->u1_enable_psyRDOPT,
7525
                                        U_PLANE);
7526
                                }
7527
#endif
7528
                            }
7529
#endif
7530
7531
11.4M
                            curr_cb_cod_cost =
7532
11.4M
                                trans_ssd_u +
7533
11.4M
                                COMPUTE_RATE_COST_CLIP30(
7534
11.4M
                                    tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
7535
7536
11.4M
                            chrm_tu_bits += tu_bits;
7537
11.4M
                            i4_bits_cb += tu_bits;
7538
7539
                            /* RDOPT copy States :  New updated after curr TU to TU init */
7540
11.4M
                            if(0 != cbf)
7541
1.73M
                            {
7542
1.73M
                                COPY_CABAC_STATES(
7543
1.73M
                                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7544
1.73M
                                    &ps_ctxt->s_rdopt_entropy_ctxt
7545
1.73M
                                         .as_cu_entropy_ctxt[rd_opt_curr_idx]
7546
1.73M
                                         .s_cabac_ctxt.au1_ctxt_models[0],
7547
1.73M
                                    IHEVC_CAB_CTXT_END);
7548
1.73M
                            }
7549
                            /* RDOPT copy States :  Restoring back the Cb init state to Cr */
7550
9.68M
                            else
7551
9.68M
                            {
7552
9.68M
                                COPY_CABAC_STATES(
7553
9.68M
                                    &ps_ctxt->s_rdopt_entropy_ctxt
7554
9.68M
                                         .as_cu_entropy_ctxt[rd_opt_curr_idx]
7555
9.68M
                                         .s_cabac_ctxt.au1_ctxt_models[0],
7556
9.68M
                                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7557
9.68M
                                    IHEVC_CAB_CTXT_END);
7558
9.68M
                            }
7559
7560
                            /* If Intra and TU=CU/2, need recon for next TUs */
7561
11.4M
                            if(calc_recon)
7562
1.20M
                            {
7563
1.20M
                                ihevce_chroma_it_recon_fxn(
7564
1.20M
                                    ps_ctxt,
7565
1.20M
                                    pi2_cur_deq_data,
7566
1.20M
                                    deq_data_strd,
7567
1.20M
                                    pu1_cur_pred,
7568
1.20M
                                    pred_strd,
7569
1.20M
                                    pu1_cur_recon,
7570
1.20M
                                    i4_recon_stride,
7571
1.20M
                                    (pu1_ecd_data + total_bytes_offset),
7572
1.20M
                                    trans_size,
7573
1.20M
                                    cbf,
7574
1.20M
                                    zero_cols,
7575
1.20M
                                    zero_rows,
7576
1.20M
                                    U_PLANE);
7577
7578
1.20M
                                ps_recon_datastore
7579
1.20M
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7580
1.20M
                                                                        [i4_subtu_idx] = 0;
7581
1.20M
                            }
7582
10.2M
                            else
7583
10.2M
                            {
7584
10.2M
                                ps_recon_datastore
7585
10.2M
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7586
10.2M
                                                                        [i4_subtu_idx] = UCHAR_MAX;
7587
10.2M
                            }
7588
11.4M
                        }
7589
1.05M
                        else
7590
1.05M
                        {
7591
                            /* num bytes is set to 0 */
7592
1.05M
                            num_bytes = 0;
7593
7594
                            /* cbf is returned as 0 */
7595
1.05M
                            cbf = 0;
7596
7597
1.05M
                            curr_cb_cod_cost = trans_ssd_u =
7598
7599
1.05M
                                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
7600
1.05M
                                    pu1_cur_pred,
7601
1.05M
                                    pu1_cur_src,
7602
1.05M
                                    pred_strd,
7603
1.05M
                                    chrm_src_stride,
7604
1.05M
                                    trans_size,
7605
1.05M
                                    trans_size,
7606
1.05M
                                    U_PLANE);
7607
7608
1.05M
                            if(u1_compute_spatial_ssd)
7609
659k
                            {
7610
                                /* buffer copy from pred to recon */
7611
7612
659k
                                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
7613
659k
                                    pu1_cur_pred,
7614
659k
                                    pred_strd,
7615
659k
                                    pu1_cur_recon,
7616
659k
                                    i4_recon_stride,
7617
659k
                                    trans_size,
7618
659k
                                    trans_size,
7619
659k
                                    U_PLANE);
7620
7621
659k
                                ps_recon_datastore
7622
659k
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7623
659k
                                                                        [i4_subtu_idx] = 0;
7624
659k
                            }
7625
7626
1.05M
                            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7627
0
                            {
7628
0
                                trans_ssd_u = ihevce_inject_stim_into_distortion(
7629
0
                                    pu1_cur_src,
7630
0
                                    chrm_src_stride,
7631
0
                                    pu1_cur_pred,
7632
0
                                    pred_strd,
7633
0
                                    trans_ssd_u,
7634
0
                                    i4_alpha_stim_multiplier,
7635
0
                                    trans_size,
7636
0
                                    0,
7637
0
                                    ps_ctxt->u1_enable_psyRDOPT,
7638
0
                                    U_PLANE);
7639
0
                            }
7640
7641
1.05M
#if ENABLE_INTER_ZCU_COST
7642
#if !WEIGH_CHROMA_COST
7643
                            /* cbf = 0, accumulate cu not coded cost */
7644
                            ps_ctxt->i8_cu_not_coded_cost += curr_cb_cod_cost;
7645
#else
7646
                            /* cbf = 0, accumulate cu not coded cost */
7647
7648
1.05M
                            ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
7649
1.05M
                                (curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7650
1.05M
                                 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7651
1.05M
                                CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7652
1.05M
#endif
7653
1.05M
#endif
7654
1.05M
                        }
7655
7656
#if !WEIGH_CHROMA_COST
7657
                        curr_rdopt_cost += curr_cb_cod_cost;
7658
#else
7659
12.4M
                        curr_rdopt_cost +=
7660
12.4M
                            ((curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7661
12.4M
                              (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7662
12.4M
                             CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7663
12.4M
#endif
7664
12.4M
                        chrm_cod_cost += curr_cb_cod_cost;
7665
12.4M
                        i8_ssd_cb += trans_ssd_u;
7666
7667
12.4M
                        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
7668
12.4M
                        {
7669
                            /* Early exit : If the current running cost exceeds
7670
                            the prev. best mode cost, break */
7671
12.4M
                            if(curr_rdopt_cost > prev_best_rdopt_cost)
7672
567k
                            {
7673
567k
                                u1_is_early_exit_condition_satisfied = 1;
7674
567k
                                break;
7675
567k
                            }
7676
12.4M
                        }
7677
7678
                        /* inter cu is coded if any of the tu is coded in it */
7679
11.9M
                        ps_best_cu_prms->u1_is_cu_coded |= cbf;
7680
7681
                        /* update CB related params */
7682
11.9M
                        ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] =
7683
11.9M
                            total_bytes_offset + init_bytes_offset;
7684
7685
11.9M
                        if(0 == i4_subtu_idx)
7686
11.9M
                        {
7687
11.9M
                            ps_tu->s_tu.b1_cb_cbf = cbf;
7688
11.9M
                        }
7689
0
                        else
7690
0
                        {
7691
0
                            ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf;
7692
0
                        }
7693
7694
11.9M
                        total_bytes_offset += num_bytes;
7695
7696
11.9M
                        ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] = zero_cols;
7697
11.9M
                        ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] = zero_rows;
7698
11.9M
                        ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes;
7699
7700
                        /* recon loop is done for non skip cases */
7701
11.9M
                        if(ps_best_cu_prms->u1_skip_flag == 0)
7702
11.0M
                        {
7703
11.0M
                            WORD32 tu_bits;
7704
7705
11.0M
                            cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
7706
11.0M
                                ps_ctxt,
7707
11.0M
                                pu1_cur_pred,
7708
11.0M
                                pred_strd,
7709
11.0M
                                pu1_cur_src,
7710
11.0M
                                chrm_src_stride,
7711
11.0M
                                pi2_cur_deq_data + trans_size,
7712
11.0M
                                deq_data_strd,
7713
11.0M
                                pu1_cur_recon,
7714
11.0M
                                i4_recon_stride,
7715
11.0M
                                pu1_ecd_data + total_bytes_offset,
7716
11.0M
                                ps_ctxt->au1_cu_csbf,
7717
11.0M
                                ps_ctxt->i4_cu_csbf_strd,
7718
11.0M
                                trans_size,
7719
11.0M
                                scan_idx,
7720
11.0M
                                PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag,
7721
11.0M
                                &num_bytes,
7722
11.0M
                                &tu_bits,
7723
11.0M
                                &zero_cols,
7724
11.0M
                                &zero_rows,
7725
11.0M
                                &u1_is_recon_available,
7726
11.0M
                                i4_perform_sbh,
7727
11.0M
                                i4_perform_rdoq,
7728
11.0M
                                &trans_ssd_v,
7729
11.0M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7730
11.0M
                                i4_alpha_stim_multiplier,
7731
11.0M
                                u1_is_cu_noisy,
7732
11.0M
#endif
7733
11.0M
                                ps_best_cu_prms->u1_skip_flag,
7734
11.0M
                                u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
7735
11.0M
                                V_PLANE);
7736
7737
11.0M
                            if(u1_compute_spatial_ssd && u1_is_recon_available)
7738
6.46M
                            {
7739
6.46M
                                ps_recon_datastore
7740
6.46M
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7741
6.46M
                                                                        [i4_subtu_idx] = 0;
7742
6.46M
                            }
7743
4.55M
                            else
7744
4.55M
                            {
7745
4.55M
                                ps_recon_datastore
7746
4.55M
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7747
4.55M
                                                                        [i4_subtu_idx] = UCHAR_MAX;
7748
4.55M
                            }
7749
7750
#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7751
                            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7752
                            {
7753
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
7754
                                trans_ssd_v = ihevce_inject_stim_into_distortion(
7755
                                    pu1_cur_src,
7756
                                    chrm_src_stride,
7757
                                    pu1_cur_pred,
7758
                                    pred_strd,
7759
                                    trans_ssd_v,
7760
                                    i4_alpha_stim_multiplier,
7761
                                    trans_size,
7762
                                    0,
7763
                                    ps_ctxt->u1_enable_psyRDOPT,
7764
                                    V_PLANE);
7765
#else
7766
                                if(u1_compute_spatial_ssd && u1_is_recon_available)
7767
                                {
7768
                                    trans_ssd_v = ihevce_inject_stim_into_distortion(
7769
                                        pu1_cur_src,
7770
                                        chrm_src_stride,
7771
                                        pu1_cur_recon,
7772
                                        i4_recon_stride,
7773
                                        trans_ssd_v,
7774
                                        i4_alpha_stim_multiplier,
7775
                                        trans_size,
7776
                                        0,
7777
                                        ps_ctxt->u1_enable_psyRDOPT,
7778
                                        V_PLANE);
7779
                                }
7780
                                else
7781
                                {
7782
                                    trans_ssd_v = ihevce_inject_stim_into_distortion(
7783
                                        pu1_cur_src,
7784
                                        chrm_src_stride,
7785
                                        pu1_cur_pred,
7786
                                        pred_strd,
7787
                                        trans_ssd_v,
7788
                                        i4_alpha_stim_multiplier,
7789
                                        trans_size,
7790
                                        0,
7791
                                        ps_ctxt->u1_enable_psyRDOPT,
7792
                                        V_PLANE);
7793
                                }
7794
#endif
7795
                            }
7796
#endif
7797
7798
11.0M
                            curr_cr_cod_cost =
7799
11.0M
                                trans_ssd_v +
7800
11.0M
                                COMPUTE_RATE_COST_CLIP30(
7801
11.0M
                                    tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
7802
11.0M
                            chrm_tu_bits += tu_bits;
7803
11.0M
                            i4_bits_cr += tu_bits;
7804
7805
                            /* RDOPT copy States :  New updated after curr TU to TU init */
7806
11.0M
                            if(0 != cbf)
7807
1.47M
                            {
7808
1.47M
                                COPY_CABAC_STATES(
7809
1.47M
                                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7810
1.47M
                                    &ps_ctxt->s_rdopt_entropy_ctxt
7811
1.47M
                                         .as_cu_entropy_ctxt[rd_opt_curr_idx]
7812
1.47M
                                         .s_cabac_ctxt.au1_ctxt_models[0],
7813
1.47M
                                    IHEVC_CAB_CTXT_END);
7814
1.47M
                            }
7815
                            /* RDOPT copy States :  Restoring back the Cb init state to Cr */
7816
9.54M
                            else
7817
9.54M
                            {
7818
9.54M
                                COPY_CABAC_STATES(
7819
9.54M
                                    &ps_ctxt->s_rdopt_entropy_ctxt
7820
9.54M
                                         .as_cu_entropy_ctxt[rd_opt_curr_idx]
7821
9.54M
                                         .s_cabac_ctxt.au1_ctxt_models[0],
7822
9.54M
                                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7823
9.54M
                                    IHEVC_CAB_CTXT_END);
7824
9.54M
                            }
7825
7826
                            /* If Intra and TU=CU/2, need recon for next TUs */
7827
11.0M
                            if(calc_recon)
7828
1.17M
                            {
7829
1.17M
                                ihevce_chroma_it_recon_fxn(
7830
1.17M
                                    ps_ctxt,
7831
1.17M
                                    (pi2_cur_deq_data + trans_size),
7832
1.17M
                                    deq_data_strd,
7833
1.17M
                                    pu1_cur_pred,
7834
1.17M
                                    pred_strd,
7835
1.17M
                                    pu1_cur_recon,
7836
1.17M
                                    i4_recon_stride,
7837
1.17M
                                    (pu1_ecd_data + total_bytes_offset),
7838
1.17M
                                    trans_size,
7839
1.17M
                                    cbf,
7840
1.17M
                                    zero_cols,
7841
1.17M
                                    zero_rows,
7842
1.17M
                                    V_PLANE);
7843
7844
1.17M
                                ps_recon_datastore
7845
1.17M
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7846
1.17M
                                                                        [i4_subtu_idx] = 0;
7847
1.17M
                            }
7848
9.83M
                            else
7849
9.83M
                            {
7850
9.83M
                                ps_recon_datastore
7851
9.83M
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7852
9.83M
                                                                        [i4_subtu_idx] = UCHAR_MAX;
7853
9.83M
                            }
7854
11.0M
                        }
7855
900k
                        else
7856
900k
                        {
7857
                            /* num bytes is set to 0 */
7858
900k
                            num_bytes = 0;
7859
7860
                            /* cbf is returned as 0 */
7861
900k
                            cbf = 0;
7862
7863
900k
                            curr_cr_cod_cost = trans_ssd_v =
7864
7865
900k
                                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
7866
900k
                                    pu1_cur_pred,
7867
900k
                                    pu1_cur_src,
7868
900k
                                    pred_strd,
7869
900k
                                    chrm_src_stride,
7870
900k
                                    trans_size,
7871
900k
                                    trans_size,
7872
900k
                                    V_PLANE);
7873
7874
900k
                            if(u1_compute_spatial_ssd)
7875
564k
                            {
7876
                                /* buffer copy from pred to recon */
7877
564k
                                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
7878
564k
                                    pu1_cur_pred,
7879
564k
                                    pred_strd,
7880
564k
                                    pu1_cur_recon,
7881
564k
                                    i4_recon_stride,
7882
564k
                                    trans_size,
7883
564k
                                    trans_size,
7884
564k
                                    V_PLANE);
7885
7886
564k
                                ps_recon_datastore
7887
564k
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7888
564k
                                                                        [i4_subtu_idx] = 0;
7889
564k
                            }
7890
7891
900k
                            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7892
0
                            {
7893
0
                                trans_ssd_v = ihevce_inject_stim_into_distortion(
7894
0
                                    pu1_cur_src,
7895
0
                                    chrm_src_stride,
7896
0
                                    pu1_cur_pred,
7897
0
                                    pred_strd,
7898
0
                                    trans_ssd_v,
7899
0
                                    i4_alpha_stim_multiplier,
7900
0
                                    trans_size,
7901
0
                                    0,
7902
0
                                    ps_ctxt->u1_enable_psyRDOPT,
7903
0
                                    V_PLANE);
7904
0
                            }
7905
7906
900k
#if ENABLE_INTER_ZCU_COST
7907
#if !WEIGH_CHROMA_COST
7908
                            /* cbf = 0, accumulate cu not coded cost */
7909
                            ps_ctxt->i8_cu_not_coded_cost += curr_cr_cod_cost;
7910
#else
7911
                            /* cbf = 0, accumulate cu not coded cost */
7912
7913
900k
                            ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
7914
900k
                                (curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7915
900k
                                 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7916
900k
                                CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7917
900k
#endif
7918
900k
#endif
7919
900k
                        }
7920
7921
#if !WEIGH_CHROMA_COST
7922
                        curr_rdopt_cost += curr_cr_cod_cost;
7923
#else
7924
11.9M
                        curr_rdopt_cost +=
7925
11.9M
                            ((curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7926
11.9M
                              (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7927
11.9M
                             CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7928
11.9M
#endif
7929
7930
11.9M
                        chrm_cod_cost += curr_cr_cod_cost;
7931
11.9M
                        i8_ssd_cr += trans_ssd_v;
7932
7933
11.9M
                        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
7934
11.9M
                        {
7935
                            /* Early exit : If the current running cost exceeds
7936
                            the prev. best mode cost, break */
7937
11.9M
                            if(curr_rdopt_cost > prev_best_rdopt_cost)
7938
413k
                            {
7939
413k
                                u1_is_early_exit_condition_satisfied = 1;
7940
413k
                                break;
7941
413k
                            }
7942
11.9M
                        }
7943
7944
                        /* inter cu is coded if any of the tu is coded in it */
7945
11.5M
                        ps_best_cu_prms->u1_is_cu_coded |= cbf;
7946
7947
                        /* update CR related params */
7948
11.5M
                        ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] =
7949
11.5M
                            total_bytes_offset + init_bytes_offset;
7950
7951
11.5M
                        if(0 == i4_subtu_idx)
7952
11.5M
                        {
7953
11.5M
                            ps_tu->s_tu.b1_cr_cbf = cbf;
7954
11.5M
                        }
7955
0
                        else
7956
0
                        {
7957
0
                            ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf;
7958
0
                        }
7959
7960
11.5M
                        total_bytes_offset += num_bytes;
7961
7962
11.5M
                        ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] = zero_cols;
7963
11.5M
                        ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] = zero_rows;
7964
11.5M
                        ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes;
7965
11.5M
                    }
7966
850k
                    else
7967
850k
                    {
7968
850k
                        ps_recon_datastore
7969
850k
                            ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx] =
7970
850k
                            UCHAR_MAX;
7971
850k
                        ps_recon_datastore
7972
850k
                            ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx] =
7973
850k
                            UCHAR_MAX;
7974
850k
                    }
7975
13.3M
                }
7976
7977
13.3M
                if(u1_is_early_exit_condition_satisfied)
7978
980k
                {
7979
980k
                    break;
7980
980k
                }
7981
7982
                /* loop increments */
7983
12.3M
                ps_tu++;
7984
12.3M
                ps_tu_temp_prms++;
7985
12.3M
            }
7986
7987
            /* Signal as luma mode. HIGH_QUALITY may update it */
7988
8.74M
            ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
7989
7990
            /* modify the cost chrm_cod_cost */
7991
8.74M
            if(ps_ctxt->u1_enable_psyRDOPT)
7992
0
            {
7993
0
                UWORD8 *pu1_recon_cu;
7994
0
                WORD32 recon_stride;
7995
0
                WORD32 curr_pos_x;
7996
0
                WORD32 curr_pos_y;
7997
0
                WORD32 start_index;
7998
0
                WORD32 num_horz_cu_in_ctb;
7999
0
                WORD32 had_block_size;
8000
                /* TODO: sreenivasa ctb size has to be used appropriately */
8001
0
                had_block_size = 8;
8002
0
                num_horz_cu_in_ctb = 2 * 64 / had_block_size;
8003
8004
0
                curr_pos_x = cu_pos_x << 3; /* pel units */
8005
0
                curr_pos_y = cu_pos_y << 3; /* pel units */
8006
0
                recon_stride = i4_recon_stride;
8007
0
                pu1_recon_cu = pu1_recon;
8008
8009
                /* start index to index the source satd of curr cu in the current ctb */
8010
0
                start_index = 2 * (curr_pos_x / had_block_size) +
8011
0
                              (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
8012
8013
0
                {
8014
0
                    chrm_cod_cost += ihevce_psy_rd_cost_croma(
8015
0
                        ps_ctxt->ai4_source_chroma_satd,
8016
0
                        pu1_recon,
8017
0
                        recon_stride,
8018
0
                        1,  //
8019
0
                        cu_size,
8020
0
                        0,  // pic type
8021
0
                        0,  //layer id
8022
0
                        ps_ctxt->i4_satd_lamda,  // lambda
8023
0
                        start_index,
8024
0
                        ps_ctxt->u1_is_input_data_hbd,  // 8 bit
8025
0
                        ps_ctxt->u1_chroma_array_type,
8026
0
                        &ps_ctxt->s_cmn_opt_func
8027
8028
0
                    );  // chroma subsampling 420
8029
0
                }
8030
0
            }
8031
8.74M
        }
8032
1.87M
        else
8033
1.87M
        {
8034
1.87M
            u1_is_mode_eq_chroma_satd_mode = 1;
8035
1.87M
            chrm_cod_cost = MAX_COST_64;
8036
1.87M
        }
8037
8038
        /* If Intra Block and preset is HIGH QUALITY, then compare with best SATD mode */
8039
10.6M
        if((PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag) &&
8040
10.6M
           (1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd))
8041
6.25M
        {
8042
6.25M
            if(64 == cu_size)
8043
129k
            {
8044
129k
                ASSERT(TU_EQ_CU != func_proc_mode);
8045
129k
            }
8046
8047
6.25M
            if(ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode]
8048
6.25M
                   .i8_chroma_best_rdopt < chrm_cod_cost)
8049
2.25M
            {
8050
2.25M
                UWORD8 *pu1_src;
8051
2.25M
                UWORD8 *pu1_ecd_data_src_cb;
8052
2.25M
                UWORD8 *pu1_ecd_data_src_cr;
8053
8054
2.25M
                chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt =
8055
2.25M
                    &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode];
8056
8057
2.25M
                UWORD8 *pu1_dst = &ps_ctxt->au1_rdopt_init_ctxt_models[0];
8058
2.25M
                WORD32 ai4_ecd_data_cb_offset[2] = { 0, 0 };
8059
2.25M
                WORD32 ai4_ecd_data_cr_offset[2] = { 0, 0 };
8060
8061
2.25M
                pu1_src = &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0];
8062
2.25M
                chrm_cod_cost = ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt;
8063
2.25M
                chrm_pred_mode = ps_chr_intra_satd_ctxt->u1_best_cr_mode;
8064
2.25M
                chrm_tu_bits = ps_chr_intra_satd_ctxt->i4_chrm_tu_bits;
8065
8066
2.25M
                if(u1_is_mode_eq_chroma_satd_mode)
8067
1.87M
                {
8068
1.87M
                    chrm_cod_cost -= ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode;
8069
1.87M
                }
8070
8071
                /* Resetting total_bytes_offset to 0 */
8072
2.25M
                total_bytes_offset = 0;
8073
8074
                /* Update the CABAC state corresponding to chroma only */
8075
                /* Chroma Cbf */
8076
2.25M
                memcpy(pu1_dst + IHEVC_CAB_CBCR_IDX, pu1_src + IHEVC_CAB_CBCR_IDX, 2);
8077
                /* Chroma transform skip */
8078
2.25M
                memcpy(pu1_dst + IHEVC_CAB_TFM_SKIP12, pu1_src + IHEVC_CAB_TFM_SKIP12, 1);
8079
                /* Chroma last coeff x prefix */
8080
2.25M
                memcpy(
8081
2.25M
                    pu1_dst + IHEVC_CAB_COEFFX_PREFIX + 15,
8082
2.25M
                    pu1_src + IHEVC_CAB_COEFFX_PREFIX + 15,
8083
2.25M
                    3);
8084
                /* Chroma last coeff y prefix */
8085
2.25M
                memcpy(
8086
2.25M
                    pu1_dst + IHEVC_CAB_COEFFY_PREFIX + 15,
8087
2.25M
                    pu1_src + IHEVC_CAB_COEFFY_PREFIX + 15,
8088
2.25M
                    3);
8089
                /* Chroma csbf */
8090
2.25M
                memcpy(
8091
2.25M
                    pu1_dst + IHEVC_CAB_CODED_SUBLK_IDX + 2,
8092
2.25M
                    pu1_src + IHEVC_CAB_CODED_SUBLK_IDX + 2,
8093
2.25M
                    2);
8094
                /* Chroma sig coeff flags */
8095
2.25M
                memcpy(
8096
2.25M
                    pu1_dst + IHEVC_CAB_COEFF_FLAG + 27, pu1_src + IHEVC_CAB_COEFF_FLAG + 27, 15);
8097
                /* Chroma absgt1 flags */
8098
2.25M
                memcpy(
8099
2.25M
                    pu1_dst + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16,
8100
2.25M
                    pu1_src + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16,
8101
2.25M
                    8);
8102
                /* Chroma absgt2 flags */
8103
2.25M
                memcpy(
8104
2.25M
                    pu1_dst + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4,
8105
2.25M
                    pu1_src + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4,
8106
2.25M
                    2);
8107
8108
2.25M
                ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
8109
2.25M
                ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
8110
8111
                /* update to luma decision as we update chroma in final mode */
8112
2.25M
                ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded_old;
8113
8114
5.75M
                for(ctr = 0; ctr < u1_num_tus; ctr++)
8115
3.50M
                {
8116
7.00M
                    for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++)
8117
3.50M
                    {
8118
3.50M
                        WORD32 cbf;
8119
3.50M
                        WORD32 num_bytes;
8120
8121
3.50M
                        pu1_ecd_data_src_cb =
8122
3.50M
                            &ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0];
8123
3.50M
                        pu1_ecd_data_src_cr =
8124
3.50M
                            &ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0];
8125
8126
                        /* check if chroma present flag is set */
8127
3.50M
                        if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
8128
3.50M
                        {
8129
3.50M
                            UWORD8 *pu1_cur_pred_dest;
8130
3.50M
                            UWORD8 *pu1_cur_pred_src;
8131
3.50M
                            WORD32 pred_src_strd;
8132
3.50M
                            WORD16 *pi2_cur_deq_data_dest;
8133
3.50M
                            WORD16 *pi2_cur_deq_data_src_cb;
8134
3.50M
                            WORD16 *pi2_cur_deq_data_src_cr;
8135
3.50M
                            WORD32 deq_src_strd;
8136
8137
3.50M
                            WORD32 curr_pos_x, curr_pos_y;
8138
8139
3.50M
                            trans_size = ps_tu->s_tu.b3_size;
8140
3.50M
                            trans_size = (1 << (trans_size + 1)); /* in chroma units */
8141
8142
                            /*Deriving stride values*/
8143
3.50M
                            pred_src_strd = ps_chr_intra_satd_ctxt->i4_pred_stride;
8144
3.50M
                            deq_src_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride;
8145
8146
                            /* since 2x2 transform is not allowed for chroma*/
8147
3.50M
                            if(2 == trans_size)
8148
206k
                            {
8149
206k
                                trans_size = 4;
8150
206k
                            }
8151
8152
                            /* get the current tu posx and posy w.r.t. the cu */
8153
3.50M
                            curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3);
8154
3.50M
                            curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) +
8155
3.50M
                                         (i4_subtu_idx * trans_size);
8156
8157
                            /* 420sp case only vertical height will be half */
8158
3.50M
                            if(0 == u1_is_422)
8159
3.50M
                            {
8160
3.50M
                                curr_pos_y >>= 1;
8161
3.50M
                            }
8162
8163
                            /* increment the pointers to start of current TU  */
8164
3.50M
                            pu1_cur_pred_src =
8165
3.50M
                                ((UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data + curr_pos_x);
8166
3.50M
                            pu1_cur_pred_src += (curr_pos_y * pred_src_strd);
8167
3.50M
                            pu1_cur_pred_dest = (pu1_pred + curr_pos_x);
8168
3.50M
                            pu1_cur_pred_dest += (curr_pos_y * pred_strd);
8169
8170
3.50M
                            pi2_cur_deq_data_src_cb =
8171
3.50M
                                &ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] + (curr_pos_x >> 1);
8172
3.50M
                            pi2_cur_deq_data_src_cr =
8173
3.50M
                                &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] + (curr_pos_x >> 1);
8174
3.50M
                            pi2_cur_deq_data_src_cb += (curr_pos_y * deq_src_strd);
8175
3.50M
                            pi2_cur_deq_data_src_cr += (curr_pos_y * deq_src_strd);
8176
3.50M
                            pi2_cur_deq_data_dest = pi2_deq_data + curr_pos_x;
8177
3.50M
                            pi2_cur_deq_data_dest += (curr_pos_y * deq_data_strd);
8178
8179
                            /*Overwriting deq data with that belonging to the winning special mode
8180
                            (luma mode !=  chroma mode)
8181
                            ihevce_copy_2d takes source and dest arguments as UWORD8 *. We have to
8182
                            correspondingly manipulate to copy WORD16 data*/
8183
8184
3.50M
                            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
8185
3.50M
                                (UWORD8 *)pi2_cur_deq_data_dest,
8186
3.50M
                                (deq_data_strd << 1),
8187
3.50M
                                (UWORD8 *)pi2_cur_deq_data_src_cb,
8188
3.50M
                                (deq_src_strd << 1),
8189
3.50M
                                (trans_size << 1),
8190
3.50M
                                trans_size);
8191
8192
3.50M
                            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
8193
3.50M
                                (UWORD8 *)(pi2_cur_deq_data_dest + trans_size),
8194
3.50M
                                (deq_data_strd << 1),
8195
3.50M
                                (UWORD8 *)pi2_cur_deq_data_src_cr,
8196
3.50M
                                (deq_src_strd << 1),
8197
3.50M
                                (trans_size << 1),
8198
3.50M
                                trans_size);
8199
8200
                            /*Overwriting pred data with that belonging to the winning special mode
8201
                            (luma mode !=  chroma mode)*/
8202
8203
3.50M
                            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
8204
3.50M
                                pu1_cur_pred_dest,
8205
3.50M
                                pred_strd,
8206
3.50M
                                pu1_cur_pred_src,
8207
3.50M
                                pred_src_strd,
8208
3.50M
                                (trans_size << 1),
8209
3.50M
                                trans_size);
8210
8211
3.50M
                            num_bytes = ps_chr_intra_satd_ctxt
8212
3.50M
                                            ->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr];
8213
3.50M
                            cbf = ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr];
8214
                            /* inter cu is coded if any of the tu is coded in it */
8215
3.50M
                            ps_best_cu_prms->u1_is_cu_coded |= cbf;
8216
8217
                            /* update CB related params */
8218
3.50M
                            ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] =
8219
3.50M
                                total_bytes_offset + init_bytes_offset;
8220
8221
3.50M
                            if(0 == i4_subtu_idx)
8222
3.50M
                            {
8223
3.50M
                                ps_tu->s_tu.b1_cb_cbf = cbf;
8224
3.50M
                            }
8225
0
                            else
8226
0
                            {
8227
0
                                ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf;
8228
0
                            }
8229
8230
                            /*Overwriting the cb ecd data corresponding to the special mode*/
8231
3.50M
                            if(0 != num_bytes)
8232
474k
                            {
8233
474k
                                memcpy(
8234
474k
                                    (pu1_ecd_data + total_bytes_offset),
8235
474k
                                    pu1_ecd_data_src_cb + ai4_ecd_data_cb_offset[i4_subtu_idx],
8236
474k
                                    num_bytes);
8237
474k
                            }
8238
8239
3.50M
                            total_bytes_offset += num_bytes;
8240
3.50M
                            ai4_ecd_data_cb_offset[i4_subtu_idx] += num_bytes;
8241
3.50M
                            ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes;
8242
8243
3.50M
                            num_bytes = ps_chr_intra_satd_ctxt
8244
3.50M
                                            ->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr];
8245
3.50M
                            cbf = ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr];
8246
                            /* inter cu is coded if any of the tu is coded in it */
8247
3.50M
                            ps_best_cu_prms->u1_is_cu_coded |= cbf;
8248
8249
                            /*Overwriting the cr ecd data corresponding to the special mode*/
8250
3.50M
                            if(0 != num_bytes)
8251
449k
                            {
8252
449k
                                memcpy(
8253
449k
                                    (pu1_ecd_data + total_bytes_offset),
8254
449k
                                    pu1_ecd_data_src_cr + ai4_ecd_data_cr_offset[i4_subtu_idx],
8255
449k
                                    num_bytes);
8256
449k
                            }
8257
8258
                            /* update CR related params */
8259
3.50M
                            ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] =
8260
3.50M
                                total_bytes_offset + init_bytes_offset;
8261
8262
3.50M
                            if(0 == i4_subtu_idx)
8263
3.50M
                            {
8264
3.50M
                                ps_tu->s_tu.b1_cr_cbf = cbf;
8265
3.50M
                            }
8266
0
                            else
8267
0
                            {
8268
0
                                ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf;
8269
0
                            }
8270
8271
3.50M
                            total_bytes_offset += num_bytes;
8272
3.50M
                            ai4_ecd_data_cr_offset[i4_subtu_idx] += num_bytes;
8273
8274
                            /*Updating zero rows and zero cols*/
8275
3.50M
                            ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] =
8276
3.50M
                                ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr];
8277
3.50M
                            ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] =
8278
3.50M
                                ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr];
8279
3.50M
                            ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] =
8280
3.50M
                                ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr];
8281
3.50M
                            ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] =
8282
3.50M
                                ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr];
8283
8284
3.50M
                            ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes;
8285
8286
3.50M
                            if((u1_num_tus > 1) &&
8287
3.50M
                               ps_recon_datastore->au1_is_chromaRecon_available[2])
8288
1.67M
                            {
8289
1.67M
                                ps_recon_datastore
8290
1.67M
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
8291
1.67M
                                                                        [i4_subtu_idx] = 2;
8292
1.67M
                                ps_recon_datastore
8293
1.67M
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
8294
1.67M
                                                                        [i4_subtu_idx] = 2;
8295
1.67M
                            }
8296
1.83M
                            else if(
8297
1.83M
                                (1 == u1_num_tus) &&
8298
1.83M
                                ps_recon_datastore->au1_is_chromaRecon_available[1])
8299
1.12M
                            {
8300
1.12M
                                ps_recon_datastore
8301
1.12M
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
8302
1.12M
                                                                        [i4_subtu_idx] = 1;
8303
1.12M
                                ps_recon_datastore
8304
1.12M
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
8305
1.12M
                                                                        [i4_subtu_idx] = 1;
8306
1.12M
                            }
8307
707k
                            else
8308
707k
                            {
8309
707k
                                ps_recon_datastore
8310
707k
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
8311
707k
                                                                        [i4_subtu_idx] = UCHAR_MAX;
8312
707k
                                ps_recon_datastore
8313
707k
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
8314
707k
                                                                        [i4_subtu_idx] = UCHAR_MAX;
8315
707k
                            }
8316
3.50M
                        }
8317
3.50M
                    }
8318
8319
                    /* loop increments */
8320
3.50M
                    ps_tu++;
8321
3.50M
                    ps_tu_temp_prms++;
8322
3.50M
                }
8323
2.25M
            }
8324
8325
6.25M
            if(!u1_is_422)
8326
6.25M
            {
8327
6.25M
                if(chrm_pred_mode == luma_pred_mode)
8328
5.87M
                {
8329
5.87M
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
8330
5.87M
                }
8331
380k
                else if(chrm_pred_mode == 0)
8332
70.9k
                {
8333
70.9k
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 0;
8334
70.9k
                }
8335
309k
                else if(chrm_pred_mode == 1)
8336
94.5k
                {
8337
94.5k
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 3;
8338
94.5k
                }
8339
215k
                else if(chrm_pred_mode == 10)
8340
157k
                {
8341
157k
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 2;
8342
157k
                }
8343
57.3k
                else if(chrm_pred_mode == 26)
8344
57.3k
                {
8345
57.3k
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 1;
8346
57.3k
                }
8347
0
                else
8348
0
                {
8349
0
                    ASSERT(0); /*Should not come here*/
8350
0
                }
8351
6.25M
            }
8352
0
            else
8353
0
            {
8354
0
                if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[luma_pred_mode])
8355
0
                {
8356
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
8357
0
                }
8358
0
                else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[0])
8359
0
                {
8360
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 0;
8361
0
                }
8362
0
                else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[1])
8363
0
                {
8364
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 3;
8365
0
                }
8366
0
                else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[10])
8367
0
                {
8368
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 2;
8369
0
                }
8370
0
                else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[26])
8371
0
                {
8372
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 1;
8373
0
                }
8374
0
                else
8375
0
                {
8376
0
                    ASSERT(0); /*Should not come here*/
8377
0
                }
8378
0
            }
8379
6.25M
        }
8380
8381
        /* Store the actual chroma mode */
8382
10.6M
        ps_best_cu_prms->u1_chroma_intra_pred_actual_mode = chrm_pred_mode;
8383
10.6M
    }
8384
8385
    /* update the total bytes produced */
8386
0
    ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes_offset + init_bytes_offset;
8387
8388
    /* store the final chrm bits accumulated */
8389
10.6M
    *pi4_chrm_tu_bits = chrm_tu_bits;
8390
8391
10.6M
    return (chrm_cod_cost);
8392
10.6M
}
8393
8394
/*!
8395
******************************************************************************
8396
* \if Function name : ihevce_final_rdopt_mode_prcs \endif
8397
*
8398
* \brief
8399
*    Final RDOPT mode process function. Performs Recon computation for the
8400
*    final mode. Re-use or Compute pred, iq-data, coeff based on the flags.
8401
*
8402
* \param[in] ps_ctxt : pointer to enc_loop module
8403
* \param[in] ps_prms : pointer to struct containing requisite parameters
8404
*
8405
* \return
8406
*    None
8407
*
8408
* \author
8409
*  Ittiam
8410
*
8411
*****************************************************************************
8412
*/
8413
void ihevce_final_rdopt_mode_prcs(
8414
    ihevce_enc_loop_ctxt_t *ps_ctxt, final_mode_process_prms_t *ps_prms)
8415
2.92M
{
8416
2.92M
    enc_loop_cu_final_prms_t *ps_best_cu_prms;
8417
2.92M
    tu_enc_loop_out_t *ps_tu_enc_loop;
8418
2.92M
    tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms;
8419
2.92M
    nbr_avail_flags_t s_nbr;
8420
2.92M
    recon_datastore_t *ps_recon_datastore;
8421
8422
2.92M
    ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
8423
2.92M
    ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
8424
2.92M
    ihevc_intra_pred_ref_filtering_ft *ihevc_intra_pred_ref_filtering_fptr;
8425
8426
2.92M
    WORD32 num_tu_in_cu;
8427
2.92M
    LWORD64 rd_opt_cost;
8428
2.92M
    WORD32 ctr;
8429
2.92M
    WORD32 i4_subtu_idx;
8430
2.92M
    WORD32 cu_size;
8431
2.92M
    WORD32 cu_pos_x, cu_pos_y;
8432
2.92M
    WORD32 chrm_present_flag = 1;
8433
2.92M
    WORD32 num_bytes, total_bytes = 0;
8434
2.92M
    WORD32 chrm_ctr = 0;
8435
2.92M
    WORD32 u1_is_cu_coded;
8436
2.92M
    UWORD8 *pu1_old_ecd_data;
8437
2.92M
    UWORD8 *pu1_chrm_old_ecd_data;
8438
2.92M
    UWORD8 *pu1_cur_pred;
8439
2.92M
    WORD16 *pi2_deq_data;
8440
2.92M
    WORD16 *pi2_chrm_deq_data;
8441
2.92M
    WORD16 *pi2_cur_deq_data;
8442
2.92M
    WORD16 *pi2_cur_deq_data_chrm;
8443
2.92M
    UWORD8 *pu1_cur_luma_recon;
8444
2.92M
    UWORD8 *pu1_cur_chroma_recon;
8445
2.92M
    UWORD8 *pu1_cur_src;
8446
2.92M
    UWORD8 *pu1_cur_src_chrm;
8447
2.92M
    UWORD8 *pu1_cur_pred_chrm;
8448
2.92M
    UWORD8 *pu1_intra_pred_mode;
8449
2.92M
    UWORD32 *pu4_nbr_flags;
8450
2.92M
    LWORD64 i8_ssd;
8451
8452
2.92M
    cu_nbr_prms_t *ps_cu_nbr_prms = ps_prms->ps_cu_nbr_prms;
8453
2.92M
    cu_inter_cand_t *ps_best_inter_cand = ps_prms->ps_best_inter_cand;
8454
2.92M
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms = ps_prms->ps_chrm_cu_buf_prms;
8455
8456
2.92M
    WORD32 packed_pred_mode = ps_prms->packed_pred_mode;
8457
2.92M
    WORD32 rd_opt_best_idx = ps_prms->rd_opt_best_idx;
8458
2.92M
    UWORD8 *pu1_src = (UWORD8 *)ps_prms->pv_src;
8459
2.92M
    WORD32 src_strd = ps_prms->src_strd;
8460
2.92M
    UWORD8 *pu1_pred = (UWORD8 *)ps_prms->pv_pred;
8461
2.92M
    WORD32 pred_strd = ps_prms->pred_strd;
8462
2.92M
    UWORD8 *pu1_pred_chrm = (UWORD8 *)ps_prms->pv_pred_chrm;
8463
2.92M
    WORD32 pred_chrm_strd = ps_prms->pred_chrm_strd;
8464
2.92M
    UWORD8 *pu1_final_ecd_data = ps_prms->pu1_final_ecd_data;
8465
2.92M
    UWORD8 *pu1_csbf_buf = ps_prms->pu1_csbf_buf;
8466
2.92M
    WORD32 csbf_strd = ps_prms->csbf_strd;
8467
2.92M
    UWORD8 *pu1_luma_recon = (UWORD8 *)ps_prms->pv_luma_recon;
8468
2.92M
    WORD32 recon_luma_strd = ps_prms->recon_luma_strd;
8469
2.92M
    UWORD8 *pu1_chrm_recon = (UWORD8 *)ps_prms->pv_chrm_recon;
8470
2.92M
    WORD32 recon_chrma_strd = ps_prms->recon_chrma_strd;
8471
2.92M
    UWORD8 u1_cu_pos_x = ps_prms->u1_cu_pos_x;
8472
2.92M
    UWORD8 u1_cu_pos_y = ps_prms->u1_cu_pos_y;
8473
2.92M
    UWORD8 u1_cu_size = ps_prms->u1_cu_size;
8474
2.92M
    WORD8 i1_cu_qp = ps_prms->i1_cu_qp;
8475
2.92M
    UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
8476
2.92M
    UWORD8 u1_num_subtus = (u1_is_422 == 1) + 1;
8477
    /* Get the Chroma pointer and parameters */
8478
2.92M
    UWORD8 *pu1_src_chrm = ps_chrm_cu_buf_prms->pu1_curr_src;
8479
2.92M
    WORD32 src_chrm_strd = ps_chrm_cu_buf_prms->i4_chrm_src_stride;
8480
2.92M
    UWORD8 u1_compute_spatial_ssd_luma = 0;
8481
2.92M
    UWORD8 u1_compute_spatial_ssd_chroma = 0;
8482
    /* Get the pointer for function selector */
8483
2.92M
    ihevc_intra_pred_luma_ref_substitution_fptr =
8484
2.92M
        ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
8485
8486
2.92M
    ihevc_intra_pred_ref_filtering_fptr =
8487
2.92M
        ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr;
8488
8489
2.92M
    ihevc_intra_pred_chroma_ref_substitution_fptr =
8490
2.92M
        ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
8491
8492
    /* Get the best CU parameters */
8493
2.92M
    ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
8494
2.92M
    num_tu_in_cu = ps_best_cu_prms->u2_num_tus_in_cu;
8495
2.92M
    cu_size = ps_best_cu_prms->u1_cu_size;
8496
2.92M
    cu_pos_x = u1_cu_pos_x;
8497
2.92M
    cu_pos_y = u1_cu_pos_y;
8498
2.92M
    pu1_intra_pred_mode = &ps_best_cu_prms->au1_intra_pred_mode[0];
8499
2.92M
    pu4_nbr_flags = &ps_best_cu_prms->au4_nbr_flags[0];
8500
2.92M
    ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore;
8501
8502
    /* get the first TU pointer */
8503
2.92M
    ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0];
8504
    /* get the first TU only enc_loop prms pointer */
8505
2.92M
    ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
8506
    /*modify quant related param in ctxt based on current cu qp*/
8507
2.92M
    if((ps_ctxt->i1_cu_qp_delta_enable))
8508
1.04M
    {
8509
        /*recompute quant related param at every cu level*/
8510
1.04M
        ihevce_compute_quant_rel_param(ps_ctxt, i1_cu_qp);
8511
8512
        /* get frame level lambda params */
8513
1.04M
        ihevce_get_cl_cu_lambda_prms(
8514
1.04M
            ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? i1_cu_qp : ps_ctxt->i4_frame_qp);
8515
1.04M
    }
8516
8517
2.92M
    ps_best_cu_prms->i8_cu_ssd = 0;
8518
2.92M
    ps_best_cu_prms->u4_cu_open_intra_sad = 0;
8519
8520
    /* For skip case : Set TU_size = CU_size and make cbf = 0
8521
    so that same TU loop can be used for all modes */
8522
2.92M
    if(PRED_MODE_SKIP == packed_pred_mode)
8523
244k
    {
8524
494k
        for(ctr = 0; ctr < num_tu_in_cu; ctr++)
8525
250k
        {
8526
250k
            ps_tu_enc_loop->s_tu.b1_y_cbf = 0;
8527
8528
250k
            ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = 0;
8529
8530
250k
            ps_tu_enc_loop++;
8531
250k
            ps_tu_enc_loop_temp_prms++;
8532
250k
        }
8533
8534
        /* go back to the first TU pointer */
8535
244k
        ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0];
8536
244k
        ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
8537
244k
    }
8538
    /**   For inter case, pred calculation is outside the loop     **/
8539
2.92M
    if(PRED_MODE_INTRA != packed_pred_mode)
8540
1.42M
    {
8541
        /**------------- Compute pred data if required --------------**/
8542
1.42M
        if((1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data))
8543
0
        {
8544
0
            nbr_4x4_t *ps_topleft_nbr_4x4;
8545
0
            nbr_4x4_t *ps_left_nbr_4x4;
8546
0
            nbr_4x4_t *ps_top_nbr_4x4;
8547
0
            WORD32 nbr_4x4_left_strd;
8548
8549
0
            ps_best_inter_cand->pu1_pred_data = pu1_pred;
8550
0
            ps_best_inter_cand->i4_pred_data_stride = pred_strd;
8551
8552
            /* Get the CU nbr information */
8553
0
            ps_topleft_nbr_4x4 = ps_cu_nbr_prms->ps_topleft_nbr_4x4;
8554
0
            ps_left_nbr_4x4 = ps_cu_nbr_prms->ps_left_nbr_4x4;
8555
0
            ps_top_nbr_4x4 = ps_cu_nbr_prms->ps_top_nbr_4x4;
8556
0
            nbr_4x4_left_strd = ps_cu_nbr_prms->nbr_4x4_left_strd;
8557
8558
            /* MVP ,MVD calc and Motion compensation */
8559
0
            rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)(
8560
0
                ps_ctxt,
8561
0
                ps_best_inter_cand,
8562
0
                u1_cu_size,
8563
0
                cu_pos_x,
8564
0
                cu_pos_y,
8565
0
                ps_left_nbr_4x4,
8566
0
                ps_top_nbr_4x4,
8567
0
                ps_topleft_nbr_4x4,
8568
0
                nbr_4x4_left_strd,
8569
0
                rd_opt_best_idx);
8570
0
        }
8571
8572
        /** ------ Motion Compensation for Chroma -------- **/
8573
1.42M
        if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data)
8574
413k
        {
8575
413k
            UWORD8 *pu1_cur_pred;
8576
413k
            pu1_cur_pred = pu1_pred_chrm;
8577
8578
            /* run a loop over all the partitions in cu */
8579
832k
            for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++)
8580
418k
            {
8581
418k
                pu_t *ps_pu;
8582
418k
                WORD32 inter_pu_wd, inter_pu_ht;
8583
8584
418k
                ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr];
8585
8586
                /* If AMP, each partition can have a different wd and ht */
8587
418k
                inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
8588
418k
                inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
8589
418k
                inter_pu_ht <<= u1_is_422;
8590
                /* chroma mc func */
8591
418k
                ihevce_chroma_inter_pred_pu(
8592
418k
                    &ps_ctxt->s_mc_ctxt, ps_pu, pu1_cur_pred, pred_chrm_strd);
8593
418k
                if(2 == ps_best_cu_prms->u2_num_pus_in_cu)
8594
9.71k
                {
8595
                    /* 2Nx__ partition case */
8596
9.71k
                    if(inter_pu_wd == ps_best_cu_prms->u1_cu_size)
8597
5.14k
                    {
8598
5.14k
                        pu1_cur_pred += (inter_pu_ht * pred_chrm_strd);
8599
5.14k
                    }
8600
                    /* __x2N partition case */
8601
9.71k
                    if(inter_pu_ht == (ps_best_cu_prms->u1_cu_size >> (u1_is_422 == 0)))
8602
4.56k
                    {
8603
4.56k
                        pu1_cur_pred += inter_pu_wd;
8604
4.56k
                    }
8605
9.71k
                }
8606
418k
            }
8607
413k
        }
8608
1.42M
    }
8609
2.92M
    pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0];
8610
2.92M
    pi2_chrm_deq_data =
8611
2.92M
        &ps_best_cu_prms->pi2_cu_deq_coeffs[0] + ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx;
8612
2.92M
    pu1_old_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0];
8613
2.92M
    pu1_chrm_old_ecd_data =
8614
2.92M
        &ps_best_cu_prms->pu1_cu_coeffs[0] + ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx;
8615
8616
    /* default value for cu coded flag */
8617
2.92M
    u1_is_cu_coded = 0;
8618
8619
    /* If we are re-computing coeff, set sad to 0 and start accumulating */
8620
    /* else use the best cand. sad from RDOPT stage                    */
8621
2.92M
    if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data)
8622
0
    {
8623
        /* init of sad of CU accumulated over all TUs */
8624
0
        ps_best_cu_prms->u4_cu_sad = 0;
8625
8626
        /* reset the luma residual bits */
8627
0
        ps_best_cu_prms->u4_cu_luma_res_bits = 0;
8628
0
    }
8629
8630
2.92M
    if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data)
8631
858k
    {
8632
        /* reset the chroma residual bits */
8633
858k
        ps_best_cu_prms->u4_cu_chroma_res_bits = 0;
8634
858k
    }
8635
8636
2.92M
    if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data) ||
8637
2.92M
       (1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data))
8638
858k
    {
8639
        /*Header bits have to be reevaluated if luma and chroma reevaluation is done, as
8640
        the quantized coefficients might be changed.
8641
        We are copying only those states which correspond to the header from the cabac state
8642
        of the previous CU, because the header is going to be recomputed for this condition*/
8643
858k
        ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1;
8644
858k
        memcpy(
8645
858k
            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
8646
858k
            &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
8647
858k
            IHEVC_CAB_COEFFX_PREFIX);
8648
8649
858k
        if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data))
8650
0
        {
8651
0
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
8652
0
                (&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX),
8653
0
                (&ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0] +
8654
0
                 IHEVC_CAB_COEFFX_PREFIX),
8655
0
                (IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX));
8656
0
        }
8657
858k
        else
8658
858k
        {
8659
858k
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
8660
858k
                (&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX),
8661
858k
                (&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
8662
858k
                      .s_cabac_ctxt.au1_ctxt_models[0] +
8663
858k
                 IHEVC_CAB_COEFFX_PREFIX),
8664
858k
                (IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX));
8665
858k
        }
8666
858k
        ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_best_idx;
8667
858k
    }
8668
2.06M
    else
8669
2.06M
    {
8670
2.06M
        ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0;
8671
2.06M
    }
8672
8673
    /* Zero cbf tool is disabled for intra CUs */
8674
2.92M
    if(PRED_MODE_INTRA == packed_pred_mode)
8675
1.49M
    {
8676
#if ENABLE_ZERO_CBF_IN_INTRA
8677
        ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
8678
#else
8679
1.49M
        ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
8680
1.49M
#endif
8681
1.49M
    }
8682
1.42M
    else
8683
1.42M
    {
8684
#if DISABLE_ZERO_ZBF_IN_INTER
8685
        ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
8686
#else
8687
1.42M
        ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
8688
1.42M
#endif
8689
1.42M
    }
8690
8691
    /** Loop for all tu blocks in current cu and do reconstruction **/
8692
8.10M
    for(ctr = 0; ctr < num_tu_in_cu; ctr++)
8693
5.17M
    {
8694
5.17M
        tu_t *ps_tu;
8695
5.17M
        WORD32 trans_size, num_4x4_in_tu;
8696
5.17M
        WORD32 cbf, zero_rows, zero_cols;
8697
5.17M
        WORD32 cu_pos_x_in_4x4, cu_pos_y_in_4x4;
8698
5.17M
        WORD32 cu_pos_x_in_pix, cu_pos_y_in_pix;
8699
5.17M
        WORD32 luma_pred_mode, chroma_pred_mode = 0;
8700
5.17M
        UWORD8 au1_is_recon_available[2];
8701
8702
5.17M
        ps_tu = &(ps_tu_enc_loop->s_tu); /* Points to the TU property ctxt */
8703
8704
5.17M
        u1_compute_spatial_ssd_luma = 0;
8705
5.17M
        u1_compute_spatial_ssd_chroma = 0;
8706
8707
5.17M
        trans_size = 1 << (ps_tu->b3_size + 2);
8708
5.17M
        num_4x4_in_tu = (trans_size >> 2);
8709
5.17M
        cu_pos_x_in_4x4 = ps_tu->b4_pos_x;
8710
5.17M
        cu_pos_y_in_4x4 = ps_tu->b4_pos_y;
8711
8712
        /* populate the coeffs scan idx */
8713
5.17M
        ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
8714
8715
        /* get the current pos x and pos y in pixels */
8716
5.17M
        cu_pos_x_in_pix = (cu_pos_x_in_4x4 << 2) - (cu_pos_x << 3);
8717
5.17M
        cu_pos_y_in_pix = (cu_pos_y_in_4x4 << 2) - (cu_pos_y << 3);
8718
8719
        /* Update pointers based on the location */
8720
5.17M
        pu1_cur_src = pu1_src + cu_pos_x_in_pix;
8721
5.17M
        pu1_cur_src += (cu_pos_y_in_pix * src_strd);
8722
5.17M
        pu1_cur_pred = pu1_pred + cu_pos_x_in_pix;
8723
5.17M
        pu1_cur_pred += (cu_pos_y_in_pix * pred_strd);
8724
8725
5.17M
        pu1_cur_luma_recon = pu1_luma_recon + cu_pos_x_in_pix;
8726
5.17M
        pu1_cur_luma_recon += (cu_pos_y_in_pix * recon_luma_strd);
8727
8728
5.17M
        pi2_cur_deq_data = pi2_deq_data + cu_pos_x_in_pix;
8729
5.17M
        pi2_cur_deq_data += cu_pos_y_in_pix * cu_size;
8730
8731
5.17M
        pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix;
8732
5.17M
        pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) +
8733
5.17M
                            (u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd));
8734
8735
5.17M
        pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix;
8736
5.17M
        pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) +
8737
5.17M
                             (u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd));
8738
8739
5.17M
        pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix;
8740
5.17M
        pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) +
8741
5.17M
                                (u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd));
8742
8743
5.17M
        pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix;
8744
5.17M
        pi2_cur_deq_data_chrm +=
8745
5.17M
            ((cu_pos_y_in_pix >> 1) * cu_size) + (u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size));
8746
8747
        /* if transform size is 4x4 then only first luma 4x4 will have chroma */
8748
5.17M
        chrm_present_flag = 1; /* by default chroma present is set to 1*/
8749
8750
5.17M
        if(4 == trans_size)
8751
1.67M
        {
8752
            /* if tusize is 4x4 then only first luma 4x4 will have chroma*/
8753
1.67M
            if(0 != chrm_ctr)
8754
1.25M
            {
8755
1.25M
                chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE;
8756
1.25M
            }
8757
8758
            /* increment the chrm ctr unconditionally */
8759
1.67M
            chrm_ctr++;
8760
            /* after ctr reached 4 reset it */
8761
1.67M
            if(4 == chrm_ctr)
8762
419k
            {
8763
419k
                chrm_ctr = 0;
8764
419k
            }
8765
1.67M
        }
8766
8767
        /**------------- Compute pred data if required --------------**/
8768
5.17M
        if(PRED_MODE_INTRA == packed_pred_mode) /* Inter pred calc. is done outside loop */
8769
2.67M
        {
8770
            /* Get the pred mode for scan idx calculation, even if pred is not required */
8771
2.67M
            luma_pred_mode = *pu1_intra_pred_mode;
8772
8773
2.67M
            if((ps_ctxt->i4_rc_pass == 1) ||
8774
2.67M
               (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data))
8775
0
            {
8776
0
                WORD32 nbr_flags;
8777
0
                WORD32 luma_pred_func_idx;
8778
0
                UWORD8 *pu1_left;
8779
0
                UWORD8 *pu1_top;
8780
0
                UWORD8 *pu1_top_left;
8781
0
                WORD32 left_strd;
8782
8783
                /* left cu boundary */
8784
0
                if(0 == cu_pos_x_in_pix)
8785
0
                {
8786
0
                    left_strd = ps_cu_nbr_prms->cu_left_stride;
8787
0
                    pu1_left = ps_cu_nbr_prms->pu1_cu_left + cu_pos_y_in_pix * left_strd;
8788
0
                }
8789
0
                else
8790
0
                {
8791
0
                    pu1_left = pu1_cur_luma_recon - 1;
8792
0
                    left_strd = recon_luma_strd;
8793
0
                }
8794
8795
                /* top cu boundary */
8796
0
                if(0 == cu_pos_y_in_pix)
8797
0
                {
8798
0
                    pu1_top = ps_cu_nbr_prms->pu1_cu_top + cu_pos_x_in_pix;
8799
0
                }
8800
0
                else
8801
0
                {
8802
0
                    pu1_top = pu1_cur_luma_recon - recon_luma_strd;
8803
0
                }
8804
8805
                /* by default top left is set to cu top left */
8806
0
                pu1_top_left = ps_cu_nbr_prms->pu1_cu_top_left;
8807
8808
                /* top left based on position */
8809
0
                if((0 != cu_pos_y_in_pix) && (0 == cu_pos_x_in_pix))
8810
0
                {
8811
0
                    pu1_top_left = pu1_left - left_strd;
8812
0
                }
8813
0
                else if(0 != cu_pos_x_in_pix)
8814
0
                {
8815
0
                    pu1_top_left = pu1_top - 1;
8816
0
                }
8817
8818
                /* get the neighbour availability flags */
8819
0
                nbr_flags = ihevce_get_nbr_intra(
8820
0
                    &s_nbr,
8821
0
                    ps_ctxt->pu1_ctb_nbr_map,
8822
0
                    ps_ctxt->i4_nbr_map_strd,
8823
0
                    cu_pos_x_in_4x4,
8824
0
                    cu_pos_y_in_4x4,
8825
0
                    num_4x4_in_tu);
8826
8827
0
                if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data)
8828
0
                {
8829
                    /* copy the nbr flags for chroma reuse */
8830
0
                    if(4 != trans_size)
8831
0
                    {
8832
0
                        *pu4_nbr_flags = nbr_flags;
8833
0
                    }
8834
0
                    else if(1 == chrm_present_flag)
8835
0
                    {
8836
                        /* compute the avail flags assuming luma trans is 8x8 */
8837
                        /* get the neighbour availability flags */
8838
0
                        *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
8839
0
                            ps_ctxt->pu1_ctb_nbr_map,
8840
0
                            ps_ctxt->i4_nbr_map_strd,
8841
0
                            cu_pos_x_in_4x4,
8842
0
                            cu_pos_y_in_4x4,
8843
0
                            (num_4x4_in_tu << 1),
8844
0
                            (num_4x4_in_tu << 1));
8845
0
                    }
8846
8847
                    /* call reference array substitution */
8848
0
                    ihevc_intra_pred_luma_ref_substitution_fptr(
8849
0
                        pu1_top_left,
8850
0
                        pu1_top,
8851
0
                        pu1_left,
8852
0
                        left_strd,
8853
0
                        trans_size,
8854
0
                        nbr_flags,
8855
0
                        (UWORD8 *)ps_ctxt->pv_ref_sub_out,
8856
0
                        1);
8857
8858
                    /* call reference filtering */
8859
0
                    ihevc_intra_pred_ref_filtering_fptr(
8860
0
                        (UWORD8 *)ps_ctxt->pv_ref_sub_out,
8861
0
                        trans_size,
8862
0
                        (UWORD8 *)ps_ctxt->pv_ref_filt_out,
8863
0
                        luma_pred_mode,
8864
0
                        ps_ctxt->i1_strong_intra_smoothing_enable_flag);
8865
8866
                    /* use the look up to get the function idx */
8867
0
                    luma_pred_func_idx = g_i4_ip_funcs[luma_pred_mode];
8868
8869
                    /* call the intra prediction function */
8870
0
                    ps_ctxt->apf_lum_ip[luma_pred_func_idx](
8871
0
                        (UWORD8 *)ps_ctxt->pv_ref_filt_out,
8872
0
                        1,
8873
0
                        pu1_cur_pred,
8874
0
                        pred_strd,
8875
0
                        trans_size,
8876
0
                        luma_pred_mode);
8877
0
                }
8878
0
            }
8879
2.67M
            else if(
8880
2.67M
                (1 == chrm_present_flag) &&
8881
2.67M
                (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data))
8882
563k
            {
8883
563k
                WORD32 temp_num_4x4_in_tu = num_4x4_in_tu;
8884
8885
563k
                if(4 == trans_size) /* compute the avail flags assuming luma trans is 8x8 */
8886
87.7k
                {
8887
87.7k
                    temp_num_4x4_in_tu = num_4x4_in_tu << 1;
8888
87.7k
                }
8889
8890
563k
                *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
8891
563k
                    ps_ctxt->pu1_ctb_nbr_map,
8892
563k
                    ps_ctxt->i4_nbr_map_strd,
8893
563k
                    cu_pos_x_in_4x4,
8894
563k
                    cu_pos_y_in_4x4,
8895
563k
                    temp_num_4x4_in_tu,
8896
563k
                    temp_num_4x4_in_tu);
8897
563k
            }
8898
8899
            /* Get the pred mode for scan idx calculation, even if pred is not required */
8900
2.67M
            chroma_pred_mode = ps_best_cu_prms->u1_chroma_intra_pred_actual_mode;
8901
2.67M
        }
8902
8903
5.17M
        if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data)
8904
0
        {
8905
0
            WORD32 temp_bits;
8906
0
            LWORD64 temp_cost;
8907
0
            UWORD32 u4_tu_sad;
8908
0
            WORD32 perform_sbh, perform_rdoq;
8909
8910
0
            if(PRED_MODE_INTRA == packed_pred_mode)
8911
0
            {
8912
                /* for luma 4x4 and 8x8 transforms the scan is chosen based on intra pred mode */
8913
0
                if(trans_size < 16)
8914
0
                {
8915
                    /* for modes from 22 upto 30 horizontal scan is used */
8916
0
                    if((luma_pred_mode > 21) && (luma_pred_mode < 31))
8917
0
                    {
8918
0
                        ps_ctxt->i4_scan_idx = SCAN_HORZ;
8919
0
                    }
8920
                    /* for modes from 6 up to 14 vertical scan is used */
8921
0
                    else if((luma_pred_mode > 5) && (luma_pred_mode < 15))
8922
0
                    {
8923
0
                        ps_ctxt->i4_scan_idx = SCAN_VERT;
8924
0
                    }
8925
0
                }
8926
0
            }
8927
8928
            /* RDOPT copy States :  TU init (best until prev TU) to current */
8929
0
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
8930
0
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
8931
0
                        .s_cabac_ctxt.au1_ctxt_models[0] +
8932
0
                    IHEVC_CAB_COEFFX_PREFIX,
8933
0
                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
8934
0
                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
8935
8936
0
            if(ps_prms->u1_recompute_sbh_and_rdoq)
8937
0
            {
8938
0
                perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH);
8939
0
                perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ);
8940
0
            }
8941
0
            else
8942
0
            {
8943
                /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
8944
0
                perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh;
8945
                /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
8946
                we would have to do RDOQ again.*/
8947
0
                perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq;
8948
0
            }
8949
8950
#if DISABLE_RDOQ_INTRA
8951
            if(PRED_MODE_INTRA == packed_pred_mode)
8952
            {
8953
                perform_rdoq = 0;
8954
            }
8955
#endif
8956
            /* If BEST candidate RDOQ is enabled, either no coef level rdoq or CU level rdoq has to be enabled
8957
            so that all candidates and best candidate are quantized with same rounding factor  */
8958
0
            if(1 == perform_rdoq)
8959
0
            {
8960
0
                ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING);
8961
0
            }
8962
8963
0
            cbf = ihevce_t_q_iq_ssd_scan_fxn(
8964
0
                ps_ctxt,
8965
0
                pu1_cur_pred,
8966
0
                pred_strd,
8967
0
                pu1_cur_src,
8968
0
                src_strd,
8969
0
                pi2_cur_deq_data,
8970
0
                cu_size, /*deq_data stride is cu_size*/
8971
0
                pu1_cur_luma_recon,
8972
0
                recon_luma_strd,
8973
0
                pu1_final_ecd_data,
8974
0
                pu1_csbf_buf,
8975
0
                csbf_strd,
8976
0
                trans_size,
8977
0
                packed_pred_mode,
8978
0
                &temp_cost,
8979
0
                &num_bytes,
8980
0
                &temp_bits,
8981
0
                &u4_tu_sad,
8982
0
                &zero_cols,
8983
0
                &zero_rows,
8984
0
                &au1_is_recon_available[0],
8985
0
                perform_rdoq,  //(BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level),
8986
0
                perform_sbh,
8987
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
8988
0
                !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
8989
0
                                       : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
8990
0
                                          (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
8991
0
                                             100.0,
8992
0
                ps_prms->u1_is_cu_noisy,
8993
0
#endif
8994
0
                u1_compute_spatial_ssd_luma ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
8995
0
                1 /*early cbf*/
8996
0
            );  //(BEST_CAND_SBH == ps_ctxt->i4_sbh_level));
8997
8998
            /* Accumulate luma residual bits */
8999
0
            ps_best_cu_prms->u4_cu_luma_res_bits += temp_bits;
9000
9001
            /* RDOPT copy States :  New updated after curr TU to TU init */
9002
0
            if(0 != cbf)
9003
0
            {
9004
                /* update to new state only if CBF is non zero */
9005
0
                COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9006
0
                    &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9007
0
                    &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9008
0
                            .s_cabac_ctxt.au1_ctxt_models[0] +
9009
0
                        IHEVC_CAB_COEFFX_PREFIX,
9010
0
                    IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9011
0
            }
9012
9013
            /* accumulate the TU sad into cu sad */
9014
0
            ps_best_cu_prms->u4_cu_sad += u4_tu_sad;
9015
0
            ps_tu->b1_y_cbf = cbf;
9016
0
            ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = num_bytes;
9017
9018
            /* If somebody updates cbf (RDOQ or SBH), update in nbr str. for BS */
9019
0
            if((ps_prms->u1_will_cabac_state_change) && (!ps_prms->u1_is_first_pass))
9020
0
            {
9021
0
                WORD32 num_4x4_in_cu = u1_cu_size >> 2;
9022
0
                nbr_4x4_t *ps_cur_nbr_4x4 = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0];
9023
0
                ps_cur_nbr_4x4 = (ps_cur_nbr_4x4 + (cu_pos_x_in_pix >> 2));
9024
0
                ps_cur_nbr_4x4 += ((cu_pos_y_in_pix >> 2) * num_4x4_in_cu);
9025
                /* replicate the nbr 4x4 structure for all 4x4 blocks in current TU */
9026
0
                ps_cur_nbr_4x4->b1_y_cbf = cbf;
9027
                /*copy the cu qp. This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/
9028
0
                ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
9029
                /* Qp and cbf are stored for the all 4x4 in TU */
9030
0
                {
9031
0
                    WORD32 i, j;
9032
0
                    nbr_4x4_t *ps_tmp_4x4;
9033
0
                    ps_tmp_4x4 = ps_cur_nbr_4x4;
9034
9035
0
                    for(i = 0; i < num_4x4_in_tu; i++)
9036
0
                    {
9037
0
                        for(j = 0; j < num_4x4_in_tu; j++)
9038
0
                        {
9039
0
                            ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp;
9040
0
                            ps_tmp_4x4[j].b1_y_cbf = cbf;
9041
0
                        }
9042
                        /* row level update*/
9043
0
                        ps_tmp_4x4 += num_4x4_in_cu;
9044
0
                    }
9045
0
                }
9046
0
            }
9047
0
        }
9048
5.17M
        else
9049
5.17M
        {
9050
5.17M
            zero_cols = ps_tu_enc_loop_temp_prms->u4_luma_zero_col;
9051
5.17M
            zero_rows = ps_tu_enc_loop_temp_prms->u4_luma_zero_row;
9052
9053
5.17M
            if(ps_prms->u1_will_cabac_state_change)
9054
5.17M
            {
9055
5.17M
                num_bytes = ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed;
9056
5.17M
            }
9057
0
            else
9058
0
            {
9059
0
                num_bytes = 0;
9060
0
            }
9061
9062
            /* copy luma ecd data to final buffer */
9063
5.17M
            memcpy(pu1_final_ecd_data, pu1_old_ecd_data, num_bytes);
9064
9065
5.17M
            pu1_old_ecd_data += num_bytes;
9066
9067
5.17M
            au1_is_recon_available[0] = 0;
9068
5.17M
        }
9069
9070
        /**-------- Compute Recon data (Do IT & Recon) : Luma  -----------**/
9071
5.17M
        if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9072
5.17M
           (!u1_compute_spatial_ssd_luma ||
9073
5.11M
            (!au1_is_recon_available[0] && u1_compute_spatial_ssd_luma)))
9074
5.11M
        {
9075
5.11M
            if(!ps_recon_datastore->u1_is_lumaRecon_available ||
9076
5.11M
               (ps_recon_datastore->u1_is_lumaRecon_available &&
9077
3.00M
                (UCHAR_MAX == ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr])))
9078
2.41M
            {
9079
2.41M
                ihevce_it_recon_fxn(
9080
2.41M
                    ps_ctxt,
9081
2.41M
                    pi2_cur_deq_data,
9082
2.41M
                    cu_size,
9083
2.41M
                    pu1_cur_pred,
9084
2.41M
                    pred_strd,
9085
2.41M
                    pu1_cur_luma_recon,
9086
2.41M
                    recon_luma_strd,
9087
2.41M
                    pu1_final_ecd_data,
9088
2.41M
                    trans_size,
9089
2.41M
                    packed_pred_mode,
9090
2.41M
                    ps_tu->b1_y_cbf,
9091
2.41M
                    zero_cols,
9092
2.41M
                    zero_rows);
9093
2.41M
            }
9094
2.70M
            else if(
9095
2.70M
                ps_recon_datastore->u1_is_lumaRecon_available &&
9096
2.70M
                (UCHAR_MAX != ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]))
9097
2.70M
            {
9098
2.70M
                UWORD8 *pu1_recon_src =
9099
2.70M
                    ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
9100
2.70M
                         [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]]) +
9101
2.70M
                    cu_pos_x_in_pix + cu_pos_y_in_pix * ps_recon_datastore->i4_lumaRecon_stride;
9102
9103
2.70M
                ps_ctxt->s_cmn_opt_func.pf_copy_2d(
9104
2.70M
                    pu1_cur_luma_recon,
9105
2.70M
                    recon_luma_strd,
9106
2.70M
                    pu1_recon_src,
9107
2.70M
                    ps_recon_datastore->i4_lumaRecon_stride,
9108
2.70M
                    trans_size,
9109
2.70M
                    trans_size);
9110
2.70M
            }
9111
5.11M
        }
9112
9113
5.17M
        if(ps_prms->u1_will_cabac_state_change)
9114
5.17M
        {
9115
5.17M
            ps_tu_enc_loop->i4_luma_coeff_offset = total_bytes;
9116
5.17M
        }
9117
9118
5.17M
        pu1_final_ecd_data += num_bytes;
9119
        /* update total bytes consumed */
9120
5.17M
        total_bytes += num_bytes;
9121
9122
5.17M
        u1_is_cu_coded |= ps_tu->b1_y_cbf;
9123
9124
        /***************** Compute T,Q,IQ,IT & Recon for Chroma ********************/
9125
5.17M
        if(1 == chrm_present_flag)
9126
3.91M
        {
9127
3.91M
            pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix;
9128
3.91M
            pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) +
9129
3.91M
                                (u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd));
9130
9131
3.91M
            pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix;
9132
3.91M
            pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) +
9133
3.91M
                                 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd));
9134
9135
3.91M
            pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix;
9136
3.91M
            pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) +
9137
3.91M
                                    (u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd));
9138
9139
3.91M
            pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix;
9140
3.91M
            pi2_cur_deq_data_chrm += ((cu_pos_y_in_pix >> 1) * cu_size) +
9141
3.91M
                                     (u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size));
9142
9143
3.91M
            if(INCLUDE_CHROMA_DURING_TU_RECURSION &&
9144
3.91M
               (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0) &&
9145
3.91M
               (PRED_MODE_INTRA != packed_pred_mode))
9146
0
            {
9147
0
                WORD32 i4_num_bytes;
9148
0
                UWORD8 *pu1_chroma_pred;
9149
0
                UWORD8 *pu1_chroma_recon;
9150
0
                WORD16 *pi2_chroma_deq;
9151
0
                UWORD32 u4_zero_col;
9152
0
                UWORD32 u4_zero_row;
9153
9154
0
                for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
9155
0
                {
9156
0
                    WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
9157
0
                    WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
9158
0
                    WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
9159
9160
0
                    if(0 == u1_is_422)
9161
0
                    {
9162
0
                        i4_subtu_pos_y >>= 1;
9163
0
                    }
9164
9165
0
                    pu1_chroma_pred =
9166
0
                        pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
9167
0
                    pu1_chroma_recon = pu1_cur_chroma_recon +
9168
0
                                       (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
9169
0
                    pi2_chroma_deq =
9170
0
                        pi2_cur_deq_data_chrm + (i4_subtu_idx * chroma_trans_size * cu_size);
9171
9172
0
                    u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx];
9173
0
                    u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx];
9174
9175
0
                    if(ps_prms->u1_will_cabac_state_change)
9176
0
                    {
9177
0
                        i4_num_bytes =
9178
0
                            ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx];
9179
0
                    }
9180
0
                    else
9181
0
                    {
9182
0
                        i4_num_bytes = 0;
9183
0
                    }
9184
9185
0
                    memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes);
9186
9187
0
                    pu1_old_ecd_data += i4_num_bytes;
9188
9189
0
                    au1_is_recon_available[U_PLANE] = 0;
9190
9191
0
                    if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9192
0
                       (!u1_compute_spatial_ssd_chroma ||
9193
0
                        (!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma)))
9194
0
                    {
9195
0
                        if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9196
0
                           (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9197
0
                            (UCHAR_MAX ==
9198
0
                             ps_recon_datastore
9199
0
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])))
9200
0
                        {
9201
0
                            ihevce_chroma_it_recon_fxn(
9202
0
                                ps_ctxt,
9203
0
                                pi2_chroma_deq,
9204
0
                                cu_size,
9205
0
                                pu1_chroma_pred,
9206
0
                                pred_chrm_strd,
9207
0
                                pu1_chroma_recon,
9208
0
                                recon_chrma_strd,
9209
0
                                pu1_final_ecd_data,
9210
0
                                chroma_trans_size,
9211
0
                                (i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1,
9212
0
                                u4_zero_col,
9213
0
                                u4_zero_row,
9214
0
                                U_PLANE);
9215
0
                        }
9216
0
                        else if(
9217
0
                            ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9218
0
                            (UCHAR_MAX !=
9219
0
                             ps_recon_datastore
9220
0
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))
9221
0
                        {
9222
0
                            UWORD8 *pu1_recon_src =
9223
0
                                ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9224
0
                                     [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9225
0
                                          [U_PLANE][ctr][i4_subtu_idx]]) +
9226
0
                                i4_subtu_pos_x +
9227
0
                                i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9228
9229
0
                            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9230
0
                                pu1_recon_src,
9231
0
                                ps_recon_datastore->i4_lumaRecon_stride,
9232
0
                                pu1_chroma_recon,
9233
0
                                recon_chrma_strd,
9234
0
                                chroma_trans_size,
9235
0
                                chroma_trans_size,
9236
0
                                U_PLANE);
9237
0
                        }
9238
0
                    }
9239
9240
0
                    u1_is_cu_coded |=
9241
0
                        ((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf);
9242
9243
0
                    pu1_final_ecd_data += i4_num_bytes;
9244
0
                    total_bytes += i4_num_bytes;
9245
0
                }
9246
9247
0
                for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
9248
0
                {
9249
0
                    WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
9250
0
                    WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
9251
0
                    WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
9252
9253
0
                    if(0 == u1_is_422)
9254
0
                    {
9255
0
                        i4_subtu_pos_y >>= 1;
9256
0
                    }
9257
9258
0
                    pu1_chroma_pred =
9259
0
                        pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
9260
0
                    pu1_chroma_recon = pu1_cur_chroma_recon +
9261
0
                                       (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
9262
0
                    pi2_chroma_deq = pi2_cur_deq_data_chrm +
9263
0
                                     (i4_subtu_idx * chroma_trans_size * cu_size) +
9264
0
                                     chroma_trans_size;
9265
9266
0
                    u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx];
9267
0
                    u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx];
9268
9269
0
                    if(ps_prms->u1_will_cabac_state_change)
9270
0
                    {
9271
0
                        i4_num_bytes =
9272
0
                            ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx];
9273
0
                    }
9274
0
                    else
9275
0
                    {
9276
0
                        i4_num_bytes = 0;
9277
0
                    }
9278
9279
0
                    memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes);
9280
9281
0
                    pu1_old_ecd_data += i4_num_bytes;
9282
9283
0
                    au1_is_recon_available[V_PLANE] = 0;
9284
9285
0
                    if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9286
0
                       (!u1_compute_spatial_ssd_chroma ||
9287
0
                        (!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma)))
9288
0
                    {
9289
0
                        if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9290
0
                           (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9291
0
                            (UCHAR_MAX ==
9292
0
                             ps_recon_datastore
9293
0
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])))
9294
0
                        {
9295
0
                            ihevce_chroma_it_recon_fxn(
9296
0
                                ps_ctxt,
9297
0
                                pi2_chroma_deq,
9298
0
                                cu_size,
9299
0
                                pu1_chroma_pred,
9300
0
                                pred_chrm_strd,
9301
0
                                pu1_chroma_recon,
9302
0
                                recon_chrma_strd,
9303
0
                                pu1_final_ecd_data,
9304
0
                                chroma_trans_size,
9305
0
                                (i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1,
9306
0
                                u4_zero_col,
9307
0
                                u4_zero_row,
9308
0
                                V_PLANE);
9309
0
                        }
9310
0
                        else if(
9311
0
                            ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9312
0
                            (UCHAR_MAX !=
9313
0
                             ps_recon_datastore
9314
0
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))
9315
0
                        {
9316
0
                            UWORD8 *pu1_recon_src =
9317
0
                                ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9318
0
                                     [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9319
0
                                          [V_PLANE][ctr][i4_subtu_idx]]) +
9320
0
                                i4_subtu_pos_x +
9321
0
                                i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9322
9323
0
                            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9324
0
                                pu1_recon_src,
9325
0
                                ps_recon_datastore->i4_lumaRecon_stride,
9326
0
                                pu1_chroma_recon,
9327
0
                                recon_chrma_strd,
9328
0
                                chroma_trans_size,
9329
0
                                chroma_trans_size,
9330
0
                                V_PLANE);
9331
0
                        }
9332
0
                    }
9333
9334
0
                    u1_is_cu_coded |=
9335
0
                        ((1 == i4_subtu_idx) ? ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf);
9336
9337
0
                    pu1_final_ecd_data += i4_num_bytes;
9338
0
                    total_bytes += i4_num_bytes;
9339
0
                }
9340
0
            }
9341
3.91M
            else
9342
3.91M
            {
9343
3.91M
                WORD32 cb_zero_col, cb_zero_row, cr_zero_col, cr_zero_row;
9344
9345
7.83M
                for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
9346
3.91M
                {
9347
3.91M
                    WORD32 cb_cbf, cr_cbf;
9348
3.91M
                    WORD32 cb_num_bytes, cr_num_bytes;
9349
9350
3.91M
                    WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
9351
9352
3.91M
                    WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
9353
3.91M
                    WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
9354
9355
3.91M
                    if(0 == u1_is_422)
9356
3.91M
                    {
9357
3.91M
                        i4_subtu_pos_y >>= 1;
9358
3.91M
                    }
9359
9360
3.91M
                    pu1_cur_src_chrm += (i4_subtu_idx * chroma_trans_size * src_chrm_strd);
9361
3.91M
                    pu1_cur_pred_chrm += (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
9362
3.91M
                    pu1_cur_chroma_recon += (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
9363
3.91M
                    pi2_cur_deq_data_chrm += (i4_subtu_idx * chroma_trans_size * cu_size);
9364
9365
3.91M
                    if((PRED_MODE_INTRA == packed_pred_mode) &&
9366
3.91M
                       (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data))
9367
563k
                    {
9368
563k
                        WORD32 nbr_flags, left_strd_chrm, chrm_pred_func_idx;
9369
563k
                        UWORD8 *pu1_left_chrm;
9370
563k
                        UWORD8 *pu1_top_chrm;
9371
563k
                        UWORD8 *pu1_top_left_chrm;
9372
9373
563k
                        nbr_flags = ihevce_get_intra_chroma_tu_nbr(
9374
563k
                            *pu4_nbr_flags, i4_subtu_idx, chroma_trans_size, u1_is_422);
9375
9376
                        /* left cu boundary */
9377
563k
                        if(0 == i4_subtu_pos_x)
9378
483k
                        {
9379
483k
                            left_strd_chrm = ps_chrm_cu_buf_prms->i4_cu_left_stride;
9380
483k
                            pu1_left_chrm =
9381
483k
                                ps_chrm_cu_buf_prms->pu1_cu_left + i4_subtu_pos_y * left_strd_chrm;
9382
483k
                        }
9383
79.8k
                        else
9384
79.8k
                        {
9385
79.8k
                            pu1_left_chrm = pu1_cur_chroma_recon - 2;
9386
79.8k
                            left_strd_chrm = recon_chrma_strd;
9387
79.8k
                        }
9388
9389
                        /* top cu boundary */
9390
563k
                        if(0 == i4_subtu_pos_y)
9391
483k
                        {
9392
483k
                            pu1_top_chrm = ps_chrm_cu_buf_prms->pu1_cu_top + i4_subtu_pos_x;
9393
483k
                        }
9394
79.8k
                        else
9395
79.8k
                        {
9396
79.8k
                            pu1_top_chrm = pu1_cur_chroma_recon - recon_chrma_strd;
9397
79.8k
                        }
9398
9399
                        /* by default top left is set to cu top left */
9400
563k
                        pu1_top_left_chrm = ps_chrm_cu_buf_prms->pu1_cu_top_left;
9401
9402
                        /* top left based on position */
9403
563k
                        if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x))
9404
39.9k
                        {
9405
39.9k
                            pu1_top_left_chrm = pu1_left_chrm - left_strd_chrm;
9406
39.9k
                        }
9407
523k
                        else if(0 != i4_subtu_pos_x)
9408
79.8k
                        {
9409
79.8k
                            pu1_top_left_chrm = pu1_top_chrm - 2;
9410
79.8k
                        }
9411
9412
                        /* call the chroma reference array substitution */
9413
563k
                        ihevc_intra_pred_chroma_ref_substitution_fptr(
9414
563k
                            pu1_top_left_chrm,
9415
563k
                            pu1_top_chrm,
9416
563k
                            pu1_left_chrm,
9417
563k
                            left_strd_chrm,
9418
563k
                            chroma_trans_size,
9419
563k
                            nbr_flags,
9420
563k
                            (UWORD8 *)ps_ctxt->pv_ref_sub_out,
9421
563k
                            1);
9422
9423
                        /* use the look up to get the function idx */
9424
563k
                        chrm_pred_func_idx = g_i4_ip_funcs[chroma_pred_mode];
9425
9426
                        /* call the intra prediction function */
9427
563k
                        ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
9428
563k
                            (UWORD8 *)ps_ctxt->pv_ref_sub_out,
9429
563k
                            1,
9430
563k
                            pu1_cur_pred_chrm,
9431
563k
                            pred_chrm_strd,
9432
563k
                            chroma_trans_size,
9433
563k
                            chroma_pred_mode);
9434
563k
                    }
9435
9436
                    /**---------- Compute iq&coeff data if required : Chroma ------------**/
9437
3.91M
                    if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data)
9438
1.25M
                    {
9439
1.25M
                        WORD32 perform_sbh, perform_rdoq, temp_bits;
9440
9441
1.25M
                        if(ps_prms->u1_recompute_sbh_and_rdoq)
9442
0
                        {
9443
0
                            perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH);
9444
0
                            perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ);
9445
0
                        }
9446
1.25M
                        else
9447
1.25M
                        {
9448
                            /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
9449
1.25M
                            perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh;
9450
                            /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
9451
                        we would have to do RDOQ again.*/
9452
1.25M
                            perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq;
9453
1.25M
                        }
9454
9455
                        /* populate the coeffs scan idx */
9456
1.25M
                        ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
9457
9458
1.25M
                        if(PRED_MODE_INTRA == packed_pred_mode)
9459
563k
                        {
9460
                            /* for 4x4 transforms the scan is chosen based on the intra pred mode */
9461
563k
                            if(4 == chroma_trans_size)
9462
289k
                            {
9463
                                /* for modes from 22 upto 30 horizontal scan is used */
9464
289k
                                if((chroma_pred_mode > 21) && (chroma_pred_mode < 31))
9465
40.8k
                                {
9466
40.8k
                                    ps_ctxt->i4_scan_idx = SCAN_HORZ;
9467
40.8k
                                }
9468
                                /* for modes from 6 upto 14 vertical scan is used */
9469
248k
                                else if((chroma_pred_mode > 5) && (chroma_pred_mode < 15))
9470
70.7k
                                {
9471
70.7k
                                    ps_ctxt->i4_scan_idx = SCAN_VERT;
9472
70.7k
                                }
9473
289k
                            }
9474
563k
                        }
9475
9476
#if DISABLE_RDOQ_INTRA
9477
                        if(PRED_MODE_INTRA == packed_pred_mode)
9478
                        {
9479
                            perform_rdoq = 0;
9480
                        }
9481
#endif
9482
9483
                        /* RDOPT copy States :  TU init (best until prev TU) to current */
9484
1.25M
                        COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9485
1.25M
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9486
1.25M
                                    .s_cabac_ctxt.au1_ctxt_models[0] +
9487
1.25M
                                IHEVC_CAB_COEFFX_PREFIX,
9488
1.25M
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9489
1.25M
                            IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9490
9491
1.25M
                        ASSERT(rd_opt_best_idx == ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx);
9492
                        /*If BEST candidate RDOQ is enabled, either no coef level rdoq or CU level rdoq has to be enabled
9493
                    so that all candidates and best candidate are quantized with same rounding factor  */
9494
1.25M
                        if(1 == perform_rdoq)
9495
0
                        {
9496
0
                            ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING);
9497
0
                        }
9498
9499
1.25M
                        if(!ps_best_cu_prms->u1_skip_flag ||
9500
1.25M
                           !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
9501
1.25M
                        {
9502
                            /* Cb */
9503
1.25M
                            cb_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
9504
1.25M
                                ps_ctxt,
9505
1.25M
                                pu1_cur_pred_chrm,
9506
1.25M
                                pred_chrm_strd,
9507
1.25M
                                pu1_cur_src_chrm,
9508
1.25M
                                src_chrm_strd,
9509
1.25M
                                pi2_cur_deq_data_chrm,
9510
1.25M
                                cu_size,
9511
1.25M
                                pu1_chrm_recon,
9512
1.25M
                                recon_chrma_strd,
9513
1.25M
                                pu1_final_ecd_data,
9514
1.25M
                                pu1_csbf_buf,
9515
1.25M
                                csbf_strd,
9516
1.25M
                                chroma_trans_size,
9517
1.25M
                                ps_ctxt->i4_scan_idx,
9518
1.25M
                                (PRED_MODE_INTRA == packed_pred_mode),
9519
1.25M
                                &cb_num_bytes,
9520
1.25M
                                &temp_bits,
9521
1.25M
                                &cb_zero_col,
9522
1.25M
                                &cb_zero_row,
9523
1.25M
                                &au1_is_recon_available[U_PLANE],
9524
1.25M
                                perform_sbh,
9525
1.25M
                                perform_rdoq,
9526
1.25M
                                &i8_ssd,
9527
1.25M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
9528
1.25M
                                !ps_ctxt->u1_is_refPic
9529
1.25M
                                    ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
9530
1.25M
                                    : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
9531
1.15M
                                       (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
9532
1.15M
                                          100.0,
9533
1.25M
                                ps_prms->u1_is_cu_noisy,
9534
1.25M
#endif
9535
1.25M
                                ps_best_cu_prms->u1_skip_flag &&
9536
1.25M
                                    ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt,
9537
1.25M
                                u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD
9538
1.25M
                                                              : FREQUENCY_DOMAIN_SSD,
9539
1.25M
                                U_PLANE);
9540
1.25M
                        }
9541
0
                        else
9542
0
                        {
9543
0
                            cb_cbf = 0;
9544
0
                            temp_bits = 0;
9545
0
                            cb_num_bytes = 0;
9546
0
                            au1_is_recon_available[U_PLANE] = 0;
9547
0
                            cb_zero_col = 0;
9548
0
                            cb_zero_row = 0;
9549
0
                        }
9550
9551
                        /* Accumulate chroma residual bits */
9552
1.25M
                        ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits;
9553
9554
                        /* RDOPT copy States :  New updated after curr TU to TU init */
9555
1.25M
                        if(0 != cb_cbf)
9556
294k
                        {
9557
294k
                            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9558
294k
                                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9559
294k
                                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9560
294k
                                        .s_cabac_ctxt.au1_ctxt_models[0] +
9561
294k
                                    IHEVC_CAB_COEFFX_PREFIX,
9562
294k
                                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9563
294k
                        }
9564
                        /* RDOPT copy States :  Restoring back the Cb init state to Cr */
9565
958k
                        else
9566
958k
                        {
9567
958k
                            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9568
958k
                                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9569
958k
                                        .s_cabac_ctxt.au1_ctxt_models[0] +
9570
958k
                                    IHEVC_CAB_COEFFX_PREFIX,
9571
958k
                                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9572
958k
                                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9573
958k
                        }
9574
9575
1.25M
                        if(!ps_best_cu_prms->u1_skip_flag ||
9576
1.25M
                           !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
9577
1.25M
                        {
9578
                            /* Cr */
9579
1.25M
                            cr_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
9580
1.25M
                                ps_ctxt,
9581
1.25M
                                pu1_cur_pred_chrm,
9582
1.25M
                                pred_chrm_strd,
9583
1.25M
                                pu1_cur_src_chrm,
9584
1.25M
                                src_chrm_strd,
9585
1.25M
                                pi2_cur_deq_data_chrm + chroma_trans_size,
9586
1.25M
                                cu_size,
9587
1.25M
                                pu1_chrm_recon,
9588
1.25M
                                recon_chrma_strd,
9589
1.25M
                                pu1_final_ecd_data + cb_num_bytes,
9590
1.25M
                                pu1_csbf_buf,
9591
1.25M
                                csbf_strd,
9592
1.25M
                                chroma_trans_size,
9593
1.25M
                                ps_ctxt->i4_scan_idx,
9594
1.25M
                                (PRED_MODE_INTRA == packed_pred_mode),
9595
1.25M
                                &cr_num_bytes,
9596
1.25M
                                &temp_bits,
9597
1.25M
                                &cr_zero_col,
9598
1.25M
                                &cr_zero_row,
9599
1.25M
                                &au1_is_recon_available[V_PLANE],
9600
1.25M
                                perform_sbh,
9601
1.25M
                                perform_rdoq,
9602
1.25M
                                &i8_ssd,
9603
1.25M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
9604
1.25M
                                !ps_ctxt->u1_is_refPic
9605
1.25M
                                    ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
9606
1.25M
                                    : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
9607
1.15M
                                       (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
9608
1.15M
                                          100.0,
9609
1.25M
                                ps_prms->u1_is_cu_noisy,
9610
1.25M
#endif
9611
1.25M
                                ps_best_cu_prms->u1_skip_flag &&
9612
1.25M
                                    ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt,
9613
1.25M
                                u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD
9614
1.25M
                                                              : FREQUENCY_DOMAIN_SSD,
9615
1.25M
                                V_PLANE);
9616
1.25M
                        }
9617
0
                        else
9618
0
                        {
9619
0
                            cr_cbf = 0;
9620
0
                            temp_bits = 0;
9621
0
                            cr_num_bytes = 0;
9622
0
                            au1_is_recon_available[V_PLANE] = 0;
9623
0
                            cr_zero_col = 0;
9624
0
                            cr_zero_row = 0;
9625
0
                        }
9626
9627
                        /* Accumulate chroma residual bits */
9628
1.25M
                        ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits;
9629
9630
                        /* RDOPT copy States :  New updated after curr TU to TU init */
9631
1.25M
                        if(0 != cr_cbf)
9632
290k
                        {
9633
290k
                            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9634
290k
                                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9635
290k
                                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9636
290k
                                        .s_cabac_ctxt.au1_ctxt_models[0] +
9637
290k
                                    IHEVC_CAB_COEFFX_PREFIX,
9638
290k
                                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9639
290k
                        }
9640
9641
1.25M
                        if(0 == i4_subtu_idx)
9642
1.25M
                        {
9643
1.25M
                            ps_tu->b1_cb_cbf = cb_cbf;
9644
1.25M
                            ps_tu->b1_cr_cbf = cr_cbf;
9645
1.25M
                        }
9646
0
                        else
9647
0
                        {
9648
0
                            ps_tu->b1_cb_cbf_subtu1 = cb_cbf;
9649
0
                            ps_tu->b1_cr_cbf_subtu1 = cr_cbf;
9650
0
                        }
9651
1.25M
                    }
9652
2.66M
                    else
9653
2.66M
                    {
9654
2.66M
                        cb_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx];
9655
2.66M
                        cb_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx];
9656
2.66M
                        cr_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx];
9657
2.66M
                        cr_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx];
9658
9659
2.66M
                        if(ps_prms->u1_will_cabac_state_change)
9660
2.66M
                        {
9661
2.66M
                            cb_num_bytes =
9662
2.66M
                                ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx];
9663
2.66M
                        }
9664
0
                        else
9665
0
                        {
9666
0
                            cb_num_bytes = 0;
9667
0
                        }
9668
9669
2.66M
                        if(ps_prms->u1_will_cabac_state_change)
9670
2.66M
                        {
9671
2.66M
                            cr_num_bytes =
9672
2.66M
                                ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx];
9673
2.66M
                        }
9674
0
                        else
9675
0
                        {
9676
0
                            cr_num_bytes = 0;
9677
0
                        }
9678
9679
                        /* copy cb ecd data to final buffer */
9680
2.66M
                        memcpy(pu1_final_ecd_data, pu1_chrm_old_ecd_data, cb_num_bytes);
9681
9682
2.66M
                        pu1_chrm_old_ecd_data += cb_num_bytes;
9683
9684
                        /* copy cr ecd data to final buffer (placed right after the cb data) */
9685
2.66M
                        memcpy(
9686
2.66M
                            (pu1_final_ecd_data + cb_num_bytes),
9687
2.66M
                            pu1_chrm_old_ecd_data,
9688
2.66M
                            cr_num_bytes);
9689
9690
2.66M
                        pu1_chrm_old_ecd_data += cr_num_bytes;
9691
9692
2.66M
                        au1_is_recon_available[U_PLANE] = 0;
9693
2.66M
                        au1_is_recon_available[V_PLANE] = 0;
9694
2.66M
                    }
9695
9696
                    /**-------- Compute Recon data (Do IT & Recon) : Chroma  -----------**/
9697
3.91M
                    if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9698
3.91M
                       (!u1_compute_spatial_ssd_chroma ||
9699
3.86M
                        (!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma)))
9700
3.86M
                    {
9701
3.86M
                        if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9702
3.86M
                           (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9703
1.13M
                            (UCHAR_MAX ==
9704
1.13M
                             ps_recon_datastore
9705
1.13M
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])))
9706
3.69M
                        {
9707
3.69M
                            ihevce_chroma_it_recon_fxn(
9708
3.69M
                                ps_ctxt,
9709
3.69M
                                pi2_cur_deq_data_chrm,
9710
3.69M
                                cu_size,
9711
3.69M
                                pu1_cur_pred_chrm,
9712
3.69M
                                pred_chrm_strd,
9713
3.69M
                                pu1_cur_chroma_recon,
9714
3.69M
                                recon_chrma_strd,
9715
3.69M
                                pu1_final_ecd_data,
9716
3.69M
                                chroma_trans_size,
9717
3.69M
                                (i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1,
9718
3.69M
                                cb_zero_col,
9719
3.69M
                                cb_zero_row,
9720
3.69M
                                U_PLANE);
9721
3.69M
                        }
9722
169k
                        else if(
9723
169k
                            ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9724
169k
                            (UCHAR_MAX !=
9725
169k
                             ps_recon_datastore
9726
169k
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))
9727
169k
                        {
9728
169k
                            UWORD8 *pu1_recon_src =
9729
169k
                                ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9730
169k
                                     [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9731
169k
                                          [U_PLANE][ctr][i4_subtu_idx]]) +
9732
169k
                                i4_subtu_pos_x +
9733
169k
                                i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9734
9735
169k
                            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9736
169k
                                pu1_recon_src,
9737
169k
                                ps_recon_datastore->i4_lumaRecon_stride,
9738
169k
                                pu1_cur_chroma_recon,
9739
169k
                                recon_chrma_strd,
9740
169k
                                chroma_trans_size,
9741
169k
                                chroma_trans_size,
9742
169k
                                U_PLANE);
9743
169k
                        }
9744
3.86M
                    }
9745
9746
3.91M
                    u1_is_cu_coded |=
9747
3.91M
                        ((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf);
9748
9749
3.91M
                    if(ps_prms->u1_will_cabac_state_change)
9750
3.91M
                    {
9751
3.91M
                        ps_tu_enc_loop->ai4_cb_coeff_offset[i4_subtu_idx] = total_bytes;
9752
3.91M
                    }
9753
9754
3.91M
                    pu1_final_ecd_data += cb_num_bytes;
9755
                    /* update total bytes consumed */
9756
3.91M
                    total_bytes += cb_num_bytes;
9757
9758
3.91M
                    if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9759
3.91M
                       (!u1_compute_spatial_ssd_chroma ||
9760
3.86M
                        (!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma)))
9761
3.86M
                    {
9762
3.86M
                        if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9763
3.86M
                           (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9764
1.13M
                            (UCHAR_MAX ==
9765
1.13M
                             ps_recon_datastore
9766
1.13M
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])))
9767
3.69M
                        {
9768
3.69M
                            ihevce_chroma_it_recon_fxn(
9769
3.69M
                                ps_ctxt,
9770
3.69M
                                pi2_cur_deq_data_chrm + chroma_trans_size,
9771
3.69M
                                cu_size,
9772
3.69M
                                pu1_cur_pred_chrm,
9773
3.69M
                                pred_chrm_strd,
9774
3.69M
                                pu1_cur_chroma_recon,
9775
3.69M
                                recon_chrma_strd,
9776
3.69M
                                pu1_final_ecd_data,
9777
3.69M
                                chroma_trans_size,
9778
3.69M
                                (i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1,
9779
3.69M
                                cr_zero_col,
9780
3.69M
                                cr_zero_row,
9781
3.69M
                                V_PLANE);
9782
3.69M
                        }
9783
169k
                        else if(
9784
169k
                            ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9785
169k
                            (UCHAR_MAX !=
9786
169k
                             ps_recon_datastore
9787
169k
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))
9788
169k
                        {
9789
169k
                            UWORD8 *pu1_recon_src =
9790
169k
                                ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9791
169k
                                     [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9792
169k
                                          [V_PLANE][ctr][i4_subtu_idx]]) +
9793
169k
                                i4_subtu_pos_x +
9794
169k
                                i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9795
9796
169k
                            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9797
169k
                                pu1_recon_src,
9798
169k
                                ps_recon_datastore->i4_lumaRecon_stride,
9799
169k
                                pu1_cur_chroma_recon,
9800
169k
                                recon_chrma_strd,
9801
169k
                                chroma_trans_size,
9802
169k
                                chroma_trans_size,
9803
169k
                                V_PLANE);
9804
169k
                        }
9805
3.86M
                    }
9806
9807
3.91M
                    u1_is_cu_coded |=
9808
3.91M
                        ((1 == i4_subtu_idx) ? ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf);
9809
9810
3.91M
                    if(ps_prms->u1_will_cabac_state_change)
9811
3.91M
                    {
9812
3.91M
                        ps_tu_enc_loop->ai4_cr_coeff_offset[i4_subtu_idx] = total_bytes;
9813
3.91M
                    }
9814
9815
3.91M
                    pu1_final_ecd_data += cr_num_bytes;
9816
                    /* update total bytes consumed */
9817
3.91M
                    total_bytes += cr_num_bytes;
9818
3.91M
                }
9819
3.91M
            }
9820
3.91M
        }
9821
1.25M
        else
9822
1.25M
        {
9823
1.25M
            ps_tu_enc_loop->ai4_cb_coeff_offset[0] = total_bytes;
9824
1.25M
            ps_tu_enc_loop->ai4_cr_coeff_offset[0] = total_bytes;
9825
1.25M
            ps_tu_enc_loop->ai4_cb_coeff_offset[1] = total_bytes;
9826
1.25M
            ps_tu_enc_loop->ai4_cr_coeff_offset[1] = total_bytes;
9827
1.25M
            ps_tu->b1_cb_cbf = 0;
9828
1.25M
            ps_tu->b1_cr_cbf = 0;
9829
1.25M
            ps_tu->b1_cb_cbf_subtu1 = 0;
9830
1.25M
            ps_tu->b1_cr_cbf_subtu1 = 0;
9831
1.25M
        }
9832
9833
        /* Update to next TU */
9834
5.17M
        ps_tu_enc_loop++;
9835
5.17M
        ps_tu_enc_loop_temp_prms++;
9836
9837
5.17M
        pu4_nbr_flags++;
9838
5.17M
        pu1_intra_pred_mode++;
9839
9840
        /* Do not set the nbr map for the last tu in cu */
9841
5.17M
        if((num_tu_in_cu - 1) != ctr)
9842
2.25M
        {
9843
            /* set the neighbour map to 1 */
9844
2.25M
            ihevce_set_nbr_map(
9845
2.25M
                ps_ctxt->pu1_ctb_nbr_map,
9846
2.25M
                ps_ctxt->i4_nbr_map_strd,
9847
2.25M
                cu_pos_x_in_4x4,
9848
2.25M
                cu_pos_y_in_4x4,
9849
2.25M
                (trans_size >> 2),
9850
2.25M
                1);
9851
2.25M
        }
9852
5.17M
    }
9853
9854
2.92M
    if(ps_prms->u1_will_cabac_state_change)
9855
2.92M
    {
9856
2.92M
        ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded;
9857
9858
        /* Modify skip flag, if luma is skipped & Chroma is coded */
9859
2.92M
        if((1 == u1_is_cu_coded) && (PRED_MODE_SKIP == packed_pred_mode))
9860
1.73k
        {
9861
1.73k
            ps_best_cu_prms->u1_skip_flag = 0;
9862
1.73k
        }
9863
2.92M
    }
9864
9865
    /* during chroma evaluation if skip decision was over written     */
9866
    /* then the current skip candidate is set to a non skip candidate */
9867
2.92M
    if(PRED_MODE_INTRA != packed_pred_mode)
9868
1.42M
    {
9869
1.42M
        ps_best_inter_cand->b1_skip_flag = ps_best_cu_prms->u1_skip_flag;
9870
1.42M
    }
9871
9872
    /**------------- Compute header data if required --------------**/
9873
2.92M
    if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data)
9874
858k
    {
9875
858k
        WORD32 cbf_bits;
9876
858k
        WORD32 cu_bits;
9877
858k
        WORD32 unit_4x4_size = cu_size >> 2;
9878
9879
        /*Restoring the running reference into the best rdopt_ctxt cabac states which will then
9880
        be copied as the base reference for the next cu
9881
        Assumption : We are ensuring that the u1_eval_header_data flag is set to 1 only if either
9882
        luma and chroma are being reevaluated*/
9883
858k
        COPY_CABAC_STATES(
9884
858k
            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9885
858k
                 .s_cabac_ctxt.au1_ctxt_models[0],
9886
858k
            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
9887
858k
            IHEVC_CAB_CTXT_END);
9888
9889
        /* get the neighbour availability flags for current cu  */
9890
858k
        ihevce_get_only_nbr_flag(
9891
858k
            &s_nbr,
9892
858k
            ps_ctxt->pu1_ctb_nbr_map,
9893
858k
            ps_ctxt->i4_nbr_map_strd,
9894
858k
            (cu_pos_x << 1),
9895
858k
            (cu_pos_y << 1),
9896
858k
            unit_4x4_size,
9897
858k
            unit_4x4_size);
9898
9899
858k
        cu_bits = ihevce_entropy_rdo_encode_cu(
9900
858k
            &ps_ctxt->s_rdopt_entropy_ctxt,
9901
858k
            ps_best_cu_prms,
9902
858k
            cu_pos_x,
9903
858k
            cu_pos_y,
9904
858k
            cu_size,
9905
858k
            ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
9906
858k
                                           : s_nbr.u1_top_avail,
9907
858k
            s_nbr.u1_left_avail,
9908
858k
            (pu1_final_ecd_data - total_bytes),
9909
858k
            &cbf_bits);
9910
9911
        /* cbf bits are excluded from header bits, instead considered as texture bits */
9912
858k
        ps_best_cu_prms->u4_cu_hdr_bits = cu_bits - cbf_bits;
9913
858k
        ps_best_cu_prms->u4_cu_cbf_bits = cbf_bits;
9914
858k
    }
9915
9916
2.92M
    if(ps_prms->u1_will_cabac_state_change)
9917
2.92M
    {
9918
2.92M
        ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes;
9919
2.92M
    }
9920
2.92M
}
9921
9922
/*!
9923
******************************************************************************
9924
* \if Function name : ihevce_set_eval_flags \endif
9925
*
9926
* \brief
9927
*    Function which decides which eval flags have to be set based on present
9928
*    and RDOQ conditions
9929
*
9930
* \param[in] ps_ctxt : encoder ctxt pointer
9931
* \param[in] ps_enc_loop_bestprms : pointer to final cu params
9932
*
9933
* \return
9934
*    None
9935
*
9936
* \author
9937
*  Ittiam
9938
*
9939
*****************************************************************************
9940
*/
9941
void ihevce_set_eval_flags(
9942
    ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_final_prms_t *ps_enc_loop_bestprms)
9943
2.92M
{
9944
2.92M
    WORD32 count = 0;
9945
9946
2.92M
    ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0;
9947
9948
2.92M
    ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data =
9949
2.92M
        !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
9950
9951
2.92M
    if(ps_ctxt->u1_disable_intra_eval && (!(ps_ctxt->i4_deblk_pad_hpel_cur_pic & 0x1)))
9952
35.6k
    {
9953
35.6k
        ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 0;
9954
35.6k
    }
9955
2.88M
    else
9956
2.88M
    {
9957
2.88M
        ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1;
9958
2.88M
    }
9959
9960
2.92M
    if((1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq) ||
9961
2.92M
       (1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh))
9962
0
    {
9963
        /* When rdoq is enabled only for the best candidate, in case of in Intra nTU
9964
        RDOQ might have altered the coeffs of the neighbour CU. As a result, the pred
9965
        for the current CU will change. Therefore, we need to reevaluate the pred data*/
9966
0
        if((ps_enc_loop_bestprms->u2_num_tus_in_cu > 1) &&
9967
0
           (ps_enc_loop_bestprms->u1_intra_flag == 1))
9968
0
        {
9969
0
            ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 1;
9970
0
            ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data = 1;
9971
0
        }
9972
0
        if(ps_enc_loop_bestprms->u1_skip_flag == 1)
9973
0
        {
9974
0
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
9975
0
            {
9976
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
9977
0
                    .b1_eval_luma_iq_and_coeff_data = 0;
9978
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
9979
0
                    .b1_eval_chroma_iq_and_coeff_data = 0;
9980
0
            }
9981
0
        }
9982
0
        else
9983
0
        {
9984
0
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
9985
0
            {
9986
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
9987
0
                    .b1_eval_luma_iq_and_coeff_data = 1;
9988
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
9989
0
                    .b1_eval_chroma_iq_and_coeff_data = 1;
9990
0
            }
9991
0
        }
9992
0
    }
9993
2.92M
    else
9994
2.92M
    {
9995
2.92M
        switch(ps_ctxt->i4_quality_preset)
9996
2.92M
        {
9997
1.65M
        case IHEVCE_QUALITY_P0:
9998
1.81M
        case IHEVCE_QUALITY_P2:
9999
2.06M
        case IHEVCE_QUALITY_P3:
10000
2.06M
        {
10001
5.58M
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
10002
3.51M
            {
10003
3.51M
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10004
3.51M
                    .b1_eval_luma_iq_and_coeff_data = 0;
10005
3.51M
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10006
3.51M
                    .b1_eval_chroma_iq_and_coeff_data =
10007
3.51M
                    !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10008
3.51M
            }
10009
10010
2.06M
            break;
10011
1.81M
        }
10012
170k
        case IHEVCE_QUALITY_P4:
10013
324k
        case IHEVCE_QUALITY_P5:
10014
324k
        {
10015
1.03M
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
10016
712k
            {
10017
712k
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10018
712k
                    .b1_eval_luma_iq_and_coeff_data = 0;
10019
712k
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10020
712k
                    .b1_eval_chroma_iq_and_coeff_data =
10021
712k
                    !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10022
712k
            }
10023
10024
324k
            break;
10025
170k
        }
10026
533k
        case IHEVCE_QUALITY_P6:
10027
533k
        {
10028
1.48M
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
10029
948k
            {
10030
948k
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10031
948k
                    .b1_eval_luma_iq_and_coeff_data = 0;
10032
948k
#if !ENABLE_CHROMA_TRACKING_OF_LUMA_CBF_IN_XS25
10033
948k
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10034
948k
                    .b1_eval_chroma_iq_and_coeff_data =
10035
948k
                    !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10036
#else
10037
                if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_temporal_layer_id > 1) &&
10038
                   (ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b3_size >= 2))
10039
                {
10040
                    ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10041
                        .b1_eval_chroma_iq_and_coeff_data =
10042
                        ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b1_y_cbf;
10043
                }
10044
                else
10045
                {
10046
                    ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10047
                        .b1_eval_chroma_iq_and_coeff_data =
10048
                        !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10049
                }
10050
#endif
10051
948k
            }
10052
10053
533k
            break;
10054
170k
        }
10055
0
        default:
10056
0
        {
10057
0
            break;
10058
170k
        }
10059
2.92M
        }
10060
2.92M
    }
10061
10062
    /* Not recomputing Luma pred-data and header data for any preset now */
10063
2.92M
    ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1;
10064
2.92M
}
10065
10066
/**
10067
******************************************************************************
10068
*
10069
*  @brief Shrink's TU tree of inter CUs by merging redundnant child nodes
10070
*         (not coded children) into a parent node(not coded).
10071
*
10072
*  @par   Description
10073
*         This is required post RDO evaluation as TU decisions are
10074
*         pre-determined(pre RDO) based on recursive SATD,
10075
*         while the quad children TU's can be skipped during RDO
10076
*
10077
*         The shrink process is applied iteratively till there are no
10078
*         more modes to shrink
10079
*
10080
*  @param[inout]   ps_tu_enc_loop
10081
*       pointer to tu enc loop params of inter cu
10082
*
10083
*  @param[inout]   ps_tu_enc_loop_temp_prms
10084
*       pointer to temp tu enc loop params of inter cu
10085
*
10086
*  @param[in]   num_tu_in_cu
10087
*       number of tus in cu
10088
*
10089
*  @return      modified number of tus in cu
10090
*
10091
******************************************************************************
10092
*/
10093
WORD32 ihevce_shrink_inter_tu_tree(
10094
    tu_enc_loop_out_t *ps_tu_enc_loop,
10095
    tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms,
10096
    recon_datastore_t *ps_recon_datastore,
10097
    WORD32 num_tu_in_cu,
10098
    UWORD8 u1_is_422)
10099
475k
{
10100
475k
    WORD32 recurse = 1;
10101
475k
    WORD32 ctr;
10102
10103
    /* ------------- Quadtree TU Split Transform flag optimization ------------  */
10104
    /* Post RDO, if all 4 child nodes are not coded the overheads of split TU    */
10105
    /* flags and cbf flags are saved by merging to parent node and marking       */
10106
    /* parent TU as not coded                                                    */
10107
    /*                                                                           */
10108
    /*                               ParentTUSplit=1                             */
10109
    /*                                      |                                    */
10110
    /*       ---------------------------------------------------------           */
10111
    /*       |C0(Not coded) | C1(Not coded) | C2(Not coded) | C3(Not coded)      */
10112
    /*                                     ||                                    */
10113
    /*                                     \/                                    */
10114
    /*                                                                           */
10115
    /*                              ParentTUSplit=0 (Not Coded)                  */
10116
    /*                                                                           */
10117
    /* ------------- Quadtree TU Split Transform flag optimization ------------  */
10118
606k
    while((num_tu_in_cu > 4) && recurse)
10119
130k
    {
10120
130k
        recurse = 0;
10121
10122
        /* Validate inter CU */
10123
        //ASSERT(ps_tu_enc_loop[0].s_tu.s_tu.b1_intra_flag == 0); /*b1_intra_flag no longer a member of tu structure */
10124
10125
        /* loop for all tu blocks in current cu */
10126
910k
        for(ctr = 0; ctr < num_tu_in_cu;)
10127
780k
        {
10128
            /* Get current tu posx, posy and size */
10129
780k
            WORD32 curr_pos_x = ps_tu_enc_loop[ctr].s_tu.b4_pos_x << 2;
10130
780k
            WORD32 curr_pos_y = ps_tu_enc_loop[ctr].s_tu.b4_pos_y << 2;
10131
            /* +1 is for parents size */
10132
780k
            WORD32 parent_tu_size = 1 << (ps_tu_enc_loop[ctr].s_tu.b3_size + 2 + 1);
10133
10134
            /* eval merge if leaf nodes reached i.e all child tus are of same size and first tu pos is same as parent pos */
10135
780k
            WORD32 eval_merge = ((curr_pos_x & (parent_tu_size - 1)) == 0);
10136
780k
            eval_merge &= ((curr_pos_y & (parent_tu_size - 1)) == 0);
10137
10138
            /* As TUs are published in encode order (Z SCAN),                      */
10139
            /* Four consecutive TUS of same size implies we have hit leaf nodes.   */
10140
780k
            if(((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 1].s_tu.b3_size)) &&
10141
780k
               ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 2].s_tu.b3_size)) &&
10142
780k
               ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 3].s_tu.b3_size)) &&
10143
780k
               eval_merge)
10144
232k
            {
10145
232k
                WORD32 merge_parent = 1;
10146
10147
                /* If any leaf noded is coded, it cannot be merged to parent */
10148
232k
                if((ps_tu_enc_loop[ctr].s_tu.b1_y_cbf) || (ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf) ||
10149
232k
                   (ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf) ||
10150
10151
232k
                   (ps_tu_enc_loop[ctr + 1].s_tu.b1_y_cbf) ||
10152
232k
                   (ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf) ||
10153
232k
                   (ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf) ||
10154
10155
232k
                   (ps_tu_enc_loop[ctr + 2].s_tu.b1_y_cbf) ||
10156
232k
                   (ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf) ||
10157
232k
                   (ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf) ||
10158
10159
232k
                   (ps_tu_enc_loop[ctr + 3].s_tu.b1_y_cbf) ||
10160
232k
                   (ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf) ||
10161
232k
                   (ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf))
10162
222k
                {
10163
222k
                    merge_parent = 0;
10164
222k
                }
10165
10166
232k
                if(u1_is_422)
10167
0
                {
10168
0
                    if((ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1) ||
10169
0
                       (ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1) ||
10170
10171
0
                       (ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf_subtu1) ||
10172
0
                       (ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf_subtu1) ||
10173
10174
0
                       (ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf_subtu1) ||
10175
0
                       (ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf_subtu1) ||
10176
10177
0
                       (ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf_subtu1) ||
10178
0
                       (ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf_subtu1))
10179
0
                    {
10180
0
                        merge_parent = 0;
10181
0
                    }
10182
0
                }
10183
10184
232k
                if(merge_parent)
10185
9.46k
                {
10186
                    /* Merge all the children (ctr,ctr+1,ctr+2,ctr+3) to parent (ctr) */
10187
10188
9.46k
                    if(ps_recon_datastore->u1_is_lumaRecon_available)
10189
174
                    {
10190
174
                        ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
10191
10192
174
                        memmove(
10193
174
                            &ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 1],
10194
174
                            &ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 4],
10195
174
                            (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10196
174
                    }
10197
10198
9.46k
                    if(ps_recon_datastore->au1_is_chromaRecon_available[0])
10199
174
                    {
10200
174
                        ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][0] =
10201
174
                            UCHAR_MAX;
10202
174
                        ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][0] =
10203
174
                            UCHAR_MAX;
10204
10205
174
                        memmove(
10206
174
                            &ps_recon_datastore
10207
174
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][0],
10208
174
                            &ps_recon_datastore
10209
174
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][0],
10210
174
                            (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10211
10212
174
                        memmove(
10213
174
                            &ps_recon_datastore
10214
174
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][0],
10215
174
                            &ps_recon_datastore
10216
174
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][0],
10217
174
                            (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10218
10219
174
                        if(u1_is_422)
10220
0
                        {
10221
0
                            ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][1] =
10222
0
                                UCHAR_MAX;
10223
0
                            ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][1] =
10224
0
                                UCHAR_MAX;
10225
10226
0
                            memmove(
10227
0
                                &ps_recon_datastore
10228
0
                                     ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][1],
10229
0
                                &ps_recon_datastore
10230
0
                                     ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][1],
10231
0
                                (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10232
10233
0
                            memmove(
10234
0
                                &ps_recon_datastore
10235
0
                                     ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][1],
10236
0
                                &ps_recon_datastore
10237
0
                                     ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][1],
10238
0
                                (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10239
0
                        }
10240
174
                    }
10241
10242
                    /* Parent node size is one more than that of child */
10243
9.46k
                    ps_tu_enc_loop[ctr].s_tu.b3_size++;
10244
10245
9.46k
                    ctr++;
10246
10247
                    /* move the subsequent TUs to next element */
10248
9.46k
                    ASSERT(num_tu_in_cu >= (ctr + 3));
10249
9.46k
                    memmove(
10250
9.46k
                        (void *)(ps_tu_enc_loop + ctr),
10251
9.46k
                        (void *)(ps_tu_enc_loop + ctr + 3),
10252
9.46k
                        (num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_out_t));
10253
10254
                    /* Also memmove the temp TU params */
10255
9.46k
                    memmove(
10256
9.46k
                        (void *)(ps_tu_enc_loop_temp_prms + ctr),
10257
9.46k
                        (void *)(ps_tu_enc_loop_temp_prms + ctr + 3),
10258
9.46k
                        (num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_temp_prms_t));
10259
10260
                    /* Number of TUs in CU are now less by 3 */
10261
9.46k
                    num_tu_in_cu -= 3;
10262
10263
                    /* Recurse again as new parent also be can be merged later */
10264
9.46k
                    recurse = 1;
10265
9.46k
                }
10266
222k
                else
10267
222k
                {
10268
                    /* Go to next set of leaf nodes */
10269
222k
                    ctr += 4;
10270
222k
                }
10271
232k
            }
10272
547k
            else
10273
547k
            {
10274
547k
                ctr++;
10275
547k
            }
10276
780k
        }
10277
130k
    }
10278
10279
    /* return the modified num TUs*/
10280
475k
    ASSERT(num_tu_in_cu > 0);
10281
475k
    return (num_tu_in_cu);
10282
475k
}
10283
10284
/*!
******************************************************************************
* \brief
*    Extends an intra mode list with the numerically adjacent modes
*    (mode - 1 and mode + 1) of its first MAX_INTRA_CU_CANDIDATES entries,
*    and then with INTRA_PLANAR and INTRA_DC. A mode is appended only when
*    its hash table entry is zero; the entry is set on append.
*
* \param[in,out] pu1_mode_array : mode list; new modes are written starting at
*                                 index u1_num_ipe_modes
* \param[in,out] pu1_hash_table : per-mode "already present" markers
* \param[in] u1_num_ipe_modes   : number of modes in the list on entry
*
* \return
*    Number of modes in the list after the update
*
*****************************************************************************
*/
UWORD8 ihevce_intra_mode_nxn_hash_updater(
    UWORD8 *pu1_mode_array, UWORD8 *pu1_hash_table, UWORD8 u1_num_ipe_modes)
{
    WORD32 cand_idx = 0;

    while(cand_idx < MAX_INTRA_CU_CANDIDATES)
    {
        WORD32 nbr_mode;
        UWORD8 *pu1_cand = &pu1_mode_array[cand_idx];

        cand_idx++;

        /* Entries of 35 and above are ignored */
        if(*pu1_cand >= 35)
        {
            continue;
        }

        /* Lower neighbour; mode 0 has none */
        if(0 != *pu1_cand)
        {
            nbr_mode = *pu1_cand - 1;

            if(0 == pu1_hash_table[nbr_mode])
            {
                pu1_hash_table[nbr_mode] = 1;
                pu1_mode_array[u1_num_ipe_modes++] = nbr_mode;
            }
        }

        /* Upper neighbour; mode 34 has none. The candidate is read again on */
        /* purpose: the append above may have written to this very slot      */
        if(34 != *pu1_cand)
        {
            nbr_mode = *pu1_cand + 1;

            if(0 == pu1_hash_table[nbr_mode])
            {
                pu1_hash_table[nbr_mode] = 1;
                pu1_mode_array[u1_num_ipe_modes++] = nbr_mode;
            }
        }
    }

    if(0 == pu1_hash_table[INTRA_PLANAR])
    {
        pu1_hash_table[INTRA_PLANAR] = 1;
        pu1_mode_array[u1_num_ipe_modes++] = INTRA_PLANAR;
    }

    if(0 == pu1_hash_table[INTRA_DC])
    {
        pu1_hash_table[INTRA_DC] = 1;
        pu1_mode_array[u1_num_ipe_modes++] = INTRA_DC;
    }

    return u1_num_ipe_modes;
}
10336
10337
#if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
/*!
******************************************************************************
* \brief
*    Runs the recursive-TU SATD evaluator matching the CU size on the CU's
*    prediction data; the evaluator receives the candidate's
*    ai4_tu_split_flag array through the error params.
*
* \param[in,out] ps_cu_data  : inter candidate; pred buffer is read and
*                              ai4_tu_split_flag may be written
* \param[in] ps_func_selector : forwarded to the SATD evaluator
* \param[in] pi2_scratch_mem : working memory handed to the evaluator
* \param[in] pu1_inp         : source pixels
* \param[in] i4_inp_stride   : source stride
* \param[in] i4_lambda       : lambda forwarded to the evaluator
* \param[in] u1_lambda_q_shift : Q shift of lambda
* \param[in] u1_cu_size      : CU size (8, 16, 32 or 64)
* \param[in] u1_max_tr_depth : max transform depth; capped at 1 for 64x64 CUs
*
* \return
*    Value returned by the SATD evaluator
*
*****************************************************************************
*/
WORD32 ihevce_determine_tu_tree_distribution(
    cu_inter_cand_t *ps_cu_data,
    me_func_selector_t *ps_func_selector,
    WORD16 *pi2_scratch_mem,
    UWORD8 *pu1_inp,
    WORD32 i4_inp_stride,
    WORD32 i4_lambda,
    UWORD8 u1_lambda_q_shift,
    UWORD8 u1_cu_size,
    UWORD8 u1_max_tr_depth)
{
    PF_SAD_FXN_TU_REC apf_satd_fxn[4];
    err_prms_t s_err_prms;
    WORD32 i4_satd;

    /* Evaluator table, indexed by CU size id */
    apf_satd_fxn[CU_8x8] = hme_evalsatd_pt_pu_8x8_tu_rec;
    apf_satd_fxn[CU_16x16] = hme_evalsatd_pt_pu_16x16_tu_rec;
    apf_satd_fxn[CU_32x32] = hme_evalsatd_pt_pu_32x32_tu_rec;
    apf_satd_fxn[CU_64x64] = hme_evalsatd_pt_pu_64x64_tu_rec;

    /* Source and reference (prediction) buffers */
    s_err_prms.pu1_inp = pu1_inp;
    s_err_prms.i4_inp_stride = i4_inp_stride;
    s_err_prms.pu1_ref = ps_cu_data->pu1_pred_data;
    s_err_prms.i4_ref_stride = ps_cu_data->i4_pred_data_stride;

    /* Outputs and scratch */
    s_err_prms.pi4_sad_grid = &i4_satd;
    s_err_prms.pi4_tu_split_flags = ps_cu_data->ai4_tu_split_flag;
    s_err_prms.pu1_wkg_mem = (UWORD8 *)pi2_scratch_mem;

    /* 64x64 CUs are limited to a transform depth of at most 1 */
    s_err_prms.u1_max_tr_depth = (64 == u1_cu_size) ? MIN(1, u1_max_tr_depth) : u1_max_tr_depth;

    i4_satd = apf_satd_fxn[hme_get_range(u1_cu_size) - 4](
        &s_err_prms, i4_lambda, u1_lambda_q_shift, 0, ps_func_selector);

    /* With zero transform depth, a non-2Nx2N partition of a CU smaller */
    /* than 64 gets its first split flag forced to 1                    */
    if((u1_cu_size != 64) && (0 == u1_max_tr_depth) && (ps_cu_data->b3_part_size != 0))
    {
        ps_cu_data->ai4_tu_split_flag[0] = 1;
    }

    return i4_satd;
}
#endif
10388
10389
void ihevce_populate_nbr_4x4_with_pu_data(
10390
    nbr_4x4_t *ps_nbr_4x4, pu_t *ps_pu, WORD32 i4_nbr_buf_stride)
10391
664k
{
10392
664k
    WORD32 i, j;
10393
10394
664k
    nbr_4x4_t *ps_tmp_4x4 = ps_nbr_4x4;
10395
10396
664k
    WORD32 ht = (ps_pu->b4_ht + 1);
10397
664k
    WORD32 wd = (ps_pu->b4_wd + 1);
10398
10399
664k
    ps_nbr_4x4->b1_intra_flag = 0;
10400
664k
    ps_nbr_4x4->b1_pred_l0_flag = !(ps_pu->b2_pred_mode & 1);
10401
664k
    ps_nbr_4x4->b1_pred_l1_flag = (ps_pu->b2_pred_mode > PRED_L0);
10402
664k
    ps_nbr_4x4->mv = ps_pu->mv;
10403
10404
2.03M
    for(i = 0; i < ht; i++)
10405
1.36M
    {
10406
8.01M
        for(j = 0; j < wd; j++)
10407
6.65M
        {
10408
6.65M
            ps_tmp_4x4[j] = *ps_nbr_4x4;
10409
6.65M
        }
10410
10411
1.36M
        ps_tmp_4x4 += i4_nbr_buf_stride;
10412
1.36M
    }
10413
664k
}
10414
10415
void ihevce_call_luma_inter_pred_rdopt_pass1(
10416
    ihevce_enc_loop_ctxt_t *ps_ctxt, cu_inter_cand_t *ps_inter_cand, WORD32 cu_size)
10417
0
{
10418
0
    pu_t *ps_pu;
10419
0
    UWORD8 *pu1_pred;
10420
0
    WORD32 pred_stride, ctr, num_cu_part, skip_or_merge_flag = 0;
10421
0
    WORD32 inter_pu_wd, inter_pu_ht;
10422
10423
0
    pu1_pred = ps_inter_cand->pu1_pred_data_scr;
10424
0
    pred_stride = ps_inter_cand->i4_pred_data_stride;
10425
0
    num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1;
10426
10427
0
    for(ctr = 0; ctr < num_cu_part; ctr++)
10428
0
    {
10429
0
        ps_pu = &ps_inter_cand->as_inter_pu[ctr];
10430
10431
        /* IF AMP then each partitions can have diff wd ht */
10432
0
        inter_pu_wd = (ps_pu->b4_wd + 1) << 2;
10433
0
        inter_pu_ht = (ps_pu->b4_ht + 1) << 2;
10434
10435
0
        skip_or_merge_flag = ps_inter_cand->b1_skip_flag | ps_pu->b1_merge_flag;
10436
        //if(0 == skip_or_merge_flag)
10437
0
        {
10438
0
            ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 1);
10439
0
        }
10440
0
        if((2 == num_cu_part) && (0 == ctr))
10441
0
        {
10442
            /* 2Nx__ partion case */
10443
0
            if(inter_pu_wd == cu_size)
10444
0
            {
10445
0
                pu1_pred += (inter_pu_ht * pred_stride);
10446
0
            }
10447
10448
            /* __x2N partion case */
10449
0
            if(inter_pu_ht == cu_size)
10450
0
            {
10451
0
                pu1_pred += inter_pu_wd;
10452
0
            }
10453
0
        }
10454
0
    }
10455
0
}
10456
10457
LWORD64 ihevce_it_recon_ssd(
10458
    ihevce_enc_loop_ctxt_t *ps_ctxt,
10459
    UWORD8 *pu1_src,
10460
    WORD32 i4_src_strd,
10461
    UWORD8 *pu1_pred,
10462
    WORD32 i4_pred_strd,
10463
    WORD16 *pi2_deq_data,
10464
    WORD32 i4_deq_data_strd,
10465
    UWORD8 *pu1_recon,
10466
    WORD32 i4_recon_stride,
10467
    UWORD8 *pu1_ecd_data,
10468
    UWORD8 u1_trans_size,
10469
    UWORD8 u1_pred_mode,
10470
    WORD32 i4_cbf,
10471
    WORD32 i4_zero_col,
10472
    WORD32 i4_zero_row,
10473
    CHROMA_PLANE_ID_T e_chroma_plane)
10474
31.7M
{
10475
31.7M
    if(NULL_PLANE == e_chroma_plane)
10476
13.9M
    {
10477
13.9M
        ihevce_it_recon_fxn(
10478
13.9M
            ps_ctxt,
10479
13.9M
            pi2_deq_data,
10480
13.9M
            i4_deq_data_strd,
10481
13.9M
            pu1_pred,
10482
13.9M
            i4_pred_strd,
10483
13.9M
            pu1_recon,
10484
13.9M
            i4_recon_stride,
10485
13.9M
            pu1_ecd_data,
10486
13.9M
            u1_trans_size,
10487
13.9M
            u1_pred_mode,
10488
13.9M
            i4_cbf,
10489
13.9M
            i4_zero_col,
10490
13.9M
            i4_zero_row);
10491
10492
13.9M
        return ps_ctxt->s_cmn_opt_func.pf_ssd_calculator(
10493
13.9M
            pu1_recon, pu1_src, i4_recon_stride, i4_src_strd, u1_trans_size, u1_trans_size,
10494
13.9M
            e_chroma_plane);
10495
13.9M
    }
10496
17.7M
    else
10497
17.7M
    {
10498
17.7M
        ihevce_chroma_it_recon_fxn(
10499
17.7M
            ps_ctxt,
10500
17.7M
            pi2_deq_data,
10501
17.7M
            i4_deq_data_strd,
10502
17.7M
            pu1_pred,
10503
17.7M
            i4_pred_strd,
10504
17.7M
            pu1_recon,
10505
17.7M
            i4_recon_stride,
10506
17.7M
            pu1_ecd_data,
10507
17.7M
            u1_trans_size,
10508
17.7M
            i4_cbf,
10509
17.7M
            i4_zero_col,
10510
17.7M
            i4_zero_row,
10511
17.7M
            e_chroma_plane);
10512
10513
17.7M
        return ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
10514
17.7M
            pu1_recon,
10515
17.7M
            pu1_src,
10516
17.7M
            i4_recon_stride,
10517
17.7M
            i4_src_strd,
10518
17.7M
            u1_trans_size,
10519
17.7M
            u1_trans_size,
10520
17.7M
            e_chroma_plane);
10521
17.7M
    }
10522
31.7M
}
10523
10524
/*!
10525
******************************************************************************
10526
* \if Function name : ihevce_chroma_t_q_iq_ssd_scan_fxn \endif
10527
*
10528
* \brief
10529
*    Transform unit level (Chroma) enc_loop function
10530
*
10531
* \param[in] ps_ctxt    enc_loop module ctxt pointer
10532
* \param[in] pu1_pred       pointer to predicted data buffer
10533
* \param[in] pred_strd      predicted buffer stride
10534
* \param[in] pu1_src    pointer to source data buffer
10535
* \param[in] src_strd   source buffer stride
10536
* \param[in] pi2_deq_data   pointer to store iq data
10537
* \param[in] deq_data_strd  iq data buffer stride
10538
* \param[out] pu1_ecd_data  pointer coeff output buffer (input to ent cod)
10539
* \param[out] pu1_csbf_buf  pointer to store the csbf for all 4x4 in a current
10540
*                           block
10541
* \param[out] csbf_strd     csbf buffer stride
10542
* \param[in] trans_size     transform size (4, 8, 16)
10543
* \param[in] intra_flag     0:Inter/Skip 1:Intra
10544
* \param[out] pi4_coeff_off pointer to store the number of bytes produced in
10545
*                           coeff buffer
10546
*                           the current TU in RDopt Mode
10547
* \param[out] pi4_zero_col  pointer to store the zero_col info for the TU
10548
* \param[out] pi4_zero_row  pointer to store the zero_row info for the TU
10549
*
10550
* \return
10551
*    CBF of the current block
10552
*
10553
* \author
10554
*  Ittiam
10555
*
10556
*****************************************************************************
10557
*/
10558
WORD32 ihevce_chroma_t_q_iq_ssd_scan_fxn(
10559
    ihevce_enc_loop_ctxt_t *ps_ctxt,
10560
    UWORD8 *pu1_pred,
10561
    WORD32 pred_strd,
10562
    UWORD8 *pu1_src,
10563
    WORD32 src_strd,
10564
    WORD16 *pi2_deq_data,
10565
    WORD32 deq_data_strd,
10566
    UWORD8 *pu1_recon,
10567
    WORD32 i4_recon_stride,
10568
    UWORD8 *pu1_ecd_data,
10569
    UWORD8 *pu1_csbf_buf,
10570
    WORD32 csbf_strd,
10571
    WORD32 trans_size,
10572
    WORD32 i4_scan_idx,
10573
    WORD32 intra_flag,
10574
    WORD32 *pi4_coeff_off,
10575
    WORD32 *pi4_tu_bits,
10576
    WORD32 *pi4_zero_col,
10577
    WORD32 *pi4_zero_row,
10578
    UWORD8 *pu1_is_recon_available,
10579
    WORD32 i4_perform_sbh,
10580
    WORD32 i4_perform_rdoq,
10581
    LWORD64 *pi8_cost,
10582
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
10583
    WORD32 i4_alpha_stim_multiplier,
10584
    UWORD8 u1_is_cu_noisy,
10585
#endif
10586
    UWORD8 u1_is_skip,
10587
    SSD_TYPE_T e_ssd_type,
10588
    CHROMA_PLANE_ID_T e_chroma_plane)
10589
31.9M
{
10590
31.9M
    WORD32 trans_idx, cbf, u4_blk_sad;
10591
31.9M
    WORD16 *pi2_quant_coeffs;
10592
31.9M
    WORD16 *pi2_trans_values;
10593
31.9M
    WORD32 quant_scale_mat_offset;
10594
31.9M
    WORD32 *pi4_trans_scratch;
10595
31.9M
    WORD32 *pi4_subBlock2csbfId_map = NULL;
10596
10597
#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
10598
    WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i;
10599
#endif
10600
10601
31.9M
    rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt;
10602
10603
31.9M
    WORD32 i4_perform_zcbf = (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE) ||
10604
31.9M
                             (!intra_flag && ENABLE_INTER_ZCU_COST);
10605
31.9M
    WORD32 i4_perform_coeff_level_rdoq =
10606
31.9M
        (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING) &&
10607
31.9M
        (ps_ctxt->i4_chroma_quant_rounding_level == CHROMA_QUANT_ROUNDING);
10608
10609
31.9M
    ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE));
10610
31.9M
    ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW);
10611
10612
31.9M
    *pi4_coeff_off = 0;
10613
31.9M
    *pi4_tu_bits = 0;
10614
31.9M
    pu1_is_recon_available[0] = 0;
10615
10616
31.9M
    pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0];
10617
31.9M
    pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
10618
31.9M
    pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2);
10619
10620
31.9M
    if(2 == trans_size)
10621
0
    {
10622
0
        trans_size = 4;
10623
0
    }
10624
10625
    /* translate the transform size to index */
10626
31.9M
    trans_idx = trans_size >> 2;
10627
10628
31.9M
    if(16 == trans_size)
10629
4.72M
    {
10630
4.72M
        trans_idx = 3;
10631
4.72M
    }
10632
10633
31.9M
    if(u1_is_skip)
10634
0
    {
10635
0
        pi8_cost[0] = ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
10636
0
            pu1_pred,
10637
0
            pu1_src,
10638
0
            pred_strd,
10639
0
            src_strd,
10640
0
            trans_size,
10641
0
            trans_size,
10642
0
            e_chroma_plane);
10643
10644
0
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
10645
0
        {
10646
            /* buffer copy fromp pred to recon */
10647
0
            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
10648
0
                pu1_pred,
10649
0
                pred_strd,
10650
0
                pu1_recon,
10651
0
                i4_recon_stride,
10652
0
                trans_size,
10653
0
                trans_size,
10654
0
                e_chroma_plane);
10655
10656
0
            pu1_is_recon_available[0] = 1;
10657
0
        }
10658
10659
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
10660
0
        if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
10661
0
        {
10662
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
10663
0
                pu1_src,
10664
0
                src_strd,
10665
0
                pu1_pred,
10666
0
                pred_strd,
10667
0
                pi8_cost[0],
10668
0
                i4_alpha_stim_multiplier,
10669
0
                trans_size,
10670
0
                0,
10671
0
                ps_ctxt->u1_enable_psyRDOPT,
10672
0
                e_chroma_plane);
10673
0
        }
10674
0
#endif
10675
10676
0
#if ENABLE_INTER_ZCU_COST
10677
#if !WEIGH_CHROMA_COST
10678
        /* cbf = 0, accumulate cu not coded cost */
10679
        ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
10680
#else
10681
0
        ps_ctxt->i8_cu_not_coded_cost += (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor +
10682
0
                                          (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
10683
0
                                         CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT;
10684
0
#endif
10685
0
#endif
10686
10687
0
        return 0;
10688
0
    }
10689
10690
31.9M
    if(intra_flag == 1)
10691
23.8M
    {
10692
23.8M
        quant_scale_mat_offset = 0;
10693
10694
#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
10695
        ai4_quant_rounding_factors[0][0] =
10696
            MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3);
10697
10698
        for(i = 0; i < trans_size * trans_size; i++)
10699
        {
10700
            ai4_quant_rounding_factors[1][i] =
10701
                MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3][i],
10702
                    (1 << QUANT_ROUND_FACTOR_Q) / 3);
10703
            ai4_quant_rounding_factors[2][i] =
10704
                MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3][i],
10705
                    (1 << QUANT_ROUND_FACTOR_Q) / 3);
10706
        }
10707
#endif
10708
23.8M
    }
10709
8.06M
    else
10710
8.06M
    {
10711
8.06M
        quant_scale_mat_offset = NUM_TRANS_TYPES;
10712
8.06M
    }
10713
10714
31.9M
    switch(trans_size)
10715
31.9M
    {
10716
15.7M
    case 4:
10717
15.7M
    {
10718
15.7M
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU;
10719
10720
15.7M
        break;
10721
0
    }
10722
11.4M
    case 8:
10723
11.4M
    {
10724
11.4M
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU;
10725
10726
11.4M
        break;
10727
0
    }
10728
4.72M
    case 16:
10729
4.72M
    {
10730
4.72M
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU;
10731
10732
4.72M
        break;
10733
0
    }
10734
0
    case 32:
10735
0
    {
10736
0
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU;
10737
10738
0
        break;
10739
0
    }
10740
31.9M
    }
10741
10742
    /* ---------- call residue and transform block ------- */
10743
31.9M
    u4_blk_sad = ps_ctxt->apf_chrm_resd_trns[trans_idx - 1](
10744
31.9M
        pu1_src,
10745
31.9M
        pu1_pred,
10746
31.9M
        pi4_trans_scratch,
10747
31.9M
        pi2_trans_values,
10748
31.9M
        src_strd,
10749
31.9M
        pred_strd,
10750
31.9M
        trans_size,
10751
31.9M
        e_chroma_plane);
10752
31.9M
    (void)u4_blk_sad;
10753
    /* -------- calculate SSD in Transform Domain ------ */
10754
10755
31.9M
    cbf = ps_ctxt->apf_quant_iquant_ssd
10756
31.9M
              [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2]
10757
10758
31.9M
          (pi2_trans_values,
10759
31.9M
           ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset],
10760
31.9M
           pi2_quant_coeffs,
10761
31.9M
           pi2_deq_data,
10762
31.9M
           trans_size,
10763
31.9M
           ps_ctxt->i4_chrm_cu_qp_div6,
10764
31.9M
           ps_ctxt->i4_chrm_cu_qp_mod6,
10765
31.9M
#if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
10766
31.9M
           ps_ctxt->i4_quant_rnd_factor[intra_flag],
10767
31.9M
           ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3],
10768
31.9M
           ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3],
10769
#else
10770
           intra_flag ? ai4_quant_rounding_factors[0][0] : ps_ctxt->i4_quant_rnd_factor[intra_flag],
10771
           intra_flag ? ai4_quant_rounding_factors[1]
10772
                      : ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3],
10773
           intra_flag ? ai4_quant_rounding_factors[2]
10774
                      : ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3],
10775
#endif
10776
31.9M
           trans_size,
10777
31.9M
           trans_size,
10778
31.9M
           deq_data_strd,
10779
31.9M
           pu1_csbf_buf,
10780
31.9M
           csbf_strd,
10781
31.9M
           pi4_zero_col,
10782
31.9M
           pi4_zero_row,
10783
31.9M
           ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset],
10784
31.9M
           pi8_cost);
10785
10786
31.9M
    if(e_ssd_type != FREQUENCY_DOMAIN_SSD)
10787
17.7M
    {
10788
17.7M
        pi8_cost[0] = UINT_MAX;
10789
17.7M
    }
10790
10791
31.9M
    if(0 != cbf)
10792
4.64M
    {
10793
4.64M
        if(i4_perform_sbh || i4_perform_rdoq)
10794
3.27M
        {
10795
3.27M
            ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd;
10796
3.27M
            ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size;
10797
10798
3.27M
            ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_chrm_cu_qp_div6;
10799
3.27M
            ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_chrm_cu_qp_mod6;
10800
3.27M
            ps_rdoq_sbh_ctxt->i4_scan_idx = i4_scan_idx;
10801
3.27M
            ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
10802
3.27M
            ps_rdoq_sbh_ctxt->i4_trans_size = trans_size;
10803
10804
3.27M
            ps_rdoq_sbh_ctxt->pi2_dequant_coeff =
10805
3.27M
                ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset];
10806
3.27M
            ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data;
10807
3.27M
            ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs;
10808
3.27M
            ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values;
10809
3.27M
            ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf;
10810
3.27M
            ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map;
10811
10812
3.27M
            if((!i4_perform_rdoq))
10813
1.40M
            {
10814
1.40M
                ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
10815
10816
1.40M
                pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
10817
1.40M
            }
10818
3.27M
        }
10819
10820
        /* ------- call coeffs scan function ------- */
10821
4.64M
        *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
10822
4.64M
            pi2_quant_coeffs,
10823
4.64M
            pi4_subBlock2csbfId_map,
10824
4.64M
            i4_scan_idx,
10825
4.64M
            trans_size,
10826
4.64M
            pu1_ecd_data,
10827
4.64M
            pu1_csbf_buf,
10828
4.64M
            csbf_strd);
10829
4.64M
    }
10830
10831
    /*  Normalize Cost. Note : trans_idx, not (trans_idx-1) */
10832
31.9M
    pi8_cost[0] >>= ga_trans_shift[trans_idx];
10833
10834
31.9M
#if RDOPT_ZERO_CBF_ENABLE
10835
31.9M
    if((0 != cbf))
10836
4.64M
    {
10837
4.64M
        WORD32 tu_bits;
10838
4.64M
        LWORD64 zero_cbf_cost_u, curr_cb_cod_cost;
10839
10840
4.64M
        zero_cbf_cost_u = 0;
10841
10842
        /*Populating the fields of rdoq_ctxt structure*/
10843
4.64M
        if(i4_perform_rdoq)
10844
1.87M
        {
10845
            //memset(ps_rdoq_sbh_ctxt,0,sizeof(rdoq_sbh_ctxt_t));
10846
            /* transform size to log2transform size */
10847
1.87M
            GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size);
10848
1.87M
            ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1;
10849
10850
1.87M
            ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_chroma_qf;
10851
1.87M
            ps_rdoq_sbh_ctxt->i4_is_luma = 0;
10852
1.87M
            ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx];
10853
1.87M
            ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td =
10854
1.87M
                (1 << (ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td - 1));
10855
1.87M
            ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0;
10856
1.87M
            ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col;
10857
1.87M
            ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row;
10858
1.87M
        }
10859
2.77M
        else if(i4_perform_zcbf)
10860
931k
        {
10861
            /* cost of zero cbf encoding */
10862
931k
            zero_cbf_cost_u =
10863
10864
931k
                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
10865
931k
                    pu1_pred,
10866
931k
                    pu1_src,
10867
931k
                    pred_strd,
10868
931k
                    src_strd,
10869
931k
                    trans_size,
10870
931k
                    trans_size,
10871
931k
                    e_chroma_plane);
10872
931k
        }
10873
10874
        /************************************************************************/
10875
        /* call the entropy rdo encode to get the bit estimate for current tu   */
10876
        /* note that tu includes only residual coding bits and does not include */
10877
        /* tu split, cbf and qp delta encoding bits for a TU                    */
10878
        /************************************************************************/
10879
4.64M
        if(i4_perform_rdoq)
10880
1.87M
        {
10881
1.87M
            tu_bits = ihevce_entropy_rdo_encode_tu_rdoq(
10882
1.87M
                &ps_ctxt->s_rdopt_entropy_ctxt,
10883
1.87M
                pu1_ecd_data,
10884
1.87M
                trans_size,
10885
1.87M
                0,
10886
1.87M
                ps_rdoq_sbh_ctxt,
10887
1.87M
                pi8_cost,
10888
1.87M
                &zero_cbf_cost_u,
10889
1.87M
                0);
10890
            //Currently, we are not accounting for sign bit in RDOPT bits calculation when RDOQ is turned on
10891
10892
1.87M
            if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0)
10893
82.9k
            {
10894
82.9k
                cbf = 0;
10895
10896
                /* num bytes is set to 0 */
10897
82.9k
                *pi4_coeff_off = 0;
10898
82.9k
            }
10899
10900
1.87M
            (*pi4_tu_bits) += tu_bits;
10901
10902
1.87M
            if((i4_perform_sbh) && (0 != cbf))
10903
1.79M
            {
10904
1.79M
                ps_rdoq_sbh_ctxt->i8_ssd_cost = pi8_cost[0];
10905
10906
1.79M
                ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
10907
10908
1.79M
                pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
10909
1.79M
            }
10910
10911
            /*Add round value before normalizing*/
10912
1.87M
            pi8_cost[0] += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td;
10913
1.87M
            pi8_cost[0] >>= ga_trans_shift[trans_idx];
10914
10915
1.87M
            if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1)
10916
1.79M
            {
10917
1.79M
                *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
10918
1.79M
                    pi2_quant_coeffs,
10919
1.79M
                    pi4_subBlock2csbfId_map,
10920
1.79M
                    i4_scan_idx,
10921
1.79M
                    trans_size,
10922
1.79M
                    pu1_ecd_data,
10923
1.79M
                    ps_rdoq_sbh_ctxt->pu1_csbf_buf,
10924
1.79M
                    csbf_strd);
10925
1.79M
            }
10926
1.87M
        }
10927
2.77M
        else
10928
2.77M
        {
10929
            /************************************************************************/
10930
            /* call the entropy rdo encode to get the bit estimate for current tu   */
10931
            /* note that tu includes only residual coding bits and does not include */
10932
            /* tu split, cbf and qp delta encoding bits for a TU                    */
10933
            /************************************************************************/
10934
2.77M
            tu_bits = ihevce_entropy_rdo_encode_tu(
10935
2.77M
                &ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 0, i4_perform_sbh);
10936
10937
2.77M
            (*pi4_tu_bits) += tu_bits;
10938
2.77M
        }
10939
10940
4.64M
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
10941
1.62M
        {
10942
1.62M
            pi8_cost[0] = ihevce_it_recon_ssd(
10943
1.62M
                ps_ctxt,
10944
1.62M
                pu1_src,
10945
1.62M
                src_strd,
10946
1.62M
                pu1_pred,
10947
1.62M
                pred_strd,
10948
1.62M
                pi2_deq_data,
10949
1.62M
                deq_data_strd,
10950
1.62M
                pu1_recon,
10951
1.62M
                i4_recon_stride,
10952
1.62M
                pu1_ecd_data,
10953
1.62M
                trans_size,
10954
1.62M
                PRED_MODE_INTRA,
10955
1.62M
                cbf,
10956
1.62M
                pi4_zero_col[0],
10957
1.62M
                pi4_zero_row[0],
10958
1.62M
                e_chroma_plane);
10959
10960
1.62M
            pu1_is_recon_available[0] = 1;
10961
1.62M
        }
10962
10963
4.64M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
10964
4.64M
        if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
10965
0
        {
10966
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
10967
0
                pu1_src,
10968
0
                src_strd,
10969
0
                pu1_recon,
10970
0
                i4_recon_stride,
10971
0
                pi8_cost[0],
10972
0
                i4_alpha_stim_multiplier,
10973
0
                trans_size,
10974
0
                0,
10975
0
                ps_ctxt->u1_enable_psyRDOPT,
10976
0
                e_chroma_plane);
10977
0
        }
10978
4.64M
        else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
10979
0
        {
10980
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
10981
0
                pu1_src,
10982
0
                src_strd,
10983
0
                pu1_pred,
10984
0
                pred_strd,
10985
0
                pi8_cost[0],
10986
0
                i4_alpha_stim_multiplier,
10987
0
                trans_size,
10988
0
                0,
10989
0
                ps_ctxt->u1_enable_psyRDOPT,
10990
0
                e_chroma_plane);
10991
0
        }
10992
4.64M
#endif
10993
10994
4.64M
        curr_cb_cod_cost = pi8_cost[0];
10995
10996
        /* add the SSD cost to bits estimate given by ECD */
10997
4.64M
        curr_cb_cod_cost +=
10998
4.64M
            COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
10999
11000
4.64M
        if(i4_perform_zcbf)
11001
1.60M
        {
11002
1.60M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
11003
1.60M
            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
11004
0
            {
11005
0
                zero_cbf_cost_u = ihevce_inject_stim_into_distortion(
11006
0
                    pu1_src,
11007
0
                    src_strd,
11008
0
                    pu1_pred,
11009
0
                    pred_strd,
11010
0
                    zero_cbf_cost_u,
11011
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
11012
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
11013
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
11014
0
                                                 100.0,
11015
0
                    trans_size,
11016
0
                    0,
11017
0
                    ps_ctxt->u1_enable_psyRDOPT,
11018
0
                    e_chroma_plane);
11019
0
            }
11020
1.60M
#endif
11021
            /* force the tu as zero cbf if zero_cbf_cost is lower */
11022
1.60M
            if(zero_cbf_cost_u < curr_cb_cod_cost)
11023
29.3k
            {
11024
29.3k
                *pi4_coeff_off = 0;
11025
29.3k
                cbf = 0;
11026
29.3k
                (*pi4_tu_bits) = 0;
11027
29.3k
                pi8_cost[0] = zero_cbf_cost_u;
11028
11029
29.3k
                pu1_is_recon_available[0] = 0;
11030
11031
29.3k
                if(e_ssd_type == SPATIAL_DOMAIN_SSD)
11032
12.1k
                {
11033
12.1k
                    ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
11034
12.1k
                        pu1_pred,
11035
12.1k
                        pred_strd,
11036
12.1k
                        pu1_recon,
11037
12.1k
                        i4_recon_stride,
11038
12.1k
                        trans_size,
11039
12.1k
                        trans_size,
11040
12.1k
                        e_chroma_plane);
11041
11042
12.1k
                    pu1_is_recon_available[0] = 1;
11043
12.1k
                }
11044
29.3k
            }
11045
11046
1.60M
#if ENABLE_INTER_ZCU_COST
11047
1.60M
            if(!intra_flag)
11048
1.60M
            {
11049
#if !WEIGH_CHROMA_COST
11050
                ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost_u;
11051
#else
11052
1.60M
                ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
11053
1.60M
                    (zero_cbf_cost_u * ps_ctxt->u4_chroma_cost_weighing_factor +
11054
1.60M
                     (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
11055
1.60M
                    CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
11056
1.60M
#endif
11057
1.60M
            }
11058
1.60M
#endif
11059
1.60M
        }
11060
4.64M
    }
11061
27.3M
    else
11062
27.3M
    {
11063
27.3M
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
11064
16.1M
        {
11065
16.1M
            pi8_cost[0] = ihevce_it_recon_ssd(
11066
16.1M
                ps_ctxt,
11067
16.1M
                pu1_src,
11068
16.1M
                src_strd,
11069
16.1M
                pu1_pred,
11070
16.1M
                pred_strd,
11071
16.1M
                pi2_deq_data,
11072
16.1M
                deq_data_strd,
11073
16.1M
                pu1_recon,
11074
16.1M
                i4_recon_stride,
11075
16.1M
                pu1_ecd_data,
11076
16.1M
                trans_size,
11077
16.1M
                PRED_MODE_INTRA,
11078
16.1M
                cbf,
11079
16.1M
                pi4_zero_col[0],
11080
16.1M
                pi4_zero_row[0],
11081
16.1M
                e_chroma_plane);
11082
11083
16.1M
            pu1_is_recon_available[0] = 1;
11084
16.1M
        }
11085
11086
27.3M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
11087
27.3M
        if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
11088
0
        {
11089
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
11090
0
                pu1_src,
11091
0
                src_strd,
11092
0
                pu1_recon,
11093
0
                i4_recon_stride,
11094
0
                pi8_cost[0],
11095
0
                !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
11096
0
                                       : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
11097
0
                                          (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
11098
0
                                             100.0,
11099
0
                trans_size,
11100
0
                0,
11101
0
                ps_ctxt->u1_enable_psyRDOPT,
11102
0
                e_chroma_plane);
11103
0
        }
11104
27.3M
        else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
11105
0
        {
11106
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
11107
0
                pu1_src,
11108
0
                src_strd,
11109
0
                pu1_pred,
11110
0
                pred_strd,
11111
0
                pi8_cost[0],
11112
0
                !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
11113
0
                                       : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
11114
0
                                          (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
11115
0
                                             100.0,
11116
0
                trans_size,
11117
0
                0,
11118
0
                ps_ctxt->u1_enable_psyRDOPT,
11119
0
                e_chroma_plane);
11120
0
        }
11121
27.3M
#endif
11122
11123
27.3M
#if ENABLE_INTER_ZCU_COST
11124
27.3M
        if(!intra_flag)
11125
6.46M
        {
11126
#if !WEIGH_CHROMA_COST
11127
            /* cbf = 0, accumulate cu not coded cost */
11128
            ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
11129
#else
11130
            /* cbf = 0, accumulate cu not coded cost */
11131
11132
6.46M
            ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
11133
6.46M
                (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor +
11134
6.46M
                 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
11135
6.46M
                CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
11136
6.46M
#endif
11137
6.46M
        }
11138
27.3M
#endif
11139
27.3M
    }
11140
31.9M
#endif /* RDOPT_ZERO_CBF_ENABLE */
11141
11142
31.9M
    return (cbf);
11143
31.9M
}