Coverage Report

Created: 2026-04-01 06:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/encoder/ihevce_enc_loop_utils.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
21
/*!
22
******************************************************************************
23
* \file ihevce_enc_loop_utils.c
24
*
25
* \brief
26
*    This file contains utility functions of Encode loop
27
*
28
* \date
29
*    18/09/2012
30
*
31
* \author
32
*    Ittiam
33
*
34
*
35
* List of Functions
36
*
37
*
38
******************************************************************************
39
*/
40
41
/*****************************************************************************/
42
/* File Includes                                                             */
43
/*****************************************************************************/
44
/* System include files */
45
#include <stdio.h>
46
#include <string.h>
47
#include <stdlib.h>
48
#include <assert.h>
49
#include <stdarg.h>
50
#include <math.h>
51
#include <limits.h>
52
53
/* User include files */
54
#include "ihevc_typedefs.h"
55
#include "itt_video_api.h"
56
#include "ihevce_api.h"
57
58
#include "rc_cntrl_param.h"
59
#include "rc_frame_info_collector.h"
60
#include "rc_look_ahead_params.h"
61
62
#include "ihevc_defs.h"
63
#include "ihevc_macros.h"
64
#include "ihevc_debug.h"
65
#include "ihevc_structs.h"
66
#include "ihevc_platform_macros.h"
67
#include "ihevc_deblk.h"
68
#include "ihevc_itrans_recon.h"
69
#include "ihevc_chroma_itrans_recon.h"
70
#include "ihevc_chroma_intra_pred.h"
71
#include "ihevc_intra_pred.h"
72
#include "ihevc_inter_pred.h"
73
#include "ihevc_mem_fns.h"
74
#include "ihevc_padding.h"
75
#include "ihevc_weighted_pred.h"
76
#include "ihevc_sao.h"
77
#include "ihevc_resi_trans.h"
78
#include "ihevc_quant_iquant_ssd.h"
79
#include "ihevc_cabac_tables.h"
80
#include "ihevc_common_tables.h"
81
82
#include "ihevce_defs.h"
83
#include "ihevce_hle_interface.h"
84
#include "ihevce_lap_enc_structs.h"
85
#include "ihevce_multi_thrd_structs.h"
86
#include "ihevce_multi_thrd_funcs.h"
87
#include "ihevce_me_common_defs.h"
88
#include "ihevce_had_satd.h"
89
#include "ihevce_error_codes.h"
90
#include "ihevce_bitstream.h"
91
#include "ihevce_cabac.h"
92
#include "ihevce_rdoq_macros.h"
93
#include "ihevce_function_selector.h"
94
#include "ihevce_enc_structs.h"
95
#include "ihevce_entropy_structs.h"
96
#include "ihevce_cmn_utils_instr_set_router.h"
97
#include "ihevce_ipe_instr_set_router.h"
98
#include "ihevce_decomp_pre_intra_structs.h"
99
#include "ihevce_decomp_pre_intra_pass.h"
100
#include "ihevce_enc_loop_structs.h"
101
#include "ihevce_nbr_avail.h"
102
#include "ihevce_enc_loop_utils.h"
103
#include "ihevce_sub_pic_rc.h"
104
#include "ihevce_global_tables.h"
105
#include "ihevce_bs_compute_ctb.h"
106
#include "ihevce_cabac_rdo.h"
107
#include "ihevce_deblk.h"
108
#include "ihevce_frame_process.h"
109
#include "ihevce_rc_enc_structs.h"
110
#include "hme_datatype.h"
111
#include "hme_interface.h"
112
#include "hme_common_defs.h"
113
#include "hme_defs.h"
114
#include "hme_common_utils.h"
115
#include "ihevce_me_instr_set_router.h"
116
#include "ihevce_enc_subpel_gen.h"
117
#include "ihevce_inter_pred.h"
118
#include "ihevce_mv_pred.h"
119
#include "ihevce_mv_pred_merge.h"
120
#include "ihevce_enc_loop_inter_mode_sifter.h"
121
#include "ihevce_enc_cu_recursion.h"
122
#include "ihevce_enc_loop_pass.h"
123
#include "ihevce_common_utils.h"
124
#include "ihevce_dep_mngr_interface.h"
125
#include "ihevce_sao.h"
126
#include "ihevce_tile_interface.h"
127
#include "ihevce_profile.h"
128
#include "ihevce_stasino_helpers.h"
129
#include "ihevce_tu_tree_selector.h"
130
131
/*****************************************************************************/
132
/* Globals                                                                   */
133
/*****************************************************************************/
134
135
extern UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2];
136
extern const UWORD8 gu1_hevce_scan4x4[3][16];
137
extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc[4][16];
138
extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_tr4[16];
139
extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_00[16];
140
141
/*****************************************************************************/
142
/* Constant Macros                                                           */
143
/*****************************************************************************/
144
#define ENABLE_ZERO_CBF 1
145
#define DISABLE_RDOQ_INTRA 0
146
147
/*****************************************************************************/
148
/* Function Definitions                                                      */
149
/*****************************************************************************/
150
void *ihevce_tu_tree_update(
151
    tu_prms_t *ps_tu_prms,
152
    WORD32 *pnum_tu_in_cu,
153
    WORD32 depth,
154
    WORD32 tu_split_flag,
155
    WORD32 tu_early_cbf,
156
    WORD32 i4_x_off,
157
    WORD32 i4_y_off)
158
197k
{
159
    //WORD32 tu_split_flag = p_tu_split_flag[0];
160
197k
    WORD32 p_tu_split_flag[4];
161
197k
    WORD32 p_tu_early_cbf[4];
162
163
197k
    WORD32 tu_size = ps_tu_prms->u1_tu_size;
164
165
197k
    if(((tu_size >> depth) >= 16) && (tu_split_flag & 0x1))
166
16.4k
    {
167
16.4k
        if((tu_size >> depth) == 32)
168
1.96k
        {
169
            /* Get the individual TU split flags */
170
1.96k
            p_tu_split_flag[0] = (tu_split_flag >> 16) & 0x1F;
171
1.96k
            p_tu_split_flag[1] = (tu_split_flag >> 11) & 0x1F;
172
1.96k
            p_tu_split_flag[2] = (tu_split_flag >> 6) & 0x1F;
173
1.96k
            p_tu_split_flag[3] = (tu_split_flag >> 1) & 0x1F;
174
175
            /* Get the early CBF flags */
176
1.96k
            p_tu_early_cbf[0] = (tu_early_cbf >> 16) & 0x1F;
177
1.96k
            p_tu_early_cbf[1] = (tu_early_cbf >> 11) & 0x1F;
178
1.96k
            p_tu_early_cbf[2] = (tu_early_cbf >> 6) & 0x1F;
179
1.96k
            p_tu_early_cbf[3] = (tu_early_cbf >> 1) & 0x1F;
180
1.96k
        }
181
14.4k
        else
182
14.4k
        {
183
            /* Get the individual TU split flags */
184
14.4k
            p_tu_split_flag[0] = ((tu_split_flag >> 4) & 0x1);
185
14.4k
            p_tu_split_flag[1] = ((tu_split_flag >> 3) & 0x1);
186
14.4k
            p_tu_split_flag[2] = ((tu_split_flag >> 2) & 0x1);
187
14.4k
            p_tu_split_flag[3] = ((tu_split_flag >> 1) & 0x1);
188
189
            /* Get the early CBF flags */
190
14.4k
            p_tu_early_cbf[0] = ((tu_early_cbf >> 4) & 0x1);
191
14.4k
            p_tu_early_cbf[1] = ((tu_early_cbf >> 3) & 0x1);
192
14.4k
            p_tu_early_cbf[2] = ((tu_early_cbf >> 2) & 0x1);
193
14.4k
            p_tu_early_cbf[3] = ((tu_early_cbf >> 1) & 0x1);
194
14.4k
        }
195
196
16.4k
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
197
16.4k
            ps_tu_prms,
198
16.4k
            pnum_tu_in_cu,
199
16.4k
            depth + 1,
200
16.4k
            p_tu_split_flag[0],
201
16.4k
            p_tu_early_cbf[0],
202
16.4k
            i4_x_off,
203
16.4k
            i4_y_off);
204
205
16.4k
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
206
16.4k
            ps_tu_prms,
207
16.4k
            pnum_tu_in_cu,
208
16.4k
            depth + 1,
209
16.4k
            p_tu_split_flag[1],
210
16.4k
            p_tu_early_cbf[1],
211
16.4k
            (i4_x_off + (tu_size >> (depth + 1))),
212
16.4k
            i4_y_off);
213
214
16.4k
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
215
16.4k
            ps_tu_prms,
216
16.4k
            pnum_tu_in_cu,
217
16.4k
            depth + 1,
218
16.4k
            p_tu_split_flag[2],
219
16.4k
            p_tu_early_cbf[2],
220
16.4k
            i4_x_off,
221
16.4k
            (i4_y_off + (tu_size >> (depth + 1))));
222
223
16.4k
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
224
16.4k
            ps_tu_prms,
225
16.4k
            pnum_tu_in_cu,
226
16.4k
            depth + 1,
227
16.4k
            p_tu_split_flag[3],
228
16.4k
            p_tu_early_cbf[3],
229
16.4k
            (i4_x_off + (tu_size >> (depth + 1))),
230
16.4k
            (i4_y_off + (tu_size >> (depth + 1))));
231
16.4k
    }
232
181k
    else
233
181k
    {
234
181k
        if(tu_split_flag & 0x1)
235
40.0k
        {
236
            /* This piece of code will be entered for the 8x8, if it is split
237
            Update the 4 child TU's accordingly. */
238
239
40.0k
            (*pnum_tu_in_cu) += 4;
240
241
            /* TL TU update */
242
40.0k
            ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
243
244
40.0k
            ps_tu_prms->u1_x_off = i4_x_off;
245
246
40.0k
            ps_tu_prms->u1_y_off = i4_y_off;
247
248
            /* Early CBF is not done for 4x4 transforms */
249
40.0k
            ps_tu_prms->i4_early_cbf = 1;
250
251
40.0k
            ps_tu_prms++;
252
253
            /* TR TU update */
254
40.0k
            ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
255
256
40.0k
            ps_tu_prms->u1_x_off = i4_x_off + (tu_size >> (depth + 1));
257
258
40.0k
            ps_tu_prms->u1_y_off = i4_y_off;
259
260
            /* Early CBF is not done for 4x4 transforms */
261
40.0k
            ps_tu_prms->i4_early_cbf = 1;
262
263
40.0k
            ps_tu_prms++;
264
265
            /* BL TU update */
266
40.0k
            ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
267
268
40.0k
            ps_tu_prms->u1_x_off = i4_x_off;
269
270
40.0k
            ps_tu_prms->u1_y_off = i4_y_off + (tu_size >> (depth + 1));
271
272
            /* Early CBF is not done for 4x4 transforms */
273
40.0k
            ps_tu_prms->i4_early_cbf = 1;
274
275
40.0k
            ps_tu_prms++;
276
277
            /* BR TU update */
278
40.0k
            ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
279
280
40.0k
            ps_tu_prms->u1_x_off = i4_x_off + (tu_size >> (depth + 1));
281
282
40.0k
            ps_tu_prms->u1_y_off = i4_y_off + (tu_size >> (depth + 1));
283
284
            /* Early CBF is not done for 4x4 transforms */
285
40.0k
            ps_tu_prms->i4_early_cbf = 1;
286
40.0k
        }
287
141k
        else
288
141k
        {
289
            /* Update the TU params */
290
141k
            ps_tu_prms->u1_tu_size = tu_size >> depth;
291
292
141k
            ps_tu_prms->u1_x_off = i4_x_off;
293
294
141k
            ps_tu_prms->u1_y_off = i4_y_off;
295
296
141k
            (*pnum_tu_in_cu)++;
297
298
            /* Early CBF update for current TU */
299
141k
            ps_tu_prms->i4_early_cbf = tu_early_cbf & 0x1;
300
141k
        }
301
181k
        if((*pnum_tu_in_cu) < MAX_TU_IN_CTB)
302
181k
        {
303
181k
            ps_tu_prms++;
304
305
181k
            ps_tu_prms->u1_tu_size = tu_size;
306
181k
        }
307
181k
    }
308
309
197k
    return ps_tu_prms;
310
197k
}
311
312
/*!
313
******************************************************************************
314
* \if Function name : ihevce_compute_quant_rel_param \endif
315
*
316
* \brief
317
*    This function updates quantization related parameters like qp_mod_6 etc in
318
*       context according to new qp
319
*
320
* \date
321
*    08/01/2013
322
*
323
* \author
324
*    Ittiam
325
*
326
* \return
327
*
328
* List of Functions
329
*
330
*
331
******************************************************************************
332
*/
333
void ihevce_compute_quant_rel_param(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD8 i1_cu_qp)
334
905k
{
335
905k
    WORD32 i4_div_factor;
336
337
905k
    ps_ctxt->i4_chrm_cu_qp =
338
905k
        (ps_ctxt->u1_chroma_array_type == 2)
339
905k
            ? MIN(i1_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51)
340
905k
            : gai1_ihevc_chroma_qp_scale[i1_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET];
341
905k
    ps_ctxt->i4_cu_qp_div6 = (i1_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
342
905k
    i4_div_factor = (i1_cu_qp + 3) / 6;
343
905k
    i4_div_factor = CLIP3(i4_div_factor, 3, 6);
344
905k
    ps_ctxt->i4_cu_qp_mod6 = (i1_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
345
905k
    ps_ctxt->i4_chrm_cu_qp_div6 = (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
346
905k
    ps_ctxt->i4_chrm_cu_qp_mod6 = (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
347
348
905k
#define INTER_RND_QP_BY_6
349
905k
#ifdef INTER_RND_QP_BY_6
350
    /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
351
905k
    {
352
905k
        ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] =
353
905k
            (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)6) + 0.5f);
354
905k
    }
355
#else
356
    /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
357
    ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3;
358
#endif
359
360
905k
    if(ISLICE == ps_ctxt->i1_slice_type)
361
452k
    {
362
        /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
363
452k
        ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
364
452k
            (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)3) + 0.5f);
365
452k
    }
366
453k
    else
367
453k
    {
368
453k
        if(0) /*TRAQO_EXT_ENABLE_ONE_THIRD_RND*/
369
0
        {
370
            /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
371
0
            ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
372
0
                (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)3) + 0.5f);
373
0
        }
374
453k
        else
375
453k
        {
376
            /* quant factor without RDOQ is 1/6th of shift for intra in inter pic */
377
453k
            ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
378
453k
                ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER];
379
            /* (1 << QUANT_ROUND_FACTOR_Q) / 6; */
380
453k
        }
381
453k
    }
382
905k
}
383
384
/*!
385
******************************************************************************
386
* \if Function name : ihevce_populate_cl_cu_lambda_prms \endif
387
*
388
* \brief
389
*    Function whihc calculates the Lambda params for current picture
390
*
391
* \param[in] ps_enc_ctxt : encoder ctxt pointer
392
* \param[in] ps_cur_pic_ctxt : current pic ctxt
393
* \param[in] i4_cur_frame_qp : current pic QP
394
* \param[in] first_field : is first field flag
395
* \param[in] i4_temporal_lyr_id : Current picture layer id
396
*
397
* \return
398
*    None
399
*
400
* \author
401
*  Ittiam
402
*
403
*****************************************************************************
404
*/
405
void ihevce_populate_cl_cu_lambda_prms(
406
    ihevce_enc_loop_ctxt_t *ps_ctxt,
407
    frm_lambda_ctxt_t *ps_frm_lamda,
408
    WORD32 i4_slice_type,
409
    WORD32 i4_temporal_lyr_id,
410
    WORD32 i4_lambda_type)
411
2.52k
{
412
2.52k
    WORD32 i4_curr_cu_qp, i4_curr_cu_qp_offset;
413
2.52k
    double lambda_modifier;
414
2.52k
    double lambda_uv_modifier;
415
2.52k
    double lambda;
416
2.52k
    double lambda_uv;
417
418
2.52k
    WORD32 i4_qp_bdoffset = 6 * (ps_ctxt->u1_bit_depth - 8);
419
420
    /*Populate lamda modifier */
421
2.52k
    ps_ctxt->i4_lamda_modifier = ps_frm_lamda->lambda_modifier;
422
2.52k
    ps_ctxt->i4_uv_lamda_modifier = ps_frm_lamda->lambda_uv_modifier;
423
2.52k
    ps_ctxt->i4_temporal_layer_id = i4_temporal_lyr_id;
424
425
2.52k
    for(i4_curr_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
426
131k
        i4_curr_cu_qp <= ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
427
128k
        i4_curr_cu_qp++)
428
128k
    {
429
128k
        WORD32 chroma_qp = (ps_ctxt->i4_chroma_format == IV_YUV_422SP_UV)
430
128k
                               ? MIN(i4_curr_cu_qp, 51)
431
128k
                               : gai1_ihevc_chroma_qp_scale[i4_curr_cu_qp + MAX_QP_BD_OFFSET];
432
433
128k
        i4_curr_cu_qp_offset = i4_curr_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
434
435
128k
        lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
436
128k
        lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
437
438
128k
        if((BSLICE == i4_slice_type) && (i4_temporal_lyr_id))
439
1.83k
        {
440
1.83k
            lambda_modifier = ps_frm_lamda->lambda_modifier *
441
1.83k
                              CLIP3((((double)(i4_curr_cu_qp - 12)) / 6.0), 2.00, 4.00);
442
1.83k
            lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier *
443
1.83k
                                 CLIP3((((double)(chroma_qp - 12)) / 6.0), 2.00, 4.00);
444
1.83k
        }
445
126k
        else
446
126k
        {
447
126k
            lambda_modifier = ps_frm_lamda->lambda_modifier;
448
126k
            lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier;
449
126k
        }
450
128k
        if(ps_ctxt->i4_use_const_lamda_modifier)
451
0
        {
452
0
            if(ISLICE == ps_ctxt->i1_slice_type)
453
0
            {
454
0
                lambda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
455
0
                lambda_uv_modifier = ps_ctxt->f_i_pic_lamda_modifier;
456
0
            }
457
0
            else
458
0
            {
459
0
                lambda_modifier = CONST_LAMDA_MOD_VAL;
460
0
                lambda_uv_modifier = CONST_LAMDA_MOD_VAL;
461
0
            }
462
0
        }
463
128k
        switch(i4_lambda_type)
464
128k
        {
465
0
        case 0:
466
0
        {
467
0
            i4_qp_bdoffset = 0;
468
469
0
            lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
470
0
            lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
471
472
0
            lambda *= lambda_modifier;
473
0
            lambda_uv *= lambda_uv_modifier;
474
475
0
            ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
476
0
                (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
477
478
0
            ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
479
0
                (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
480
481
0
            ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
482
0
                (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
483
0
            if(ps_ctxt->i4_use_const_lamda_modifier)
484
0
            {
485
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
486
0
                    (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
487
0
            }
488
0
            else
489
0
            {
490
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
491
0
                    (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
492
0
            }
493
494
0
            ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
495
0
                (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
496
497
0
            ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
498
0
                ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset];
499
500
0
            ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
501
0
                ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset];
502
503
0
            ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
504
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset];
505
506
0
            ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
507
0
                ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset];
508
509
0
            break;
510
0
        }
511
0
        case 1:
512
0
        {
513
0
            lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
514
0
            lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
515
516
0
            lambda *= lambda_modifier;
517
0
            lambda_uv *= lambda_uv_modifier;
518
519
0
            ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
520
0
                (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
521
522
0
            ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
523
0
                (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
524
525
0
            ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
526
0
                (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
527
0
            if(ps_ctxt->i4_use_const_lamda_modifier)
528
0
            {
529
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
530
0
                    (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
531
0
            }
532
0
            else
533
0
            {
534
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
535
0
                    (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
536
0
            }
537
0
            ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
538
0
                (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
539
540
0
            ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
541
0
                ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset];
542
543
0
            ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
544
0
                ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset];
545
546
0
            ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
547
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset];
548
549
0
            ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
550
0
                ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset];
551
552
0
            break;
553
0
        }
554
128k
        case 2:
555
128k
        {
556
128k
            lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
557
128k
            lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
558
559
128k
            lambda *= lambda_modifier;
560
128k
            lambda_uv *= lambda_uv_modifier;
561
562
128k
            ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
563
128k
                (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
564
565
128k
            ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
566
128k
                (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
567
568
128k
            ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
569
128k
                (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
570
571
128k
            if(ps_ctxt->i4_use_const_lamda_modifier)
572
0
            {
573
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
574
0
                    (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
575
0
            }
576
128k
            else
577
128k
            {
578
128k
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
579
128k
                    (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
580
128k
            }
581
128k
            ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
582
128k
                (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
583
584
            /* lambda corresponding to 8- bit, for metrics based on 8- bit ( Example 8bit SAD in encloop)*/
585
128k
            lambda = pow(2.0, (((double)(i4_curr_cu_qp - 12)) / 3.0));
586
128k
            lambda_uv = pow(2.0, (((double)(chroma_qp - 12)) / 3.0));
587
588
128k
            lambda *= lambda_modifier;
589
128k
            lambda_uv *= lambda_uv_modifier;
590
591
128k
            ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
592
128k
                (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
593
594
128k
            ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
595
128k
                (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
596
597
128k
            ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
598
128k
                (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
599
128k
            if(ps_ctxt->i4_use_const_lamda_modifier)
600
0
            {
601
0
                ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
602
0
                    (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
603
0
            }
604
128k
            else
605
128k
            {
606
128k
                ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
607
128k
                    (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
608
128k
            }
609
610
128k
            ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
611
128k
                (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
612
613
128k
            break;
614
0
        }
615
0
        default:
616
0
        {
617
            /* Intended to be a barren wasteland! */
618
0
            ASSERT(0);
619
0
        }
620
128k
        }
621
128k
    }
622
2.52k
}
623
624
/*!
625
******************************************************************************
626
* \if Function name : ihevce_get_cl_cu_lambda_prms \endif
627
*
628
* \brief
629
*    Function whihc calculates the Lambda params for current picture
630
*
631
* \param[in] ps_enc_ctxt : encoder ctxt pointer
632
* \param[in] ps_cur_pic_ctxt : current pic ctxt
633
* \param[in] i4_cur_frame_qp : current pic QP
634
* \param[in] first_field : is first field flag
635
* \param[in] i4_temporal_lyr_id : Current picture layer id
636
*
637
* \return
638
*    None
639
*
640
* \author
641
*  Ittiam
642
*
643
*****************************************************************************
644
*/
645
void ihevce_get_cl_cu_lambda_prms(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 i4_cur_cu_qp)
646
905k
{
647
905k
    WORD32 chroma_qp = (ps_ctxt->u1_chroma_array_type == 2)
648
905k
                           ? MIN(i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51)
649
905k
                           : gai1_ihevc_chroma_qp_scale
650
905k
                                 [i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET];
651
652
    /* closed loop ssd lambda is same as final lambda */
653
905k
    ps_ctxt->i8_cl_ssd_lambda_qf =
654
905k
        ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
655
905k
    ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
656
905k
        ps_ctxt
657
905k
            ->i8_cl_ssd_lambda_chroma_qf_array[chroma_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
658
905k
    ps_ctxt->u4_chroma_cost_weighing_factor =
659
905k
        ps_ctxt->au4_chroma_cost_weighing_factor_array
660
905k
            [chroma_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
661
    /* --- Initialized the lambda for SATD computations --- */
662
    /* --- 0.95 is the multiplication factor as per HM --- */
663
    /* --- 1.9 is the multiplication factor for Hadamard Transform --- */
664
905k
    ps_ctxt->i4_satd_lamda =
665
905k
        ps_ctxt->i4_satd_lamda_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
666
905k
    ps_ctxt->i4_sad_lamda =
667
905k
        ps_ctxt->i4_sad_type2_lamda_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
668
905k
}
669
670
/*!
671
******************************************************************************
672
* \if Function name : ihevce_update_pred_qp \endif
673
*
674
* \brief
675
*    Computes pred qp for the given CU
676
*
677
* \param[in]
678
*
679
* \return
680
*
681
*
682
* \author
683
*  Ittiam
684
*
685
*****************************************************************************
686
*/
687
void ihevce_update_pred_qp(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 cu_pos_x, WORD32 cu_pos_y)
688
369k
{
689
369k
    WORD32 i4_pred_qp = 0x7FFFFFFF;
690
369k
    WORD32 i4_top, i4_left;
691
369k
    if(cu_pos_x == 0 && cu_pos_y == 0) /*CTB start*/
692
19.6k
    {
693
19.6k
        i4_pred_qp = ps_ctxt->i4_prev_QP;
694
19.6k
    }
695
349k
    else
696
349k
    {
697
349k
        if(cu_pos_y == 0) /*CTB boundary*/
698
58.9k
        {
699
58.9k
            i4_top = ps_ctxt->i4_prev_QP;
700
58.9k
        }
701
290k
        else /*within CTB*/
702
290k
        {
703
290k
            i4_top = ps_ctxt->ai4_qp_qg[(cu_pos_y - 1) * 8 + (cu_pos_x)];
704
290k
        }
705
349k
        if(cu_pos_x == 0) /*CTB boundary*/
706
58.6k
        {
707
58.6k
            i4_left = ps_ctxt->i4_prev_QP;
708
58.6k
        }
709
290k
        else /*within CTB*/
710
290k
        {
711
290k
            i4_left = ps_ctxt->ai4_qp_qg[(cu_pos_y)*8 + (cu_pos_x - 1)];
712
290k
        }
713
349k
        i4_pred_qp = (i4_left + i4_top + 1) >> 1;
714
349k
    }
715
369k
    ps_ctxt->i4_pred_qp = i4_pred_qp;
716
369k
    return;
717
369k
}
718
/*!
719
******************************************************************************
720
* \if Function name : ihevce_compute_cu_level_QP \endif
721
*
722
* \brief
723
*    Computes cu level QP with Traqo,Spatial Mod and In-frame RC
724
*
725
* \param[in]
726
*
727
* \return
728
*
729
*
730
* \author
731
*  Ittiam
732
*
733
*****************************************************************************
734
*/
735
void ihevce_compute_cu_level_QP(
736
    ihevce_enc_loop_ctxt_t *ps_ctxt,
737
    WORD32 i4_activity_for_qp,
738
    WORD32 i4_activity_for_lamda,
739
    WORD32 i4_reduce_qp)
740
774k
{
741
    /*modify quant related param in ctxt based on current cu qp*/
742
774k
    WORD32 i4_input_QP = ps_ctxt->i4_frame_mod_qp;
743
774k
    WORD32 cu_qp = i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
744
745
774k
    WORD32 i4_max_qp_allowed;
746
774k
    WORD32 i4_min_qp_allowed;
747
774k
    WORD32 i4_pred_qp;
748
749
774k
    i4_pred_qp = ps_ctxt->i4_pred_qp;
750
751
774k
    if(ps_ctxt->i4_sub_pic_level_rc)
752
0
    {
753
0
        i4_max_qp_allowed = (i4_pred_qp + (25 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 2)));
754
0
        i4_min_qp_allowed = (i4_pred_qp - (26 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 2)));
755
0
    }
756
774k
    else
757
774k
    {
758
774k
        i4_max_qp_allowed = (i4_input_QP + (7 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 4)));
759
774k
        i4_min_qp_allowed = (i4_input_QP - (18 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 4)));
760
774k
    }
761
774k
    if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6))
762
0
        return;
763
764
#if LAMDA_BASED_ON_QUANT
765
    i4_activity_for_lamda = i4_activity_for_qp;
766
#endif
767
768
774k
    if(i4_activity_for_qp != -1)
769
774k
    {
770
774k
        cu_qp = (ps_ctxt->ps_rc_quant_ctxt
771
774k
                     ->pi4_qp_to_qscale[i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]);
772
774k
        if(ps_ctxt->i4_qp_mod)
773
774k
        {
774
            /*Recompute the Qp as per enc thread's frame level Qp*/
775
774k
            ASSERT(i4_activity_for_qp > 0);
776
774k
            cu_qp = ((cu_qp * i4_activity_for_qp) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
777
774k
                    QP_LEVEL_MOD_ACT_FACTOR;
778
774k
        }
779
780
        // To avoid access of uninitialised Qscale to qp conversion table
781
774k
        if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale)
782
2.29k
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale;
783
772k
        else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale)
784
0
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale;
785
786
774k
        cu_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[cu_qp];
787
788
774k
        if((1 == i4_reduce_qp) && (cu_qp > 1))
789
0
            cu_qp--;
790
791
        /*CLIP the delta to obey standard allowed QP variation of (-26 + offset/2) to (25 + offset/2)*/
792
774k
        if(cu_qp > i4_max_qp_allowed)
793
0
            cu_qp = i4_max_qp_allowed;
794
774k
        else if(cu_qp < i4_min_qp_allowed)
795
0
            cu_qp = i4_min_qp_allowed;
796
797
        /* CLIP to maintain Qp between user configured and min and max Qp values*/
798
774k
        if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp)
799
0
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
800
774k
        else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp)
801
3.10k
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
802
803
        /*cu qp must be populated in cu_analyse_t struct*/
804
774k
        ps_ctxt->i4_cu_qp = cu_qp;
805
        /*recompute quant related param at every cu level*/
806
774k
        ihevce_compute_quant_rel_param(ps_ctxt, cu_qp);
807
774k
    }
808
809
    /*Decoupling qp and lamda calculation */
810
774k
    if(i4_activity_for_lamda != -1)
811
774k
    {
812
774k
        cu_qp = (ps_ctxt->ps_rc_quant_ctxt
813
774k
                     ->pi4_qp_to_qscale[i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]);
814
815
774k
        if(ps_ctxt->i4_qp_mod)
816
774k
        {
817
774k
#if MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON
818
            /*Recompute the Qp as per enc thread's frame level Qp*/
819
774k
            ASSERT(i4_activity_for_lamda > 0);
820
774k
            cu_qp = ((cu_qp * i4_activity_for_lamda) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
821
774k
                    QP_LEVEL_MOD_ACT_FACTOR;
822
774k
#endif
823
774k
        }
824
774k
        if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale)
825
2.29k
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale;
826
772k
        else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale)
827
0
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale;
828
829
774k
        cu_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[cu_qp];
830
831
        /*CLIP the delta to obey standard allowed QP variation of (-26 + offset/2) to (25 + offset/2)*/
832
774k
        if(cu_qp > i4_max_qp_allowed)
833
0
            cu_qp = i4_max_qp_allowed;
834
774k
        else if(cu_qp < i4_min_qp_allowed)
835
0
            cu_qp = i4_min_qp_allowed;
836
837
        /* CLIP to maintain Qp between user configured and min and max Qp values*/
838
774k
        if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp)
839
0
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
840
774k
        else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp)
841
4.08k
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
842
        /* get frame level lambda params */
843
774k
        ihevce_get_cl_cu_lambda_prms(
844
774k
            ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? cu_qp : ps_ctxt->i4_frame_qp);
845
774k
    }
846
774k
}
847
848
void ihevce_update_cu_level_qp_lamda(
849
    ihevce_enc_loop_ctxt_t *ps_ctxt, cu_analyse_t *ps_cu_analyse, WORD32 trans_size, WORD32 is_intra)
850
774k
{
851
774k
    WORD32 i4_act_counter = 0, i4_act_counter_lamda = 0;
852
853
774k
    if(ps_cu_analyse->u1_cu_size == 64)
854
2.51k
    {
855
2.51k
        ASSERT((trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4));
856
2.51k
        i4_act_counter = (trans_size == 16) + 2 * ((trans_size == 8) || (trans_size == 4));
857
2.51k
        i4_act_counter_lamda = 3;
858
2.51k
    }
859
772k
    else if(ps_cu_analyse->u1_cu_size == 32)
860
38.0k
    {
861
38.0k
        ASSERT((trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4));
862
38.0k
        i4_act_counter = (trans_size == 16) + 2 * ((trans_size == 8) || (trans_size == 4));
863
38.0k
        i4_act_counter_lamda = 0;
864
38.0k
    }
865
734k
    else if(ps_cu_analyse->u1_cu_size == 16)
866
178k
    {
867
178k
        ASSERT((trans_size == 16) || (trans_size == 8) || (trans_size == 4));
868
178k
        i4_act_counter = (trans_size == 8) || (trans_size == 4);
869
178k
        i4_act_counter_lamda = 0;
870
178k
    }
871
555k
    else if(ps_cu_analyse->u1_cu_size == 8)
872
555k
    {
873
555k
        ASSERT((trans_size == 8) || (trans_size == 4));
874
555k
        i4_act_counter = 1;
875
555k
        i4_act_counter_lamda = 0;
876
555k
    }
877
0
    else
878
0
    {
879
0
        ASSERT(0);
880
0
    }
881
882
774k
    if(ps_ctxt->i4_use_ctb_level_lamda)
883
0
    {
884
0
        ihevce_compute_cu_level_QP(
885
0
            ps_ctxt, ps_cu_analyse->i4_act_factor[i4_act_counter][is_intra], -1, 0);
886
0
    }
887
774k
    else
888
774k
    {
889
774k
        ihevce_compute_cu_level_QP(
890
774k
            ps_ctxt,
891
774k
            ps_cu_analyse->i4_act_factor[i4_act_counter][is_intra],
892
774k
            ps_cu_analyse->i4_act_factor[i4_act_counter_lamda][is_intra],
893
774k
            0);
894
774k
    }
895
896
774k
    ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp;
897
774k
}
898
899
/**
900
*******************************************************************************
901
* \if Function name : ihevce_scan_coeffs \endif
902
*
903
* @brief * Computes the coeff buffer for a coded TU for entropy coding
904
*
905
* @par   Description
906
* Computes the coeff buffer for a coded TU for entropy coding
907
*
908
* \param[in] pi2_quan_coeffs Quantized coefficient context
909
*
910
* \param[in] scan_idx Scan index specifying the scan order
911
*
912
* \param[in] trans_size Transform unit size
913
*
914
* \param[inout] pu1_out_data output coeff buffer for a coded TU for entropy coding
915
*
916
* \param[in] pu1_csbf_buf csb flag buffer
917
*
918
* @returns num_bytes
919
* Number of bytes written to pu1_out_data
920
*
921
* @remarks
922
*
923
* \author
924
*  Ittiam
925
*
926
*******************************************************************************
927
*/
928
929
WORD32 ihevce_scan_coeffs(
930
    WORD16 *pi2_quant_coeffs,
931
    WORD32 *pi4_subBlock2csbfId_map,
932
    WORD32 scan_idx,
933
    WORD32 trans_size,
934
    UWORD8 *pu1_out_data,
935
    UWORD8 *pu1_csbf_buf,
936
    WORD32 i4_csbf_stride)
937
9.38M
{
938
9.38M
    WORD32 i, trans_unit_idx, num_gt1_flag;
939
9.38M
    UWORD16 u2_csbf0flags;
940
9.38M
    WORD32 num_bytes = 0;
941
9.38M
    UWORD8 *pu1_trans_table;
942
9.38M
    UWORD8 *pu1_csb_table;
943
9.38M
    WORD32 shift_value, mask_value;
944
9.38M
    UWORD16 u2_sig_coeff_abs_gt0_flags = 0, u2_sig_coeff_abs_gt1_flags = 0;
945
9.38M
    UWORD16 u2_sign_flags;
946
9.38M
    UWORD16 u2_abs_coeff_remaining[16];
947
9.38M
    WORD32 blk_row, blk_col;
948
949
9.38M
    UWORD8 *pu1_out_data_header;
950
9.38M
    UWORD16 *pu2_out_data_coeff;
951
952
9.38M
    WORD32 x_pos, y_pos;
953
9.38M
    WORD32 quant_coeff;
954
955
9.38M
    WORD32 num_gt0_flag;
956
9.38M
    (void)i4_csbf_stride;
957
9.38M
    pu1_out_data_header = pu1_out_data;
958
    /* Need only last 3 bits, rest are reserved for debugging and making */
959
    /* WORD alignment */
960
9.38M
    u2_csbf0flags = 0xBAD0;
961
962
    /* Select proper order for your transform unit and csb based on scan_idx*/
963
    /* and the trans_size */
964
965
    /* scan order inside a csb */
966
9.38M
    pu1_csb_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
967
    /* GETRANGE will give the log_2 of trans_size to shift_value */
968
9.38M
    GETRANGE(shift_value, trans_size);
969
9.38M
    shift_value = shift_value - 3; /* for finding. row no. from scan index */
970
9.38M
    mask_value = (trans_size / 4) - 1; /*for finding the col. no. from scan index*/
971
9.38M
    switch(trans_size)
972
9.38M
    {
973
48.5k
    case 32:
974
48.5k
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_8x8[scan_idx][0]);
975
48.5k
        break;
976
310k
    case 16:
977
310k
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
978
310k
        break;
979
1.39M
    case 8:
980
1.39M
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_2x2[scan_idx][0]);
981
1.39M
        break;
982
7.63M
    case 4:
983
7.63M
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_1x1[0]);
984
7.63M
        break;
985
0
    default:
986
0
        DBG_PRINTF("Invalid Trans Size\n");
987
0
        return -1;
988
0
        break;
989
9.38M
    }
990
991
    /*go through each csb in the scan order for first non-zero coded sub-block*/
992
11.8M
    for(trans_unit_idx = (trans_size * trans_size / 16) - 1; trans_unit_idx >= 0; trans_unit_idx--)
993
11.8M
    {
994
        /* check for the first csb flag in our scan order */
995
11.8M
        if(pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]])
996
9.38M
        {
997
9.38M
            UWORD8 u1_last_x, u1_last_y;
998
            /* row of csb */
999
9.38M
            blk_row = pu1_trans_table[trans_unit_idx] >> shift_value;
1000
            /* col of csb */
1001
9.38M
            blk_col = pu1_trans_table[trans_unit_idx] & mask_value;
1002
1003
            /*check for the 1st non-0 values inside the csb in our scan order*/
1004
27.8M
            for(i = 15; i >= 0; i--)
1005
27.8M
            {
1006
27.8M
                x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
1007
27.8M
                y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
1008
1009
27.8M
                quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
1010
1011
27.8M
                if(quant_coeff != 0)
1012
9.38M
                    break;
1013
27.8M
            }
1014
1015
9.38M
            ASSERT(i >= 0);
1016
1017
9.38M
            u1_last_x = x_pos;
1018
9.38M
            u1_last_y = y_pos;
1019
1020
            /* storing last_x and last_y */
1021
9.38M
            *pu1_out_data_header = u1_last_x;
1022
9.38M
            pu1_out_data_header++;
1023
9.38M
            num_bytes++;
1024
9.38M
            *pu1_out_data_header = u1_last_y;
1025
9.38M
            pu1_out_data_header++;
1026
9.38M
            num_bytes++;
1027
1028
            /* storing the scan order */
1029
9.38M
            *pu1_out_data_header = scan_idx;
1030
9.38M
            pu1_out_data_header++;
1031
9.38M
            num_bytes++;
1032
            /* storing last_sub_block pos. in scan order count */
1033
9.38M
            *pu1_out_data_header = trans_unit_idx;
1034
9.38M
            pu1_out_data_header++;
1035
9.38M
            num_bytes++;
1036
1037
            /*stored the first 4 bytes, now all are word16. So word16 pointer*/
1038
9.38M
            pu2_out_data_coeff = (UWORD16 *)pu1_out_data_header;
1039
1040
            /* u2_csbf0flags word */
1041
9.38M
            u2_csbf0flags = 0xBAD0 | 1; /*since right&bottom csbf is 0*/
1042
            /* storing u2_csbf0flags word */
1043
9.38M
            *pu2_out_data_coeff = u2_csbf0flags;
1044
9.38M
            pu2_out_data_coeff++;
1045
9.38M
            num_bytes += 2;
1046
1047
9.38M
            num_gt0_flag = 1;
1048
9.38M
            num_gt1_flag = 0;
1049
9.38M
            u2_sign_flags = 0;
1050
1051
            /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
1052
9.38M
            u2_sig_coeff_abs_gt0_flags = u2_sig_coeff_abs_gt0_flags | (1 << i);
1053
9.38M
            if(abs(quant_coeff) > 1)
1054
5.08M
            {
1055
                /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
1056
5.08M
                u2_sig_coeff_abs_gt1_flags = u2_sig_coeff_abs_gt1_flags | (1 << i);
1057
                /* update u2_abs_coeff_remaining */
1058
5.08M
                u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
1059
1060
5.08M
                num_gt1_flag++;
1061
5.08M
            }
1062
1063
9.38M
            if(quant_coeff < 0)
1064
4.76M
            {
1065
                /* set the i th bit of u2_sign_flags */
1066
4.76M
                u2_sign_flags = u2_sign_flags | (1 << i);
1067
4.76M
            }
1068
1069
            /* Test remaining elements in our scan order */
1070
            /* Can optimize further by CLZ macro */
1071
131M
            for(i = i - 1; i >= 0; i--)
1072
122M
            {
1073
122M
                x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
1074
122M
                y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
1075
1076
122M
                quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
1077
1078
122M
                if(quant_coeff != 0)
1079
99.6M
                {
1080
                    /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
1081
99.6M
                    u2_sig_coeff_abs_gt0_flags |= (1 << i);
1082
1083
99.6M
                    if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE))
1084
80.8M
                    {
1085
                        /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
1086
80.8M
                        u2_sig_coeff_abs_gt1_flags |= (1 << i);
1087
1088
                        /* update u2_abs_coeff_remaining */
1089
80.8M
                        u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
1090
1091
80.8M
                        num_gt1_flag++; /*n0. of Ones in sig_coeff_abs_gt1_flag*/
1092
80.8M
                    }
1093
1094
99.6M
                    if(quant_coeff < 0)
1095
49.7M
                    {
1096
                        /* set the i th bit of u2_sign_flags */
1097
49.7M
                        u2_sign_flags |= (1 << i);
1098
49.7M
                    }
1099
1100
99.6M
                    num_gt0_flag++;
1101
99.6M
                }
1102
122M
            }
1103
1104
            /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */
1105
9.38M
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags;
1106
9.38M
            pu2_out_data_coeff++;
1107
9.38M
            num_bytes += 2;
1108
            /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */
1109
9.38M
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags;
1110
9.38M
            pu2_out_data_coeff++;
1111
9.38M
            num_bytes += 2;
1112
            /* storing u2_sign_flags 2 bytes */
1113
9.38M
            *pu2_out_data_coeff = u2_sign_flags;
1114
9.38M
            pu2_out_data_coeff++;
1115
9.38M
            num_bytes += 2;
1116
1117
            /* Store the u2_abs_coeff_remaining[] */
1118
95.2M
            for(i = 0; i < num_gt1_flag; i++)
1119
85.9M
            {
1120
                /* storing u2_abs_coeff_remaining[i] 2 bytes */
1121
85.9M
                *pu2_out_data_coeff = u2_abs_coeff_remaining[i];
1122
85.9M
                pu2_out_data_coeff++;
1123
85.9M
                num_bytes += 2;
1124
85.9M
            }
1125
1126
9.38M
            break; /*We just need this loop for finding 1st non-zero csb only*/
1127
9.38M
        }
1128
11.8M
    }
1129
1130
    /* go through remaining csb in the scan order */
1131
18.7M
    for(trans_unit_idx = trans_unit_idx - 1; trans_unit_idx >= 0; trans_unit_idx--)
1132
9.38M
    {
1133
9.38M
        blk_row = pu1_trans_table[trans_unit_idx] >> shift_value; /*row of csb*/
1134
9.38M
        blk_col = pu1_trans_table[trans_unit_idx] & mask_value; /*col of csb*/
1135
1136
        /* u2_csbf0flags word */
1137
9.38M
        u2_csbf0flags = 0xBAD0 | /* assuming csbf_buf has only 0 or 1 values */
1138
9.38M
                        (pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]]);
1139
1140
        /********************************************************************/
1141
        /* Minor hack: As per HEVC spec csbf in not signalled in stream for */
1142
        /* block0, instead sig coeff map is directly signalled. This is     */
1143
        /* taken care by forcing csbf for block0 to be 1 even if it is 0    */
1144
        /********************************************************************/
1145
9.38M
        if(0 == trans_unit_idx)
1146
1.62M
        {
1147
1.62M
            u2_csbf0flags |= 1;
1148
1.62M
        }
1149
1150
9.38M
        if((blk_col + 1 < trans_size / 4)) /* checking right boundary */
1151
7.49M
        {
1152
7.49M
            if(pu1_csbf_buf[pi4_subBlock2csbfId_map[blk_row * trans_size / 4 + blk_col + 1]])
1153
6.06M
            {
1154
                /* set the 2nd bit of u2_csbf0flags for right csbf */
1155
6.06M
                u2_csbf0flags = u2_csbf0flags | (1 << 1);
1156
6.06M
            }
1157
7.49M
        }
1158
9.38M
        if((blk_row + 1 < trans_size / 4)) /* checking bottom oundary */
1159
7.32M
        {
1160
7.32M
            if(pu1_csbf_buf[pi4_subBlock2csbfId_map[(blk_row + 1) * trans_size / 4 + blk_col]])
1161
5.96M
            {
1162
                /* set the 3rd bit of u2_csbf0flags  for bottom csbf */
1163
5.96M
                u2_csbf0flags = u2_csbf0flags | (1 << 2);
1164
5.96M
            }
1165
7.32M
        }
1166
1167
        /* storing u2_csbf0flags word */
1168
9.38M
        *pu2_out_data_coeff = u2_csbf0flags;
1169
9.38M
        pu2_out_data_coeff++;
1170
9.38M
        num_bytes += 2;
1171
1172
        /* check for the csb flag in our scan order */
1173
9.38M
        if(u2_csbf0flags & 0x1)
1174
8.13M
        {
1175
8.13M
            u2_sig_coeff_abs_gt0_flags = 0;
1176
8.13M
            u2_sig_coeff_abs_gt1_flags = 0;
1177
8.13M
            u2_sign_flags = 0;
1178
1179
8.13M
            num_gt0_flag = 0;
1180
8.13M
            num_gt1_flag = 0;
1181
            /* check for the non-0 values inside the csb in our scan order */
1182
            /* Can optimize further by CLZ macro */
1183
138M
            for(i = 15; i >= 0; i--)
1184
130M
            {
1185
130M
                x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
1186
130M
                y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
1187
1188
130M
                quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
1189
1190
130M
                if(quant_coeff != 0)
1191
95.3M
                {
1192
                    /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
1193
95.3M
                    u2_sig_coeff_abs_gt0_flags |= (1 << i);
1194
1195
95.3M
                    if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE))
1196
74.2M
                    {
1197
                        /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
1198
74.2M
                        u2_sig_coeff_abs_gt1_flags |= (1 << i);
1199
1200
                        /* update u2_abs_coeff_remaining */
1201
74.2M
                        u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
1202
1203
74.2M
                        num_gt1_flag++;
1204
74.2M
                    }
1205
1206
95.3M
                    if(quant_coeff < 0)
1207
47.6M
                    {
1208
                        /* set the i th bit of u2_sign_flags */
1209
47.6M
                        u2_sign_flags = u2_sign_flags | (1 << i);
1210
47.6M
                    }
1211
1212
95.3M
                    num_gt0_flag++;
1213
95.3M
                }
1214
130M
            }
1215
1216
            /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */
1217
8.13M
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags;
1218
8.13M
            pu2_out_data_coeff++;
1219
8.13M
            num_bytes += 2;
1220
1221
            /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */
1222
8.13M
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags;
1223
8.13M
            pu2_out_data_coeff++;
1224
8.13M
            num_bytes += 2;
1225
1226
            /* storing u2_sign_flags 2 bytes */
1227
8.13M
            *pu2_out_data_coeff = u2_sign_flags;
1228
8.13M
            pu2_out_data_coeff++;
1229
8.13M
            num_bytes += 2;
1230
1231
            /* Store the u2_abs_coeff_remaining[] */
1232
82.4M
            for(i = 0; i < num_gt1_flag; i++)
1233
74.2M
            {
1234
                /* storing u2_abs_coeff_remaining[i] 2 bytes */
1235
74.2M
                *pu2_out_data_coeff = u2_abs_coeff_remaining[i];
1236
74.2M
                pu2_out_data_coeff++;
1237
74.2M
                num_bytes += 2;
1238
74.2M
            }
1239
8.13M
        }
1240
9.38M
    }
1241
1242
9.38M
    return num_bytes; /* Return the number of bytes written to out_data */
1243
9.38M
}
1244
1245
/**
1246
*******************************************************************************
1247
* \if Function name : ihevce_populate_intra_pred_mode \endif
1248
*
1249
* \brief * populates intra pred modes,b2_mpm_idx,b1_prev_intra_luma_pred_flag &
1250
* b5_rem_intra_pred_mode for a CU based on nieghbouring CUs,
1251
*
1252
* \par   Description
1253
* Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
1254
* for a CU
1255
*
1256
* \param[in] top_intra_mode Top intra mode
1257
* \param[in] left_intra_mode Left intra mode
1258
* \param[in] available_top Top availability flag
1259
* \param[in] available_left Left availability flag
1260
* \param[in] cu_pos_y CU 'y' position
1261
* \param[in] ps_cand_mode_list pointer to populate candidate list
1262
*
1263
* \returns none
1264
*
1265
* \author
1266
*  Ittiam
1267
*
1268
*******************************************************************************
1269
*/
1270
1271
void ihevce_populate_intra_pred_mode(
1272
    WORD32 top_intra_mode,
1273
    WORD32 left_intra_mode,
1274
    WORD32 available_top,
1275
    WORD32 available_left,
1276
    WORD32 cu_pos_y,
1277
    WORD32 *ps_cand_mode_list)
1278
639k
{
1279
    /* local variables */
1280
639k
    WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top;
1281
1282
    /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */
1283
    /* N = top */
1284
639k
    if(0 == available_top)
1285
39.4k
    {
1286
39.4k
        cand_intra_pred_mode_top = INTRA_DC;
1287
39.4k
    }
1288
    /* for neighbour != INTRA, setting DC is done outside */
1289
599k
    else if(0 == cu_pos_y) /* It's on the CTB boundary */
1290
37.7k
    {
1291
37.7k
        cand_intra_pred_mode_top = INTRA_DC;
1292
37.7k
    }
1293
562k
    else
1294
562k
    {
1295
562k
        cand_intra_pred_mode_top = top_intra_mode;
1296
562k
    }
1297
1298
    /* N = left */
1299
639k
    if(0 == available_left)
1300
31.8k
    {
1301
31.8k
        cand_intra_pred_mode_left = INTRA_DC;
1302
31.8k
    }
1303
    /* for neighbour != INTRA, setting DC is done outside */
1304
607k
    else
1305
607k
    {
1306
607k
        cand_intra_pred_mode_left = left_intra_mode;
1307
607k
    }
1308
1309
    /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */
1310
639k
    if(cand_intra_pred_mode_left == cand_intra_pred_mode_top)
1311
188k
    {
1312
188k
        if(cand_intra_pred_mode_left < 2)
1313
108k
        {
1314
108k
            ps_cand_mode_list[0] = INTRA_PLANAR;
1315
108k
            ps_cand_mode_list[1] = INTRA_DC;
1316
108k
            ps_cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */
1317
108k
        }
1318
79.8k
        else
1319
79.8k
        {
1320
79.8k
            ps_cand_mode_list[0] = cand_intra_pred_mode_left;
1321
79.8k
            ps_cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32);
1322
79.8k
            ps_cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32);
1323
79.8k
        }
1324
188k
    }
1325
450k
    else
1326
450k
    {
1327
450k
        ps_cand_mode_list[0] = cand_intra_pred_mode_left;
1328
450k
        ps_cand_mode_list[1] = cand_intra_pred_mode_top;
1329
1330
450k
        if((cand_intra_pred_mode_left != INTRA_PLANAR) &&
1331
363k
           (cand_intra_pred_mode_top != INTRA_PLANAR))
1332
286k
        {
1333
286k
            ps_cand_mode_list[2] = INTRA_PLANAR;
1334
286k
        }
1335
163k
        else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC))
1336
86.8k
        {
1337
86.8k
            ps_cand_mode_list[2] = INTRA_DC;
1338
86.8k
        }
1339
77.1k
        else
1340
77.1k
        {
1341
77.1k
            ps_cand_mode_list[2] = INTRA_ANGULAR(26);
1342
77.1k
        }
1343
450k
    }
1344
639k
}
1345
/**
1346
*******************************************************************************
1347
* \if Function name : ihevce_intra_pred_mode_signaling \endif
1348
*
1349
* \brief * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx &
1350
* b5_rem_intra_pred_mode for a CU
1351
*
1352
* \par   Description
1353
* Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
1354
* for a CU
1355
*
1356
* \param[in] ps_nbr_top Top neighbour context
1357
* \param[in] ps_nbr_left Left neighbour context
1358
* \param[in] available_top Top availability flag
1359
* \param[in] available_left Left availability flag
1360
* \param[in] cu_pos_y CU 'y' position
1361
* \param[in] luma_intra_pred_mode_current the intra_pred_mode of current block
1362
* \param[inout] ps_intra_pred_mode_current
1363
* Pointer to structure having b1_prev_intra_luma_pred_flag, b2_mpm_idx and
1364
* b5_rem_intra_pred_mode
1365
*
1366
* \returns none
1367
*
1368
* \author
1369
*  Ittiam
1370
*
1371
*******************************************************************************
1372
*/
1373
1374
void ihevce_intra_pred_mode_signaling(
1375
    WORD32 top_intra_mode,
1376
    WORD32 left_intra_mode,
1377
    WORD32 available_top,
1378
    WORD32 available_left,
1379
    WORD32 cu_pos_y,
1380
    WORD32 luma_intra_pred_mode_current,
1381
    intra_prev_rem_flags_t *ps_intra_pred_mode_current)
1382
6.09M
{
1383
    /* local variables */
1384
6.09M
    WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top;
1385
6.09M
    WORD32 cand_mode_list[3];
1386
1387
6.09M
    ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0;
1388
6.09M
    ps_intra_pred_mode_current->b2_mpm_idx = 0;  // for safety purpose
1389
6.09M
    ps_intra_pred_mode_current->b5_rem_intra_pred_mode = 0;
1390
1391
    /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */
1392
    /* N = top */
1393
6.09M
    if(0 == available_top)
1394
301k
    {
1395
301k
        cand_intra_pred_mode_top = INTRA_DC;
1396
301k
    }
1397
    /* for neighbour != INTRA, setting DC is done outside */
1398
5.79M
    else if(0 == cu_pos_y) /* It's on the CTB boundary */
1399
359k
    {
1400
359k
        cand_intra_pred_mode_top = INTRA_DC;
1401
359k
    }
1402
5.43M
    else
1403
5.43M
    {
1404
5.43M
        cand_intra_pred_mode_top = top_intra_mode;
1405
5.43M
    }
1406
1407
    /* N = left */
1408
6.09M
    if(0 == available_left)
1409
226k
    {
1410
226k
        cand_intra_pred_mode_left = INTRA_DC;
1411
226k
    }
1412
    /* for neighbour != INTRA, setting DC is done outside */
1413
5.86M
    else
1414
5.86M
    {
1415
5.86M
        cand_intra_pred_mode_left = left_intra_mode;
1416
5.86M
    }
1417
1418
    /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */
1419
6.09M
    if(cand_intra_pred_mode_left == cand_intra_pred_mode_top)
1420
1.92M
    {
1421
1.92M
        if(cand_intra_pred_mode_left < 2)
1422
980k
        {
1423
980k
            cand_mode_list[0] = INTRA_PLANAR;
1424
980k
            cand_mode_list[1] = INTRA_DC;
1425
980k
            cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */
1426
980k
        }
1427
944k
        else
1428
944k
        {
1429
944k
            cand_mode_list[0] = cand_intra_pred_mode_left;
1430
944k
            cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32);
1431
944k
            cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32);
1432
944k
        }
1433
1.92M
    }
1434
4.16M
    else
1435
4.16M
    {
1436
4.16M
        cand_mode_list[0] = cand_intra_pred_mode_left;
1437
4.16M
        cand_mode_list[1] = cand_intra_pred_mode_top;
1438
1439
4.16M
        if((cand_intra_pred_mode_left != INTRA_PLANAR) &&
1440
3.35M
           (cand_intra_pred_mode_top != INTRA_PLANAR))
1441
2.64M
        {
1442
2.64M
            cand_mode_list[2] = INTRA_PLANAR;
1443
2.64M
        }
1444
1.51M
        else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC))
1445
829k
        {
1446
829k
            cand_mode_list[2] = INTRA_DC;
1447
829k
        }
1448
688k
        else
1449
688k
        {
1450
688k
            cand_mode_list[2] = INTRA_ANGULAR(26);
1451
688k
        }
1452
4.16M
    }
1453
1454
    /* Signal Generation */
1455
1456
    /* Flag & mpm_index generation */
1457
6.09M
    if(cand_mode_list[0] == luma_intra_pred_mode_current)
1458
1.47M
    {
1459
1.47M
        ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
1460
1.47M
        ps_intra_pred_mode_current->b2_mpm_idx = 0;
1461
1.47M
    }
1462
4.61M
    else if(cand_mode_list[1] == luma_intra_pred_mode_current)
1463
1.17M
    {
1464
1.17M
        ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
1465
1.17M
        ps_intra_pred_mode_current->b2_mpm_idx = 1;
1466
1.17M
    }
1467
3.43M
    else if(cand_mode_list[2] == luma_intra_pred_mode_current)
1468
752k
    {
1469
752k
        ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
1470
752k
        ps_intra_pred_mode_current->b2_mpm_idx = 2;
1471
752k
    }
1472
    /* Flag & b5_rem_intra_pred_mode generation */
1473
2.68M
    else
1474
2.68M
    {
1475
2.68M
        WORD32 rem_mode;
1476
1477
2.68M
        ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0;
1478
1479
        /* sorting cand_mode_list */
1480
2.68M
        if(cand_mode_list[0] > cand_mode_list[1])
1481
1.35M
        {
1482
1.35M
            SWAP(cand_mode_list[0], cand_mode_list[1]);
1483
1.35M
        }
1484
2.68M
        if(cand_mode_list[0] > cand_mode_list[2])
1485
1.32M
        {
1486
1.32M
            SWAP(cand_mode_list[0], cand_mode_list[2]);
1487
1.32M
        }
1488
2.68M
        if(cand_mode_list[1] > cand_mode_list[2])
1489
1.72M
        {
1490
1.72M
            SWAP(cand_mode_list[1], cand_mode_list[2]);
1491
1.72M
        }
1492
1493
2.68M
        rem_mode = luma_intra_pred_mode_current;
1494
1495
2.68M
        if((rem_mode) >= cand_mode_list[2])
1496
863k
        {
1497
863k
            (rem_mode)--;
1498
863k
        }
1499
2.68M
        if((rem_mode) >= cand_mode_list[1])
1500
2.06M
        {
1501
2.06M
            (rem_mode)--;
1502
2.06M
        }
1503
2.68M
        if((rem_mode) >= cand_mode_list[0])
1504
2.43M
        {
1505
2.43M
            (rem_mode)--;
1506
2.43M
        }
1507
2.68M
        ps_intra_pred_mode_current->b5_rem_intra_pred_mode = rem_mode;
1508
2.68M
    }
1509
6.09M
}
1510
1511
void ihevce_quant_rounding_factor_gen(
1512
    WORD32 i4_trans_size,
1513
    WORD32 is_luma,
1514
    rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt,
1515
    WORD32 *pi4_quant_round_0_1,
1516
    WORD32 *pi4_quant_round_1_2,
1517
    double i4_lamda_modifier,
1518
    UWORD8 i4_is_tu_level_quant_rounding)
1519
798k
{
1520
    //WORD32 i4_scan_idx = ps_ctxt->i4_scan_idx;
1521
798k
    UWORD8 *pu1_ctxt_model;
1522
798k
    WORD32 scan_pos;
1523
798k
    WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag    */
1524
798k
    WORD32 abs_gt1_base_ctxt;
1525
798k
    WORD32 log2_tr_size, i;
1526
798k
    UWORD16 u4_bits_estimated_r0, u4_bits_estimated_r1, u4_bits_estimated_r2;
1527
798k
    UWORD16 u4_bits_estimated_r1_temp;
1528
798k
    WORD32 j = 0;
1529
798k
    WORD32 k = 0;
1530
798k
    WORD32 temp2;
1531
1532
798k
    double i4_lamda_mod = i4_lamda_modifier * pow(2.0, (-8.0 / 3.0));
1533
798k
    LWORD64 lamda_mod = (LWORD64)(i4_lamda_mod * (1 << LAMDA_Q_SHIFT_FACT));
1534
    /* transform size to log2transform size */
1535
798k
    GETRANGE(log2_tr_size, i4_trans_size);
1536
798k
    log2_tr_size -= 1;
1537
1538
798k
    if(1 == i4_is_tu_level_quant_rounding)
1539
0
    {
1540
0
        entropy_context_t *ps_cur_tu_entropy;
1541
0
        cab_ctxt_t *ps_cabac;
1542
0
        WORD32 curr_buf_idx = ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
1543
0
        ps_cur_tu_entropy = &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[curr_buf_idx];
1544
1545
0
        ps_cabac = &ps_cur_tu_entropy->s_cabac_ctxt;
1546
1547
0
        pu1_ctxt_model = &ps_cabac->au1_ctxt_models[0];
1548
0
    }
1549
798k
    else
1550
798k
    {
1551
798k
        pu1_ctxt_model = &ps_rdopt_entropy_ctxt->au1_init_cabac_ctxt_states[0];
1552
798k
    }
1553
    /*If transform size is 4x4, then only one sub-block*/
1554
798k
    if(is_luma)
1555
506k
    {
1556
506k
        sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
1557
506k
        abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG;
1558
1559
506k
        if(3 == log2_tr_size)
1560
214k
        {
1561
            /* 8x8 transform size */
1562
            /* Assuming diagnol scan idx for now */
1563
214k
            sig_coeff_base_ctxt += 9;
1564
214k
        }
1565
291k
        else if(3 < log2_tr_size)
1566
77.4k
        {
1567
            /* larger transform sizes */
1568
77.4k
            sig_coeff_base_ctxt += 21;
1569
77.4k
        }
1570
506k
    }
1571
291k
    else
1572
291k
    {
1573
        /* chroma context initializations */
1574
291k
        sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
1575
291k
        abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16;
1576
1577
291k
        if(3 == log2_tr_size)
1578
64.0k
        {
1579
            /* 8x8 transform size */
1580
64.0k
            sig_coeff_base_ctxt += 9;
1581
64.0k
        }
1582
227k
        else if(3 < log2_tr_size)
1583
13.3k
        {
1584
            /* larger transform sizes */
1585
13.3k
            sig_coeff_base_ctxt += 12;
1586
13.3k
        }
1587
291k
    }
1588
1589
    /*Transform size of 4x4 will have only a single CSB */
1590
    /* derive the context inc as per section 9.3.3.1.4 */
1591
1592
798k
    if(2 == log2_tr_size)
1593
428k
    {
1594
428k
        UWORD8 sig_ctxinc;
1595
428k
        WORD32 state_mps;
1596
428k
        WORD32 gt1_ctxt = 0;
1597
428k
        WORD32 ctxt_set = 0;
1598
428k
        WORD32 ctxt_idx = 0;
1599
1600
        /* context set based on luma subblock pos */
1601
1602
        /* Encodet the abs level gt1 bins */
1603
        /* Currently calculating trade off between mps(2) and mps(1)*/
1604
        /* The estimation has to be further done for mps(11) and mps(111)*/
1605
        /*ctxt_set = 0 as transform 4x4 has only one csb with DC */
1606
        /* gt1_ctxt = 0 for the co-ef value to be 2 */
1607
1608
428k
        ctxt_set = gt1_ctxt = 0;
1609
428k
        ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1610
1611
428k
        state_mps = pu1_ctxt_model[ctxt_idx];
1612
1613
428k
        u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
1614
1615
428k
        u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1616
1617
428k
        QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1_temp, lamda_mod);
1618
7.28M
        for(scan_pos = 0; scan_pos < 16; scan_pos++)
1619
6.86M
        {
1620
6.86M
            *(pi4_quant_round_1_2 + scan_pos) = temp2;
1621
6.86M
        }
1622
1623
7.28M
        for(scan_pos = 0; scan_pos < 16; scan_pos++)
1624
6.86M
        {
1625
            //UWORD8 nbr_csbf = 1;
1626
            /* derive the x,y pos */
1627
6.86M
            UWORD8 y_pos_x_pos = scan_pos;  //gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
1628
1629
            /* 4x4 transform size increment uses lookup */
1630
6.86M
            sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];
1631
1632
            /*Get the mps state based on ctxt modes */
1633
6.86M
            state_mps = pu1_ctxt_model[sig_ctxinc + sig_coeff_base_ctxt];
1634
1635
            /* Bits taken to encode sig co-ef flag as 0 */
1636
6.86M
            u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1637
1638
            /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1639
            //
1640
6.86M
            u4_bits_estimated_r1 =
1641
6.86M
                (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
1642
1643
            /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1644
6.86M
            u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
1645
1646
6.86M
            QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
1647
6.86M
            *(pi4_quant_round_0_1 + scan_pos) = temp2;
1648
6.86M
        }
1649
428k
    }
1650
369k
    else
1651
369k
    {
1652
369k
        UWORD8 *pu1_hevce_sigcoeff_ctxtinc;
1653
369k
        WORD32 is_nbr_csb_state_mps;
1654
1655
369k
        WORD32 state_mps;
1656
369k
        WORD32 gt1_ctxt = 0;
1657
369k
        WORD32 ctxt_set = 0;
1658
369k
        WORD32 ctxt_idx;
1659
        /*1to2 rounding factor is same for all sub blocks except for sub-block = 0*/
1660
        /*Hence will write all the sub-block with i >=1 coeff, and then overwrite for i = 0*/
1661
1662
        /*ctxt_set = 0 DC subblock, the previous state did not have 2
1663
        ctxt_set = 1 DC subblock, the previous state did have >= 2
1664
        ctxt_set = 2 AC subblock, the previous state did not have 2
1665
        ctxt_set = 3 AC subblock, the previous state did have >= 2*/
1666
369k
        i = 1;
1667
369k
        ctxt_set = (i && is_luma) ? 2 : 0;
1668
1669
369k
        ctxt_set++;
1670
1671
        /*0th position indicates the probability of 2 */
1672
        /*1th position indicates the probability of 1 */
1673
        /*2th position indicates the probability of 11 */
1674
        /*3th position indicates the probability of 111 */
1675
1676
369k
        gt1_ctxt = 0;
1677
369k
        ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1678
1679
369k
        state_mps = pu1_ctxt_model[ctxt_idx];
1680
1681
369k
        u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
1682
1683
369k
        u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1684
369k
        QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod);
1685
1686
51.6M
        for(scan_pos = 0; scan_pos < (16 * (i4_trans_size * i4_trans_size >> 4)); scan_pos++)
1687
51.2M
        {
1688
51.2M
            *(pi4_quant_round_1_2 + scan_pos) = temp2;
1689
51.2M
        }
1690
1691
369k
        i = 0;
1692
369k
        ctxt_set = (i && is_luma) ? 2 : 0;
1693
369k
        ctxt_set++;
1694
1695
        /*0th position indicates the probability of 2 */
1696
        /*1th position indicates the probability of 1 */
1697
        /*2th position indicates the probability of 11 */
1698
        /*3th position indicates the probability of 111 */
1699
1700
369k
        gt1_ctxt = 0;
1701
369k
        ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1702
1703
369k
        state_mps = pu1_ctxt_model[ctxt_idx];
1704
1705
369k
        u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
1706
1707
369k
        u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1708
369k
        QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod);
1709
1710
6.27M
        for(scan_pos = 0; scan_pos < 16; scan_pos++)
1711
5.90M
        {
1712
5.90M
            *(pi4_quant_round_1_2 + ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2;
1713
5.90M
        }
1714
1715
369k
        {
1716
369k
            WORD32 ctxt_idx;
1717
1718
369k
            WORD32 nbr_csbf_0, nbr_csbf_1;
1719
369k
            WORD32 state_mps_0, state_mps_1;
1720
369k
            ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX;
1721
369k
            ctxt_idx += is_luma ? 0 : 2;
1722
1723
            /* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */
1724
            /* if neibhor not available, ctxt idx = 0*/
1725
369k
            nbr_csbf_0 = 0;
1726
369k
            ctxt_idx += nbr_csbf_0 ? 1 : 0;
1727
369k
            state_mps_0 = pu1_ctxt_model[ctxt_idx];
1728
1729
369k
            nbr_csbf_1 = 1;
1730
369k
            ctxt_idx += nbr_csbf_1 ? 1 : 0;
1731
369k
            state_mps_1 = pu1_ctxt_model[ctxt_idx];
1732
1733
369k
            is_nbr_csb_state_mps = ((state_mps_0 % 2) == 1) && ((state_mps_1 % 2) == 1);
1734
369k
        }
1735
1736
369k
        if(1 == is_nbr_csb_state_mps)
1737
28.8k
        {
1738
271k
            for(i = 0; i < (i4_trans_size * i4_trans_size >> 4); i++)
1739
243k
            {
1740
243k
                UWORD8 sig_ctxinc;
1741
243k
                WORD32 state_mps;
1742
243k
                WORD32 gt1_ctxt = 0;
1743
243k
                WORD32 ctxt_set = 0;
1744
1745
243k
                WORD32 ctxt_idx;
1746
1747
                /*Check if the cabac states had previous nbr available */
1748
1749
243k
                if(i == 0)
1750
28.8k
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[3][0];
1751
214k
                else if(i < (i4_trans_size >> 2))
1752
46.6k
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[1][0];
1753
167k
                else if((i % (i4_trans_size >> 2)) == 0)
1754
46.6k
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[2][0];
1755
120k
                else
1756
120k
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0];
1757
1758
243k
                if(((i % (i4_trans_size >> 2)) == 0) && (i != 0))
1759
46.6k
                    k++;
1760
1761
243k
                j = ((i4_trans_size * 4) * k) + ((i % (i4_trans_size >> 2)) * 4);
1762
                /*ctxt_set = 0 DC subblock, the previous state did not have 2
1763
                ctxt_set = 1 DC subblock, the previous state did have >= 2
1764
                ctxt_set = 2 AC subblock, the previous state did not have 2
1765
                ctxt_set = 3 AC subblock, the previous state did have >= 2*/
1766
1767
243k
                ctxt_set = (i && is_luma) ? 2 : 0;
1768
1769
                /* gt1_ctxt = 1 for the co-ef value to be 1 */
1770
243k
                gt1_ctxt = 0;
1771
243k
                ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1772
1773
243k
                state_mps = pu1_ctxt_model[ctxt_idx];
1774
1775
                /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1776
243k
                u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1777
1778
4.13M
                for(scan_pos = 0; scan_pos < 16; scan_pos++)
1779
3.88M
                {
1780
3.88M
                    UWORD8 y_pos_x_pos;
1781
1782
3.88M
                    if(scan_pos || i)
1783
3.86M
                    {
1784
3.86M
                        y_pos_x_pos = scan_pos;  // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
1785
                        /* ctxt for AC coeff depends on curpos and neigbour csbf */
1786
3.86M
                        sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
1787
1788
                        /* based on luma subblock pos */
1789
3.86M
                        sig_ctxinc += (i && is_luma) ? 3 : 0;
1790
1791
3.86M
                        sig_ctxinc += sig_coeff_base_ctxt;
1792
3.86M
                    }
1793
28.8k
                    else
1794
28.8k
                    {
1795
                        /*MAM : both scan pos and i 0 impies the DC coef of 1st block only */
1796
                        /* DC coeff has fixed context for luma and chroma */
1797
28.8k
                        sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
1798
28.8k
                    }
1799
1800
                    /*Get the mps state based on ctxt modes */
1801
3.88M
                    state_mps = pu1_ctxt_model[sig_ctxinc];
1802
1803
                    /* Bits taken to encode sig co-ef flag as 0 */
1804
3.88M
                    u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1805
1806
3.88M
                    u4_bits_estimated_r1 =
1807
3.88M
                        (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
1808
1809
                    /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1810
3.88M
                    u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
1811
3.88M
                    {
1812
3.88M
                        QUANT_ROUND_FACTOR(
1813
3.88M
                            temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
1814
3.88M
                        *(pi4_quant_round_0_1 +
1815
3.88M
                          ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size)) + j) = temp2;
1816
3.88M
                    }
1817
3.88M
                }
1818
243k
            }
1819
28.8k
        }
1820
340k
        else
1821
340k
        {
1822
            /*If Both nbr csbfs are 0, then all the coef in sub-blocks will have same value except for 1st subblock,
1823
            Hence will write the same value to all sub block, and overwrite for the 1st one */
1824
340k
            i = 1;
1825
340k
            {
1826
340k
                UWORD8 sig_ctxinc;
1827
340k
                UWORD8 y_pos_x_pos;
1828
340k
                WORD32 quant_rounding_0_1;
1829
1830
340k
                pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc_00[0];
1831
1832
340k
                scan_pos = 0;
1833
340k
                y_pos_x_pos = scan_pos;  // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
1834
                /* ctxt for AC coeff depends on curpos and neigbour csbf */
1835
340k
                sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
1836
1837
                /* based on luma subblock pos */
1838
340k
                sig_ctxinc += (is_luma) ? 3 : 0;
1839
1840
340k
                sig_ctxinc += sig_coeff_base_ctxt;
1841
1842
                /*Get the mps state based on ctxt modes */
1843
340k
                state_mps = pu1_ctxt_model[sig_ctxinc];
1844
1845
                /* Bits taken to encode sig co-ef flag as 0 */
1846
340k
                u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1847
1848
340k
                u4_bits_estimated_r1 =
1849
340k
                    (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
1850
1851
                /*ctxt_set = 0 DC subblock, the previous state did not have 2
1852
                ctxt_set = 1 DC subblock, the previous state did have >= 2
1853
                ctxt_set = 2 AC subblock, the previous state did not have 2
1854
                ctxt_set = 3 AC subblock, the previous state did have >= 2*/
1855
1856
340k
                ctxt_set = (i && is_luma) ? 2 : 0;
1857
1858
                /* gt1_ctxt = 1 for the co-ef value to be 1 */
1859
340k
                gt1_ctxt = 0;
1860
340k
                ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1861
1862
340k
                state_mps = pu1_ctxt_model[ctxt_idx];
1863
1864
                /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1865
340k
                u4_bits_estimated_r1 += gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1866
1867
340k
                QUANT_ROUND_FACTOR(
1868
340k
                    quant_rounding_0_1, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
1869
1870
47.7M
                for(scan_pos = 0; scan_pos < (16 * (i4_trans_size * i4_trans_size >> 4));
1871
47.3M
                    scan_pos++)
1872
47.3M
                {
1873
47.3M
                    *(pi4_quant_round_0_1 + scan_pos) = quant_rounding_0_1;
1874
47.3M
                }
1875
340k
            }
1876
1877
            /*First Subblock*/
1878
340k
            i = 0;
1879
1880
340k
            {
1881
340k
                UWORD8 sig_ctxinc;
1882
340k
                WORD32 state_mps;
1883
340k
                WORD32 gt1_ctxt = 0;
1884
340k
                WORD32 ctxt_set = 0;
1885
1886
340k
                WORD32 ctxt_idx;
1887
1888
                /*Check if the cabac states had previous nbr available */
1889
1890
340k
                {
1891
340k
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0];
1892
1893
                    /*ctxt_set = 0 DC subblock, the previous state did not have 2
1894
                    ctxt_set = 1 DC subblock, the previous state did have >= 2
1895
                    ctxt_set = 2 AC subblock, the previous state did not have 2
1896
                    ctxt_set = 3 AC subblock, the previous state did have >= 2*/
1897
340k
                    ctxt_set = (i && is_luma) ? 2 : 0;
1898
1899
                    /* gt1_ctxt = 1 for the co-ef value to be 1 */
1900
340k
                    gt1_ctxt = 0;
1901
340k
                    ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1902
1903
340k
                    state_mps = pu1_ctxt_model[ctxt_idx];
1904
1905
                    /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1906
340k
                    u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1907
1908
5.78M
                    for(scan_pos = 0; scan_pos < 16; scan_pos++)
1909
5.44M
                    {
1910
5.44M
                        UWORD8 y_pos_x_pos;
1911
1912
5.44M
                        if(scan_pos)
1913
5.10M
                        {
1914
5.10M
                            y_pos_x_pos = scan_pos;  // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
1915
                            /* ctxt for AC coeff depends on curpos and neigbour csbf */
1916
5.10M
                            sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
1917
1918
                            /* based on luma subblock pos */
1919
5.10M
                            sig_ctxinc += (i && is_luma) ? 3 : 0;
1920
1921
5.10M
                            sig_ctxinc += sig_coeff_base_ctxt;
1922
5.10M
                        }
1923
340k
                        else
1924
340k
                        {
1925
                            /*MAM : both scan pos and i 0 impies the DC coef of 1st block only */
1926
                            /* DC coeff has fixed context for luma and chroma */
1927
340k
                            sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
1928
340k
                        }
1929
1930
                        /*Get the mps state based on ctxt modes */
1931
5.44M
                        state_mps = pu1_ctxt_model[sig_ctxinc];
1932
1933
                        /* Bits taken to encode sig co-ef flag as 0 */
1934
5.44M
                        u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1935
1936
5.44M
                        u4_bits_estimated_r1 =
1937
5.44M
                            (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
1938
1939
                        /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1940
5.44M
                        u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
1941
5.44M
                        {
1942
5.44M
                            QUANT_ROUND_FACTOR(
1943
5.44M
                                temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
1944
5.44M
                            *(pi4_quant_round_0_1 +
1945
5.44M
                              ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2;
1946
5.44M
                        }
1947
5.44M
                    }
1948
340k
                }
1949
340k
            }
1950
340k
        }
1951
369k
    }
1952
798k
    return;
1953
798k
}
1954
1955
/*!
1956
******************************************************************************
1957
* \if Function name : ihevce_t_q_iq_ssd_scan_fxn \endif
1958
*
1959
* \brief
1960
*    Transform unit level (Luma) enc_loop function
1961
*
1962
* \param[in] ps_ctxt    enc_loop module ctxt pointer
1963
* \param[in] pu1_pred   pointer to predicted data buffer
1964
* \param[in] pred_strd  predicted buffer stride
1965
* \param[in] pu1_src    pointer to source data buffer
1966
* \param[in] src_strd   source buffer stride
1967
* \param[in] pi2_deq_data   pointer to store iq data
1968
* \param[in] deq_data_strd  iq data buffer stride
1969
* \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod)
1970
* \param[out] pu1_csbf_buf  pointer to store the csbf for all 4x4 in a current
1971
*                           block
1972
* \param[out] csbf_strd  csbf buffer stride
1973
* \param[in] trans_size transform size (4, 8, 16,32)
1974
* \param[in] packed_pred_mode   0:Inter 1:Intra 2:Skip
1975
* \param[out] pi8_cost      pointer to store the cost
1976
* \param[out] pi4_coeff_off pointer to store the number of bytes produced in
1977
*                           coeff buffer
1978
* \param[out] pi4_tu_bits   pointer to store the best TU bits required to encode
1979
the current TU in RDopt Mode
1980
* \param[out] pu4_blk_sad   pointer to store the block sad for RC
1981
* \param[out] pi4_zero_col  pointer to store the zero_col info for the TU
1982
* \param[out] pi4_zero_row  pointer to store the zero_row info for the TU
1983
* \param[in]  i4_perform_rdoq Indicates if RDOQ should be performed or not
1984
* \param[in]  i4_perform_sbh Indicates if SBH should be performed or not
1985
*
1986
* \return
1987
*    CBF of the current block
1988
*
1989
* \author
1990
*  Ittiam
1991
*
1992
*****************************************************************************
1993
*/
1994
1995
WORD32 ihevce_t_q_iq_ssd_scan_fxn(
1996
    ihevce_enc_loop_ctxt_t *ps_ctxt,
1997
    UWORD8 *pu1_pred,
1998
    WORD32 pred_strd,
1999
    UWORD8 *pu1_src,
2000
    WORD32 src_strd,
2001
    WORD16 *pi2_deq_data,
2002
    WORD32 deq_data_strd,
2003
    UWORD8 *pu1_recon,
2004
    WORD32 i4_recon_stride,
2005
    UWORD8 *pu1_ecd_data,
2006
    UWORD8 *pu1_csbf_buf,
2007
    WORD32 csbf_strd,
2008
    WORD32 trans_size,
2009
    WORD32 packed_pred_mode,
2010
    LWORD64 *pi8_cost,
2011
    WORD32 *pi4_coeff_off,
2012
    WORD32 *pi4_tu_bits,
2013
    UWORD32 *pu4_blk_sad,
2014
    WORD32 *pi4_zero_col,
2015
    WORD32 *pi4_zero_row,
2016
    UWORD8 *pu1_is_recon_available,
2017
    WORD32 i4_perform_rdoq,
2018
    WORD32 i4_perform_sbh,
2019
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2020
    WORD32 i4_alpha_stim_multiplier,
2021
    UWORD8 u1_is_cu_noisy,
2022
#endif
2023
    SSD_TYPE_T e_ssd_type,
2024
    WORD32 early_cbf)
2025
7.12M
{
2026
7.12M
    WORD32 cbf = 0;
2027
7.12M
    WORD32 trans_idx;
2028
7.12M
    WORD32 quant_scale_mat_offset;
2029
7.12M
    WORD32 *pi4_trans_scratch;
2030
7.12M
    WORD16 *pi2_trans_values;
2031
7.12M
    WORD16 *pi2_quant_coeffs;
2032
7.12M
    WORD32 *pi4_subBlock2csbfId_map = NULL;
2033
2034
#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
2035
    WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i;
2036
#endif
2037
2038
7.12M
    rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt;
2039
2040
7.12M
    WORD32 i4_perform_zcbf = (ENABLE_INTER_ZCU_COST && (PRED_MODE_INTRA != packed_pred_mode)) ||
2041
6.09M
                             (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE);
2042
7.12M
    WORD32 i4_perform_coeff_level_rdoq = (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING);
2043
7.12M
    WORD8 intra_flag = 0;
2044
7.12M
    ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW);
2045
2046
7.12M
    *pi4_tu_bits = 0;
2047
7.12M
    *pi4_coeff_off = 0;
2048
7.12M
    pu1_is_recon_available[0] = 0;
2049
2050
7.12M
    if((PRED_MODE_SKIP == packed_pred_mode) || (0 == early_cbf))
2051
113k
    {
2052
113k
        if(e_ssd_type != NULL_TYPE)
2053
113k
        {
2054
            /* SSD cost is stored to the pointer */
2055
113k
            pi8_cost[0] =
2056
2057
113k
                ps_ctxt->s_cmn_opt_func.pf_ssd_and_sad_calculator(
2058
113k
                    pu1_pred, pred_strd, pu1_src, src_strd, trans_size, pu4_blk_sad);
2059
2060
113k
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2061
113k
            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
2062
0
            {
2063
0
                pi8_cost[0] = ihevce_inject_stim_into_distortion(
2064
0
                    pu1_src,
2065
0
                    src_strd,
2066
0
                    pu1_pred,
2067
0
                    pred_strd,
2068
0
                    pi8_cost[0],
2069
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2070
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2071
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2072
0
                                                 100.0,
2073
0
                    trans_size,
2074
0
                    0,
2075
0
                    ps_ctxt->u1_enable_psyRDOPT,
2076
0
                    NULL_PLANE);
2077
0
            }
2078
113k
#endif
2079
2080
            /* copy pred to recon for skip mode */
2081
113k
            if(SPATIAL_DOMAIN_SSD == e_ssd_type)
2082
19.7k
            {
2083
19.7k
                ps_ctxt->s_cmn_opt_func.pf_copy_2d(
2084
19.7k
                    pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size);
2085
19.7k
                pu1_is_recon_available[0] = 1;
2086
19.7k
            }
2087
93.2k
            else
2088
93.2k
            {
2089
93.2k
                pu1_is_recon_available[0] = 0;
2090
93.2k
            }
2091
2092
113k
#if ENABLE_INTER_ZCU_COST
2093
113k
            ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
2094
113k
#endif
2095
113k
        }
2096
0
        else
2097
0
        {
2098
0
            pi8_cost[0] = UINT_MAX;
2099
0
        }
2100
2101
        /* cbf is returned as 0 */
2102
113k
        return (0);
2103
113k
    }
2104
2105
    /* derive context variables */
2106
7.01M
    pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0];
2107
7.01M
    pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
2108
7.01M
    pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2);
2109
2110
    /* translate the transform size to index for 4x4 and 8x8 */
2111
7.01M
    trans_idx = trans_size >> 2;
2112
2113
7.01M
    if(PRED_MODE_INTRA == packed_pred_mode)
2114
6.09M
    {
2115
6.09M
        quant_scale_mat_offset = 0;
2116
6.09M
        intra_flag = 1;
2117
#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
2118
        ai4_quant_rounding_factors[0][0] =
2119
            MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3);
2120
2121
        for(i = 0; i < trans_size * trans_size; i++)
2122
        {
2123
            ai4_quant_rounding_factors[1][i] =
2124
                MAX(ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3][i],
2125
                    (1 << QUANT_ROUND_FACTOR_Q) / 3);
2126
            ai4_quant_rounding_factors[2][i] =
2127
                MAX(ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3][i],
2128
                    (1 << QUANT_ROUND_FACTOR_Q) / 3);
2129
        }
2130
#endif
2131
6.09M
    }
2132
923k
    else
2133
923k
    {
2134
923k
        quant_scale_mat_offset = NUM_TRANS_TYPES;
2135
923k
    }
2136
    /* for intra 4x4 DST transform should be used */
2137
7.01M
    if((1 == trans_idx) && (1 == intra_flag))
2138
4.85M
    {
2139
4.85M
        trans_idx = 0;
2140
4.85M
    }
2141
    /* for 16x16 cases */
2142
2.15M
    else if(16 == trans_size)
2143
343k
    {
2144
343k
        trans_idx = 3;
2145
343k
    }
2146
    /* for 32x32 cases */
2147
1.81M
    else if(32 == trans_size)
2148
82.1k
    {
2149
82.1k
        trans_idx = 4;
2150
82.1k
    }
2151
2152
7.01M
    switch(trans_size)
2153
7.01M
    {
2154
5.39M
    case 4:
2155
5.39M
    {
2156
5.39M
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU;
2157
2158
5.39M
        break;
2159
0
    }
2160
1.19M
    case 8:
2161
1.19M
    {
2162
1.19M
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU;
2163
2164
1.19M
        break;
2165
0
    }
2166
343k
    case 16:
2167
343k
    {
2168
343k
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU;
2169
2170
343k
        break;
2171
0
    }
2172
82.1k
    case 32:
2173
82.1k
    {
2174
82.1k
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU;
2175
2176
82.1k
        break;
2177
0
    }
2178
7.01M
    }
2179
2180
    /* Do not call the FT and Quant functions if early_cbf is 0 */
2181
7.01M
    if(1 == early_cbf)
2182
7.01M
    {
2183
        /* ---------- call residue and transform block ------- */
2184
7.01M
        *pu4_blk_sad = ps_ctxt->apf_resd_trns[trans_idx](
2185
7.01M
            pu1_src,
2186
7.01M
            pu1_pred,
2187
7.01M
            pi4_trans_scratch,
2188
7.01M
            pi2_trans_values,
2189
7.01M
            src_strd,
2190
7.01M
            pred_strd,
2191
7.01M
            trans_size,
2192
7.01M
            NULL_PLANE);
2193
2194
7.01M
        cbf = ps_ctxt->apf_quant_iquant_ssd
2195
7.01M
                  [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2](
2196
7.01M
                      pi2_trans_values,
2197
7.01M
                      ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset],
2198
7.01M
                      pi2_quant_coeffs,
2199
7.01M
                      pi2_deq_data,
2200
7.01M
                      trans_size,
2201
7.01M
                      ps_ctxt->i4_cu_qp_div6,
2202
7.01M
                      ps_ctxt->i4_cu_qp_mod6,
2203
7.01M
#if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
2204
7.01M
                      ps_ctxt->i4_quant_rnd_factor[intra_flag],
2205
7.01M
                      ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
2206
7.01M
                      ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
2207
#else
2208
                      intra_flag ? ai4_quant_rounding_factors[0][0]
2209
                                 : ps_ctxt->i4_quant_rnd_factor[intra_flag],
2210
                      intra_flag ? ai4_quant_rounding_factors[1]
2211
                                 : ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
2212
                      intra_flag ? ai4_quant_rounding_factors[2]
2213
                                 : ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
2214
#endif
2215
7.01M
                      trans_size,
2216
7.01M
                      trans_size,
2217
7.01M
                      deq_data_strd,
2218
7.01M
                      pu1_csbf_buf,
2219
7.01M
                      csbf_strd,
2220
7.01M
                      pi4_zero_col,
2221
7.01M
                      pi4_zero_row,
2222
7.01M
                      ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset],
2223
7.01M
                      pi8_cost);
2224
2225
7.01M
        if(e_ssd_type != FREQUENCY_DOMAIN_SSD)
2226
1.01M
        {
2227
1.01M
            pi8_cost[0] = UINT_MAX;
2228
1.01M
        }
2229
7.01M
    }
2230
2231
7.01M
    if(0 != cbf)
2232
5.37M
    {
2233
5.37M
        if(i4_perform_sbh || i4_perform_rdoq)
2234
4.12M
        {
2235
4.12M
            ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd;
2236
4.12M
            ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size;
2237
4.12M
            ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map;
2238
2239
4.12M
            ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_cu_qp_div6;
2240
4.12M
            ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_cu_qp_mod6;
2241
4.12M
            ps_rdoq_sbh_ctxt->i4_scan_idx = ps_ctxt->i4_scan_idx;
2242
4.12M
            ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
2243
4.12M
            ps_rdoq_sbh_ctxt->i4_trans_size = trans_size;
2244
2245
4.12M
            ps_rdoq_sbh_ctxt->pi2_dequant_coeff =
2246
4.12M
                ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset];
2247
4.12M
            ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data;
2248
4.12M
            ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs;
2249
4.12M
            ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values;
2250
4.12M
            ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf;
2251
2252
            /* ------- call coeffs scan function ------- */
2253
4.12M
            if((!i4_perform_rdoq))
2254
2.02M
            {
2255
2.02M
                ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
2256
2257
2.02M
                pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
2258
2.02M
            }
2259
4.12M
        }
2260
2261
5.37M
        *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
2262
5.37M
            pi2_quant_coeffs,
2263
5.37M
            pi4_subBlock2csbfId_map,
2264
5.37M
            ps_ctxt->i4_scan_idx,
2265
5.37M
            trans_size,
2266
5.37M
            pu1_ecd_data,
2267
5.37M
            pu1_csbf_buf,
2268
5.37M
            csbf_strd);
2269
5.37M
    }
2270
7.01M
    *pi8_cost >>= ga_trans_shift[trans_idx];
2271
2272
7.01M
#if RDOPT_ZERO_CBF_ENABLE
2273
    /* compare null cbf cost with encode tu rd-cost */
2274
7.01M
    if(cbf != 0)
2275
5.37M
    {
2276
5.37M
        WORD32 tu_bits;
2277
5.37M
        LWORD64 tu_rd_cost;
2278
2279
5.37M
        LWORD64 zero_cbf_cost = 0;
2280
2281
        /*Populating the feilds of rdoq_ctxt structure*/
2282
5.37M
        if(i4_perform_rdoq)
2283
2.09M
        {
2284
            /* transform size to log2transform size */
2285
2.09M
            GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size);
2286
2.09M
            ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1;
2287
2.09M
            ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_qf;
2288
2.09M
            ps_rdoq_sbh_ctxt->i4_is_luma = 1;
2289
2.09M
            ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx];
2290
2.09M
            ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td =
2291
2.09M
                (1 << ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td) / 2;
2292
2.09M
            ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0;
2293
2.09M
            ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col;
2294
2.09M
            ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row;
2295
2.09M
        }
2296
3.28M
        else if(i4_perform_zcbf)
2297
497k
        {
2298
497k
            zero_cbf_cost =
2299
2300
497k
                ps_ctxt->s_cmn_opt_func.pf_ssd_calculator(
2301
497k
                    pu1_src, pu1_pred, src_strd, pred_strd, trans_size, trans_size, NULL_PLANE);
2302
497k
        }
2303
2304
        /************************************************************************/
2305
        /* call the entropy rdo encode to get the bit estimate for current tu   */
2306
        /* note that tu includes only residual coding bits and does not include */
2307
        /* tu split, cbf and qp delta encoding bits for a TU                    */
2308
        /************************************************************************/
2309
5.37M
        if(i4_perform_rdoq)
2310
2.09M
        {
2311
2.09M
            tu_bits = ihevce_entropy_rdo_encode_tu_rdoq(
2312
2.09M
                &ps_ctxt->s_rdopt_entropy_ctxt,
2313
2.09M
                (pu1_ecd_data),
2314
2.09M
                trans_size,
2315
2.09M
                1,
2316
2.09M
                ps_rdoq_sbh_ctxt,
2317
2.09M
                pi8_cost,
2318
2.09M
                &zero_cbf_cost,
2319
2.09M
                0);
2320
2321
2.09M
            if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0)
2322
56.4k
            {
2323
56.4k
                cbf = 0;
2324
56.4k
                *pi4_coeff_off = 0;
2325
56.4k
            }
2326
2327
2.09M
            if((i4_perform_sbh) && (0 != cbf))
2328
2.03M
            {
2329
2.03M
                ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
2330
2.03M
                ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
2331
2.03M
                *pi8_cost = ps_rdoq_sbh_ctxt->i8_ssd_cost;
2332
2.03M
            }
2333
2334
            /*Add round value before normalizing*/
2335
2.09M
            *pi8_cost += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td;
2336
2.09M
            *pi8_cost >>= ga_trans_shift[trans_idx];
2337
2338
2.09M
            if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1)
2339
2.03M
            {
2340
2.03M
                pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
2341
2.03M
                *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
2342
2.03M
                    pi2_quant_coeffs,
2343
2.03M
                    pi4_subBlock2csbfId_map,
2344
2.03M
                    ps_ctxt->i4_scan_idx,
2345
2.03M
                    trans_size,
2346
2.03M
                    pu1_ecd_data,
2347
2.03M
                    pu1_csbf_buf,
2348
2.03M
                    csbf_strd);
2349
2.03M
            }
2350
2.09M
        }
2351
3.28M
        else
2352
3.28M
        {
2353
3.28M
            tu_bits = ihevce_entropy_rdo_encode_tu(
2354
3.28M
                &ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 1, i4_perform_sbh);
2355
3.28M
        }
2356
2357
5.37M
        *pi4_tu_bits = tu_bits;
2358
2359
5.37M
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
2360
616k
        {
2361
616k
            *pi8_cost = ihevce_it_recon_ssd(
2362
616k
                ps_ctxt,
2363
616k
                pu1_src,
2364
616k
                src_strd,
2365
616k
                pu1_pred,
2366
616k
                pred_strd,
2367
616k
                pi2_deq_data,
2368
616k
                deq_data_strd,
2369
616k
                pu1_recon,
2370
616k
                i4_recon_stride,
2371
616k
                pu1_ecd_data,
2372
616k
                trans_size,
2373
616k
                packed_pred_mode,
2374
616k
                cbf,
2375
616k
                *pi4_zero_col,
2376
616k
                *pi4_zero_row,
2377
616k
                NULL_PLANE);
2378
2379
616k
            pu1_is_recon_available[0] = 1;
2380
616k
        }
2381
2382
5.37M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2383
5.37M
        if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2384
0
        {
2385
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
2386
0
                pu1_src,
2387
0
                src_strd,
2388
0
                pu1_recon,
2389
0
                i4_recon_stride,
2390
0
                pi8_cost[0],
2391
0
                i4_alpha_stim_multiplier,
2392
0
                trans_size,
2393
0
                0,
2394
0
                ps_ctxt->u1_enable_psyRDOPT,
2395
0
                NULL_PLANE);
2396
0
        }
2397
5.37M
        else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2398
0
        {
2399
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
2400
0
                pu1_src,
2401
0
                src_strd,
2402
0
                pu1_pred,
2403
0
                pred_strd,
2404
0
                pi8_cost[0],
2405
0
                i4_alpha_stim_multiplier,
2406
0
                trans_size,
2407
0
                0,
2408
0
                ps_ctxt->u1_enable_psyRDOPT,
2409
0
                NULL_PLANE);
2410
0
        }
2411
5.37M
#endif
2412
2413
        /* add the SSD cost to bits estimate given by ECD */
2414
5.37M
        tu_rd_cost = *pi8_cost + COMPUTE_RATE_COST_CLIP30(
2415
5.37M
                                     tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
2416
2417
5.37M
        if(i4_perform_zcbf)
2418
701k
        {
2419
701k
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2420
701k
            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
2421
0
            {
2422
0
                zero_cbf_cost = ihevce_inject_stim_into_distortion(
2423
0
                    pu1_src,
2424
0
                    src_strd,
2425
0
                    pu1_pred,
2426
0
                    pred_strd,
2427
0
                    zero_cbf_cost,
2428
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2429
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2430
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2431
0
                                                 100.0,
2432
0
                    trans_size,
2433
0
                    0,
2434
0
                    ps_ctxt->u1_enable_psyRDOPT,
2435
0
                    NULL_PLANE);
2436
0
            }
2437
701k
#endif
2438
2439
            /* force the tu as zero cbf if zero_cbf_cost is lower */
2440
701k
            if(zero_cbf_cost < tu_rd_cost)
2441
18.6k
            {
2442
                /* num bytes is set to 0 */
2443
18.6k
                *pi4_coeff_off = 0;
2444
2445
                /* cbf is returned as 0 */
2446
18.6k
                cbf = 0;
2447
2448
                /* cost is returned as 0 cbf cost */
2449
18.6k
                *pi8_cost = zero_cbf_cost;
2450
2451
                /* TU bits is set to 0 */
2452
18.6k
                *pi4_tu_bits = 0;
2453
18.6k
                pu1_is_recon_available[0] = 0;
2454
2455
18.6k
                if(SPATIAL_DOMAIN_SSD == e_ssd_type)
2456
1.19k
                {
2457
                    /* copy pred to recon for zcbf mode */
2458
2459
1.19k
                    ps_ctxt->s_cmn_opt_func.pf_copy_2d(
2460
1.19k
                        pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size);
2461
2462
1.19k
                    pu1_is_recon_available[0] = 1;
2463
1.19k
                }
2464
18.6k
            }
2465
            /* accumulate cu not coded cost with zcbf cost */
2466
701k
#if ENABLE_INTER_ZCU_COST
2467
701k
            ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost;
2468
701k
#endif
2469
701k
        }
2470
5.37M
    }
2471
1.63M
    else
2472
1.63M
    {
2473
        /* cbf = 0, accumulate cu not coded cost */
2474
1.63M
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
2475
397k
        {
2476
397k
            *pi8_cost = ihevce_it_recon_ssd(
2477
397k
                ps_ctxt,
2478
397k
                pu1_src,
2479
397k
                src_strd,
2480
397k
                pu1_pred,
2481
397k
                pred_strd,
2482
397k
                pi2_deq_data,
2483
397k
                deq_data_strd,
2484
397k
                pu1_recon,
2485
397k
                i4_recon_stride,
2486
397k
                pu1_ecd_data,
2487
397k
                trans_size,
2488
397k
                packed_pred_mode,
2489
397k
                cbf,
2490
397k
                *pi4_zero_col,
2491
397k
                *pi4_zero_row,
2492
397k
                NULL_PLANE);
2493
2494
397k
            pu1_is_recon_available[0] = 1;
2495
397k
        }
2496
2497
1.63M
#if ENABLE_INTER_ZCU_COST
2498
1.63M
        {
2499
1.63M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2500
1.63M
            if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2501
0
            {
2502
0
                pi8_cost[0] = ihevce_inject_stim_into_distortion(
2503
0
                    pu1_src,
2504
0
                    src_strd,
2505
0
                    pu1_recon,
2506
0
                    i4_recon_stride,
2507
0
                    pi8_cost[0],
2508
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2509
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2510
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2511
0
                                                 100.0,
2512
0
                    trans_size,
2513
0
                    0,
2514
0
                    ps_ctxt->u1_enable_psyRDOPT,
2515
0
                    NULL_PLANE);
2516
0
            }
2517
1.63M
            else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2518
0
            {
2519
0
                pi8_cost[0] = ihevce_inject_stim_into_distortion(
2520
0
                    pu1_src,
2521
0
                    src_strd,
2522
0
                    pu1_pred,
2523
0
                    pred_strd,
2524
0
                    pi8_cost[0],
2525
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2526
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2527
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2528
0
                                                 100.0,
2529
0
                    trans_size,
2530
0
                    0,
2531
0
                    ps_ctxt->u1_enable_psyRDOPT,
2532
0
                    NULL_PLANE);
2533
0
            }
2534
1.63M
#endif
2535
2536
1.63M
            ps_ctxt->i8_cu_not_coded_cost += *pi8_cost;
2537
1.63M
        }
2538
1.63M
#endif /* ENABLE_INTER_ZCU_COST */
2539
1.63M
    }
2540
7.01M
#endif
2541
2542
7.01M
    return (cbf);
2543
7.01M
}
2544
2545
/*!
2546
******************************************************************************
2547
* \if Function name : ihevce_it_recon_fxn \endif
2548
*
2549
* \brief
2550
*    Transform unit level (Luma) IT Recon function
2551
*
2552
* \param[in] ps_ctxt        enc_loop module ctxt pointer
2553
* \param[in] pi2_deq_data   pointer to iq data
2554
* \param[in] deq_data_strd  iq data buffer stride
2555
* \param[in] pu1_pred       pointer to predicted data buffer
2556
* \param[in] pred_strd      predicted buffer stride
2557
* \param[in] pu1_recon      pointer to recon buffer
2558
* \param[in] recon_strd     recon buffer stride
2559
* \param[out] pu1_ecd_data  pointer coeff output buffer (input to ent cod)
2560
* \param[in] trans_size     transform size (4, 8, 16,32)
2561
* \param[in] packed_pred_mode   0:Inter 1:Intra 2:Skip
2562
* \param[in] cbf            CBF of the current block
2563
* \param[in] zero_cols      zero_cols of the current block
2564
* \param[in] zero_rows      zero_rows of the current block
2565
*
2566
* \return
2567
*
2568
* \author
2569
*  Ittiam
2570
*
2571
*****************************************************************************
2572
*/
2573
2574
void ihevce_it_recon_fxn(
2575
    ihevce_enc_loop_ctxt_t *ps_ctxt,
2576
    WORD16 *pi2_deq_data,
2577
    WORD32 deq_dat_strd,
2578
    UWORD8 *pu1_pred,
2579
    WORD32 pred_strd,
2580
    UWORD8 *pu1_recon,
2581
    WORD32 recon_strd,
2582
    UWORD8 *pu1_ecd_data,
2583
    WORD32 trans_size,
2584
    WORD32 packed_pred_mode,
2585
    WORD32 cbf,
2586
    WORD32 zero_cols,
2587
    WORD32 zero_rows)
2588
2.78M
{
2589
2.78M
    WORD32 dc_add_flag = 0;
2590
2.78M
    WORD32 trans_idx;
2591
2592
    /* translate the transform size to index for 4x4 and 8x8 */
2593
2.78M
    trans_idx = trans_size >> 2;
2594
2595
    /* if SKIP mode needs to be evaluated the pred is copied to recon */
2596
2.78M
    if(PRED_MODE_SKIP == packed_pred_mode)
2597
6.80k
    {
2598
6.80k
        UWORD8 *pu1_curr_recon, *pu1_curr_pred;
2599
2600
6.80k
        pu1_curr_pred = pu1_pred;
2601
6.80k
        pu1_curr_recon = pu1_recon;
2602
2603
        /* 2D copy of data */
2604
2605
6.80k
        ps_ctxt->s_cmn_opt_func.pf_2d_square_copy(
2606
6.80k
            pu1_curr_recon, recon_strd, pu1_curr_pred, pred_strd, trans_size, sizeof(UWORD8));
2607
2608
6.80k
        return;
2609
6.80k
    }
2610
2611
    /* for intra 4x4 DST transform should be used */
2612
2.77M
    if((1 == trans_idx) && (PRED_MODE_INTRA == packed_pred_mode))
2613
1.89M
    {
2614
1.89M
        trans_idx = 0;
2615
1.89M
    }
2616
    /* for 16x16 cases */
2617
889k
    else if(16 == trans_size)
2618
192k
    {
2619
192k
        trans_idx = 3;
2620
192k
    }
2621
    /* for 32x32 cases */
2622
696k
    else if(32 == trans_size)
2623
53.8k
    {
2624
53.8k
        trans_idx = 4;
2625
53.8k
    }
2626
2627
    /*if (lastx == 0 && lasty == 0) , ie only 1 coefficient */
2628
2.77M
    if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1]))
2629
231k
    {
2630
231k
        dc_add_flag = 1;
2631
231k
    }
2632
2633
2.77M
    if(0 == cbf)
2634
1.01M
    {
2635
        /* buffer copy */
2636
1.01M
        ps_ctxt->s_cmn_opt_func.pf_2d_square_copy(
2637
1.01M
            pu1_recon, recon_strd, pu1_pred, pred_strd, trans_size, 1);
2638
1.01M
    }
2639
1.76M
    else if((1 == dc_add_flag) && (0 != trans_idx))
2640
24.9k
    {
2641
        /* dc add */
2642
24.9k
        ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc(
2643
24.9k
            pu1_pred,
2644
24.9k
            pred_strd,
2645
24.9k
            pu1_recon,
2646
24.9k
            recon_strd,
2647
24.9k
            trans_size,
2648
24.9k
            pi2_deq_data[0],
2649
24.9k
            NULL_PLANE /* luma */
2650
24.9k
        );
2651
24.9k
    }
2652
1.73M
    else
2653
1.73M
    {
2654
1.73M
        ps_ctxt->apf_it_recon[trans_idx](
2655
1.73M
            pi2_deq_data,
2656
1.73M
            &ps_ctxt->ai2_scratch[0],
2657
1.73M
            pu1_pred,
2658
1.73M
            pu1_recon,
2659
1.73M
            deq_dat_strd,
2660
1.73M
            pred_strd,
2661
1.73M
            recon_strd,
2662
1.73M
            zero_cols,
2663
1.73M
            zero_rows);
2664
1.73M
    }
2665
2.77M
}
2666
2667
/*!
2668
******************************************************************************
2669
* \if Function name : ihevce_chroma_it_recon_fxn \endif
2670
*
2671
* \brief
2672
*    Transform unit level (Chroma) IT Recon function
2673
*
2674
* \param[in] ps_ctxt        enc_loop module ctxt pointer
2675
* \param[in] pi2_deq_data   pointer to iq data
2676
* \param[in] deq_data_strd  iq data buffer stride
2677
* \param[in] pu1_pred       pointer to predicted data buffer
2678
* \param[in] pred_strd      predicted buffer stride
2679
* \param[in] pu1_recon      pointer to recon buffer
2680
* \param[in] recon_strd     recon buffer stride
2681
* \param[out] pu1_ecd_data  pointer coeff output buffer (input to ent cod)
2682
* \param[in] trans_size     transform size (4, 8, 16)
2683
* \param[in] cbf            CBF of the current block
2684
* \param[in] zero_cols      zero_cols of the current block
2685
* \param[in] zero_rows      zero_rows of the current block
2686
*
2687
* \return
2688
*
2689
* \author
2690
*  Ittiam
2691
*
2692
*****************************************************************************
2693
*/
2694
2695
void ihevce_chroma_it_recon_fxn(
2696
    ihevce_enc_loop_ctxt_t *ps_ctxt,
2697
    WORD16 *pi2_deq_data,
2698
    WORD32 deq_dat_strd,
2699
    UWORD8 *pu1_pred,
2700
    WORD32 pred_strd,
2701
    UWORD8 *pu1_recon,
2702
    WORD32 recon_strd,
2703
    UWORD8 *pu1_ecd_data,
2704
    WORD32 trans_size,
2705
    WORD32 cbf,
2706
    WORD32 zero_cols,
2707
    WORD32 zero_rows,
2708
    CHROMA_PLANE_ID_T e_chroma_plane)
2709
1.74M
{
2710
1.74M
    WORD32 trans_idx;
2711
2712
1.74M
    ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE));
2713
2714
    /* since 2x2 transform is not allowed for chroma*/
2715
1.74M
    if(2 == trans_size)
2716
0
    {
2717
0
        trans_size = 4;
2718
0
    }
2719
2720
    /* translate the transform size to index */
2721
1.74M
    trans_idx = trans_size >> 2;
2722
2723
    /* for 16x16 cases */
2724
1.74M
    if(16 == trans_size)
2725
108k
    {
2726
108k
        trans_idx = 3;
2727
108k
    }
2728
2729
1.74M
    if(0 == cbf)
2730
1.12M
    {
2731
        /* buffer copy */
2732
1.12M
        ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
2733
1.12M
            pu1_pred, pred_strd, pu1_recon, recon_strd, trans_size, trans_size, e_chroma_plane);
2734
1.12M
    }
2735
615k
    else if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1]))
2736
39.1k
    {
2737
        /* dc add */
2738
39.1k
        ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc(
2739
39.1k
            pu1_pred,
2740
39.1k
            pred_strd,
2741
39.1k
            pu1_recon,
2742
39.1k
            recon_strd,
2743
39.1k
            trans_size,
2744
39.1k
            pi2_deq_data[0],
2745
39.1k
            e_chroma_plane /* chroma plane */
2746
39.1k
        );
2747
39.1k
    }
2748
576k
    else
2749
576k
    {
2750
576k
        ps_ctxt->apf_chrm_it_recon[trans_idx - 1](
2751
576k
            pi2_deq_data,
2752
576k
            &ps_ctxt->ai2_scratch[0],
2753
576k
            pu1_pred + (WORD32)e_chroma_plane,
2754
576k
            pu1_recon + (WORD32)e_chroma_plane,
2755
576k
            deq_dat_strd,
2756
576k
            pred_strd,
2757
576k
            recon_strd,
2758
576k
            zero_cols,
2759
576k
            zero_rows);
2760
576k
    }
2761
1.74M
}
2762
2763
/**
2764
*******************************************************************************
2765
* \if Function name : ihevce_mpm_idx_based_filter_RDOPT_cand \endif
2766
*
2767
* \brief * Filters the RDOPT candidates based on mpm_idx
2768
*
2769
* \par   Description
2770
* Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
2771
* for a CU
2772
*
2773
* \param[in] ps_ctxt : ptr to enc loop context
2774
* \param[in] ps_cu_analyse : ptr to CU analyse structure
2775
* \param[in] ps_top_nbr_4x4 top 4x4 neighbour pointer
2776
* \param[in] ps_left_nbr_4x4 left 4x4 neighbour pointer
2777
* \param[in] pu1_luma_mode luma mode
2778
*
2779
* \returns none
2780
*
2781
* \author
2782
*  Ittiam
2783
*
2784
*******************************************************************************
2785
*/
2786
2787
void ihevce_mpm_idx_based_filter_RDOPT_cand(
2788
    ihevce_enc_loop_ctxt_t *ps_ctxt,
2789
    cu_analyse_t *ps_cu_analyse,
2790
    nbr_4x4_t *ps_left_nbr_4x4,
2791
    nbr_4x4_t *ps_top_nbr_4x4,
2792
    UWORD8 *pu1_luma_mode,
2793
    UWORD8 *pu1_eval_mark)
2794
168k
{
2795
168k
    WORD32 cu_pos_x;
2796
168k
    WORD32 cu_pos_y;
2797
168k
    nbr_avail_flags_t s_nbr;
2798
168k
    WORD32 trans_size;
2799
168k
    WORD32 au4_cand_mode_list[3];
2800
168k
    WORD32 nbr_flags;
2801
168k
    UWORD8 *pu1_intra_luma_modes;
2802
168k
    WORD32 rdopt_cand_ctr = 0;
2803
168k
    UWORD8 *pu1_luma_eval_mark;
2804
2805
168k
    cu_pos_x = ps_cu_analyse->b3_cu_pos_x << 1;
2806
168k
    cu_pos_y = ps_cu_analyse->b3_cu_pos_y << 1;
2807
168k
    trans_size = ps_cu_analyse->u1_cu_size;
2808
2809
    /* get the neighbour availability flags */
2810
168k
    nbr_flags = ihevce_get_nbr_intra(
2811
168k
        &s_nbr,
2812
168k
        ps_ctxt->pu1_ctb_nbr_map,
2813
168k
        ps_ctxt->i4_nbr_map_strd,
2814
168k
        cu_pos_x,
2815
168k
        cu_pos_y,
2816
168k
        trans_size >> 2);
2817
168k
    (void)nbr_flags;
2818
    /*Call the fun to populate luma intra pred mode fro TU=CU and use the same list fro
2819
    *TU=CU/2 also since the modes are same in both the cases.
2820
    */
2821
168k
    ihevce_populate_intra_pred_mode(
2822
168k
        ps_top_nbr_4x4->b6_luma_intra_mode,
2823
168k
        ps_left_nbr_4x4->b6_luma_intra_mode,
2824
168k
        s_nbr.u1_top_avail,
2825
168k
        s_nbr.u1_left_avail,
2826
168k
        cu_pos_y,
2827
168k
        &au4_cand_mode_list[0]);
2828
2829
    /*Loop through all the RDOPT candidates of TU=CU and TU=CU/2 and check if the current RDOPT
2830
    *cand is present in a4_cand_mode_list, If yes set eval flag to 1 else set it to zero
2831
    */
2832
2833
168k
    pu1_intra_luma_modes = pu1_luma_mode;
2834
168k
    pu1_luma_eval_mark = pu1_eval_mark;
2835
2836
621k
    while(pu1_intra_luma_modes[rdopt_cand_ctr] != 255)
2837
453k
    {
2838
453k
        WORD32 i;
2839
453k
        WORD32 found_flag = 0;
2840
2841
        /*1st candidate of TU=CU list and TU=CU/2 list must go through RDOPT stage
2842
        *irrespective of whether the cand is present in the mpm idx list or not
2843
        */
2844
453k
        if(rdopt_cand_ctr == 0)
2845
158k
        {
2846
158k
            rdopt_cand_ctr++;
2847
158k
            continue;
2848
158k
        }
2849
2850
901k
        for(i = 0; i < 3; i++)
2851
743k
        {
2852
743k
            if(pu1_intra_luma_modes[rdopt_cand_ctr] == au4_cand_mode_list[i])
2853
138k
            {
2854
138k
                found_flag = 1;
2855
138k
                break;
2856
138k
            }
2857
743k
        }
2858
2859
295k
        if(found_flag == 0)
2860
157k
        {
2861
157k
            pu1_luma_eval_mark[rdopt_cand_ctr] = 0;
2862
157k
        }
2863
2864
295k
        rdopt_cand_ctr++;
2865
295k
    }
2866
168k
}
2867
2868
/*!
2869
******************************************************************************
2870
* \if Function name : ihevce_intra_rdopt_cu_ntu \endif
2871
*
2872
* \brief
2873
*    Intra Coding unit function for RD opt mode
2874
*
2875
* \param[in] ps_ctxt    enc_loop module ctxt pointer
2876
* \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure
2877
* \param[in] pu1_luma_mode : pointer to luma mode
2878
* \param[in] ps_cu_analyse  pointer to cu analyse pointer
2879
* \param[in] pu1_src    pointer to source data buffer
2880
* \param[in] src_strd   source buffer stride
2881
* \param[in] pu1_cu_left pointer to left recon data buffer
2882
* \param[in] pu1_cu_top  pointer to top recon data buffer
2883
* \param[in] pu1_cu_top_left pointer to top left recon data buffer
2884
* \param[in] ps_left_nbr_4x4 : left 4x4 neighbour pointer
2885
* \param[in] ps_top_nbr_4x4 : top 4x4 neighbour pointer
2886
* \param[in] nbr_4x4_left_strd left nbr4x4 stride
2887
* \param[in] cu_left_stride left recon buffer stride
2888
* \param[in] curr_buf_idx RD opt buffer index for current usage
2889
* \param[in] func_proc_mode : function processing mode @sa TU_SIZE_WRT_CU_T
2890
*
2891
* \return
2892
*    RDopt cost
2893
*
2894
* \author
2895
*  Ittiam
2896
*
2897
*****************************************************************************
2898
*/
2899
LWORD64 ihevce_intra_rdopt_cu_ntu(
2900
    ihevce_enc_loop_ctxt_t *ps_ctxt,
2901
    enc_loop_cu_prms_t *ps_cu_prms,
2902
    void *pv_pred_org,
2903
    WORD32 pred_strd_org,
2904
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
2905
    UWORD8 *pu1_luma_mode,
2906
    cu_analyse_t *ps_cu_analyse,
2907
    void *pv_curr_src,
2908
    void *pv_cu_left,
2909
    void *pv_cu_top,
2910
    void *pv_cu_top_left,
2911
    nbr_4x4_t *ps_left_nbr_4x4,
2912
    nbr_4x4_t *ps_top_nbr_4x4,
2913
    WORD32 nbr_4x4_left_strd,
2914
    WORD32 cu_left_stride,
2915
    WORD32 curr_buf_idx,
2916
    WORD32 func_proc_mode,
2917
    WORD32 i4_alpha_stim_multiplier)
2918
1.39M
{
2919
1.39M
    enc_loop_cu_final_prms_t *ps_final_prms;
2920
1.39M
    nbr_avail_flags_t s_nbr;
2921
1.39M
    nbr_4x4_t *ps_nbr_4x4;
2922
1.39M
    nbr_4x4_t *ps_tmp_lt_4x4;
2923
1.39M
    recon_datastore_t *ps_recon_datastore;
2924
2925
1.39M
    ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
2926
2927
1.39M
    UWORD32 *pu4_nbr_flags;
2928
1.39M
    UWORD8 *pu1_intra_pred_mode;
2929
1.39M
    WORD32 cu_pos_x;
2930
1.39M
    WORD32 cu_pos_y;
2931
1.39M
    WORD32 trans_size = 0;
2932
1.39M
    UWORD8 *pu1_left;
2933
1.39M
    UWORD8 *pu1_top;
2934
1.39M
    UWORD8 *pu1_top_left;
2935
1.39M
    UWORD8 *pu1_recon;
2936
1.39M
    UWORD8 *pu1_csbf_buf;
2937
1.39M
    UWORD8 *pu1_ecd_data;
2938
1.39M
    WORD16 *pi2_deq_data;
2939
1.39M
    WORD32 deq_data_strd;
2940
1.39M
    LWORD64 total_rdopt_cost;
2941
1.39M
    WORD32 ctr;
2942
1.39M
    WORD32 left_strd;
2943
1.39M
    WORD32 i4_recon_stride;
2944
1.39M
    WORD32 csbf_strd;
2945
1.39M
    WORD32 ecd_data_bytes_cons;
2946
1.39M
    WORD32 num_4x4_in_tu;
2947
1.39M
    WORD32 num_4x4_in_cu;
2948
1.39M
    WORD32 chrm_present_flag;
2949
1.39M
    WORD32 tx_size;
2950
1.39M
    WORD32 cu_bits;
2951
1.39M
    WORD32 num_cu_parts = 0;
2952
1.39M
    WORD32 num_cands = 0;
2953
1.39M
    WORD32 cu_pos_x_8pelunits;
2954
1.39M
    WORD32 cu_pos_y_8pelunits;
2955
1.39M
    WORD32 i4_perform_rdoq;
2956
1.39M
    WORD32 i4_perform_sbh;
2957
1.39M
    UWORD8 u1_compute_spatial_ssd;
2958
1.39M
    UWORD8 u1_compute_recon;
2959
1.39M
    UWORD8 au1_intra_nxn_rdopt_ctxt_models[2][IHEVC_CAB_CTXT_END];
2960
2961
1.39M
    UWORD16 u2_num_tus_in_cu = 0;
2962
1.39M
    WORD32 is_sub_pu_in_hq = 0;
2963
    /* Get the RDOPT cost of the best CU mode for early_exit */
2964
1.39M
    LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
2965
    /* cabac context of prev intra luma pred flag */
2966
1.39M
    UWORD8 u1_prev_flag_cabac_ctxt =
2967
1.39M
        ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_INTRA_LUMA_PRED_FLAG];
2968
1.39M
    WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
2969
2970
1.39M
    UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY;
2971
2972
1.39M
    total_rdopt_cost = 0;
2973
1.39M
    ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
2974
1.39M
    ps_recon_datastore = &ps_final_prms->s_recon_datastore;
2975
1.39M
    i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
2976
1.39M
    csbf_strd = ps_ctxt->i4_cu_csbf_strd;
2977
1.39M
    pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
2978
1.39M
    pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
2979
1.39M
    pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
2980
1.39M
    deq_data_strd = ps_cu_analyse->u1_cu_size; /* deq_data stride is cu size */
2981
1.39M
    ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
2982
1.39M
    ps_tmp_lt_4x4 = ps_left_nbr_4x4;
2983
1.39M
    pu4_nbr_flags = &ps_final_prms->au4_nbr_flags[0];
2984
1.39M
    pu1_intra_pred_mode = &ps_final_prms->au1_intra_pred_mode[0];
2985
1.39M
    cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
2986
1.39M
    cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
2987
1.39M
    cu_pos_x_8pelunits = cu_pos_x;
2988
1.39M
    cu_pos_y_8pelunits = cu_pos_y;
2989
2990
    /* reset cu not coded cost */
2991
1.39M
    ps_ctxt->i8_cu_not_coded_cost = 0;
2992
2993
    /* based on the Processng mode */
2994
1.39M
    if(TU_EQ_CU == func_proc_mode)
2995
887k
    {
2996
887k
        ps_final_prms->u1_part_mode = SIZE_2Nx2N;
2997
887k
        trans_size = ps_cu_analyse->u1_cu_size;
2998
887k
        num_cu_parts = 1;
2999
887k
        num_cands = 1;
3000
887k
        u2_num_tus_in_cu = 1;
3001
887k
    }
3002
503k
    else if(TU_EQ_CU_DIV2 == func_proc_mode)
3003
318k
    {
3004
318k
        ps_final_prms->u1_part_mode = SIZE_2Nx2N;
3005
318k
        trans_size = ps_cu_analyse->u1_cu_size >> 1;
3006
318k
        num_cu_parts = 4;
3007
318k
        num_cands = 1;
3008
318k
        u2_num_tus_in_cu = 4;
3009
318k
    }
3010
185k
    else if(TU_EQ_SUBCU == func_proc_mode)
3011
185k
    {
3012
185k
        ps_final_prms->u1_part_mode = SIZE_NxN;
3013
185k
        trans_size = ps_cu_analyse->u1_cu_size >> 1;
3014
185k
        num_cu_parts = 4;
3015
        /*In HQ for TU = SUBPU, all 35 modes used for RDOPT instead of 3 modes */
3016
185k
        if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset)
3017
118k
        {
3018
118k
            if(ps_ctxt->i1_slice_type != BSLICE)
3019
115k
            {
3020
115k
                num_cands = (4 * MAX_INTRA_CU_CANDIDATES) + 2;
3021
115k
            }
3022
2.65k
            else
3023
2.65k
            {
3024
2.65k
                num_cands = (2 * MAX_INTRA_CU_CANDIDATES);
3025
2.65k
            }
3026
118k
        }
3027
66.5k
        else
3028
66.5k
        {
3029
66.5k
            num_cands = MAX_INTRA_CU_CANDIDATES;
3030
66.5k
        }
3031
185k
        u2_num_tus_in_cu = 4;
3032
185k
    }
3033
0
    else
3034
0
    {
3035
        /* should not enter here */
3036
0
        ASSERT(0);
3037
0
    }
3038
3039
1.39M
    if(ps_ctxt->i1_cu_qp_delta_enable)
3040
533k
    {
3041
533k
        ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, trans_size, 1);
3042
533k
    }
3043
3044
1.39M
    if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
3045
0
    {
3046
0
        ps_ctxt->i8_cl_ssd_lambda_qf =
3047
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
3048
0
             100.0f);
3049
0
        ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
3050
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
3051
0
             (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
3052
0
    }
3053
3054
1.39M
    u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
3055
357k
                             (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
3056
222k
                             CONVERT_SSDS_TO_SPATIAL_DOMAIN;
3057
3058
1.39M
    if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
3059
0
    {
3060
0
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
3061
0
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
3062
0
    }
3063
3064
    /* populate the neighbours */
3065
1.39M
    pu1_left = (UWORD8 *)pv_cu_left;
3066
1.39M
    pu1_top = (UWORD8 *)pv_cu_top;
3067
1.39M
    pu1_top_left = (UWORD8 *)pv_cu_top_left;
3068
1.39M
    left_strd = cu_left_stride;
3069
1.39M
    num_4x4_in_tu = (trans_size >> 2);
3070
1.39M
    num_4x4_in_cu = (ps_cu_analyse->u1_cu_size >> 2);
3071
1.39M
    chrm_present_flag = 1;
3072
1.39M
    ecd_data_bytes_cons = 0;
3073
1.39M
    cu_bits = 0;
3074
3075
    /* get the 4x4 level position of current cu */
3076
1.39M
    cu_pos_x = cu_pos_x << 1;
3077
1.39M
    cu_pos_y = cu_pos_y << 1;
3078
3079
    /* populate cu level params knowing that current is intra */
3080
1.39M
    ps_final_prms->u1_skip_flag = 0;
3081
1.39M
    ps_final_prms->u1_intra_flag = PRED_MODE_INTRA;
3082
1.39M
    ps_final_prms->u2_num_pus_in_cu = 1;
3083
    /*init the is_cu_coded flag*/
3084
1.39M
    ps_final_prms->u1_is_cu_coded = 0;
3085
1.39M
    ps_final_prms->u4_cu_sad = 0;
3086
3087
1.39M
    ps_final_prms->as_pu_enc_loop[0].b1_intra_flag = PRED_MODE_INTRA;
3088
1.39M
    ps_final_prms->as_pu_enc_loop[0].b4_wd = (trans_size >> 1) - 1;
3089
1.39M
    ps_final_prms->as_pu_enc_loop[0].b4_ht = (trans_size >> 1) - 1;
3090
1.39M
    ps_final_prms->as_pu_enc_loop[0].b4_pos_x = cu_pos_x;
3091
1.39M
    ps_final_prms->as_pu_enc_loop[0].b4_pos_y = cu_pos_y;
3092
1.39M
    ps_final_prms->as_pu_enc_loop[0].b1_merge_flag = 0;
3093
3094
1.39M
    ps_final_prms->as_col_pu_enc_loop[0].b1_intra_flag = 1;
3095
3096
    /* copy qp directly as intra can't be skip */
3097
1.39M
    ps_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
3098
1.39M
    ps_nbr_4x4->mv.s_l0_mv.i2_mvx = 0;
3099
1.39M
    ps_nbr_4x4->mv.s_l0_mv.i2_mvy = 0;
3100
1.39M
    ps_nbr_4x4->mv.s_l1_mv.i2_mvx = 0;
3101
1.39M
    ps_nbr_4x4->mv.s_l1_mv.i2_mvy = 0;
3102
1.39M
    ps_nbr_4x4->mv.i1_l0_ref_pic_buf_id = -1;
3103
1.39M
    ps_nbr_4x4->mv.i1_l1_ref_pic_buf_id = -1;
3104
1.39M
    ps_nbr_4x4->mv.i1_l0_ref_idx = -1;
3105
1.39M
    ps_nbr_4x4->mv.i1_l1_ref_idx = -1;
3106
3107
    /* RDOPT copy States :  TU init (best until prev TU) to current */
3108
1.39M
    memcpy(
3109
1.39M
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
3110
1.39M
             .s_cabac_ctxt.au1_ctxt_models[0],
3111
1.39M
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
3112
1.39M
        IHEVC_CAB_COEFFX_PREFIX);
3113
3114
    /* RDOPT copy States :update to init state if 0 cbf */
3115
1.39M
    memcpy(
3116
1.39M
        &au1_intra_nxn_rdopt_ctxt_models[0][0],
3117
1.39M
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
3118
1.39M
        IHEVC_CAB_COEFFX_PREFIX);
3119
1.39M
    memcpy(
3120
1.39M
        &au1_intra_nxn_rdopt_ctxt_models[1][0],
3121
1.39M
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
3122
1.39M
        IHEVC_CAB_COEFFX_PREFIX);
3123
3124
    /* loop for all partitions in CU  blocks */
3125
3.98M
    for(ctr = 0; ctr < num_cu_parts; ctr++)
3126
2.83M
    {
3127
2.83M
        UWORD8 *pu1_curr_mode;
3128
2.83M
        WORD32 cand_ctr;
3129
2.83M
        WORD32 nbr_flags;
3130
3131
        /* for NxN case to track the best mode       */
3132
        /* for other cases zeroth index will be used */
3133
2.83M
        intra_prev_rem_flags_t as_intra_prev_rem[2];
3134
2.83M
        LWORD64 ai8_cand_rdopt_cost[2];
3135
2.83M
        UWORD32 au4_tu_sad[2];
3136
2.83M
        WORD32 ai4_tu_bits[2];
3137
2.83M
        WORD32 ai4_cbf[2];
3138
2.83M
        WORD32 ai4_curr_bytes[2];
3139
2.83M
        WORD32 ai4_zero_col[2];
3140
2.83M
        WORD32 ai4_zero_row[2];
3141
        /* To store the pred, coeff and dequant for TU_EQ_SUBCU case (since mul.
3142
        cand. are there) ping-pong buffer to store the best and current */
3143
2.83M
        UWORD8 au1_cur_pred_data[2][MIN_TU_SIZE * MIN_TU_SIZE];
3144
2.83M
        UWORD8 au1_intra_coeffs[2][MAX_SCAN_COEFFS_BYTES_4x4];
3145
2.83M
        WORD16 ai2_intra_deq_coeffs[2][MIN_TU_SIZE * MIN_TU_SIZE];
3146
        /* Context models stored for RDopt store and restore purpose */
3147
3148
2.83M
        UWORD8 au1_recon_availability[2];
3149
3150
2.83M
        WORD32 best_cand_idx = 0;
3151
2.83M
        LWORD64 best_cand_cost = MAX_COST_64;
3152
        /* counters to toggle b/w best and current */
3153
2.83M
        WORD32 best_intra_buf_idx = 1;
3154
2.83M
        WORD32 curr_intra_buf_idx = 0;
3155
3156
        /* copy the mode pointer to be used in inner loop */
3157
2.83M
        pu1_curr_mode = pu1_luma_mode;
3158
3159
        /* get the neighbour availability flags */
3160
2.83M
        nbr_flags = ihevce_get_nbr_intra(
3161
2.83M
            &s_nbr,
3162
2.83M
            ps_ctxt->pu1_ctb_nbr_map,
3163
2.83M
            ps_ctxt->i4_nbr_map_strd,
3164
2.83M
            cu_pos_x,
3165
2.83M
            cu_pos_y,
3166
2.83M
            num_4x4_in_tu);
3167
3168
        /* copy the nbr flags for chroma reuse */
3169
2.83M
        if(4 != trans_size)
3170
1.23M
        {
3171
1.23M
            *pu4_nbr_flags = nbr_flags;
3172
1.23M
        }
3173
1.60M
        else if(1 == chrm_present_flag)
3174
409k
        {
3175
            /* compute the avail flags assuming luma trans is 8x8 */
3176
            /* get the neighbour availability flags */
3177
409k
            *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
3178
409k
                ps_ctxt->pu1_ctb_nbr_map,
3179
409k
                ps_ctxt->i4_nbr_map_strd,
3180
409k
                cu_pos_x,
3181
409k
                cu_pos_y,
3182
409k
                (num_4x4_in_tu << 1),
3183
409k
                (num_4x4_in_tu << 1));
3184
409k
        }
3185
3186
2.83M
        u1_compute_recon = !u1_compute_spatial_ssd && ((num_cu_parts > 1) && (ctr < 3));
3187
3188
2.83M
        if(!ctr && (u1_compute_spatial_ssd || u1_compute_recon))
3189
673k
        {
3190
673k
            ps_recon_datastore->u1_is_lumaRecon_available = 1;
3191
673k
        }
3192
2.15M
        else if(!ctr)
3193
717k
        {
3194
717k
            ps_recon_datastore->u1_is_lumaRecon_available = 0;
3195
717k
        }
3196
3197
2.83M
        ihevc_intra_pred_luma_ref_substitution_fptr =
3198
2.83M
            ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
3199
3200
        /* call reference array substitution */
3201
2.83M
        ihevc_intra_pred_luma_ref_substitution_fptr(
3202
2.83M
            pu1_top_left,
3203
2.83M
            pu1_top,
3204
2.83M
            pu1_left,
3205
2.83M
            left_strd,
3206
2.83M
            trans_size,
3207
2.83M
            nbr_flags,
3208
2.83M
            (UWORD8 *)ps_ctxt->pv_ref_sub_out,
3209
2.83M
            1);
3210
3211
        /* Intra Mode gating based on MPM cand list and encoder quality preset */
3212
2.83M
        if((ps_ctxt->i1_slice_type != ISLICE) && (TU_EQ_SUBCU == func_proc_mode) &&
3213
248k
           (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
3214
73.8k
        {
3215
73.8k
            ihevce_mpm_idx_based_filter_RDOPT_cand(
3216
73.8k
                ps_ctxt,
3217
73.8k
                ps_cu_analyse,
3218
73.8k
                ps_left_nbr_4x4,
3219
73.8k
                ps_top_nbr_4x4,
3220
73.8k
                pu1_luma_mode,
3221
73.8k
                &ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][0]);
3222
73.8k
        }
3223
3224
2.83M
        if((TU_EQ_SUBCU == func_proc_mode) && (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
3225
471k
           (ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr] >= MAX_INTRA_CU_CANDIDATES))
3226
471k
        {
3227
471k
            WORD32 ai4_mpm_mode_list[3];
3228
471k
            WORD32 i;
3229
3230
471k
            WORD32 i4_curr_index = ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr];
3231
3232
471k
            ihevce_populate_intra_pred_mode(
3233
471k
                ps_top_nbr_4x4->b6_luma_intra_mode,
3234
471k
                ps_tmp_lt_4x4->b6_luma_intra_mode,
3235
471k
                s_nbr.u1_top_avail,
3236
471k
                s_nbr.u1_left_avail,
3237
471k
                cu_pos_y,
3238
471k
                &ai4_mpm_mode_list[0]);
3239
3240
1.88M
            for(i = 0; i < 3; i++)
3241
1.41M
            {
3242
1.41M
                if(ps_cu_analyse->s_cu_intra_cand
3243
1.41M
                       .au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] == 0)
3244
299k
                {
3245
299k
                    ASSERT(ai4_mpm_mode_list[i] < 35);
3246
3247
299k
                    ps_cu_analyse->s_cu_intra_cand
3248
299k
                        .au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] = 1;
3249
299k
                    pu1_luma_mode[i4_curr_index] = ai4_mpm_mode_list[i];
3250
299k
                    ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr]++;
3251
299k
                    i4_curr_index++;
3252
299k
                }
3253
1.41M
            }
3254
3255
471k
            pu1_luma_mode[i4_curr_index] = 255;
3256
471k
        }
3257
3258
        /* loop over candidates for each partition */
3259
9.01M
        for(cand_ctr = 0; cand_ctr < num_cands; cand_ctr++)
3260
6.71M
        {
3261
6.71M
            WORD32 curr_pred_mode;
3262
6.71M
            WORD32 bits = 0;
3263
6.71M
            LWORD64 curr_cost;
3264
6.71M
            WORD32 luma_pred_func_idx;
3265
6.71M
            UWORD8 *pu1_curr_ecd_data;
3266
6.71M
            WORD16 *pi2_curr_deq_data;
3267
6.71M
            WORD32 curr_deq_data_strd;
3268
6.71M
            WORD32 pred_strd;
3269
6.71M
            UWORD8 *pu1_pred;
3270
3271
            /* if NXN case the recon and ecd data is stored in temp buffers */
3272
6.71M
            if(TU_EQ_SUBCU == func_proc_mode)
3273
4.62M
            {
3274
4.62M
                pu1_pred = &au1_cur_pred_data[curr_intra_buf_idx][0];
3275
4.62M
                pred_strd = trans_size;
3276
4.62M
                pu1_curr_ecd_data = &au1_intra_coeffs[curr_intra_buf_idx][0];
3277
4.62M
                pi2_curr_deq_data = &ai2_intra_deq_coeffs[curr_intra_buf_idx][0];
3278
4.62M
                curr_deq_data_strd = trans_size;
3279
3280
4.62M
                ASSERT(trans_size == MIN_TU_SIZE);
3281
4.62M
            }
3282
2.09M
            else
3283
2.09M
            {
3284
2.09M
                pu1_pred = (UWORD8 *)pv_pred_org;
3285
2.09M
                pred_strd = pred_strd_org;
3286
2.09M
                pu1_curr_ecd_data = pu1_ecd_data;
3287
2.09M
                pi2_curr_deq_data = pi2_deq_data;
3288
2.09M
                curr_deq_data_strd = deq_data_strd;
3289
2.09M
            }
3290
3291
6.71M
            pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[curr_intra_buf_idx]) +
3292
6.71M
                        (ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride;
3293
3294
6.71M
            if(is_sub_pu_in_hq == 1)
3295
0
            {
3296
0
                curr_pred_mode = cand_ctr;
3297
0
            }
3298
6.71M
            else
3299
6.71M
            {
3300
6.71M
                curr_pred_mode = pu1_curr_mode[cand_ctr];
3301
6.71M
            }
3302
3303
            /* If the candidate mode is 255, then break */
3304
6.71M
            if(255 == curr_pred_mode)
3305
539k
            {
3306
539k
                break;
3307
539k
            }
3308
6.17M
            else if(250 == curr_pred_mode)
3309
0
            {
3310
0
                continue;
3311
0
            }
3312
3313
            /* check if this mode needs to be evaluated or not. For 2nx2n cases, this   */
3314
            /* function will be called once per candidate, so this check has been done  */
3315
            /* outside this function call. For NxN case, this function will be called   */
3316
            /* only once, and all the candidates will be evaluated here.                */
3317
6.17M
            if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3)
3318
1.81M
            {
3319
1.81M
                if((TU_EQ_SUBCU == func_proc_mode) &&
3320
724k
                   (0 == ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][cand_ctr]))
3321
86.3k
                {
3322
86.3k
                    continue;
3323
86.3k
                }
3324
1.81M
            }
3325
3326
            /* call reference filtering */
3327
6.09M
            ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr(
3328
6.09M
                (UWORD8 *)ps_ctxt->pv_ref_sub_out,
3329
6.09M
                trans_size,
3330
6.09M
                (UWORD8 *)ps_ctxt->pv_ref_filt_out,
3331
6.09M
                curr_pred_mode,
3332
6.09M
                ps_ctxt->i1_strong_intra_smoothing_enable_flag);
3333
3334
            /* use the look up to get the function idx */
3335
6.09M
            luma_pred_func_idx = g_i4_ip_funcs[curr_pred_mode];
3336
3337
            /* call the intra prediction function */
3338
6.09M
            ps_ctxt->apf_lum_ip[luma_pred_func_idx](
3339
6.09M
                (UWORD8 *)ps_ctxt->pv_ref_filt_out,
3340
6.09M
                1,
3341
6.09M
                pu1_pred,
3342
6.09M
                pred_strd,
3343
6.09M
                trans_size,
3344
6.09M
                curr_pred_mode);
3345
3346
            /* populate the coeffs scan idx */
3347
6.09M
            ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
3348
3349
            /* for luma 4x4 and 8x8 transforms based on intra pred mode scan is chosen */
3350
6.09M
            if(trans_size < 16)
3351
5.79M
            {
3352
                /* for modes from 22 upto 30 horizontal scan is used */
3353
5.79M
                if((curr_pred_mode > 21) && (curr_pred_mode < 31))
3354
790k
                {
3355
790k
                    ps_ctxt->i4_scan_idx = SCAN_HORZ;
3356
790k
                }
3357
                /* for modes from 6 up to 14 vertical scan is used */
3358
5.00M
                else if((curr_pred_mode > 5) && (curr_pred_mode < 15))
3359
2.44M
                {
3360
2.44M
                    ps_ctxt->i4_scan_idx = SCAN_VERT;
3361
2.44M
                }
3362
5.79M
            }
3363
3364
            /* RDOPT copy States :  TU init (best until prev TU) to current */
3365
6.09M
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3366
6.09M
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
3367
6.09M
                        .s_cabac_ctxt.au1_ctxt_models[0] +
3368
6.09M
                    IHEVC_CAB_COEFFX_PREFIX,
3369
6.09M
                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
3370
6.09M
                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3371
3372
6.09M
            i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
3373
6.09M
            i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
3374
3375
#if DISABLE_RDOQ_INTRA
3376
            i4_perform_rdoq = 0;
3377
#endif
3378
3379
            /*2 Multi-dimensional array based on trans size  of rounding factor to be added here */
3380
            /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
3381
            /* Currently the complete array will contain only single value*/
3382
            /*The rounding factor is calculated with the formula
3383
            Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
3384
            rounding factor = (1 - DeadZone Val)
3385
3386
            Assumption: Cabac states of All the sub-blocks in the TU are considered independent
3387
            */
3388
6.09M
            if((ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING))
3389
4.35M
            {
3390
4.35M
                if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0))
3391
0
                {
3392
0
                    double i4_lamda_modifier;
3393
3394
0
                    if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
3395
0
                    {
3396
0
                        i4_lamda_modifier =
3397
0
                            ps_ctxt->i4_lamda_modifier *
3398
0
                            CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
3399
0
                    }
3400
0
                    else
3401
0
                    {
3402
0
                        i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
3403
0
                    }
3404
0
                    if(ps_ctxt->i4_use_const_lamda_modifier)
3405
0
                    {
3406
0
                        if(ISLICE == ps_ctxt->i1_slice_type)
3407
0
                        {
3408
0
                            i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
3409
0
                        }
3410
0
                        else
3411
0
                        {
3412
0
                            i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
3413
0
                        }
3414
0
                    }
3415
3416
0
                    ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
3417
0
                        &ps_ctxt->i4_quant_round_tu[0][0];
3418
0
                    ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
3419
0
                        &ps_ctxt->i4_quant_round_tu[1][0];
3420
3421
0
                    memset(
3422
0
                        ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
3423
0
                        0,
3424
0
                        trans_size * trans_size * sizeof(WORD32));
3425
0
                    memset(
3426
0
                        ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
3427
0
                        0,
3428
0
                        trans_size * trans_size * sizeof(WORD32));
3429
3430
0
                    ihevce_quant_rounding_factor_gen(
3431
0
                        trans_size,
3432
0
                        1,
3433
0
                        &ps_ctxt->s_rdopt_entropy_ctxt,
3434
0
                        ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
3435
0
                        ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
3436
0
                        i4_lamda_modifier,
3437
0
                        1);
3438
0
                }
3439
4.35M
                else
3440
4.35M
                {
3441
4.35M
                    ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
3442
4.35M
                        ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3];
3443
4.35M
                    ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
3444
4.35M
                        ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3];
3445
4.35M
                }
3446
4.35M
            }
3447
3448
            /* call T Q IT IQ and recon function */
3449
6.09M
            ai4_cbf[curr_intra_buf_idx] = ihevce_t_q_iq_ssd_scan_fxn(
3450
6.09M
                ps_ctxt,
3451
6.09M
                pu1_pred,
3452
6.09M
                pred_strd,
3453
6.09M
                (UWORD8 *)pv_curr_src,
3454
6.09M
                src_strd,
3455
6.09M
                pi2_curr_deq_data,
3456
6.09M
                curr_deq_data_strd,
3457
6.09M
                pu1_recon,
3458
6.09M
                i4_recon_stride,
3459
6.09M
                pu1_curr_ecd_data,
3460
6.09M
                pu1_csbf_buf,
3461
6.09M
                csbf_strd,
3462
6.09M
                trans_size,
3463
6.09M
                PRED_MODE_INTRA,
3464
6.09M
                &ai8_cand_rdopt_cost[curr_intra_buf_idx],
3465
6.09M
                &ai4_curr_bytes[curr_intra_buf_idx],
3466
6.09M
                &ai4_tu_bits[curr_intra_buf_idx],
3467
6.09M
                &au4_tu_sad[curr_intra_buf_idx],
3468
6.09M
                &ai4_zero_col[curr_intra_buf_idx],
3469
6.09M
                &ai4_zero_row[curr_intra_buf_idx],
3470
6.09M
                &au1_recon_availability[curr_intra_buf_idx],
3471
6.09M
                i4_perform_rdoq,
3472
6.09M
                i4_perform_sbh,
3473
6.09M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
3474
6.09M
                i4_alpha_stim_multiplier,
3475
6.09M
                u1_is_cu_noisy,
3476
6.09M
#endif
3477
6.09M
                u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
3478
6.09M
                1 /*early_cbf */
3479
6.09M
            );
3480
3481
#if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
3482
            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
3483
            {
3484
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
3485
                ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
3486
                    pv_curr_src,
3487
                    src_strd,
3488
                    pu1_pred,
3489
                    pred_strd,
3490
                    ai8_cand_rdopt_cost[curr_intra_buf_idx],
3491
                    i4_alpha_stim_multiplier,
3492
                    trans_size,
3493
                    0,
3494
                    ps_ctxt->u1_enable_psyRDOPT,
3495
                    NULL_PLANE);
3496
#else
3497
                if(u1_compute_spatial_ssd && au1_recon_availability[curr_intra_buf_idx])
3498
                {
3499
                    ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
3500
                        pv_curr_src,
3501
                        src_strd,
3502
                        pu1_recon,
3503
                        i4_recon_stride,
3504
                        ai8_cand_rdopt_cost[curr_intra_buf_idx],
3505
                        i4_alpha_stim_multiplier,
3506
                        trans_size,
3507
                        0,
3508
                        ps_ctxt->u1_enable_psyRDOPT,
3509
                        NULL_PLANE);
3510
                }
3511
                else
3512
                {
3513
                    ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
3514
                        pv_curr_src,
3515
                        src_strd,
3516
                        pu1_pred,
3517
                        pred_strd,
3518
                        ai8_cand_rdopt_cost[curr_intra_buf_idx],
3519
                        i4_alpha_stim_multiplier,
3520
                        trans_size,
3521
                        0,
3522
                        ps_ctxt->u1_enable_psyRDOPT,
3523
                        NULL_PLANE);
3524
                }
3525
#endif
3526
            }
3527
#endif
3528
3529
6.09M
            if(TU_EQ_SUBCU == func_proc_mode)
3530
3.99M
            {
3531
3.99M
                ASSERT(ai4_curr_bytes[curr_intra_buf_idx] < MAX_SCAN_COEFFS_BYTES_4x4);
3532
3.99M
            }
3533
3534
            /* based on CBF/No CBF copy the corresponding state */
3535
6.09M
            if(0 == ai4_cbf[curr_intra_buf_idx])
3536
1.46M
            {
3537
                /* RDOPT copy States :update to init state if 0 cbf */
3538
1.46M
                COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3539
1.46M
                    &au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] +
3540
1.46M
                        IHEVC_CAB_COEFFX_PREFIX,
3541
1.46M
                    &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
3542
1.46M
                    IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3543
1.46M
            }
3544
4.63M
            else
3545
4.63M
            {
3546
                /* RDOPT copy States :update to new state only if CBF is non zero */
3547
4.63M
                COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3548
4.63M
                    &au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] +
3549
4.63M
                        IHEVC_CAB_COEFFX_PREFIX,
3550
4.63M
                    &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
3551
4.63M
                            .s_cabac_ctxt.au1_ctxt_models[0] +
3552
4.63M
                        IHEVC_CAB_COEFFX_PREFIX,
3553
4.63M
                    IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3554
4.63M
            }
3555
3556
            /* call the function which perform intra mode prediction */
3557
6.09M
            ihevce_intra_pred_mode_signaling(
3558
6.09M
                ps_top_nbr_4x4->b6_luma_intra_mode,
3559
6.09M
                ps_tmp_lt_4x4->b6_luma_intra_mode,
3560
6.09M
                s_nbr.u1_top_avail,
3561
6.09M
                s_nbr.u1_left_avail,
3562
6.09M
                cu_pos_y,
3563
6.09M
                curr_pred_mode,
3564
6.09M
                &as_intra_prev_rem[curr_intra_buf_idx]);
3565
            /******************************************************************/
3566
            /* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN
3567
            The bits for these are evaluated for every RDO mode of current subcu
3568
            as they can significantly contribute to RDO cost.  Note that these
3569
            bits are not accounted for here (ai8_cand_rdopt_cost) as they
3570
            are accounted for in encode_cu call later */
3571
3572
            /******************************************************************/
3573
            /* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN
3574
            The bits for these are evaluated for every RDO mode of current subcu
3575
            as they can significantly contribute to RDO cost.  Note that these
3576
            bits are not accounted for here (ai8_cand_rdopt_cost) as they
3577
            are accounted for in encode_cu call later */
3578
3579
            /* Estimate bits to encode prev rem flag  for NXN mode */
3580
6.09M
            {
3581
6.09M
                WORD32 bits_frac = gau2_ihevce_cabac_bin_to_bits
3582
6.09M
                    [u1_prev_flag_cabac_ctxt ^
3583
6.09M
                     as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag];
3584
3585
                /* rounding the fractional bits to nearest integer */
3586
6.09M
                bits = ((bits_frac + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q);
3587
6.09M
            }
3588
3589
            /* based on prev flag all the mpmidx bits and rem bits */
3590
6.09M
            if(1 == as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag)
3591
3.40M
            {
3592
                /* mpm_idx */
3593
3.40M
                bits += as_intra_prev_rem[curr_intra_buf_idx].b2_mpm_idx ? 2 : 1;
3594
3.40M
            }
3595
2.68M
            else
3596
2.68M
            {
3597
                /* rem intra mode */
3598
2.68M
                bits += 5;
3599
2.68M
            }
3600
3601
6.09M
            bits += ai4_tu_bits[curr_intra_buf_idx];
3602
3603
            /* compute the total cost for current candidate */
3604
6.09M
            curr_cost = ai8_cand_rdopt_cost[curr_intra_buf_idx];
3605
3606
            /* get the final ssd cost */
3607
6.09M
            curr_cost +=
3608
6.09M
                COMPUTE_RATE_COST_CLIP30(bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
3609
3610
            /* check of the best candidate cost */
3611
6.09M
            if(curr_cost < best_cand_cost)
3612
3.37M
            {
3613
3.37M
                best_cand_cost = curr_cost;
3614
3.37M
                best_cand_idx = cand_ctr;
3615
3.37M
                best_intra_buf_idx = curr_intra_buf_idx;
3616
3.37M
                curr_intra_buf_idx = !curr_intra_buf_idx;
3617
3.37M
            }
3618
6.09M
        }
3619
3620
        /***************    For TU_EQ_SUBCU case    *****************/
3621
        /* Copy the pred for best cand. to the final pred array     */
3622
        /* Copy the iq-coeff for best cand. to the final array      */
3623
        /* copy the best coeffs data to final buffer                */
3624
2.83M
        if(TU_EQ_SUBCU == func_proc_mode)
3625
735k
        {
3626
            /* Copy the pred for best cand. to the final pred array */
3627
3628
735k
            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
3629
735k
                (UWORD8 *)pv_pred_org,
3630
735k
                pred_strd_org,
3631
735k
                &au1_cur_pred_data[best_intra_buf_idx][0],
3632
735k
                trans_size,
3633
735k
                trans_size,
3634
735k
                trans_size);
3635
3636
            /* Copy the deq-coeff for best cand. to the final array */
3637
3638
735k
            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
3639
735k
                (UWORD8 *)pi2_deq_data,
3640
735k
                deq_data_strd << 1,
3641
735k
                (UWORD8 *)&ai2_intra_deq_coeffs[best_intra_buf_idx][0],
3642
735k
                trans_size << 1,
3643
735k
                trans_size << 1,
3644
735k
                trans_size);
3645
            /* copy the coeffs to final cu ecd bytes buffer */
3646
735k
            memcpy(
3647
735k
                pu1_ecd_data,
3648
735k
                &au1_intra_coeffs[best_intra_buf_idx][0],
3649
735k
                ai4_curr_bytes[best_intra_buf_idx]);
3650
3651
735k
            pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[best_intra_buf_idx]) +
3652
735k
                        (ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride;
3653
735k
        }
3654
3655
        /*----------   Calculate Recon for the best INTRA mode     ---------*/
3656
        /* TU_EQ_CU case : No need for recon, otherwise recon is required   */
3657
        /* Compute recon only for the best mode for TU_EQ_SUBCU case        */
3658
2.83M
        if(u1_compute_recon)
3659
1.33M
        {
3660
1.33M
            ihevce_it_recon_fxn(
3661
1.33M
                ps_ctxt,
3662
1.33M
                pi2_deq_data,
3663
1.33M
                deq_data_strd,
3664
1.33M
                (UWORD8 *)pv_pred_org,
3665
1.33M
                pred_strd_org,
3666
1.33M
                pu1_recon,
3667
1.33M
                i4_recon_stride,
3668
1.33M
                pu1_ecd_data,
3669
1.33M
                trans_size,
3670
1.33M
                PRED_MODE_INTRA,
3671
1.33M
                ai4_cbf[best_intra_buf_idx],
3672
1.33M
                ai4_zero_col[best_intra_buf_idx],
3673
1.33M
                ai4_zero_row[best_intra_buf_idx]);
3674
3675
1.33M
            ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx;
3676
1.33M
        }
3677
1.50M
        else if(u1_compute_spatial_ssd && au1_recon_availability[best_intra_buf_idx])
3678
364k
        {
3679
364k
            ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx;
3680
364k
        }
3681
1.13M
        else
3682
1.13M
        {
3683
1.13M
            ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
3684
1.13M
        }
3685
3686
        /* RDOPT copy States :update to best modes state */
3687
2.83M
        COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3688
2.83M
            &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
3689
2.83M
            &au1_intra_nxn_rdopt_ctxt_models[best_intra_buf_idx][0] + IHEVC_CAB_COEFFX_PREFIX,
3690
2.83M
            IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3691
3692
        /* copy the prev,mpm_idx and rem modes from best cand */
3693
2.83M
        ps_final_prms->as_intra_prev_rem[ctr] = as_intra_prev_rem[best_intra_buf_idx];
3694
3695
        /* update the cabac context of prev intra pred mode flag */
3696
2.83M
        u1_prev_flag_cabac_ctxt = gau1_ihevc_next_state
3697
2.83M
            [(u1_prev_flag_cabac_ctxt << 1) |
3698
2.83M
             as_intra_prev_rem[best_intra_buf_idx].b1_prev_intra_luma_pred_flag];
3699
3700
        /* accumulate the TU bits into cu bits */
3701
2.83M
        cu_bits += ai4_tu_bits[best_intra_buf_idx];
3702
3703
        /* copy the intra pred mode for chroma reuse */
3704
2.83M
        if(is_sub_pu_in_hq == 0)
3705
2.83M
        {
3706
2.83M
            *pu1_intra_pred_mode = pu1_curr_mode[best_cand_idx];
3707
2.83M
        }
3708
0
        else
3709
0
        {
3710
0
            *pu1_intra_pred_mode = best_cand_idx;
3711
0
        }
3712
3713
        /* Store luma mode as chroma mode. If chroma prcs happens, and
3714
        if a diff. mode wins, it should update this!! */
3715
2.83M
        if(1 == chrm_present_flag)
3716
1.64M
        {
3717
1.64M
            if(is_sub_pu_in_hq == 0)
3718
1.64M
            {
3719
1.64M
                ps_final_prms->u1_chroma_intra_pred_actual_mode =
3720
1.64M
                    ((ps_ctxt->u1_chroma_array_type == 2)
3721
1.64M
                         ? gau1_chroma422_intra_angle_mapping[pu1_curr_mode[best_cand_idx]]
3722
1.64M
                         : pu1_curr_mode[best_cand_idx]);
3723
1.64M
            }
3724
0
            else
3725
0
            {
3726
0
                ps_final_prms->u1_chroma_intra_pred_actual_mode =
3727
0
                    ((ps_ctxt->u1_chroma_array_type == 2)
3728
0
                         ? gau1_chroma422_intra_angle_mapping[best_cand_idx]
3729
0
                         : best_cand_idx);
3730
0
            }
3731
3732
1.64M
            ps_final_prms->u1_chroma_intra_pred_mode = 4;
3733
1.64M
        }
3734
3735
        /*remember the cbf flag to replicate qp for 4x4 neighbour*/
3736
2.83M
        ps_final_prms->u1_is_cu_coded |= ai4_cbf[best_intra_buf_idx];
3737
3738
        /*accumulate ssd over all TU of intra CU*/
3739
2.83M
        ps_final_prms->u4_cu_sad += au4_tu_sad[best_intra_buf_idx];
3740
3741
        /* update the bytes */
3742
2.83M
        ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
3743
2.83M
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed =
3744
2.83M
            ai4_curr_bytes[best_intra_buf_idx];
3745
        /* update the zero_row and col info for the final mode */
3746
2.83M
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col =
3747
2.83M
            ai4_zero_col[best_intra_buf_idx];
3748
2.83M
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row =
3749
2.83M
            ai4_zero_row[best_intra_buf_idx];
3750
3751
2.83M
        ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
3752
3753
        /* update the total bytes cons */
3754
2.83M
        ecd_data_bytes_cons += ai4_curr_bytes[best_intra_buf_idx];
3755
2.83M
        pu1_ecd_data += ai4_curr_bytes[best_intra_buf_idx];
3756
3757
2.83M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = ai4_cbf[best_intra_buf_idx];
3758
2.83M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
3759
2.83M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
3760
2.83M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
3761
2.83M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
3762
2.83M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag;
3763
2.83M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp;
3764
2.83M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0;
3765
2.83M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0;
3766
2.83M
        GETRANGE(tx_size, trans_size);
3767
2.83M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
3768
2.83M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x;
3769
2.83M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y;
3770
3771
        /* repiclate the nbr 4x4 structure for all 4x4 blocks current TU */
3772
2.83M
        ps_nbr_4x4->b1_skip_flag = 0;
3773
2.83M
        ps_nbr_4x4->b1_intra_flag = 1;
3774
2.83M
        ps_nbr_4x4->b1_pred_l0_flag = 0;
3775
2.83M
        ps_nbr_4x4->b1_pred_l1_flag = 0;
3776
3777
2.83M
        if(is_sub_pu_in_hq == 0)
3778
2.83M
        {
3779
2.83M
            ps_nbr_4x4->b6_luma_intra_mode = pu1_curr_mode[best_cand_idx];
3780
2.83M
        }
3781
0
        else
3782
0
        {
3783
0
            ps_nbr_4x4->b6_luma_intra_mode = best_cand_idx;
3784
0
        }
3785
3786
2.83M
        ps_nbr_4x4->b1_y_cbf = ai4_cbf[best_intra_buf_idx];
3787
3788
        /* since tu size can be less than cusize, replication is done with strd */
3789
2.83M
        {
3790
2.83M
            WORD32 i, j;
3791
2.83M
            nbr_4x4_t *ps_tmp_4x4;
3792
3793
2.83M
            ps_tmp_4x4 = ps_nbr_4x4;
3794
3795
7.72M
            for(i = 0; i < num_4x4_in_tu; i++)
3796
4.89M
            {
3797
17.8M
                for(j = 0; j < num_4x4_in_tu; j++)
3798
12.9M
                {
3799
12.9M
                    ps_tmp_4x4[j] = *ps_nbr_4x4;
3800
12.9M
                }
3801
                /* row level update*/
3802
4.89M
                ps_tmp_4x4 += num_4x4_in_cu;
3803
4.89M
            }
3804
2.83M
        }
3805
3806
2.83M
        if(TU_EQ_SUBCU == func_proc_mode)
3807
735k
        {
3808
735k
            pu1_luma_mode += ((MAX_INTRA_CU_CANDIDATES * 4) + 2 + 1);
3809
735k
        }
3810
3811
2.83M
        if((num_cu_parts > 1) && (ctr < 3))
3812
1.47M
        {
3813
            /* set the neighbour map to 1 */
3814
1.47M
            ihevce_set_nbr_map(
3815
1.47M
                ps_ctxt->pu1_ctb_nbr_map,
3816
1.47M
                ps_ctxt->i4_nbr_map_strd,
3817
1.47M
                cu_pos_x,
3818
1.47M
                cu_pos_y,
3819
1.47M
                trans_size >> 2,
3820
1.47M
                1);
3821
3822
            /* block level updates block number (1 & 3 )*/
3823
1.47M
            pv_curr_src = (UWORD8 *)pv_curr_src + trans_size;
3824
1.47M
            pv_pred_org = (UWORD8 *)pv_pred_org + trans_size;
3825
1.47M
            pi2_deq_data += trans_size;
3826
3827
1.47M
            switch(ctr)
3828
1.47M
            {
3829
503k
            case 0:
3830
503k
            {
3831
503k
                pu1_left = pu1_recon + trans_size - 1;
3832
503k
                pu1_top += trans_size;
3833
503k
                pu1_top_left = pu1_top - 1;
3834
503k
                left_strd = i4_recon_stride;
3835
3836
503k
                break;
3837
0
            }
3838
492k
            case 1:
3839
492k
            {
3840
492k
                ASSERT(
3841
492k
                    (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 0) ||
3842
492k
                    (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 1));
3843
3844
                /* Since the 'lumaRefSubstitution' function expects both Top and */
3845
                /* TopRight recon pixels to be present in the same buffer */
3846
492k
                if(ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] !=
3847
492k
                   ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1])
3848
70.3k
                {
3849
70.3k
                    UWORD8 *pu1_src =
3850
70.3k
                        ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3851
70.3k
                             [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) +
3852
70.3k
                        trans_size;
3853
70.3k
                    UWORD8 *pu1_dst =
3854
70.3k
                        ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3855
70.3k
                             [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) +
3856
70.3k
                        trans_size;
3857
3858
70.3k
                    ps_ctxt->s_cmn_opt_func.pf_copy_2d(
3859
70.3k
                        pu1_dst, i4_recon_stride, pu1_src, i4_recon_stride, trans_size, trans_size);
3860
3861
70.3k
                    ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] =
3862
70.3k
                        ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0];
3863
70.3k
                }
3864
3865
492k
                pu1_left = (UWORD8 *)pv_cu_left + trans_size * cu_left_stride;
3866
492k
                pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3867
492k
                               [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) +
3868
492k
                          (trans_size - 1) * i4_recon_stride;
3869
492k
                pu1_top_left = pu1_left - cu_left_stride;
3870
492k
                left_strd = cu_left_stride;
3871
3872
492k
                break;
3873
492k
            }
3874
481k
            case 2:
3875
481k
            {
3876
481k
                ASSERT(
3877
481k
                    (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 0) ||
3878
481k
                    (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 1));
3879
3880
481k
                pu1_left = pu1_recon + trans_size - 1;
3881
481k
                pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3882
481k
                               [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) +
3883
481k
                          (trans_size - 1) * i4_recon_stride + trans_size;
3884
481k
                pu1_top_left = pu1_top - 1;
3885
481k
                left_strd = i4_recon_stride;
3886
3887
481k
                break;
3888
481k
            }
3889
1.47M
            }
3890
3891
1.47M
            pu1_csbf_buf += num_4x4_in_tu;
3892
1.47M
            cu_pos_x += num_4x4_in_tu;
3893
1.47M
            ps_nbr_4x4 += num_4x4_in_tu;
3894
1.47M
            ps_top_nbr_4x4 += num_4x4_in_tu;
3895
1.47M
            ps_tmp_lt_4x4 = ps_nbr_4x4 - 1;
3896
3897
1.47M
            pu1_intra_pred_mode++;
3898
3899
            /* after 2 blocks increment the pointers to bottom blocks */
3900
1.47M
            if(1 == ctr)
3901
492k
            {
3902
492k
                pv_curr_src = (UWORD8 *)pv_curr_src - (trans_size << 1);
3903
492k
                pv_curr_src = (UWORD8 *)pv_curr_src + (trans_size * src_strd);
3904
3905
492k
                pv_pred_org = (UWORD8 *)pv_pred_org - (trans_size << 1);
3906
492k
                pv_pred_org = (UWORD8 *)pv_pred_org + (trans_size * pred_strd_org);
3907
492k
                pi2_deq_data -= (trans_size << 1);
3908
492k
                pi2_deq_data += (trans_size * deq_data_strd);
3909
3910
492k
                pu1_csbf_buf -= (num_4x4_in_tu << 1);
3911
492k
                pu1_csbf_buf += (num_4x4_in_tu * csbf_strd);
3912
3913
492k
                ps_nbr_4x4 -= (num_4x4_in_tu << 1);
3914
492k
                ps_nbr_4x4 += (num_4x4_in_tu * num_4x4_in_cu);
3915
492k
                ps_top_nbr_4x4 = ps_nbr_4x4 - num_4x4_in_cu;
3916
492k
                ps_tmp_lt_4x4 = ps_left_nbr_4x4 + (num_4x4_in_tu * nbr_4x4_left_strd);
3917
3918
                /* decrement pos x to start */
3919
492k
                cu_pos_x -= (num_4x4_in_tu << 1);
3920
492k
                cu_pos_y += num_4x4_in_tu;
3921
492k
            }
3922
1.47M
        }
3923
3924
2.83M
#if RDOPT_ENABLE
3925
        /* compute the RDOPT cost for the current TU */
3926
2.83M
        ai8_cand_rdopt_cost[best_intra_buf_idx] += COMPUTE_RATE_COST_CLIP30(
3927
2.83M
            ai4_tu_bits[best_intra_buf_idx], ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
3928
2.83M
#endif
3929
3930
        /* accumulate the costs */
3931
2.83M
        total_rdopt_cost += ai8_cand_rdopt_cost[best_intra_buf_idx];
3932
3933
2.83M
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
3934
2.83M
        {
3935
            /* Early exit : If the current running cost exceeds
3936
            the prev. best mode cost, break */
3937
2.83M
            if(total_rdopt_cost > prev_best_rdopt_cost)
3938
240k
            {
3939
240k
                return (total_rdopt_cost);
3940
240k
            }
3941
2.83M
        }
3942
3943
        /* if transform size is 4x4 then only first luma 4x4 will have chroma */
3944
2.59M
        chrm_present_flag = (4 != trans_size) ? 1 : INTRA_PRED_CHROMA_IDX_NONE;
3945
3946
2.59M
        pu4_nbr_flags++;
3947
2.59M
    }
3948
    /* Modify the cost function for this CU. */
3949
    /* loop in for 8x8 blocks */
3950
1.15M
    if(ps_ctxt->u1_enable_psyRDOPT)
3951
0
    {
3952
0
        UWORD8 *pu1_recon_cu;
3953
0
        WORD32 recon_stride;
3954
0
        WORD32 curr_pos_x;
3955
0
        WORD32 curr_pos_y;
3956
0
        WORD32 start_index;
3957
0
        WORD32 num_horz_cu_in_ctb;
3958
0
        WORD32 cu_size;
3959
0
        WORD32 had_block_size;
3960
3961
        /* tODO: sreenivasa ctb size has to be used appropriately */
3962
0
        had_block_size = 8;
3963
0
        cu_size = ps_cu_analyse->u1_cu_size; /* todo */
3964
0
        num_horz_cu_in_ctb = 64 / had_block_size;
3965
3966
0
        curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
3967
0
        curr_pos_y = ps_cu_analyse->b3_cu_pos_y << 3; /* pel units */
3968
0
        recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
3969
0
        pu1_recon_cu =
3970
0
            ((UWORD8 *)ps_final_prms->s_recon_datastore
3971
0
                 .apv_luma_recon_bufs[ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]);
3972
        /* + \  curr_pos_x + curr_pos_y * recon_stride; */
3973
3974
        /* start index to index the source satd of curr cu in the current ctb */
3975
0
        start_index =
3976
0
            (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
3977
3978
0
        {
3979
0
            total_rdopt_cost += ihevce_psy_rd_cost(
3980
0
                ps_ctxt->ai4_source_satd_8x8,
3981
0
                pu1_recon_cu,
3982
0
                recon_stride,
3983
0
                1,  //
3984
0
                cu_size,
3985
0
                0,  // pic type
3986
0
                0,  //layer id
3987
0
                ps_ctxt->i4_satd_lamda,  // lambda
3988
0
                start_index,
3989
0
                ps_ctxt->u1_is_input_data_hbd,
3990
0
                ps_ctxt->u4_psy_strength,
3991
0
                &ps_ctxt->s_cmn_opt_func
3992
3993
0
            );  // 8 bit
3994
0
        }
3995
0
    }
3996
3997
#if !FORCE_INTRA_TU_DEPTH_TO_0  //RATIONALISE_NUM_RDO_MODES_IN_PQ_AND_HQ
3998
1.15M
    if(TU_EQ_SUBCU == func_proc_mode)
3999
178k
    {
4000
178k
        UWORD8 au1_tu_eq_cu_div2_modes[4];
4001
178k
        UWORD8 au1_freq_of_mode[4];
4002
4003
178k
        WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D(
4004
178k
            ps_final_prms->au1_intra_pred_mode, au1_tu_eq_cu_div2_modes, au1_freq_of_mode, 4);
4005
4006
178k
        if(1 == i4_num_clusters)
4007
23.4k
        {
4008
23.4k
            ps_final_prms->u2_num_pus_in_cu = 1;
4009
23.4k
            ps_final_prms->u1_part_mode = SIZE_2Nx2N;
4010
23.4k
        }
4011
178k
    }
4012
1.15M
#endif
4013
4014
    /* store the num TUs*/
4015
1.15M
    ps_final_prms->u2_num_tus_in_cu = u2_num_tus_in_cu;
4016
4017
    /* update the bytes consumed */
4018
1.15M
    ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
4019
4020
    /* store the current cu size to final prms */
4021
1.15M
    ps_final_prms->u1_cu_size = ps_cu_analyse->u1_cu_size;
4022
4023
    /* cu bits will be having luma residual bits till this point    */
4024
    /* if zero_cbf eval is disabled then cu bits will be zero       */
4025
1.15M
    ps_final_prms->u4_cu_luma_res_bits = cu_bits;
4026
4027
    /* ------------- Chroma processing -------------- */
4028
    /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEED preset */
4029
1.15M
    if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
4030
896k
    {
4031
896k
        LWORD64 chrm_rdopt_cost;
4032
896k
        WORD32 chrm_rdopt_tu_bits;
4033
4034
        /* Store the current RDOPT cost to enable early exit in chrom_prcs */
4035
896k
        ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
4036
4037
896k
        chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
4038
896k
            ps_ctxt,
4039
896k
            curr_buf_idx,
4040
896k
            func_proc_mode,
4041
896k
            ps_chrm_cu_buf_prms->pu1_curr_src,
4042
896k
            ps_chrm_cu_buf_prms->i4_chrm_src_stride,
4043
896k
            ps_chrm_cu_buf_prms->pu1_cu_left,
4044
896k
            ps_chrm_cu_buf_prms->pu1_cu_top,
4045
896k
            ps_chrm_cu_buf_prms->pu1_cu_top_left,
4046
896k
            ps_chrm_cu_buf_prms->i4_cu_left_stride,
4047
896k
            cu_pos_x_8pelunits,
4048
896k
            cu_pos_y_8pelunits,
4049
896k
            &chrm_rdopt_tu_bits,
4050
896k
            i4_alpha_stim_multiplier,
4051
896k
            u1_is_cu_noisy);
4052
4053
896k
#if WEIGH_CHROMA_COST
4054
896k
        chrm_rdopt_cost = (LWORD64)(
4055
896k
            (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
4056
896k
             (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
4057
896k
            CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
4058
896k
#endif
4059
4060
896k
#if CHROMA_RDOPT_ENABLE
4061
896k
        total_rdopt_cost += chrm_rdopt_cost;
4062
896k
#endif
4063
896k
        cu_bits += chrm_rdopt_tu_bits;
4064
4065
        /* cu bits for chroma residual if chroma rdopt is on       */
4066
        /* if zero_cbf eval is disabled then cu bits will be zero  */
4067
896k
        ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
4068
4069
896k
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
4070
896k
        {
4071
            /* Early exit : If the current running cost exceeds
4072
            the prev. best mode cost, break */
4073
896k
            if(total_rdopt_cost > prev_best_rdopt_cost)
4074
171k
            {
4075
171k
                return (total_rdopt_cost);
4076
171k
            }
4077
896k
        }
4078
896k
    }
4079
253k
    else
4080
253k
    {}
4081
4082
    /* RDOPT copy States :  Best after all luma TUs to current */
4083
978k
    COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4084
978k
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4085
978k
                .s_cabac_ctxt.au1_ctxt_models[0] +
4086
978k
            IHEVC_CAB_COEFFX_PREFIX,
4087
978k
        &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4088
978k
        IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4089
4090
    /* get the neighbour availability flags for current cu  */
4091
978k
    ihevce_get_only_nbr_flag(
4092
978k
        &s_nbr,
4093
978k
        ps_ctxt->pu1_ctb_nbr_map,
4094
978k
        ps_ctxt->i4_nbr_map_strd,
4095
978k
        (cu_pos_x_8pelunits << 1),
4096
978k
        (cu_pos_y_8pelunits << 1),
4097
978k
        (trans_size << 1),
4098
978k
        (trans_size << 1));
4099
4100
    /* call the entropy rdo encode to get the bit estimate for current cu */
4101
    /*if ZERO_CBF eval is enabled then this function will return only CU header bits */
4102
978k
    {
4103
        /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
4104
978k
        WORD32 cbf_bits, header_bits;
4105
4106
978k
        header_bits = ihevce_entropy_rdo_encode_cu(
4107
978k
            &ps_ctxt->s_rdopt_entropy_ctxt,
4108
978k
            ps_final_prms,
4109
978k
            cu_pos_x_8pelunits,
4110
978k
            cu_pos_y_8pelunits,
4111
978k
            ps_cu_analyse->u1_cu_size,
4112
978k
            s_nbr.u1_top_avail,
4113
978k
            s_nbr.u1_left_avail,
4114
978k
            &ps_final_prms->pu1_cu_coeffs[0],
4115
978k
            &cbf_bits);
4116
4117
978k
        cu_bits += header_bits;
4118
4119
        /* cbf bits are excluded from header bits, instead considered as texture bits */
4120
        /* incase if zero cbf eval is disabled then texture bits gets added here */
4121
978k
        ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
4122
978k
        ps_final_prms->u4_cu_cbf_bits = cbf_bits;
4123
4124
978k
#if RDOPT_ENABLE
4125
        /* add the cost of coding the cu bits */
4126
978k
        total_rdopt_cost +=
4127
978k
            COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
4128
978k
#endif
4129
978k
    }
4130
978k
    return (total_rdopt_cost);
4131
1.15M
}
4132
/*!
4133
******************************************************************************
4134
* \if Function name : ihevce_inter_rdopt_cu_ntu \endif
4135
*
4136
* \brief
4137
*    Inter coding unit function which performs the TQ, IT, IQ and recon for luma
4138
*
4139
* \param[in] ps_ctxt       enc_loop module ctxt pointer
4140
* \param[in] ps_inter_cand pointer to inter candidate structure
4141
* \param[in] pv_src        pointer to source data buffer
4142
* \param[in] cu_size       Current CU size
4143
* \param[in] cu_pos_x      cu position x w.r.t to ctb
4144
* \param[in] cu_pos_y      cu position y w.r.t to ctb
4145
* \param[in] ps_cu_prms    pointer to CU level params (provides luma source stride)
4146
* \param[in] curr_buf_idx  buffer index for current output storage
4147
* \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure
4148
*
4149
* \return
4150
*    Rdopt cost
4151
*
4152
* \author
4153
*  Ittiam
4154
*
4155
*****************************************************************************
4156
*/
4157
LWORD64 ihevce_inter_rdopt_cu_ntu(
4158
    ihevce_enc_loop_ctxt_t *ps_ctxt,
4159
    enc_loop_cu_prms_t *ps_cu_prms,
4160
    void *pv_src,
4161
    WORD32 cu_size,
4162
    WORD32 cu_pos_x,
4163
    WORD32 cu_pos_y,
4164
    WORD32 curr_buf_idx,
4165
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
4166
    cu_inter_cand_t *ps_inter_cand,
4167
    cu_analyse_t *ps_cu_analyse,
4168
    WORD32 i4_alpha_stim_multiplier)
4169
128k
{
4170
128k
    enc_loop_cu_final_prms_t *ps_final_prms;
4171
128k
    nbr_4x4_t *ps_nbr_4x4;
4172
128k
    tu_prms_t s_tu_prms[64 * 4];
4173
128k
    tu_prms_t *ps_tu_prms;
4174
4175
128k
    WORD32 i4_perform_rdoq;
4176
128k
    WORD32 i4_perform_sbh;
4177
128k
    WORD32 ai4_tu_split_flags[4];
4178
128k
    WORD32 ai4_tu_early_cbf[4];
4179
128k
    WORD32 num_split_flags = 1;
4180
128k
    WORD32 i;
4181
128k
    UWORD8 u1_tu_size;
4182
128k
    UWORD8 *pu1_pred;
4183
128k
    UWORD8 *pu1_ecd_data;
4184
128k
    WORD16 *pi2_deq_data;
4185
128k
    UWORD8 *pu1_csbf_buf;
4186
128k
    UWORD8 *pu1_tu_sz_sft;
4187
128k
    UWORD8 *pu1_tu_posx;
4188
128k
    UWORD8 *pu1_tu_posy;
4189
128k
    LWORD64 total_rdopt_cost;
4190
128k
    WORD32 ctr;
4191
128k
    WORD32 chrm_ctr;
4192
128k
    WORD32 num_tu_in_cu = 0;
4193
128k
    WORD32 pred_stride;
4194
128k
    WORD32 recon_stride;
4195
128k
    WORD32 trans_size = ps_cu_analyse->u1_cu_size;
4196
128k
    WORD32 csbf_strd;
4197
128k
    WORD32 chrm_present_flag;
4198
128k
    WORD32 ecd_data_bytes_cons;
4199
128k
    WORD32 num_4x4_in_cu;
4200
128k
    WORD32 num_4x4_in_tu;
4201
128k
    WORD32 recon_func_mode;
4202
128k
    WORD32 cu_bits;
4203
128k
    UWORD8 u1_compute_spatial_ssd;
4204
4205
    /* min_trans_size is initialized to some huge number than usual TU sizes */
4206
128k
    WORD32 i4_min_trans_size = 256;
4207
    /* Get the RDOPT cost of the best CU mode for early_exit */
4208
128k
    LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
4209
128k
    WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
4210
4211
    /* model for no residue syntax qt root cbf flag */
4212
128k
    UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX];
4213
4214
    /* backup copy of cabac states for restoration if zero cu reside rdo wins later */
4215
128k
    UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
4216
4217
    /* for skip cases tables are not required */
4218
128k
    UWORD8 u1_skip_tu_sz_sft = 0;
4219
128k
    UWORD8 u1_skip_tu_posx = 0;
4220
128k
    UWORD8 u1_skip_tu_posy = 0;
4221
128k
    UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
4222
4223
    /* get the pointers based on curbuf idx */
4224
128k
    ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
4225
128k
    ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
4226
128k
    pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
4227
128k
    pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
4228
128k
    csbf_strd = ps_ctxt->i4_cu_csbf_strd;
4229
128k
    pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
4230
4231
128k
    pred_stride = ps_inter_cand->i4_pred_data_stride;
4232
128k
    recon_stride = cu_size;
4233
128k
    pu1_pred = ps_inter_cand->pu1_pred_data;
4234
128k
    chrm_ctr = 0;
4235
128k
    ecd_data_bytes_cons = 0;
4236
128k
    total_rdopt_cost = 0;
4237
128k
    num_4x4_in_cu = cu_size >> 2;
4238
128k
    recon_func_mode = PRED_MODE_INTER;
4239
128k
    cu_bits = 0;
4240
4241
    /* get the 4x4 level postion of current cu */
4242
128k
    cu_pos_x = cu_pos_x << 1;
4243
128k
    cu_pos_y = cu_pos_y << 1;
4244
4245
    /* default value for cu coded flag */
4246
128k
    ps_final_prms->u1_is_cu_coded = 0;
4247
4248
    /* init of ssd of CU accumulated over all TUs */
4249
128k
    ps_final_prms->u4_cu_sad = 0;
4250
4251
    /* populate the coeffs scan idx */
4252
128k
    ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
4253
4254
128k
#if ENABLE_INTER_ZCU_COST
4255
    /* reset cu not coded cost */
4256
128k
    ps_ctxt->i8_cu_not_coded_cost = 0;
4257
4258
    /* backup copy of cabac states for restoration if zero cu reside rdo wins later */
4259
128k
    memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END);
4260
128k
#endif
4261
4262
128k
    if(ps_cu_analyse->u1_cu_size == 64)
4263
999
    {
4264
999
        num_split_flags = 4;
4265
999
        u1_tu_size = 32;
4266
999
    }
4267
127k
    else
4268
127k
    {
4269
127k
        num_split_flags = 1;
4270
127k
        u1_tu_size = ps_cu_analyse->u1_cu_size;
4271
127k
    }
4272
4273
    /* check for skip mode */
4274
128k
    if(1 == ps_final_prms->u1_skip_flag)
4275
47.6k
    {
4276
47.6k
        if(64 == cu_size)
4277
314
        {
4278
            /* TU = CU/2 is set but no transform is evaluated */
4279
314
            num_tu_in_cu = 4;
4280
314
            pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
4281
314
            pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
4282
314
            pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
4283
314
        }
4284
47.3k
        else
4285
47.3k
        {
4286
            /* TU = CU is set but no transform is evaluated */
4287
47.3k
            num_tu_in_cu = 1;
4288
47.3k
            pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
4289
47.3k
            pu1_tu_posx = &u1_skip_tu_posx;
4290
47.3k
            pu1_tu_posy = &u1_skip_tu_posy;
4291
47.3k
        }
4292
4293
47.6k
        recon_func_mode = PRED_MODE_SKIP;
4294
47.6k
    }
4295
    /* check for PU part mode being AMP or No AMP */
4296
81.1k
    else if(ps_final_prms->u1_part_mode < SIZE_2NxnU)
4297
74.2k
    {
4298
74.2k
        if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64))
4299
70.9k
        {
4300
            /* TU= CU is evaluated 2Nx2N inter case */
4301
70.9k
            num_tu_in_cu = 1;
4302
70.9k
            pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
4303
70.9k
            pu1_tu_posx = &u1_skip_tu_posx;
4304
70.9k
            pu1_tu_posy = &u1_skip_tu_posy;
4305
70.9k
        }
4306
3.32k
        else
4307
3.32k
        {
4308
            /* currently TU= CU/2 is evaluated for all inter case */
4309
3.32k
            num_tu_in_cu = 4;
4310
3.32k
            pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
4311
3.32k
            pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
4312
3.32k
            pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
4313
3.32k
        }
4314
74.2k
    }
4315
6.88k
    else
4316
6.88k
    {
4317
        /* for AMP cases one level of TU recurssion is done */
4318
        /* based on oreintation of the partitions           */
4319
6.88k
        num_tu_in_cu = 10;
4320
6.88k
        pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0];
4321
6.88k
        pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
4322
6.88k
        pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
4323
6.88k
    }
4324
4325
128k
    ps_tu_prms = &s_tu_prms[0];
4326
128k
    num_tu_in_cu = 0;
4327
4328
260k
    for(i = 0; i < num_split_flags; i++)
4329
131k
    {
4330
131k
        WORD32 i4_x_off = 0, i4_y_off = 0;
4331
4332
131k
        if(i == 1 || i == 3)
4333
1.99k
        {
4334
1.99k
            i4_x_off = 32;
4335
1.99k
        }
4336
4337
131k
        if(i == 2 || i == 3)
4338
1.99k
        {
4339
1.99k
            i4_y_off = 32;
4340
1.99k
        }
4341
4342
131k
        if(1 == ps_final_prms->u1_skip_flag)
4343
48.5k
        {
4344
48.5k
            ai4_tu_split_flags[0] = 0;
4345
48.5k
            ps_inter_cand->ai4_tu_split_flag[i] = 0;
4346
4347
48.5k
            ai4_tu_early_cbf[0] = 0;
4348
48.5k
        }
4349
83.2k
        else
4350
83.2k
        {
4351
83.2k
            ai4_tu_split_flags[0] = ps_inter_cand->ai4_tu_split_flag[i];
4352
83.2k
            ai4_tu_early_cbf[0] = ps_inter_cand->ai4_tu_early_cbf[i];
4353
83.2k
        }
4354
4355
131k
        ps_tu_prms->u1_tu_size = u1_tu_size;
4356
4357
131k
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
4358
131k
            ps_tu_prms,
4359
131k
            &num_tu_in_cu,
4360
131k
            0,
4361
131k
            ai4_tu_split_flags[0],
4362
131k
            ai4_tu_early_cbf[0],
4363
131k
            i4_x_off,
4364
131k
            i4_y_off);
4365
131k
    }
4366
4367
    /* loop for all tu blocks in current cu */
4368
128k
    ps_tu_prms = &s_tu_prms[0];
4369
430k
    for(ctr = 0; ctr < num_tu_in_cu; ctr++)
4370
301k
    {
4371
301k
        trans_size = ps_tu_prms->u1_tu_size;
4372
4373
301k
        if(i4_min_trans_size > trans_size)
4374
132k
        {
4375
132k
            i4_min_trans_size = trans_size;
4376
132k
        }
4377
301k
        ps_tu_prms++;
4378
301k
    }
4379
4380
128k
    if(ps_ctxt->i1_cu_qp_delta_enable)
4381
32.6k
    {
4382
32.6k
        ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, i4_min_trans_size, 0);
4383
32.6k
    }
4384
4385
128k
    if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
4386
0
    {
4387
0
        ps_ctxt->i8_cl_ssd_lambda_qf =
4388
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
4389
0
             100.0f);
4390
0
        ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
4391
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
4392
0
             (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
4393
0
    }
4394
4395
128k
    u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
4396
36.4k
                             (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
4397
476
                             CONVERT_SSDS_TO_SPATIAL_DOMAIN;
4398
4399
128k
    if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
4400
0
    {
4401
0
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
4402
0
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
4403
0
    }
4404
4405
128k
    if(!u1_compute_spatial_ssd)
4406
128k
    {
4407
128k
        ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
4408
128k
        ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
4409
128k
    }
4410
476
    else
4411
476
    {
4412
476
        ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1;
4413
476
    }
4414
4415
128k
    ps_tu_prms = &s_tu_prms[0];
4416
4417
128k
    ASSERT(num_tu_in_cu <= 256);
4418
4419
    /* RDOPT copy States :  TU init (best until prev TU) to current */
4420
128k
    memcpy(
4421
128k
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4422
128k
             .s_cabac_ctxt.au1_ctxt_models[0],
4423
128k
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
4424
128k
        IHEVC_CAB_COEFFX_PREFIX);
4425
4426
403k
    for(ctr = 0; ctr < num_tu_in_cu; ctr++)
4427
299k
    {
4428
299k
        WORD32 curr_bytes;
4429
299k
        WORD32 tx_size;
4430
299k
        WORD32 cbf, zero_col, zero_row;
4431
299k
        LWORD64 rdopt_cost;
4432
299k
        UWORD8 u1_is_recon_available;
4433
4434
299k
        WORD32 curr_pos_x;
4435
299k
        WORD32 curr_pos_y;
4436
299k
        nbr_4x4_t *ps_cur_nbr_4x4;
4437
299k
        UWORD8 *pu1_cur_pred;
4438
299k
        UWORD8 *pu1_cur_src;
4439
299k
        UWORD8 *pu1_cur_recon;
4440
299k
        WORD16 *pi2_cur_deq_data;
4441
299k
        UWORD32 u4_tu_sad;
4442
299k
        WORD32 tu_bits;
4443
4444
299k
        WORD32 i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
4445
4446
299k
        trans_size = ps_tu_prms->u1_tu_size;
4447
        /* get the current pos x and pos y in pixels */
4448
299k
        curr_pos_x = ps_tu_prms->u1_x_off;  //((cu_size >> 2) * pu1_tu_posx[ctr]);
4449
299k
        curr_pos_y = ps_tu_prms->u1_y_off;  //((cu_size >> 2) * pu1_tu_posy[ctr]);
4450
4451
299k
        num_4x4_in_tu = trans_size >> 2;
4452
4453
#if FORCE_8x8_TFR
4454
        if(cu_size == 64)
4455
        {
4456
            curr_pos_x = ((cu_size >> 3) * pu1_tu_posx[ctr]);
4457
            curr_pos_y = ((cu_size >> 3) * pu1_tu_posy[ctr]);
4458
        }
4459
#endif
4460
4461
        /* increment the pointers to start of current TU  */
4462
299k
        pu1_cur_src = ((UWORD8 *)pv_src + curr_pos_x);
4463
299k
        pu1_cur_src += (curr_pos_y * src_strd);
4464
299k
        pu1_cur_pred = (pu1_pred + curr_pos_x);
4465
299k
        pu1_cur_pred += (curr_pos_y * pred_stride);
4466
299k
        pi2_cur_deq_data = pi2_deq_data + curr_pos_x;
4467
299k
        pi2_cur_deq_data += (curr_pos_y * cu_size);
4468
299k
        pu1_cur_recon = ((UWORD8 *)ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0]) +
4469
299k
                        curr_pos_x + curr_pos_y * i4_recon_stride;
4470
4471
299k
        ps_cur_nbr_4x4 = (ps_nbr_4x4 + (curr_pos_x >> 2));
4472
299k
        ps_cur_nbr_4x4 += ((curr_pos_y >> 2) * num_4x4_in_cu);
4473
4474
        /* RDOPT copy States :  TU init (best until prev TU) to current */
4475
299k
        COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4476
299k
            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4477
299k
                    .s_cabac_ctxt.au1_ctxt_models[0] +
4478
299k
                IHEVC_CAB_COEFFX_PREFIX,
4479
299k
            &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4480
299k
            IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4481
4482
299k
        i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
4483
299k
        i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
4484
4485
        /* 2 Multi-dimensional array based on trans size of rounding factor to be added here */
4486
        /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
4487
        /* Currently the complete array will contain only single value*/
4488
        /*The rounding factor is calculated with the formula
4489
        Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
4490
        rounding factor = (1 - DeadZone Val)
4491
4492
        Assumption: Cabac states of All the sub-blocks in the TU are considered independent
4493
        */
4494
299k
        if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0))
4495
0
        {
4496
0
            double i4_lamda_modifier;
4497
4498
0
            if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
4499
0
            {
4500
0
                i4_lamda_modifier = ps_ctxt->i4_lamda_modifier *
4501
0
                                    CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
4502
0
            }
4503
0
            else
4504
0
            {
4505
0
                i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
4506
0
            }
4507
0
            if(ps_ctxt->i4_use_const_lamda_modifier)
4508
0
            {
4509
0
                if(ISLICE == ps_ctxt->i1_slice_type)
4510
0
                {
4511
0
                    i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
4512
0
                }
4513
0
                else
4514
0
                {
4515
0
                    i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
4516
0
                }
4517
0
            }
4518
0
            ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
4519
0
                &ps_ctxt->i4_quant_round_tu[0][0];
4520
0
            ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
4521
0
                &ps_ctxt->i4_quant_round_tu[1][0];
4522
4523
0
            memset(
4524
0
                ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
4525
0
                0,
4526
0
                trans_size * trans_size * sizeof(WORD32));
4527
0
            memset(
4528
0
                ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
4529
0
                0,
4530
0
                trans_size * trans_size * sizeof(WORD32));
4531
4532
0
            ihevce_quant_rounding_factor_gen(
4533
0
                trans_size,
4534
0
                1,
4535
0
                &ps_ctxt->s_rdopt_entropy_ctxt,
4536
0
                ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
4537
0
                ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
4538
0
                i4_lamda_modifier,
4539
0
                1);
4540
0
        }
4541
299k
        else
4542
299k
        {
4543
299k
            ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
4544
299k
                ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3];
4545
299k
            ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
4546
299k
                ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3];
4547
299k
        }
4548
4549
        /* call T Q IT IQ and recon function */
4550
299k
        cbf = ihevce_t_q_iq_ssd_scan_fxn(
4551
299k
            ps_ctxt,
4552
299k
            pu1_cur_pred,
4553
299k
            pred_stride,
4554
299k
            pu1_cur_src,
4555
299k
            src_strd,
4556
299k
            pi2_cur_deq_data,
4557
299k
            cu_size,
4558
299k
            pu1_cur_recon,
4559
299k
            i4_recon_stride,
4560
299k
            pu1_ecd_data,
4561
299k
            pu1_csbf_buf,
4562
299k
            csbf_strd,
4563
299k
            trans_size,
4564
299k
            recon_func_mode,
4565
299k
            &rdopt_cost,
4566
299k
            &curr_bytes,
4567
299k
            &tu_bits,
4568
299k
            &u4_tu_sad,
4569
299k
            &zero_col,
4570
299k
            &zero_row,
4571
299k
            &u1_is_recon_available,
4572
299k
            i4_perform_rdoq,
4573
299k
            i4_perform_sbh,
4574
299k
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
4575
299k
            i4_alpha_stim_multiplier,
4576
299k
            u1_is_cu_noisy,
4577
299k
#endif
4578
299k
            u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
4579
299k
            ps_ctxt->u1_use_early_cbf_data ? ps_tu_prms->i4_early_cbf : 1);
4580
4581
#if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
4582
        if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
4583
        {
4584
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
4585
            rdopt_cost = ihevce_inject_stim_into_distortion(
4586
                pu1_cur_src,
4587
                src_strd,
4588
                pu1_cur_pred,
4589
                pred_stride,
4590
                rdopt_cost,
4591
                i4_alpha_stim_multiplier,
4592
                trans_size,
4593
                0,
4594
                ps_ctxt->u1_enable_psyRDOPT,
4595
                NULL_PLANE);
4596
#else
4597
            if(u1_compute_spatial_ssd && u1_is_recon_available)
4598
            {
4599
                rdopt_cost = ihevce_inject_stim_into_distortion(
4600
                    pu1_cur_src,
4601
                    src_strd,
4602
                    pu1_cur_recon,
4603
                    i4_recon_stride,
4604
                    rdopt_cost,
4605
                    i4_alpha_stim_multiplier,
4606
                    trans_size,
4607
                    0,
4608
                    NULL_PLANE);
4609
            }
4610
            else
4611
            {
4612
                rdopt_cost = ihevce_inject_stim_into_distortion(
4613
                    pu1_cur_src,
4614
                    src_strd,
4615
                    pu1_cur_pred,
4616
                    pred_stride,
4617
                    rdopt_cost,
4618
                    i4_alpha_stim_multiplier,
4619
                    trans_size,
4620
                    0,
4621
                    ps_ctxt->u1_enable_psyRDOPT,
4622
                    NULL_PLANE);
4623
            }
4624
#endif
4625
        }
4626
#endif
4627
4628
299k
        if(u1_compute_spatial_ssd && u1_is_recon_available)
4629
1.49k
        {
4630
1.49k
            ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = 0;
4631
1.49k
        }
4632
298k
        else
4633
298k
        {
4634
298k
            ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
4635
298k
        }
4636
4637
        /* accumulate the TU sad into cu sad */
4638
299k
        ps_final_prms->u4_cu_sad += u4_tu_sad;
4639
4640
        /* accumulate the TU bits into cu bits */
4641
299k
        cu_bits += tu_bits;
4642
4643
        /* inter cu is coded if any of the tu is coded in it */
4644
299k
        ps_final_prms->u1_is_cu_coded |= cbf;
4645
4646
        /* call the entropy function to get the bits */
4647
        /* add that to rd opt cost(SSD)              */
4648
4649
        /* update the bytes */
4650
299k
        ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
4651
299k
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = curr_bytes;
4652
        /* update the zero_row and col info for the final mode */
4653
299k
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col = zero_col;
4654
299k
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row = zero_row;
4655
4656
        /* update the bytes */
4657
299k
        ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
4658
4659
        /* update the total bytes cons */
4660
299k
        ecd_data_bytes_cons += curr_bytes;
4661
299k
        pu1_ecd_data += curr_bytes;
4662
4663
        /* RDOPT copy States :  New updated after curr TU to TU init */
4664
299k
        if(0 != cbf)
4665
99.9k
        {
4666
            /* update to new state only if CBF is non zero */
4667
99.9k
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4668
99.9k
                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4669
99.9k
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4670
99.9k
                        .s_cabac_ctxt.au1_ctxt_models[0] +
4671
99.9k
                    IHEVC_CAB_COEFFX_PREFIX,
4672
99.9k
                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4673
99.9k
        }
4674
4675
        /* by default chroma present is set to 1*/
4676
299k
        chrm_present_flag = 1;
4677
299k
        if(4 == trans_size)
4678
159k
        {
4679
            /* if tusize is 4x4 then only first luma 4x4 will have chroma*/
4680
159k
            if(0 != chrm_ctr)
4681
119k
            {
4682
119k
                chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE;
4683
119k
            }
4684
4685
            /* increment the chrm ctr unconditionally */
4686
159k
            chrm_ctr++;
4687
4688
            /* after ctr reached 4 reset it */
4689
159k
            if(4 == chrm_ctr)
4690
39.7k
            {
4691
39.7k
                chrm_ctr = 0;
4692
39.7k
            }
4693
159k
        }
4694
4695
299k
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = cbf;
4696
299k
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
4697
299k
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
4698
299k
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
4699
299k
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
4700
299k
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag;
4701
299k
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp;
4702
299k
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0;
4703
299k
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0;
4704
299k
        GETRANGE(tx_size, trans_size);
4705
299k
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
4706
299k
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + (curr_pos_x >> 2);
4707
299k
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + (curr_pos_y >> 2);
4708
4709
        /* replicate the nbr 4x4 structure for all 4x4 blocks in the current TU */
4710
299k
        ps_cur_nbr_4x4->b1_y_cbf = cbf;
4711
        /*copy the cu qp. This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/
4712
299k
        ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
4713
4714
        /* Qp and cbf are stored for the all 4x4 in TU */
4715
299k
        {
4716
299k
            WORD32 i, j;
4717
299k
            nbr_4x4_t *ps_tmp_4x4;
4718
299k
            ps_tmp_4x4 = ps_cur_nbr_4x4;
4719
4720
934k
            for(i = 0; i < num_4x4_in_tu; i++)
4721
634k
            {
4722
2.94M
                for(j = 0; j < num_4x4_in_tu; j++)
4723
2.31M
                {
4724
2.31M
                    ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp;
4725
2.31M
                    ps_tmp_4x4[j].b1_y_cbf = cbf;
4726
2.31M
                }
4727
                /* row level update*/
4728
634k
                ps_tmp_4x4 += num_4x4_in_cu;
4729
634k
            }
4730
299k
        }
4731
4732
299k
#if RDOPT_ENABLE
4733
        /* compute the rdopt cost */
4734
299k
        rdopt_cost +=
4735
299k
            COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
4736
299k
#endif
4737
        /* accumulate the costs */
4738
299k
        total_rdopt_cost += rdopt_cost;
4739
4740
299k
        ps_tu_prms++;
4741
4742
299k
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
4743
299k
        {
4744
            /* Early exit : If the current running cost exceeds
4745
            the prev. best mode cost, break */
4746
299k
            if(total_rdopt_cost > prev_best_rdopt_cost)
4747
25.4k
            {
4748
25.4k
                return (total_rdopt_cost);
4749
25.4k
            }
4750
299k
        }
4751
299k
    }
4752
4753
    /* Modify the cost function for this CU. */
4754
    /* loop in for 8x8 blocks */
4755
103k
    if(ps_ctxt->u1_enable_psyRDOPT)
4756
0
    {
4757
0
        UWORD8 *pu1_recon_cu;
4758
0
        WORD32 recon_stride;
4759
0
        WORD32 curr_pos_x;
4760
0
        WORD32 curr_pos_y;
4761
0
        WORD32 start_index;
4762
0
        WORD32 num_horz_cu_in_ctb;
4763
0
        WORD32 had_block_size;
4764
4765
        /* TODO: sreenivasa ctb size has to be used appropriately */
4766
0
        had_block_size = 8;
4767
0
        num_horz_cu_in_ctb = 64 / had_block_size;
4768
4769
0
        curr_pos_x = cu_pos_x << 2; /* pel units */
4770
0
        curr_pos_y = cu_pos_y << 2; /* pel units */
4771
0
        recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
4772
0
        pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore
4773
0
                            .apv_luma_recon_bufs[0]);  // already pointing to the current CU recon
4774
        //+ \curr_pos_x + curr_pos_y * recon_stride;
4775
4776
        /* start index to index the source satd of curr cu in the current ctb */
4777
0
        start_index =
4778
0
            (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
4779
4780
0
        {
4781
0
            total_rdopt_cost += ihevce_psy_rd_cost(
4782
0
                ps_ctxt->ai4_source_satd_8x8,
4783
0
                pu1_recon_cu,
4784
0
                recon_stride,
4785
0
                1,  //howz stride
4786
0
                cu_size,
4787
0
                0,  // pic type
4788
0
                0,  //layer id
4789
0
                ps_ctxt->i4_satd_lamda,  // lambda
4790
0
                start_index,
4791
0
                ps_ctxt->u1_is_input_data_hbd,
4792
0
                ps_ctxt->u4_psy_strength,
4793
0
                &ps_ctxt->s_cmn_opt_func);  // 8 bit
4794
0
        }
4795
0
    }
4796
4797
    /* store the num TUs*/
4798
103k
    ps_final_prms->u2_num_tus_in_cu = num_tu_in_cu;
4799
4800
    /* update the bytes consumed */
4801
103k
    ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
4802
4803
    /* store the current cu size to final prms */
4804
103k
    ps_final_prms->u1_cu_size = cu_size;
4805
4806
    /* cu bits will be having luma residual bits till this point    */
4807
    /* if zero_cbf eval is disabled then cu bits will be zero       */
4808
103k
    ps_final_prms->u4_cu_luma_res_bits = cu_bits;
4809
4810
    /* ------------- Chroma processing -------------- */
4811
    /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEED preset */
4812
103k
    if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
4813
57.7k
    {
4814
57.7k
        LWORD64 chrm_rdopt_cost;
4815
57.7k
        WORD32 chrm_rdopt_tu_bits;
4816
4817
        /* Store the current RDOPT cost to enable early exit in chrom_prcs */
4818
57.7k
        ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
4819
4820
57.7k
        chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
4821
57.7k
            ps_ctxt,
4822
57.7k
            curr_buf_idx,
4823
57.7k
            0, /* TU mode : Don't care in Inter patrh */
4824
57.7k
            ps_chrm_cu_buf_prms->pu1_curr_src,
4825
57.7k
            ps_chrm_cu_buf_prms->i4_chrm_src_stride,
4826
57.7k
            ps_chrm_cu_buf_prms->pu1_cu_left,
4827
57.7k
            ps_chrm_cu_buf_prms->pu1_cu_top,
4828
57.7k
            ps_chrm_cu_buf_prms->pu1_cu_top_left,
4829
57.7k
            ps_chrm_cu_buf_prms->i4_cu_left_stride,
4830
57.7k
            (cu_pos_x >> 1),
4831
57.7k
            (cu_pos_y >> 1),
4832
57.7k
            &chrm_rdopt_tu_bits,
4833
57.7k
            i4_alpha_stim_multiplier,
4834
57.7k
            u1_is_cu_noisy);
4835
4836
57.7k
#if WEIGH_CHROMA_COST
4837
57.7k
        chrm_rdopt_cost = (LWORD64)(
4838
57.7k
            (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
4839
57.7k
             (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
4840
57.7k
            CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
4841
57.7k
#endif
4842
4843
57.7k
#if CHROMA_RDOPT_ENABLE
4844
57.7k
        total_rdopt_cost += chrm_rdopt_cost;
4845
57.7k
#endif
4846
57.7k
        cu_bits += chrm_rdopt_tu_bits;
4847
4848
        /* during chroma evaluation if skip decision was over written     */
4849
        /* then the current skip candidate is set to a non skip candidate */
4850
57.7k
        ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag;
4851
4852
        /* cu bits for chroma residual if chroma rdopt is on       */
4853
        /* if zero_cbf eval is disabled then cu bits will be zero  */
4854
57.7k
        ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
4855
4856
57.7k
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
4857
57.7k
        {
4858
            /* Early exit : If the current running cost exceeds
4859
            the prev. best mode cost, break */
4860
57.7k
            if(total_rdopt_cost > prev_best_rdopt_cost)
4861
9.26k
            {
4862
9.26k
                return (total_rdopt_cost);
4863
9.26k
            }
4864
57.7k
        }
4865
57.7k
    }
4866
45.5k
    else
4867
45.5k
    {}
4868
4869
94.0k
#if SHRINK_INTER_TUTREE
4870
    /* ------------- Quadtree TU split  optimization ------------  */
4871
94.0k
    if(ps_final_prms->u1_is_cu_coded)
4872
36.1k
    {
4873
36.1k
        ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree(
4874
36.1k
            &ps_final_prms->as_tu_enc_loop[0],
4875
36.1k
            &ps_final_prms->as_tu_enc_loop_temp_prms[0],
4876
36.1k
            &ps_final_prms->s_recon_datastore,
4877
36.1k
            num_tu_in_cu,
4878
36.1k
            (ps_ctxt->u1_chroma_array_type == 2));
4879
36.1k
    }
4880
94.0k
#endif
4881
4882
    /* RDOPT copy States :  Best after all luma TUs (and chroma,if enabled)to current */
4883
94.0k
    COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4884
94.0k
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4885
94.0k
                .s_cabac_ctxt.au1_ctxt_models[0] +
4886
94.0k
            IHEVC_CAB_COEFFX_PREFIX,
4887
94.0k
        &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4888
94.0k
        IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4889
4890
    /* -------- Bit estimate for RD opt -------------- */
4891
94.0k
    {
4892
94.0k
        nbr_avail_flags_t s_nbr;
4893
        /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
4894
94.0k
        WORD32 cbf_bits, header_bits;
4895
4896
        /* get the neighbour availability flags for current cu  */
4897
94.0k
        ihevce_get_only_nbr_flag(
4898
94.0k
            &s_nbr,
4899
94.0k
            ps_ctxt->pu1_ctb_nbr_map,
4900
94.0k
            ps_ctxt->i4_nbr_map_strd,
4901
94.0k
            cu_pos_x,
4902
94.0k
            cu_pos_y,
4903
94.0k
            (cu_size >> 2),
4904
94.0k
            (cu_size >> 2));
4905
4906
        /* call the entropy rdo encode to get the bit estimate for current cu */
4907
94.0k
        header_bits = ihevce_entropy_rdo_encode_cu(
4908
94.0k
            &ps_ctxt->s_rdopt_entropy_ctxt,
4909
94.0k
            ps_final_prms,
4910
94.0k
            (cu_pos_x >> 1), /*  back to 8x8 pel units   */
4911
94.0k
            (cu_pos_y >> 1), /*  back to 8x8 pel units   */
4912
94.0k
            cu_size,
4913
94.0k
            ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
4914
94.0k
                                           : s_nbr.u1_top_avail,
4915
94.0k
            s_nbr.u1_left_avail,
4916
94.0k
            &ps_final_prms->pu1_cu_coeffs[0],
4917
94.0k
            &cbf_bits);
4918
4919
94.0k
        cu_bits += header_bits;
4920
4921
        /* cbf bits are excluded from header bits, instead considered as texture bits */
4922
        /* in case zero cbf eval is disabled, texture bits get added here */
4923
94.0k
        ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
4924
94.0k
        ps_final_prms->u4_cu_cbf_bits = cbf_bits;
4925
4926
94.0k
#if RDOPT_ENABLE
4927
        /* add the cost of coding the header bits */
4928
94.0k
        total_rdopt_cost +=
4929
94.0k
            COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
4930
4931
94.0k
#if ENABLE_INTER_ZCU_COST
4932
        /* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */
4933
94.0k
        if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level))
4934
36.1k
        {
4935
36.1k
            LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost;
4936
4937
36.1k
            WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) &&
4938
30.6k
                                      (1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag);
4939
4940
36.1k
            cab_ctxt_t *ps_cab_ctxt =
4941
36.1k
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt;
4942
4943
            /* Read header bits generated after ihevce_entropy_rdo_encode_cu() call  */
4944
36.1k
            UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12;
4945
4946
            /* account for coding qt_root_cbf = 0 */
4947
            /* First subtract cost for coding as 1 (part of header bits) and then add cost for coding as 0 */
4948
36.1k
            u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0];
4949
36.1k
            if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1])
4950
6
                u4_cu_hdr_bits_q12 = 0;
4951
36.1k
            else
4952
36.1k
                u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1];
4953
4954
            /* add the cost of coding the header bits */
4955
36.1k
            i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30(
4956
36.1k
                u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */,
4957
36.1k
                ps_ctxt->i8_cl_ssd_lambda_qf,
4958
36.1k
                (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
4959
4960
36.1k
            if(ps_ctxt->u1_enable_psyRDOPT)
4961
0
            {
4962
0
                i8_cu_not_coded_cost = total_rdopt_cost + 1;
4963
0
            }
4964
4965
            /* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */
4966
36.1k
            if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu))
4967
265
            {
4968
265
                WORD32 tx_size;
4969
4970
                /* force cu as not coded and update the cost */
4971
265
                ps_final_prms->u1_is_cu_coded = 0;
4972
265
                ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
4973
265
                ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
4974
4975
265
                total_rdopt_cost = i8_cu_not_coded_cost;
4976
4977
                /* reset num TUs to 1 unless cu size is 64 */
4978
265
                ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1;
4979
265
                trans_size = (64 == cu_size) ? 32 : cu_size;
4980
265
                GETRANGE(tx_size, trans_size);
4981
4982
                /* reset the bytes consumed */
4983
265
                ps_final_prms->i4_num_bytes_ecd_data = 0;
4984
4985
                /* reset texture related bits and roll back header bits*/
4986
265
                ps_final_prms->u4_cu_cbf_bits = 0;
4987
265
                ps_final_prms->u4_cu_luma_res_bits = 0;
4988
265
                ps_final_prms->u4_cu_chroma_res_bits = 0;
4989
265
                ps_final_prms->u4_cu_hdr_bits =
4990
265
                    (u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q;
4991
4992
                /* update cabac model with qtroot cbf = 0 decision */
4993
265
                ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] =
4994
265
                    gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1];
4995
4996
                /* restore untouched cabac models for, tusplit, cbfs, texture etc */
4997
265
                memcpy(
4998
265
                    &ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM],
4999
265
                    &au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM],
5000
265
                    (IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM));
5001
5002
                /* mark all tus as not coded for final eval */
5003
542
                for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++)
5004
277
                {
5005
277
                    WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0;
5006
277
                    WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0;
5007
5008
277
                    nbr_4x4_t *ps_cur_nbr_4x4 =
5009
277
                        ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu);
5010
5011
277
                    num_4x4_in_tu = trans_size >> 2;
5012
5013
277
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0;
5014
277
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0;
5015
277
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0;
5016
5017
277
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0;
5018
277
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
5019
277
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
5020
5021
277
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
5022
277
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
5023
5024
277
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
5025
277
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x;
5026
277
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y;
5027
5028
                    /* reset cbf for the all 4x4 in TU */
5029
277
                    {
5030
277
                        WORD32 i, j;
5031
277
                        nbr_4x4_t *ps_tmp_4x4;
5032
277
                        ps_tmp_4x4 = ps_cur_nbr_4x4;
5033
5034
1.71k
                        for(i = 0; i < num_4x4_in_tu; i++)
5035
1.43k
                        {
5036
10.3k
                            for(j = 0; j < num_4x4_in_tu; j++)
5037
8.92k
                            {
5038
8.92k
                                ps_tmp_4x4[j].b1_y_cbf = 0;
5039
8.92k
                            }
5040
                            /* row level update*/
5041
1.43k
                            ps_tmp_4x4 += num_4x4_in_cu;
5042
1.43k
                        }
5043
277
                    }
5044
277
                }
5045
265
            }
5046
36.1k
        }
5047
94.0k
#endif /* ENABLE_INTER_ZCU_COST */
5048
5049
94.0k
#endif /* RDOPT_ENABLE */
5050
94.0k
    }
5051
5052
94.0k
    return (total_rdopt_cost);
5053
103k
}
5054
5055
#if ENABLE_RDO_BASED_TU_RECURSION
5056
LWORD64 ihevce_inter_tu_tree_selector_and_rdopt_cost_computer(
5057
    ihevce_enc_loop_ctxt_t *ps_ctxt,
5058
    enc_loop_cu_prms_t *ps_cu_prms,
5059
    void *pv_src,
5060
    WORD32 cu_size,
5061
    WORD32 cu_pos_x,
5062
    WORD32 cu_pos_y,
5063
    WORD32 curr_buf_idx,
5064
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
5065
    cu_inter_cand_t *ps_inter_cand,
5066
    cu_analyse_t *ps_cu_analyse,
5067
    WORD32 i4_alpha_stim_multiplier)
5068
172k
{
5069
172k
    tu_tree_node_t as_tu_nodes[256 + 64 + 16 + 4 + 1];
5070
172k
    buffer_data_for_tu_t s_buffer_data_for_tu;
5071
172k
    enc_loop_cu_final_prms_t *ps_final_prms;
5072
172k
    nbr_4x4_t *ps_nbr_4x4;
5073
5074
172k
    WORD32 num_split_flags = 1;
5075
172k
    UWORD8 u1_tu_size;
5076
172k
    UWORD8 *pu1_pred;
5077
172k
    UWORD8 *pu1_ecd_data;
5078
172k
    WORD16 *pi2_deq_data;
5079
172k
    UWORD8 *pu1_csbf_buf;
5080
172k
    UWORD8 *pu1_tu_sz_sft;
5081
172k
    UWORD8 *pu1_tu_posx;
5082
172k
    UWORD8 *pu1_tu_posy;
5083
172k
    LWORD64 total_rdopt_cost;
5084
172k
    WORD32 ctr;
5085
172k
    WORD32 chrm_ctr;
5086
172k
    WORD32 pred_stride;
5087
172k
    WORD32 recon_stride;
5088
172k
    WORD32 trans_size = ps_cu_analyse->u1_cu_size;
5089
172k
    WORD32 csbf_strd;
5090
172k
    WORD32 ecd_data_bytes_cons;
5091
172k
    WORD32 num_4x4_in_cu;
5092
172k
    WORD32 num_4x4_in_tu;
5093
172k
    WORD32 recon_func_mode;
5094
172k
    WORD32 cu_bits;
5095
172k
    UWORD8 u1_compute_spatial_ssd;
5096
    /* backup copy of cabac states for restoration if zero cu residue rdo wins later */
5097
172k
    UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
5098
5099
172k
    WORD32 i4_min_trans_size = 256;
5100
172k
    LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
5101
172k
    WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
5102
    /* model for no residue syntax qt root cbf flag */
5103
172k
    UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX];
5104
172k
    UWORD8 u1_skip_tu_sz_sft = 0;
5105
172k
    UWORD8 u1_skip_tu_posx = 0;
5106
172k
    UWORD8 u1_skip_tu_posy = 0;
5107
172k
    UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
5108
5109
172k
    ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
5110
172k
    ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
5111
172k
    pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
5112
172k
    pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
5113
172k
    csbf_strd = ps_ctxt->i4_cu_csbf_strd;
5114
172k
    pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
5115
172k
    pred_stride = ps_inter_cand->i4_pred_data_stride;
5116
172k
    recon_stride = cu_size;
5117
172k
    pu1_pred = ps_inter_cand->pu1_pred_data;
5118
172k
    chrm_ctr = 0;
5119
172k
    ecd_data_bytes_cons = 0;
5120
172k
    total_rdopt_cost = 0;
5121
172k
    num_4x4_in_cu = cu_size >> 2;
5122
172k
    recon_func_mode = PRED_MODE_INTER;
5123
172k
    cu_bits = 0;
5124
5125
    /* get the 4x4 level position of current cu */
5126
172k
    cu_pos_x = cu_pos_x << 1;
5127
172k
    cu_pos_y = cu_pos_y << 1;
5128
5129
172k
    ps_final_prms->u1_is_cu_coded = 0;
5130
172k
    ps_final_prms->u4_cu_sad = 0;
5131
5132
    /* populate the coeffs scan idx */
5133
172k
    ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
5134
5135
172k
#if ENABLE_INTER_ZCU_COST
5136
    /* reset cu not coded cost */
5137
172k
    ps_ctxt->i8_cu_not_coded_cost = 0;
5138
5139
    /* backup copy of cabac states for restoration if zero cu reside rdo wins later */
5140
172k
    memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END);
5141
172k
#endif
5142
5143
172k
    if(ps_cu_analyse->u1_cu_size == 64)
5144
1.25k
    {
5145
1.25k
        num_split_flags = 4;
5146
1.25k
        u1_tu_size = 32;
5147
1.25k
    }
5148
171k
    else
5149
171k
    {
5150
171k
        num_split_flags = 1;
5151
171k
        u1_tu_size = ps_cu_analyse->u1_cu_size;
5152
171k
    }
5153
5154
172k
    if(1 == ps_final_prms->u1_skip_flag)
5155
52.9k
    {
5156
52.9k
        if(64 == cu_size)
5157
338
        {
5158
            /* TU = CU/2 is set but no trnaform is evaluated  */
5159
338
            pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
5160
338
            pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
5161
338
            pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
5162
338
        }
5163
52.5k
        else
5164
52.5k
        {
5165
            /* TU = CU is set but no trnaform is evaluated  */
5166
52.5k
            pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
5167
52.5k
            pu1_tu_posx = &u1_skip_tu_posx;
5168
52.5k
            pu1_tu_posy = &u1_skip_tu_posy;
5169
52.5k
        }
5170
5171
52.9k
        recon_func_mode = PRED_MODE_SKIP;
5172
52.9k
    }
5173
    /* check for PU part mode being AMP or No AMP */
5174
119k
    else if(ps_final_prms->u1_part_mode < SIZE_2NxnU)
5175
93.9k
    {
5176
93.9k
        if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64))
5177
77.6k
        {
5178
            /* TU= CU is evaluated 2Nx2N inter case */
5179
77.6k
            pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
5180
77.6k
            pu1_tu_posx = &u1_skip_tu_posx;
5181
77.6k
            pu1_tu_posy = &u1_skip_tu_posy;
5182
77.6k
        }
5183
16.3k
        else
5184
16.3k
        {
5185
            /* currently TU= CU/2 is evaluated for all inter case */
5186
16.3k
            pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
5187
16.3k
            pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
5188
16.3k
            pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
5189
16.3k
        }
5190
93.9k
    }
5191
25.6k
    else
5192
25.6k
    {
5193
        /* for AMP cases one level of TU recurssion is done */
5194
        /* based on oreintation of the partitions           */
5195
25.6k
        pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0];
5196
25.6k
        pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
5197
25.6k
        pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
5198
25.6k
    }
5199
5200
172k
    i4_min_trans_size = 4;
5201
5202
172k
    if(ps_ctxt->i1_cu_qp_delta_enable)
5203
73.8k
    {
5204
73.8k
        ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, i4_min_trans_size, 0);
5205
73.8k
    }
5206
5207
172k
    if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
5208
0
    {
5209
0
        ps_ctxt->i8_cl_ssd_lambda_qf =
5210
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
5211
0
             100.0f);
5212
0
        ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
5213
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
5214
0
             (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
5215
0
    }
5216
5217
172k
    u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
5218
67.8k
                             (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
5219
67.8k
                             CONVERT_SSDS_TO_SPATIAL_DOMAIN;
5220
5221
172k
    if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
5222
0
    {
5223
0
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
5224
0
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
5225
0
    }
5226
5227
172k
    if(!u1_compute_spatial_ssd)
5228
104k
    {
5229
104k
        ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
5230
104k
        ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
5231
104k
    }
5232
67.8k
    else
5233
67.8k
    {
5234
67.8k
        ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1;
5235
5236
67.8k
        if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))
5237
0
        {
5238
0
            ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 1;
5239
0
        }
5240
67.8k
    }
5241
5242
    /* RDOPT copy States :  TU init (best until prev TU) to current */
5243
172k
    memcpy(
5244
172k
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5245
172k
             .s_cabac_ctxt.au1_ctxt_models[0],
5246
172k
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
5247
172k
        IHEVC_CAB_COEFFX_PREFIX);
5248
5249
172k
    ihevce_tu_tree_init(
5250
172k
        as_tu_nodes,
5251
172k
        cu_size,
5252
172k
        (cu_size == 64) ? !ps_inter_cand->b1_skip_flag : 0,
5253
172k
        ps_inter_cand->b1_skip_flag ? 0 : ps_ctxt->u1_max_inter_tr_depth,
5254
172k
        INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
5255
172k
        ps_ctxt->u1_chroma_array_type == 2);
5256
5257
172k
    if(!ps_inter_cand->b1_skip_flag && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
5258
0
    {
5259
0
        ihevce_tuSplitArray_to_tuTree_mapper(
5260
0
            as_tu_nodes,
5261
0
            ps_inter_cand->ai4_tu_split_flag,
5262
0
            cu_size,
5263
0
            cu_size,
5264
0
            MAX(MIN_TU_SIZE, (cu_size >> ps_ctxt->u1_max_inter_tr_depth)),
5265
0
            MIN(MAX_TU_SIZE, cu_size),
5266
0
            ps_inter_cand->b1_skip_flag);
5267
0
    }
5268
5269
172k
    ASSERT(ihevce_tu_tree_coverage_in_cu(as_tu_nodes) == cu_size * cu_size);
5270
5271
172k
#if ENABLE_INTER_ZCU_COST
5272
172k
    ps_ctxt->i8_cu_not_coded_cost = 0;
5273
172k
#endif
5274
5275
172k
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_src = pv_src;
5276
172k
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_pred = pu1_pred;
5277
172k
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_recon =
5278
172k
        ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0];
5279
172k
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_src_stride = src_strd;
5280
172k
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_pred_stride = pred_stride;
5281
172k
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_recon_stride =
5282
172k
        ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
5283
172k
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_src = ps_chrm_cu_buf_prms->pu1_curr_src;
5284
172k
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred =
5285
172k
        ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
5286
172k
        curr_buf_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) + ((ps_ctxt->u1_chroma_array_type == 2) *
5287
172k
                                                              (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
5288
172k
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_recon =
5289
172k
        ps_final_prms->s_recon_datastore.apv_chroma_recon_bufs[0];
5290
172k
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_src_stride =
5291
172k
        ps_chrm_cu_buf_prms->i4_chrm_src_stride;
5292
172k
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride =
5293
172k
        ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
5294
172k
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_recon_stride =
5295
172k
        ps_final_prms->s_recon_datastore.i4_chromaRecon_stride;
5296
172k
    s_buffer_data_for_tu.ps_nbr_data_buf = ps_nbr_4x4;
5297
172k
    s_buffer_data_for_tu.pi2_deq_data = pi2_deq_data;
5298
172k
    s_buffer_data_for_tu.pi2_deq_data_chroma =
5299
172k
        pi2_deq_data + ps_final_prms->i4_chrm_deq_coeff_strt_idx;
5300
172k
    s_buffer_data_for_tu.i4_nbr_data_buf_stride = num_4x4_in_cu;
5301
172k
    s_buffer_data_for_tu.i4_deq_data_stride = cu_size;
5302
172k
    s_buffer_data_for_tu.i4_deq_data_stride_chroma = cu_size;
5303
172k
    s_buffer_data_for_tu.ppu1_ecd = &pu1_ecd_data;
5304
5305
172k
    if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))
5306
0
    {
5307
0
        UWORD8 i;
5308
5309
0
        UWORD8 *pu1_pred = (UWORD8 *)s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred;
5310
5311
0
        for(i = 0; i < (!!ps_inter_cand->b3_part_size) + 1; i++)
5312
0
        {
5313
0
            pu_t *ps_pu;
5314
5315
0
            WORD32 inter_pu_wd;
5316
0
            WORD32 inter_pu_ht;
5317
5318
0
            ps_pu = ps_inter_cand->as_inter_pu + i;
5319
5320
0
            inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
5321
0
            inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
5322
0
            inter_pu_ht <<= (ps_ctxt->u1_chroma_array_type == 2);
5323
0
            ihevce_chroma_inter_pred_pu(
5324
0
                &ps_ctxt->s_mc_ctxt,
5325
0
                ps_pu,
5326
0
                pu1_pred,
5327
0
                s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride);
5328
0
            if(!!ps_inter_cand->b3_part_size)
5329
0
            {
5330
                /* 2Nx__ partion case */
5331
0
                if(inter_pu_wd == cu_size)
5332
0
                {
5333
0
                    pu1_pred +=
5334
0
                        (inter_pu_ht *
5335
0
                         s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride);
5336
0
                }
5337
5338
                /* __x2N partion case */
5339
0
                if(inter_pu_ht == (cu_size >> !(ps_ctxt->u1_chroma_array_type == 2)))
5340
0
                {
5341
0
                    pu1_pred += inter_pu_wd;
5342
0
                }
5343
0
            }
5344
0
        }
5345
0
    }
5346
5347
#if !ENABLE_TOP_DOWN_TU_RECURSION
5348
    total_rdopt_cost = ihevce_tu_tree_selector(
5349
        ps_ctxt,
5350
        as_tu_nodes,
5351
        &s_buffer_data_for_tu,
5352
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5353
             .s_cabac_ctxt.au1_ctxt_models[0],
5354
        recon_func_mode,
5355
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
5356
        i4_alpha_stim_multiplier,
5357
        u1_is_cu_noisy,
5358
#endif
5359
        0,
5360
        ps_ctxt->u1_max_inter_tr_depth,
5361
        ps_inter_cand->b3_part_size,
5362
        u1_compute_spatial_ssd);
5363
#else
5364
172k
    total_rdopt_cost = ihevce_topDown_tu_tree_selector(
5365
172k
        ps_ctxt,
5366
172k
        as_tu_nodes,
5367
172k
        &s_buffer_data_for_tu,
5368
172k
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5369
172k
             .s_cabac_ctxt.au1_ctxt_models[0],
5370
172k
        recon_func_mode,
5371
172k
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
5372
172k
        i4_alpha_stim_multiplier,
5373
172k
        u1_is_cu_noisy,
5374
172k
#endif
5375
172k
        0,
5376
172k
        ps_ctxt->u1_max_inter_tr_depth,
5377
172k
        ps_inter_cand->b3_part_size,
5378
172k
        INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
5379
172k
        u1_compute_spatial_ssd);
5380
172k
#endif
5381
5382
172k
    ps_final_prms->u2_num_tus_in_cu = 0;
5383
172k
    ps_final_prms->u4_cu_luma_res_bits = 0;
5384
172k
    ps_final_prms->u4_cu_sad = 0;
5385
172k
    total_rdopt_cost = 0;
5386
172k
    ecd_data_bytes_cons = 0;
5387
172k
    cu_bits = 0;
5388
172k
#if ENABLE_INTER_ZCU_COST
5389
172k
    ps_ctxt->i8_cu_not_coded_cost = 0;
5390
172k
#endif
5391
172k
    ps_final_prms->u1_is_cu_coded = 0;
5392
172k
    ps_final_prms->u1_cu_size = cu_size;
5393
5394
172k
    ihevce_tu_selector_debriefer(
5395
172k
        as_tu_nodes,
5396
172k
        ps_final_prms,
5397
172k
        &total_rdopt_cost,
5398
172k
#if ENABLE_INTER_ZCU_COST
5399
172k
        &ps_ctxt->i8_cu_not_coded_cost,
5400
172k
#endif
5401
172k
        &ecd_data_bytes_cons,
5402
172k
        &cu_bits,
5403
172k
        &ps_final_prms->u2_num_tus_in_cu,
5404
172k
        ps_ctxt->i4_cu_qp,
5405
172k
        cu_pos_x * 4,
5406
172k
        cu_pos_y * 4,
5407
172k
        INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
5408
172k
        (ps_ctxt->u1_chroma_array_type == 2),
5409
172k
        POS_TL);
5410
5411
172k
    if(!(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)))
5412
172k
    {
5413
172k
        ps_final_prms->i4_chrm_cu_coeff_strt_idx = ecd_data_bytes_cons;
5414
172k
    }
5415
5416
    /* Modify the cost function for this CU. */
5417
    /* loop in for 8x8 blocks */
5418
172k
    if(ps_ctxt->u1_enable_psyRDOPT)
5419
0
    {
5420
0
        UWORD8 *pu1_recon_cu;
5421
0
        WORD32 recon_stride;
5422
0
        WORD32 curr_pos_x;
5423
0
        WORD32 curr_pos_y;
5424
0
        WORD32 start_index;
5425
0
        WORD32 num_horz_cu_in_ctb;
5426
0
        WORD32 had_block_size;
5427
5428
        /* tODO: sreenivasa ctb size has to be used appropriately */
5429
0
        had_block_size = 8;
5430
0
        num_horz_cu_in_ctb = 64 / had_block_size;
5431
5432
0
        curr_pos_x = cu_pos_x << 2; /* pel units */
5433
0
        curr_pos_y = cu_pos_y << 2; /* pel units */
5434
0
        recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
5435
0
        pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore
5436
0
                            .apv_luma_recon_bufs[0]);  // already pointing to the current CU recon
5437
        //+ \curr_pos_x + curr_pos_y * recon_stride;
5438
5439
        /* start index to index the source satd of curr cu int he current ctb*/
5440
0
        start_index =
5441
0
            (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
5442
5443
0
        {
5444
0
            total_rdopt_cost += ihevce_psy_rd_cost(
5445
0
                ps_ctxt->ai4_source_satd_8x8,
5446
0
                pu1_recon_cu,
5447
0
                recon_stride,
5448
0
                1,  //howz stride
5449
0
                cu_size,
5450
0
                0,  // pic type
5451
0
                0,  //layer id
5452
0
                ps_ctxt->i4_satd_lamda,  // lambda
5453
0
                start_index,
5454
0
                ps_ctxt->u1_is_input_data_hbd,
5455
0
                ps_ctxt->u4_psy_strength,
5456
0
                &ps_ctxt->s_cmn_opt_func);  // 8 bit
5457
0
        }
5458
0
    }
5459
5460
172k
    ps_final_prms->u1_chroma_intra_pred_mode = 4;
5461
5462
    /* update the bytes consumed */
5463
172k
    ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
5464
5465
    /* store the current cu size to final prms */
5466
172k
    ps_final_prms->u1_cu_size = cu_size;
5467
    /* ------------- Chroma processing -------------- */
5468
    /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEDD preset*/
5469
172k
    if(ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt &&
5470
172k
       !(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)))
5471
172k
    {
5472
172k
        LWORD64 chrm_rdopt_cost;
5473
172k
        WORD32 chrm_rdopt_tu_bits;
5474
5475
        /* Store the current RDOPT cost to enable early exit in chrom_prcs */
5476
172k
        ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
5477
5478
172k
        chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
5479
172k
            ps_ctxt,
5480
172k
            curr_buf_idx,
5481
172k
            0, /* TU mode : Don't care in Inter patrh */
5482
172k
            ps_chrm_cu_buf_prms->pu1_curr_src,
5483
172k
            ps_chrm_cu_buf_prms->i4_chrm_src_stride,
5484
172k
            ps_chrm_cu_buf_prms->pu1_cu_left,
5485
172k
            ps_chrm_cu_buf_prms->pu1_cu_top,
5486
172k
            ps_chrm_cu_buf_prms->pu1_cu_top_left,
5487
172k
            ps_chrm_cu_buf_prms->i4_cu_left_stride,
5488
172k
            (cu_pos_x >> 1),
5489
172k
            (cu_pos_y >> 1),
5490
172k
            &chrm_rdopt_tu_bits,
5491
172k
            i4_alpha_stim_multiplier,
5492
172k
            u1_is_cu_noisy);
5493
5494
172k
#if WEIGH_CHROMA_COST
5495
172k
        chrm_rdopt_cost = (LWORD64)(
5496
172k
            (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
5497
172k
             (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
5498
172k
            CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
5499
172k
#endif
5500
5501
172k
#if CHROMA_RDOPT_ENABLE
5502
172k
        total_rdopt_cost += chrm_rdopt_cost;
5503
172k
#endif
5504
172k
        cu_bits += chrm_rdopt_tu_bits;
5505
5506
        /* during chroma evaluation if skip decision was over written     */
5507
        /* then the current skip candidate is set to a non skip candidate */
5508
172k
        ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag;
5509
5510
        /* cu bits for chroma residual if chroma rdopt is on       */
5511
        /* if zero_cbf eval is disabled then cu bits will be zero  */
5512
172k
        ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
5513
5514
172k
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
5515
172k
        {
5516
            /* Early exit : If the current running cost exceeds
5517
            the prev. best mode cost, break */
5518
172k
            if(total_rdopt_cost > prev_best_rdopt_cost)
5519
60.9k
            {
5520
60.9k
                return (total_rdopt_cost);
5521
60.9k
            }
5522
172k
        }
5523
172k
    }
5524
0
    else
5525
0
    {}
5526
5527
111k
#if SHRINK_INTER_TUTREE
5528
    /* ------------- Quadtree TU split  optimization ------------  */
5529
111k
    if(ps_final_prms->u1_is_cu_coded)
5530
60.9k
    {
5531
60.9k
        ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree(
5532
60.9k
            &ps_final_prms->as_tu_enc_loop[0],
5533
60.9k
            &ps_final_prms->as_tu_enc_loop_temp_prms[0],
5534
60.9k
            &ps_final_prms->s_recon_datastore,
5535
60.9k
            ps_final_prms->u2_num_tus_in_cu,
5536
60.9k
            (ps_ctxt->u1_chroma_array_type == 2));
5537
60.9k
    }
5538
111k
#endif
5539
5540
    /* RDOPT copy States :  Best after all luma TUs (and chroma,if enabled)to current */
5541
111k
    COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
5542
111k
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5543
111k
                .s_cabac_ctxt.au1_ctxt_models[0] +
5544
111k
            IHEVC_CAB_COEFFX_PREFIX,
5545
111k
        &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
5546
111k
        IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
5547
5548
    /* -------- Bit estimate for RD opt -------------- */
5549
111k
    {
5550
111k
        nbr_avail_flags_t s_nbr;
5551
        /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
5552
111k
        WORD32 cbf_bits, header_bits;
5553
5554
        /* get the neighbour availability flags for current cu  */
5555
111k
        ihevce_get_only_nbr_flag(
5556
111k
            &s_nbr,
5557
111k
            ps_ctxt->pu1_ctb_nbr_map,
5558
111k
            ps_ctxt->i4_nbr_map_strd,
5559
111k
            cu_pos_x,
5560
111k
            cu_pos_y,
5561
111k
            (cu_size >> 2),
5562
111k
            (cu_size >> 2));
5563
5564
        /* call the entropy rdo encode to get the bit estimate for current cu */
5565
111k
        header_bits = ihevce_entropy_rdo_encode_cu(
5566
111k
            &ps_ctxt->s_rdopt_entropy_ctxt,
5567
111k
            ps_final_prms,
5568
111k
            (cu_pos_x >> 1), /*  back to 8x8 pel units   */
5569
111k
            (cu_pos_y >> 1), /*  back to 8x8 pel units   */
5570
111k
            cu_size,
5571
111k
            ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
5572
111k
                                           : s_nbr.u1_top_avail,
5573
111k
            s_nbr.u1_left_avail,
5574
111k
            &ps_final_prms->pu1_cu_coeffs[0],
5575
111k
            &cbf_bits);
5576
5577
111k
        cu_bits += header_bits;
5578
5579
        /* cbf bits are excluded from header bits, instead considered as texture bits */
5580
        /* incase if zero cbf eval is disabled then texture bits gets added here */
5581
111k
        ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
5582
111k
        ps_final_prms->u4_cu_cbf_bits = cbf_bits;
5583
5584
111k
#if RDOPT_ENABLE
5585
        /* add the cost of coding the header bits */
5586
111k
        total_rdopt_cost +=
5587
111k
            COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
5588
5589
111k
#if ENABLE_INTER_ZCU_COST
5590
        /* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */
5591
111k
        if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level))
5592
60.9k
        {
5593
60.9k
            LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost;
5594
5595
60.9k
            WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) &&
5596
37.8k
                                      (1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag);
5597
5598
60.9k
            cab_ctxt_t *ps_cab_ctxt =
5599
60.9k
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt;
5600
5601
            /* Read header bits generatated after ihevce_entropy_rdo_encode_cu() call  */
5602
60.9k
            UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12;
5603
5604
            /* account for coding qt_root_cbf = 0 */
5605
            /* First subtract cost for coding as 1 (part of header bits) and then add cost for coding as 0 */
5606
60.9k
            u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0];
5607
60.9k
            if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1])
5608
211
                u4_cu_hdr_bits_q12 = 0;
5609
60.7k
            else
5610
60.7k
                u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1];
5611
5612
            /* add the cost of coding the header bits */
5613
60.9k
            i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30(
5614
60.9k
                u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */,
5615
60.9k
                ps_ctxt->i8_cl_ssd_lambda_qf,
5616
60.9k
                (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
5617
5618
60.9k
            if(ps_ctxt->u1_enable_psyRDOPT)
5619
0
            {
5620
0
                i8_cu_not_coded_cost = total_rdopt_cost + 1;
5621
0
            }
5622
5623
            /* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */
5624
60.9k
            if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu))
5625
96
            {
5626
96
                WORD32 tx_size;
5627
5628
                /* force cu as not coded and update the cost */
5629
96
                ps_final_prms->u1_is_cu_coded = 0;
5630
96
                ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
5631
96
                ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
5632
5633
96
                total_rdopt_cost = i8_cu_not_coded_cost;
5634
5635
                /* reset num TUs to 1 unless cu size id 64 */
5636
96
                ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1;
5637
96
                trans_size = (64 == cu_size) ? 32 : cu_size;
5638
96
                GETRANGE(tx_size, trans_size);
5639
5640
                /* reset the bytes consumed */
5641
96
                ps_final_prms->i4_num_bytes_ecd_data = 0;
5642
5643
                /* reset texture related bits and roll back header bits*/
5644
96
                ps_final_prms->u4_cu_cbf_bits = 0;
5645
96
                ps_final_prms->u4_cu_luma_res_bits = 0;
5646
96
                ps_final_prms->u4_cu_chroma_res_bits = 0;
5647
96
                ps_final_prms->u4_cu_hdr_bits =
5648
96
                    (u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q;
5649
5650
                /* update cabac model with qtroot cbf = 0 decision */
5651
96
                ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] =
5652
96
                    gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1];
5653
5654
                /* restore untouched cabac models for, tusplit, cbfs, texture etc */
5655
96
                memcpy(
5656
96
                    &ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM],
5657
96
                    &au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM],
5658
96
                    (IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM));
5659
5660
                /* mark all tus as not coded for final eval */
5661
192
                for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++)
5662
96
                {
5663
96
                    WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0;
5664
96
                    WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0;
5665
5666
96
                    nbr_4x4_t *ps_cur_nbr_4x4 =
5667
96
                        ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu);
5668
5669
96
                    num_4x4_in_tu = trans_size >> 2;
5670
5671
96
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0;
5672
96
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0;
5673
96
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0;
5674
5675
96
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0;
5676
96
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
5677
96
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
5678
5679
96
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
5680
96
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
5681
5682
96
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
5683
96
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x;
5684
96
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y;
5685
5686
                    /* reset cbf for the all 4x4 in TU */
5687
96
                    {
5688
96
                        WORD32 i, j;
5689
96
                        nbr_4x4_t *ps_tmp_4x4;
5690
96
                        ps_tmp_4x4 = ps_cur_nbr_4x4;
5691
5692
512
                        for(i = 0; i < num_4x4_in_tu; i++)
5693
416
                        {
5694
2.64k
                            for(j = 0; j < num_4x4_in_tu; j++)
5695
2.23k
                            {
5696
2.23k
                                ps_tmp_4x4[j].b1_y_cbf = 0;
5697
2.23k
                            }
5698
                            /* row level update*/
5699
416
                            ps_tmp_4x4 += num_4x4_in_cu;
5700
416
                        }
5701
96
                    }
5702
96
                }
5703
96
            }
5704
60.9k
        }
5705
111k
#endif /* ENABLE_INTER_ZCU_COST */
5706
5707
111k
#endif /* RDOPT_ENABLE */
5708
111k
    }
5709
5710
111k
    return (total_rdopt_cost);
5711
172k
}
5712
#endif
5713
5714
/*!
5715
******************************************************************************
5716
* \if Function name : ihevce_inter_rdopt_cu_mc_mvp \endif
5717
*
5718
* \brief
5719
*    Inter Coding unit function which performs MC and MVP calc for RD opt mode
5720
*
5721
* \param[in] ps_ctxt       enc_loop module ctxt pointer
5722
* \param[in] ps_inter_cand pointer to inter candidate structure
5723
* \param[in] cu_size         Current CU size
5724
* \param[in] cu_pos_x        cu position x w.r.t to ctb
5725
* \param[in] cu_pos_y        cu position y w.r.t to ctb
5726
* \param[in] ps_left_nbr_4x4 Left neighbour 4x4 structure pointer
5727
* \param[in] ps_top_nbr_4x4  top neighbour 4x4 structure pointer
5728
* \param[in] ps_topleft_nbr_4x4  top left neighbour 4x4 structure pointer
5729
* \param[in] nbr_4x4_left_strd  left neighbour 4x4 buffer stride
5730
* \param[in] curr_buf_idx Current Buffer index
5731
*
5732
* \return
5733
*    Rdopt cost
5734
*
5735
* \author
5736
*  Ittiam
5737
*
5738
*****************************************************************************
5739
*/
5740
LWORD64 ihevce_inter_rdopt_cu_mc_mvp(
5741
    ihevce_enc_loop_ctxt_t *ps_ctxt,
5742
    cu_inter_cand_t *ps_inter_cand,
5743
    WORD32 cu_size,
5744
    WORD32 cu_pos_x,
5745
    WORD32 cu_pos_y,
5746
    nbr_4x4_t *ps_left_nbr_4x4,
5747
    nbr_4x4_t *ps_top_nbr_4x4,
5748
    nbr_4x4_t *ps_topleft_nbr_4x4,
5749
    WORD32 nbr_4x4_left_strd,
5750
    WORD32 curr_buf_idx)
5751
301k
{
5752
    /* local variables */
5753
301k
    enc_loop_cu_final_prms_t *ps_final_prms;
5754
301k
    nbr_avail_flags_t s_nbr;
5755
301k
    nbr_4x4_t *ps_nbr_4x4;
5756
5757
301k
    UWORD8 au1_is_top_used[2][MAX_MVP_LIST_CAND];
5758
301k
    UWORD8 *pu1_pred;
5759
301k
    WORD32 rdopt_cost;
5760
301k
    WORD32 ctr;
5761
301k
    WORD32 num_cu_part;
5762
301k
    WORD32 inter_pu_wd;
5763
301k
    WORD32 inter_pu_ht;
5764
301k
    WORD32 pred_stride;
5765
5766
    /* get the pointers based on curbuf idx */
5767
301k
    ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
5768
301k
    ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
5769
301k
    pu1_pred = ps_inter_cand->pu1_pred_data;
5770
5771
301k
    pred_stride = ps_inter_cand->i4_pred_data_stride;
5772
5773
    /* store the partition mode in final prms */
5774
301k
    ps_final_prms->u1_part_mode = ps_inter_cand->b3_part_size;
5775
5776
    /* since encoder does not support NXN part type */
5777
    /* num parts can be either 1 or 2 only          */
5778
301k
    ASSERT(SIZE_NxN != ps_inter_cand->b3_part_size);
5779
5780
301k
    num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1;
5781
5782
    /* get the 4x4 level position of current cu */
5783
301k
    cu_pos_x = cu_pos_x << 1;
5784
301k
    cu_pos_y = cu_pos_y << 1;
5785
5786
    /* populate cu level params */
5787
301k
    ps_final_prms->u1_intra_flag = PRED_MODE_INTER;
5788
301k
    ps_final_prms->u2_num_pus_in_cu = num_cu_part;
5789
5790
    /* run a loop over all the partitions in cu */
5791
654k
    for(ctr = 0; ctr < num_cu_part; ctr++)
5792
352k
    {
5793
352k
        pu_mv_t as_pred_mv[MAX_MVP_LIST_CAND];
5794
352k
        pu_t *ps_pu;
5795
352k
        WORD32 skip_or_merge_flag;
5796
352k
        UWORD8 u1_use_mvp_from_top_row;
5797
5798
352k
        ps_pu = &ps_inter_cand->as_inter_pu[ctr];
5799
5800
        /* with AMP, each partition can have a different wd and ht */
5801
352k
        inter_pu_wd = (ps_pu->b4_wd + 1) << 2;
5802
352k
        inter_pu_ht = (ps_pu->b4_ht + 1) << 2;
5803
5804
        /* populate reference pic buf id for bs compute */
5805
5806
        /* L0 */
5807
352k
        if(-1 != ps_pu->mv.i1_l0_ref_idx)
5808
346k
        {
5809
346k
            ps_pu->mv.i1_l0_ref_pic_buf_id =
5810
346k
                ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx]->i4_buf_id;
5811
346k
        }
5812
5813
        /* L1 */
5814
352k
        if(-1 != ps_pu->mv.i1_l1_ref_idx)
5815
58.6k
        {
5816
58.6k
            ps_pu->mv.i1_l1_ref_pic_buf_id =
5817
58.6k
                ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx]->i4_buf_id;
5818
58.6k
        }
5819
5820
        /* SKIP or merge check for every part */
5821
352k
        skip_or_merge_flag = ps_inter_cand->b1_skip_flag | ps_pu->b1_merge_flag;
5822
5823
        /* ----------- MV Prediction ----------------- */
5824
352k
        if(0 == skip_or_merge_flag)
5825
122k
        {
5826
            /* get the neighbour availability flags */
5827
122k
            ihevce_get_only_nbr_flag(
5828
122k
                &s_nbr,
5829
122k
                ps_ctxt->pu1_ctb_nbr_map,
5830
122k
                ps_ctxt->i4_nbr_map_strd,
5831
122k
                cu_pos_x,
5832
122k
                cu_pos_y,
5833
122k
                inter_pu_wd >> 2,
5834
122k
                inter_pu_ht >> 2);
5835
5836
122k
            if(ps_ctxt->u1_disable_intra_eval && DISABLE_TOP_SYNC && (ps_pu->b4_pos_y == 0))
5837
0
            {
5838
0
                u1_use_mvp_from_top_row = 0;
5839
0
            }
5840
122k
            else
5841
122k
            {
5842
122k
                u1_use_mvp_from_top_row = 1;
5843
122k
            }
5844
5845
122k
            if(!u1_use_mvp_from_top_row)
5846
0
            {
5847
0
                if(s_nbr.u1_top_avail || s_nbr.u1_top_lt_avail || s_nbr.u1_top_rt_avail)
5848
0
                {
5849
0
                    if(!s_nbr.u1_left_avail && !s_nbr.u1_bot_lt_avail)
5850
0
                    {
5851
0
                        WORD32 curr_cu_pos_in_row, cu_top_right_offset, cu_top_right_dep_pos;
5852
5853
                        /* Ensure Top Right Sync */
5854
0
                        if(!ps_ctxt->u1_use_top_at_ctb_boundary)
5855
0
                        {
5856
0
                            curr_cu_pos_in_row =
5857
0
                                ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x + (cu_pos_x << 2);
5858
5859
0
                            if(ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y == 0)
5860
0
                            {
5861
                                /* No wait for 1st row */
5862
0
                                cu_top_right_offset = -(MAX_CTB_SIZE);
5863
0
                                {
5864
0
                                    ihevce_tile_params_t *ps_col_tile_params =
5865
0
                                        ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
5866
0
                                         ps_ctxt->i4_tile_col_idx);
5867
5868
                                    /* No wait for 1st row */
5869
0
                                    cu_top_right_offset =
5870
0
                                        -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
5871
0
                                }
5872
0
                                cu_top_right_dep_pos = 0;
5873
0
                            }
5874
0
                            else
5875
0
                            {
5876
0
                                cu_top_right_offset = (cu_size) + 4;
5877
0
                                cu_top_right_dep_pos =
5878
0
                                    (ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y >> 6) - 1;
5879
0
                            }
5880
5881
0
                            ihevce_dmgr_chk_row_row_sync(
5882
0
                                ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
5883
0
                                curr_cu_pos_in_row,
5884
0
                                cu_top_right_offset,
5885
0
                                cu_top_right_dep_pos,
5886
0
                                ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
5887
0
                                ps_ctxt->thrd_id);
5888
0
                        }
5889
5890
0
                        u1_use_mvp_from_top_row = 1;
5891
0
                    }
5892
0
                    else
5893
0
                    {
5894
0
                        s_nbr.u1_top_avail = 0;
5895
0
                        s_nbr.u1_top_lt_avail = 0;
5896
0
                        s_nbr.u1_top_rt_avail = 0;
5897
0
                    }
5898
0
                }
5899
0
                else
5900
0
                {
5901
0
                    u1_use_mvp_from_top_row = 1;
5902
0
                }
5903
0
            }
5904
            /* Call the MV prediction module to get MVP */
5905
122k
            ihevce_mv_pred(
5906
122k
                &ps_ctxt->s_mv_pred_ctxt,
5907
122k
                ps_top_nbr_4x4,
5908
122k
                ps_left_nbr_4x4,
5909
122k
                ps_topleft_nbr_4x4,
5910
122k
                nbr_4x4_left_strd,
5911
122k
                &s_nbr,
5912
122k
                NULL, /* colocated MV */
5913
122k
                ps_pu,
5914
122k
                &as_pred_mv[0],
5915
122k
                au1_is_top_used);
5916
122k
        }
5917
5918
        /* store the nbr 4x4 structure */
5919
352k
        ps_nbr_4x4->b1_skip_flag = ps_inter_cand->b1_skip_flag;
5920
352k
        ps_nbr_4x4->b1_intra_flag = 0;
5921
352k
        ps_nbr_4x4->b1_pred_l0_flag = 0;
5922
352k
        ps_nbr_4x4->b1_pred_l1_flag = 0;
5923
5924
        /* DC is default mode for inter cu, required for intra mode signalling */
5925
352k
        ps_nbr_4x4->b6_luma_intra_mode = 1;
5926
5927
        /* copy the motion vectors to neighbour structure */
5928
352k
        ps_nbr_4x4->mv = ps_pu->mv;
5929
5930
        /* copy the PU to final out pu */
5931
352k
        ps_final_prms->as_pu_enc_loop[ctr] = *ps_pu;
5932
5933
        /* copy the PU to chroma */
5934
352k
        ps_final_prms->as_pu_chrm_proc[ctr] = *ps_pu;
5935
5936
        /* store the skip flag to final prms */
5937
352k
        ps_final_prms->u1_skip_flag = ps_inter_cand->b1_skip_flag;
5938
5939
        /* MVP index & MVD calc is gated on skip/merge flag */
5940
352k
        if(0 == skip_or_merge_flag)
5941
122k
        {
5942
            /* calculate the MVDs and populate the MVP idx for L0 */
5943
122k
            if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode))
5944
119k
            {
5945
119k
                WORD32 idx0_cost, idx1_cost;
5946
5947
                /* calculate the ABS mvd for cand 0 */
5948
119k
                idx0_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[0].s_l0_mv.i2_mvx);
5949
119k
                idx0_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[0].s_l0_mv.i2_mvy);
5950
5951
                /* calculate the ABS mvd for cand 1 */
5952
119k
                if(u1_use_mvp_from_top_row)
5953
119k
                {
5954
119k
                    idx1_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[1].s_l0_mv.i2_mvx);
5955
119k
                    idx1_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[1].s_l0_mv.i2_mvy);
5956
119k
                }
5957
0
                else
5958
0
                {
5959
0
                    idx1_cost = INT_MAX;
5960
0
                }
5961
5962
                /* based on the least cost choose the mvp idx */
5963
119k
                if(idx0_cost <= idx1_cost)
5964
93.8k
                {
5965
93.8k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -=
5966
93.8k
                        as_pred_mv[0].s_l0_mv.i2_mvx;
5967
93.8k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -=
5968
93.8k
                        as_pred_mv[0].s_l0_mv.i2_mvy;
5969
5970
93.8k
                    ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 0;
5971
93.8k
                }
5972
25.5k
                else
5973
25.5k
                {
5974
25.5k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -=
5975
25.5k
                        as_pred_mv[1].s_l0_mv.i2_mvx;
5976
25.5k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -=
5977
25.5k
                        as_pred_mv[1].s_l0_mv.i2_mvy;
5978
5979
25.5k
                    ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 1;
5980
25.5k
                }
5981
5982
                /* set the pred l0 flag for neighbour storage */
5983
119k
                ps_nbr_4x4->b1_pred_l0_flag = 1;
5984
119k
            }
5985
            /* calculate the MVDs and populate the MVP idx for L1 */
5986
122k
            if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode))
5987
5.01k
            {
5988
5.01k
                WORD32 idx0_cost, idx1_cost;
5989
5990
                /* calculate the ABS mvd for cand 0 */
5991
5.01k
                idx0_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[0].s_l1_mv.i2_mvx);
5992
5.01k
                idx0_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[0].s_l1_mv.i2_mvy);
5993
5994
                /* calculate the ABS mvd for cand 1 */
5995
5.01k
                if(u1_use_mvp_from_top_row)
5996
5.01k
                {
5997
5.01k
                    idx1_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[1].s_l1_mv.i2_mvx);
5998
5.01k
                    idx1_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[1].s_l1_mv.i2_mvy);
5999
5.01k
                }
6000
0
                else
6001
0
                {
6002
0
                    idx1_cost = INT_MAX;
6003
0
                }
6004
6005
                /* based on the least cost choose the mvp idx */
6006
5.01k
                if(idx0_cost <= idx1_cost)
6007
3.50k
                {
6008
3.50k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -=
6009
3.50k
                        as_pred_mv[0].s_l1_mv.i2_mvx;
6010
3.50k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -=
6011
3.50k
                        as_pred_mv[0].s_l1_mv.i2_mvy;
6012
6013
3.50k
                    ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 0;
6014
3.50k
                }
6015
1.51k
                else
6016
1.51k
                {
6017
1.51k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -=
6018
1.51k
                        as_pred_mv[1].s_l1_mv.i2_mvx;
6019
1.51k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -=
6020
1.51k
                        as_pred_mv[1].s_l1_mv.i2_mvy;
6021
6022
1.51k
                    ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 1;
6023
1.51k
                }
6024
6025
                /* set the pred l1 flag for neighbour storage */
6026
5.01k
                ps_nbr_4x4->b1_pred_l1_flag = 1;
6027
5.01k
            }
6028
6029
            /* set the merge flag to 0 */
6030
122k
            ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = 0;
6031
122k
            ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = 0;
6032
122k
        }
6033
230k
        else
6034
230k
        {
6035
            /* copy the merge index from candidate */
6036
230k
            ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = ps_pu->b1_merge_flag;
6037
6038
230k
            ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = ps_pu->b3_merge_idx;
6039
6040
230k
            if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode))
6041
226k
            {
6042
                /* set the pred l0 flag for neighbour storage */
6043
226k
                ps_nbr_4x4->b1_pred_l0_flag = 1;
6044
226k
            }
6045
6046
            /* skip/merge case: no MVD or MVP idx is computed for L1 here, only the pred l1 flag is set */
6047
230k
            if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode))
6048
13.7k
            {
6049
                /* set the pred l1 flag for neighbour storage */
6050
13.7k
                ps_nbr_4x4->b1_pred_l1_flag = 1;
6051
13.7k
            }
6052
230k
        }
6053
6054
        /* RD opt cost computation is part of cu_ntu func hence here it is set to 0 */
6055
352k
        rdopt_cost = 0;
6056
6057
        /* copy the MV to colocated Mv structure */
6058
352k
        ps_final_prms->as_col_pu_enc_loop[ctr].s_l0_mv = ps_pu->mv.s_l0_mv;
6059
352k
        ps_final_prms->as_col_pu_enc_loop[ctr].s_l1_mv = ps_pu->mv.s_l1_mv;
6060
352k
        ps_final_prms->as_col_pu_enc_loop[ctr].i1_l0_ref_idx = ps_pu->mv.i1_l0_ref_idx;
6061
352k
        ps_final_prms->as_col_pu_enc_loop[ctr].i1_l1_ref_idx = ps_pu->mv.i1_l1_ref_idx;
6062
352k
        ps_final_prms->as_col_pu_enc_loop[ctr].b2_pred_mode = ps_pu->b2_pred_mode;
6063
352k
        ps_final_prms->as_col_pu_enc_loop[ctr].b1_intra_flag = 0;
6064
6065
        /* replicate neighbour 4x4 structure for entire partition */
6066
352k
        {
6067
352k
            WORD32 i, j;
6068
352k
            nbr_4x4_t *ps_tmp_4x4;
6069
6070
352k
            ps_tmp_4x4 = ps_nbr_4x4;
6071
6072
1.49M
            for(i = 0; i < (inter_pu_ht >> 2); i++)
6073
1.14M
            {
6074
6.31M
                for(j = 0; j < (inter_pu_wd >> 2); j++)
6075
5.17M
                {
6076
5.17M
                    ps_tmp_4x4[j] = *ps_nbr_4x4;
6077
5.17M
                }
6078
                /* row level update*/
6079
1.14M
                ps_tmp_4x4 += (cu_size >> 2);
6080
1.14M
            }
6081
352k
        }
6082
        /* set the neighbour map to 1 */
6083
352k
        ihevce_set_inter_nbr_map(
6084
352k
            ps_ctxt->pu1_ctb_nbr_map,
6085
352k
            ps_ctxt->i4_nbr_map_strd,
6086
352k
            cu_pos_x,
6087
352k
            cu_pos_y,
6088
352k
            (inter_pu_wd >> 2),
6089
352k
            (inter_pu_ht >> 2),
6090
352k
            1);
6091
        /* ----------- Motion Compensation for Luma ----------- */
6092
#if !ENABLE_MIXED_INTER_MODE_EVAL
6093
        {
6094
            IV_API_CALL_STATUS_T valid_mv_cand;
6095
6096
            /*If the inter candidate is neither a merge cand nor a skip cand,
6097
            or u1_high_speed_cu_dec_on is set, then calculate the mc.*/
6098
            if(0 == skip_or_merge_flag || (ps_ctxt->u1_high_speed_cu_dec_on))
6099
            {
6100
                valid_mv_cand =
6101
                    ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 0);
6102
6103
                /* assert if the MC is given a valid mv candidate */
6104
                ASSERT(valid_mv_cand == IV_SUCCESS);
6105
            }
6106
        }
6107
#endif
6108
352k
        if((2 == num_cu_part) && (0 == ctr))
6109
51.5k
        {
6110
            /* 2Nx__ partition case */
6111
51.5k
            if(inter_pu_wd == cu_size)
6112
39.5k
            {
6113
39.5k
                cu_pos_y += (inter_pu_ht >> 2);
6114
39.5k
                pu1_pred += (inter_pu_ht * pred_stride);
6115
39.5k
                ps_nbr_4x4 += (inter_pu_ht >> 2) * (cu_size >> 2);
6116
39.5k
                ps_left_nbr_4x4 += (inter_pu_ht >> 2) * nbr_4x4_left_strd;
6117
39.5k
                ps_top_nbr_4x4 = ps_nbr_4x4 - (cu_size >> 2);
6118
39.5k
                ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - nbr_4x4_left_strd;
6119
39.5k
            }
6120
6121
            /* __x2N partition case */
6122
51.5k
            if(inter_pu_ht == cu_size)
6123
11.9k
            {
6124
11.9k
                cu_pos_x += (inter_pu_wd >> 2);
6125
11.9k
                pu1_pred += inter_pu_wd;
6126
11.9k
                ps_nbr_4x4 += (inter_pu_wd >> 2);
6127
11.9k
                ps_left_nbr_4x4 = ps_nbr_4x4 - 1;
6128
11.9k
                ps_top_nbr_4x4 += (inter_pu_wd >> 2);
6129
11.9k
                ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1;
6130
11.9k
                nbr_4x4_left_strd = (cu_size >> 2);
6131
11.9k
            }
6132
51.5k
        }
6133
352k
    }
6134
6135
301k
    return (rdopt_cost);
6136
301k
}
6137
6138
/*!
6139
******************************************************************************
6140
* \if Function name : ihevce_intra_chroma_pred_mode_selector \endif
6141
*
6142
* \brief
6143
*    Coding unit processing function for chroma special modes (Non-Luma modes)
6144
*
6145
* \param[in] ps_ctxt       enc_loop module ctxt pointer
6146
* \param[in] ps_chrm_cu_buf_prms    ctxt having chroma related prms
6147
* \param[in] ps_cu_analyse      pointer to cu analyse
6148
* \param[in] rd_opt_curr_idx    index in the array of RDopt params
6149
* \param[in] tu_mode            TU_EQ_CU or other case
6150
*
6151
* \return
6152
*    Stores the best SATD mode, its RDOPT cost, CABAC state, TU bits
6153
*
6154
* \author
6155
*  Ittiam
6156
*
6157
*****************************************************************************
6158
*/
6159
UWORD8 ihevce_distortion_based_intra_chroma_mode_selector(
6160
    cu_analyse_t *ps_cu_analyse,
6161
    ihevc_intra_pred_chroma_ref_substitution_ft *pf_ref_substitution,
6162
    pf_intra_pred *ppf_chroma_ip,
6163
    pf_res_trans_luma_had_chroma *ppf_resd_trns_had,
6164
    UWORD8 *pu1_src,
6165
    WORD32 i4_src_stride,
6166
    UWORD8 *pu1_pred,
6167
    WORD32 i4_pred_stride,
6168
    UWORD8 *pu1_ctb_nbr_map,
6169
    WORD32 i4_nbr_map_strd,
6170
    UWORD8 *pu1_ref_sub_out,
6171
    WORD32 i4_alpha_stim_multiplier,
6172
    UWORD8 u1_is_cu_noisy,
6173
    UWORD8 u1_trans_size,
6174
    UWORD8 u1_trans_idx,
6175
    UWORD8 u1_num_tus_in_cu,
6176
    UWORD8 u1_num_4x4_luma_blks_in_tu,
6177
    UWORD8 u1_enable_psyRDOPT,
6178
    UWORD8 u1_is_422)
6179
232k
{
6180
232k
    UWORD8 u1_chrm_mode;
6181
232k
    UWORD8 ctr;
6182
232k
    WORD32 i4_subtu_idx;
6183
6184
232k
    WORD32 i = 0;
6185
232k
    UWORD8 u1_chrm_modes[4] = { 0, 1, 10, 26 };
6186
232k
    WORD32 i4_satd_had[4] = { 0 };
6187
232k
    WORD32 i4_best_satd_had = INT_MAX;
6188
232k
    UWORD8 u1_cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1);
6189
232k
    UWORD8 u1_cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1);
6190
232k
    WORD32 i4_num_sub_tus = u1_is_422 + 1;
6191
232k
    UWORD8 u1_best_chrm_mode = 0;
6192
6193
    /* Get the best satd among all possible modes */
6194
1.16M
    for(i = 0; i < 4; i++)
6195
930k
    {
6196
930k
        WORD32 left_strd = i4_src_stride;
6197
6198
930k
        u1_chrm_mode = (u1_is_422 == 1) ? gau1_chroma422_intra_angle_mapping[u1_chrm_modes[i]]
6199
930k
                                        : u1_chrm_modes[i];
6200
6201
        /* loop based on num tus in a cu */
6202
2.09M
        for(ctr = 0; ctr < u1_num_tus_in_cu; ctr++)
6203
1.16M
        {
6204
1.16M
            WORD32 luma_nbr_flags;
6205
1.16M
            WORD32 chrm_pred_func_idx;
6206
6207
1.16M
            WORD32 i4_trans_size_m2 = u1_trans_size << 1;
6208
1.16M
            UWORD8 *pu1_tu_src = pu1_src + ((ctr & 1) * i4_trans_size_m2) +
6209
1.16M
                                 (((ctr > 1) * u1_trans_size * i4_src_stride) << u1_is_422);
6210
1.16M
            UWORD8 *pu1_tu_pred = pu1_pred + ((ctr & 1) * i4_trans_size_m2) +
6211
1.16M
                                  (((ctr > 1) * u1_trans_size * i4_pred_stride) << u1_is_422);
6212
1.16M
            WORD32 i4_curr_tu_pos_x = u1_cu_pos_x + ((ctr & 1) * u1_num_4x4_luma_blks_in_tu);
6213
1.16M
            WORD32 i4_curr_tu_pos_y = u1_cu_pos_y + ((ctr > 1) * u1_num_4x4_luma_blks_in_tu);
6214
6215
1.16M
            luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
6216
1.16M
                pu1_ctb_nbr_map,
6217
1.16M
                i4_nbr_map_strd,
6218
1.16M
                i4_curr_tu_pos_x,
6219
1.16M
                i4_curr_tu_pos_y,
6220
1.16M
                u1_num_4x4_luma_blks_in_tu,
6221
1.16M
                u1_num_4x4_luma_blks_in_tu);
6222
6223
2.32M
            for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++)
6224
1.16M
            {
6225
1.16M
                WORD32 nbr_flags;
6226
6227
1.16M
                UWORD8 *pu1_cur_src =
6228
1.16M
                    pu1_tu_src + ((i4_subtu_idx == 1) * u1_trans_size * i4_src_stride);
6229
1.16M
                UWORD8 *pu1_cur_pred =
6230
1.16M
                    pu1_tu_pred + ((i4_subtu_idx == 1) * u1_trans_size * i4_pred_stride);
6231
1.16M
                UWORD8 *pu1_left = pu1_cur_src - 2;
6232
1.16M
                UWORD8 *pu1_top = pu1_cur_src - i4_src_stride;
6233
1.16M
                UWORD8 *pu1_top_left = pu1_top - 2;
6234
6235
1.16M
                nbr_flags = ihevce_get_intra_chroma_tu_nbr(
6236
1.16M
                    luma_nbr_flags, i4_subtu_idx, u1_trans_size, u1_is_422);
6237
6238
                /* call the chroma reference array substitution */
6239
1.16M
                pf_ref_substitution(
6240
1.16M
                    pu1_top_left,
6241
1.16M
                    pu1_top,
6242
1.16M
                    pu1_left,
6243
1.16M
                    left_strd,
6244
1.16M
                    u1_trans_size,
6245
1.16M
                    nbr_flags,
6246
1.16M
                    pu1_ref_sub_out,
6247
1.16M
                    1,
6248
1.16M
                    CHROMA_FMT_IDC_YUV420);
6249
6250
                /* use the look up to get the function idx */
6251
1.16M
                chrm_pred_func_idx = g_i4_ip_funcs[u1_chrm_mode];
6252
6253
                /* call the intra prediction function */
6254
1.16M
                ppf_chroma_ip[chrm_pred_func_idx](
6255
1.16M
                    pu1_ref_sub_out, 1, pu1_cur_pred, i4_pred_stride, u1_trans_size, u1_chrm_mode);
6256
6257
1.16M
                if(!u1_is_cu_noisy || !i4_alpha_stim_multiplier)
6258
1.16M
                {
6259
                    /* compute Hadamard-transform satd : Cb */
6260
1.16M
                    i4_satd_had[i] += ppf_resd_trns_had[u1_trans_idx - 1](
6261
1.16M
                        pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0);
6262
6263
                    /* compute Hadamard-transform satd : Cr */
6264
1.16M
                    i4_satd_had[i] += ppf_resd_trns_had[u1_trans_idx - 1](
6265
1.16M
                        pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0);
6266
1.16M
                }
6267
0
                else
6268
0
                {
6269
0
                    WORD32 i4_satd;
6270
6271
                    /* compute Hadamard-transform satd : Cb */
6272
0
                    i4_satd = ppf_resd_trns_had[u1_trans_idx - 1](
6273
0
                        pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0);
6274
6275
0
                    i4_satd = ihevce_inject_stim_into_distortion(
6276
0
                        pu1_cur_src,
6277
0
                        i4_src_stride,
6278
0
                        pu1_cur_pred,
6279
0
                        i4_pred_stride,
6280
0
                        i4_satd,
6281
0
                        i4_alpha_stim_multiplier,
6282
0
                        u1_trans_size,
6283
0
                        0,
6284
0
                        u1_enable_psyRDOPT,
6285
0
                        U_PLANE);
6286
6287
0
                    i4_satd_had[i] += i4_satd;
6288
6289
                    /* compute Hadamard-transform satd : Cr */
6290
0
                    i4_satd = ppf_resd_trns_had[u1_trans_idx - 1](
6291
0
                        pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0);
6292
6293
0
                    i4_satd = ihevce_inject_stim_into_distortion(
6294
0
                        pu1_cur_src,
6295
0
                        i4_src_stride,
6296
0
                        pu1_cur_pred,
6297
0
                        i4_pred_stride,
6298
0
                        i4_satd,
6299
0
                        i4_alpha_stim_multiplier,
6300
0
                        u1_trans_size,
6301
0
                        0,
6302
0
                        u1_enable_psyRDOPT,
6303
0
                        V_PLANE);
6304
6305
0
                    i4_satd_had[i] += i4_satd;
6306
0
                }
6307
1.16M
            }
6308
6309
            /* set the neighbour map to 1 */
6310
1.16M
            ihevce_set_nbr_map(
6311
1.16M
                pu1_ctb_nbr_map,
6312
1.16M
                i4_nbr_map_strd,
6313
1.16M
                i4_curr_tu_pos_x,
6314
1.16M
                i4_curr_tu_pos_y,
6315
1.16M
                u1_num_4x4_luma_blks_in_tu,
6316
1.16M
                1);
6317
1.16M
        }
6318
6319
        /* set the neighbour map to 0 */
6320
930k
        ihevce_set_nbr_map(
6321
930k
            pu1_ctb_nbr_map,
6322
930k
            i4_nbr_map_strd,
6323
930k
            (ps_cu_analyse->b3_cu_pos_x << 1),
6324
930k
            (ps_cu_analyse->b3_cu_pos_y << 1),
6325
930k
            (ps_cu_analyse->u1_cu_size >> 2),
6326
930k
            0);
6327
6328
        /* Get the least SATD and corresponding mode */
6329
930k
        if(i4_best_satd_had > i4_satd_had[i])
6330
348k
        {
6331
348k
            i4_best_satd_had = i4_satd_had[i];
6332
348k
            u1_best_chrm_mode = u1_chrm_mode;
6333
348k
        }
6334
930k
    }
6335
6336
232k
    return u1_best_chrm_mode;
6337
232k
}
6338
6339
void ihevce_intra_chroma_pred_mode_selector(
6340
    ihevce_enc_loop_ctxt_t *ps_ctxt,
6341
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
6342
    cu_analyse_t *ps_cu_analyse,
6343
    WORD32 rd_opt_curr_idx,
6344
    WORD32 tu_mode,
6345
    WORD32 i4_alpha_stim_multiplier,
6346
    UWORD8 u1_is_cu_noisy)
6347
232k
{
6348
232k
    chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt;
6349
6350
232k
    ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
6351
6352
232k
    UWORD8 *pu1_pred;
6353
232k
    WORD32 trans_size;
6354
232k
    WORD32 num_tus_in_cu;
6355
232k
    WORD32 pred_strd;
6356
232k
    WORD32 ctr;
6357
232k
    WORD32 i4_subtu_idx;
6358
232k
    WORD32 i4_num_sub_tus;
6359
232k
    WORD32 trans_idx;
6360
232k
    WORD32 scan_idx;
6361
232k
    WORD32 num_4x4_luma_in_tu;
6362
232k
    WORD32 cu_pos_x;
6363
232k
    WORD32 cu_pos_y;
6364
6365
232k
    recon_datastore_t *aps_recon_datastore[2] = { &ps_ctxt->as_cu_prms[0].s_recon_datastore,
6366
232k
                                                  &ps_ctxt->as_cu_prms[1].s_recon_datastore };
6367
6368
232k
    LWORD64 chrm_cod_cost = 0;
6369
232k
    WORD32 chrm_tu_bits = 0;
6370
232k
    WORD32 best_chrm_mode = DM_CHROMA_IDX;
6371
232k
    UWORD8 *pu1_chrm_src = ps_chrm_cu_buf_prms->pu1_curr_src;
6372
232k
    WORD32 chrm_src_stride = ps_chrm_cu_buf_prms->i4_chrm_src_stride;
6373
232k
    UWORD8 *pu1_cu_left = ps_chrm_cu_buf_prms->pu1_cu_left;
6374
232k
    UWORD8 *pu1_cu_top = ps_chrm_cu_buf_prms->pu1_cu_top;
6375
232k
    UWORD8 *pu1_cu_top_left = ps_chrm_cu_buf_prms->pu1_cu_top_left;
6376
232k
    WORD32 cu_left_stride = ps_chrm_cu_buf_prms->i4_cu_left_stride;
6377
232k
    WORD32 cu_size = ps_cu_analyse->u1_cu_size;
6378
232k
    WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
6379
232k
    WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
6380
232k
    UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
6381
6382
232k
    ihevc_intra_pred_chroma_ref_substitution_fptr =
6383
232k
        ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
6384
232k
    i4_num_sub_tus = (u1_is_422 == 1) + 1;
6385
6386
#if DISABLE_RDOQ_INTRA
6387
    i4_perform_rdoq = 0;
6388
#endif
6389
6390
232k
    if(TU_EQ_CU == tu_mode)
6391
213k
    {
6392
213k
        num_tus_in_cu = 1;
6393
213k
        trans_size = cu_size >> 1;
6394
213k
        num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/
6395
213k
        ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode];
6396
213k
    }
6397
19.5k
    else
6398
19.5k
    {
6399
19.5k
        num_tus_in_cu = 4;
6400
19.5k
        trans_size = cu_size >> 2;
6401
19.5k
        num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/
6402
6403
        /* For 8x8 CU only one TU */
6404
19.5k
        if(MIN_TU_SIZE > trans_size)
6405
0
        {
6406
0
            trans_size = MIN_TU_SIZE;
6407
0
            num_tus_in_cu = 1;
6408
            /* chroma nbr avail. is derived based on luma.
6409
            for 4x4 chrm use 8x8 luma's size */
6410
0
            num_4x4_luma_in_tu = num_4x4_luma_in_tu << 1;
6411
0
        }
6412
6413
19.5k
        ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode];
6414
19.5k
    }
6415
6416
    /* Can't be TU_EQ_SUBCU case */
6417
232k
    ASSERT(TU_EQ_SUBCU != tu_mode);
6418
6419
    /* translate the transform size to index */
6420
232k
    trans_idx = trans_size >> 2;
6421
6422
232k
    pu1_pred = (UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data;
6423
6424
232k
    pred_strd = ps_chr_intra_satd_ctxt->i4_pred_stride;
6425
6426
    /* for 16x16 cases */
6427
232k
    if(16 == trans_size)
6428
13.3k
    {
6429
13.3k
        trans_idx = 3;
6430
13.3k
    }
6431
6432
232k
    best_chrm_mode = ihevce_distortion_based_intra_chroma_mode_selector(
6433
232k
        ps_cu_analyse,
6434
232k
        ihevc_intra_pred_chroma_ref_substitution_fptr,
6435
232k
        ps_ctxt->apf_chrm_ip,
6436
232k
        ps_ctxt->apf_chrm_resd_trns_had,
6437
232k
        pu1_chrm_src,
6438
232k
        chrm_src_stride,
6439
232k
        pu1_pred,
6440
232k
        pred_strd,
6441
232k
        ps_ctxt->pu1_ctb_nbr_map,
6442
232k
        ps_ctxt->i4_nbr_map_strd,
6443
232k
        (UWORD8 *)ps_ctxt->pv_ref_sub_out,
6444
232k
        i4_alpha_stim_multiplier,
6445
232k
        u1_is_cu_noisy,
6446
232k
        trans_size,
6447
232k
        trans_idx,
6448
232k
        num_tus_in_cu,
6449
232k
        num_4x4_luma_in_tu,
6450
232k
        ps_ctxt->u1_enable_psyRDOPT,
6451
232k
        u1_is_422);
6452
6453
    /* Store the best chroma mode */
6454
232k
    ps_chr_intra_satd_ctxt->u1_best_cr_mode = best_chrm_mode;
6455
6456
    /* evaluate RDOPT cost for the Best mode */
6457
232k
    {
6458
232k
        WORD32 i4_subtu_pos_x;
6459
232k
        WORD32 i4_subtu_pos_y;
6460
232k
        UWORD8 u1_compute_spatial_ssd;
6461
6462
232k
        WORD32 ai4_total_bytes_offset_cb[2] = { 0, 0 };
6463
232k
        WORD32 ai4_total_bytes_offset_cr[2] = { 0, 0 };
6464
        /* State for prefix bin of chroma intra pred mode before CU encode */
6465
232k
        UWORD8 u1_chroma_intra_mode_prefix_state =
6466
232k
            ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_CHROMA_PRED_MODE];
6467
232k
        WORD32 luma_trans_size = trans_size << 1;
6468
232k
        WORD32 calc_recon = 0;
6469
232k
        UWORD8 *pu1_left = pu1_cu_left;
6470
232k
        UWORD8 *pu1_top = pu1_cu_top;
6471
232k
        UWORD8 *pu1_top_left = pu1_cu_top_left;
6472
232k
        WORD32 left_strd = cu_left_stride;
6473
6474
232k
        if(ps_ctxt->i1_cu_qp_delta_enable)
6475
99.5k
        {
6476
99.5k
            ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, luma_trans_size, 1);
6477
99.5k
        }
6478
6479
232k
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
6480
65.9k
                                 (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
6481
65.9k
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
6482
6483
232k
        if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
6484
0
        {
6485
0
            u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
6486
0
                                     CONVERT_SSDS_TO_SPATIAL_DOMAIN;
6487
0
        }
6488
6489
        /* get the 4x4 level position of current cu */
6490
232k
        cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1);
6491
232k
        cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1);
6492
6493
232k
        calc_recon = !u1_compute_spatial_ssd && ((4 == num_tus_in_cu) || (u1_is_422 == 1));
6494
6495
232k
        if(calc_recon || u1_compute_spatial_ssd)
6496
77.5k
        {
6497
77.5k
            aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1;
6498
77.5k
            aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1;
6499
77.5k
        }
6500
155k
        else
6501
155k
        {
6502
155k
            aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0;
6503
155k
            aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0;
6504
155k
        }
6505
6506
        /* loop based on num tus in a cu */
6507
523k
        for(ctr = 0; ctr < num_tus_in_cu; ctr++)
6508
291k
        {
6509
291k
            WORD16 *pi2_cur_deq_data_cb;
6510
291k
            WORD16 *pi2_cur_deq_data_cr;
6511
6512
291k
            WORD32 deq_data_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride;
6513
291k
            WORD32 luma_nbr_flags = 0;
6514
6515
291k
            luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
6516
291k
                ps_ctxt->pu1_ctb_nbr_map,
6517
291k
                ps_ctxt->i4_nbr_map_strd,
6518
291k
                (ctr & 1) * (luma_trans_size >> 2) + cu_pos_x,
6519
291k
                (ctr > 1) * (luma_trans_size >> 2) + cu_pos_y,
6520
291k
                (luma_trans_size >> 2),
6521
291k
                (luma_trans_size >> 2));
6522
6523
582k
            for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++)
6524
291k
            {
6525
291k
                WORD32 cbf, num_bytes;
6526
291k
                LWORD64 trans_ssd_u, trans_ssd_v;
6527
291k
                UWORD8 u1_is_recon_available;
6528
6529
291k
                WORD32 trans_size_m2 = trans_size << 1;
6530
291k
                UWORD8 *pu1_cur_src = pu1_chrm_src + ((ctr & 1) * trans_size_m2) +
6531
291k
                                      (((ctr > 1) * trans_size * chrm_src_stride) << u1_is_422) +
6532
291k
                                      (i4_subtu_idx * trans_size * chrm_src_stride);
6533
291k
                UWORD8 *pu1_cur_pred = pu1_pred + ((ctr & 1) * trans_size_m2) +
6534
291k
                                       (((ctr > 1) * trans_size * pred_strd) << u1_is_422) +
6535
291k
                                       (i4_subtu_idx * trans_size * pred_strd);
6536
291k
                WORD32 i4_recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride;
6537
291k
                UWORD8 *pu1_cur_recon = ((UWORD8 *)aps_recon_datastore[0]
6538
291k
                                             ->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)]) +
6539
291k
                                        ((ctr & 1) * trans_size_m2) +
6540
291k
                                        (((ctr > 1) * trans_size * i4_recon_stride) << u1_is_422) +
6541
291k
                                        (i4_subtu_idx * trans_size * i4_recon_stride);
6542
6543
                /* Use Chroma coeff/iq buf of the cur. intra cand. Not rememb.
6544
                chroma coeff/iq for high quality intra SATD special modes. Will
6545
                be over written by coeff of luma mode in chroma_rdopt call */
6546
291k
                UWORD8 *pu1_ecd_data_cb =
6547
291k
                    &ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0];
6548
291k
                UWORD8 *pu1_ecd_data_cr =
6549
291k
                    &ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0];
6550
6551
291k
                WORD32 chrm_pred_func_idx = 0;
6552
291k
                LWORD64 curr_cb_cod_cost = 0;
6553
291k
                LWORD64 curr_cr_cod_cost = 0;
6554
291k
                WORD32 nbr_flags = 0;
6555
6556
291k
                i4_subtu_pos_x = (((ctr & 1) * trans_size_m2) >> 2);
6557
291k
                i4_subtu_pos_y = (((ctr > 1) * trans_size) >> (!u1_is_422 + 1)) +
6558
291k
                                 ((i4_subtu_idx * trans_size) >> 2);
6559
291k
                pi2_cur_deq_data_cb = &ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] +
6560
291k
                                      ((ctr & 1) * trans_size) +
6561
291k
                                      (((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) +
6562
291k
                                      (i4_subtu_idx * trans_size * deq_data_strd);
6563
291k
                pi2_cur_deq_data_cr = &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] +
6564
291k
                                      ((ctr & 1) * trans_size) +
6565
291k
                                      (((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) +
6566
291k
                                      (i4_subtu_idx * trans_size * deq_data_strd);
6567
6568
                /* left cu boundary */
6569
291k
                if(0 == i4_subtu_pos_x)
6570
252k
                {
6571
252k
                    left_strd = cu_left_stride;
6572
252k
                    pu1_left = pu1_cu_left + (i4_subtu_pos_y << 2) * left_strd;
6573
252k
                }
6574
39.0k
                else
6575
39.0k
                {
6576
39.0k
                    pu1_left = pu1_cur_recon - 2;
6577
39.0k
                    left_strd = i4_recon_stride;
6578
39.0k
                }
6579
6580
                /* top cu boundary */
6581
291k
                if(0 == i4_subtu_pos_y)
6582
252k
                {
6583
252k
                    pu1_top = pu1_cu_top + (i4_subtu_pos_x << 2);
6584
252k
                }
6585
39.0k
                else
6586
39.0k
                {
6587
39.0k
                    pu1_top = pu1_cur_recon - i4_recon_stride;
6588
39.0k
                }
6589
6590
                /* by default top left is set to cu top left */
6591
291k
                pu1_top_left = pu1_cu_top_left;
6592
6593
                /* top left based on position */
6594
291k
                if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x))
6595
19.5k
                {
6596
19.5k
                    pu1_top_left = pu1_left - left_strd;
6597
19.5k
                }
6598
271k
                else if(0 != i4_subtu_pos_x)
6599
39.0k
                {
6600
39.0k
                    pu1_top_left = pu1_top - 2;
6601
39.0k
                }
6602
6603
                /* populate the coeffs scan idx */
6604
291k
                scan_idx = SCAN_DIAG_UPRIGHT;
6605
6606
                /* RDOPT copy States :  TU init (best until prev TU) to current */
6607
291k
                COPY_CABAC_STATES(
6608
291k
                    &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6609
291k
                         .s_cabac_ctxt.au1_ctxt_models[0],
6610
291k
                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6611
291k
                    IHEVC_CAB_CTXT_END);
6612
6613
                /* for 4x4 transforms, the scan is chosen based on the intra pred mode */
6614
291k
                if(4 == trans_size)
6615
203k
                {
6616
                    /* for modes from 22 upto 30 horizontal scan is used */
6617
203k
                    if((best_chrm_mode > 21) && (best_chrm_mode < 31))
6618
8.02k
                    {
6619
8.02k
                        scan_idx = SCAN_HORZ;
6620
8.02k
                    }
6621
                    /* for modes from 6 up to 14 vertical scan is used */
6622
195k
                    else if((best_chrm_mode > 5) && (best_chrm_mode < 15))
6623
42.2k
                    {
6624
42.2k
                        scan_idx = SCAN_VERT;
6625
42.2k
                    }
6626
203k
                }
6627
6628
291k
                nbr_flags = ihevce_get_intra_chroma_tu_nbr(
6629
291k
                    luma_nbr_flags, i4_subtu_idx, trans_size, u1_is_422);
6630
6631
                /* call the chroma reference array substitution */
6632
291k
                ihevc_intra_pred_chroma_ref_substitution_fptr(
6633
291k
                    pu1_top_left,
6634
291k
                    pu1_top,
6635
291k
                    pu1_left,
6636
291k
                    left_strd,
6637
291k
                    trans_size,
6638
291k
                    nbr_flags,
6639
291k
                    (UWORD8 *)ps_ctxt->pv_ref_sub_out,
6640
291k
                    1,
6641
291k
                    CHROMA_FMT_IDC_YUV420);
6642
6643
                /* use the look up to get the function idx */
6644
291k
                chrm_pred_func_idx = g_i4_ip_funcs[best_chrm_mode];
6645
6646
                /* call the intra prediction function */
6647
291k
                ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
6648
291k
                    (UWORD8 *)ps_ctxt->pv_ref_sub_out,
6649
291k
                    1,
6650
291k
                    pu1_cur_pred,
6651
291k
                    pred_strd,
6652
291k
                    trans_size,
6653
291k
                    best_chrm_mode);
6654
6655
                /* UPLANE RDOPT Loop */
6656
291k
                {
6657
291k
                    WORD32 tu_bits;
6658
6659
291k
                    cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
6660
291k
                        ps_ctxt,
6661
291k
                        pu1_cur_pred,
6662
291k
                        pred_strd,
6663
291k
                        pu1_cur_src,
6664
291k
                        chrm_src_stride,
6665
291k
                        pi2_cur_deq_data_cb,
6666
291k
                        deq_data_strd,
6667
291k
                        pu1_cur_recon,
6668
291k
                        i4_recon_stride,
6669
291k
                        pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx],
6670
291k
                        ps_ctxt->au1_cu_csbf,
6671
291k
                        ps_ctxt->i4_cu_csbf_strd,
6672
291k
                        trans_size,
6673
291k
                        scan_idx,
6674
291k
                        1,
6675
291k
                        &num_bytes,
6676
291k
                        &tu_bits,
6677
291k
                        &ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr],
6678
291k
                        &ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr],
6679
291k
                        &u1_is_recon_available,
6680
291k
                        i4_perform_sbh,
6681
291k
                        i4_perform_rdoq,
6682
291k
                        &trans_ssd_u,
6683
291k
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
6684
291k
                        i4_alpha_stim_multiplier,
6685
291k
                        u1_is_cu_noisy,
6686
291k
#endif
6687
291k
                        0,
6688
291k
                        u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
6689
291k
                        U_PLANE);
6690
6691
#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL
6692
                    if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
6693
                    {
6694
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
6695
                        trans_ssd_u = ihevce_inject_stim_into_distortion(
6696
                            pu1_cur_src,
6697
                            chrm_src_stride,
6698
                            pu1_cur_pred,
6699
                            pred_strd,
6700
                            trans_ssd_u,
6701
                            i4_alpha_stim_multiplier,
6702
                            trans_size,
6703
                            0,
6704
                            ps_ctxt->u1_enable_psyRDOPT,
6705
                            U_PLANE);
6706
#else
6707
                        if(u1_compute_spatial_ssd && u1_is_recon_available)
6708
                        {
6709
                            trans_ssd_u = ihevce_inject_stim_into_distortion(
6710
                                pu1_cur_src,
6711
                                chrm_src_stride,
6712
                                pu1_cur_recon,
6713
                                i4_recon_stride,
6714
                                trans_ssd_u,
6715
                                i4_alpha_stim_multiplier,
6716
                                trans_size,
6717
                                0,
6718
                                ps_ctxt->u1_enable_psyRDOPT,
6719
                                U_PLANE);
6720
                        }
6721
                        else
6722
                        {
6723
                            trans_ssd_u = ihevce_inject_stim_into_distortion(
6724
                                pu1_cur_src,
6725
                                chrm_src_stride,
6726
                                pu1_cur_pred,
6727
                                pred_strd,
6728
                                trans_ssd_u,
6729
                                i4_alpha_stim_multiplier,
6730
                                trans_size,
6731
                                0,
6732
                                ps_ctxt->u1_enable_psyRDOPT,
6733
                                U_PLANE);
6734
                        }
6735
#endif
6736
                    }
6737
#endif
6738
6739
                    /* RDOPT copy States :  New updated after curr TU to TU init */
6740
291k
                    if(0 != cbf)
6741
103k
                    {
6742
103k
                        memcpy(
6743
103k
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6744
103k
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6745
103k
                                 .s_cabac_ctxt.au1_ctxt_models[0],
6746
103k
                            IHEVC_CAB_CTXT_END);
6747
103k
                    }
6748
                    /* RDOPT copy States :  Restoring back the Cb init state to Cr */
6749
187k
                    else
6750
187k
                    {
6751
187k
                        memcpy(
6752
187k
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6753
187k
                                 .s_cabac_ctxt.au1_ctxt_models[0],
6754
187k
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6755
187k
                            IHEVC_CAB_CTXT_END);
6756
187k
                    }
6757
6758
291k
                    if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd))
6759
46.1k
                    {
6760
46.1k
                        ihevce_chroma_it_recon_fxn(
6761
46.1k
                            ps_ctxt,
6762
46.1k
                            pi2_cur_deq_data_cb,
6763
46.1k
                            deq_data_strd,
6764
46.1k
                            pu1_cur_pred,
6765
46.1k
                            pred_strd,
6766
46.1k
                            pu1_cur_recon,
6767
46.1k
                            i4_recon_stride,
6768
46.1k
                            (pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx]),
6769
46.1k
                            trans_size,
6770
46.1k
                            cbf,
6771
46.1k
                            ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr],
6772
46.1k
                            ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr],
6773
46.1k
                            U_PLANE);
6774
46.1k
                    }
6775
6776
291k
                    ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr] = cbf;
6777
291k
                    curr_cb_cod_cost =
6778
291k
                        trans_ssd_u +
6779
291k
                        COMPUTE_RATE_COST_CLIP30(
6780
291k
                            tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
6781
291k
                    chrm_tu_bits += tu_bits;
6782
291k
                    ai4_total_bytes_offset_cb[i4_subtu_idx] += num_bytes;
6783
291k
                    ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr] =
6784
291k
                        num_bytes;
6785
291k
                }
6786
6787
                /* VPLANE RDOPT Loop */
6788
291k
                {
6789
291k
                    WORD32 tu_bits;
6790
6791
291k
                    cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
6792
291k
                        ps_ctxt,
6793
291k
                        pu1_cur_pred,
6794
291k
                        pred_strd,
6795
291k
                        pu1_cur_src,
6796
291k
                        chrm_src_stride,
6797
291k
                        pi2_cur_deq_data_cr,
6798
291k
                        deq_data_strd,
6799
291k
                        pu1_cur_recon,
6800
291k
                        i4_recon_stride,
6801
291k
                        pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx],
6802
291k
                        ps_ctxt->au1_cu_csbf,
6803
291k
                        ps_ctxt->i4_cu_csbf_strd,
6804
291k
                        trans_size,
6805
291k
                        scan_idx,
6806
291k
                        1,
6807
291k
                        &num_bytes,
6808
291k
                        &tu_bits,
6809
291k
                        &ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr],
6810
291k
                        &ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr],
6811
291k
                        &u1_is_recon_available,
6812
291k
                        i4_perform_sbh,
6813
291k
                        i4_perform_rdoq,
6814
291k
                        &trans_ssd_v,
6815
291k
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
6816
291k
                        i4_alpha_stim_multiplier,
6817
291k
                        u1_is_cu_noisy,
6818
291k
#endif
6819
291k
                        0,
6820
291k
                        u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
6821
291k
                        V_PLANE);
6822
6823
#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL
6824
                    if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
6825
                    {
6826
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
6827
                        trans_ssd_v = ihevce_inject_stim_into_distortion(
6828
                            pu1_cur_src,
6829
                            chrm_src_stride,
6830
                            pu1_cur_pred,
6831
                            pred_strd,
6832
                            trans_ssd_v,
6833
                            i4_alpha_stim_multiplier,
6834
                            trans_size,
6835
                            0,
6836
                            ps_ctxt->u1_enable_psyRDOPT,
6837
                            V_PLANE);
6838
#else
6839
                        if(u1_compute_spatial_ssd && u1_is_recon_available)
6840
                        {
6841
                            trans_ssd_v = ihevce_inject_stim_into_distortion(
6842
                                pu1_cur_src,
6843
                                chrm_src_stride,
6844
                                pu1_cur_recon,
6845
                                i4_recon_stride,
6846
                                trans_ssd_v,
6847
                                i4_alpha_stim_multiplier,
6848
                                trans_size,
6849
                                0,
6850
                                ps_ctxt->u1_enable_psyRDOPT,
6851
                                V_PLANE);
6852
                        }
6853
                        else
6854
                        {
6855
                            trans_ssd_v = ihevce_inject_stim_into_distortion(
6856
                                pu1_cur_src,
6857
                                chrm_src_stride,
6858
                                pu1_cur_pred,
6859
                                pred_strd,
6860
                                trans_ssd_v,
6861
                                i4_alpha_stim_multiplier,
6862
                                trans_size,
6863
                                0,
6864
                                ps_ctxt->u1_enable_psyRDOPT,
6865
                                V_PLANE);
6866
                        }
6867
#endif
6868
                    }
6869
#endif
6870
6871
                    /* RDOPT copy States :  New updated after curr TU to TU init */
6872
291k
                    if(0 != cbf)
6873
95.1k
                    {
6874
95.1k
                        COPY_CABAC_STATES(
6875
95.1k
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6876
95.1k
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6877
95.1k
                                 .s_cabac_ctxt.au1_ctxt_models[0],
6878
95.1k
                            IHEVC_CAB_CTXT_END);
6879
95.1k
                    }
6880
                    /* RDOPT copy States :  Restoring back the Cb init state to Cr */
6881
196k
                    else
6882
196k
                    {
6883
196k
                        COPY_CABAC_STATES(
6884
196k
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6885
196k
                                 .s_cabac_ctxt.au1_ctxt_models[0],
6886
196k
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6887
196k
                            IHEVC_CAB_CTXT_END);
6888
196k
                    }
6889
6890
291k
                    if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd))
6891
46.1k
                    {
6892
46.1k
                        ihevce_chroma_it_recon_fxn(
6893
46.1k
                            ps_ctxt,
6894
46.1k
                            pi2_cur_deq_data_cr,
6895
46.1k
                            deq_data_strd,
6896
46.1k
                            pu1_cur_pred,
6897
46.1k
                            pred_strd,
6898
46.1k
                            pu1_cur_recon,
6899
46.1k
                            i4_recon_stride,
6900
46.1k
                            (pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx]),
6901
46.1k
                            trans_size,
6902
46.1k
                            cbf,
6903
46.1k
                            ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr],
6904
46.1k
                            ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr],
6905
46.1k
                            V_PLANE);
6906
46.1k
                    }
6907
6908
291k
                    ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr] = cbf;
6909
291k
                    curr_cr_cod_cost =
6910
291k
                        trans_ssd_v +
6911
291k
                        COMPUTE_RATE_COST_CLIP30(
6912
291k
                            tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
6913
291k
                    chrm_tu_bits += tu_bits;
6914
291k
                    ai4_total_bytes_offset_cr[i4_subtu_idx] += num_bytes;
6915
291k
                    ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr] =
6916
291k
                        num_bytes;
6917
291k
                }
6918
6919
291k
                chrm_cod_cost += curr_cb_cod_cost;
6920
291k
                chrm_cod_cost += curr_cr_cod_cost;
6921
291k
            }
6922
6923
            /* set the neighbour map to 1 */
6924
291k
            ihevce_set_nbr_map(
6925
291k
                ps_ctxt->pu1_ctb_nbr_map,
6926
291k
                ps_ctxt->i4_nbr_map_strd,
6927
291k
                (ctr & 1) * (luma_trans_size >> 2) + cu_pos_x,
6928
291k
                (ctr > 1) * (luma_trans_size >> 2) + cu_pos_y,
6929
291k
                (luma_trans_size >> 2),
6930
291k
                1);
6931
291k
        }
6932
6933
        /* set the neighbour map to 0 */
6934
232k
        ihevce_set_nbr_map(
6935
232k
            ps_ctxt->pu1_ctb_nbr_map,
6936
232k
            ps_ctxt->i4_nbr_map_strd,
6937
232k
            (ps_cu_analyse->b3_cu_pos_x << 1),
6938
232k
            (ps_cu_analyse->b3_cu_pos_y << 1),
6939
232k
            (ps_cu_analyse->u1_cu_size >> 2),
6940
232k
            0);
6941
6942
        /* Account for coding b3_chroma_intra_pred_mode prefix and suffix bins */
6943
        /* This is done by adding the bits for signalling chroma mode (0-3)    */
6944
        /* and subtracting the bits for chroma mode same as luma mode (4)      */
6945
232k
#if CHROMA_RDOPT_ENABLE
6946
232k
        {
6947
            /* Estimate bits to encode prefix bin as 1 for b3_chroma_intra_pred_mode */
6948
232k
            WORD32 bits_frac_1 =
6949
232k
                gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 1];
6950
6951
232k
            WORD32 bits_for_mode_0to3 = (2 << CABAC_FRAC_BITS_Q) + bits_frac_1;
6952
6953
            /* Estimate bits to encode prefix bin as 0 for b3_chroma_intra_pred_mode */
6954
232k
            WORD32 bits_for_mode4 =
6955
232k
                gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 0];
6956
6957
            /* accumulate into final rd cost for chroma */
6958
232k
            ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode = COMPUTE_RATE_COST_CLIP30(
6959
232k
                (bits_for_mode_0to3 - bits_for_mode4),
6960
232k
                ps_ctxt->i8_cl_ssd_lambda_chroma_qf,
6961
232k
                (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
6962
6963
232k
            chrm_cod_cost += ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode;
6964
232k
        }
6965
232k
#endif
6966
6967
232k
        if(ps_ctxt->u1_enable_psyRDOPT)
6968
0
        {
6969
0
            UWORD8 *pu1_recon_cu;
6970
0
            WORD32 recon_stride;
6971
0
            WORD32 curr_pos_x;
6972
0
            WORD32 curr_pos_y;
6973
0
            WORD32 start_index;
6974
0
            WORD32 num_horz_cu_in_ctb;
6975
0
            WORD32 had_block_size;
6976
6977
            /* TODO: sreenivasa ctb size has to be used appropriately */
6978
0
            had_block_size = 8;
6979
0
            num_horz_cu_in_ctb = 2 * 64 / had_block_size;
6980
0
            curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
6981
0
            curr_pos_y = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
6982
0
            recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride;
6983
0
            pu1_recon_cu =
6984
0
                aps_recon_datastore[0]->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)];  //
6985
6986
            /* start index to index the source satd of curr cu in the current ctb */
6987
0
            start_index = 2 * (curr_pos_x / had_block_size) +
6988
0
                          (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
6989
6990
0
            {
6991
0
                chrm_cod_cost += ihevce_psy_rd_cost_croma(
6992
0
                    ps_ctxt->ai4_source_chroma_satd,
6993
0
                    pu1_recon_cu,
6994
0
                    recon_stride,
6995
0
                    1,  //
6996
0
                    cu_size,
6997
0
                    0,  // pic type
6998
0
                    0,  //layer id
6999
0
                    ps_ctxt->i4_satd_lamda,  // lambda
7000
0
                    start_index,
7001
0
                    ps_ctxt->u1_is_input_data_hbd,  // 8 bit
7002
0
                    ps_ctxt->u1_chroma_array_type,
7003
0
                    &ps_ctxt->s_cmn_opt_func
7004
7005
0
                );  // chroma subsampling 420
7006
0
            }
7007
0
        }
7008
7009
232k
        ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt = chrm_cod_cost;
7010
232k
        ps_chr_intra_satd_ctxt->i4_chrm_tu_bits = chrm_tu_bits;
7011
7012
232k
        memcpy(
7013
232k
            &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0],
7014
232k
            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7015
232k
            IHEVC_CAB_CTXT_END);
7016
232k
    }
7017
232k
}
7018
7019
/*!
7020
******************************************************************************
7021
* \if Function name : ihevce_chroma_cu_prcs_rdopt \endif
7022
*
7023
* \brief
7024
*    Coding unit processing function for chroma
7025
*
7026
* \param[in] ps_ctxt    enc_loop module ctxt pointer
7027
* \param[in] rd_opt_curr_idx index in the array of RDopt params
7028
* \param[in] func_proc_mode TU_EQ_CU or other case
7029
* \param[in] pu1_chrm_src  pointer to source data buffer
7030
* \param[in] chrm_src_stride   source buffer stride
7031
* \param[in] pu1_cu_left pointer to left recon data buffer
7032
* \param[in] pu1_cu_top  pointer to top recon data buffer
7033
* \param[in] pu1_cu_top_left pointer to top left recon data buffer
7034
* \param[in] cu_left_stride left recon buffer stride
7035
* \param[in] cu_pos_x position x of current CU in CTB
7036
* \param[in] cu_pos_y position y of current CU in CTB
7037
* \param[out] pi4_chrm_tu_bits pointer to store the total chroma bits
7038
*
7039
* \return
7040
*    Chroma coding cost (Cb and Cr included)
7041
*
7042
* \author
7043
*  Ittiam
7044
*
7045
*****************************************************************************
7046
*/
7047
LWORD64 ihevce_chroma_cu_prcs_rdopt(
7048
    ihevce_enc_loop_ctxt_t *ps_ctxt,
7049
    WORD32 rd_opt_curr_idx,
7050
    WORD32 func_proc_mode,
7051
    UWORD8 *pu1_chrm_src,
7052
    WORD32 chrm_src_stride,
7053
    UWORD8 *pu1_cu_left,
7054
    UWORD8 *pu1_cu_top,
7055
    UWORD8 *pu1_cu_top_left,
7056
    WORD32 cu_left_stride,
7057
    WORD32 cu_pos_x,
7058
    WORD32 cu_pos_y,
7059
    WORD32 *pi4_chrm_tu_bits,
7060
    WORD32 i4_alpha_stim_multiplier,
7061
    UWORD8 u1_is_cu_noisy)
7062
1.12M
{
7063
1.12M
    tu_enc_loop_out_t *ps_tu;
7064
1.12M
    tu_enc_loop_temp_prms_t *ps_tu_temp_prms;
7065
7066
1.12M
    ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
7067
7068
1.12M
    UWORD8 *pu1_pred;
7069
1.12M
    UWORD8 *pu1_recon;
7070
1.12M
    WORD32 i4_recon_stride;
7071
1.12M
    WORD32 cu_size, trans_size = 0;
7072
1.12M
    WORD32 pred_strd;
7073
1.12M
    WORD32 ctr, i4_subtu_idx;
7074
1.12M
    WORD32 scan_idx;
7075
1.12M
    WORD32 u1_is_cu_coded_old;
7076
1.12M
    WORD32 init_bytes_offset;
7077
7078
1.12M
    enc_loop_cu_final_prms_t *ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_curr_idx];
7079
1.12M
    recon_datastore_t *ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore;
7080
7081
1.12M
    WORD32 total_bytes_offset = 0;
7082
1.12M
    LWORD64 chrm_cod_cost = 0;
7083
1.12M
    WORD32 chrm_tu_bits = 0;
7084
1.12M
    WORD32 chrm_pred_mode = DM_CHROMA_IDX, luma_pred_mode = 35;
7085
1.12M
    LWORD64 i8_ssd_cb = 0;
7086
1.12M
    WORD32 i4_bits_cb = 0;
7087
1.12M
    LWORD64 i8_ssd_cr = 0;
7088
1.12M
    WORD32 i4_bits_cr = 0;
7089
1.12M
    UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
7090
1.12M
    UWORD8 u1_num_tus =
7091
        /* NumChromaTU's = 1, if TUSize = 4 and CUSize = 8 */
7092
1.12M
        (!ps_best_cu_prms->as_tu_enc_loop[0].s_tu.b3_size && ps_best_cu_prms->u1_intra_flag)
7093
1.12M
            ? 1
7094
1.12M
            : ps_best_cu_prms->u2_num_tus_in_cu;
7095
1.12M
    UWORD8 u1_num_subtus_in_tu = u1_is_422 + 1;
7096
1.12M
    UWORD8 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
7097
323k
                                    (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
7098
257k
                                    CONVERT_SSDS_TO_SPATIAL_DOMAIN;
7099
    /* Get the RDOPT cost of the best CU mode for early_exit */
7100
1.12M
    LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!rd_opt_curr_idx].i8_best_rdopt_cost;
7101
    /* Get the current running RDOPT (Luma RDOPT) for early_exit */
7102
1.12M
    LWORD64 curr_rdopt_cost = ps_ctxt->as_cu_prms[rd_opt_curr_idx].i8_curr_rdopt_cost;
7103
1.12M
    WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
7104
1.12M
    WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
7105
7106
1.12M
    ihevc_intra_pred_chroma_ref_substitution_fptr =
7107
1.12M
        ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
7108
7109
1.12M
    if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
7110
0
    {
7111
0
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
7112
0
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
7113
0
    }
7114
7115
    /* Store the init bytes offset from luma */
7116
1.12M
    init_bytes_offset = ps_best_cu_prms->i4_num_bytes_ecd_data;
7117
7118
    /* Unused pred buffer in merge_skip_pred_data_t structure is used as
7119
    Chroma pred storage buf. for final_recon function.
7120
    The buffer is split into two and used as a ping-pong buffer */
7121
1.12M
    pu1_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
7122
1.12M
               rd_opt_curr_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) +
7123
1.12M
                                  (u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
7124
7125
1.12M
    pred_strd = ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
7126
7127
1.12M
    pu1_recon = (UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs[0];
7128
1.12M
    i4_recon_stride = ps_recon_datastore->i4_chromaRecon_stride;
7129
1.12M
    cu_size = ps_best_cu_prms->u1_cu_size;
7130
1.12M
    chrm_tu_bits = 0;
7131
7132
    /* get the first TU pointer */
7133
1.12M
    ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
7134
    /* get the first TU enc_loop temp prms pointer */
7135
1.12M
    ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
7136
7137
1.12M
    if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag)
7138
896k
    {
7139
        /* Mode signalled by intra prediction for luma */
7140
896k
        luma_pred_mode = ps_best_cu_prms->au1_intra_pred_mode[0];
7141
7142
#if DISABLE_RDOQ_INTRA
7143
        i4_perform_rdoq = 0;
7144
#endif
7145
896k
    }
7146
7147
230k
    else
7148
230k
    {
7149
230k
        UWORD8 *pu1_pred_org = pu1_pred;
7150
7151
        /* ------ Motion Compensation for Chroma -------- */
7152
511k
        for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++)
7153
280k
        {
7154
280k
            pu_t *ps_pu;
7155
280k
            WORD32 inter_pu_wd;
7156
280k
            WORD32 inter_pu_ht;
7157
7158
280k
            ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr];
7159
7160
280k
            inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
7161
280k
            inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
7162
280k
            inter_pu_ht <<= u1_is_422;
7163
7164
280k
            ihevce_chroma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_strd);
7165
7166
280k
            if(2 == ps_best_cu_prms->u2_num_pus_in_cu)
7167
100k
            {
7168
                /* 2Nx__ partition case */
7169
100k
                if(inter_pu_wd == cu_size)
7170
77.8k
                {
7171
77.8k
                    pu1_pred += (inter_pu_ht * pred_strd);
7172
77.8k
                }
7173
7174
                /* __x2N partition case */
7175
100k
                if(inter_pu_ht == (cu_size >> (u1_is_422 == 0)))
7176
23.1k
                {
7177
23.1k
                    pu1_pred += inter_pu_wd;
7178
23.1k
                }
7179
100k
            }
7180
280k
        }
7181
7182
        /* restore the pred pointer to start for transform loop */
7183
230k
        pu1_pred = pu1_pred_org;
7184
230k
    }
7185
7186
    /* Used to store back only the luma based info. if SATD based chroma
7187
    mode also comes */
7188
1.12M
    u1_is_cu_coded_old = ps_best_cu_prms->u1_is_cu_coded;
7189
7190
    /* evaluate chroma candidates (same as luma) and
7191
    if INTRA & HIGH_QUALITY compare with best SATD mode */
7192
1.12M
    {
7193
1.12M
        WORD32 calc_recon = 0, deq_data_strd;
7194
1.12M
        WORD16 *pi2_deq_data;
7195
1.12M
        UWORD8 *pu1_ecd_data;
7196
1.12M
        UWORD8 u1_is_mode_eq_chroma_satd_mode = 0;
7197
7198
1.12M
        pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0];
7199
1.12M
        pi2_deq_data += ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx;
7200
1.12M
        deq_data_strd = cu_size;
7201
        /* update ecd buffer for storing coeff. */
7202
1.12M
        pu1_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0];
7203
1.12M
        pu1_ecd_data += init_bytes_offset;
7204
        /* store chroma starting index */
7205
1.12M
        ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx = init_bytes_offset;
7206
7207
        /* get the first TU pointer */
7208
1.12M
        ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
7209
1.12M
        ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
7210
7211
        /* Reset total_bytes_offset for each candidate */
7212
1.12M
        chrm_pred_mode = (u1_is_422 == 1) ? gau1_chroma422_intra_angle_mapping[luma_pred_mode]
7213
1.12M
                                          : luma_pred_mode;
7214
7215
1.12M
        total_bytes_offset = 0;
7216
7217
1.12M
        if(TU_EQ_SUBCU == func_proc_mode)
7218
141k
        {
7219
141k
            func_proc_mode = TU_EQ_CU_DIV2;
7220
141k
        }
7221
7222
        /* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and
7223
        TU_EQ_CU_DIV2 and  TU_EQ_SUBCU case */
7224
1.12M
        if(8 == cu_size)
7225
803k
        {
7226
803k
            func_proc_mode = TU_EQ_CU;
7227
803k
        }
7228
7229
        /* loop based on num tus in a cu */
7230
1.12M
        if(!ps_best_cu_prms->u1_intra_flag || !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd ||
7231
693k
           (ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd &&
7232
693k
            (chrm_pred_mode !=
7233
693k
             ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode].u1_best_cr_mode)))
7234
960k
        {
7235
            /* loop based on num tus in a cu */
7236
2.06M
            for(ctr = 0; ctr < u1_num_tus; ctr++)
7237
1.36M
            {
7238
1.36M
                WORD32 num_bytes = 0;
7239
1.36M
                LWORD64 curr_cb_cod_cost = 0;
7240
1.36M
                LWORD64 curr_cr_cod_cost = 0;
7241
1.36M
                WORD32 chrm_pred_func_idx = 0;
7242
1.36M
                UWORD8 u1_is_early_exit_condition_satisfied = 0;
7243
7244
                /* Default cb and cr offset initialization for b3_chroma_intra_mode_idx=7   */
7245
                /* FIX for TU tree shrinkage caused by ecd data copies in final mode recon */
7246
1.36M
                ps_tu->s_tu.b1_cb_cbf = ps_tu->s_tu.b1_cr_cbf = 0;
7247
1.36M
                ps_tu->s_tu.b1_cb_cbf_subtu1 = ps_tu->s_tu.b1_cr_cbf_subtu1 = 0;
7248
1.36M
                ps_tu->ai4_cb_coeff_offset[0] = total_bytes_offset + init_bytes_offset;
7249
1.36M
                ps_tu->ai4_cr_coeff_offset[0] = total_bytes_offset + init_bytes_offset;
7250
1.36M
                ps_tu->ai4_cb_coeff_offset[1] = total_bytes_offset + init_bytes_offset;
7251
1.36M
                ps_tu->ai4_cr_coeff_offset[1] = total_bytes_offset + init_bytes_offset;
7252
1.36M
                ps_tu_temp_prms->ai2_cb_bytes_consumed[0] = 0;
7253
1.36M
                ps_tu_temp_prms->ai2_cr_bytes_consumed[0] = 0;
7254
1.36M
                ps_tu_temp_prms->ai2_cb_bytes_consumed[1] = 0;
7255
1.36M
                ps_tu_temp_prms->ai2_cr_bytes_consumed[1] = 0;
7256
7257
                /* TU level inits */
7258
                /* check if chroma present flag is set */
7259
1.36M
                if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
7260
1.16M
                {
7261
                    /* RDOPT copy States :  TU init (best until prev TU) to current */
7262
1.16M
                    COPY_CABAC_STATES(
7263
1.16M
                        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
7264
1.16M
                             .s_cabac_ctxt.au1_ctxt_models[0],
7265
1.16M
                        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7266
1.16M
                        IHEVC_CAB_CTXT_END);
7267
7268
                    /* get the current transform size */
7269
1.16M
                    trans_size = ps_tu->s_tu.b3_size;
7270
1.16M
                    trans_size = (1 << (trans_size + 1)); /* in chroma units */
7271
7272
                    /* since 2x2 transform is not allowed for chroma*/
7273
1.16M
                    if(2 == trans_size)
7274
295k
                    {
7275
295k
                        trans_size = 4;
7276
295k
                    }
7277
1.16M
                }
7278
7279
2.47M
                for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++)
7280
1.36M
                {
7281
1.36M
                    WORD32 cbf;
7282
1.36M
                    UWORD8 u1_is_recon_available;
7283
7284
1.36M
                    WORD32 nbr_flags = 0;
7285
1.36M
                    WORD32 zero_cols = 0;
7286
1.36M
                    WORD32 zero_rows = 0;
7287
7288
                    /* check if chroma present flag is set */
7289
1.36M
                    if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
7290
1.16M
                    {
7291
1.16M
                        UWORD8 *pu1_cur_pred;
7292
1.16M
                        UWORD8 *pu1_cur_recon;
7293
1.16M
                        UWORD8 *pu1_cur_src;
7294
1.16M
                        WORD16 *pi2_cur_deq_data;
7295
1.16M
                        WORD32 curr_pos_x, curr_pos_y;
7296
1.16M
                        LWORD64 trans_ssd_u, trans_ssd_v;
7297
7298
                        /* get the current sub-tu posx and posy w.r.t to cu */
7299
1.16M
                        curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3);
7300
1.16M
                        curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) +
7301
1.16M
                                     (i4_subtu_idx * trans_size);
7302
7303
                        /* 420sp case only vertical height will be half */
7304
1.16M
                        if(u1_is_422 == 0)
7305
1.16M
                        {
7306
1.16M
                            curr_pos_y >>= 1;
7307
1.16M
                        }
7308
7309
                        /* increment the pointers to start of current Sub-TU */
7310
1.16M
                        pu1_cur_recon = (pu1_recon + curr_pos_x);
7311
1.16M
                        pu1_cur_recon += (curr_pos_y * i4_recon_stride);
7312
1.16M
                        pu1_cur_src = (pu1_chrm_src + curr_pos_x);
7313
1.16M
                        pu1_cur_src += (curr_pos_y * chrm_src_stride);
7314
1.16M
                        pu1_cur_pred = (pu1_pred + curr_pos_x);
7315
1.16M
                        pu1_cur_pred += (curr_pos_y * pred_strd);
7316
1.16M
                        pi2_cur_deq_data = pi2_deq_data + curr_pos_x;
7317
1.16M
                        pi2_cur_deq_data += (curr_pos_y * deq_data_strd);
7318
7319
                        /* populate the coeffs scan idx */
7320
1.16M
                        scan_idx = SCAN_DIAG_UPRIGHT;
7321
7322
                        /* perform intra prediction only for Intra case */
7323
1.16M
                        if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag)
7324
838k
                        {
7325
838k
                            UWORD8 *pu1_top_left;
7326
838k
                            UWORD8 *pu1_top;
7327
838k
                            UWORD8 *pu1_left;
7328
838k
                            WORD32 left_strd;
7329
7330
838k
                            calc_recon = !u1_compute_spatial_ssd &&
7331
666k
                                         ((4 == u1_num_tus) || (u1_is_422 == 1)) &&
7332
103k
                                         (((u1_num_tus == 1) && (0 == i4_subtu_idx)) ||
7333
103k
                                          ((ctr == 3) && (0 == i4_subtu_idx) && (u1_is_422 == 1)) ||
7334
103k
                                          ((u1_num_tus == 4) && (ctr < 3)));
7335
7336
                            /* left cu boundary */
7337
838k
                            if(0 == curr_pos_x)
7338
766k
                            {
7339
766k
                                pu1_left = pu1_cu_left + curr_pos_y * cu_left_stride;
7340
766k
                                left_strd = cu_left_stride;
7341
766k
                            }
7342
72.4k
                            else
7343
72.4k
                            {
7344
72.4k
                                pu1_left = pu1_cur_recon - 2;
7345
72.4k
                                left_strd = i4_recon_stride;
7346
72.4k
                            }
7347
7348
                            /* top cu boundary */
7349
838k
                            if(0 == curr_pos_y)
7350
768k
                            {
7351
768k
                                pu1_top = pu1_cu_top + curr_pos_x;
7352
768k
                            }
7353
70.6k
                            else
7354
70.6k
                            {
7355
70.6k
                                pu1_top = pu1_cur_recon - i4_recon_stride;
7356
70.6k
                            }
7357
7358
                            /* by default top left is set to cu top left */
7359
838k
                            pu1_top_left = pu1_cu_top_left;
7360
7361
                            /* top left based on position */
7362
838k
                            if((0 != curr_pos_y) && (0 == curr_pos_x))
7363
36.4k
                            {
7364
36.4k
                                pu1_top_left = pu1_left - cu_left_stride;
7365
36.4k
                            }
7366
802k
                            else if(0 != curr_pos_x)
7367
72.4k
                            {
7368
72.4k
                                pu1_top_left = pu1_top - 2;
7369
72.4k
                            }
7370
7371
                            /* for 4x4 transforms based on intra pred mode scan is chosen */
7372
838k
                            if(4 == trans_size)
7373
696k
                            {
7374
                                /* for modes from 22 upto 30 horizontal scan is used */
7375
696k
                                if((chrm_pred_mode > 21) && (chrm_pred_mode < 31))
7376
109k
                                {
7377
109k
                                    scan_idx = SCAN_HORZ;
7378
109k
                                }
7379
                                /* for modes from 6 upto 14 vertical scan is used */
7380
586k
                                else if((chrm_pred_mode > 5) && (chrm_pred_mode < 15))
7381
270k
                                {
7382
270k
                                    scan_idx = SCAN_VERT;
7383
270k
                                }
7384
696k
                            }
7385
7386
838k
                            nbr_flags = ihevce_get_intra_chroma_tu_nbr(
7387
838k
                                ps_best_cu_prms->au4_nbr_flags[ctr],
7388
838k
                                i4_subtu_idx,
7389
838k
                                trans_size,
7390
838k
                                u1_is_422);
7391
7392
                            /* call the chroma reference array substitution */
7393
838k
                            ihevc_intra_pred_chroma_ref_substitution_fptr(
7394
838k
                                pu1_top_left,
7395
838k
                                pu1_top,
7396
838k
                                pu1_left,
7397
838k
                                left_strd,
7398
838k
                                trans_size,
7399
838k
                                nbr_flags,
7400
838k
                                (UWORD8 *)ps_ctxt->pv_ref_sub_out,
7401
838k
                                1,
7402
838k
                                CHROMA_FMT_IDC_YUV420);
7403
7404
                            /* use the look up to get the function idx */
7405
838k
                            chrm_pred_func_idx = g_i4_ip_funcs[chrm_pred_mode];
7406
7407
                            /* call the intra prediction function */
7408
838k
                            ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
7409
838k
                                (UWORD8 *)ps_ctxt->pv_ref_sub_out,
7410
838k
                                1,
7411
838k
                                pu1_cur_pred,
7412
838k
                                pred_strd,
7413
838k
                                trans_size,
7414
838k
                                chrm_pred_mode);
7415
838k
                        }
7416
7417
1.16M
                        if(!ctr && !i4_subtu_idx && (u1_compute_spatial_ssd || calc_recon))
7418
235k
                        {
7419
235k
                            ps_recon_datastore->au1_is_chromaRecon_available[0] =
7420
235k
                                !ps_best_cu_prms->u1_skip_flag;
7421
235k
                        }
7422
932k
                        else if(!ctr && !i4_subtu_idx)
7423
724k
                        {
7424
724k
                            ps_recon_datastore->au1_is_chromaRecon_available[0] = 0;
7425
724k
                        }
7426
                        /************************************************************/
7427
                        /* recon loop is done for all cases including skip cu       */
7428
                        /* This is because skipping chroma residual based on luma   */
7429
                        /* skip decision can lead to chroma artifacts               */
7430
                        /************************************************************/
7431
                        /************************************************************/
7432
                        /*In the high quality and medium speed modes, wherein chroma*/
7433
                        /*and luma costs are included in the total cost calculation */
7434
                        /*the cost is just a ssd cost, and not that obtained through*/
7435
                        /*iq_it path                                                */
7436
                        /************************************************************/
7437
1.16M
                        if(ps_best_cu_prms->u1_skip_flag == 0)
7438
1.10M
                        {
7439
1.10M
                            WORD32 tu_bits;
7440
7441
1.10M
                            cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
7442
1.10M
                                ps_ctxt,
7443
1.10M
                                pu1_cur_pred,
7444
1.10M
                                pred_strd,
7445
1.10M
                                pu1_cur_src,
7446
1.10M
                                chrm_src_stride,
7447
1.10M
                                pi2_cur_deq_data,
7448
1.10M
                                deq_data_strd,
7449
1.10M
                                pu1_cur_recon,
7450
1.10M
                                i4_recon_stride,
7451
1.10M
                                pu1_ecd_data + total_bytes_offset,
7452
1.10M
                                ps_ctxt->au1_cu_csbf,
7453
1.10M
                                ps_ctxt->i4_cu_csbf_strd,
7454
1.10M
                                trans_size,
7455
1.10M
                                scan_idx,
7456
1.10M
                                PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag,
7457
1.10M
                                &num_bytes,
7458
1.10M
                                &tu_bits,
7459
1.10M
                                &zero_cols,
7460
1.10M
                                &zero_rows,
7461
1.10M
                                &u1_is_recon_available,
7462
1.10M
                                i4_perform_sbh,
7463
1.10M
                                i4_perform_rdoq,
7464
1.10M
                                &trans_ssd_u,
7465
1.10M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7466
1.10M
                                i4_alpha_stim_multiplier,
7467
1.10M
                                u1_is_cu_noisy,
7468
1.10M
#endif
7469
1.10M
                                ps_best_cu_prms->u1_skip_flag,
7470
1.10M
                                u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
7471
1.10M
                                U_PLANE);
7472
7473
1.10M
                            if(u1_compute_spatial_ssd && u1_is_recon_available)
7474
238k
                            {
7475
238k
                                ps_recon_datastore
7476
238k
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7477
238k
                                                                        [i4_subtu_idx] = 0;
7478
238k
                            }
7479
866k
                            else
7480
866k
                            {
7481
866k
                                ps_recon_datastore
7482
866k
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7483
866k
                                                                        [i4_subtu_idx] = UCHAR_MAX;
7484
866k
                            }
7485
7486
#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7487
                            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7488
                            {
7489
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
7490
                                trans_ssd_u = ihevce_inject_stim_into_distortion(
7491
                                    pu1_cur_src,
7492
                                    chrm_src_stride,
7493
                                    pu1_cur_pred,
7494
                                    pred_strd,
7495
                                    trans_ssd_u,
7496
                                    i4_alpha_stim_multiplier,
7497
                                    trans_size,
7498
                                    0,
7499
                                    ps_ctxt->u1_enable_psyRDOPT,
7500
                                    U_PLANE);
7501
#else
7502
                                if(u1_compute_spatial_ssd && u1_is_recon_available)
7503
                                {
7504
                                    trans_ssd_u = ihevce_inject_stim_into_distortion(
7505
                                        pu1_cur_src,
7506
                                        chrm_src_stride,
7507
                                        pu1_cur_recon,
7508
                                        i4_recon_stride,
7509
                                        trans_ssd_u,
7510
                                        i4_alpha_stim_multiplier,
7511
                                        trans_size,
7512
                                        0,
7513
                                        ps_ctxt->u1_enable_psyRDOPT,
7514
                                        U_PLANE);
7515
                                }
7516
                                else
7517
                                {
7518
                                    trans_ssd_u = ihevce_inject_stim_into_distortion(
7519
                                        pu1_cur_src,
7520
                                        chrm_src_stride,
7521
                                        pu1_cur_pred,
7522
                                        pred_strd,
7523
                                        trans_ssd_u,
7524
                                        i4_alpha_stim_multiplier,
7525
                                        trans_size,
7526
                                        0,
7527
                                        ps_ctxt->u1_enable_psyRDOPT,
7528
                                        U_PLANE);
7529
                                }
7530
#endif
7531
                            }
7532
#endif
7533
7534
1.10M
                            curr_cb_cod_cost =
7535
1.10M
                                trans_ssd_u +
7536
1.10M
                                COMPUTE_RATE_COST_CLIP30(
7537
1.10M
                                    tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
7538
7539
1.10M
                            chrm_tu_bits += tu_bits;
7540
1.10M
                            i4_bits_cb += tu_bits;
7541
7542
                            /* RDOPT copy States :  New updated after curr TU to TU init */
7543
1.10M
                            if(0 != cbf)
7544
611k
                            {
7545
611k
                                COPY_CABAC_STATES(
7546
611k
                                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7547
611k
                                    &ps_ctxt->s_rdopt_entropy_ctxt
7548
611k
                                         .as_cu_entropy_ctxt[rd_opt_curr_idx]
7549
611k
                                         .s_cabac_ctxt.au1_ctxt_models[0],
7550
611k
                                    IHEVC_CAB_CTXT_END);
7551
611k
                            }
7552
                            /* RDOPT copy States :  Restoring back the Cb init state to Cr */
7553
493k
                            else
7554
493k
                            {
7555
493k
                                COPY_CABAC_STATES(
7556
493k
                                    &ps_ctxt->s_rdopt_entropy_ctxt
7557
493k
                                         .as_cu_entropy_ctxt[rd_opt_curr_idx]
7558
493k
                                         .s_cabac_ctxt.au1_ctxt_models[0],
7559
493k
                                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7560
493k
                                    IHEVC_CAB_CTXT_END);
7561
493k
                            }
7562
7563
                            /* If Intra and TU=CU/2, need recon for next TUs */
7564
1.10M
                            if(calc_recon)
7565
80.9k
                            {
7566
80.9k
                                ihevce_chroma_it_recon_fxn(
7567
80.9k
                                    ps_ctxt,
7568
80.9k
                                    pi2_cur_deq_data,
7569
80.9k
                                    deq_data_strd,
7570
80.9k
                                    pu1_cur_pred,
7571
80.9k
                                    pred_strd,
7572
80.9k
                                    pu1_cur_recon,
7573
80.9k
                                    i4_recon_stride,
7574
80.9k
                                    (pu1_ecd_data + total_bytes_offset),
7575
80.9k
                                    trans_size,
7576
80.9k
                                    cbf,
7577
80.9k
                                    zero_cols,
7578
80.9k
                                    zero_rows,
7579
80.9k
                                    U_PLANE);
7580
7581
80.9k
                                ps_recon_datastore
7582
80.9k
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7583
80.9k
                                                                        [i4_subtu_idx] = 0;
7584
80.9k
                            }
7585
1.02M
                            else
7586
1.02M
                            {
7587
1.02M
                                ps_recon_datastore
7588
1.02M
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7589
1.02M
                                                                        [i4_subtu_idx] = UCHAR_MAX;
7590
1.02M
                            }
7591
1.10M
                        }
7592
63.9k
                        else
7593
63.9k
                        {
7594
                            /* num bytes is set to 0 */
7595
63.9k
                            num_bytes = 0;
7596
7597
                            /* cbf is returned as 0 */
7598
63.9k
                            cbf = 0;
7599
7600
63.9k
                            curr_cb_cod_cost = trans_ssd_u =
7601
7602
63.9k
                                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
7603
63.9k
                                    pu1_cur_pred,
7604
63.9k
                                    pu1_cur_src,
7605
63.9k
                                    pred_strd,
7606
63.9k
                                    chrm_src_stride,
7607
63.9k
                                    trans_size,
7608
63.9k
                                    trans_size,
7609
63.9k
                                    U_PLANE);
7610
7611
63.9k
                            if(u1_compute_spatial_ssd)
7612
19.6k
                            {
7613
                                /* buffer copy from pred to recon */
7614
7615
19.6k
                                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
7616
19.6k
                                    pu1_cur_pred,
7617
19.6k
                                    pred_strd,
7618
19.6k
                                    pu1_cur_recon,
7619
19.6k
                                    i4_recon_stride,
7620
19.6k
                                    trans_size,
7621
19.6k
                                    trans_size,
7622
19.6k
                                    U_PLANE);
7623
7624
19.6k
                                ps_recon_datastore
7625
19.6k
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7626
19.6k
                                                                        [i4_subtu_idx] = 0;
7627
19.6k
                            }
7628
7629
63.9k
                            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7630
0
                            {
7631
0
                                trans_ssd_u = ihevce_inject_stim_into_distortion(
7632
0
                                    pu1_cur_src,
7633
0
                                    chrm_src_stride,
7634
0
                                    pu1_cur_pred,
7635
0
                                    pred_strd,
7636
0
                                    trans_ssd_u,
7637
0
                                    i4_alpha_stim_multiplier,
7638
0
                                    trans_size,
7639
0
                                    0,
7640
0
                                    ps_ctxt->u1_enable_psyRDOPT,
7641
0
                                    U_PLANE);
7642
0
                            }
7643
7644
63.9k
#if ENABLE_INTER_ZCU_COST
7645
#if !WEIGH_CHROMA_COST
7646
                            /* cbf = 0, accumulate cu not coded cost */
7647
                            ps_ctxt->i8_cu_not_coded_cost += curr_cb_cod_cost;
7648
#else
7649
                            /* cbf = 0, accumulate cu not coded cost */
7650
7651
63.9k
                            ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
7652
63.9k
                                (curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7653
63.9k
                                 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7654
63.9k
                                CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7655
63.9k
#endif
7656
63.9k
#endif
7657
63.9k
                        }
7658
7659
#if !WEIGH_CHROMA_COST
7660
                        curr_rdopt_cost += curr_cb_cod_cost;
7661
#else
7662
1.16M
                        curr_rdopt_cost +=
7663
1.16M
                            ((curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7664
1.16M
                              (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7665
1.16M
                             CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7666
1.16M
#endif
7667
1.16M
                        chrm_cod_cost += curr_cb_cod_cost;
7668
1.16M
                        i8_ssd_cb += trans_ssd_u;
7669
7670
1.16M
                        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
7671
1.16M
                        {
7672
                            /* Early exit : If the current running cost exceeds
7673
                            the prev. best mode cost, break */
7674
1.16M
                            if(curr_rdopt_cost > prev_best_rdopt_cost)
7675
106k
                            {
7676
106k
                                u1_is_early_exit_condition_satisfied = 1;
7677
106k
                                break;
7678
106k
                            }
7679
1.16M
                        }
7680
7681
                        /* inter cu is coded if any of the tu is coded in it */
7682
1.06M
                        ps_best_cu_prms->u1_is_cu_coded |= cbf;
7683
7684
                        /* update CB related params */
7685
1.06M
                        ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] =
7686
1.06M
                            total_bytes_offset + init_bytes_offset;
7687
7688
1.06M
                        if(0 == i4_subtu_idx)
7689
1.06M
                        {
7690
1.06M
                            ps_tu->s_tu.b1_cb_cbf = cbf;
7691
1.06M
                        }
7692
0
                        else
7693
0
                        {
7694
0
                            ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf;
7695
0
                        }
7696
7697
1.06M
                        total_bytes_offset += num_bytes;
7698
7699
1.06M
                        ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] = zero_cols;
7700
1.06M
                        ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] = zero_rows;
7701
1.06M
                        ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes;
7702
7703
                        /* recon loop is done for non skip cases */
7704
1.06M
                        if(ps_best_cu_prms->u1_skip_flag == 0)
7705
1.03M
                        {
7706
1.03M
                            WORD32 tu_bits;
7707
7708
1.03M
                            cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
7709
1.03M
                                ps_ctxt,
7710
1.03M
                                pu1_cur_pred,
7711
1.03M
                                pred_strd,
7712
1.03M
                                pu1_cur_src,
7713
1.03M
                                chrm_src_stride,
7714
1.03M
                                pi2_cur_deq_data + trans_size,
7715
1.03M
                                deq_data_strd,
7716
1.03M
                                pu1_cur_recon,
7717
1.03M
                                i4_recon_stride,
7718
1.03M
                                pu1_ecd_data + total_bytes_offset,
7719
1.03M
                                ps_ctxt->au1_cu_csbf,
7720
1.03M
                                ps_ctxt->i4_cu_csbf_strd,
7721
1.03M
                                trans_size,
7722
1.03M
                                scan_idx,
7723
1.03M
                                PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag,
7724
1.03M
                                &num_bytes,
7725
1.03M
                                &tu_bits,
7726
1.03M
                                &zero_cols,
7727
1.03M
                                &zero_rows,
7728
1.03M
                                &u1_is_recon_available,
7729
1.03M
                                i4_perform_sbh,
7730
1.03M
                                i4_perform_rdoq,
7731
1.03M
                                &trans_ssd_v,
7732
1.03M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7733
1.03M
                                i4_alpha_stim_multiplier,
7734
1.03M
                                u1_is_cu_noisy,
7735
1.03M
#endif
7736
1.03M
                                ps_best_cu_prms->u1_skip_flag,
7737
1.03M
                                u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
7738
1.03M
                                V_PLANE);
7739
7740
1.03M
                            if(u1_compute_spatial_ssd && u1_is_recon_available)
7741
224k
                            {
7742
224k
                                ps_recon_datastore
7743
224k
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7744
224k
                                                                        [i4_subtu_idx] = 0;
7745
224k
                            }
7746
811k
                            else
7747
811k
                            {
7748
811k
                                ps_recon_datastore
7749
811k
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7750
811k
                                                                        [i4_subtu_idx] = UCHAR_MAX;
7751
811k
                            }
7752
7753
#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7754
                            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7755
                            {
7756
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
7757
                                trans_ssd_v = ihevce_inject_stim_into_distortion(
7758
                                    pu1_cur_src,
7759
                                    chrm_src_stride,
7760
                                    pu1_cur_pred,
7761
                                    pred_strd,
7762
                                    trans_ssd_v,
7763
                                    i4_alpha_stim_multiplier,
7764
                                    trans_size,
7765
                                    0,
7766
                                    ps_ctxt->u1_enable_psyRDOPT,
7767
                                    V_PLANE);
7768
#else
7769
                                if(u1_compute_spatial_ssd && u1_is_recon_available)
7770
                                {
7771
                                    trans_ssd_v = ihevce_inject_stim_into_distortion(
7772
                                        pu1_cur_src,
7773
                                        chrm_src_stride,
7774
                                        pu1_cur_recon,
7775
                                        i4_recon_stride,
7776
                                        trans_ssd_v,
7777
                                        i4_alpha_stim_multiplier,
7778
                                        trans_size,
7779
                                        0,
7780
                                        ps_ctxt->u1_enable_psyRDOPT,
7781
                                        V_PLANE);
7782
                                }
7783
                                else
7784
                                {
7785
                                    trans_ssd_v = ihevce_inject_stim_into_distortion(
7786
                                        pu1_cur_src,
7787
                                        chrm_src_stride,
7788
                                        pu1_cur_pred,
7789
                                        pred_strd,
7790
                                        trans_ssd_v,
7791
                                        i4_alpha_stim_multiplier,
7792
                                        trans_size,
7793
                                        0,
7794
                                        ps_ctxt->u1_enable_psyRDOPT,
7795
                                        V_PLANE);
7796
                                }
7797
#endif
7798
                            }
7799
#endif
7800
7801
1.03M
                            curr_cr_cod_cost =
7802
1.03M
                                trans_ssd_v +
7803
1.03M
                                COMPUTE_RATE_COST_CLIP30(
7804
1.03M
                                    tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
7805
1.03M
                            chrm_tu_bits += tu_bits;
7806
1.03M
                            i4_bits_cr += tu_bits;
7807
7808
                            /* RDOPT copy States :  New updated after curr TU to TU init */
7809
1.03M
                            if(0 != cbf)
7810
531k
                            {
7811
531k
                                COPY_CABAC_STATES(
7812
531k
                                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7813
531k
                                    &ps_ctxt->s_rdopt_entropy_ctxt
7814
531k
                                         .as_cu_entropy_ctxt[rd_opt_curr_idx]
7815
531k
                                         .s_cabac_ctxt.au1_ctxt_models[0],
7816
531k
                                    IHEVC_CAB_CTXT_END);
7817
531k
                            }
7818
                            /* RDOPT copy States :  Restoring back the Cb init state to Cr */
7819
504k
                            else
7820
504k
                            {
7821
504k
                                COPY_CABAC_STATES(
7822
504k
                                    &ps_ctxt->s_rdopt_entropy_ctxt
7823
504k
                                         .as_cu_entropy_ctxt[rd_opt_curr_idx]
7824
504k
                                         .s_cabac_ctxt.au1_ctxt_models[0],
7825
504k
                                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7826
504k
                                    IHEVC_CAB_CTXT_END);
7827
504k
                            }
7828
7829
                            /* If Intra and TU=CU/2, need recon for next TUs */
7830
1.03M
                            if(calc_recon)
7831
76.6k
                            {
7832
76.6k
                                ihevce_chroma_it_recon_fxn(
7833
76.6k
                                    ps_ctxt,
7834
76.6k
                                    (pi2_cur_deq_data + trans_size),
7835
76.6k
                                    deq_data_strd,
7836
76.6k
                                    pu1_cur_pred,
7837
76.6k
                                    pred_strd,
7838
76.6k
                                    pu1_cur_recon,
7839
76.6k
                                    i4_recon_stride,
7840
76.6k
                                    (pu1_ecd_data + total_bytes_offset),
7841
76.6k
                                    trans_size,
7842
76.6k
                                    cbf,
7843
76.6k
                                    zero_cols,
7844
76.6k
                                    zero_rows,
7845
76.6k
                                    V_PLANE);
7846
7847
76.6k
                                ps_recon_datastore
7848
76.6k
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7849
76.6k
                                                                        [i4_subtu_idx] = 0;
7850
76.6k
                            }
7851
959k
                            else
7852
959k
                            {
7853
959k
                                ps_recon_datastore
7854
959k
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7855
959k
                                                                        [i4_subtu_idx] = UCHAR_MAX;
7856
959k
                            }
7857
1.03M
                        }
7858
26.0k
                        else
7859
26.0k
                        {
7860
                            /* num bytes is set to 0 */
7861
26.0k
                            num_bytes = 0;
7862
7863
                            /* cbf is returned as 0 */
7864
26.0k
                            cbf = 0;
7865
7866
26.0k
                            curr_cr_cod_cost = trans_ssd_v =
7867
7868
26.0k
                                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
7869
26.0k
                                    pu1_cur_pred,
7870
26.0k
                                    pu1_cur_src,
7871
26.0k
                                    pred_strd,
7872
26.0k
                                    chrm_src_stride,
7873
26.0k
                                    trans_size,
7874
26.0k
                                    trans_size,
7875
26.0k
                                    V_PLANE);
7876
7877
26.0k
                            if(u1_compute_spatial_ssd)
7878
10.5k
                            {
7879
                                /* buffer copy from pred to recon */
7880
10.5k
                                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
7881
10.5k
                                    pu1_cur_pred,
7882
10.5k
                                    pred_strd,
7883
10.5k
                                    pu1_cur_recon,
7884
10.5k
                                    i4_recon_stride,
7885
10.5k
                                    trans_size,
7886
10.5k
                                    trans_size,
7887
10.5k
                                    V_PLANE);
7888
7889
10.5k
                                ps_recon_datastore
7890
10.5k
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7891
10.5k
                                                                        [i4_subtu_idx] = 0;
7892
10.5k
                            }
7893
7894
26.0k
                            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7895
0
                            {
7896
0
                                trans_ssd_v = ihevce_inject_stim_into_distortion(
7897
0
                                    pu1_cur_src,
7898
0
                                    chrm_src_stride,
7899
0
                                    pu1_cur_pred,
7900
0
                                    pred_strd,
7901
0
                                    trans_ssd_v,
7902
0
                                    i4_alpha_stim_multiplier,
7903
0
                                    trans_size,
7904
0
                                    0,
7905
0
                                    ps_ctxt->u1_enable_psyRDOPT,
7906
0
                                    V_PLANE);
7907
0
                            }
7908
7909
26.0k
#if ENABLE_INTER_ZCU_COST
7910
#if !WEIGH_CHROMA_COST
7911
                            /* cbf = 0, accumulate cu not coded cost */
7912
                            ps_ctxt->i8_cu_not_coded_cost += curr_cr_cod_cost;
7913
#else
7914
                            /* cbf = 0, accumulate cu not coded cost */
7915
7916
26.0k
                            ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
7917
26.0k
                                (curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7918
26.0k
                                 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7919
26.0k
                                CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7920
26.0k
#endif
7921
26.0k
#endif
7922
26.0k
                        }
7923
7924
#if !WEIGH_CHROMA_COST
7925
                        curr_rdopt_cost += curr_cr_cod_cost;
7926
#else
7927
1.06M
                        curr_rdopt_cost +=
7928
1.06M
                            ((curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7929
1.06M
                              (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7930
1.06M
                             CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7931
1.06M
#endif
7932
7933
1.06M
                        chrm_cod_cost += curr_cr_cod_cost;
7934
1.06M
                        i8_ssd_cr += trans_ssd_v;
7935
7936
1.06M
                        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
7937
1.06M
                        {
7938
                            /* Early exit : If the current running cost exceeds
7939
                            the prev. best mode cost, break */
7940
1.06M
                            if(curr_rdopt_cost > prev_best_rdopt_cost)
7941
148k
                            {
7942
148k
                                u1_is_early_exit_condition_satisfied = 1;
7943
148k
                                break;
7944
148k
                            }
7945
1.06M
                        }
7946
7947
                        /* inter cu is coded if any of the tu is coded in it */
7948
913k
                        ps_best_cu_prms->u1_is_cu_coded |= cbf;
7949
7950
                        /* update CR related params */
7951
913k
                        ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] =
7952
913k
                            total_bytes_offset + init_bytes_offset;
7953
7954
913k
                        if(0 == i4_subtu_idx)
7955
913k
                        {
7956
913k
                            ps_tu->s_tu.b1_cr_cbf = cbf;
7957
913k
                        }
7958
0
                        else
7959
0
                        {
7960
0
                            ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf;
7961
0
                        }
7962
7963
913k
                        total_bytes_offset += num_bytes;
7964
7965
913k
                        ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] = zero_cols;
7966
913k
                        ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] = zero_rows;
7967
913k
                        ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes;
7968
913k
                    }
7969
195k
                    else
7970
195k
                    {
7971
195k
                        ps_recon_datastore
7972
195k
                            ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx] =
7973
195k
                            UCHAR_MAX;
7974
195k
                        ps_recon_datastore
7975
195k
                            ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx] =
7976
195k
                            UCHAR_MAX;
7977
195k
                    }
7978
1.36M
                }
7979
7980
1.36M
                if(u1_is_early_exit_condition_satisfied)
7981
255k
                {
7982
255k
                    break;
7983
255k
                }
7984
7985
                /* loop increments */
7986
1.10M
                ps_tu++;
7987
1.10M
                ps_tu_temp_prms++;
7988
1.10M
            }
7989
7990
            /* Signal as luma mode. HIGH_QUALITY may update it */
7991
960k
            ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
7992
7993
            /* modify the cost chrm_cod_cost */
7994
960k
            if(ps_ctxt->u1_enable_psyRDOPT)
7995
0
            {
7996
0
                UWORD8 *pu1_recon_cu;
7997
0
                WORD32 recon_stride;
7998
0
                WORD32 curr_pos_x;
7999
0
                WORD32 curr_pos_y;
8000
0
                WORD32 start_index;
8001
0
                WORD32 num_horz_cu_in_ctb;
8002
0
                WORD32 had_block_size;
8003
                /* TODO: sreenivasa ctb size has to be used appropriately */
8004
0
                had_block_size = 8;
8005
0
                num_horz_cu_in_ctb = 2 * 64 / had_block_size;
8006
8007
0
                curr_pos_x = cu_pos_x << 3; /* pel units */
8008
0
                curr_pos_y = cu_pos_y << 3; /* pel units */
8009
0
                recon_stride = i4_recon_stride;
8010
0
                pu1_recon_cu = pu1_recon;
8011
8012
                /* start index to index the source satd of curr cu in the current ctb */
8013
0
                start_index = 2 * (curr_pos_x / had_block_size) +
8014
0
                              (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
8015
8016
0
                {
8017
0
                    chrm_cod_cost += ihevce_psy_rd_cost_croma(
8018
0
                        ps_ctxt->ai4_source_chroma_satd,
8019
0
                        pu1_recon,
8020
0
                        recon_stride,
8021
0
                        1,  //
8022
0
                        cu_size,
8023
0
                        0,  // pic type
8024
0
                        0,  //layer id
8025
0
                        ps_ctxt->i4_satd_lamda,  // lambda
8026
0
                        start_index,
8027
0
                        ps_ctxt->u1_is_input_data_hbd,  // 8 bit
8028
0
                        ps_ctxt->u1_chroma_array_type,
8029
0
                        &ps_ctxt->s_cmn_opt_func
8030
8031
0
                    );  // chroma subsampling 420
8032
0
                }
8033
0
            }
8034
960k
        }
8035
167k
        else
8036
167k
        {
8037
167k
            u1_is_mode_eq_chroma_satd_mode = 1;
8038
167k
            chrm_cod_cost = MAX_COST_64;
8039
167k
        }
8040
8041
        /* If Intra Block and preset is HIGH QUALITY, then compare with best SATD mode */
8042
1.12M
        if((PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag) &&
8043
896k
           (1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd))
8044
693k
        {
8045
693k
            if(64 == cu_size)
8046
3.03k
            {
8047
3.03k
                ASSERT(TU_EQ_CU != func_proc_mode);
8048
3.03k
            }
8049
8050
693k
            if(ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode]
8051
693k
                   .i8_chroma_best_rdopt < chrm_cod_cost)
8052
332k
            {
8053
332k
                UWORD8 *pu1_src;
8054
332k
                UWORD8 *pu1_ecd_data_src_cb;
8055
332k
                UWORD8 *pu1_ecd_data_src_cr;
8056
8057
332k
                chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt =
8058
332k
                    &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode];
8059
8060
332k
                UWORD8 *pu1_dst = &ps_ctxt->au1_rdopt_init_ctxt_models[0];
8061
332k
                WORD32 ai4_ecd_data_cb_offset[2] = { 0, 0 };
8062
332k
                WORD32 ai4_ecd_data_cr_offset[2] = { 0, 0 };
8063
8064
332k
                pu1_src = &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0];
8065
332k
                chrm_cod_cost = ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt;
8066
332k
                chrm_pred_mode = ps_chr_intra_satd_ctxt->u1_best_cr_mode;
8067
332k
                chrm_tu_bits = ps_chr_intra_satd_ctxt->i4_chrm_tu_bits;
8068
8069
332k
                if(u1_is_mode_eq_chroma_satd_mode)
8070
167k
                {
8071
167k
                    chrm_cod_cost -= ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode;
8072
167k
                }
8073
8074
                /* Resetting total_bytes_offset to 0 */
8075
332k
                total_bytes_offset = 0;
8076
8077
                /* Update the CABAC state corresponding to chroma only */
8078
                /* Chroma Cbf */
8079
332k
                memcpy(pu1_dst + IHEVC_CAB_CBCR_IDX, pu1_src + IHEVC_CAB_CBCR_IDX, 2);
8080
                /* Chroma transform skip */
8081
332k
                memcpy(pu1_dst + IHEVC_CAB_TFM_SKIP12, pu1_src + IHEVC_CAB_TFM_SKIP12, 1);
8082
                /* Chroma last coeff x prefix */
8083
332k
                memcpy(
8084
332k
                    pu1_dst + IHEVC_CAB_COEFFX_PREFIX + 15,
8085
332k
                    pu1_src + IHEVC_CAB_COEFFX_PREFIX + 15,
8086
332k
                    3);
8087
                /* Chroma last coeff y prefix */
8088
332k
                memcpy(
8089
332k
                    pu1_dst + IHEVC_CAB_COEFFY_PREFIX + 15,
8090
332k
                    pu1_src + IHEVC_CAB_COEFFY_PREFIX + 15,
8091
332k
                    3);
8092
                /* Chroma csbf */
8093
332k
                memcpy(
8094
332k
                    pu1_dst + IHEVC_CAB_CODED_SUBLK_IDX + 2,
8095
332k
                    pu1_src + IHEVC_CAB_CODED_SUBLK_IDX + 2,
8096
332k
                    2);
8097
                /* Chroma sig coeff flags */
8098
332k
                memcpy(
8099
332k
                    pu1_dst + IHEVC_CAB_COEFF_FLAG + 27, pu1_src + IHEVC_CAB_COEFF_FLAG + 27, 15);
8100
                /* Chroma absgt1 flags */
8101
332k
                memcpy(
8102
332k
                    pu1_dst + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16,
8103
332k
                    pu1_src + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16,
8104
332k
                    8);
8105
                /* Chroma absgt2 flags */
8106
332k
                memcpy(
8107
332k
                    pu1_dst + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4,
8108
332k
                    pu1_src + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4,
8109
332k
                    2);
8110
8111
332k
                ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
8112
332k
                ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
8113
8114
                /* update to luma decision as we update chroma in final mode */
8115
332k
                ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded_old;
8116
8117
725k
                for(ctr = 0; ctr < u1_num_tus; ctr++)
8118
393k
                {
8119
787k
                    for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++)
8120
393k
                    {
8121
393k
                        WORD32 cbf;
8122
393k
                        WORD32 num_bytes;
8123
8124
393k
                        pu1_ecd_data_src_cb =
8125
393k
                            &ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0];
8126
393k
                        pu1_ecd_data_src_cr =
8127
393k
                            &ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0];
8128
8129
                        /* check if chroma present flag is set */
8130
393k
                        if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
8131
393k
                        {
8132
393k
                            UWORD8 *pu1_cur_pred_dest;
8133
393k
                            UWORD8 *pu1_cur_pred_src;
8134
393k
                            WORD32 pred_src_strd;
8135
393k
                            WORD16 *pi2_cur_deq_data_dest;
8136
393k
                            WORD16 *pi2_cur_deq_data_src_cb;
8137
393k
                            WORD16 *pi2_cur_deq_data_src_cr;
8138
393k
                            WORD32 deq_src_strd;
8139
8140
393k
                            WORD32 curr_pos_x, curr_pos_y;
8141
8142
393k
                            trans_size = ps_tu->s_tu.b3_size;
8143
393k
                            trans_size = (1 << (trans_size + 1)); /* in chroma units */
8144
8145
                            /*Deriving stride values*/
8146
393k
                            pred_src_strd = ps_chr_intra_satd_ctxt->i4_pred_stride;
8147
393k
                            deq_src_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride;
8148
8149
                            /* since 2x2 transform is not allowed for chroma*/
8150
393k
                            if(2 == trans_size)
8151
78.9k
                            {
8152
78.9k
                                trans_size = 4;
8153
78.9k
                            }
8154
8155
                            /* get the current tu pos_x and pos_y w.r.t. the cu */
8156
393k
                            curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3);
8157
393k
                            curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) +
8158
393k
                                         (i4_subtu_idx * trans_size);
8159
8160
                            /* 420sp case only vertical height will be half */
8161
393k
                            if(0 == u1_is_422)
8162
393k
                            {
8163
393k
                                curr_pos_y >>= 1;
8164
393k
                            }
8165
8166
                            /* increment the pointers to start of current TU  */
8167
393k
                            pu1_cur_pred_src =
8168
393k
                                ((UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data + curr_pos_x);
8169
393k
                            pu1_cur_pred_src += (curr_pos_y * pred_src_strd);
8170
393k
                            pu1_cur_pred_dest = (pu1_pred + curr_pos_x);
8171
393k
                            pu1_cur_pred_dest += (curr_pos_y * pred_strd);
8172
8173
393k
                            pi2_cur_deq_data_src_cb =
8174
393k
                                &ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] + (curr_pos_x >> 1);
8175
393k
                            pi2_cur_deq_data_src_cr =
8176
393k
                                &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] + (curr_pos_x >> 1);
8177
393k
                            pi2_cur_deq_data_src_cb += (curr_pos_y * deq_src_strd);
8178
393k
                            pi2_cur_deq_data_src_cr += (curr_pos_y * deq_src_strd);
8179
393k
                            pi2_cur_deq_data_dest = pi2_deq_data + curr_pos_x;
8180
393k
                            pi2_cur_deq_data_dest += (curr_pos_y * deq_data_strd);
8181
8182
                            /*Overwriting deq data with that belonging to the winning special mode
8183
                            (luma mode !=  chroma mode)
8184
                            ihevce_copy_2d takes source and dest arguments as UWORD8 *. We have to
8185
                            correspondingly manipulate to copy WORD16 data*/
8186
8187
393k
                            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
8188
393k
                                (UWORD8 *)pi2_cur_deq_data_dest,
8189
393k
                                (deq_data_strd << 1),
8190
393k
                                (UWORD8 *)pi2_cur_deq_data_src_cb,
8191
393k
                                (deq_src_strd << 1),
8192
393k
                                (trans_size << 1),
8193
393k
                                trans_size);
8194
8195
393k
                            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
8196
393k
                                (UWORD8 *)(pi2_cur_deq_data_dest + trans_size),
8197
393k
                                (deq_data_strd << 1),
8198
393k
                                (UWORD8 *)pi2_cur_deq_data_src_cr,
8199
393k
                                (deq_src_strd << 1),
8200
393k
                                (trans_size << 1),
8201
393k
                                trans_size);
8202
8203
                            /*Overwriting pred data with that belonging to the winning special mode
8204
                            (luma mode !=  chroma mode)*/
8205
8206
393k
                            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
8207
393k
                                pu1_cur_pred_dest,
8208
393k
                                pred_strd,
8209
393k
                                pu1_cur_pred_src,
8210
393k
                                pred_src_strd,
8211
393k
                                (trans_size << 1),
8212
393k
                                trans_size);
8213
8214
393k
                            num_bytes = ps_chr_intra_satd_ctxt
8215
393k
                                            ->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr];
8216
393k
                            cbf = ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr];
8217
                            /* inter cu is coded if any of the tu is coded in it */
8218
393k
                            ps_best_cu_prms->u1_is_cu_coded |= cbf;
8219
8220
                            /* update CB related params */
8221
393k
                            ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] =
8222
393k
                                total_bytes_offset + init_bytes_offset;
8223
8224
393k
                            if(0 == i4_subtu_idx)
8225
393k
                            {
8226
393k
                                ps_tu->s_tu.b1_cb_cbf = cbf;
8227
393k
                            }
8228
0
                            else
8229
0
                            {
8230
0
                                ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf;
8231
0
                            }
8232
8233
                            /*Overwriting the cb ecd data corresponding to the special mode*/
8234
393k
                            if(0 != num_bytes)
8235
212k
                            {
8236
212k
                                memcpy(
8237
212k
                                    (pu1_ecd_data + total_bytes_offset),
8238
212k
                                    pu1_ecd_data_src_cb + ai4_ecd_data_cb_offset[i4_subtu_idx],
8239
212k
                                    num_bytes);
8240
212k
                            }
8241
8242
393k
                            total_bytes_offset += num_bytes;
8243
393k
                            ai4_ecd_data_cb_offset[i4_subtu_idx] += num_bytes;
8244
393k
                            ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes;
8245
8246
393k
                            num_bytes = ps_chr_intra_satd_ctxt
8247
393k
                                            ->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr];
8248
393k
                            cbf = ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr];
8249
                            /* inter cu is coded if any of the tu is coded in it */
8250
393k
                            ps_best_cu_prms->u1_is_cu_coded |= cbf;
8251
8252
                            /*Overwriting the cr ecd data corresponding to the special mode*/
8253
393k
                            if(0 != num_bytes)
8254
193k
                            {
8255
193k
                                memcpy(
8256
193k
                                    (pu1_ecd_data + total_bytes_offset),
8257
193k
                                    pu1_ecd_data_src_cr + ai4_ecd_data_cr_offset[i4_subtu_idx],
8258
193k
                                    num_bytes);
8259
193k
                            }
8260
8261
                            /* update CR related params */
8262
393k
                            ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] =
8263
393k
                                total_bytes_offset + init_bytes_offset;
8264
8265
393k
                            if(0 == i4_subtu_idx)
8266
393k
                            {
8267
393k
                                ps_tu->s_tu.b1_cr_cbf = cbf;
8268
393k
                            }
8269
0
                            else
8270
0
                            {
8271
0
                                ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf;
8272
0
                            }
8273
8274
393k
                            total_bytes_offset += num_bytes;
8275
393k
                            ai4_ecd_data_cr_offset[i4_subtu_idx] += num_bytes;
8276
8277
                            /*Updating zero rows and zero cols*/
8278
393k
                            ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] =
8279
393k
                                ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr];
8280
393k
                            ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] =
8281
393k
                                ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr];
8282
393k
                            ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] =
8283
393k
                                ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr];
8284
393k
                            ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] =
8285
393k
                                ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr];
8286
8287
393k
                            ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes;
8288
8289
393k
                            if((u1_num_tus > 1) &&
8290
82.1k
                               ps_recon_datastore->au1_is_chromaRecon_available[2])
8291
82.1k
                            {
8292
82.1k
                                ps_recon_datastore
8293
82.1k
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
8294
82.1k
                                                                        [i4_subtu_idx] = 2;
8295
82.1k
                                ps_recon_datastore
8296
82.1k
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
8297
82.1k
                                                                        [i4_subtu_idx] = 2;
8298
82.1k
                            }
8299
311k
                            else if(
8300
311k
                                (1 == u1_num_tus) &&
8301
311k
                                ps_recon_datastore->au1_is_chromaRecon_available[1])
8302
71.3k
                            {
8303
71.3k
                                ps_recon_datastore
8304
71.3k
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
8305
71.3k
                                                                        [i4_subtu_idx] = 1;
8306
71.3k
                                ps_recon_datastore
8307
71.3k
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
8308
71.3k
                                                                        [i4_subtu_idx] = 1;
8309
71.3k
                            }
8310
240k
                            else
8311
240k
                            {
8312
240k
                                ps_recon_datastore
8313
240k
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
8314
240k
                                                                        [i4_subtu_idx] = UCHAR_MAX;
8315
240k
                                ps_recon_datastore
8316
240k
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
8317
240k
                                                                        [i4_subtu_idx] = UCHAR_MAX;
8318
240k
                            }
8319
393k
                        }
8320
393k
                    }
8321
8322
                    /* loop increments */
8323
393k
                    ps_tu++;
8324
393k
                    ps_tu_temp_prms++;
8325
393k
                }
8326
332k
            }
8327
8328
693k
            if(!u1_is_422)
8329
693k
            {
8330
693k
                if(chrm_pred_mode == luma_pred_mode)
8331
528k
                {
8332
528k
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
8333
528k
                }
8334
164k
                else if(chrm_pred_mode == 0)
8335
31.7k
                {
8336
31.7k
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 0;
8337
31.7k
                }
8338
133k
                else if(chrm_pred_mode == 1)
8339
50.2k
                {
8340
50.2k
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 3;
8341
50.2k
                }
8342
82.9k
                else if(chrm_pred_mode == 10)
8343
73.7k
                {
8344
73.7k
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 2;
8345
73.7k
                }
8346
9.22k
                else if(chrm_pred_mode == 26)
8347
9.22k
                {
8348
9.22k
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 1;
8349
9.22k
                }
8350
0
                else
8351
0
                {
8352
0
                    ASSERT(0); /*Should not come here*/
8353
0
                }
8354
693k
            }
8355
0
            else
8356
0
            {
8357
0
                if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[luma_pred_mode])
8358
0
                {
8359
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
8360
0
                }
8361
0
                else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[0])
8362
0
                {
8363
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 0;
8364
0
                }
8365
0
                else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[1])
8366
0
                {
8367
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 3;
8368
0
                }
8369
0
                else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[10])
8370
0
                {
8371
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 2;
8372
0
                }
8373
0
                else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[26])
8374
0
                {
8375
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 1;
8376
0
                }
8377
0
                else
8378
0
                {
8379
0
                    ASSERT(0); /*Should not come here*/
8380
0
                }
8381
0
            }
8382
693k
        }
8383
8384
        /* Store the actual chroma mode */
8385
1.12M
        ps_best_cu_prms->u1_chroma_intra_pred_actual_mode = chrm_pred_mode;
8386
1.12M
    }
8387
8388
    /* update the total bytes produced */
8389
1.12M
    ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes_offset + init_bytes_offset;
8390
8391
    /* store the final chrm bits accumulated */
8392
1.12M
    *pi4_chrm_tu_bits = chrm_tu_bits;
8393
8394
1.12M
    return (chrm_cod_cost);
8395
1.12M
}
8396
8397
/*!
8398
******************************************************************************
8399
* \if Function name : ihevce_final_rdopt_mode_prcs \endif
8400
*
8401
* \brief
8402
*    Final RDOPT mode process function. Performs Recon computation for the
8403
*    final mode. Re-use or Compute pred, iq-data, coeff based on the flags.
8404
*
8405
* \param[in] ps_ctxt : pointer to enc_loop module
8406
* \param[in] ps_prms : pointer to struct containing requisite parameters
8407
*
8408
* \return
8409
*    None
8410
*
8411
* \author
8412
*  Ittiam
8413
*
8414
*****************************************************************************
8415
*/
8416
void ihevce_final_rdopt_mode_prcs(
8417
    ihevce_enc_loop_ctxt_t *ps_ctxt, final_mode_process_prms_t *ps_prms)
8418
369k
{
8419
369k
    enc_loop_cu_final_prms_t *ps_best_cu_prms;
8420
369k
    tu_enc_loop_out_t *ps_tu_enc_loop;
8421
369k
    tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms;
8422
369k
    nbr_avail_flags_t s_nbr;
8423
369k
    recon_datastore_t *ps_recon_datastore;
8424
8425
369k
    ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
8426
369k
    ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
8427
369k
    ihevc_intra_pred_ref_filtering_ft *ihevc_intra_pred_ref_filtering_fptr;
8428
8429
369k
    WORD32 num_tu_in_cu;
8430
369k
    LWORD64 rd_opt_cost;
8431
369k
    WORD32 ctr;
8432
369k
    WORD32 i4_subtu_idx;
8433
369k
    WORD32 cu_size;
8434
369k
    WORD32 cu_pos_x, cu_pos_y;
8435
369k
    WORD32 chrm_present_flag = 1;
8436
369k
    WORD32 num_bytes, total_bytes = 0;
8437
369k
    WORD32 chrm_ctr = 0;
8438
369k
    WORD32 u1_is_cu_coded;
8439
369k
    UWORD8 *pu1_old_ecd_data;
8440
369k
    UWORD8 *pu1_chrm_old_ecd_data;
8441
369k
    UWORD8 *pu1_cur_pred;
8442
369k
    WORD16 *pi2_deq_data;
8443
369k
    WORD16 *pi2_chrm_deq_data;
8444
369k
    WORD16 *pi2_cur_deq_data;
8445
369k
    WORD16 *pi2_cur_deq_data_chrm;
8446
369k
    UWORD8 *pu1_cur_luma_recon;
8447
369k
    UWORD8 *pu1_cur_chroma_recon;
8448
369k
    UWORD8 *pu1_cur_src;
8449
369k
    UWORD8 *pu1_cur_src_chrm;
8450
369k
    UWORD8 *pu1_cur_pred_chrm;
8451
369k
    UWORD8 *pu1_intra_pred_mode;
8452
369k
    UWORD32 *pu4_nbr_flags;
8453
369k
    LWORD64 i8_ssd;
8454
8455
369k
    cu_nbr_prms_t *ps_cu_nbr_prms = ps_prms->ps_cu_nbr_prms;
8456
369k
    cu_inter_cand_t *ps_best_inter_cand = ps_prms->ps_best_inter_cand;
8457
369k
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms = ps_prms->ps_chrm_cu_buf_prms;
8458
8459
369k
    WORD32 packed_pred_mode = ps_prms->packed_pred_mode;
8460
369k
    WORD32 rd_opt_best_idx = ps_prms->rd_opt_best_idx;
8461
369k
    UWORD8 *pu1_src = (UWORD8 *)ps_prms->pv_src;
8462
369k
    WORD32 src_strd = ps_prms->src_strd;
8463
369k
    UWORD8 *pu1_pred = (UWORD8 *)ps_prms->pv_pred;
8464
369k
    WORD32 pred_strd = ps_prms->pred_strd;
8465
369k
    UWORD8 *pu1_pred_chrm = (UWORD8 *)ps_prms->pv_pred_chrm;
8466
369k
    WORD32 pred_chrm_strd = ps_prms->pred_chrm_strd;
8467
369k
    UWORD8 *pu1_final_ecd_data = ps_prms->pu1_final_ecd_data;
8468
369k
    UWORD8 *pu1_csbf_buf = ps_prms->pu1_csbf_buf;
8469
369k
    WORD32 csbf_strd = ps_prms->csbf_strd;
8470
369k
    UWORD8 *pu1_luma_recon = (UWORD8 *)ps_prms->pv_luma_recon;
8471
369k
    WORD32 recon_luma_strd = ps_prms->recon_luma_strd;
8472
369k
    UWORD8 *pu1_chrm_recon = (UWORD8 *)ps_prms->pv_chrm_recon;
8473
369k
    WORD32 recon_chrma_strd = ps_prms->recon_chrma_strd;
8474
369k
    UWORD8 u1_cu_pos_x = ps_prms->u1_cu_pos_x;
8475
369k
    UWORD8 u1_cu_pos_y = ps_prms->u1_cu_pos_y;
8476
369k
    UWORD8 u1_cu_size = ps_prms->u1_cu_size;
8477
369k
    WORD8 i1_cu_qp = ps_prms->i1_cu_qp;
8478
369k
    UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
8479
369k
    UWORD8 u1_num_subtus = (u1_is_422 == 1) + 1;
8480
    /* Get the Chroma pointer and parameters */
8481
369k
    UWORD8 *pu1_src_chrm = ps_chrm_cu_buf_prms->pu1_curr_src;
8482
369k
    WORD32 src_chrm_strd = ps_chrm_cu_buf_prms->i4_chrm_src_stride;
8483
369k
    UWORD8 u1_compute_spatial_ssd_luma = 0;
8484
369k
    UWORD8 u1_compute_spatial_ssd_chroma = 0;
8485
    /* Get the pointer for function selector */
8486
369k
    ihevc_intra_pred_luma_ref_substitution_fptr =
8487
369k
        ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
8488
8489
369k
    ihevc_intra_pred_ref_filtering_fptr =
8490
369k
        ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr;
8491
8492
369k
    ihevc_intra_pred_chroma_ref_substitution_fptr =
8493
369k
        ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
8494
8495
    /* Get the best CU parameters */
8496
369k
    ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
8497
369k
    num_tu_in_cu = ps_best_cu_prms->u2_num_tus_in_cu;
8498
369k
    cu_size = ps_best_cu_prms->u1_cu_size;
8499
369k
    cu_pos_x = u1_cu_pos_x;
8500
369k
    cu_pos_y = u1_cu_pos_y;
8501
369k
    pu1_intra_pred_mode = &ps_best_cu_prms->au1_intra_pred_mode[0];
8502
369k
    pu4_nbr_flags = &ps_best_cu_prms->au4_nbr_flags[0];
8503
369k
    ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore;
8504
8505
    /* get the first TU pointer */
8506
369k
    ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0];
8507
    /* get the first TU only enc_loop prms pointer */
8508
369k
    ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
8509
    /*modify quant related param in ctxt based on current cu qp*/
8510
369k
    if((ps_ctxt->i1_cu_qp_delta_enable))
8511
131k
    {
8512
        /*recompute quant related param at every cu level*/
8513
131k
        ihevce_compute_quant_rel_param(ps_ctxt, i1_cu_qp);
8514
8515
        /* get frame level lambda params */
8516
131k
        ihevce_get_cl_cu_lambda_prms(
8517
131k
            ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? i1_cu_qp : ps_ctxt->i4_frame_qp);
8518
131k
    }
8519
8520
369k
    ps_best_cu_prms->i8_cu_ssd = 0;
8521
369k
    ps_best_cu_prms->u4_cu_open_intra_sad = 0;
8522
8523
    /* For skip case : Set TU_size = CU_size and make cbf = 0
8524
    so that same TU loop can be used for all modes */
8525
369k
    if(PRED_MODE_SKIP == packed_pred_mode)
8526
6.95k
    {
8527
13.9k
        for(ctr = 0; ctr < num_tu_in_cu; ctr++)
8528
6.97k
        {
8529
6.97k
            ps_tu_enc_loop->s_tu.b1_y_cbf = 0;
8530
8531
6.97k
            ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = 0;
8532
8533
6.97k
            ps_tu_enc_loop++;
8534
6.97k
            ps_tu_enc_loop_temp_prms++;
8535
6.97k
        }
8536
8537
        /* go back to the first TU pointer */
8538
6.95k
        ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0];
8539
6.95k
        ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
8540
6.95k
    }
8541
    /**   For inter case, pred calculation is outside the loop     **/
8542
369k
    if(PRED_MODE_INTRA != packed_pred_mode)
8543
72.5k
    {
8544
        /**------------- Compute pred data if required --------------**/
8545
72.5k
        if((1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data))
8546
0
        {
8547
0
            nbr_4x4_t *ps_topleft_nbr_4x4;
8548
0
            nbr_4x4_t *ps_left_nbr_4x4;
8549
0
            nbr_4x4_t *ps_top_nbr_4x4;
8550
0
            WORD32 nbr_4x4_left_strd;
8551
8552
0
            ps_best_inter_cand->pu1_pred_data = pu1_pred;
8553
0
            ps_best_inter_cand->i4_pred_data_stride = pred_strd;
8554
8555
            /* Get the CU nbr information */
8556
0
            ps_topleft_nbr_4x4 = ps_cu_nbr_prms->ps_topleft_nbr_4x4;
8557
0
            ps_left_nbr_4x4 = ps_cu_nbr_prms->ps_left_nbr_4x4;
8558
0
            ps_top_nbr_4x4 = ps_cu_nbr_prms->ps_top_nbr_4x4;
8559
0
            nbr_4x4_left_strd = ps_cu_nbr_prms->nbr_4x4_left_strd;
8560
8561
            /* MVP ,MVD calc and Motion compensation */
8562
0
            rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)(
8563
0
                ps_ctxt,
8564
0
                ps_best_inter_cand,
8565
0
                u1_cu_size,
8566
0
                cu_pos_x,
8567
0
                cu_pos_y,
8568
0
                ps_left_nbr_4x4,
8569
0
                ps_top_nbr_4x4,
8570
0
                ps_topleft_nbr_4x4,
8571
0
                nbr_4x4_left_strd,
8572
0
                rd_opt_best_idx);
8573
0
        }
8574
8575
        /** ------ Motion Compensation for Chroma -------- **/
8576
72.5k
        if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data)
8577
28.0k
        {
8578
28.0k
            UWORD8 *pu1_cur_pred;
8579
28.0k
            pu1_cur_pred = pu1_pred_chrm;
8580
8581
            /* run a loop over all the partitions in cu */
8582
56.8k
            for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++)
8583
28.7k
            {
8584
28.7k
                pu_t *ps_pu;
8585
28.7k
                WORD32 inter_pu_wd, inter_pu_ht;
8586
8587
28.7k
                ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr];
8588
8589
                /* If AMP then each partition can have a different wd and ht */
8590
28.7k
                inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
8591
28.7k
                inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
8592
28.7k
                inter_pu_ht <<= u1_is_422;
8593
                /* chroma mc func */
8594
28.7k
                ihevce_chroma_inter_pred_pu(
8595
28.7k
                    &ps_ctxt->s_mc_ctxt, ps_pu, pu1_cur_pred, pred_chrm_strd);
8596
28.7k
                if(2 == ps_best_cu_prms->u2_num_pus_in_cu)
8597
1.45k
                {
8598
                    /* 2Nx__ partition case */
8599
1.45k
                    if(inter_pu_wd == ps_best_cu_prms->u1_cu_size)
8600
864
                    {
8601
864
                        pu1_cur_pred += (inter_pu_ht * pred_chrm_strd);
8602
864
                    }
8603
                    /* __x2N partition case */
8604
1.45k
                    if(inter_pu_ht == (ps_best_cu_prms->u1_cu_size >> (u1_is_422 == 0)))
8605
588
                    {
8606
588
                        pu1_cur_pred += inter_pu_wd;
8607
588
                    }
8608
1.45k
                }
8609
28.7k
            }
8610
28.0k
        }
8611
72.5k
    }
8612
369k
    pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0];
8613
369k
    pi2_chrm_deq_data =
8614
369k
        &ps_best_cu_prms->pi2_cu_deq_coeffs[0] + ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx;
8615
369k
    pu1_old_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0];
8616
369k
    pu1_chrm_old_ecd_data =
8617
369k
        &ps_best_cu_prms->pu1_cu_coeffs[0] + ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx;
8618
8619
    /* default value for cu coded flag */
8620
369k
    u1_is_cu_coded = 0;
8621
8622
    /* If we are re-computing coeff, set sad to 0 and start accumulating */
8623
    /* else use the best cand. sad from RDOPT stage                    */
8624
369k
    if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data)
8625
0
    {
8626
        /*init of ssd of CU accumulated over all TU*/
8627
0
        ps_best_cu_prms->u4_cu_sad = 0;
8628
8629
        /* reset the luma residual bits */
8630
0
        ps_best_cu_prms->u4_cu_luma_res_bits = 0;
8631
0
    }
8632
8633
369k
    if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data)
8634
109k
    {
8635
        /* reset the chroma residual bits */
8636
109k
        ps_best_cu_prms->u4_cu_chroma_res_bits = 0;
8637
109k
    }
8638
8639
369k
    if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data) ||
8640
369k
       (1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data))
8641
109k
    {
8642
        /*Header bits have to be reevaluated if luma or chroma reevaluation is done, as
8643
        the quantized coefficients might be changed.
8644
        We are copying only those states which correspond to the header from the cabac state
8645
        of the previous CU, because the header is going to be recomputed for this condition*/
8646
109k
        ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1;
8647
109k
        memcpy(
8648
109k
            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
8649
109k
            &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
8650
109k
            IHEVC_CAB_COEFFX_PREFIX);
8651
8652
109k
        if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data))
8653
0
        {
8654
0
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
8655
0
                (&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX),
8656
0
                (&ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0] +
8657
0
                 IHEVC_CAB_COEFFX_PREFIX),
8658
0
                (IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX));
8659
0
        }
8660
109k
        else
8661
109k
        {
8662
109k
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
8663
109k
                (&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX),
8664
109k
                (&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
8665
109k
                      .s_cabac_ctxt.au1_ctxt_models[0] +
8666
109k
                 IHEVC_CAB_COEFFX_PREFIX),
8667
109k
                (IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX));
8668
109k
        }
8669
109k
        ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_best_idx;
8670
109k
    }
8671
259k
    else
8672
259k
    {
8673
259k
        ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0;
8674
259k
    }
8675
8676
    /* Zero cbf tool is disabled for intra CUs */
8677
369k
    if(PRED_MODE_INTRA == packed_pred_mode)
8678
296k
    {
8679
#if ENABLE_ZERO_CBF_IN_INTRA
8680
        ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
8681
#else
8682
296k
        ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
8683
296k
#endif
8684
296k
    }
8685
72.5k
    else
8686
72.5k
    {
8687
#if DISABLE_ZERO_ZBF_IN_INTER
8688
        ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
8689
#else
8690
72.5k
        ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
8691
72.5k
#endif
8692
72.5k
    }
8693
8694
    /** Loop for all tu blocks in current cu and do reconstruction **/
8695
1.36M
    for(ctr = 0; ctr < num_tu_in_cu; ctr++)
8696
993k
    {
8697
993k
        tu_t *ps_tu;
8698
993k
        WORD32 trans_size, num_4x4_in_tu;
8699
993k
        WORD32 cbf, zero_rows, zero_cols;
8700
993k
        WORD32 cu_pos_x_in_4x4, cu_pos_y_in_4x4;
8701
993k
        WORD32 cu_pos_x_in_pix, cu_pos_y_in_pix;
8702
993k
        WORD32 luma_pred_mode, chroma_pred_mode = 0;
8703
993k
        UWORD8 au1_is_recon_available[2];
8704
8705
993k
        ps_tu = &(ps_tu_enc_loop->s_tu); /* Points to the TU property ctxt */
8706
8707
993k
        u1_compute_spatial_ssd_luma = 0;
8708
993k
        u1_compute_spatial_ssd_chroma = 0;
8709
8710
993k
        trans_size = 1 << (ps_tu->b3_size + 2);
8711
993k
        num_4x4_in_tu = (trans_size >> 2);
8712
993k
        cu_pos_x_in_4x4 = ps_tu->b4_pos_x;
8713
993k
        cu_pos_y_in_4x4 = ps_tu->b4_pos_y;
8714
8715
        /* populate the coeffs scan idx */
8716
993k
        ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
8717
8718
        /* get the current pos x and pos y in pixels */
8719
993k
        cu_pos_x_in_pix = (cu_pos_x_in_4x4 << 2) - (cu_pos_x << 3);
8720
993k
        cu_pos_y_in_pix = (cu_pos_y_in_4x4 << 2) - (cu_pos_y << 3);
8721
8722
        /* Update pointers based on the location */
8723
993k
        pu1_cur_src = pu1_src + cu_pos_x_in_pix;
8724
993k
        pu1_cur_src += (cu_pos_y_in_pix * src_strd);
8725
993k
        pu1_cur_pred = pu1_pred + cu_pos_x_in_pix;
8726
993k
        pu1_cur_pred += (cu_pos_y_in_pix * pred_strd);
8727
8728
993k
        pu1_cur_luma_recon = pu1_luma_recon + cu_pos_x_in_pix;
8729
993k
        pu1_cur_luma_recon += (cu_pos_y_in_pix * recon_luma_strd);
8730
8731
993k
        pi2_cur_deq_data = pi2_deq_data + cu_pos_x_in_pix;
8732
993k
        pi2_cur_deq_data += cu_pos_y_in_pix * cu_size;
8733
8734
993k
        pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix;
8735
993k
        pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) +
8736
993k
                            (u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd));
8737
8738
993k
        pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix;
8739
993k
        pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) +
8740
993k
                             (u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd));
8741
8742
993k
        pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix;
8743
993k
        pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) +
8744
993k
                                (u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd));
8745
8746
993k
        pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix;
8747
993k
        pi2_cur_deq_data_chrm +=
8748
993k
            ((cu_pos_y_in_pix >> 1) * cu_size) + (u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size));
8749
8750
        /* if transform size is 4x4 then only first luma 4x4 will have chroma */
8751
993k
        chrm_present_flag = 1; /* by default chroma present is set to 1*/
8752
8753
993k
        if(4 == trans_size)
8754
721k
        {
8755
            /* if tusize is 4x4 then only first luma 4x4 will have chroma*/
8756
721k
            if(0 != chrm_ctr)
8757
540k
            {
8758
540k
                chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE;
8759
540k
            }
8760
8761
            /* increment the chrm ctr unconditionally */
8762
721k
            chrm_ctr++;
8763
            /* after ctr reached 4 reset it */
8764
721k
            if(4 == chrm_ctr)
8765
180k
            {
8766
180k
                chrm_ctr = 0;
8767
180k
            }
8768
721k
        }
8769
8770
        /**------------- Compute pred data if required --------------**/
8771
993k
        if(PRED_MODE_INTRA == packed_pred_mode) /* Inter pred calc. is done outside loop */
8772
777k
        {
8773
            /* Get the pred mode for scan idx calculation, even if pred is not required */
8774
777k
            luma_pred_mode = *pu1_intra_pred_mode;
8775
8776
777k
            if((ps_ctxt->i4_rc_pass == 1) ||
8777
777k
               (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data))
8778
0
            {
8779
0
                WORD32 nbr_flags;
8780
0
                WORD32 luma_pred_func_idx;
8781
0
                UWORD8 *pu1_left;
8782
0
                UWORD8 *pu1_top;
8783
0
                UWORD8 *pu1_top_left;
8784
0
                WORD32 left_strd;
8785
8786
                /* left cu boundary */
8787
0
                if(0 == cu_pos_x_in_pix)
8788
0
                {
8789
0
                    left_strd = ps_cu_nbr_prms->cu_left_stride;
8790
0
                    pu1_left = ps_cu_nbr_prms->pu1_cu_left + cu_pos_y_in_pix * left_strd;
8791
0
                }
8792
0
                else
8793
0
                {
8794
0
                    pu1_left = pu1_cur_luma_recon - 1;
8795
0
                    left_strd = recon_luma_strd;
8796
0
                }
8797
8798
                /* top cu boundary */
8799
0
                if(0 == cu_pos_y_in_pix)
8800
0
                {
8801
0
                    pu1_top = ps_cu_nbr_prms->pu1_cu_top + cu_pos_x_in_pix;
8802
0
                }
8803
0
                else
8804
0
                {
8805
0
                    pu1_top = pu1_cur_luma_recon - recon_luma_strd;
8806
0
                }
8807
8808
                /* by default top left is set to cu top left */
8809
0
                pu1_top_left = ps_cu_nbr_prms->pu1_cu_top_left;
8810
8811
                /* top left based on position */
8812
0
                if((0 != cu_pos_y_in_pix) && (0 == cu_pos_x_in_pix))
8813
0
                {
8814
0
                    pu1_top_left = pu1_left - left_strd;
8815
0
                }
8816
0
                else if(0 != cu_pos_x_in_pix)
8817
0
                {
8818
0
                    pu1_top_left = pu1_top - 1;
8819
0
                }
8820
8821
                /* get the neighbour availability flags */
8822
0
                nbr_flags = ihevce_get_nbr_intra(
8823
0
                    &s_nbr,
8824
0
                    ps_ctxt->pu1_ctb_nbr_map,
8825
0
                    ps_ctxt->i4_nbr_map_strd,
8826
0
                    cu_pos_x_in_4x4,
8827
0
                    cu_pos_y_in_4x4,
8828
0
                    num_4x4_in_tu);
8829
8830
0
                if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data)
8831
0
                {
8832
                    /* copy the nbr flags for chroma reuse */
8833
0
                    if(4 != trans_size)
8834
0
                    {
8835
0
                        *pu4_nbr_flags = nbr_flags;
8836
0
                    }
8837
0
                    else if(1 == chrm_present_flag)
8838
0
                    {
8839
                        /* compute the avail flags assuming luma trans is 8x8 */
8840
                        /* get the neighbour availability flags */
8841
0
                        *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
8842
0
                            ps_ctxt->pu1_ctb_nbr_map,
8843
0
                            ps_ctxt->i4_nbr_map_strd,
8844
0
                            cu_pos_x_in_4x4,
8845
0
                            cu_pos_y_in_4x4,
8846
0
                            (num_4x4_in_tu << 1),
8847
0
                            (num_4x4_in_tu << 1));
8848
0
                    }
8849
8850
                    /* call reference array substitution */
8851
0
                    ihevc_intra_pred_luma_ref_substitution_fptr(
8852
0
                        pu1_top_left,
8853
0
                        pu1_top,
8854
0
                        pu1_left,
8855
0
                        left_strd,
8856
0
                        trans_size,
8857
0
                        nbr_flags,
8858
0
                        (UWORD8 *)ps_ctxt->pv_ref_sub_out,
8859
0
                        1);
8860
8861
                    /* call reference filtering */
8862
0
                    ihevc_intra_pred_ref_filtering_fptr(
8863
0
                        (UWORD8 *)ps_ctxt->pv_ref_sub_out,
8864
0
                        trans_size,
8865
0
                        (UWORD8 *)ps_ctxt->pv_ref_filt_out,
8866
0
                        luma_pred_mode,
8867
0
                        ps_ctxt->i1_strong_intra_smoothing_enable_flag);
8868
8869
                    /* use the look up to get the function idx */
8870
0
                    luma_pred_func_idx = g_i4_ip_funcs[luma_pred_mode];
8871
8872
                    /* call the intra prediction function */
8873
0
                    ps_ctxt->apf_lum_ip[luma_pred_func_idx](
8874
0
                        (UWORD8 *)ps_ctxt->pv_ref_filt_out,
8875
0
                        1,
8876
0
                        pu1_cur_pred,
8877
0
                        pred_strd,
8878
0
                        trans_size,
8879
0
                        luma_pred_mode);
8880
0
                }
8881
0
            }
8882
777k
            else if(
8883
777k
                (1 == chrm_present_flag) &&
8884
329k
                (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data))
8885
90.7k
            {
8886
90.7k
                WORD32 temp_num_4x4_in_tu = num_4x4_in_tu;
8887
8888
90.7k
                if(4 == trans_size) /* compute the avail flags assuming luma trans is 8x8 */
8889
40.0k
                {
8890
40.0k
                    temp_num_4x4_in_tu = num_4x4_in_tu << 1;
8891
40.0k
                }
8892
8893
90.7k
                *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
8894
90.7k
                    ps_ctxt->pu1_ctb_nbr_map,
8895
90.7k
                    ps_ctxt->i4_nbr_map_strd,
8896
90.7k
                    cu_pos_x_in_4x4,
8897
90.7k
                    cu_pos_y_in_4x4,
8898
90.7k
                    temp_num_4x4_in_tu,
8899
90.7k
                    temp_num_4x4_in_tu);
8900
90.7k
            }
8901
8902
            /* Get the pred mode for scan idx calculation, even if pred is not required */
8903
777k
            chroma_pred_mode = ps_best_cu_prms->u1_chroma_intra_pred_actual_mode;
8904
777k
        }
8905
8906
993k
        if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data)
8907
0
        {
8908
0
            WORD32 temp_bits;
8909
0
            LWORD64 temp_cost;
8910
0
            UWORD32 u4_tu_sad;
8911
0
            WORD32 perform_sbh, perform_rdoq;
8912
8913
0
            if(PRED_MODE_INTRA == packed_pred_mode)
8914
0
            {
8915
                /* for luma 4x4 and 8x8 transforms the scan is chosen based on intra pred mode */
8916
0
                if(trans_size < 16)
8917
0
                {
8918
                    /* for modes from 22 up to 30 horizontal scan is used */
8919
0
                    if((luma_pred_mode > 21) && (luma_pred_mode < 31))
8920
0
                    {
8921
0
                        ps_ctxt->i4_scan_idx = SCAN_HORZ;
8922
0
                    }
8923
                    /* for modes from 6 up to 14 vertical scan is used */
8924
0
                    else if((luma_pred_mode > 5) && (luma_pred_mode < 15))
8925
0
                    {
8926
0
                        ps_ctxt->i4_scan_idx = SCAN_VERT;
8927
0
                    }
8928
0
                }
8929
0
            }
8930
8931
            /* RDOPT copy States :  TU init (best until prev TU) to current */
8932
0
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
8933
0
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
8934
0
                        .s_cabac_ctxt.au1_ctxt_models[0] +
8935
0
                    IHEVC_CAB_COEFFX_PREFIX,
8936
0
                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
8937
0
                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
8938
8939
0
            if(ps_prms->u1_recompute_sbh_and_rdoq)
8940
0
            {
8941
0
                perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH);
8942
0
                perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ);
8943
0
            }
8944
0
            else
8945
0
            {
8946
                /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
8947
0
                perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh;
8948
                /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
8949
                we would have to do RDOQ again.*/
8950
0
                perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq;
8951
0
            }
8952
8953
#if DISABLE_RDOQ_INTRA
8954
            if(PRED_MODE_INTRA == packed_pred_mode)
8955
            {
8956
                perform_rdoq = 0;
8957
            }
8958
#endif
8959
            /*If BEST candidate RDOQ is enabled, either no coef level rdoq or CU level rdoq has to be enabled
8960
            so that all candidates and best candidate are quantized with same rounding factor  */
8961
0
            if(1 == perform_rdoq)
8962
0
            {
8963
0
                ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING);
8964
0
            }
8965
8966
0
            cbf = ihevce_t_q_iq_ssd_scan_fxn(
8967
0
                ps_ctxt,
8968
0
                pu1_cur_pred,
8969
0
                pred_strd,
8970
0
                pu1_cur_src,
8971
0
                src_strd,
8972
0
                pi2_cur_deq_data,
8973
0
                cu_size, /*deq_data stride is cu_size*/
8974
0
                pu1_cur_luma_recon,
8975
0
                recon_luma_strd,
8976
0
                pu1_final_ecd_data,
8977
0
                pu1_csbf_buf,
8978
0
                csbf_strd,
8979
0
                trans_size,
8980
0
                packed_pred_mode,
8981
0
                &temp_cost,
8982
0
                &num_bytes,
8983
0
                &temp_bits,
8984
0
                &u4_tu_sad,
8985
0
                &zero_cols,
8986
0
                &zero_rows,
8987
0
                &au1_is_recon_available[0],
8988
0
                perform_rdoq,  //(BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level),
8989
0
                perform_sbh,
8990
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
8991
0
                !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
8992
0
                                       : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
8993
0
                                          (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
8994
0
                                             100.0,
8995
0
                ps_prms->u1_is_cu_noisy,
8996
0
#endif
8997
0
                u1_compute_spatial_ssd_luma ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
8998
0
                1 /*early cbf*/
8999
0
            );  //(BEST_CAND_SBH == ps_ctxt->i4_sbh_level));
9000
9001
            /* Accumulate luma residual bits */
9002
0
            ps_best_cu_prms->u4_cu_luma_res_bits += temp_bits;
9003
9004
            /* RDOPT copy States :  New updated after curr TU to TU init */
9005
0
            if(0 != cbf)
9006
0
            {
9007
                /* update to new state only if CBF is non zero */
9008
0
                COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9009
0
                    &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9010
0
                    &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9011
0
                            .s_cabac_ctxt.au1_ctxt_models[0] +
9012
0
                        IHEVC_CAB_COEFFX_PREFIX,
9013
0
                    IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9014
0
            }
9015
9016
            /* accumulate the TU sad into cu sad */
9017
0
            ps_best_cu_prms->u4_cu_sad += u4_tu_sad;
9018
0
            ps_tu->b1_y_cbf = cbf;
9019
0
            ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = num_bytes;
9020
9021
            /* If somebody updates cbf (RDOQ or SBH), update in nbr str. for BS */
9022
0
            if((ps_prms->u1_will_cabac_state_change) && (!ps_prms->u1_is_first_pass))
9023
0
            {
9024
0
                WORD32 num_4x4_in_cu = u1_cu_size >> 2;
9025
0
                nbr_4x4_t *ps_cur_nbr_4x4 = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0];
9026
0
                ps_cur_nbr_4x4 = (ps_cur_nbr_4x4 + (cu_pos_x_in_pix >> 2));
9027
0
                ps_cur_nbr_4x4 += ((cu_pos_y_in_pix >> 2) * num_4x4_in_cu);
9028
                /* replicate the nbr 4x4 structure for all 4x4 blocks in the current TU */
9029
0
                ps_cur_nbr_4x4->b1_y_cbf = cbf;
9030
                /*copy the cu qp. This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/
9031
0
                ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
9032
                /* Qp and cbf are stored for the all 4x4 in TU */
9033
0
                {
9034
0
                    WORD32 i, j;
9035
0
                    nbr_4x4_t *ps_tmp_4x4;
9036
0
                    ps_tmp_4x4 = ps_cur_nbr_4x4;
9037
9038
0
                    for(i = 0; i < num_4x4_in_tu; i++)
9039
0
                    {
9040
0
                        for(j = 0; j < num_4x4_in_tu; j++)
9041
0
                        {
9042
0
                            ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp;
9043
0
                            ps_tmp_4x4[j].b1_y_cbf = cbf;
9044
0
                        }
9045
                        /* row level update*/
9046
0
                        ps_tmp_4x4 += num_4x4_in_cu;
9047
0
                    }
9048
0
                }
9049
0
            }
9050
0
        }
9051
993k
        else
9052
993k
        {
9053
993k
            zero_cols = ps_tu_enc_loop_temp_prms->u4_luma_zero_col;
9054
993k
            zero_rows = ps_tu_enc_loop_temp_prms->u4_luma_zero_row;
9055
9056
993k
            if(ps_prms->u1_will_cabac_state_change)
9057
993k
            {
9058
993k
                num_bytes = ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed;
9059
993k
            }
9060
0
            else
9061
0
            {
9062
0
                num_bytes = 0;
9063
0
            }
9064
9065
            /* copy luma ecd data to final buffer */
9066
993k
            memcpy(pu1_final_ecd_data, pu1_old_ecd_data, num_bytes);
9067
9068
993k
            pu1_old_ecd_data += num_bytes;
9069
9070
993k
            au1_is_recon_available[0] = 0;
9071
993k
        }
9072
9073
        /**-------- Compute Recon data (Do IT & Recon) : Luma  -----------**/
9074
993k
        if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9075
993k
           (!u1_compute_spatial_ssd_luma ||
9076
0
            (!au1_is_recon_available[0] && u1_compute_spatial_ssd_luma)))
9077
993k
        {
9078
993k
            if(!ps_recon_datastore->u1_is_lumaRecon_available ||
9079
695k
               (ps_recon_datastore->u1_is_lumaRecon_available &&
9080
695k
                (UCHAR_MAX == ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr])))
9081
441k
            {
9082
441k
                ihevce_it_recon_fxn(
9083
441k
                    ps_ctxt,
9084
441k
                    pi2_cur_deq_data,
9085
441k
                    cu_size,
9086
441k
                    pu1_cur_pred,
9087
441k
                    pred_strd,
9088
441k
                    pu1_cur_luma_recon,
9089
441k
                    recon_luma_strd,
9090
441k
                    pu1_final_ecd_data,
9091
441k
                    trans_size,
9092
441k
                    packed_pred_mode,
9093
441k
                    ps_tu->b1_y_cbf,
9094
441k
                    zero_cols,
9095
441k
                    zero_rows);
9096
441k
            }
9097
551k
            else if(
9098
551k
                ps_recon_datastore->u1_is_lumaRecon_available &&
9099
551k
                (UCHAR_MAX != ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]))
9100
551k
            {
9101
551k
                UWORD8 *pu1_recon_src =
9102
551k
                    ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
9103
551k
                         [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]]) +
9104
551k
                    cu_pos_x_in_pix + cu_pos_y_in_pix * ps_recon_datastore->i4_lumaRecon_stride;
9105
9106
551k
                ps_ctxt->s_cmn_opt_func.pf_copy_2d(
9107
551k
                    pu1_cur_luma_recon,
9108
551k
                    recon_luma_strd,
9109
551k
                    pu1_recon_src,
9110
551k
                    ps_recon_datastore->i4_lumaRecon_stride,
9111
551k
                    trans_size,
9112
551k
                    trans_size);
9113
551k
            }
9114
993k
        }
9115
9116
993k
        if(ps_prms->u1_will_cabac_state_change)
9117
993k
        {
9118
993k
            ps_tu_enc_loop->i4_luma_coeff_offset = total_bytes;
9119
993k
        }
9120
9121
993k
        pu1_final_ecd_data += num_bytes;
9122
        /* update total bytes consumed */
9123
993k
        total_bytes += num_bytes;
9124
9125
993k
        u1_is_cu_coded |= ps_tu->b1_y_cbf;
9126
9127
        /***************** Compute T,Q,IQ,IT & Recon for Chroma ********************/
9128
993k
        if(1 == chrm_present_flag)
9129
452k
        {
9130
452k
            pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix;
9131
452k
            pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) +
9132
452k
                                (u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd));
9133
9134
452k
            pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix;
9135
452k
            pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) +
9136
452k
                                 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd));
9137
9138
452k
            pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix;
9139
452k
            pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) +
9140
452k
                                    (u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd));
9141
9142
452k
            pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix;
9143
452k
            pi2_cur_deq_data_chrm += ((cu_pos_y_in_pix >> 1) * cu_size) +
9144
452k
                                     (u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size));
9145
9146
452k
            if(INCLUDE_CHROMA_DURING_TU_RECURSION &&
9147
0
               (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0) &&
9148
0
               (PRED_MODE_INTRA != packed_pred_mode))
9149
0
            {
9150
0
                WORD32 i4_num_bytes;
9151
0
                UWORD8 *pu1_chroma_pred;
9152
0
                UWORD8 *pu1_chroma_recon;
9153
0
                WORD16 *pi2_chroma_deq;
9154
0
                UWORD32 u4_zero_col;
9155
0
                UWORD32 u4_zero_row;
9156
9157
0
                for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
9158
0
                {
9159
0
                    WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
9160
0
                    WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
9161
0
                    WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
9162
9163
0
                    if(0 == u1_is_422)
9164
0
                    {
9165
0
                        i4_subtu_pos_y >>= 1;
9166
0
                    }
9167
9168
0
                    pu1_chroma_pred =
9169
0
                        pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
9170
0
                    pu1_chroma_recon = pu1_cur_chroma_recon +
9171
0
                                       (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
9172
0
                    pi2_chroma_deq =
9173
0
                        pi2_cur_deq_data_chrm + (i4_subtu_idx * chroma_trans_size * cu_size);
9174
9175
0
                    u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx];
9176
0
                    u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx];
9177
9178
0
                    if(ps_prms->u1_will_cabac_state_change)
9179
0
                    {
9180
0
                        i4_num_bytes =
9181
0
                            ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx];
9182
0
                    }
9183
0
                    else
9184
0
                    {
9185
0
                        i4_num_bytes = 0;
9186
0
                    }
9187
9188
0
                    memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes);
9189
9190
0
                    pu1_old_ecd_data += i4_num_bytes;
9191
9192
0
                    au1_is_recon_available[U_PLANE] = 0;
9193
9194
0
                    if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9195
0
                       (!u1_compute_spatial_ssd_chroma ||
9196
0
                        (!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma)))
9197
0
                    {
9198
0
                        if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9199
0
                           (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9200
0
                            (UCHAR_MAX ==
9201
0
                             ps_recon_datastore
9202
0
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])))
9203
0
                        {
9204
0
                            ihevce_chroma_it_recon_fxn(
9205
0
                                ps_ctxt,
9206
0
                                pi2_chroma_deq,
9207
0
                                cu_size,
9208
0
                                pu1_chroma_pred,
9209
0
                                pred_chrm_strd,
9210
0
                                pu1_chroma_recon,
9211
0
                                recon_chrma_strd,
9212
0
                                pu1_final_ecd_data,
9213
0
                                chroma_trans_size,
9214
0
                                (i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1,
9215
0
                                u4_zero_col,
9216
0
                                u4_zero_row,
9217
0
                                U_PLANE);
9218
0
                        }
9219
0
                        else if(
9220
0
                            ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9221
0
                            (UCHAR_MAX !=
9222
0
                             ps_recon_datastore
9223
0
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))
9224
0
                        {
9225
0
                            UWORD8 *pu1_recon_src =
9226
0
                                ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9227
0
                                     [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9228
0
                                          [U_PLANE][ctr][i4_subtu_idx]]) +
9229
0
                                i4_subtu_pos_x +
9230
0
                                i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9231
9232
0
                            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9233
0
                                pu1_recon_src,
9234
0
                                ps_recon_datastore->i4_lumaRecon_stride,
9235
0
                                pu1_chroma_recon,
9236
0
                                recon_chrma_strd,
9237
0
                                chroma_trans_size,
9238
0
                                chroma_trans_size,
9239
0
                                U_PLANE);
9240
0
                        }
9241
0
                    }
9242
9243
0
                    u1_is_cu_coded |=
9244
0
                        ((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf);
9245
9246
0
                    pu1_final_ecd_data += i4_num_bytes;
9247
0
                    total_bytes += i4_num_bytes;
9248
0
                }
9249
9250
0
                for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
9251
0
                {
9252
0
                    WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
9253
0
                    WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
9254
0
                    WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
9255
9256
0
                    if(0 == u1_is_422)
9257
0
                    {
9258
0
                        i4_subtu_pos_y >>= 1;
9259
0
                    }
9260
9261
0
                    pu1_chroma_pred =
9262
0
                        pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
9263
0
                    pu1_chroma_recon = pu1_cur_chroma_recon +
9264
0
                                       (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
9265
0
                    pi2_chroma_deq = pi2_cur_deq_data_chrm +
9266
0
                                     (i4_subtu_idx * chroma_trans_size * cu_size) +
9267
0
                                     chroma_trans_size;
9268
9269
0
                    u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx];
9270
0
                    u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx];
9271
9272
0
                    if(ps_prms->u1_will_cabac_state_change)
9273
0
                    {
9274
0
                        i4_num_bytes =
9275
0
                            ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx];
9276
0
                    }
9277
0
                    else
9278
0
                    {
9279
0
                        i4_num_bytes = 0;
9280
0
                    }
9281
9282
0
                    memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes);
9283
9284
0
                    pu1_old_ecd_data += i4_num_bytes;
9285
9286
0
                    au1_is_recon_available[V_PLANE] = 0;
9287
9288
0
                    if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9289
0
                       (!u1_compute_spatial_ssd_chroma ||
9290
0
                        (!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma)))
9291
0
                    {
9292
0
                        if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9293
0
                           (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9294
0
                            (UCHAR_MAX ==
9295
0
                             ps_recon_datastore
9296
0
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])))
9297
0
                        {
9298
0
                            ihevce_chroma_it_recon_fxn(
9299
0
                                ps_ctxt,
9300
0
                                pi2_chroma_deq,
9301
0
                                cu_size,
9302
0
                                pu1_chroma_pred,
9303
0
                                pred_chrm_strd,
9304
0
                                pu1_chroma_recon,
9305
0
                                recon_chrma_strd,
9306
0
                                pu1_final_ecd_data,
9307
0
                                chroma_trans_size,
9308
0
                                (i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1,
9309
0
                                u4_zero_col,
9310
0
                                u4_zero_row,
9311
0
                                V_PLANE);
9312
0
                        }
9313
0
                        else if(
9314
0
                            ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9315
0
                            (UCHAR_MAX !=
9316
0
                             ps_recon_datastore
9317
0
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))
9318
0
                        {
9319
0
                            UWORD8 *pu1_recon_src =
9320
0
                                ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9321
0
                                     [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9322
0
                                          [V_PLANE][ctr][i4_subtu_idx]]) +
9323
0
                                i4_subtu_pos_x +
9324
0
                                i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9325
9326
0
                            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9327
0
                                pu1_recon_src,
9328
0
                                ps_recon_datastore->i4_lumaRecon_stride,
9329
0
                                pu1_chroma_recon,
9330
0
                                recon_chrma_strd,
9331
0
                                chroma_trans_size,
9332
0
                                chroma_trans_size,
9333
0
                                V_PLANE);
9334
0
                        }
9335
0
                    }
9336
9337
0
                    u1_is_cu_coded |=
9338
0
                        ((1 == i4_subtu_idx) ? ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf);
9339
9340
0
                    pu1_final_ecd_data += i4_num_bytes;
9341
0
                    total_bytes += i4_num_bytes;
9342
0
                }
9343
0
            }
9344
452k
            else
9345
452k
            {
9346
452k
                WORD32 cb_zero_col, cb_zero_row, cr_zero_col, cr_zero_row;
9347
9348
904k
                for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
9349
452k
                {
9350
452k
                    WORD32 cb_cbf, cr_cbf;
9351
452k
                    WORD32 cb_num_bytes, cr_num_bytes;
9352
9353
452k
                    WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
9354
9355
452k
                    WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
9356
452k
                    WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
9357
9358
452k
                    if(0 == u1_is_422)
9359
452k
                    {
9360
452k
                        i4_subtu_pos_y >>= 1;
9361
452k
                    }
9362
9363
452k
                    pu1_cur_src_chrm += (i4_subtu_idx * chroma_trans_size * src_chrm_strd);
9364
452k
                    pu1_cur_pred_chrm += (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
9365
452k
                    pu1_cur_chroma_recon += (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
9366
452k
                    pi2_cur_deq_data_chrm += (i4_subtu_idx * chroma_trans_size * cu_size);
9367
9368
452k
                    if((PRED_MODE_INTRA == packed_pred_mode) &&
9369
329k
                       (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data))
9370
90.7k
                    {
9371
90.7k
                        WORD32 nbr_flags, left_strd_chrm, chrm_pred_func_idx;
9372
90.7k
                        UWORD8 *pu1_left_chrm;
9373
90.7k
                        UWORD8 *pu1_top_chrm;
9374
90.7k
                        UWORD8 *pu1_top_left_chrm;
9375
9376
90.7k
                        nbr_flags = ihevce_get_intra_chroma_tu_nbr(
9377
90.7k
                            *pu4_nbr_flags, i4_subtu_idx, chroma_trans_size, u1_is_422);
9378
9379
                        /* left cu boundary */
9380
90.7k
                        if(0 == i4_subtu_pos_x)
9381
84.4k
                        {
9382
84.4k
                            left_strd_chrm = ps_chrm_cu_buf_prms->i4_cu_left_stride;
9383
84.4k
                            pu1_left_chrm =
9384
84.4k
                                ps_chrm_cu_buf_prms->pu1_cu_left + i4_subtu_pos_y * left_strd_chrm;
9385
84.4k
                        }
9386
6.21k
                        else
9387
6.21k
                        {
9388
6.21k
                            pu1_left_chrm = pu1_cur_chroma_recon - 2;
9389
6.21k
                            left_strd_chrm = recon_chrma_strd;
9390
6.21k
                        }
9391
9392
                        /* top cu boundary */
9393
90.7k
                        if(0 == i4_subtu_pos_y)
9394
84.4k
                        {
9395
84.4k
                            pu1_top_chrm = ps_chrm_cu_buf_prms->pu1_cu_top + i4_subtu_pos_x;
9396
84.4k
                        }
9397
6.21k
                        else
9398
6.21k
                        {
9399
6.21k
                            pu1_top_chrm = pu1_cur_chroma_recon - recon_chrma_strd;
9400
6.21k
                        }
9401
9402
                        /* by default top left is set to cu top left */
9403
90.7k
                        pu1_top_left_chrm = ps_chrm_cu_buf_prms->pu1_cu_top_left;
9404
9405
                        /* top left based on position */
9406
90.7k
                        if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x))
9407
3.10k
                        {
9408
3.10k
                            pu1_top_left_chrm = pu1_left_chrm - left_strd_chrm;
9409
3.10k
                        }
9410
87.5k
                        else if(0 != i4_subtu_pos_x)
9411
6.21k
                        {
9412
6.21k
                            pu1_top_left_chrm = pu1_top_chrm - 2;
9413
6.21k
                        }
9414
9415
                        /* call the chroma reference array substitution */
9416
90.7k
                        ihevc_intra_pred_chroma_ref_substitution_fptr(
9417
90.7k
                            pu1_top_left_chrm,
9418
90.7k
                            pu1_top_chrm,
9419
90.7k
                            pu1_left_chrm,
9420
90.7k
                            left_strd_chrm,
9421
90.7k
                            chroma_trans_size,
9422
90.7k
                            nbr_flags,
9423
90.7k
                            (UWORD8 *)ps_ctxt->pv_ref_sub_out,
9424
90.7k
                            1,
9425
90.7k
                            CHROMA_FMT_IDC_YUV420);
9426
9427
                        /* use the look up to get the function idx */
9428
90.7k
                        chrm_pred_func_idx = g_i4_ip_funcs[chroma_pred_mode];
9429
9430
                        /* call the intra prediction function */
9431
90.7k
                        ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
9432
90.7k
                            (UWORD8 *)ps_ctxt->pv_ref_sub_out,
9433
90.7k
                            1,
9434
90.7k
                            pu1_cur_pred_chrm,
9435
90.7k
                            pred_chrm_strd,
9436
90.7k
                            chroma_trans_size,
9437
90.7k
                            chroma_pred_mode);
9438
90.7k
                    }
9439
9440
                    /**---------- Compute iq&coeff data if required : Chroma ------------**/
9441
452k
                    if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data)
9442
138k
                    {
9443
138k
                        WORD32 perform_sbh, perform_rdoq, temp_bits;
9444
9445
138k
                        if(ps_prms->u1_recompute_sbh_and_rdoq)
9446
0
                        {
9447
0
                            perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH);
9448
0
                            perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ);
9449
0
                        }
9450
138k
                        else
9451
138k
                        {
9452
                            /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
9453
138k
                            perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh;
9454
                            /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
9455
                        we would have to do RDOQ again.*/
9456
138k
                            perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq;
9457
138k
                        }
9458
9459
                        /* populate the coeffs scan idx */
9460
138k
                        ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
9461
9462
138k
                        if(PRED_MODE_INTRA == packed_pred_mode)
9463
90.7k
                        {
9464
                            /* for 4x4 transforms the scan is chosen based on the intra pred mode */
9465
90.7k
                            if(4 == chroma_trans_size)
9466
77.1k
                            {
9467
                                /* for modes from 22 upto 30 horizontal scan is used */
9468
77.1k
                                if((chroma_pred_mode > 21) && (chroma_pred_mode < 31))
9469
5.43k
                                {
9470
5.43k
                                    ps_ctxt->i4_scan_idx = SCAN_HORZ;
9471
5.43k
                                }
9472
                                /* for modes from 6 upto 14 vertical scan is used */
9473
71.6k
                                else if((chroma_pred_mode > 5) && (chroma_pred_mode < 15))
9474
42.9k
                                {
9475
42.9k
                                    ps_ctxt->i4_scan_idx = SCAN_VERT;
9476
42.9k
                                }
9477
77.1k
                            }
9478
90.7k
                        }
9479
9480
#if DISABLE_RDOQ_INTRA
9481
                        if(PRED_MODE_INTRA == packed_pred_mode)
9482
                        {
9483
                            perform_rdoq = 0;
9484
                        }
9485
#endif
9486
9487
                        /* RDOPT copy States :  TU init (best until prev TU) to current */
9488
138k
                        COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9489
138k
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9490
138k
                                    .s_cabac_ctxt.au1_ctxt_models[0] +
9491
138k
                                IHEVC_CAB_COEFFX_PREFIX,
9492
138k
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9493
138k
                            IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9494
9495
138k
                        ASSERT(rd_opt_best_idx == ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx);
9496
                        /*If BEST candidate RDOQ is enabled, either no coef level rdoq or CU level rdoq has to be enabled
9497
                    so that all candidates and best candidate are quantized with same rounding factor  */
9498
138k
                        if(1 == perform_rdoq)
9499
0
                        {
9500
0
                            ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING);
9501
0
                        }
9502
9503
138k
                        if(!ps_best_cu_prms->u1_skip_flag ||
9504
1.13k
                           !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
9505
138k
                        {
9506
                            /* Cb */
9507
138k
                            cb_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
9508
138k
                                ps_ctxt,
9509
138k
                                pu1_cur_pred_chrm,
9510
138k
                                pred_chrm_strd,
9511
138k
                                pu1_cur_src_chrm,
9512
138k
                                src_chrm_strd,
9513
138k
                                pi2_cur_deq_data_chrm,
9514
138k
                                cu_size,
9515
138k
                                pu1_chrm_recon,
9516
138k
                                recon_chrma_strd,
9517
138k
                                pu1_final_ecd_data,
9518
138k
                                pu1_csbf_buf,
9519
138k
                                csbf_strd,
9520
138k
                                chroma_trans_size,
9521
138k
                                ps_ctxt->i4_scan_idx,
9522
138k
                                (PRED_MODE_INTRA == packed_pred_mode),
9523
138k
                                &cb_num_bytes,
9524
138k
                                &temp_bits,
9525
138k
                                &cb_zero_col,
9526
138k
                                &cb_zero_row,
9527
138k
                                &au1_is_recon_available[U_PLANE],
9528
138k
                                perform_sbh,
9529
138k
                                perform_rdoq,
9530
138k
                                &i8_ssd,
9531
138k
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
9532
138k
                                !ps_ctxt->u1_is_refPic
9533
138k
                                    ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
9534
138k
                                    : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
9535
136k
                                       (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
9536
136k
                                          100.0,
9537
138k
                                ps_prms->u1_is_cu_noisy,
9538
138k
#endif
9539
138k
                                ps_best_cu_prms->u1_skip_flag &&
9540
1.13k
                                    ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt,
9541
138k
                                u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD
9542
138k
                                                              : FREQUENCY_DOMAIN_SSD,
9543
138k
                                U_PLANE);
9544
138k
                        }
9545
0
                        else
9546
0
                        {
9547
0
                            cb_cbf = 0;
9548
0
                            temp_bits = 0;
9549
0
                            cb_num_bytes = 0;
9550
0
                            au1_is_recon_available[U_PLANE] = 0;
9551
0
                            cb_zero_col = 0;
9552
0
                            cb_zero_row = 0;
9553
0
                        }
9554
9555
                        /* Accumulate chroma residual bits */
9556
138k
                        ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits;
9557
9558
                        /* RDOPT copy States :  New updated after curr TU to TU init */
9559
138k
                        if(0 != cb_cbf)
9560
61.4k
                        {
9561
61.4k
                            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9562
61.4k
                                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9563
61.4k
                                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9564
61.4k
                                        .s_cabac_ctxt.au1_ctxt_models[0] +
9565
61.4k
                                    IHEVC_CAB_COEFFX_PREFIX,
9566
61.4k
                                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9567
61.4k
                        }
9568
                        /* RDOPT copy States :  Restoring back the Cb init state to Cr */
9569
76.7k
                        else
9570
76.7k
                        {
9571
76.7k
                            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9572
76.7k
                                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9573
76.7k
                                        .s_cabac_ctxt.au1_ctxt_models[0] +
9574
76.7k
                                    IHEVC_CAB_COEFFX_PREFIX,
9575
76.7k
                                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9576
76.7k
                                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9577
76.7k
                        }
9578
9579
138k
                        if(!ps_best_cu_prms->u1_skip_flag ||
9580
1.13k
                           !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
9581
138k
                        {
9582
                            /* Cr */
9583
138k
                            cr_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
9584
138k
                                ps_ctxt,
9585
138k
                                pu1_cur_pred_chrm,
9586
138k
                                pred_chrm_strd,
9587
138k
                                pu1_cur_src_chrm,
9588
138k
                                src_chrm_strd,
9589
138k
                                pi2_cur_deq_data_chrm + chroma_trans_size,
9590
138k
                                cu_size,
9591
138k
                                pu1_chrm_recon,
9592
138k
                                recon_chrma_strd,
9593
138k
                                pu1_final_ecd_data + cb_num_bytes,
9594
138k
                                pu1_csbf_buf,
9595
138k
                                csbf_strd,
9596
138k
                                chroma_trans_size,
9597
138k
                                ps_ctxt->i4_scan_idx,
9598
138k
                                (PRED_MODE_INTRA == packed_pred_mode),
9599
138k
                                &cr_num_bytes,
9600
138k
                                &temp_bits,
9601
138k
                                &cr_zero_col,
9602
138k
                                &cr_zero_row,
9603
138k
                                &au1_is_recon_available[V_PLANE],
9604
138k
                                perform_sbh,
9605
138k
                                perform_rdoq,
9606
138k
                                &i8_ssd,
9607
138k
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
9608
138k
                                !ps_ctxt->u1_is_refPic
9609
138k
                                    ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
9610
138k
                                    : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
9611
136k
                                       (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
9612
136k
                                          100.0,
9613
138k
                                ps_prms->u1_is_cu_noisy,
9614
138k
#endif
9615
138k
                                ps_best_cu_prms->u1_skip_flag &&
9616
1.13k
                                    ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt,
9617
138k
                                u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD
9618
138k
                                                              : FREQUENCY_DOMAIN_SSD,
9619
138k
                                V_PLANE);
9620
138k
                        }
9621
0
                        else
9622
0
                        {
9623
0
                            cr_cbf = 0;
9624
0
                            temp_bits = 0;
9625
0
                            cr_num_bytes = 0;
9626
0
                            au1_is_recon_available[V_PLANE] = 0;
9627
0
                            cr_zero_col = 0;
9628
0
                            cr_zero_row = 0;
9629
0
                        }
9630
9631
                        /* Accumulate chroma residual bits */
9632
138k
                        ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits;
9633
9634
                        /* RDOPT copy States :  New updated after curr TU to TU init */
9635
138k
                        if(0 != cr_cbf)
9636
58.0k
                        {
9637
58.0k
                            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9638
58.0k
                                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9639
58.0k
                                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9640
58.0k
                                        .s_cabac_ctxt.au1_ctxt_models[0] +
9641
58.0k
                                    IHEVC_CAB_COEFFX_PREFIX,
9642
58.0k
                                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9643
58.0k
                        }
9644
9645
138k
                        if(0 == i4_subtu_idx)
9646
138k
                        {
9647
138k
                            ps_tu->b1_cb_cbf = cb_cbf;
9648
138k
                            ps_tu->b1_cr_cbf = cr_cbf;
9649
138k
                        }
9650
0
                        else
9651
0
                        {
9652
0
                            ps_tu->b1_cb_cbf_subtu1 = cb_cbf;
9653
0
                            ps_tu->b1_cr_cbf_subtu1 = cr_cbf;
9654
0
                        }
9655
138k
                    }
9656
313k
                    else
9657
313k
                    {
9658
313k
                        cb_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx];
9659
313k
                        cb_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx];
9660
313k
                        cr_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx];
9661
313k
                        cr_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx];
9662
9663
313k
                        if(ps_prms->u1_will_cabac_state_change)
9664
313k
                        {
9665
313k
                            cb_num_bytes =
9666
313k
                                ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx];
9667
313k
                        }
9668
0
                        else
9669
0
                        {
9670
0
                            cb_num_bytes = 0;
9671
0
                        }
9672
9673
313k
                        if(ps_prms->u1_will_cabac_state_change)
9674
313k
                        {
9675
313k
                            cr_num_bytes =
9676
313k
                                ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx];
9677
313k
                        }
9678
0
                        else
9679
0
                        {
9680
0
                            cr_num_bytes = 0;
9681
0
                        }
9682
9683
                        /* copy cb ecd data to final buffer */
9684
313k
                        memcpy(pu1_final_ecd_data, pu1_chrm_old_ecd_data, cb_num_bytes);
9685
9686
313k
                        pu1_chrm_old_ecd_data += cb_num_bytes;
9687
9688
                        /* copy cr ecd data to final buffer (placed right after the cb data) */
9689
313k
                        memcpy(
9690
313k
                            (pu1_final_ecd_data + cb_num_bytes),
9691
313k
                            pu1_chrm_old_ecd_data,
9692
313k
                            cr_num_bytes);
9693
9694
313k
                        pu1_chrm_old_ecd_data += cr_num_bytes;
9695
9696
313k
                        au1_is_recon_available[U_PLANE] = 0;
9697
313k
                        au1_is_recon_available[V_PLANE] = 0;
9698
313k
                    }
9699
9700
                    /**-------- Compute Recon data (Do IT & Recon) : Chroma  -----------**/
9701
452k
                    if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9702
452k
                       (!u1_compute_spatial_ssd_chroma ||
9703
0
                        (!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma)))
9704
452k
                    {
9705
452k
                        if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9706
68.7k
                           (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9707
68.7k
                            (UCHAR_MAX ==
9708
68.7k
                             ps_recon_datastore
9709
68.7k
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])))
9710
424k
                        {
9711
424k
                            ihevce_chroma_it_recon_fxn(
9712
424k
                                ps_ctxt,
9713
424k
                                pi2_cur_deq_data_chrm,
9714
424k
                                cu_size,
9715
424k
                                pu1_cur_pred_chrm,
9716
424k
                                pred_chrm_strd,
9717
424k
                                pu1_cur_chroma_recon,
9718
424k
                                recon_chrma_strd,
9719
424k
                                pu1_final_ecd_data,
9720
424k
                                chroma_trans_size,
9721
424k
                                (i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1,
9722
424k
                                cb_zero_col,
9723
424k
                                cb_zero_row,
9724
424k
                                U_PLANE);
9725
424k
                        }
9726
27.6k
                        else if(
9727
27.6k
                            ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9728
27.6k
                            (UCHAR_MAX !=
9729
27.6k
                             ps_recon_datastore
9730
27.6k
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))
9731
27.6k
                        {
9732
27.6k
                            UWORD8 *pu1_recon_src =
9733
27.6k
                                ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9734
27.6k
                                     [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9735
27.6k
                                          [U_PLANE][ctr][i4_subtu_idx]]) +
9736
27.6k
                                i4_subtu_pos_x +
9737
27.6k
                                i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9738
9739
27.6k
                            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9740
27.6k
                                pu1_recon_src,
9741
27.6k
                                ps_recon_datastore->i4_lumaRecon_stride,
9742
27.6k
                                pu1_cur_chroma_recon,
9743
27.6k
                                recon_chrma_strd,
9744
27.6k
                                chroma_trans_size,
9745
27.6k
                                chroma_trans_size,
9746
27.6k
                                U_PLANE);
9747
27.6k
                        }
9748
452k
                    }
9749
9750
452k
                    u1_is_cu_coded |=
9751
452k
                        ((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf);
9752
9753
452k
                    if(ps_prms->u1_will_cabac_state_change)
9754
452k
                    {
9755
452k
                        ps_tu_enc_loop->ai4_cb_coeff_offset[i4_subtu_idx] = total_bytes;
9756
452k
                    }
9757
9758
452k
                    pu1_final_ecd_data += cb_num_bytes;
9759
                    /* update total bytes consumed */
9760
452k
                    total_bytes += cb_num_bytes;
9761
9762
452k
                    if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9763
452k
                       (!u1_compute_spatial_ssd_chroma ||
9764
0
                        (!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma)))
9765
452k
                    {
9766
452k
                        if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9767
68.7k
                           (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9768
68.7k
                            (UCHAR_MAX ==
9769
68.7k
                             ps_recon_datastore
9770
68.7k
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])))
9771
424k
                        {
9772
424k
                            ihevce_chroma_it_recon_fxn(
9773
424k
                                ps_ctxt,
9774
424k
                                pi2_cur_deq_data_chrm + chroma_trans_size,
9775
424k
                                cu_size,
9776
424k
                                pu1_cur_pred_chrm,
9777
424k
                                pred_chrm_strd,
9778
424k
                                pu1_cur_chroma_recon,
9779
424k
                                recon_chrma_strd,
9780
424k
                                pu1_final_ecd_data,
9781
424k
                                chroma_trans_size,
9782
424k
                                (i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1,
9783
424k
                                cr_zero_col,
9784
424k
                                cr_zero_row,
9785
424k
                                V_PLANE);
9786
424k
                        }
9787
27.6k
                        else if(
9788
27.6k
                            ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9789
27.6k
                            (UCHAR_MAX !=
9790
27.6k
                             ps_recon_datastore
9791
27.6k
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))
9792
27.6k
                        {
9793
27.6k
                            UWORD8 *pu1_recon_src =
9794
27.6k
                                ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9795
27.6k
                                     [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9796
27.6k
                                          [V_PLANE][ctr][i4_subtu_idx]]) +
9797
27.6k
                                i4_subtu_pos_x +
9798
27.6k
                                i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9799
9800
27.6k
                            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9801
27.6k
                                pu1_recon_src,
9802
27.6k
                                ps_recon_datastore->i4_lumaRecon_stride,
9803
27.6k
                                pu1_cur_chroma_recon,
9804
27.6k
                                recon_chrma_strd,
9805
27.6k
                                chroma_trans_size,
9806
27.6k
                                chroma_trans_size,
9807
27.6k
                                V_PLANE);
9808
27.6k
                        }
9809
452k
                    }
9810
9811
452k
                    u1_is_cu_coded |=
9812
452k
                        ((1 == i4_subtu_idx) ? ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf);
9813
9814
452k
                    if(ps_prms->u1_will_cabac_state_change)
9815
452k
                    {
9816
452k
                        ps_tu_enc_loop->ai4_cr_coeff_offset[i4_subtu_idx] = total_bytes;
9817
452k
                    }
9818
9819
452k
                    pu1_final_ecd_data += cr_num_bytes;
9820
                    /* update total bytes consumed */
9821
452k
                    total_bytes += cr_num_bytes;
9822
452k
                }
9823
452k
            }
9824
452k
        }
9825
540k
        else
9826
540k
        {
9827
540k
            ps_tu_enc_loop->ai4_cb_coeff_offset[0] = total_bytes;
9828
540k
            ps_tu_enc_loop->ai4_cr_coeff_offset[0] = total_bytes;
9829
540k
            ps_tu_enc_loop->ai4_cb_coeff_offset[1] = total_bytes;
9830
540k
            ps_tu_enc_loop->ai4_cr_coeff_offset[1] = total_bytes;
9831
540k
            ps_tu->b1_cb_cbf = 0;
9832
540k
            ps_tu->b1_cr_cbf = 0;
9833
540k
            ps_tu->b1_cb_cbf_subtu1 = 0;
9834
540k
            ps_tu->b1_cr_cbf_subtu1 = 0;
9835
540k
        }
9836
9837
        /* Update to next TU */
9838
993k
        ps_tu_enc_loop++;
9839
993k
        ps_tu_enc_loop_temp_prms++;
9840
9841
993k
        pu4_nbr_flags++;
9842
993k
        pu1_intra_pred_mode++;
9843
9844
        /* Do not set the nbr map for the last TU in the CU */
9845
993k
        if((num_tu_in_cu - 1) != ctr)
9846
623k
        {
9847
            /* set the neighbour map to 1 */
9848
623k
            ihevce_set_nbr_map(
9849
623k
                ps_ctxt->pu1_ctb_nbr_map,
9850
623k
                ps_ctxt->i4_nbr_map_strd,
9851
623k
                cu_pos_x_in_4x4,
9852
623k
                cu_pos_y_in_4x4,
9853
623k
                (trans_size >> 2),
9854
623k
                1);
9855
623k
        }
9856
993k
    }
9857
9858
369k
    if(ps_prms->u1_will_cabac_state_change)
9859
369k
    {
9860
369k
        ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded;
9861
9862
        /* Modify skip flag, if luma is skipped & Chroma is coded */
9863
369k
        if((1 == u1_is_cu_coded) && (PRED_MODE_SKIP == packed_pred_mode))
9864
865
        {
9865
865
            ps_best_cu_prms->u1_skip_flag = 0;
9866
865
        }
9867
369k
    }
9868
9869
    /* during chroma evaluation if skip decision was overwritten      */
9870
    /* then the current skip candidate is set to a non skip candidate */
9871
369k
    if(PRED_MODE_INTRA != packed_pred_mode)
9872
72.5k
    {
9873
72.5k
        ps_best_inter_cand->b1_skip_flag = ps_best_cu_prms->u1_skip_flag;
9874
72.5k
    }
9875
9876
    /**------------- Compute header data if required --------------**/
9877
369k
    if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data)
9878
109k
    {
9879
109k
        WORD32 cbf_bits;
9880
109k
        WORD32 cu_bits;
9881
109k
        WORD32 unit_4x4_size = cu_size >> 2;
9882
9883
        /*Restoring the running reference into the best rdopt_ctxt cabac states which will then
9884
        be copied as the base reference for the next cu
9885
        Assumption : We are ensuring that the u1_eval_header_data flag is set to 1 only if either
9886
        luma and chroma are being reevaluated*/
9887
109k
        COPY_CABAC_STATES(
9888
109k
            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9889
109k
                 .s_cabac_ctxt.au1_ctxt_models[0],
9890
109k
            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
9891
109k
            IHEVC_CAB_CTXT_END);
9892
9893
        /* get the neighbour availability flags for current cu  */
9894
109k
        ihevce_get_only_nbr_flag(
9895
109k
            &s_nbr,
9896
109k
            ps_ctxt->pu1_ctb_nbr_map,
9897
109k
            ps_ctxt->i4_nbr_map_strd,
9898
109k
            (cu_pos_x << 1),
9899
109k
            (cu_pos_y << 1),
9900
109k
            unit_4x4_size,
9901
109k
            unit_4x4_size);
9902
9903
109k
        cu_bits = ihevce_entropy_rdo_encode_cu(
9904
109k
            &ps_ctxt->s_rdopt_entropy_ctxt,
9905
109k
            ps_best_cu_prms,
9906
109k
            cu_pos_x,
9907
109k
            cu_pos_y,
9908
109k
            cu_size,
9909
109k
            ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
9910
109k
                                           : s_nbr.u1_top_avail,
9911
109k
            s_nbr.u1_left_avail,
9912
109k
            (pu1_final_ecd_data - total_bytes),
9913
109k
            &cbf_bits);
9914
9915
        /* cbf bits are excluded from header bits, instead considered as texture bits */
9916
109k
        ps_best_cu_prms->u4_cu_hdr_bits = cu_bits - cbf_bits;
9917
109k
        ps_best_cu_prms->u4_cu_cbf_bits = cbf_bits;
9918
109k
    }
9919
9920
369k
    if(ps_prms->u1_will_cabac_state_change)
9921
369k
    {
9922
369k
        ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes;
9923
369k
    }
9924
369k
}
9925
9926
/*!
9927
******************************************************************************
9928
* \if Function name : ihevce_set_eval_flags \endif
9929
*
9930
* \brief
9931
*    Function which decides which eval flags have to be set based on present
9932
*    and RDOQ conditions
9933
*
9934
* \param[in] ps_ctxt : encoder ctxt pointer
9935
* \param[in] ps_enc_loop_bestprms : pointer to final cu params
9936
*
9937
* \return
9938
*    None
9939
*
9940
* \author
9941
*  Ittiam
9942
*
9943
*****************************************************************************
9944
*/
9945
void ihevce_set_eval_flags(
9946
    ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_final_prms_t *ps_enc_loop_bestprms)
9947
369k
{
9948
369k
    WORD32 count = 0;
9949
9950
369k
    ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0;
9951
9952
369k
    ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data =
9953
369k
        !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
9954
9955
369k
    if(ps_ctxt->u1_disable_intra_eval && (!(ps_ctxt->i4_deblk_pad_hpel_cur_pic & 0x1)))
9956
0
    {
9957
0
        ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 0;
9958
0
    }
9959
369k
    else
9960
369k
    {
9961
369k
        ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1;
9962
369k
    }
9963
9964
369k
    if((1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq) ||
9965
369k
       (1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh))
9966
0
    {
9967
        /* When rdoq is enabled only for the best candidate, in case of in Intra nTU
9968
        RDOQ might have altered the coeffs of the neighbour CU. As a result, the pred
9969
        for the current CU will change. Therefore, we need to reevaluate the pred data*/
9970
0
        if((ps_enc_loop_bestprms->u2_num_tus_in_cu > 1) &&
9971
0
           (ps_enc_loop_bestprms->u1_intra_flag == 1))
9972
0
        {
9973
0
            ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 1;
9974
0
            ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data = 1;
9975
0
        }
9976
0
        if(ps_enc_loop_bestprms->u1_skip_flag == 1)
9977
0
        {
9978
0
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
9979
0
            {
9980
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
9981
0
                    .b1_eval_luma_iq_and_coeff_data = 0;
9982
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
9983
0
                    .b1_eval_chroma_iq_and_coeff_data = 0;
9984
0
            }
9985
0
        }
9986
0
        else
9987
0
        {
9988
0
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
9989
0
            {
9990
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
9991
0
                    .b1_eval_luma_iq_and_coeff_data = 1;
9992
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
9993
0
                    .b1_eval_chroma_iq_and_coeff_data = 1;
9994
0
            }
9995
0
        }
9996
0
    }
9997
369k
    else
9998
369k
    {
9999
369k
        switch(ps_ctxt->i4_quality_preset)
10000
369k
        {
10001
184k
        case IHEVCE_QUALITY_P0:
10002
214k
        case IHEVCE_QUALITY_P2:
10003
259k
        case IHEVCE_QUALITY_P3:
10004
259k
        {
10005
962k
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
10006
702k
            {
10007
702k
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10008
702k
                    .b1_eval_luma_iq_and_coeff_data = 0;
10009
702k
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10010
702k
                    .b1_eval_chroma_iq_and_coeff_data =
10011
702k
                    !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10012
702k
            }
10013
10014
259k
            break;
10015
214k
        }
10016
30.9k
        case IHEVCE_QUALITY_P4:
10017
55.0k
        case IHEVCE_QUALITY_P5:
10018
55.0k
        {
10019
205k
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
10020
150k
            {
10021
150k
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10022
150k
                    .b1_eval_luma_iq_and_coeff_data = 0;
10023
150k
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10024
150k
                    .b1_eval_chroma_iq_and_coeff_data =
10025
150k
                    !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10026
150k
            }
10027
10028
55.0k
            break;
10029
30.9k
        }
10030
54.3k
        case IHEVCE_QUALITY_P6:
10031
54.3k
        {
10032
195k
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
10033
140k
            {
10034
140k
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10035
140k
                    .b1_eval_luma_iq_and_coeff_data = 0;
10036
140k
#if !ENABLE_CHROMA_TRACKING_OF_LUMA_CBF_IN_XS25
10037
140k
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10038
140k
                    .b1_eval_chroma_iq_and_coeff_data =
10039
140k
                    !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10040
#else
10041
                if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_temporal_layer_id > 1) &&
10042
                   (ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b3_size >= 2))
10043
                {
10044
                    ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10045
                        .b1_eval_chroma_iq_and_coeff_data =
10046
                        ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b1_y_cbf;
10047
                }
10048
                else
10049
                {
10050
                    ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10051
                        .b1_eval_chroma_iq_and_coeff_data =
10052
                        !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10053
                }
10054
#endif
10055
140k
            }
10056
10057
54.3k
            break;
10058
30.9k
        }
10059
0
        default:
10060
0
        {
10061
0
            break;
10062
30.9k
        }
10063
369k
        }
10064
369k
    }
10065
10066
    /* Not recomputing Luma pred-data and header data for any preset now */
10067
369k
    ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1;
10068
369k
}
10069
10070
/**
10071
******************************************************************************
10072
*
10073
*  @brief Shrink's TU tree of inter CUs by merging redundnant child nodes
10074
*         (not coded children) into a parent node(not coded).
10075
*
10076
*  @par   Description
10077
*         This is required post RDO evaluation as TU decisions are
10078
*         pre-determined(pre RDO) based on recursive SATD,
10079
*         while the quad children TU's can be skipped during RDO
10080
*
10081
*         The shrink process is applied iteratively till there are no
10082
*         more modes to shrink
10083
*
10084
*  @param[inout]   ps_tu_enc_loop
10085
*       pointer to tu enc loop params of inter cu
10086
*
10087
*  @param[inout]   ps_tu_enc_loop_temp_prms
10088
*       pointer to temp tu enc loop params of inter cu
10089
*
10090
*  @param[in]   num_tu_in_cu
10091
*       number of tus in cu
10092
*
10093
*  @return      modified number of tus in cu
10094
*
10095
******************************************************************************
10096
*/
10097
WORD32 ihevce_shrink_inter_tu_tree(
10098
    tu_enc_loop_out_t *ps_tu_enc_loop,
10099
    tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms,
10100
    recon_datastore_t *ps_recon_datastore,
10101
    WORD32 num_tu_in_cu,
10102
    UWORD8 u1_is_422)
10103
97.1k
{
10104
97.1k
    WORD32 recurse = 1;
10105
97.1k
    WORD32 ctr;
10106
10107
    /* ------------- Quadtree TU Split Transform flag optimization ------------  */
10108
    /* Post RDO, if all 4 child nodes are not coded the overheads of split TU    */
10109
    /* flags and cbf flags are saved by merging to parent node and marking       */
10110
    /* parent TU as not coded                                                    */
10111
    /*                                                                           */
10112
    /*                               ParentTUSplit=1                             */
10113
    /*                                      |                                    */
10114
    /*       ---------------------------------------------------------           */
10115
    /*       |C0(Not coded) | C1(Not coded) | C2(Not coded) | C3(Not coded)      */
10116
    /*                                     ||                                    */
10117
    /*                                     \/                                    */
10118
    /*                                                                           */
10119
    /*                              ParentTUSplit=0 (Not Coded)                  */
10120
    /*                                                                           */
10121
    /* ------------- Quadtree TU Split Transform flag optimization ------------  */
10122
113k
    while((num_tu_in_cu > 4) && recurse)
10123
16.7k
    {
10124
16.7k
        recurse = 0;
10125
10126
        /* Validate inter CU */
10127
        //ASSERT(ps_tu_enc_loop[0].s_tu.s_tu.b1_intra_flag == 0); /*b1_intra_flag no longer a member of tu structure */
10128
10129
        /* loop for all tu blocks in current cu */
10130
96.8k
        for(ctr = 0; ctr < num_tu_in_cu;)
10131
80.0k
        {
10132
            /* Get current tu posx, posy and size */
10133
80.0k
            WORD32 curr_pos_x = ps_tu_enc_loop[ctr].s_tu.b4_pos_x << 2;
10134
80.0k
            WORD32 curr_pos_y = ps_tu_enc_loop[ctr].s_tu.b4_pos_y << 2;
10135
            /* +1 is for parents size */
10136
80.0k
            WORD32 parent_tu_size = 1 << (ps_tu_enc_loop[ctr].s_tu.b3_size + 2 + 1);
10137
10138
            /* eval merge if leaf nodes reached i.e all child tus are of same size and first tu pos is same as parent pos */
10139
80.0k
            WORD32 eval_merge = ((curr_pos_x & (parent_tu_size - 1)) == 0);
10140
80.0k
            eval_merge &= ((curr_pos_y & (parent_tu_size - 1)) == 0);
10141
10142
            /* As TUs are published in encode order (Z SCAN),                      */
10143
            /* Four consecutive TUS of same size implies we have hit leaf nodes.   */
10144
80.0k
            if(((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 1].s_tu.b3_size)) &&
10145
58.9k
               ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 2].s_tu.b3_size)) &&
10146
49.6k
               ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 3].s_tu.b3_size)) &&
10147
46.9k
               eval_merge)
10148
46.0k
            {
10149
46.0k
                WORD32 merge_parent = 1;
10150
10151
                /* If any leaf noded is coded, it cannot be merged to parent */
10152
46.0k
                if((ps_tu_enc_loop[ctr].s_tu.b1_y_cbf) || (ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf) ||
10153
6.74k
                   (ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf) ||
10154
10155
5.84k
                   (ps_tu_enc_loop[ctr + 1].s_tu.b1_y_cbf) ||
10156
4.52k
                   (ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf) ||
10157
4.51k
                   (ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf) ||
10158
10159
4.51k
                   (ps_tu_enc_loop[ctr + 2].s_tu.b1_y_cbf) ||
10160
2.74k
                   (ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf) ||
10161
2.71k
                   (ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf) ||
10162
10163
2.70k
                   (ps_tu_enc_loop[ctr + 3].s_tu.b1_y_cbf) ||
10164
2.01k
                   (ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf) ||
10165
2.01k
                   (ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf))
10166
44.0k
                {
10167
44.0k
                    merge_parent = 0;
10168
44.0k
                }
10169
10170
46.0k
                if(u1_is_422)
10171
0
                {
10172
0
                    if((ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1) ||
10173
0
                       (ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1) ||
10174
10175
0
                       (ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf_subtu1) ||
10176
0
                       (ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf_subtu1) ||
10177
10178
0
                       (ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf_subtu1) ||
10179
0
                       (ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf_subtu1) ||
10180
10181
0
                       (ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf_subtu1) ||
10182
0
                       (ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf_subtu1))
10183
0
                    {
10184
0
                        merge_parent = 0;
10185
0
                    }
10186
0
                }
10187
10188
46.0k
                if(merge_parent)
10189
2.01k
                {
10190
                    /* Merge all the children (ctr,ctr+1,ctr+2,ctr+3) to parent (ctr) */
10191
10192
2.01k
                    if(ps_recon_datastore->u1_is_lumaRecon_available)
10193
0
                    {
10194
0
                        ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
10195
10196
0
                        memmove(
10197
0
                            &ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 1],
10198
0
                            &ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 4],
10199
0
                            (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10200
0
                    }
10201
10202
2.01k
                    if(ps_recon_datastore->au1_is_chromaRecon_available[0])
10203
0
                    {
10204
0
                        ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][0] =
10205
0
                            UCHAR_MAX;
10206
0
                        ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][0] =
10207
0
                            UCHAR_MAX;
10208
10209
0
                        memmove(
10210
0
                            &ps_recon_datastore
10211
0
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][0],
10212
0
                            &ps_recon_datastore
10213
0
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][0],
10214
0
                            (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10215
10216
0
                        memmove(
10217
0
                            &ps_recon_datastore
10218
0
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][0],
10219
0
                            &ps_recon_datastore
10220
0
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][0],
10221
0
                            (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10222
10223
0
                        if(u1_is_422)
10224
0
                        {
10225
0
                            ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][1] =
10226
0
                                UCHAR_MAX;
10227
0
                            ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][1] =
10228
0
                                UCHAR_MAX;
10229
10230
0
                            memmove(
10231
0
                                &ps_recon_datastore
10232
0
                                     ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][1],
10233
0
                                &ps_recon_datastore
10234
0
                                     ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][1],
10235
0
                                (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10236
10237
0
                            memmove(
10238
0
                                &ps_recon_datastore
10239
0
                                     ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][1],
10240
0
                                &ps_recon_datastore
10241
0
                                     ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][1],
10242
0
                                (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10243
0
                        }
10244
0
                    }
10245
10246
                    /* Parent node size is one more than that of child */
10247
2.01k
                    ps_tu_enc_loop[ctr].s_tu.b3_size++;
10248
10249
2.01k
                    ctr++;
10250
10251
                    /* move the subsequent TUs to next element */
10252
2.01k
                    ASSERT(num_tu_in_cu >= (ctr + 3));
10253
2.01k
                    memmove(
10254
2.01k
                        (void *)(ps_tu_enc_loop + ctr),
10255
2.01k
                        (void *)(ps_tu_enc_loop + ctr + 3),
10256
2.01k
                        (num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_out_t));
10257
10258
                    /* Also memmove the temp TU params */
10259
2.01k
                    memmove(
10260
2.01k
                        (void *)(ps_tu_enc_loop_temp_prms + ctr),
10261
2.01k
                        (void *)(ps_tu_enc_loop_temp_prms + ctr + 3),
10262
2.01k
                        (num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_temp_prms_t));
10263
10264
                    /* Number of TUs in CU are now less by 3 */
10265
2.01k
                    num_tu_in_cu -= 3;
10266
10267
                    /* Recurse again as new parent also be can be merged later */
10268
2.01k
                    recurse = 1;
10269
2.01k
                }
10270
44.0k
                else
10271
44.0k
                {
10272
                    /* Go to next set of leaf nodes */
10273
44.0k
                    ctr += 4;
10274
44.0k
                }
10275
46.0k
            }
10276
33.9k
            else
10277
33.9k
            {
10278
33.9k
                ctr++;
10279
33.9k
            }
10280
80.0k
        }
10281
16.7k
    }
10282
10283
    /* return the modified num TUs*/
10284
97.1k
    ASSERT(num_tu_in_cu > 0);
10285
97.1k
    return (num_tu_in_cu);
10286
97.1k
}
10287
10288
/**
 * Extends an intra mode list with the modes adjacent (mode - 1 / mode + 1) to
 * its first MAX_INTRA_CU_CANDIDATES entries, and with INTRA_PLANAR and
 * INTRA_DC, skipping any mode already marked in the hash table.
 *
 * pu1_mode_array    : mode list; new modes are appended at index
 *                     u1_num_ipe_modes onwards
 * pu1_hash_table    : indexed by mode; non-zero means the mode is already
 *                     present. Entries of appended modes are set to 1
 * u1_num_ipe_modes  : number of modes in the list on entry
 *
 * Returns the number of modes in the list after the update.
 */
UWORD8 ihevce_intra_mode_nxn_hash_updater(
    UWORD8 *pu1_mode_array, UWORD8 *pu1_hash_table, UWORD8 u1_num_ipe_modes)
{
    WORD32 cand;
    WORD32 nbr_mode;

    for(cand = 0; cand < MAX_INTRA_CU_CANDIDATES; cand++)
    {
        /* Entries outside the 0..34 mode range contribute nothing */
        if(pu1_mode_array[cand] >= 35)
        {
            continue;
        }

        /* Lower neighbour; mode 0 has none */
        if(0 != pu1_mode_array[cand])
        {
            nbr_mode = pu1_mode_array[cand] - 1;

            if(0 == pu1_hash_table[nbr_mode])
            {
                pu1_hash_table[nbr_mode] = 1;
                pu1_mode_array[u1_num_ipe_modes++] = nbr_mode;
            }
        }

        /* Upper neighbour; mode 34 has none. The entry is read afresh */
        /* here as the append above writes into the same array         */
        if(34 != pu1_mode_array[cand])
        {
            nbr_mode = pu1_mode_array[cand] + 1;

            if(0 == pu1_hash_table[nbr_mode])
            {
                pu1_hash_table[nbr_mode] = 1;
                pu1_mode_array[u1_num_ipe_modes++] = nbr_mode;
            }
        }
    }

    /* Planar and DC are always made part of the list */
    if(0 == pu1_hash_table[INTRA_PLANAR])
    {
        pu1_hash_table[INTRA_PLANAR] = 1;
        pu1_mode_array[u1_num_ipe_modes++] = INTRA_PLANAR;
    }

    if(0 == pu1_hash_table[INTRA_DC])
    {
        pu1_hash_table[INTRA_DC] = 1;
        pu1_mode_array[u1_num_ipe_modes++] = INTRA_DC;
    }

    return u1_num_ipe_modes;
}
10340
10341
#if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
10342
WORD32 ihevce_determine_tu_tree_distribution(
10343
    cu_inter_cand_t *ps_cu_data,
10344
    me_func_selector_t *ps_func_selector,
10345
    WORD16 *pi2_scratch_mem,
10346
    UWORD8 *pu1_inp,
10347
    WORD32 i4_inp_stride,
10348
    WORD32 i4_lambda,
10349
    UWORD8 u1_lambda_q_shift,
10350
    UWORD8 u1_cu_size,
10351
    UWORD8 u1_max_tr_depth)
10352
{
10353
    err_prms_t s_err_prms;
10354
10355
    PF_SAD_FXN_TU_REC pf_err_compute[4];
10356
10357
    WORD32 i4_satd;
10358
10359
    s_err_prms.pi4_sad_grid = &i4_satd;
10360
    s_err_prms.pi4_tu_split_flags = ps_cu_data->ai4_tu_split_flag;
10361
    s_err_prms.pu1_inp = pu1_inp;
10362
    s_err_prms.pu1_ref = ps_cu_data->pu1_pred_data;
10363
    s_err_prms.i4_inp_stride = i4_inp_stride;
10364
    s_err_prms.i4_ref_stride = ps_cu_data->i4_pred_data_stride;
10365
    s_err_prms.pu1_wkg_mem = (UWORD8 *)pi2_scratch_mem;
10366
10367
    if(u1_cu_size == 64)
10368
    {
10369
        s_err_prms.u1_max_tr_depth = MIN(1, u1_max_tr_depth);
10370
    }
10371
    else
10372
    {
10373
        s_err_prms.u1_max_tr_depth = u1_max_tr_depth;
10374
    }
10375
10376
    pf_err_compute[CU_64x64] = hme_evalsatd_pt_pu_64x64_tu_rec;
10377
    pf_err_compute[CU_32x32] = hme_evalsatd_pt_pu_32x32_tu_rec;
10378
    pf_err_compute[CU_16x16] = hme_evalsatd_pt_pu_16x16_tu_rec;
10379
    pf_err_compute[CU_8x8] = hme_evalsatd_pt_pu_8x8_tu_rec;
10380
10381
    i4_satd = pf_err_compute[hme_get_range(u1_cu_size) - 4](
10382
        &s_err_prms, i4_lambda, u1_lambda_q_shift, 0, ps_func_selector);
10383
10384
    if((0 == u1_max_tr_depth) && (ps_cu_data->b3_part_size != 0) && (u1_cu_size != 64))
10385
    {
10386
        ps_cu_data->ai4_tu_split_flag[0] = 1;
10387
    }
10388
10389
    return i4_satd;
10390
}
10391
#endif
10392
10393
void ihevce_populate_nbr_4x4_with_pu_data(
10394
    nbr_4x4_t *ps_nbr_4x4, pu_t *ps_pu, WORD32 i4_nbr_buf_stride)
10395
36.6k
{
10396
36.6k
    WORD32 i, j;
10397
10398
36.6k
    nbr_4x4_t *ps_tmp_4x4 = ps_nbr_4x4;
10399
10400
36.6k
    WORD32 ht = (ps_pu->b4_ht + 1);
10401
36.6k
    WORD32 wd = (ps_pu->b4_wd + 1);
10402
10403
36.6k
    ps_nbr_4x4->b1_intra_flag = 0;
10404
36.6k
    ps_nbr_4x4->b1_pred_l0_flag = !(ps_pu->b2_pred_mode & 1);
10405
36.6k
    ps_nbr_4x4->b1_pred_l1_flag = (ps_pu->b2_pred_mode > PRED_L0);
10406
36.6k
    ps_nbr_4x4->mv = ps_pu->mv;
10407
10408
130k
    for(i = 0; i < ht; i++)
10409
93.3k
    {
10410
488k
        for(j = 0; j < wd; j++)
10411
395k
        {
10412
395k
            ps_tmp_4x4[j] = *ps_nbr_4x4;
10413
395k
        }
10414
10415
93.3k
        ps_tmp_4x4 += i4_nbr_buf_stride;
10416
93.3k
    }
10417
36.6k
}
10418
10419
void ihevce_call_luma_inter_pred_rdopt_pass1(
10420
    ihevce_enc_loop_ctxt_t *ps_ctxt, cu_inter_cand_t *ps_inter_cand, WORD32 cu_size)
10421
0
{
10422
0
    pu_t *ps_pu;
10423
0
    UWORD8 *pu1_pred;
10424
0
    WORD32 pred_stride, ctr, num_cu_part, skip_or_merge_flag = 0;
10425
0
    WORD32 inter_pu_wd, inter_pu_ht;
10426
10427
0
    pu1_pred = ps_inter_cand->pu1_pred_data_scr;
10428
0
    pred_stride = ps_inter_cand->i4_pred_data_stride;
10429
0
    num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1;
10430
10431
0
    for(ctr = 0; ctr < num_cu_part; ctr++)
10432
0
    {
10433
0
        ps_pu = &ps_inter_cand->as_inter_pu[ctr];
10434
10435
        /* IF AMP then each partitions can have diff wd ht */
10436
0
        inter_pu_wd = (ps_pu->b4_wd + 1) << 2;
10437
0
        inter_pu_ht = (ps_pu->b4_ht + 1) << 2;
10438
10439
0
        skip_or_merge_flag = ps_inter_cand->b1_skip_flag | ps_pu->b1_merge_flag;
10440
        //if(0 == skip_or_merge_flag)
10441
0
        {
10442
0
            ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 1);
10443
0
        }
10444
0
        if((2 == num_cu_part) && (0 == ctr))
10445
0
        {
10446
            /* 2Nx__ partion case */
10447
0
            if(inter_pu_wd == cu_size)
10448
0
            {
10449
0
                pu1_pred += (inter_pu_ht * pred_stride);
10450
0
            }
10451
10452
            /* __x2N partion case */
10453
0
            if(inter_pu_ht == cu_size)
10454
0
            {
10455
0
                pu1_pred += inter_pu_wd;
10456
0
            }
10457
0
        }
10458
0
    }
10459
0
}
10460
10461
LWORD64 ihevce_it_recon_ssd(
10462
    ihevce_enc_loop_ctxt_t *ps_ctxt,
10463
    UWORD8 *pu1_src,
10464
    WORD32 i4_src_strd,
10465
    UWORD8 *pu1_pred,
10466
    WORD32 i4_pred_strd,
10467
    WORD16 *pi2_deq_data,
10468
    WORD32 i4_deq_data_strd,
10469
    UWORD8 *pu1_recon,
10470
    WORD32 i4_recon_stride,
10471
    UWORD8 *pu1_ecd_data,
10472
    UWORD8 u1_trans_size,
10473
    UWORD8 u1_pred_mode,
10474
    WORD32 i4_cbf,
10475
    WORD32 i4_zero_col,
10476
    WORD32 i4_zero_row,
10477
    CHROMA_PLANE_ID_T e_chroma_plane)
10478
1.65M
{
10479
1.65M
    if(NULL_PLANE == e_chroma_plane)
10480
1.01M
    {
10481
1.01M
        ihevce_it_recon_fxn(
10482
1.01M
            ps_ctxt,
10483
1.01M
            pi2_deq_data,
10484
1.01M
            i4_deq_data_strd,
10485
1.01M
            pu1_pred,
10486
1.01M
            i4_pred_strd,
10487
1.01M
            pu1_recon,
10488
1.01M
            i4_recon_stride,
10489
1.01M
            pu1_ecd_data,
10490
1.01M
            u1_trans_size,
10491
1.01M
            u1_pred_mode,
10492
1.01M
            i4_cbf,
10493
1.01M
            i4_zero_col,
10494
1.01M
            i4_zero_row);
10495
10496
1.01M
        return ps_ctxt->s_cmn_opt_func.pf_ssd_calculator(
10497
1.01M
            pu1_recon, pu1_src, i4_recon_stride, i4_src_strd, u1_trans_size, u1_trans_size,
10498
1.01M
            e_chroma_plane);
10499
1.01M
    }
10500
642k
    else
10501
642k
    {
10502
642k
        ihevce_chroma_it_recon_fxn(
10503
642k
            ps_ctxt,
10504
642k
            pi2_deq_data,
10505
642k
            i4_deq_data_strd,
10506
642k
            pu1_pred,
10507
642k
            i4_pred_strd,
10508
642k
            pu1_recon,
10509
642k
            i4_recon_stride,
10510
642k
            pu1_ecd_data,
10511
642k
            u1_trans_size,
10512
642k
            i4_cbf,
10513
642k
            i4_zero_col,
10514
642k
            i4_zero_row,
10515
642k
            e_chroma_plane);
10516
10517
642k
        return ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
10518
642k
            pu1_recon,
10519
642k
            pu1_src,
10520
642k
            i4_recon_stride,
10521
642k
            i4_src_strd,
10522
642k
            u1_trans_size,
10523
642k
            u1_trans_size,
10524
642k
            e_chroma_plane);
10525
642k
    }
10526
1.65M
}
10527
10528
/*!
10529
******************************************************************************
10530
* \if Function name : ihevce_chroma_t_q_iq_ssd_scan_fxn \endif
10531
*
10532
* \brief
10533
*    Transform unit level (Chroma) enc_loop function
10534
*
10535
* \param[in] ps_ctxt    enc_loop module ctxt pointer
10536
* \param[in] pu1_pred       pointer to predicted data buffer
10537
* \param[in] pred_strd      predicted buffer stride
10538
* \param[in] pu1_src    pointer to source data buffer
10539
* \param[in] src_strd   source buffer stride
10540
* \param[in] pi2_deq_data   pointer to store iq data
10541
* \param[in] deq_data_strd  iq data buffer stride
10542
* \param[out] pu1_ecd_data  pointer coeff output buffer (input to ent cod)
10543
* \param[out] pu1_csbf_buf  pointer to store the csbf for all 4x4 in a current
10544
*                           block
10545
* \param[out] csbf_strd     csbf buffer stride
10546
* \param[in] trans_size     transform size (4, 8, 16)
10547
* \param[in] intra_flag     0:Inter/Skip 1:Intra
10548
* \param[out] pi4_coeff_off pointer to store the number of bytes produced in
10549
*                           coeff buffer
10550
*                           for the current TU in RDopt Mode
10551
* \param[out] pi4_zero_col  pointer to store the zero_col info for the TU
10552
* \param[out] pi4_zero_row  pointer to store the zero_row info for the TU
10553
*
10554
* \return
10555
*    CBF of the current block
10556
*
10557
* \author
10558
*  Ittiam
10559
*
10560
*****************************************************************************
10561
*/
10562
WORD32 ihevce_chroma_t_q_iq_ssd_scan_fxn(
10563
    ihevce_enc_loop_ctxt_t *ps_ctxt,
10564
    UWORD8 *pu1_pred,
10565
    WORD32 pred_strd,
10566
    UWORD8 *pu1_src,
10567
    WORD32 src_strd,
10568
    WORD16 *pi2_deq_data,
10569
    WORD32 deq_data_strd,
10570
    UWORD8 *pu1_recon,
10571
    WORD32 i4_recon_stride,
10572
    UWORD8 *pu1_ecd_data,
10573
    UWORD8 *pu1_csbf_buf,
10574
    WORD32 csbf_strd,
10575
    WORD32 trans_size,
10576
    WORD32 i4_scan_idx,
10577
    WORD32 intra_flag,
10578
    WORD32 *pi4_coeff_off,
10579
    WORD32 *pi4_tu_bits,
10580
    WORD32 *pi4_zero_col,
10581
    WORD32 *pi4_zero_row,
10582
    UWORD8 *pu1_is_recon_available,
10583
    WORD32 i4_perform_sbh,
10584
    WORD32 i4_perform_rdoq,
10585
    LWORD64 *pi8_cost,
10586
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
10587
    WORD32 i4_alpha_stim_multiplier,
10588
    UWORD8 u1_is_cu_noisy,
10589
#endif
10590
    UWORD8 u1_is_skip,
10591
    SSD_TYPE_T e_ssd_type,
10592
    CHROMA_PLANE_ID_T e_chroma_plane)
10593
2.99M
{
10594
2.99M
    WORD32 trans_idx, cbf, u4_blk_sad;
10595
2.99M
    WORD16 *pi2_quant_coeffs;
10596
2.99M
    WORD16 *pi2_trans_values;
10597
2.99M
    WORD32 quant_scale_mat_offset;
10598
2.99M
    WORD32 *pi4_trans_scratch;
10599
2.99M
    WORD32 *pi4_subBlock2csbfId_map = NULL;
10600
10601
#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
10602
    WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i;
10603
#endif
10604
10605
2.99M
    rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt;
10606
10607
2.99M
    WORD32 i4_perform_zcbf = (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE) ||
10608
2.38M
                             (!intra_flag && ENABLE_INTER_ZCU_COST);
10609
2.99M
    WORD32 i4_perform_coeff_level_rdoq =
10610
2.99M
        (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING) &&
10611
2.20M
        (ps_ctxt->i4_chroma_quant_rounding_level == CHROMA_QUANT_ROUNDING);
10612
10613
2.99M
    ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE));
10614
2.99M
    ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW);
10615
10616
2.99M
    *pi4_coeff_off = 0;
10617
2.99M
    *pi4_tu_bits = 0;
10618
2.99M
    pu1_is_recon_available[0] = 0;
10619
10620
2.99M
    pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0];
10621
2.99M
    pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
10622
2.99M
    pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2);
10623
10624
2.99M
    if(2 == trans_size)
10625
0
    {
10626
0
        trans_size = 4;
10627
0
    }
10628
10629
    /* translate the transform size to index */
10630
2.99M
    trans_idx = trans_size >> 2;
10631
10632
2.99M
    if(16 == trans_size)
10633
133k
    {
10634
133k
        trans_idx = 3;
10635
133k
    }
10636
10637
2.99M
    if(u1_is_skip)
10638
0
    {
10639
0
        pi8_cost[0] = ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
10640
0
            pu1_pred,
10641
0
            pu1_src,
10642
0
            pred_strd,
10643
0
            src_strd,
10644
0
            trans_size,
10645
0
            trans_size,
10646
0
            e_chroma_plane);
10647
10648
0
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
10649
0
        {
10650
            /* buffer copy fromp pred to recon */
10651
0
            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
10652
0
                pu1_pred,
10653
0
                pred_strd,
10654
0
                pu1_recon,
10655
0
                i4_recon_stride,
10656
0
                trans_size,
10657
0
                trans_size,
10658
0
                e_chroma_plane);
10659
10660
0
            pu1_is_recon_available[0] = 1;
10661
0
        }
10662
10663
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
10664
0
        if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
10665
0
        {
10666
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
10667
0
                pu1_src,
10668
0
                src_strd,
10669
0
                pu1_pred,
10670
0
                pred_strd,
10671
0
                pi8_cost[0],
10672
0
                i4_alpha_stim_multiplier,
10673
0
                trans_size,
10674
0
                0,
10675
0
                ps_ctxt->u1_enable_psyRDOPT,
10676
0
                e_chroma_plane);
10677
0
        }
10678
0
#endif
10679
10680
0
#if ENABLE_INTER_ZCU_COST
10681
#if !WEIGH_CHROMA_COST
10682
        /* cbf = 0, accumulate cu not coded cost */
10683
        ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
10684
#else
10685
0
        ps_ctxt->i8_cu_not_coded_cost += (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor +
10686
0
                                          (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
10687
0
                                         CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT;
10688
0
#endif
10689
0
#endif
10690
10691
0
        return 0;
10692
0
    }
10693
10694
2.99M
    if(intra_flag == 1)
10695
2.38M
    {
10696
2.38M
        quant_scale_mat_offset = 0;
10697
10698
#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
10699
        ai4_quant_rounding_factors[0][0] =
10700
            MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3);
10701
10702
        for(i = 0; i < trans_size * trans_size; i++)
10703
        {
10704
            ai4_quant_rounding_factors[1][i] =
10705
                MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3][i],
10706
                    (1 << QUANT_ROUND_FACTOR_Q) / 3);
10707
            ai4_quant_rounding_factors[2][i] =
10708
                MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3][i],
10709
                    (1 << QUANT_ROUND_FACTOR_Q) / 3);
10710
        }
10711
#endif
10712
2.38M
    }
10713
611k
    else
10714
611k
    {
10715
611k
        quant_scale_mat_offset = NUM_TRANS_TYPES;
10716
611k
    }
10717
10718
2.99M
    switch(trans_size)
10719
2.99M
    {
10720
2.34M
    case 4:
10721
2.34M
    {
10722
2.34M
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU;
10723
10724
2.34M
        break;
10725
0
    }
10726
516k
    case 8:
10727
516k
    {
10728
516k
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU;
10729
10730
516k
        break;
10731
0
    }
10732
133k
    case 16:
10733
133k
    {
10734
133k
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU;
10735
10736
133k
        break;
10737
0
    }
10738
0
    case 32:
10739
0
    {
10740
0
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU;
10741
10742
0
        break;
10743
0
    }
10744
2.99M
    }
10745
10746
    /* ---------- call residue and transform block ------- */
10747
2.99M
    u4_blk_sad = ps_ctxt->apf_chrm_resd_trns[trans_idx - 1](
10748
2.99M
        pu1_src,
10749
2.99M
        pu1_pred,
10750
2.99M
        pi4_trans_scratch,
10751
2.99M
        pi2_trans_values,
10752
2.99M
        src_strd,
10753
2.99M
        pred_strd,
10754
2.99M
        trans_size,
10755
2.99M
        e_chroma_plane);
10756
2.99M
    (void)u4_blk_sad;
10757
    /* -------- calculate SSD calculation in Transform Domain ------ */
10758
10759
2.99M
    cbf = ps_ctxt->apf_quant_iquant_ssd
10760
2.99M
              [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2]
10761
10762
2.99M
          (pi2_trans_values,
10763
2.99M
           ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset],
10764
2.99M
           pi2_quant_coeffs,
10765
2.99M
           pi2_deq_data,
10766
2.99M
           trans_size,
10767
2.99M
           ps_ctxt->i4_chrm_cu_qp_div6,
10768
2.99M
           ps_ctxt->i4_chrm_cu_qp_mod6,
10769
2.99M
#if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
10770
2.99M
           ps_ctxt->i4_quant_rnd_factor[intra_flag],
10771
2.99M
           ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3],
10772
2.99M
           ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3],
10773
#else
10774
           intra_flag ? ai4_quant_rounding_factors[0][0] : ps_ctxt->i4_quant_rnd_factor[intra_flag],
10775
           intra_flag ? ai4_quant_rounding_factors[1]
10776
                      : ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3],
10777
           intra_flag ? ai4_quant_rounding_factors[2]
10778
                      : ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3],
10779
#endif
10780
2.99M
           trans_size,
10781
2.99M
           trans_size,
10782
2.99M
           deq_data_strd,
10783
2.99M
           pu1_csbf_buf,
10784
2.99M
           csbf_strd,
10785
2.99M
           pi4_zero_col,
10786
2.99M
           pi4_zero_row,
10787
2.99M
           ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset],
10788
2.99M
           pi8_cost);
10789
10790
2.99M
    if(e_ssd_type != FREQUENCY_DOMAIN_SSD)
10791
642k
    {
10792
642k
        pi8_cost[0] = UINT_MAX;
10793
642k
    }
10794
10795
2.99M
    if(0 != cbf)
10796
1.48M
    {
10797
1.48M
        if(i4_perform_sbh || i4_perform_rdoq)
10798
1.03M
        {
10799
1.03M
            ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd;
10800
1.03M
            ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size;
10801
10802
1.03M
            ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_chrm_cu_qp_div6;
10803
1.03M
            ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_chrm_cu_qp_mod6;
10804
1.03M
            ps_rdoq_sbh_ctxt->i4_scan_idx = i4_scan_idx;
10805
1.03M
            ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
10806
1.03M
            ps_rdoq_sbh_ctxt->i4_trans_size = trans_size;
10807
10808
1.03M
            ps_rdoq_sbh_ctxt->pi2_dequant_coeff =
10809
1.03M
                ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset];
10810
1.03M
            ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data;
10811
1.03M
            ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs;
10812
1.03M
            ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values;
10813
1.03M
            ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf;
10814
1.03M
            ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map;
10815
10816
1.03M
            if((!i4_perform_rdoq))
10817
529k
            {
10818
529k
                ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
10819
10820
529k
                pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
10821
529k
            }
10822
1.03M
        }
10823
10824
        /* ------- call coeffs scan function ------- */
10825
1.48M
        *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
10826
1.48M
            pi2_quant_coeffs,
10827
1.48M
            pi4_subBlock2csbfId_map,
10828
1.48M
            i4_scan_idx,
10829
1.48M
            trans_size,
10830
1.48M
            pu1_ecd_data,
10831
1.48M
            pu1_csbf_buf,
10832
1.48M
            csbf_strd);
10833
1.48M
    }
10834
10835
    /*  Normalize Cost. Note : trans_idx, not (trans_idx-1) */
10836
2.99M
    pi8_cost[0] >>= ga_trans_shift[trans_idx];
10837
10838
2.99M
#if RDOPT_ZERO_CBF_ENABLE
10839
2.99M
    if((0 != cbf))
10840
1.48M
    {
10841
1.48M
        WORD32 tu_bits;
10842
1.48M
        LWORD64 zero_cbf_cost_u, curr_cb_cod_cost;
10843
10844
1.48M
        zero_cbf_cost_u = 0;
10845
10846
        /*Populating the fields of rdoq_ctxt structure*/
10847
1.48M
        if(i4_perform_rdoq)
10848
500k
        {
10849
            //memset(ps_rdoq_sbh_ctxt,0,sizeof(rdoq_sbh_ctxt_t));
10850
            /* transform size to log2transform size */
10851
500k
            GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size);
10852
500k
            ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1;
10853
10854
500k
            ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_chroma_qf;
10855
500k
            ps_rdoq_sbh_ctxt->i4_is_luma = 0;
10856
500k
            ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx];
10857
500k
            ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td =
10858
500k
                (1 << (ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td - 1));
10859
500k
            ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0;
10860
500k
            ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col;
10861
500k
            ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row;
10862
500k
        }
10863
986k
        else if(i4_perform_zcbf)
10864
250k
        {
10865
            /* cost of zero cbf encoding */
10866
250k
            zero_cbf_cost_u =
10867
10868
250k
                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
10869
250k
                    pu1_pred,
10870
250k
                    pu1_src,
10871
250k
                    pred_strd,
10872
250k
                    src_strd,
10873
250k
                    trans_size,
10874
250k
                    trans_size,
10875
250k
                    e_chroma_plane);
10876
250k
        }
10877
10878
        /************************************************************************/
10879
        /* call the entropy rdo encode to get the bit estimate for current tu   */
10880
        /* note that tu includes only residual coding bits and does not include */
10881
        /* tu split, cbf and qp delta encoding bits for a TU                    */
10882
        /************************************************************************/
10883
1.48M
        if(i4_perform_rdoq)
10884
500k
        {
10885
500k
            tu_bits = ihevce_entropy_rdo_encode_tu_rdoq(
10886
500k
                &ps_ctxt->s_rdopt_entropy_ctxt,
10887
500k
                pu1_ecd_data,
10888
500k
                trans_size,
10889
500k
                0,
10890
500k
                ps_rdoq_sbh_ctxt,
10891
500k
                pi8_cost,
10892
500k
                &zero_cbf_cost_u,
10893
500k
                0);
10894
            //Currently, we are not accounting for sign bit in RDOPT bits calculation when RDOQ is turned on
10895
10896
500k
            if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0)
10897
23.9k
            {
10898
23.9k
                cbf = 0;
10899
10900
                /* num bytes is set to 0 */
10901
23.9k
                *pi4_coeff_off = 0;
10902
23.9k
            }
10903
10904
500k
            (*pi4_tu_bits) += tu_bits;
10905
10906
500k
            if((i4_perform_sbh) && (0 != cbf))
10907
476k
            {
10908
476k
                ps_rdoq_sbh_ctxt->i8_ssd_cost = pi8_cost[0];
10909
10910
476k
                ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
10911
10912
476k
                pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
10913
476k
            }
10914
10915
            /*Add round value before normalizing*/
10916
500k
            pi8_cost[0] += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td;
10917
500k
            pi8_cost[0] >>= ga_trans_shift[trans_idx];
10918
10919
500k
            if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1)
10920
476k
            {
10921
476k
                *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
10922
476k
                    pi2_quant_coeffs,
10923
476k
                    pi4_subBlock2csbfId_map,
10924
476k
                    i4_scan_idx,
10925
476k
                    trans_size,
10926
476k
                    pu1_ecd_data,
10927
476k
                    ps_rdoq_sbh_ctxt->pu1_csbf_buf,
10928
476k
                    csbf_strd);
10929
476k
            }
10930
500k
        }
10931
986k
        else
10932
986k
        {
10933
            /************************************************************************/
10934
            /* call the entropy rdo encode to get the bit estimate for current tu   */
10935
            /* note that tu includes only residual coding bits and does not include */
10936
            /* tu split, cbf and qp delta encoding bits for a TU                    */
10937
            /************************************************************************/
10938
986k
            tu_bits = ihevce_entropy_rdo_encode_tu(
10939
986k
                &ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 0, i4_perform_sbh);
10940
10941
986k
            (*pi4_tu_bits) += tu_bits;
10942
986k
        }
10943
10944
1.48M
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
10945
162k
        {
10946
162k
            pi8_cost[0] = ihevce_it_recon_ssd(
10947
162k
                ps_ctxt,
10948
162k
                pu1_src,
10949
162k
                src_strd,
10950
162k
                pu1_pred,
10951
162k
                pred_strd,
10952
162k
                pi2_deq_data,
10953
162k
                deq_data_strd,
10954
162k
                pu1_recon,
10955
162k
                i4_recon_stride,
10956
162k
                pu1_ecd_data,
10957
162k
                trans_size,
10958
162k
                PRED_MODE_INTRA,
10959
162k
                cbf,
10960
162k
                pi4_zero_col[0],
10961
162k
                pi4_zero_row[0],
10962
162k
                e_chroma_plane);
10963
10964
162k
            pu1_is_recon_available[0] = 1;
10965
162k
        }
10966
10967
1.48M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
10968
1.48M
        if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
10969
0
        {
10970
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
10971
0
                pu1_src,
10972
0
                src_strd,
10973
0
                pu1_recon,
10974
0
                i4_recon_stride,
10975
0
                pi8_cost[0],
10976
0
                i4_alpha_stim_multiplier,
10977
0
                trans_size,
10978
0
                0,
10979
0
                ps_ctxt->u1_enable_psyRDOPT,
10980
0
                e_chroma_plane);
10981
0
        }
10982
1.48M
        else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
10983
0
        {
10984
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
10985
0
                pu1_src,
10986
0
                src_strd,
10987
0
                pu1_pred,
10988
0
                pred_strd,
10989
0
                pi8_cost[0],
10990
0
                i4_alpha_stim_multiplier,
10991
0
                trans_size,
10992
0
                0,
10993
0
                ps_ctxt->u1_enable_psyRDOPT,
10994
0
                e_chroma_plane);
10995
0
        }
10996
1.48M
#endif
10997
10998
1.48M
        curr_cb_cod_cost = pi8_cost[0];
10999
11000
        /* add the SSD cost to bits estimate given by ECD */
11001
1.48M
        curr_cb_cod_cost +=
11002
1.48M
            COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
11003
11004
1.48M
        if(i4_perform_zcbf)
11005
399k
        {
11006
399k
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
11007
399k
            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
11008
0
            {
11009
0
                zero_cbf_cost_u = ihevce_inject_stim_into_distortion(
11010
0
                    pu1_src,
11011
0
                    src_strd,
11012
0
                    pu1_pred,
11013
0
                    pred_strd,
11014
0
                    zero_cbf_cost_u,
11015
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
11016
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
11017
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
11018
0
                                                 100.0,
11019
0
                    trans_size,
11020
0
                    0,
11021
0
                    ps_ctxt->u1_enable_psyRDOPT,
11022
0
                    e_chroma_plane);
11023
0
            }
11024
399k
#endif
11025
            /* force the tu as zero cbf if zero_cbf_cost is lower */
11026
399k
            if(zero_cbf_cost_u < curr_cb_cod_cost)
11027
2.36k
            {
11028
2.36k
                *pi4_coeff_off = 0;
11029
2.36k
                cbf = 0;
11030
2.36k
                (*pi4_tu_bits) = 0;
11031
2.36k
                pi8_cost[0] = zero_cbf_cost_u;
11032
11033
2.36k
                pu1_is_recon_available[0] = 0;
11034
11035
2.36k
                if(e_ssd_type == SPATIAL_DOMAIN_SSD)
11036
69
                {
11037
69
                    ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
11038
69
                        pu1_pred,
11039
69
                        pred_strd,
11040
69
                        pu1_recon,
11041
69
                        i4_recon_stride,
11042
69
                        trans_size,
11043
69
                        trans_size,
11044
69
                        e_chroma_plane);
11045
11046
69
                    pu1_is_recon_available[0] = 1;
11047
69
                }
11048
2.36k
            }
11049
11050
399k
#if ENABLE_INTER_ZCU_COST
11051
399k
            if(!intra_flag)
11052
399k
            {
11053
#if !WEIGH_CHROMA_COST
11054
                ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost_u;
11055
#else
11056
399k
                ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
11057
399k
                    (zero_cbf_cost_u * ps_ctxt->u4_chroma_cost_weighing_factor +
11058
399k
                     (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
11059
399k
                    CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
11060
399k
#endif
11061
399k
            }
11062
399k
#endif
11063
399k
        }
11064
1.48M
    }
11065
1.51M
    else
11066
1.51M
    {
11067
1.51M
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
11068
479k
        {
11069
479k
            pi8_cost[0] = ihevce_it_recon_ssd(
11070
479k
                ps_ctxt,
11071
479k
                pu1_src,
11072
479k
                src_strd,
11073
479k
                pu1_pred,
11074
479k
                pred_strd,
11075
479k
                pi2_deq_data,
11076
479k
                deq_data_strd,
11077
479k
                pu1_recon,
11078
479k
                i4_recon_stride,
11079
479k
                pu1_ecd_data,
11080
479k
                trans_size,
11081
479k
                PRED_MODE_INTRA,
11082
479k
                cbf,
11083
479k
                pi4_zero_col[0],
11084
479k
                pi4_zero_row[0],
11085
479k
                e_chroma_plane);
11086
11087
479k
            pu1_is_recon_available[0] = 1;
11088
479k
        }
11089
11090
1.51M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
11091
1.51M
        if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
11092
0
        {
11093
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
11094
0
                pu1_src,
11095
0
                src_strd,
11096
0
                pu1_recon,
11097
0
                i4_recon_stride,
11098
0
                pi8_cost[0],
11099
0
                !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
11100
0
                                       : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
11101
0
                                          (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
11102
0
                                             100.0,
11103
0
                trans_size,
11104
0
                0,
11105
0
                ps_ctxt->u1_enable_psyRDOPT,
11106
0
                e_chroma_plane);
11107
0
        }
11108
1.51M
        else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
11109
0
        {
11110
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
11111
0
                pu1_src,
11112
0
                src_strd,
11113
0
                pu1_pred,
11114
0
                pred_strd,
11115
0
                pi8_cost[0],
11116
0
                !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
11117
0
                                       : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
11118
0
                                          (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
11119
0
                                             100.0,
11120
0
                trans_size,
11121
0
                0,
11122
0
                ps_ctxt->u1_enable_psyRDOPT,
11123
0
                e_chroma_plane);
11124
0
        }
11125
1.51M
#endif
11126
11127
1.51M
#if ENABLE_INTER_ZCU_COST
11128
1.51M
        if(!intra_flag)
11129
211k
        {
11130
#if !WEIGH_CHROMA_COST
11131
            /* cbf = 0, accumulate cu not coded cost */
11132
            ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
11133
#else
11134
            /* cbf = 0, accumulate cu not coded cost */
11135
11136
211k
            ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
11137
211k
                (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor +
11138
211k
                 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
11139
211k
                CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
11140
211k
#endif
11141
211k
        }
11142
1.51M
#endif
11143
1.51M
    }
11144
2.99M
#endif /* RDOPT_ZERO_CBF_ENABLE */
11145
11146
2.99M
    return (cbf);
11147
2.99M
}