Coverage Report

Created: 2025-12-14 06:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/encoder/ihevce_enc_loop_utils.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
21
/*!
22
******************************************************************************
23
* \file ihevce_enc_loop_utils.c
24
*
25
* \brief
26
*    This file contains utility functions of Encode loop
27
*
28
* \date
29
*    18/09/2012
30
*
31
* \author
32
*    Ittiam
33
*
34
*
35
* List of Functions
36
*
37
*
38
******************************************************************************
39
*/
40
41
/*****************************************************************************/
42
/* File Includes                                                             */
43
/*****************************************************************************/
44
/* System include files */
45
#include <stdio.h>
46
#include <string.h>
47
#include <stdlib.h>
48
#include <assert.h>
49
#include <stdarg.h>
50
#include <math.h>
51
#include <limits.h>
52
53
/* User include files */
54
#include "ihevc_typedefs.h"
55
#include "itt_video_api.h"
56
#include "ihevce_api.h"
57
58
#include "rc_cntrl_param.h"
59
#include "rc_frame_info_collector.h"
60
#include "rc_look_ahead_params.h"
61
62
#include "ihevc_defs.h"
63
#include "ihevc_macros.h"
64
#include "ihevc_debug.h"
65
#include "ihevc_structs.h"
66
#include "ihevc_platform_macros.h"
67
#include "ihevc_deblk.h"
68
#include "ihevc_itrans_recon.h"
69
#include "ihevc_chroma_itrans_recon.h"
70
#include "ihevc_chroma_intra_pred.h"
71
#include "ihevc_intra_pred.h"
72
#include "ihevc_inter_pred.h"
73
#include "ihevc_mem_fns.h"
74
#include "ihevc_padding.h"
75
#include "ihevc_weighted_pred.h"
76
#include "ihevc_sao.h"
77
#include "ihevc_resi_trans.h"
78
#include "ihevc_quant_iquant_ssd.h"
79
#include "ihevc_cabac_tables.h"
80
#include "ihevc_common_tables.h"
81
82
#include "ihevce_defs.h"
83
#include "ihevce_hle_interface.h"
84
#include "ihevce_lap_enc_structs.h"
85
#include "ihevce_multi_thrd_structs.h"
86
#include "ihevce_multi_thrd_funcs.h"
87
#include "ihevce_me_common_defs.h"
88
#include "ihevce_had_satd.h"
89
#include "ihevce_error_codes.h"
90
#include "ihevce_bitstream.h"
91
#include "ihevce_cabac.h"
92
#include "ihevce_rdoq_macros.h"
93
#include "ihevce_function_selector.h"
94
#include "ihevce_enc_structs.h"
95
#include "ihevce_entropy_structs.h"
96
#include "ihevce_cmn_utils_instr_set_router.h"
97
#include "ihevce_ipe_instr_set_router.h"
98
#include "ihevce_decomp_pre_intra_structs.h"
99
#include "ihevce_decomp_pre_intra_pass.h"
100
#include "ihevce_enc_loop_structs.h"
101
#include "ihevce_nbr_avail.h"
102
#include "ihevce_enc_loop_utils.h"
103
#include "ihevce_sub_pic_rc.h"
104
#include "ihevce_global_tables.h"
105
#include "ihevce_bs_compute_ctb.h"
106
#include "ihevce_cabac_rdo.h"
107
#include "ihevce_deblk.h"
108
#include "ihevce_frame_process.h"
109
#include "ihevce_rc_enc_structs.h"
110
#include "hme_datatype.h"
111
#include "hme_interface.h"
112
#include "hme_common_defs.h"
113
#include "hme_defs.h"
114
#include "hme_common_utils.h"
115
#include "ihevce_me_instr_set_router.h"
116
#include "ihevce_enc_subpel_gen.h"
117
#include "ihevce_inter_pred.h"
118
#include "ihevce_mv_pred.h"
119
#include "ihevce_mv_pred_merge.h"
120
#include "ihevce_enc_loop_inter_mode_sifter.h"
121
#include "ihevce_enc_cu_recursion.h"
122
#include "ihevce_enc_loop_pass.h"
123
#include "ihevce_common_utils.h"
124
#include "ihevce_dep_mngr_interface.h"
125
#include "ihevce_sao.h"
126
#include "ihevce_tile_interface.h"
127
#include "ihevce_profile.h"
128
#include "ihevce_stasino_helpers.h"
129
#include "ihevce_tu_tree_selector.h"
130
131
/*****************************************************************************/
132
/* Globals                                                                   */
133
/*****************************************************************************/
134
135
extern UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2];
136
extern const UWORD8 gu1_hevce_scan4x4[3][16];
137
extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc[4][16];
138
extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_tr4[16];
139
extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_00[16];
140
141
/*****************************************************************************/
142
/* Constant Macros                                                           */
143
/*****************************************************************************/
144
#define ENABLE_ZERO_CBF 1
145
#define DISABLE_RDOQ_INTRA 0
146
147
/*****************************************************************************/
148
/* Function Definitions                                                      */
149
/*****************************************************************************/
150
void *ihevce_tu_tree_update(
151
    tu_prms_t *ps_tu_prms,
152
    WORD32 *pnum_tu_in_cu,
153
    WORD32 depth,
154
    WORD32 tu_split_flag,
155
    WORD32 tu_early_cbf,
156
    WORD32 i4_x_off,
157
    WORD32 i4_y_off)
158
1.31M
{
159
    //WORD32 tu_split_flag = p_tu_split_flag[0];
160
1.31M
    WORD32 p_tu_split_flag[4];
161
1.31M
    WORD32 p_tu_early_cbf[4];
162
163
1.31M
    WORD32 tu_size = ps_tu_prms->u1_tu_size;
164
165
1.31M
    if(((tu_size >> depth) >= 16) && (tu_split_flag & 0x1))
166
110k
    {
167
110k
        if((tu_size >> depth) == 32)
168
28.5k
        {
169
            /* Get the individual TU split flags */
170
28.5k
            p_tu_split_flag[0] = (tu_split_flag >> 16) & 0x1F;
171
28.5k
            p_tu_split_flag[1] = (tu_split_flag >> 11) & 0x1F;
172
28.5k
            p_tu_split_flag[2] = (tu_split_flag >> 6) & 0x1F;
173
28.5k
            p_tu_split_flag[3] = (tu_split_flag >> 1) & 0x1F;
174
175
            /* Get the early CBF flags */
176
28.5k
            p_tu_early_cbf[0] = (tu_early_cbf >> 16) & 0x1F;
177
28.5k
            p_tu_early_cbf[1] = (tu_early_cbf >> 11) & 0x1F;
178
28.5k
            p_tu_early_cbf[2] = (tu_early_cbf >> 6) & 0x1F;
179
28.5k
            p_tu_early_cbf[3] = (tu_early_cbf >> 1) & 0x1F;
180
28.5k
        }
181
82.1k
        else
182
82.1k
        {
183
            /* Get the individual TU split flags */
184
82.1k
            p_tu_split_flag[0] = ((tu_split_flag >> 4) & 0x1);
185
82.1k
            p_tu_split_flag[1] = ((tu_split_flag >> 3) & 0x1);
186
82.1k
            p_tu_split_flag[2] = ((tu_split_flag >> 2) & 0x1);
187
82.1k
            p_tu_split_flag[3] = ((tu_split_flag >> 1) & 0x1);
188
189
            /* Get the early CBF flags */
190
82.1k
            p_tu_early_cbf[0] = ((tu_early_cbf >> 4) & 0x1);
191
82.1k
            p_tu_early_cbf[1] = ((tu_early_cbf >> 3) & 0x1);
192
82.1k
            p_tu_early_cbf[2] = ((tu_early_cbf >> 2) & 0x1);
193
82.1k
            p_tu_early_cbf[3] = ((tu_early_cbf >> 1) & 0x1);
194
82.1k
        }
195
196
110k
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
197
110k
            ps_tu_prms,
198
110k
            pnum_tu_in_cu,
199
110k
            depth + 1,
200
110k
            p_tu_split_flag[0],
201
110k
            p_tu_early_cbf[0],
202
110k
            i4_x_off,
203
110k
            i4_y_off);
204
205
110k
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
206
110k
            ps_tu_prms,
207
110k
            pnum_tu_in_cu,
208
110k
            depth + 1,
209
110k
            p_tu_split_flag[1],
210
110k
            p_tu_early_cbf[1],
211
110k
            (i4_x_off + (tu_size >> (depth + 1))),
212
110k
            i4_y_off);
213
214
110k
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
215
110k
            ps_tu_prms,
216
110k
            pnum_tu_in_cu,
217
110k
            depth + 1,
218
110k
            p_tu_split_flag[2],
219
110k
            p_tu_early_cbf[2],
220
110k
            i4_x_off,
221
110k
            (i4_y_off + (tu_size >> (depth + 1))));
222
223
110k
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
224
110k
            ps_tu_prms,
225
110k
            pnum_tu_in_cu,
226
110k
            depth + 1,
227
110k
            p_tu_split_flag[3],
228
110k
            p_tu_early_cbf[3],
229
110k
            (i4_x_off + (tu_size >> (depth + 1))),
230
110k
            (i4_y_off + (tu_size >> (depth + 1))));
231
110k
    }
232
1.20M
    else
233
1.20M
    {
234
1.20M
        if(tu_split_flag & 0x1)
235
114k
        {
236
            /* This piece of code will be entered for the 8x8, if it is split
237
            Update the 4 child TU's accordingly. */
238
239
114k
            (*pnum_tu_in_cu) += 4;
240
241
            /* TL TU update */
242
114k
            ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
243
244
114k
            ps_tu_prms->u1_x_off = i4_x_off;
245
246
114k
            ps_tu_prms->u1_y_off = i4_y_off;
247
248
            /* Early CBF is not done for 4x4 transforms */
249
114k
            ps_tu_prms->i4_early_cbf = 1;
250
251
114k
            ps_tu_prms++;
252
253
            /* TR TU update */
254
114k
            ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
255
256
114k
            ps_tu_prms->u1_x_off = i4_x_off + (tu_size >> (depth + 1));
257
258
114k
            ps_tu_prms->u1_y_off = i4_y_off;
259
260
            /* Early CBF is not done for 4x4 transforms */
261
114k
            ps_tu_prms->i4_early_cbf = 1;
262
263
114k
            ps_tu_prms++;
264
265
            /* BL TU update */
266
114k
            ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
267
268
114k
            ps_tu_prms->u1_x_off = i4_x_off;
269
270
114k
            ps_tu_prms->u1_y_off = i4_y_off + (tu_size >> (depth + 1));
271
272
            /* Early CBF is not done for 4x4 transforms */
273
114k
            ps_tu_prms->i4_early_cbf = 1;
274
275
114k
            ps_tu_prms++;
276
277
            /* BR TU update */
278
114k
            ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
279
280
114k
            ps_tu_prms->u1_x_off = i4_x_off + (tu_size >> (depth + 1));
281
282
114k
            ps_tu_prms->u1_y_off = i4_y_off + (tu_size >> (depth + 1));
283
284
            /* Early CBF is not done for 4x4 transforms */
285
114k
            ps_tu_prms->i4_early_cbf = 1;
286
114k
        }
287
1.09M
        else
288
1.09M
        {
289
            /* Update the TU params */
290
1.09M
            ps_tu_prms->u1_tu_size = tu_size >> depth;
291
292
1.09M
            ps_tu_prms->u1_x_off = i4_x_off;
293
294
1.09M
            ps_tu_prms->u1_y_off = i4_y_off;
295
296
1.09M
            (*pnum_tu_in_cu)++;
297
298
            /* Early CBF update for current TU */
299
1.09M
            ps_tu_prms->i4_early_cbf = tu_early_cbf & 0x1;
300
1.09M
        }
301
1.20M
        if((*pnum_tu_in_cu) < MAX_TU_IN_CTB)
302
1.20M
        {
303
1.20M
            ps_tu_prms++;
304
305
1.20M
            ps_tu_prms->u1_tu_size = tu_size;
306
1.20M
        }
307
1.20M
    }
308
309
1.31M
    return ps_tu_prms;
310
1.31M
}
311
312
/*!
313
******************************************************************************
314
* \if Function name : ihevce_compute_quant_rel_param \endif
315
*
316
* \brief
317
*    This function updates quantization related parameters like qp_mod_6 etc in
318
*       context according to new qp
319
*
320
* \date
321
*    08/01/2013
322
*
323
* \author
324
*    Ittiam
325
*
326
* \return
327
*
328
* List of Functions
329
*
330
*
331
******************************************************************************
332
*/
333
void ihevce_compute_quant_rel_param(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD8 i1_cu_qp)
334
8.22M
{
335
8.22M
    WORD32 i4_div_factor;
336
337
8.22M
    ps_ctxt->i4_chrm_cu_qp =
338
8.22M
        (ps_ctxt->u1_chroma_array_type == 2)
339
8.22M
            ? MIN(i1_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51)
340
8.22M
            : gai1_ihevc_chroma_qp_scale[i1_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET];
341
8.22M
    ps_ctxt->i4_cu_qp_div6 = (i1_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
342
8.22M
    i4_div_factor = (i1_cu_qp + 3) / 6;
343
8.22M
    i4_div_factor = CLIP3(i4_div_factor, 3, 6);
344
8.22M
    ps_ctxt->i4_cu_qp_mod6 = (i1_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
345
8.22M
    ps_ctxt->i4_chrm_cu_qp_div6 = (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
346
8.22M
    ps_ctxt->i4_chrm_cu_qp_mod6 = (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
347
348
8.22M
#define INTER_RND_QP_BY_6
349
8.22M
#ifdef INTER_RND_QP_BY_6
350
    /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
351
8.22M
    {
352
8.22M
        ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] =
353
8.22M
            (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)6) + 0.5f);
354
8.22M
    }
355
#else
356
    /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
357
    ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3;
358
#endif
359
360
8.22M
    if(ISLICE == ps_ctxt->i1_slice_type)
361
3.03M
    {
362
        /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
363
3.03M
        ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
364
3.03M
            (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)3) + 0.5f);
365
3.03M
    }
366
5.18M
    else
367
5.18M
    {
368
5.18M
        if(0) /*TRAQO_EXT_ENABLE_ONE_THIRD_RND*/
369
0
        {
370
            /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
371
0
            ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
372
0
                (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)3) + 0.5f);
373
0
        }
374
5.18M
        else
375
5.18M
        {
376
            /* quant factor without RDOQ is 1/6th of shift for intra in inter pic */
377
5.18M
            ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
378
5.18M
                ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER];
379
            /* (1 << QUANT_ROUND_FACTOR_Q) / 6; */
380
5.18M
        }
381
5.18M
    }
382
8.22M
}
383
384
/*!
385
******************************************************************************
386
* \if Function name : ihevce_populate_cl_cu_lambda_prms \endif
387
*
388
* \brief
389
*    Function whihc calculates the Lambda params for current picture
390
*
391
* \param[in] ps_enc_ctxt : encoder ctxt pointer
392
* \param[in] ps_cur_pic_ctxt : current pic ctxt
393
* \param[in] i4_cur_frame_qp : current pic QP
394
* \param[in] first_field : is first field flag
395
* \param[in] i4_temporal_lyr_id : Current picture layer id
396
*
397
* \return
398
*    None
399
*
400
* \author
401
*  Ittiam
402
*
403
*****************************************************************************
404
*/
405
void ihevce_populate_cl_cu_lambda_prms(
406
    ihevce_enc_loop_ctxt_t *ps_ctxt,
407
    frm_lambda_ctxt_t *ps_frm_lamda,
408
    WORD32 i4_slice_type,
409
    WORD32 i4_temporal_lyr_id,
410
    WORD32 i4_lambda_type)
411
104k
{
412
104k
    WORD32 i4_curr_cu_qp, i4_curr_cu_qp_offset;
413
104k
    double lambda_modifier;
414
104k
    double lambda_uv_modifier;
415
104k
    double lambda;
416
104k
    double lambda_uv;
417
418
104k
    WORD32 i4_qp_bdoffset = 6 * (ps_ctxt->u1_bit_depth - 8);
419
420
    /*Populate lamda modifier */
421
104k
    ps_ctxt->i4_lamda_modifier = ps_frm_lamda->lambda_modifier;
422
104k
    ps_ctxt->i4_uv_lamda_modifier = ps_frm_lamda->lambda_uv_modifier;
423
104k
    ps_ctxt->i4_temporal_layer_id = i4_temporal_lyr_id;
424
425
104k
    for(i4_curr_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
426
5.44M
        i4_curr_cu_qp <= ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
427
5.33M
        i4_curr_cu_qp++)
428
5.33M
    {
429
5.33M
        WORD32 chroma_qp = (ps_ctxt->i4_chroma_format == IV_YUV_422SP_UV)
430
5.33M
                               ? MIN(i4_curr_cu_qp, 51)
431
5.33M
                               : gai1_ihevc_chroma_qp_scale[i4_curr_cu_qp + MAX_QP_BD_OFFSET];
432
433
5.33M
        i4_curr_cu_qp_offset = i4_curr_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
434
435
5.33M
        lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
436
5.33M
        lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
437
438
5.33M
        if((BSLICE == i4_slice_type) && (i4_temporal_lyr_id))
439
674k
        {
440
674k
            lambda_modifier = ps_frm_lamda->lambda_modifier *
441
674k
                              CLIP3((((double)(i4_curr_cu_qp - 12)) / 6.0), 2.00, 4.00);
442
674k
            lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier *
443
674k
                                 CLIP3((((double)(chroma_qp - 12)) / 6.0), 2.00, 4.00);
444
674k
        }
445
4.66M
        else
446
4.66M
        {
447
4.66M
            lambda_modifier = ps_frm_lamda->lambda_modifier;
448
4.66M
            lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier;
449
4.66M
        }
450
5.33M
        if(ps_ctxt->i4_use_const_lamda_modifier)
451
0
        {
452
0
            if(ISLICE == ps_ctxt->i1_slice_type)
453
0
            {
454
0
                lambda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
455
0
                lambda_uv_modifier = ps_ctxt->f_i_pic_lamda_modifier;
456
0
            }
457
0
            else
458
0
            {
459
0
                lambda_modifier = CONST_LAMDA_MOD_VAL;
460
0
                lambda_uv_modifier = CONST_LAMDA_MOD_VAL;
461
0
            }
462
0
        }
463
5.33M
        switch(i4_lambda_type)
464
5.33M
        {
465
0
        case 0:
466
0
        {
467
0
            i4_qp_bdoffset = 0;
468
469
0
            lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
470
0
            lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
471
472
0
            lambda *= lambda_modifier;
473
0
            lambda_uv *= lambda_uv_modifier;
474
475
0
            ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
476
0
                (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
477
478
0
            ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
479
0
                (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
480
481
0
            ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
482
0
                (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
483
0
            if(ps_ctxt->i4_use_const_lamda_modifier)
484
0
            {
485
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
486
0
                    (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
487
0
            }
488
0
            else
489
0
            {
490
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
491
0
                    (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
492
0
            }
493
494
0
            ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
495
0
                (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
496
497
0
            ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
498
0
                ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset];
499
500
0
            ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
501
0
                ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset];
502
503
0
            ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
504
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset];
505
506
0
            ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
507
0
                ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset];
508
509
0
            break;
510
0
        }
511
0
        case 1:
512
0
        {
513
0
            lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
514
0
            lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
515
516
0
            lambda *= lambda_modifier;
517
0
            lambda_uv *= lambda_uv_modifier;
518
519
0
            ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
520
0
                (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
521
522
0
            ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
523
0
                (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
524
525
0
            ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
526
0
                (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
527
0
            if(ps_ctxt->i4_use_const_lamda_modifier)
528
0
            {
529
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
530
0
                    (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
531
0
            }
532
0
            else
533
0
            {
534
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
535
0
                    (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
536
0
            }
537
0
            ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
538
0
                (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
539
540
0
            ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
541
0
                ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset];
542
543
0
            ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
544
0
                ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset];
545
546
0
            ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
547
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset];
548
549
0
            ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
550
0
                ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset];
551
552
0
            break;
553
0
        }
554
5.33M
        case 2:
555
5.33M
        {
556
5.33M
            lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
557
5.33M
            lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
558
559
5.33M
            lambda *= lambda_modifier;
560
5.33M
            lambda_uv *= lambda_uv_modifier;
561
562
5.33M
            ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
563
5.33M
                (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
564
565
5.33M
            ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
566
5.33M
                (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
567
568
5.33M
            ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
569
5.33M
                (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
570
571
5.33M
            if(ps_ctxt->i4_use_const_lamda_modifier)
572
0
            {
573
0
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
574
0
                    (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
575
0
            }
576
5.33M
            else
577
5.33M
            {
578
5.33M
                ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
579
5.33M
                    (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
580
5.33M
            }
581
5.33M
            ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
582
5.33M
                (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
583
584
            /* lambda corresponding to 8- bit, for metrics based on 8- bit ( Example 8bit SAD in encloop)*/
585
5.33M
            lambda = pow(2.0, (((double)(i4_curr_cu_qp - 12)) / 3.0));
586
5.33M
            lambda_uv = pow(2.0, (((double)(chroma_qp - 12)) / 3.0));
587
588
5.33M
            lambda *= lambda_modifier;
589
5.33M
            lambda_uv *= lambda_uv_modifier;
590
591
5.33M
            ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
592
5.33M
                (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
593
594
5.33M
            ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
595
5.33M
                (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
596
597
5.33M
            ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
598
5.33M
                (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
599
5.33M
            if(ps_ctxt->i4_use_const_lamda_modifier)
600
0
            {
601
0
                ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
602
0
                    (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
603
0
            }
604
5.33M
            else
605
5.33M
            {
606
5.33M
                ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
607
5.33M
                    (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
608
5.33M
            }
609
610
5.33M
            ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
611
5.33M
                (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
612
613
5.33M
            break;
614
0
        }
615
0
        default:
616
0
        {
617
            /* Intended to be a barren wasteland! */
618
0
            ASSERT(0);
619
0
        }
620
5.33M
        }
621
5.33M
    }
622
104k
}
623
624
/*!
625
******************************************************************************
626
* \if Function name : ihevce_get_cl_cu_lambda_prms \endif
627
*
628
* \brief
629
*    Function whihc calculates the Lambda params for current picture
630
*
631
* \param[in] ps_enc_ctxt : encoder ctxt pointer
632
* \param[in] ps_cur_pic_ctxt : current pic ctxt
633
* \param[in] i4_cur_frame_qp : current pic QP
634
* \param[in] first_field : is first field flag
635
* \param[in] i4_temporal_lyr_id : Current picture layer id
636
*
637
* \return
638
*    None
639
*
640
* \author
641
*  Ittiam
642
*
643
*****************************************************************************
644
*/
645
void ihevce_get_cl_cu_lambda_prms(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 i4_cur_cu_qp)
646
8.22M
{
647
8.22M
    WORD32 chroma_qp = (ps_ctxt->u1_chroma_array_type == 2)
648
8.22M
                           ? MIN(i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51)
649
8.22M
                           : gai1_ihevc_chroma_qp_scale
650
8.22M
                                 [i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET];
651
652
    /* closed loop ssd lambda is same as final lambda */
653
8.22M
    ps_ctxt->i8_cl_ssd_lambda_qf =
654
8.22M
        ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
655
8.22M
    ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
656
8.22M
        ps_ctxt
657
8.22M
            ->i8_cl_ssd_lambda_chroma_qf_array[chroma_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
658
8.22M
    ps_ctxt->u4_chroma_cost_weighing_factor =
659
8.22M
        ps_ctxt->au4_chroma_cost_weighing_factor_array
660
8.22M
            [chroma_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
661
    /* --- Initialized the lambda for SATD computations --- */
662
    /* --- 0.95 is the multiplication factor as per HM --- */
663
    /* --- 1.9 is the multiplication factor for Hadamard Transform --- */
664
8.22M
    ps_ctxt->i4_satd_lamda =
665
8.22M
        ps_ctxt->i4_satd_lamda_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
666
8.22M
    ps_ctxt->i4_sad_lamda =
667
8.22M
        ps_ctxt->i4_sad_type2_lamda_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
668
8.22M
}
669
670
/*!
671
******************************************************************************
672
* \if Function name : ihevce_update_pred_qp \endif
673
*
674
* \brief
675
*    Computes pred qp for the given CU
676
*
677
* \param[in]
678
*
679
* \return
680
*
681
*
682
* \author
683
*  Ittiam
684
*
685
*****************************************************************************
686
*/
687
void ihevce_update_pred_qp(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 cu_pos_x, WORD32 cu_pos_y)
688
2.48M
{
689
2.48M
    WORD32 i4_pred_qp = 0x7FFFFFFF;
690
2.48M
    WORD32 i4_top, i4_left;
691
2.48M
    if(cu_pos_x == 0 && cu_pos_y == 0) /*CTB start*/
692
299k
    {
693
299k
        i4_pred_qp = ps_ctxt->i4_prev_QP;
694
299k
    }
695
2.18M
    else
696
2.18M
    {
697
2.18M
        if(cu_pos_y == 0) /*CTB boundary*/
698
486k
        {
699
486k
            i4_top = ps_ctxt->i4_prev_QP;
700
486k
        }
701
1.69M
        else /*within CTB*/
702
1.69M
        {
703
1.69M
            i4_top = ps_ctxt->ai4_qp_qg[(cu_pos_y - 1) * 8 + (cu_pos_x)];
704
1.69M
        }
705
2.18M
        if(cu_pos_x == 0) /*CTB boundary*/
706
495k
        {
707
495k
            i4_left = ps_ctxt->i4_prev_QP;
708
495k
        }
709
1.68M
        else /*within CTB*/
710
1.68M
        {
711
1.68M
            i4_left = ps_ctxt->ai4_qp_qg[(cu_pos_y)*8 + (cu_pos_x - 1)];
712
1.68M
        }
713
2.18M
        i4_pred_qp = (i4_left + i4_top + 1) >> 1;
714
2.18M
    }
715
2.48M
    ps_ctxt->i4_pred_qp = i4_pred_qp;
716
2.48M
    return;
717
2.48M
}
718
/*!
719
******************************************************************************
720
* \if Function name : ihevce_compute_cu_level_QP \endif
721
*
722
* \brief
723
*    Computes cu level QP with Traqo,Spatial Mod and In-frame RC
724
*
725
* \param[in]
726
*
727
* \return
728
*
729
*
730
* \author
731
*  Ittiam
732
*
733
*****************************************************************************
734
*/
735
void ihevce_compute_cu_level_QP(
736
    ihevce_enc_loop_ctxt_t *ps_ctxt,
737
    WORD32 i4_activity_for_qp,
738
    WORD32 i4_activity_for_lamda,
739
    WORD32 i4_reduce_qp)
740
7.19M
{
741
    /*modify quant related param in ctxt based on current cu qp*/
742
7.19M
    WORD32 i4_input_QP = ps_ctxt->i4_frame_mod_qp;
743
7.19M
    WORD32 cu_qp = i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
744
745
7.19M
    WORD32 i4_max_qp_allowed;
746
7.19M
    WORD32 i4_min_qp_allowed;
747
7.19M
    WORD32 i4_pred_qp;
748
749
7.19M
    i4_pred_qp = ps_ctxt->i4_pred_qp;
750
751
7.19M
    if(ps_ctxt->i4_sub_pic_level_rc)
752
0
    {
753
0
        i4_max_qp_allowed = (i4_pred_qp + (25 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 2)));
754
0
        i4_min_qp_allowed = (i4_pred_qp - (26 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 2)));
755
0
    }
756
7.19M
    else
757
7.19M
    {
758
7.19M
        i4_max_qp_allowed = (i4_input_QP + (7 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 4)));
759
7.19M
        i4_min_qp_allowed = (i4_input_QP - (18 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 4)));
760
7.19M
    }
761
7.19M
    if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6))
762
0
        return;
763
764
#if LAMDA_BASED_ON_QUANT
765
    i4_activity_for_lamda = i4_activity_for_qp;
766
#endif
767
768
7.19M
    if(i4_activity_for_qp != -1)
769
7.19M
    {
770
7.19M
        cu_qp = (ps_ctxt->ps_rc_quant_ctxt
771
7.19M
                     ->pi4_qp_to_qscale[i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]);
772
7.19M
        if(ps_ctxt->i4_qp_mod)
773
7.19M
        {
774
            /*Recompute the Qp as per enc thread's frame level Qp*/
775
7.19M
            ASSERT(i4_activity_for_qp > 0);
776
7.19M
            cu_qp = ((cu_qp * i4_activity_for_qp) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
777
7.19M
                    QP_LEVEL_MOD_ACT_FACTOR;
778
7.19M
        }
779
780
        // To avoid access of uninitialised Qscale to qp conversion table
781
7.19M
        if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale)
782
453k
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale;
783
6.74M
        else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale)
784
0
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale;
785
786
7.19M
        cu_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[cu_qp];
787
788
7.19M
        if((1 == i4_reduce_qp) && (cu_qp > 1))
789
0
            cu_qp--;
790
791
        /*CLIP the delta to obey standard allowed QP variation of (-26 + offset/2) to (25 + offset/2)*/
792
7.19M
        if(cu_qp > i4_max_qp_allowed)
793
0
            cu_qp = i4_max_qp_allowed;
794
7.19M
        else if(cu_qp < i4_min_qp_allowed)
795
0
            cu_qp = i4_min_qp_allowed;
796
797
        /* CLIP to maintain Qp between user configured and min and max Qp values*/
798
7.19M
        if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp)
799
0
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
800
7.19M
        else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp)
801
460k
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
802
803
        /*cu qp must be populated in cu_analyse_t struct*/
804
7.19M
        ps_ctxt->i4_cu_qp = cu_qp;
805
        /*recompute quant related param at every cu level*/
806
7.19M
        ihevce_compute_quant_rel_param(ps_ctxt, cu_qp);
807
7.19M
    }
808
809
    /*Decoupling qp and lamda calculation */
810
7.19M
    if(i4_activity_for_lamda != -1)
811
7.19M
    {
812
7.19M
        cu_qp = (ps_ctxt->ps_rc_quant_ctxt
813
7.19M
                     ->pi4_qp_to_qscale[i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]);
814
815
7.19M
        if(ps_ctxt->i4_qp_mod)
816
7.19M
        {
817
7.19M
#if MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON
818
            /*Recompute the Qp as per enc thread's frame level Qp*/
819
7.19M
            ASSERT(i4_activity_for_lamda > 0);
820
7.19M
            cu_qp = ((cu_qp * i4_activity_for_lamda) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
821
7.19M
                    QP_LEVEL_MOD_ACT_FACTOR;
822
7.19M
#endif
823
7.19M
        }
824
7.19M
        if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale)
825
223k
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale;
826
6.97M
        else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale)
827
0
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale;
828
829
7.19M
        cu_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[cu_qp];
830
831
        /*CLIP the delta to obey standard allowed QP variation of (-26 + offset/2) to (25 + offset/2)*/
832
7.19M
        if(cu_qp > i4_max_qp_allowed)
833
0
            cu_qp = i4_max_qp_allowed;
834
7.19M
        else if(cu_qp < i4_min_qp_allowed)
835
0
            cu_qp = i4_min_qp_allowed;
836
837
        /* CLIP to maintain Qp between user configured and min and max Qp values*/
838
7.19M
        if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp)
839
0
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
840
7.19M
        else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp)
841
1.04M
            cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
842
        /* get frame level lambda params */
843
7.19M
        ihevce_get_cl_cu_lambda_prms(
844
7.19M
            ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? cu_qp : ps_ctxt->i4_frame_qp);
845
7.19M
    }
846
7.19M
}
847
848
void ihevce_update_cu_level_qp_lamda(
849
    ihevce_enc_loop_ctxt_t *ps_ctxt, cu_analyse_t *ps_cu_analyse, WORD32 trans_size, WORD32 is_intra)
850
7.19M
{
851
7.19M
    WORD32 i4_act_counter = 0, i4_act_counter_lamda = 0;
852
853
7.19M
    if(ps_cu_analyse->u1_cu_size == 64)
854
148k
    {
855
148k
        ASSERT((trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4));
856
148k
        i4_act_counter = (trans_size == 16) + 2 * ((trans_size == 8) || (trans_size == 4));
857
148k
        i4_act_counter_lamda = 3;
858
148k
    }
859
7.05M
    else if(ps_cu_analyse->u1_cu_size == 32)
860
1.17M
    {
861
1.17M
        ASSERT((trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4));
862
1.17M
        i4_act_counter = (trans_size == 16) + 2 * ((trans_size == 8) || (trans_size == 4));
863
1.17M
        i4_act_counter_lamda = 0;
864
1.17M
    }
865
5.87M
    else if(ps_cu_analyse->u1_cu_size == 16)
866
3.11M
    {
867
3.11M
        ASSERT((trans_size == 16) || (trans_size == 8) || (trans_size == 4));
868
3.11M
        i4_act_counter = (trans_size == 8) || (trans_size == 4);
869
3.11M
        i4_act_counter_lamda = 0;
870
3.11M
    }
871
2.76M
    else if(ps_cu_analyse->u1_cu_size == 8)
872
2.76M
    {
873
2.76M
        ASSERT((trans_size == 8) || (trans_size == 4));
874
2.76M
        i4_act_counter = 1;
875
2.76M
        i4_act_counter_lamda = 0;
876
2.76M
    }
877
0
    else
878
0
    {
879
0
        ASSERT(0);
880
0
    }
881
882
7.19M
    if(ps_ctxt->i4_use_ctb_level_lamda)
883
0
    {
884
0
        ihevce_compute_cu_level_QP(
885
0
            ps_ctxt, ps_cu_analyse->i4_act_factor[i4_act_counter][is_intra], -1, 0);
886
0
    }
887
7.19M
    else
888
7.19M
    {
889
7.19M
        ihevce_compute_cu_level_QP(
890
7.19M
            ps_ctxt,
891
7.19M
            ps_cu_analyse->i4_act_factor[i4_act_counter][is_intra],
892
7.19M
            ps_cu_analyse->i4_act_factor[i4_act_counter_lamda][is_intra],
893
7.19M
            0);
894
7.19M
    }
895
896
7.19M
    ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp;
897
7.19M
}
898
899
/**
900
*******************************************************************************
901
* \if Function name : ihevce_scan_coeffs \endif
902
*
903
* @brief * Computes the coeff buffer for a coded TU for entropy coding
904
*
905
* @par   Description
906
* Computes the coeff buffer for a coded TU for entropy coding
907
*
908
* \param[in] pi2_quan_coeffs Quantized coefficient context
909
*
910
* \param[in] scan_idx Scan index specifying the scan order
911
*
912
* \param[in] trans_size Transform unit size
913
*
914
* \param[inout] pu1_out_data output coeff buffer for a coded TU for entropy coding
915
*
916
* \param[in] pu1_csbf_buf csb flag buffer
917
*
918
* @returns num_bytes
919
* Number of bytes written to pu1_out_data
920
*
921
* @remarks
922
*
923
* \author
924
*  Ittiam
925
*
926
*******************************************************************************
927
*/
928
929
WORD32 ihevce_scan_coeffs(
930
    WORD16 *pi2_quant_coeffs,
931
    WORD32 *pi4_subBlock2csbfId_map,
932
    WORD32 scan_idx,
933
    WORD32 trans_size,
934
    UWORD8 *pu1_out_data,
935
    UWORD8 *pu1_csbf_buf,
936
    WORD32 i4_csbf_stride)
937
20.9M
{
938
20.9M
    WORD32 i, trans_unit_idx, num_gt1_flag;
939
20.9M
    UWORD16 u2_csbf0flags;
940
20.9M
    WORD32 num_bytes = 0;
941
20.9M
    UWORD8 *pu1_trans_table;
942
20.9M
    UWORD8 *pu1_csb_table;
943
20.9M
    WORD32 shift_value, mask_value;
944
20.9M
    UWORD16 u2_sig_coeff_abs_gt0_flags = 0, u2_sig_coeff_abs_gt1_flags = 0;
945
20.9M
    UWORD16 u2_sign_flags;
946
20.9M
    UWORD16 u2_abs_coeff_remaining[16];
947
20.9M
    WORD32 blk_row, blk_col;
948
949
20.9M
    UWORD8 *pu1_out_data_header;
950
20.9M
    UWORD16 *pu2_out_data_coeff;
951
952
20.9M
    WORD32 x_pos, y_pos;
953
20.9M
    WORD32 quant_coeff;
954
955
20.9M
    WORD32 num_gt0_flag;
956
20.9M
    (void)i4_csbf_stride;
957
20.9M
    pu1_out_data_header = pu1_out_data;
958
    /* Need only last 3 bits, rest are reserved for debugging and making */
959
    /* WORD alignment */
960
20.9M
    u2_csbf0flags = 0xBAD0;
961
962
    /* Select proper order for your transform unit and csb based on scan_idx*/
963
    /* and the trans_size */
964
965
    /* scan order inside a csb */
966
20.9M
    pu1_csb_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
967
    /* GETRANGE will give the log_2 of trans_size to shift_value */
968
20.9M
    GETRANGE(shift_value, trans_size);
969
20.9M
    shift_value = shift_value - 3; /* for finding. row no. from scan index */
970
20.9M
    mask_value = (trans_size / 4) - 1; /*for finding the col. no. from scan index*/
971
20.9M
    switch(trans_size)
972
20.9M
    {
973
406k
    case 32:
974
406k
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_8x8[scan_idx][0]);
975
406k
        break;
976
1.50M
    case 16:
977
1.50M
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
978
1.50M
        break;
979
3.85M
    case 8:
980
3.85M
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_2x2[scan_idx][0]);
981
3.85M
        break;
982
15.2M
    case 4:
983
15.2M
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_1x1[0]);
984
15.2M
        break;
985
0
    default:
986
0
        DBG_PRINTF("Invalid Trans Size\n");
987
0
        return -1;
988
0
        break;
989
20.9M
    }
990
991
    /*go through each csb in the scan order for first non-zero coded sub-block*/
992
44.1M
    for(trans_unit_idx = (trans_size * trans_size / 16) - 1; trans_unit_idx >= 0; trans_unit_idx--)
993
44.1M
    {
994
        /* check for the first csb flag in our scan order */
995
44.1M
        if(pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]])
996
20.9M
        {
997
20.9M
            UWORD8 u1_last_x, u1_last_y;
998
            /* row of csb */
999
20.9M
            blk_row = pu1_trans_table[trans_unit_idx] >> shift_value;
1000
            /* col of csb */
1001
20.9M
            blk_col = pu1_trans_table[trans_unit_idx] & mask_value;
1002
1003
            /*check for the 1st non-0 values inside the csb in our scan order*/
1004
83.6M
            for(i = 15; i >= 0; i--)
1005
83.6M
            {
1006
83.6M
                x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
1007
83.6M
                y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
1008
1009
83.6M
                quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
1010
1011
83.6M
                if(quant_coeff != 0)
1012
20.9M
                    break;
1013
83.6M
            }
1014
1015
20.9M
            ASSERT(i >= 0);
1016
1017
20.9M
            u1_last_x = x_pos;
1018
20.9M
            u1_last_y = y_pos;
1019
1020
            /* storing last_x and last_y */
1021
20.9M
            *pu1_out_data_header = u1_last_x;
1022
20.9M
            pu1_out_data_header++;
1023
20.9M
            num_bytes++;
1024
20.9M
            *pu1_out_data_header = u1_last_y;
1025
20.9M
            pu1_out_data_header++;
1026
20.9M
            num_bytes++;
1027
1028
            /* storing the scan order */
1029
20.9M
            *pu1_out_data_header = scan_idx;
1030
20.9M
            pu1_out_data_header++;
1031
20.9M
            num_bytes++;
1032
            /* storing last_sub_block pos. in scan order count */
1033
20.9M
            *pu1_out_data_header = trans_unit_idx;
1034
20.9M
            pu1_out_data_header++;
1035
20.9M
            num_bytes++;
1036
1037
            /*stored the first 4 bytes, now all are word16. So word16 pointer*/
1038
20.9M
            pu2_out_data_coeff = (UWORD16 *)pu1_out_data_header;
1039
1040
            /* u2_csbf0flags word */
1041
20.9M
            u2_csbf0flags = 0xBAD0 | 1; /*since right&bottom csbf is 0*/
1042
            /* storing u2_csbf0flags word */
1043
20.9M
            *pu2_out_data_coeff = u2_csbf0flags;
1044
20.9M
            pu2_out_data_coeff++;
1045
20.9M
            num_bytes += 2;
1046
1047
20.9M
            num_gt0_flag = 1;
1048
20.9M
            num_gt1_flag = 0;
1049
20.9M
            u2_sign_flags = 0;
1050
1051
            /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
1052
20.9M
            u2_sig_coeff_abs_gt0_flags = u2_sig_coeff_abs_gt0_flags | (1 << i);
1053
20.9M
            if(abs(quant_coeff) > 1)
1054
10.9M
            {
1055
                /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
1056
10.9M
                u2_sig_coeff_abs_gt1_flags = u2_sig_coeff_abs_gt1_flags | (1 << i);
1057
                /* update u2_abs_coeff_remaining */
1058
10.9M
                u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
1059
1060
10.9M
                num_gt1_flag++;
1061
10.9M
            }
1062
1063
20.9M
            if(quant_coeff < 0)
1064
10.8M
            {
1065
                /* set the i th bit of u2_sign_flags */
1066
10.8M
                u2_sign_flags = u2_sign_flags | (1 << i);
1067
10.8M
            }
1068
1069
            /* Test remaining elements in our scan order */
1070
            /* Can optimize further by CLZ macro */
1071
273M
            for(i = i - 1; i >= 0; i--)
1072
252M
            {
1073
252M
                x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
1074
252M
                y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
1075
1076
252M
                quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
1077
1078
252M
                if(quant_coeff != 0)
1079
203M
                {
1080
                    /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
1081
203M
                    u2_sig_coeff_abs_gt0_flags |= (1 << i);
1082
1083
203M
                    if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE))
1084
166M
                    {
1085
                        /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
1086
166M
                        u2_sig_coeff_abs_gt1_flags |= (1 << i);
1087
1088
                        /* update u2_abs_coeff_remaining */
1089
166M
                        u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
1090
1091
166M
                        num_gt1_flag++; /*n0. of Ones in sig_coeff_abs_gt1_flag*/
1092
166M
                    }
1093
1094
203M
                    if(quant_coeff < 0)
1095
102M
                    {
1096
                        /* set the i th bit of u2_sign_flags */
1097
102M
                        u2_sign_flags |= (1 << i);
1098
102M
                    }
1099
1100
203M
                    num_gt0_flag++;
1101
203M
                }
1102
252M
            }
1103
1104
            /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */
1105
20.9M
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags;
1106
20.9M
            pu2_out_data_coeff++;
1107
20.9M
            num_bytes += 2;
1108
            /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */
1109
20.9M
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags;
1110
20.9M
            pu2_out_data_coeff++;
1111
20.9M
            num_bytes += 2;
1112
            /* storing u2_sign_flags 2 bytes */
1113
20.9M
            *pu2_out_data_coeff = u2_sign_flags;
1114
20.9M
            pu2_out_data_coeff++;
1115
20.9M
            num_bytes += 2;
1116
1117
            /* Store the u2_abs_coeff_remaining[] */
1118
198M
            for(i = 0; i < num_gt1_flag; i++)
1119
177M
            {
1120
                /* storing u2_abs_coeff_remaining[i] 2 bytes */
1121
177M
                *pu2_out_data_coeff = u2_abs_coeff_remaining[i];
1122
177M
                pu2_out_data_coeff++;
1123
177M
                num_bytes += 2;
1124
177M
            }
1125
1126
20.9M
            break; /*We just need this loop for finding 1st non-zero csb only*/
1127
20.9M
        }
1128
44.1M
    }
1129
1130
    /* go through remaining csb in the scan order */
1131
57.6M
    for(trans_unit_idx = trans_unit_idx - 1; trans_unit_idx >= 0; trans_unit_idx--)
1132
36.6M
    {
1133
36.6M
        blk_row = pu1_trans_table[trans_unit_idx] >> shift_value; /*row of csb*/
1134
36.6M
        blk_col = pu1_trans_table[trans_unit_idx] & mask_value; /*col of csb*/
1135
1136
        /* u2_csbf0flags word */
1137
36.6M
        u2_csbf0flags = 0xBAD0 | /* assuming csbf_buf has only 0 or 1 values */
1138
36.6M
                        (pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]]);
1139
1140
        /********************************************************************/
1141
        /* Minor hack: As per HEVC spec csbf in not signalled in stream for */
1142
        /* block0, instead sig coeff map is directly signalled. This is     */
1143
        /* taken care by forcing csbf for block0 to be 1 even if it is 0    */
1144
        /********************************************************************/
1145
36.6M
        if(0 == trans_unit_idx)
1146
4.89M
        {
1147
4.89M
            u2_csbf0flags |= 1;
1148
4.89M
        }
1149
1150
36.6M
        if((blk_col + 1 < trans_size / 4)) /* checking right boundary */
1151
30.6M
        {
1152
30.6M
            if(pu1_csbf_buf[pi4_subBlock2csbfId_map[blk_row * trans_size / 4 + blk_col + 1]])
1153
24.5M
            {
1154
                /* set the 2nd bit of u2_csbf0flags for right csbf */
1155
24.5M
                u2_csbf0flags = u2_csbf0flags | (1 << 1);
1156
24.5M
            }
1157
30.6M
        }
1158
36.6M
        if((blk_row + 1 < trans_size / 4)) /* checking bottom oundary */
1159
30.0M
        {
1160
30.0M
            if(pu1_csbf_buf[pi4_subBlock2csbfId_map[(blk_row + 1) * trans_size / 4 + blk_col]])
1161
25.0M
            {
1162
                /* set the 3rd bit of u2_csbf0flags  for bottom csbf */
1163
25.0M
                u2_csbf0flags = u2_csbf0flags | (1 << 2);
1164
25.0M
            }
1165
30.0M
        }
1166
1167
        /* storing u2_csbf0flags word */
1168
36.6M
        *pu2_out_data_coeff = u2_csbf0flags;
1169
36.6M
        pu2_out_data_coeff++;
1170
36.6M
        num_bytes += 2;
1171
1172
        /* check for the csb flag in our scan order */
1173
36.6M
        if(u2_csbf0flags & 0x1)
1174
32.3M
        {
1175
32.3M
            u2_sig_coeff_abs_gt0_flags = 0;
1176
32.3M
            u2_sig_coeff_abs_gt1_flags = 0;
1177
32.3M
            u2_sign_flags = 0;
1178
1179
32.3M
            num_gt0_flag = 0;
1180
32.3M
            num_gt1_flag = 0;
1181
            /* check for the non-0 values inside the csb in our scan order */
1182
            /* Can optimize further by CLZ macro */
1183
549M
            for(i = 15; i >= 0; i--)
1184
517M
            {
1185
517M
                x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
1186
517M
                y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
1187
1188
517M
                quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
1189
1190
517M
                if(quant_coeff != 0)
1191
398M
                {
1192
                    /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
1193
398M
                    u2_sig_coeff_abs_gt0_flags |= (1 << i);
1194
1195
398M
                    if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE))
1196
329M
                    {
1197
                        /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
1198
329M
                        u2_sig_coeff_abs_gt1_flags |= (1 << i);
1199
1200
                        /* update u2_abs_coeff_remaining */
1201
329M
                        u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
1202
1203
329M
                        num_gt1_flag++;
1204
329M
                    }
1205
1206
398M
                    if(quant_coeff < 0)
1207
203M
                    {
1208
                        /* set the i th bit of u2_sign_flags */
1209
203M
                        u2_sign_flags = u2_sign_flags | (1 << i);
1210
203M
                    }
1211
1212
398M
                    num_gt0_flag++;
1213
398M
                }
1214
517M
            }
1215
1216
            /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */
1217
32.3M
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags;
1218
32.3M
            pu2_out_data_coeff++;
1219
32.3M
            num_bytes += 2;
1220
1221
            /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */
1222
32.3M
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags;
1223
32.3M
            pu2_out_data_coeff++;
1224
32.3M
            num_bytes += 2;
1225
1226
            /* storing u2_sign_flags 2 bytes */
1227
32.3M
            *pu2_out_data_coeff = u2_sign_flags;
1228
32.3M
            pu2_out_data_coeff++;
1229
32.3M
            num_bytes += 2;
1230
1231
            /* Store the u2_abs_coeff_remaining[] */
1232
361M
            for(i = 0; i < num_gt1_flag; i++)
1233
329M
            {
1234
                /* storing u2_abs_coeff_remaining[i] 2 bytes */
1235
329M
                *pu2_out_data_coeff = u2_abs_coeff_remaining[i];
1236
329M
                pu2_out_data_coeff++;
1237
329M
                num_bytes += 2;
1238
329M
            }
1239
32.3M
        }
1240
36.6M
    }
1241
1242
20.9M
    return num_bytes; /* Return the number of bytes written to out_data */
1243
20.9M
}
1244
1245
/**
1246
*******************************************************************************
1247
* \if Function name : ihevce_populate_intra_pred_mode \endif
1248
*
1249
* \brief * Populates the three entry candidate intra pred mode list for a CU
1250
* based on the intra modes of the neighbouring (top and left) CUs
1251
*
1252
* \par   Description
1253
* Derives the top and left candidate modes (INTRA_DC when the neighbour is not
1254
* available or the top neighbour is across the CTB boundary) and fills the list
1255
*
1256
* \param[in] top_intra_mode Top intra mode
1257
* \param[in] left_intra_mode Left intra mode
1258
* \param[in] available_top Top availability flag
1259
* \param[in] available_left Left availability flag
1260
* \param[in] cu_pos_y CU 'y' position
1261
* \param[in] ps_cand_mode_list pointer to populate candidate list
1262
*
1263
* \returns none
1264
*
1265
* \author
1266
*  Ittiam
1267
*
1268
*******************************************************************************
1269
*/
1270
1271
void ihevce_populate_intra_pred_mode(
1272
    WORD32 top_intra_mode,
1273
    WORD32 left_intra_mode,
1274
    WORD32 available_top,
1275
    WORD32 available_left,
1276
    WORD32 cu_pos_y,
1277
    WORD32 *ps_cand_mode_list)
1278
1.73M
{
1279
    /* local variables */
1280
1.73M
    WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top;
1281
1282
    /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */
1283
    /* N = top */
1284
1.73M
    if(0 == available_top)
1285
193k
    {
1286
193k
        cand_intra_pred_mode_top = INTRA_DC;
1287
193k
    }
1288
    /* for neighbour != INTRA, setting DC is done outside */
1289
1.54M
    else if(0 == cu_pos_y) /* It's on the CTB boundary */
1290
76.0k
    {
1291
76.0k
        cand_intra_pred_mode_top = INTRA_DC;
1292
76.0k
    }
1293
1.46M
    else
1294
1.46M
    {
1295
1.46M
        cand_intra_pred_mode_top = top_intra_mode;
1296
1.46M
    }
1297
1298
    /* N = left */
1299
1.73M
    if(0 == available_left)
1300
158k
    {
1301
158k
        cand_intra_pred_mode_left = INTRA_DC;
1302
158k
    }
1303
    /* for neighbour != INTRA, setting DC is done outside */
1304
1.57M
    else
1305
1.57M
    {
1306
1.57M
        cand_intra_pred_mode_left = left_intra_mode;
1307
1.57M
    }
1308
1309
    /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */
1310
1.73M
    if(cand_intra_pred_mode_left == cand_intra_pred_mode_top)
1311
577k
    {
1312
577k
        if(cand_intra_pred_mode_left < 2)
1313
391k
        {
1314
391k
            ps_cand_mode_list[0] = INTRA_PLANAR;
1315
391k
            ps_cand_mode_list[1] = INTRA_DC;
1316
391k
            ps_cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */
1317
391k
        }
1318
185k
        else
1319
185k
        {
1320
185k
            ps_cand_mode_list[0] = cand_intra_pred_mode_left;
1321
185k
            ps_cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32);
1322
185k
            ps_cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32);
1323
185k
        }
1324
577k
    }
1325
1.15M
    else
1326
1.15M
    {
1327
1.15M
        ps_cand_mode_list[0] = cand_intra_pred_mode_left;
1328
1.15M
        ps_cand_mode_list[1] = cand_intra_pred_mode_top;
1329
1330
1.15M
        if((cand_intra_pred_mode_left != INTRA_PLANAR) &&
1331
900k
           (cand_intra_pred_mode_top != INTRA_PLANAR))
1332
659k
        {
1333
659k
            ps_cand_mode_list[2] = INTRA_PLANAR;
1334
659k
        }
1335
498k
        else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC))
1336
194k
        {
1337
194k
            ps_cand_mode_list[2] = INTRA_DC;
1338
194k
        }
1339
303k
        else
1340
303k
        {
1341
303k
            ps_cand_mode_list[2] = INTRA_ANGULAR(26);
1342
303k
        }
1343
1.15M
    }
1344
1.73M
}
1345
/**
1346
*******************************************************************************
1347
* \if Function name : ihevce_intra_pred_mode_signaling \endif
1348
*
1349
* \brief * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx &
1350
* b5_rem_intra_pred_mode for a CU
1351
*
1352
* \par   Description
1353
* Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
1354
* for a CU
1355
*
1356
* \param[in] ps_nbr_top Top neighbour context
1357
* \param[in] ps_nbr_left Left neighbour context
1358
* \param[in] available_top Top availability flag
1359
* \param[in] available_left Left availability flag
1360
* \param[in] cu_pos_y CU 'y' position
1361
* \param[in] luma_intra_pred_mode_current the intra_pred_mode of current block
1362
* \param[inout] ps_intra_pred_mode_current
1363
* Pointer to structure having b1_prev_intra_luma_pred_flag, b2_mpm_idx and
1364
* b5_rem_intra_pred_mode
1365
*
1366
* \returns none
1367
*
1368
* \author
1369
*  Ittiam
1370
*
1371
*******************************************************************************
1372
*/
1373
1374
void ihevce_intra_pred_mode_signaling(
1375
    WORD32 top_intra_mode,
1376
    WORD32 left_intra_mode,
1377
    WORD32 available_top,
1378
    WORD32 available_left,
1379
    WORD32 cu_pos_y,
1380
    WORD32 luma_intra_pred_mode_current,
1381
    intra_prev_rem_flags_t *ps_intra_pred_mode_current)
1382
25.6M
{
1383
    /* local variables */
1384
25.6M
    WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top;
1385
25.6M
    WORD32 cand_mode_list[3];
1386
1387
25.6M
    ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0;
1388
25.6M
    ps_intra_pred_mode_current->b2_mpm_idx = 0;  // for safety purpose
1389
25.6M
    ps_intra_pred_mode_current->b5_rem_intra_pred_mode = 0;
1390
1391
    /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */
1392
    /* N = top */
1393
25.6M
    if(0 == available_top)
1394
3.09M
    {
1395
3.09M
        cand_intra_pred_mode_top = INTRA_DC;
1396
3.09M
    }
1397
    /* for neighbour != INTRA, setting DC is done outside */
1398
22.5M
    else if(0 == cu_pos_y) /* It's on the CTB boundary */
1399
1.66M
    {
1400
1.66M
        cand_intra_pred_mode_top = INTRA_DC;
1401
1.66M
    }
1402
20.9M
    else
1403
20.9M
    {
1404
20.9M
        cand_intra_pred_mode_top = top_intra_mode;
1405
20.9M
    }
1406
1407
    /* N = left */
1408
25.6M
    if(0 == available_left)
1409
2.60M
    {
1410
2.60M
        cand_intra_pred_mode_left = INTRA_DC;
1411
2.60M
    }
1412
    /* for neighbour != INTRA, setting DC is done outside */
1413
23.0M
    else
1414
23.0M
    {
1415
23.0M
        cand_intra_pred_mode_left = left_intra_mode;
1416
23.0M
    }
1417
1418
    /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */
1419
25.6M
    if(cand_intra_pred_mode_left == cand_intra_pred_mode_top)
1420
11.5M
    {
1421
11.5M
        if(cand_intra_pred_mode_left < 2)
1422
8.69M
        {
1423
8.69M
            cand_mode_list[0] = INTRA_PLANAR;
1424
8.69M
            cand_mode_list[1] = INTRA_DC;
1425
8.69M
            cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */
1426
8.69M
        }
1427
2.81M
        else
1428
2.81M
        {
1429
2.81M
            cand_mode_list[0] = cand_intra_pred_mode_left;
1430
2.81M
            cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32);
1431
2.81M
            cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32);
1432
2.81M
        }
1433
11.5M
    }
1434
14.1M
    else
1435
14.1M
    {
1436
14.1M
        cand_mode_list[0] = cand_intra_pred_mode_left;
1437
14.1M
        cand_mode_list[1] = cand_intra_pred_mode_top;
1438
1439
14.1M
        if((cand_intra_pred_mode_left != INTRA_PLANAR) &&
1440
10.0M
           (cand_intra_pred_mode_top != INTRA_PLANAR))
1441
6.82M
        {
1442
6.82M
            cand_mode_list[2] = INTRA_PLANAR;
1443
6.82M
        }
1444
7.33M
        else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC))
1445
2.50M
        {
1446
2.50M
            cand_mode_list[2] = INTRA_DC;
1447
2.50M
        }
1448
4.83M
        else
1449
4.83M
        {
1450
4.83M
            cand_mode_list[2] = INTRA_ANGULAR(26);
1451
4.83M
        }
1452
14.1M
    }
1453
1454
    /* Signal Generation */
1455
1456
    /* Flag & mpm_index generation */
1457
25.6M
    if(cand_mode_list[0] == luma_intra_pred_mode_current)
1458
8.20M
    {
1459
8.20M
        ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
1460
8.20M
        ps_intra_pred_mode_current->b2_mpm_idx = 0;
1461
8.20M
    }
1462
17.4M
    else if(cand_mode_list[1] == luma_intra_pred_mode_current)
1463
6.74M
    {
1464
6.74M
        ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
1465
6.74M
        ps_intra_pred_mode_current->b2_mpm_idx = 1;
1466
6.74M
    }
1467
10.7M
    else if(cand_mode_list[2] == luma_intra_pred_mode_current)
1468
3.57M
    {
1469
3.57M
        ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
1470
3.57M
        ps_intra_pred_mode_current->b2_mpm_idx = 2;
1471
3.57M
    }
1472
    /* Flag & b5_rem_intra_pred_mode generation */
1473
7.15M
    else
1474
7.15M
    {
1475
7.15M
        WORD32 rem_mode;
1476
1477
7.15M
        ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0;
1478
1479
        /* sorting cand_mode_list */
1480
7.15M
        if(cand_mode_list[0] > cand_mode_list[1])
1481
3.23M
        {
1482
3.23M
            SWAP(cand_mode_list[0], cand_mode_list[1]);
1483
3.23M
        }
1484
7.15M
        if(cand_mode_list[0] > cand_mode_list[2])
1485
2.94M
        {
1486
2.94M
            SWAP(cand_mode_list[0], cand_mode_list[2]);
1487
2.94M
        }
1488
7.15M
        if(cand_mode_list[1] > cand_mode_list[2])
1489
3.84M
        {
1490
3.84M
            SWAP(cand_mode_list[1], cand_mode_list[2]);
1491
3.84M
        }
1492
1493
7.15M
        rem_mode = luma_intra_pred_mode_current;
1494
1495
7.15M
        if((rem_mode) >= cand_mode_list[2])
1496
2.39M
        {
1497
2.39M
            (rem_mode)--;
1498
2.39M
        }
1499
7.15M
        if((rem_mode) >= cand_mode_list[1])
1500
5.93M
        {
1501
5.93M
            (rem_mode)--;
1502
5.93M
        }
1503
7.15M
        if((rem_mode) >= cand_mode_list[0])
1504
6.58M
        {
1505
6.58M
            (rem_mode)--;
1506
6.58M
        }
1507
7.15M
        ps_intra_pred_mode_current->b5_rem_intra_pred_mode = rem_mode;
1508
7.15M
    }
1509
25.6M
}
1510
1511
void ihevce_quant_rounding_factor_gen(
1512
    WORD32 i4_trans_size,
1513
    WORD32 is_luma,
1514
    rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt,
1515
    WORD32 *pi4_quant_round_0_1,
1516
    WORD32 *pi4_quant_round_1_2,
1517
    double i4_lamda_modifier,
1518
    UWORD8 i4_is_tu_level_quant_rounding)
1519
7.70M
{
1520
    //WORD32 i4_scan_idx = ps_ctxt->i4_scan_idx;
1521
7.70M
    UWORD8 *pu1_ctxt_model;
1522
7.70M
    WORD32 scan_pos;
1523
7.70M
    WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag    */
1524
7.70M
    WORD32 abs_gt1_base_ctxt;
1525
7.70M
    WORD32 log2_tr_size, i;
1526
7.70M
    UWORD16 u4_bits_estimated_r0, u4_bits_estimated_r1, u4_bits_estimated_r2;
1527
7.70M
    UWORD16 u4_bits_estimated_r1_temp;
1528
7.70M
    WORD32 j = 0;
1529
7.70M
    WORD32 k = 0;
1530
7.70M
    WORD32 temp2;
1531
1532
7.70M
    double i4_lamda_mod = i4_lamda_modifier * pow(2.0, (-8.0 / 3.0));
1533
7.70M
    LWORD64 lamda_mod = (LWORD64)(i4_lamda_mod * (1 << LAMDA_Q_SHIFT_FACT));
1534
    /* transform size to log2transform size */
1535
7.70M
    GETRANGE(log2_tr_size, i4_trans_size);
1536
7.70M
    log2_tr_size -= 1;
1537
1538
7.70M
    if(1 == i4_is_tu_level_quant_rounding)
1539
0
    {
1540
0
        entropy_context_t *ps_cur_tu_entropy;
1541
0
        cab_ctxt_t *ps_cabac;
1542
0
        WORD32 curr_buf_idx = ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
1543
0
        ps_cur_tu_entropy = &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[curr_buf_idx];
1544
1545
0
        ps_cabac = &ps_cur_tu_entropy->s_cabac_ctxt;
1546
1547
0
        pu1_ctxt_model = &ps_cabac->au1_ctxt_models[0];
1548
0
    }
1549
7.70M
    else
1550
7.70M
    {
1551
7.70M
        pu1_ctxt_model = &ps_rdopt_entropy_ctxt->au1_init_cabac_ctxt_states[0];
1552
7.70M
    }
1553
    /*If transform size is 4x4, then only one sub-block*/
1554
7.70M
    if(is_luma)
1555
4.69M
    {
1556
4.69M
        sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
1557
4.69M
        abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG;
1558
1559
4.69M
        if(3 == log2_tr_size)
1560
1.68M
        {
1561
            /* 8x8 transform size */
1562
            /* Assuming diagnol scan idx for now */
1563
1.68M
            sig_coeff_base_ctxt += 9;
1564
1.68M
        }
1565
3.00M
        else if(3 < log2_tr_size)
1566
1.32M
        {
1567
            /* larger transform sizes */
1568
1.32M
            sig_coeff_base_ctxt += 21;
1569
1.32M
        }
1570
4.69M
    }
1571
3.00M
    else
1572
3.00M
    {
1573
        /* chroma context initializations */
1574
3.00M
        sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
1575
3.00M
        abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16;
1576
1577
3.00M
        if(3 == log2_tr_size)
1578
992k
        {
1579
            /* 8x8 transform size */
1580
992k
            sig_coeff_base_ctxt += 9;
1581
992k
        }
1582
2.01M
        else if(3 < log2_tr_size)
1583
330k
        {
1584
            /* larger transform sizes */
1585
330k
            sig_coeff_base_ctxt += 12;
1586
330k
        }
1587
3.00M
    }
1588
1589
    /*Transform size of 4x4 will have only a single CSB */
1590
    /* derive the context inc as per section 9.3.3.1.4 */
1591
1592
7.70M
    if(2 == log2_tr_size)
1593
3.37M
    {
1594
3.37M
        UWORD8 sig_ctxinc;
1595
3.37M
        WORD32 state_mps;
1596
3.37M
        WORD32 gt1_ctxt = 0;
1597
3.37M
        WORD32 ctxt_set = 0;
1598
3.37M
        WORD32 ctxt_idx = 0;
1599
1600
        /* context set based on luma subblock pos */
1601
1602
        /* Encodet the abs level gt1 bins */
1603
        /* Currently calculating trade off between mps(2) and mps(1)*/
1604
        /* The estimation has to be further done for mps(11) and mps(111)*/
1605
        /*ctxt_set = 0 as transform 4x4 has only one csb with DC */
1606
        /* gt1_ctxt = 0 for the co-ef value to be 2 */
1607
1608
3.37M
        ctxt_set = gt1_ctxt = 0;
1609
3.37M
        ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1610
1611
3.37M
        state_mps = pu1_ctxt_model[ctxt_idx];
1612
1613
3.37M
        u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
1614
1615
3.37M
        u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1616
1617
3.37M
        QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1_temp, lamda_mod);
1618
57.3M
        for(scan_pos = 0; scan_pos < 16; scan_pos++)
1619
53.9M
        {
1620
53.9M
            *(pi4_quant_round_1_2 + scan_pos) = temp2;
1621
53.9M
        }
1622
1623
57.3M
        for(scan_pos = 0; scan_pos < 16; scan_pos++)
1624
53.9M
        {
1625
            //UWORD8 nbr_csbf = 1;
1626
            /* derive the x,y pos */
1627
53.9M
            UWORD8 y_pos_x_pos = scan_pos;  //gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
1628
1629
            /* 4x4 transform size increment uses lookup */
1630
53.9M
            sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];
1631
1632
            /*Get the mps state based on ctxt modes */
1633
53.9M
            state_mps = pu1_ctxt_model[sig_ctxinc + sig_coeff_base_ctxt];
1634
1635
            /* Bits taken to encode sig co-ef flag as 0 */
1636
53.9M
            u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1637
1638
            /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1639
            //
1640
53.9M
            u4_bits_estimated_r1 =
1641
53.9M
                (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
1642
1643
            /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1644
53.9M
            u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
1645
1646
53.9M
            QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
1647
53.9M
            *(pi4_quant_round_0_1 + scan_pos) = temp2;
1648
53.9M
        }
1649
3.37M
    }
1650
4.33M
    else
1651
4.33M
    {
1652
4.33M
        UWORD8 *pu1_hevce_sigcoeff_ctxtinc;
1653
4.33M
        WORD32 is_nbr_csb_state_mps;
1654
1655
4.33M
        WORD32 state_mps;
1656
4.33M
        WORD32 gt1_ctxt = 0;
1657
4.33M
        WORD32 ctxt_set = 0;
1658
4.33M
        WORD32 ctxt_idx;
1659
        /*1to2 rounding factor is same for all sub blocks except for sub-block = 0*/
1660
        /*Hence will write all the sub-block with i >=1 coeff, and then overwrite for i = 0*/
1661
1662
        /*ctxt_set = 0 DC subblock, the previous state did not have 2
1663
        ctxt_set = 1 DC subblock, the previous state did have >= 2
1664
        ctxt_set = 2 AC subblock, the previous state did not have 2
1665
        ctxt_set = 3 AC subblock, the previous state did have >= 2*/
1666
4.33M
        i = 1;
1667
4.33M
        ctxt_set = (i && is_luma) ? 2 : 0;
1668
1669
4.33M
        ctxt_set++;
1670
1671
        /*0th position indicates the probability of 2 */
1672
        /*1th position indicates the probability of 1 */
1673
        /*2th position indicates the probability of 11 */
1674
        /*3th position indicates the probability of 111 */
1675
1676
4.33M
        gt1_ctxt = 0;
1677
4.33M
        ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1678
1679
4.33M
        state_mps = pu1_ctxt_model[ctxt_idx];
1680
1681
4.33M
        u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
1682
1683
4.33M
        u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1684
4.33M
        QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod);
1685
1686
852M
        for(scan_pos = 0; scan_pos < (16 * (i4_trans_size * i4_trans_size >> 4)); scan_pos++)
1687
848M
        {
1688
848M
            *(pi4_quant_round_1_2 + scan_pos) = temp2;
1689
848M
        }
1690
1691
4.33M
        i = 0;
1692
4.33M
        ctxt_set = (i && is_luma) ? 2 : 0;
1693
4.33M
        ctxt_set++;
1694
1695
        /*0th position indicates the probability of 2 */
1696
        /*1th position indicates the probability of 1 */
1697
        /*2th position indicates the probability of 11 */
1698
        /*3th position indicates the probability of 111 */
1699
1700
4.33M
        gt1_ctxt = 0;
1701
4.33M
        ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1702
1703
4.33M
        state_mps = pu1_ctxt_model[ctxt_idx];
1704
1705
4.33M
        u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
1706
1707
4.33M
        u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1708
4.33M
        QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod);
1709
1710
73.6M
        for(scan_pos = 0; scan_pos < 16; scan_pos++)
1711
69.3M
        {
1712
69.3M
            *(pi4_quant_round_1_2 + ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2;
1713
69.3M
        }
1714
1715
4.33M
        {
1716
4.33M
            WORD32 ctxt_idx;
1717
1718
4.33M
            WORD32 nbr_csbf_0, nbr_csbf_1;
1719
4.33M
            WORD32 state_mps_0, state_mps_1;
1720
4.33M
            ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX;
1721
4.33M
            ctxt_idx += is_luma ? 0 : 2;
1722
1723
            /* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */
1724
            /* if neibhor not available, ctxt idx = 0*/
1725
4.33M
            nbr_csbf_0 = 0;
1726
4.33M
            ctxt_idx += nbr_csbf_0 ? 1 : 0;
1727
4.33M
            state_mps_0 = pu1_ctxt_model[ctxt_idx];
1728
1729
4.33M
            nbr_csbf_1 = 1;
1730
4.33M
            ctxt_idx += nbr_csbf_1 ? 1 : 0;
1731
4.33M
            state_mps_1 = pu1_ctxt_model[ctxt_idx];
1732
1733
4.33M
            is_nbr_csb_state_mps = ((state_mps_0 % 2) == 1) && ((state_mps_1 % 2) == 1);
1734
4.33M
        }
1735
1736
4.33M
        if(1 == is_nbr_csb_state_mps)
1737
862k
        {
1738
13.0M
            for(i = 0; i < (i4_trans_size * i4_trans_size >> 4); i++)
1739
12.2M
            {
1740
12.2M
                UWORD8 sig_ctxinc;
1741
12.2M
                WORD32 state_mps;
1742
12.2M
                WORD32 gt1_ctxt = 0;
1743
12.2M
                WORD32 ctxt_set = 0;
1744
1745
12.2M
                WORD32 ctxt_idx;
1746
1747
                /*Check if the cabac states had previous nbr available */
1748
1749
12.2M
                if(i == 0)
1750
862k
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[3][0];
1751
11.3M
                else if(i < (i4_trans_size >> 2))
1752
1.94M
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[1][0];
1753
9.42M
                else if((i % (i4_trans_size >> 2)) == 0)
1754
1.94M
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[2][0];
1755
7.48M
                else
1756
7.48M
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0];
1757
1758
12.2M
                if(((i % (i4_trans_size >> 2)) == 0) && (i != 0))
1759
1.94M
                    k++;
1760
1761
12.2M
                j = ((i4_trans_size * 4) * k) + ((i % (i4_trans_size >> 2)) * 4);
1762
                /*ctxt_set = 0 DC subblock, the previous state did not have 2
1763
                ctxt_set = 1 DC subblock, the previous state did have >= 2
1764
                ctxt_set = 2 AC subblock, the previous state did not have 2
1765
                ctxt_set = 3 AC subblock, the previous state did have >= 2*/
1766
1767
12.2M
                ctxt_set = (i && is_luma) ? 2 : 0;
1768
1769
                /* gt1_ctxt = 1 for the co-ef value to be 1 */
1770
12.2M
                gt1_ctxt = 0;
1771
12.2M
                ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1772
1773
12.2M
                state_mps = pu1_ctxt_model[ctxt_idx];
1774
1775
                /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1776
12.2M
                u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1777
1778
207M
                for(scan_pos = 0; scan_pos < 16; scan_pos++)
1779
195M
                {
1780
195M
                    UWORD8 y_pos_x_pos;
1781
1782
195M
                    if(scan_pos || i)
1783
194M
                    {
1784
194M
                        y_pos_x_pos = scan_pos;  // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
1785
                        /* ctxt for AC coeff depends on curpos and neigbour csbf */
1786
194M
                        sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
1787
1788
                        /* based on luma subblock pos */
1789
194M
                        sig_ctxinc += (i && is_luma) ? 3 : 0;
1790
1791
194M
                        sig_ctxinc += sig_coeff_base_ctxt;
1792
194M
                    }
1793
862k
                    else
1794
862k
                    {
1795
                        /*MAM : both scan pos and i 0 impies the DC coef of 1st block only */
1796
                        /* DC coeff has fixed context for luma and chroma */
1797
862k
                        sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
1798
862k
                    }
1799
1800
                    /*Get the mps state based on ctxt modes */
1801
195M
                    state_mps = pu1_ctxt_model[sig_ctxinc];
1802
1803
                    /* Bits taken to encode sig co-ef flag as 0 */
1804
195M
                    u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1805
1806
195M
                    u4_bits_estimated_r1 =
1807
195M
                        (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
1808
1809
                    /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1810
195M
                    u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
1811
195M
                    {
1812
195M
                        QUANT_ROUND_FACTOR(
1813
195M
                            temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
1814
195M
                        *(pi4_quant_round_0_1 +
1815
195M
                          ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size)) + j) = temp2;
1816
195M
                    }
1817
195M
                }
1818
12.2M
            }
1819
862k
        }
1820
3.46M
        else
1821
3.46M
        {
1822
            /*If Both nbr csbfs are 0, then all the coef in sub-blocks will have same value except for 1st subblock,
1823
            Hence will write the same value to all sub block, and overwrite for the 1st one */
1824
3.46M
            i = 1;
1825
3.46M
            {
1826
3.46M
                UWORD8 sig_ctxinc;
1827
3.46M
                UWORD8 y_pos_x_pos;
1828
3.46M
                WORD32 quant_rounding_0_1;
1829
1830
3.46M
                pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc_00[0];
1831
1832
3.46M
                scan_pos = 0;
1833
3.46M
                y_pos_x_pos = scan_pos;  // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
1834
                /* ctxt for AC coeff depends on curpos and neigbour csbf */
1835
3.46M
                sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
1836
1837
                /* based on luma subblock pos */
1838
3.46M
                sig_ctxinc += (is_luma) ? 3 : 0;
1839
1840
3.46M
                sig_ctxinc += sig_coeff_base_ctxt;
1841
1842
                /*Get the mps state based on ctxt modes */
1843
3.46M
                state_mps = pu1_ctxt_model[sig_ctxinc];
1844
1845
                /* Bits taken to encode sig co-ef flag as 0 */
1846
3.46M
                u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1847
1848
3.46M
                u4_bits_estimated_r1 =
1849
3.46M
                    (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
1850
1851
                /*ctxt_set = 0 DC subblock, the previous state did not have 2
1852
                ctxt_set = 1 DC subblock, the previous state did have >= 2
1853
                ctxt_set = 2 AC subblock, the previous state did not have 2
1854
                ctxt_set = 3 AC subblock, the previous state did have >= 2*/
1855
1856
3.46M
                ctxt_set = (i && is_luma) ? 2 : 0;
1857
1858
                /* gt1_ctxt = 1 for the co-ef value to be 1 */
1859
3.46M
                gt1_ctxt = 0;
1860
3.46M
                ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1861
1862
3.46M
                state_mps = pu1_ctxt_model[ctxt_idx];
1863
1864
                /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1865
3.46M
                u4_bits_estimated_r1 += gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1866
1867
3.46M
                QUANT_ROUND_FACTOR(
1868
3.46M
                    quant_rounding_0_1, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
1869
1870
656M
                for(scan_pos = 0; scan_pos < (16 * (i4_trans_size * i4_trans_size >> 4));
1871
652M
                    scan_pos++)
1872
652M
                {
1873
652M
                    *(pi4_quant_round_0_1 + scan_pos) = quant_rounding_0_1;
1874
652M
                }
1875
3.46M
            }
1876
1877
            /*First Subblock*/
1878
3.46M
            i = 0;
1879
1880
3.46M
            {
1881
3.46M
                UWORD8 sig_ctxinc;
1882
3.46M
                WORD32 state_mps;
1883
3.46M
                WORD32 gt1_ctxt = 0;
1884
3.46M
                WORD32 ctxt_set = 0;
1885
1886
3.46M
                WORD32 ctxt_idx;
1887
1888
                /*Check if the cabac states had previous nbr available */
1889
1890
3.46M
                {
1891
3.46M
                    pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0];
1892
1893
                    /*ctxt_set = 0 DC subblock, the previous state did not have 2
1894
                    ctxt_set = 1 DC subblock, the previous state did have >= 2
1895
                    ctxt_set = 2 AC subblock, the previous state did not have 2
1896
                    ctxt_set = 3 AC subblock, the previous state did have >= 2*/
1897
3.46M
                    ctxt_set = (i && is_luma) ? 2 : 0;
1898
1899
                    /* gt1_ctxt = 1 for the co-ef value to be 1 */
1900
3.46M
                    gt1_ctxt = 0;
1901
3.46M
                    ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
1902
1903
3.46M
                    state_mps = pu1_ctxt_model[ctxt_idx];
1904
1905
                    /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1906
3.46M
                    u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1907
1908
58.9M
                    for(scan_pos = 0; scan_pos < 16; scan_pos++)
1909
55.5M
                    {
1910
55.5M
                        UWORD8 y_pos_x_pos;
1911
1912
55.5M
                        if(scan_pos)
1913
52.0M
                        {
1914
52.0M
                            y_pos_x_pos = scan_pos;  // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
1915
                            /* ctxt for AC coeff depends on curpos and neigbour csbf */
1916
52.0M
                            sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
1917
1918
                            /* based on luma subblock pos */
1919
52.0M
                            sig_ctxinc += (i && is_luma) ? 3 : 0;
1920
1921
52.0M
                            sig_ctxinc += sig_coeff_base_ctxt;
1922
52.0M
                        }
1923
3.46M
                        else
1924
3.46M
                        {
1925
                            /*MAM : both scan pos and i 0 impies the DC coef of 1st block only */
1926
                            /* DC coeff has fixed context for luma and chroma */
1927
3.46M
                            sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
1928
3.46M
                        }
1929
1930
                        /*Get the mps state based on ctxt modes */
1931
55.5M
                        state_mps = pu1_ctxt_model[sig_ctxinc];
1932
1933
                        /* Bits taken to encode sig co-ef flag as 0 */
1934
55.5M
                        u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
1935
1936
55.5M
                        u4_bits_estimated_r1 =
1937
55.5M
                            (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
1938
1939
                        /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
1940
55.5M
                        u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
1941
55.5M
                        {
1942
55.5M
                            QUANT_ROUND_FACTOR(
1943
55.5M
                                temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
1944
55.5M
                            *(pi4_quant_round_0_1 +
1945
55.5M
                              ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2;
1946
55.5M
                        }
1947
55.5M
                    }
1948
3.46M
                }
1949
3.46M
            }
1950
3.46M
        }
1951
4.33M
    }
1952
7.70M
    return;
1953
7.70M
}
1954
1955
/*!
1956
******************************************************************************
1957
* \if Function name : ihevce_t_q_iq_ssd_scan_fxn \endif
1958
*
1959
* \brief
1960
*    Transform unit level (Luma) enc_loop function
1961
*
1962
* \param[in] ps_ctxt    enc_loop module ctxt pointer
1963
* \param[in] pu1_pred   pointer to predicted data buffer
1964
* \param[in] pred_strd  predicted buffer stride
1965
* \param[in] pu1_src    pointer to source data buffer
1966
* \param[in] src_strd   source buffer stride
1967
* \param[in] pi2_deq_data   pointer to store iq data
1968
* \param[in] deq_data_strd  iq data buffer stride
1969
* \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod)
1970
* \param[out] pu1_csbf_buf  pointer to store the csbf for all 4x4 in a current
1971
*                           block
1972
* \param[out] csbf_strd  csbf buffer stride
1973
* \param[in] trans_size transform size (4, 8, 16,32)
1974
* \param[in] packed_pred_mode   0:Inter 1:Intra 2:Skip
1975
* \param[out] pi8_cost      pointer to store the cost
1976
* \param[out] pi4_coeff_off pointer to store the number of bytes produced in
1977
*                           coeff buffer
1978
* \param[out] pi4_tu_bits   pointer to store the best TU bits required to encode
1979
the current TU in RDopt Mode
1980
* \param[out] pu4_blk_sad   pointer to store the block sad for RC
1981
* \param[out] pi4_zero_col  pointer to store the zero_col info for the TU
1982
* \param[out] pi4_zero_row  pointer to store the zero_row info for the TU
1983
* \param[in]  i4_perform_rdoq Indicates if RDOQ should be performed or not
1984
* \param[in]  i4_perform_sbh Indicates if SBH should be performed or not
1985
*
1986
* \return
1987
*    CBF of the current block
1988
*
1989
* \author
1990
*  Ittiam
1991
*
1992
*****************************************************************************
1993
*/
1994
1995
WORD32 ihevce_t_q_iq_ssd_scan_fxn(
1996
    ihevce_enc_loop_ctxt_t *ps_ctxt,
1997
    UWORD8 *pu1_pred,
1998
    WORD32 pred_strd,
1999
    UWORD8 *pu1_src,
2000
    WORD32 src_strd,
2001
    WORD16 *pi2_deq_data,
2002
    WORD32 deq_data_strd,
2003
    UWORD8 *pu1_recon,
2004
    WORD32 i4_recon_stride,
2005
    UWORD8 *pu1_ecd_data,
2006
    UWORD8 *pu1_csbf_buf,
2007
    WORD32 csbf_strd,
2008
    WORD32 trans_size,
2009
    WORD32 packed_pred_mode,
2010
    LWORD64 *pi8_cost,
2011
    WORD32 *pi4_coeff_off,
2012
    WORD32 *pi4_tu_bits,
2013
    UWORD32 *pu4_blk_sad,
2014
    WORD32 *pi4_zero_col,
2015
    WORD32 *pi4_zero_row,
2016
    UWORD8 *pu1_is_recon_available,
2017
    WORD32 i4_perform_rdoq,
2018
    WORD32 i4_perform_sbh,
2019
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2020
    WORD32 i4_alpha_stim_multiplier,
2021
    UWORD8 u1_is_cu_noisy,
2022
#endif
2023
    SSD_TYPE_T e_ssd_type,
2024
    WORD32 early_cbf)
2025
32.0M
{
2026
32.0M
    WORD32 cbf = 0;
2027
32.0M
    WORD32 trans_idx;
2028
32.0M
    WORD32 quant_scale_mat_offset;
2029
32.0M
    WORD32 *pi4_trans_scratch;
2030
32.0M
    WORD16 *pi2_trans_values;
2031
32.0M
    WORD16 *pi2_quant_coeffs;
2032
32.0M
    WORD32 *pi4_subBlock2csbfId_map = NULL;
2033
2034
#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
2035
    WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i;
2036
#endif
2037
2038
32.0M
    rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt;
2039
2040
32.0M
    WORD32 i4_perform_zcbf = (ENABLE_INTER_ZCU_COST && (PRED_MODE_INTRA != packed_pred_mode)) ||
2041
25.6M
                             (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE);
2042
32.0M
    WORD32 i4_perform_coeff_level_rdoq = (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING);
2043
32.0M
    WORD8 intra_flag = 0;
2044
32.0M
    ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW);
2045
2046
32.0M
    *pi4_tu_bits = 0;
2047
32.0M
    *pi4_coeff_off = 0;
2048
32.0M
    pu1_is_recon_available[0] = 0;
2049
2050
32.0M
    if((PRED_MODE_SKIP == packed_pred_mode) || (0 == early_cbf))
2051
1.31M
    {
2052
1.31M
        if(e_ssd_type != NULL_TYPE)
2053
1.31M
        {
2054
            /* SSD cost is stored to the pointer */
2055
1.31M
            pi8_cost[0] =
2056
2057
1.31M
                ps_ctxt->s_cmn_opt_func.pf_ssd_and_sad_calculator(
2058
1.31M
                    pu1_pred, pred_strd, pu1_src, src_strd, trans_size, pu4_blk_sad);
2059
2060
1.31M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2061
1.31M
            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
2062
0
            {
2063
0
                pi8_cost[0] = ihevce_inject_stim_into_distortion(
2064
0
                    pu1_src,
2065
0
                    src_strd,
2066
0
                    pu1_pred,
2067
0
                    pred_strd,
2068
0
                    pi8_cost[0],
2069
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2070
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2071
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2072
0
                                                 100.0,
2073
0
                    trans_size,
2074
0
                    0,
2075
0
                    ps_ctxt->u1_enable_psyRDOPT,
2076
0
                    NULL_PLANE);
2077
0
            }
2078
1.31M
#endif
2079
2080
            /* copy pred to recon for skip mode */
2081
1.31M
            if(SPATIAL_DOMAIN_SSD == e_ssd_type)
2082
506k
            {
2083
506k
                ps_ctxt->s_cmn_opt_func.pf_copy_2d(
2084
506k
                    pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size);
2085
506k
                pu1_is_recon_available[0] = 1;
2086
506k
            }
2087
810k
            else
2088
810k
            {
2089
810k
                pu1_is_recon_available[0] = 0;
2090
810k
            }
2091
2092
1.31M
#if ENABLE_INTER_ZCU_COST
2093
1.31M
            ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
2094
1.31M
#endif
2095
1.31M
        }
2096
0
        else
2097
0
        {
2098
0
            pi8_cost[0] = UINT_MAX;
2099
0
        }
2100
2101
        /* cbf is returned as 0 */
2102
1.31M
        return (0);
2103
1.31M
    }
2104
2105
    /* derive context variables */
2106
30.7M
    pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0];
2107
30.7M
    pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
2108
30.7M
    pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2);
2109
2110
    /* translate the transform size to index for 4x4 and 8x8 */
2111
30.7M
    trans_idx = trans_size >> 2;
2112
2113
30.7M
    if(PRED_MODE_INTRA == packed_pred_mode)
2114
25.6M
    {
2115
25.6M
        quant_scale_mat_offset = 0;
2116
25.6M
        intra_flag = 1;
2117
#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
2118
        ai4_quant_rounding_factors[0][0] =
2119
            MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3);
2120
2121
        for(i = 0; i < trans_size * trans_size; i++)
2122
        {
2123
            ai4_quant_rounding_factors[1][i] =
2124
                MAX(ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3][i],
2125
                    (1 << QUANT_ROUND_FACTOR_Q) / 3);
2126
            ai4_quant_rounding_factors[2][i] =
2127
                MAX(ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3][i],
2128
                    (1 << QUANT_ROUND_FACTOR_Q) / 3);
2129
        }
2130
#endif
2131
25.6M
    }
2132
5.04M
    else
2133
5.04M
    {
2134
5.04M
        quant_scale_mat_offset = NUM_TRANS_TYPES;
2135
5.04M
    }
2136
    /* for intra 4x4 DST transform should be used */
2137
30.7M
    if((1 == trans_idx) && (1 == intra_flag))
2138
12.8M
    {
2139
12.8M
        trans_idx = 0;
2140
12.8M
    }
2141
    /* for 16x16 cases */
2142
17.8M
    else if(16 == trans_size)
2143
6.06M
    {
2144
6.06M
        trans_idx = 3;
2145
6.06M
    }
2146
    /* for 32x32 cases */
2147
11.7M
    else if(32 == trans_size)
2148
2.28M
    {
2149
2.28M
        trans_idx = 4;
2150
2.28M
    }
2151
2152
30.7M
    switch(trans_size)
2153
30.7M
    {
2154
14.4M
    case 4:
2155
14.4M
    {
2156
14.4M
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU;
2157
2158
14.4M
        break;
2159
0
    }
2160
7.90M
    case 8:
2161
7.90M
    {
2162
7.90M
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU;
2163
2164
7.90M
        break;
2165
0
    }
2166
6.06M
    case 16:
2167
6.06M
    {
2168
6.06M
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU;
2169
2170
6.06M
        break;
2171
0
    }
2172
2.28M
    case 32:
2173
2.28M
    {
2174
2.28M
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU;
2175
2176
2.28M
        break;
2177
0
    }
2178
30.7M
    }
2179
2180
    /* Do not call the FT and Quant functions if early_cbf is 0 */
2181
30.7M
    if(1 == early_cbf)
2182
30.7M
    {
2183
        /* ---------- call residue and transform block ------- */
2184
30.7M
        *pu4_blk_sad = ps_ctxt->apf_resd_trns[trans_idx](
2185
30.7M
            pu1_src,
2186
30.7M
            pu1_pred,
2187
30.7M
            pi4_trans_scratch,
2188
30.7M
            pi2_trans_values,
2189
30.7M
            src_strd,
2190
30.7M
            pred_strd,
2191
30.7M
            trans_size,
2192
30.7M
            NULL_PLANE);
2193
2194
30.7M
        cbf = ps_ctxt->apf_quant_iquant_ssd
2195
30.7M
                  [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2](
2196
30.7M
                      pi2_trans_values,
2197
30.7M
                      ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset],
2198
30.7M
                      pi2_quant_coeffs,
2199
30.7M
                      pi2_deq_data,
2200
30.7M
                      trans_size,
2201
30.7M
                      ps_ctxt->i4_cu_qp_div6,
2202
30.7M
                      ps_ctxt->i4_cu_qp_mod6,
2203
30.7M
#if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
2204
30.7M
                      ps_ctxt->i4_quant_rnd_factor[intra_flag],
2205
30.7M
                      ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
2206
30.7M
                      ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
2207
#else
2208
                      intra_flag ? ai4_quant_rounding_factors[0][0]
2209
                                 : ps_ctxt->i4_quant_rnd_factor[intra_flag],
2210
                      intra_flag ? ai4_quant_rounding_factors[1]
2211
                                 : ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
2212
                      intra_flag ? ai4_quant_rounding_factors[2]
2213
                                 : ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
2214
#endif
2215
30.7M
                      trans_size,
2216
30.7M
                      trans_size,
2217
30.7M
                      deq_data_strd,
2218
30.7M
                      pu1_csbf_buf,
2219
30.7M
                      csbf_strd,
2220
30.7M
                      pi4_zero_col,
2221
30.7M
                      pi4_zero_row,
2222
30.7M
                      ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset],
2223
30.7M
                      pi8_cost);
2224
2225
30.7M
        if(e_ssd_type != FREQUENCY_DOMAIN_SSD)
2226
12.1M
        {
2227
12.1M
            pi8_cost[0] = UINT_MAX;
2228
12.1M
        }
2229
30.7M
    }
2230
2231
30.7M
    if(0 != cbf)
2232
11.0M
    {
2233
11.0M
        if(i4_perform_sbh || i4_perform_rdoq)
2234
8.10M
        {
2235
8.10M
            ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd;
2236
8.10M
            ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size;
2237
8.10M
            ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map;
2238
2239
8.10M
            ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_cu_qp_div6;
2240
8.10M
            ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_cu_qp_mod6;
2241
8.10M
            ps_rdoq_sbh_ctxt->i4_scan_idx = ps_ctxt->i4_scan_idx;
2242
8.10M
            ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
2243
8.10M
            ps_rdoq_sbh_ctxt->i4_trans_size = trans_size;
2244
2245
8.10M
            ps_rdoq_sbh_ctxt->pi2_dequant_coeff =
2246
8.10M
                ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset];
2247
8.10M
            ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data;
2248
8.10M
            ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs;
2249
8.10M
            ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values;
2250
8.10M
            ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf;
2251
2252
            /* ------- call coeffs scan function ------- */
2253
8.10M
            if((!i4_perform_rdoq))
2254
4.18M
            {
2255
4.18M
                ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
2256
2257
4.18M
                pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
2258
4.18M
            }
2259
8.10M
        }
2260
2261
11.0M
        *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
2262
11.0M
            pi2_quant_coeffs,
2263
11.0M
            pi4_subBlock2csbfId_map,
2264
11.0M
            ps_ctxt->i4_scan_idx,
2265
11.0M
            trans_size,
2266
11.0M
            pu1_ecd_data,
2267
11.0M
            pu1_csbf_buf,
2268
11.0M
            csbf_strd);
2269
11.0M
    }
2270
30.7M
    *pi8_cost >>= ga_trans_shift[trans_idx];
2271
2272
30.7M
#if RDOPT_ZERO_CBF_ENABLE
2273
    /* compare null cbf cost with encode tu rd-cost */
2274
30.7M
    if(cbf != 0)
2275
11.0M
    {
2276
11.0M
        WORD32 tu_bits;
2277
11.0M
        LWORD64 tu_rd_cost;
2278
2279
11.0M
        LWORD64 zero_cbf_cost = 0;
2280
2281
        /* Populating the fields of rdoq_ctxt structure */
2282
11.0M
        if(i4_perform_rdoq)
2283
3.91M
        {
2284
            /* transform size to log2transform size */
2285
3.91M
            GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size);
2286
3.91M
            ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1;
2287
3.91M
            ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_qf;
2288
3.91M
            ps_rdoq_sbh_ctxt->i4_is_luma = 1;
2289
3.91M
            ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx];
2290
3.91M
            ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td =
2291
3.91M
                (1 << ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td) / 2;
2292
3.91M
            ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0;
2293
3.91M
            ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col;
2294
3.91M
            ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row;
2295
3.91M
        }
2296
7.15M
        else if(i4_perform_zcbf)
2297
1.17M
        {
2298
1.17M
            zero_cbf_cost =
2299
2300
1.17M
                ps_ctxt->s_cmn_opt_func.pf_ssd_calculator(
2301
1.17M
                    pu1_src, pu1_pred, src_strd, pred_strd, trans_size, trans_size, NULL_PLANE);
2302
1.17M
        }
2303
2304
        /************************************************************************/
2305
        /* call the entropy rdo encode to get the bit estimate for current tu   */
2306
        /* note that tu includes only residual coding bits and does not include */
2307
        /* tu split, cbf and qp delta encoding bits for a TU                    */
2308
        /************************************************************************/
2309
11.0M
        if(i4_perform_rdoq)
2310
3.91M
        {
2311
3.91M
            tu_bits = ihevce_entropy_rdo_encode_tu_rdoq(
2312
3.91M
                &ps_ctxt->s_rdopt_entropy_ctxt,
2313
3.91M
                (pu1_ecd_data),
2314
3.91M
                trans_size,
2315
3.91M
                1,
2316
3.91M
                ps_rdoq_sbh_ctxt,
2317
3.91M
                pi8_cost,
2318
3.91M
                &zero_cbf_cost,
2319
3.91M
                0);
2320
2321
3.91M
            if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0)
2322
134k
            {
2323
134k
                cbf = 0;
2324
134k
                *pi4_coeff_off = 0;
2325
134k
            }
2326
2327
3.91M
            if((i4_perform_sbh) && (0 != cbf))
2328
3.77M
            {
2329
3.77M
                ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
2330
3.77M
                ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
2331
3.77M
                *pi8_cost = ps_rdoq_sbh_ctxt->i8_ssd_cost;
2332
3.77M
            }
2333
2334
            /*Add round value before normalizing*/
2335
3.91M
            *pi8_cost += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td;
2336
3.91M
            *pi8_cost >>= ga_trans_shift[trans_idx];
2337
2338
3.91M
            if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1)
2339
3.77M
            {
2340
3.77M
                pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
2341
3.77M
                *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
2342
3.77M
                    pi2_quant_coeffs,
2343
3.77M
                    pi4_subBlock2csbfId_map,
2344
3.77M
                    ps_ctxt->i4_scan_idx,
2345
3.77M
                    trans_size,
2346
3.77M
                    pu1_ecd_data,
2347
3.77M
                    pu1_csbf_buf,
2348
3.77M
                    csbf_strd);
2349
3.77M
            }
2350
3.91M
        }
2351
7.15M
        else
2352
7.15M
        {
2353
7.15M
            tu_bits = ihevce_entropy_rdo_encode_tu(
2354
7.15M
                &ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 1, i4_perform_sbh);
2355
7.15M
        }
2356
2357
11.0M
        *pi4_tu_bits = tu_bits;
2358
2359
11.0M
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
2360
2.57M
        {
2361
2.57M
            *pi8_cost = ihevce_it_recon_ssd(
2362
2.57M
                ps_ctxt,
2363
2.57M
                pu1_src,
2364
2.57M
                src_strd,
2365
2.57M
                pu1_pred,
2366
2.57M
                pred_strd,
2367
2.57M
                pi2_deq_data,
2368
2.57M
                deq_data_strd,
2369
2.57M
                pu1_recon,
2370
2.57M
                i4_recon_stride,
2371
2.57M
                pu1_ecd_data,
2372
2.57M
                trans_size,
2373
2.57M
                packed_pred_mode,
2374
2.57M
                cbf,
2375
2.57M
                *pi4_zero_col,
2376
2.57M
                *pi4_zero_row,
2377
2.57M
                NULL_PLANE);
2378
2379
2.57M
            pu1_is_recon_available[0] = 1;
2380
2.57M
        }
2381
2382
11.0M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2383
11.0M
        if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2384
0
        {
2385
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
2386
0
                pu1_src,
2387
0
                src_strd,
2388
0
                pu1_recon,
2389
0
                i4_recon_stride,
2390
0
                pi8_cost[0],
2391
0
                i4_alpha_stim_multiplier,
2392
0
                trans_size,
2393
0
                0,
2394
0
                ps_ctxt->u1_enable_psyRDOPT,
2395
0
                NULL_PLANE);
2396
0
        }
2397
11.0M
        else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2398
0
        {
2399
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
2400
0
                pu1_src,
2401
0
                src_strd,
2402
0
                pu1_pred,
2403
0
                pred_strd,
2404
0
                pi8_cost[0],
2405
0
                i4_alpha_stim_multiplier,
2406
0
                trans_size,
2407
0
                0,
2408
0
                ps_ctxt->u1_enable_psyRDOPT,
2409
0
                NULL_PLANE);
2410
0
        }
2411
11.0M
#endif
2412
2413
        /* add the SSD cost to bits estimate given by ECD */
2414
11.0M
        tu_rd_cost = *pi8_cost + COMPUTE_RATE_COST_CLIP30(
2415
11.0M
                                     tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
2416
2417
11.0M
        if(i4_perform_zcbf)
2418
1.83M
        {
2419
1.83M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2420
1.83M
            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
2421
0
            {
2422
0
                zero_cbf_cost = ihevce_inject_stim_into_distortion(
2423
0
                    pu1_src,
2424
0
                    src_strd,
2425
0
                    pu1_pred,
2426
0
                    pred_strd,
2427
0
                    zero_cbf_cost,
2428
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2429
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2430
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2431
0
                                                 100.0,
2432
0
                    trans_size,
2433
0
                    0,
2434
0
                    ps_ctxt->u1_enable_psyRDOPT,
2435
0
                    NULL_PLANE);
2436
0
            }
2437
1.83M
#endif
2438
2439
            /* force the tu as zero cbf if zero_cbf_cost is lower */
2440
1.83M
            if(zero_cbf_cost < tu_rd_cost)
2441
62.5k
            {
2442
                /* num bytes is set to 0 */
2443
62.5k
                *pi4_coeff_off = 0;
2444
2445
                /* cbf is returned as 0 */
2446
62.5k
                cbf = 0;
2447
2448
                /* cost is returned as 0 cbf cost */
2449
62.5k
                *pi8_cost = zero_cbf_cost;
2450
2451
                /* TU bits is set to 0 */
2452
62.5k
                *pi4_tu_bits = 0;
2453
62.5k
                pu1_is_recon_available[0] = 0;
2454
2455
62.5k
                if(SPATIAL_DOMAIN_SSD == e_ssd_type)
2456
6.60k
                {
2457
                    /* copy pred to recon for zcbf mode */
2458
2459
6.60k
                    ps_ctxt->s_cmn_opt_func.pf_copy_2d(
2460
6.60k
                        pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size);
2461
2462
6.60k
                    pu1_is_recon_available[0] = 1;
2463
6.60k
                }
2464
62.5k
            }
2465
            /* accumulate cu not coded cost with zcbf cost */
2466
1.83M
#if ENABLE_INTER_ZCU_COST
2467
1.83M
            ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost;
2468
1.83M
#endif
2469
1.83M
        }
2470
11.0M
    }
2471
19.6M
    else
2472
19.6M
    {
2473
        /* cbf = 0, accumulate cu not coded cost */
2474
19.6M
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
2475
9.60M
        {
2476
9.60M
            *pi8_cost = ihevce_it_recon_ssd(
2477
9.60M
                ps_ctxt,
2478
9.60M
                pu1_src,
2479
9.60M
                src_strd,
2480
9.60M
                pu1_pred,
2481
9.60M
                pred_strd,
2482
9.60M
                pi2_deq_data,
2483
9.60M
                deq_data_strd,
2484
9.60M
                pu1_recon,
2485
9.60M
                i4_recon_stride,
2486
9.60M
                pu1_ecd_data,
2487
9.60M
                trans_size,
2488
9.60M
                packed_pred_mode,
2489
9.60M
                cbf,
2490
9.60M
                *pi4_zero_col,
2491
9.60M
                *pi4_zero_row,
2492
9.60M
                NULL_PLANE);
2493
2494
9.60M
            pu1_is_recon_available[0] = 1;
2495
9.60M
        }
2496
2497
19.6M
#if ENABLE_INTER_ZCU_COST
2498
19.6M
        {
2499
19.6M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
2500
19.6M
            if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2501
0
            {
2502
0
                pi8_cost[0] = ihevce_inject_stim_into_distortion(
2503
0
                    pu1_src,
2504
0
                    src_strd,
2505
0
                    pu1_recon,
2506
0
                    i4_recon_stride,
2507
0
                    pi8_cost[0],
2508
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2509
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2510
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2511
0
                                                 100.0,
2512
0
                    trans_size,
2513
0
                    0,
2514
0
                    ps_ctxt->u1_enable_psyRDOPT,
2515
0
                    NULL_PLANE);
2516
0
            }
2517
19.6M
            else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
2518
0
            {
2519
0
                pi8_cost[0] = ihevce_inject_stim_into_distortion(
2520
0
                    pu1_src,
2521
0
                    src_strd,
2522
0
                    pu1_pred,
2523
0
                    pred_strd,
2524
0
                    pi8_cost[0],
2525
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
2526
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
2527
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
2528
0
                                                 100.0,
2529
0
                    trans_size,
2530
0
                    0,
2531
0
                    ps_ctxt->u1_enable_psyRDOPT,
2532
0
                    NULL_PLANE);
2533
0
            }
2534
19.6M
#endif
2535
2536
19.6M
            ps_ctxt->i8_cu_not_coded_cost += *pi8_cost;
2537
19.6M
        }
2538
19.6M
#endif /* ENABLE_INTER_ZCU_COST */
2539
19.6M
    }
2540
30.7M
#endif
2541
2542
30.7M
    return (cbf);
2543
30.7M
}
2544
2545
/*!
2546
******************************************************************************
2547
* \if Function name : ihevce_it_recon_fxn \endif
2548
*
2549
* \brief
2550
*    Transform unit level (Luma) IT Recon function
2551
*
2552
* \param[in] ps_ctxt        enc_loop module ctxt pointer
2553
* \param[in] pi2_deq_data   pointer to iq data
2554
* \param[in] deq_data_strd  iq data buffer stride
2555
* \param[in] pu1_pred       pointer to predicted data buffer
2556
* \param[in] pred_strd      predicted buffer stride
2557
* \param[in] pu1_recon      pointer to recon buffer
2558
* \param[in] recon_strd     recon buffer stride
2559
* \param[out] pu1_ecd_data  pointer coeff output buffer (input to ent cod)
2560
* \param[in] trans_size     transform size (4, 8, 16,32)
2561
* \param[in] packed_pred_mode   0:Inter 1:Intra 2:Skip
2562
* \param[in] cbf            CBF of the current block
2563
* \param[in] zero_cols      zero_cols of the current block
2564
* \param[in] zero_rows      zero_rows of the current block
2565
*
2566
* \return
2567
*
2568
* \author
2569
*  Ittiam
2570
*
2571
*****************************************************************************
2572
*/
2573
2574
void ihevce_it_recon_fxn(
2575
    ihevce_enc_loop_ctxt_t *ps_ctxt,
2576
    WORD16 *pi2_deq_data,
2577
    WORD32 deq_dat_strd,
2578
    UWORD8 *pu1_pred,
2579
    WORD32 pred_strd,
2580
    UWORD8 *pu1_recon,
2581
    WORD32 recon_strd,
2582
    UWORD8 *pu1_ecd_data,
2583
    WORD32 trans_size,
2584
    WORD32 packed_pred_mode,
2585
    WORD32 cbf,
2586
    WORD32 zero_cols,
2587
    WORD32 zero_rows)
2588
19.6M
{
2589
19.6M
    WORD32 dc_add_flag = 0;
2590
19.6M
    WORD32 trans_idx;
2591
2592
    /* translate the transform size to index for 4x4 and 8x8 */
2593
19.6M
    trans_idx = trans_size >> 2;
2594
2595
    /* if SKIP mode needs to be evaluated the pred is copied to recon */
2596
19.6M
    if(PRED_MODE_SKIP == packed_pred_mode)
2597
227k
    {
2598
227k
        UWORD8 *pu1_curr_recon, *pu1_curr_pred;
2599
2600
227k
        pu1_curr_pred = pu1_pred;
2601
227k
        pu1_curr_recon = pu1_recon;
2602
2603
        /* 2D copy of data */
2604
2605
227k
        ps_ctxt->s_cmn_opt_func.pf_2d_square_copy(
2606
227k
            pu1_curr_recon, recon_strd, pu1_curr_pred, pred_strd, trans_size, sizeof(UWORD8));
2607
2608
227k
        return;
2609
227k
    }
2610
2611
    /* for intra 4x4 DST transform should be used */
2612
19.4M
    if((1 == trans_idx) && (PRED_MODE_INTRA == packed_pred_mode))
2613
6.59M
    {
2614
6.59M
        trans_idx = 0;
2615
6.59M
    }
2616
    /* for 16x16 cases */
2617
12.8M
    else if(16 == trans_size)
2618
4.46M
    {
2619
4.46M
        trans_idx = 3;
2620
4.46M
    }
2621
    /* for 32x32 cases */
2622
8.33M
    else if(32 == trans_size)
2623
1.72M
    {
2624
1.72M
        trans_idx = 4;
2625
1.72M
    }
2626
2627
    /*if (lastx == 0 && lasty == 0) , ie only 1 coefficient */
2628
19.4M
    if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1]))
2629
3.91M
    {
2630
3.91M
        dc_add_flag = 1;
2631
3.91M
    }
2632
2633
19.4M
    if(0 == cbf)
2634
14.4M
    {
2635
        /* buffer copy */
2636
14.4M
        ps_ctxt->s_cmn_opt_func.pf_2d_square_copy(
2637
14.4M
            pu1_recon, recon_strd, pu1_pred, pred_strd, trans_size, 1);
2638
14.4M
    }
2639
4.92M
    else if((1 == dc_add_flag) && (0 != trans_idx))
2640
126k
    {
2641
        /* dc add */
2642
126k
        ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc(
2643
126k
            pu1_pred,
2644
126k
            pred_strd,
2645
126k
            pu1_recon,
2646
126k
            recon_strd,
2647
126k
            trans_size,
2648
126k
            pi2_deq_data[0],
2649
126k
            NULL_PLANE /* luma */
2650
126k
        );
2651
126k
    }
2652
4.79M
    else
2653
4.79M
    {
2654
4.79M
        ps_ctxt->apf_it_recon[trans_idx](
2655
4.79M
            pi2_deq_data,
2656
4.79M
            &ps_ctxt->ai2_scratch[0],
2657
4.79M
            pu1_pred,
2658
4.79M
            pu1_recon,
2659
4.79M
            deq_dat_strd,
2660
4.79M
            pred_strd,
2661
4.79M
            recon_strd,
2662
4.79M
            zero_cols,
2663
4.79M
            zero_rows);
2664
4.79M
    }
2665
19.4M
}
2666
2667
/*!
2668
******************************************************************************
2669
* \if Function name : ihevce_chroma_it_recon_fxn \endif
2670
*
2671
* \brief
2672
*    Transform unit level (Chroma) IT Recon function
2673
*
2674
* \param[in] ps_ctxt        enc_loop module ctxt pointer
2675
* \param[in] pi2_deq_data   pointer to iq data
2676
* \param[in] deq_data_strd  iq data buffer stride
2677
* \param[in] pu1_pred       pointer to predicted data buffer
2678
* \param[in] pred_strd      predicted buffer stride
2679
* \param[in] pu1_recon      pointer to recon buffer
2680
* \param[in] recon_strd     recon buffer stride
2681
* \param[out] pu1_ecd_data  pointer coeff output buffer (input to ent cod)
2682
* \param[in] trans_size     transform size (4, 8, 16)
2683
* \param[in] cbf            CBF of the current block
2684
* \param[in] zero_cols      zero_cols of the current block
2685
* \param[in] zero_rows      zero_rows of the current block
2686
*
2687
* \return
2688
*
2689
* \author
2690
*  Ittiam
2691
*
2692
*****************************************************************************
2693
*/
2694
2695
void ihevce_chroma_it_recon_fxn(
2696
    ihevce_enc_loop_ctxt_t *ps_ctxt,
2697
    WORD16 *pi2_deq_data,
2698
    WORD32 deq_dat_strd,
2699
    UWORD8 *pu1_pred,
2700
    WORD32 pred_strd,
2701
    UWORD8 *pu1_recon,
2702
    WORD32 recon_strd,
2703
    UWORD8 *pu1_ecd_data,
2704
    WORD32 trans_size,
2705
    WORD32 cbf,
2706
    WORD32 zero_cols,
2707
    WORD32 zero_rows,
2708
    CHROMA_PLANE_ID_T e_chroma_plane)
2709
24.8M
{
2710
24.8M
    WORD32 trans_idx;
2711
2712
24.8M
    ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE));
2713
2714
    /* since 2x2 transform is not allowed for chroma*/
2715
24.8M
    if(2 == trans_size)
2716
0
    {
2717
0
        trans_size = 4;
2718
0
    }
2719
2720
    /* translate the transform size to index */
2721
24.8M
    trans_idx = trans_size >> 2;
2722
2723
    /* for 16x16 cases */
2724
24.8M
    if(16 == trans_size)
2725
3.79M
    {
2726
3.79M
        trans_idx = 3;
2727
3.79M
    }
2728
2729
24.8M
    if(0 == cbf)
2730
21.9M
    {
2731
        /* buffer copy */
2732
21.9M
        ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
2733
21.9M
            pu1_pred, pred_strd, pu1_recon, recon_strd, trans_size, trans_size, e_chroma_plane);
2734
21.9M
    }
2735
2.88M
    else if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1]))
2736
244k
    {
2737
        /* dc add */
2738
244k
        ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc(
2739
244k
            pu1_pred,
2740
244k
            pred_strd,
2741
244k
            pu1_recon,
2742
244k
            recon_strd,
2743
244k
            trans_size,
2744
244k
            pi2_deq_data[0],
2745
244k
            e_chroma_plane /* chroma plane */
2746
244k
        );
2747
244k
    }
2748
2.64M
    else
2749
2.64M
    {
2750
2.64M
        ps_ctxt->apf_chrm_it_recon[trans_idx - 1](
2751
2.64M
            pi2_deq_data,
2752
2.64M
            &ps_ctxt->ai2_scratch[0],
2753
2.64M
            pu1_pred + (WORD32)e_chroma_plane,
2754
2.64M
            pu1_recon + (WORD32)e_chroma_plane,
2755
2.64M
            deq_dat_strd,
2756
2.64M
            pred_strd,
2757
2.64M
            recon_strd,
2758
2.64M
            zero_cols,
2759
2.64M
            zero_rows);
2760
2.64M
    }
2761
24.8M
}
2762
2763
/**
2764
*******************************************************************************
2765
* \if Function name : ihevce_mpm_idx_based_filter_RDOPT_cand \endif
2766
*
2767
* \brief * Filters the RDOPT candidates based on mpm_idx
2768
*
2769
* \par   Description
2770
* Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode
2771
* for a CU
2772
*
2773
* \param[in] ps_ctxt : ptr to enc loop context
2774
* \param[in] ps_cu_analyse : ptr to CU analyse structure
2775
* \param[in] ps_top_nbr_4x4 top 4x4 neighbour pointer
2776
* \param[in] ps_left_nbr_4x4 left 4x4 neighbour pointer
2777
* \param[in] pu1_luma_mode luma mode
2778
*
2779
* \returns none
2780
*
2781
* \author
2782
*  Ittiam
2783
*
2784
*******************************************************************************
2785
*/
2786
2787
void ihevce_mpm_idx_based_filter_RDOPT_cand(
2788
    ihevce_enc_loop_ctxt_t *ps_ctxt,
2789
    cu_analyse_t *ps_cu_analyse,
2790
    nbr_4x4_t *ps_left_nbr_4x4,
2791
    nbr_4x4_t *ps_top_nbr_4x4,
2792
    UWORD8 *pu1_luma_mode,
2793
    UWORD8 *pu1_eval_mark)
2794
394k
{
2795
394k
    WORD32 cu_pos_x;
2796
394k
    WORD32 cu_pos_y;
2797
394k
    nbr_avail_flags_t s_nbr;
2798
394k
    WORD32 trans_size;
2799
394k
    WORD32 au4_cand_mode_list[3];
2800
394k
    WORD32 nbr_flags;
2801
394k
    UWORD8 *pu1_intra_luma_modes;
2802
394k
    WORD32 rdopt_cand_ctr = 0;
2803
394k
    UWORD8 *pu1_luma_eval_mark;
2804
2805
394k
    cu_pos_x = ps_cu_analyse->b3_cu_pos_x << 1;
2806
394k
    cu_pos_y = ps_cu_analyse->b3_cu_pos_y << 1;
2807
394k
    trans_size = ps_cu_analyse->u1_cu_size;
2808
2809
    /* get the neighbour availability flags */
2810
394k
    nbr_flags = ihevce_get_nbr_intra(
2811
394k
        &s_nbr,
2812
394k
        ps_ctxt->pu1_ctb_nbr_map,
2813
394k
        ps_ctxt->i4_nbr_map_strd,
2814
394k
        cu_pos_x,
2815
394k
        cu_pos_y,
2816
394k
        trans_size >> 2);
2817
394k
    (void)nbr_flags;
2818
    /*Call the fun to populate luma intra pred mode fro TU=CU and use the same list fro
2819
    *TU=CU/2 also since the modes are same in both the cases.
2820
    */
2821
394k
    ihevce_populate_intra_pred_mode(
2822
394k
        ps_top_nbr_4x4->b6_luma_intra_mode,
2823
394k
        ps_left_nbr_4x4->b6_luma_intra_mode,
2824
394k
        s_nbr.u1_top_avail,
2825
394k
        s_nbr.u1_left_avail,
2826
394k
        cu_pos_y,
2827
394k
        &au4_cand_mode_list[0]);
2828
2829
    /*Loop through all the RDOPT candidates of TU=CU and TU=CU/2 and check if the current RDOPT
2830
    *cand is present in a4_cand_mode_list, If yes set eval flag to 1 else set it to zero
2831
    */
2832
2833
394k
    pu1_intra_luma_modes = pu1_luma_mode;
2834
394k
    pu1_luma_eval_mark = pu1_eval_mark;
2835
2836
1.41M
    while(pu1_intra_luma_modes[rdopt_cand_ctr] != 255)
2837
1.02M
    {
2838
1.02M
        WORD32 i;
2839
1.02M
        WORD32 found_flag = 0;
2840
2841
        /*1st candidate of TU=CU list and TU=CU/2 list must go through RDOPT stage
2842
        *irrespective of whether the cand is present in the mpm idx list or not
2843
        */
2844
1.02M
        if(rdopt_cand_ctr == 0)
2845
359k
        {
2846
359k
            rdopt_cand_ctr++;
2847
359k
            continue;
2848
359k
        }
2849
2850
2.01M
        for(i = 0; i < 3; i++)
2851
1.67M
        {
2852
1.67M
            if(pu1_intra_luma_modes[rdopt_cand_ctr] == au4_cand_mode_list[i])
2853
319k
            {
2854
319k
                found_flag = 1;
2855
319k
                break;
2856
319k
            }
2857
1.67M
        }
2858
2859
664k
        if(found_flag == 0)
2860
344k
        {
2861
344k
            pu1_luma_eval_mark[rdopt_cand_ctr] = 0;
2862
344k
        }
2863
2864
664k
        rdopt_cand_ctr++;
2865
664k
    }
2866
394k
}
2867
2868
/*!
2869
******************************************************************************
2870
* \if Function name : ihevce_intra_rdopt_cu_ntu \endif
2871
*
2872
* \brief
2873
*    Intra Coding unit function for RD opt mode
2874
*
2875
* \param[in] ps_ctxt    enc_loop module ctxt pointer
2876
* \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure
2877
* \param[in] pu1_luma_mode : pointer to luma mode
2878
* \param[in] ps_cu_analyse  pointer to cu analyse pointer
2879
* \param[in] pu1_src    pointer to source data buffer
2880
* \param[in] src_strd   source buffer stride
2881
* \param[in] pu1_cu_left pointer to left recon data buffer
2882
* \param[in] pu1_cu_top  pointer to top recon data buffer
2883
* \param[in] pu1_cu_top_left pointer to top left recon data buffer
2884
* \param[in] ps_left_nbr_4x4 : left 4x4 neighbour pointer
2885
* \param[in] ps_top_nbr_4x4 : top 4x4 neighbour pointer
2886
* \param[in] nbr_4x4_left_strd left nbr4x4 stride
2887
* \param[in] cu_left_stride left recon buffer stride
2888
* \param[in] curr_buf_idx RD opt buffer index for current usage
2889
* \param[in] func_proc_mode : function processing mode @sa TU_SIZE_WRT_CU_T
2890
*
2891
* \return
2892
*    RDopt cost
2893
*
2894
* \author
2895
*  Ittiam
2896
*
2897
*****************************************************************************
2898
*/
2899
LWORD64 ihevce_intra_rdopt_cu_ntu(
2900
    ihevce_enc_loop_ctxt_t *ps_ctxt,
2901
    enc_loop_cu_prms_t *ps_cu_prms,
2902
    void *pv_pred_org,
2903
    WORD32 pred_strd_org,
2904
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
2905
    UWORD8 *pu1_luma_mode,
2906
    cu_analyse_t *ps_cu_analyse,
2907
    void *pv_curr_src,
2908
    void *pv_cu_left,
2909
    void *pv_cu_top,
2910
    void *pv_cu_top_left,
2911
    nbr_4x4_t *ps_left_nbr_4x4,
2912
    nbr_4x4_t *ps_top_nbr_4x4,
2913
    WORD32 nbr_4x4_left_strd,
2914
    WORD32 cu_left_stride,
2915
    WORD32 curr_buf_idx,
2916
    WORD32 func_proc_mode,
2917
    WORD32 i4_alpha_stim_multiplier)
2918
8.79M
{
2919
8.79M
    enc_loop_cu_final_prms_t *ps_final_prms;
2920
8.79M
    nbr_avail_flags_t s_nbr;
2921
8.79M
    nbr_4x4_t *ps_nbr_4x4;
2922
8.79M
    nbr_4x4_t *ps_tmp_lt_4x4;
2923
8.79M
    recon_datastore_t *ps_recon_datastore;
2924
2925
8.79M
    ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
2926
2927
8.79M
    UWORD32 *pu4_nbr_flags;
2928
8.79M
    UWORD8 *pu1_intra_pred_mode;
2929
8.79M
    WORD32 cu_pos_x;
2930
8.79M
    WORD32 cu_pos_y;
2931
8.79M
    WORD32 trans_size = 0;
2932
8.79M
    UWORD8 *pu1_left;
2933
8.79M
    UWORD8 *pu1_top;
2934
8.79M
    UWORD8 *pu1_top_left;
2935
8.79M
    UWORD8 *pu1_recon;
2936
8.79M
    UWORD8 *pu1_csbf_buf;
2937
8.79M
    UWORD8 *pu1_ecd_data;
2938
8.79M
    WORD16 *pi2_deq_data;
2939
8.79M
    WORD32 deq_data_strd;
2940
8.79M
    LWORD64 total_rdopt_cost;
2941
8.79M
    WORD32 ctr;
2942
8.79M
    WORD32 left_strd;
2943
8.79M
    WORD32 i4_recon_stride;
2944
8.79M
    WORD32 csbf_strd;
2945
8.79M
    WORD32 ecd_data_bytes_cons;
2946
8.79M
    WORD32 num_4x4_in_tu;
2947
8.79M
    WORD32 num_4x4_in_cu;
2948
8.79M
    WORD32 chrm_present_flag;
2949
8.79M
    WORD32 tx_size;
2950
8.79M
    WORD32 cu_bits;
2951
8.79M
    WORD32 num_cu_parts = 0;
2952
8.79M
    WORD32 num_cands = 0;
2953
8.79M
    WORD32 cu_pos_x_8pelunits;
2954
8.79M
    WORD32 cu_pos_y_8pelunits;
2955
8.79M
    WORD32 i4_perform_rdoq;
2956
8.79M
    WORD32 i4_perform_sbh;
2957
8.79M
    UWORD8 u1_compute_spatial_ssd;
2958
8.79M
    UWORD8 u1_compute_recon;
2959
8.79M
    UWORD8 au1_intra_nxn_rdopt_ctxt_models[2][IHEVC_CAB_CTXT_END];
2960
2961
8.79M
    UWORD16 u2_num_tus_in_cu = 0;
2962
8.79M
    WORD32 is_sub_pu_in_hq = 0;
2963
    /* Get the RDOPT cost of the best CU mode for early_exit */
2964
8.79M
    LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
2965
    /* cabac context of prev intra luma pred flag */
2966
8.79M
    UWORD8 u1_prev_flag_cabac_ctxt =
2967
8.79M
        ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_INTRA_LUMA_PRED_FLAG];
2968
8.79M
    WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
2969
2970
8.79M
    UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY;
2971
2972
8.79M
    total_rdopt_cost = 0;
2973
8.79M
    ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
2974
8.79M
    ps_recon_datastore = &ps_final_prms->s_recon_datastore;
2975
8.79M
    i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
2976
8.79M
    csbf_strd = ps_ctxt->i4_cu_csbf_strd;
2977
8.79M
    pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
2978
8.79M
    pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
2979
8.79M
    pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
2980
8.79M
    deq_data_strd = ps_cu_analyse->u1_cu_size; /* deq_data stride is cu size */
2981
8.79M
    ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
2982
8.79M
    ps_tmp_lt_4x4 = ps_left_nbr_4x4;
2983
8.79M
    pu4_nbr_flags = &ps_final_prms->au4_nbr_flags[0];
2984
8.79M
    pu1_intra_pred_mode = &ps_final_prms->au1_intra_pred_mode[0];
2985
8.79M
    cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
2986
8.79M
    cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
2987
8.79M
    cu_pos_x_8pelunits = cu_pos_x;
2988
8.79M
    cu_pos_y_8pelunits = cu_pos_y;
2989
2990
    /* reset cu not coded cost */
2991
8.79M
    ps_ctxt->i8_cu_not_coded_cost = 0;
2992
2993
    /* based on the Processng mode */
2994
8.79M
    if(TU_EQ_CU == func_proc_mode)
2995
5.94M
    {
2996
5.94M
        ps_final_prms->u1_part_mode = SIZE_2Nx2N;
2997
5.94M
        trans_size = ps_cu_analyse->u1_cu_size;
2998
5.94M
        num_cu_parts = 1;
2999
5.94M
        num_cands = 1;
3000
5.94M
        u2_num_tus_in_cu = 1;
3001
5.94M
    }
3002
2.85M
    else if(TU_EQ_CU_DIV2 == func_proc_mode)
3003
2.31M
    {
3004
2.31M
        ps_final_prms->u1_part_mode = SIZE_2Nx2N;
3005
2.31M
        trans_size = ps_cu_analyse->u1_cu_size >> 1;
3006
2.31M
        num_cu_parts = 4;
3007
2.31M
        num_cands = 1;
3008
2.31M
        u2_num_tus_in_cu = 4;
3009
2.31M
    }
3010
534k
    else if(TU_EQ_SUBCU == func_proc_mode)
3011
534k
    {
3012
534k
        ps_final_prms->u1_part_mode = SIZE_NxN;
3013
534k
        trans_size = ps_cu_analyse->u1_cu_size >> 1;
3014
534k
        num_cu_parts = 4;
3015
        /*In HQ for TU = SUBPU, all 35 modes used for RDOPT instead of 3 modes */
3016
534k
        if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset)
3017
339k
        {
3018
339k
            if(ps_ctxt->i1_slice_type != BSLICE)
3019
311k
            {
3020
311k
                num_cands = (4 * MAX_INTRA_CU_CANDIDATES) + 2;
3021
311k
            }
3022
28.3k
            else
3023
28.3k
            {
3024
28.3k
                num_cands = (2 * MAX_INTRA_CU_CANDIDATES);
3025
28.3k
            }
3026
339k
        }
3027
194k
        else
3028
194k
        {
3029
194k
            num_cands = MAX_INTRA_CU_CANDIDATES;
3030
194k
        }
3031
534k
        u2_num_tus_in_cu = 4;
3032
534k
    }
3033
0
    else
3034
0
    {
3035
        /* should not enter here */
3036
0
        ASSERT(0);
3037
0
    }
3038
3039
8.79M
    if(ps_ctxt->i1_cu_qp_delta_enable)
3040
3.83M
    {
3041
3.83M
        ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, trans_size, 1);
3042
3.83M
    }
3043
3044
8.79M
    if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
3045
0
    {
3046
0
        ps_ctxt->i8_cl_ssd_lambda_qf =
3047
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
3048
0
             100.0f);
3049
0
        ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
3050
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
3051
0
             (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
3052
0
    }
3053
3054
8.79M
    u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
3055
5.18M
                             (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
3056
3.86M
                             CONVERT_SSDS_TO_SPATIAL_DOMAIN;
3057
3058
8.79M
    if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
3059
0
    {
3060
0
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
3061
0
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
3062
0
    }
3063
3064
    /* populate the neighbours */
3065
8.79M
    pu1_left = (UWORD8 *)pv_cu_left;
3066
8.79M
    pu1_top = (UWORD8 *)pv_cu_top;
3067
8.79M
    pu1_top_left = (UWORD8 *)pv_cu_top_left;
3068
8.79M
    left_strd = cu_left_stride;
3069
8.79M
    num_4x4_in_tu = (trans_size >> 2);
3070
8.79M
    num_4x4_in_cu = (ps_cu_analyse->u1_cu_size >> 2);
3071
8.79M
    chrm_present_flag = 1;
3072
8.79M
    ecd_data_bytes_cons = 0;
3073
8.79M
    cu_bits = 0;
3074
3075
    /* get the 4x4 level position of current cu */
3076
8.79M
    cu_pos_x = cu_pos_x << 1;
3077
8.79M
    cu_pos_y = cu_pos_y << 1;
3078
3079
    /* populate cu level params knowing that current is intra */
3080
8.79M
    ps_final_prms->u1_skip_flag = 0;
3081
8.79M
    ps_final_prms->u1_intra_flag = PRED_MODE_INTRA;
3082
8.79M
    ps_final_prms->u2_num_pus_in_cu = 1;
3083
    /*init the is_cu_coded flag*/
3084
8.79M
    ps_final_prms->u1_is_cu_coded = 0;
3085
8.79M
    ps_final_prms->u4_cu_sad = 0;
3086
3087
8.79M
    ps_final_prms->as_pu_enc_loop[0].b1_intra_flag = PRED_MODE_INTRA;
3088
8.79M
    ps_final_prms->as_pu_enc_loop[0].b4_wd = (trans_size >> 1) - 1;
3089
8.79M
    ps_final_prms->as_pu_enc_loop[0].b4_ht = (trans_size >> 1) - 1;
3090
8.79M
    ps_final_prms->as_pu_enc_loop[0].b4_pos_x = cu_pos_x;
3091
8.79M
    ps_final_prms->as_pu_enc_loop[0].b4_pos_y = cu_pos_y;
3092
8.79M
    ps_final_prms->as_pu_enc_loop[0].b1_merge_flag = 0;
3093
3094
8.79M
    ps_final_prms->as_col_pu_enc_loop[0].b1_intra_flag = 1;
3095
3096
    /* copy qp directly as an intra cu can't be skipped */
3097
8.79M
    ps_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
3098
8.79M
    ps_nbr_4x4->mv.s_l0_mv.i2_mvx = 0;
3099
8.79M
    ps_nbr_4x4->mv.s_l0_mv.i2_mvy = 0;
3100
8.79M
    ps_nbr_4x4->mv.s_l1_mv.i2_mvx = 0;
3101
8.79M
    ps_nbr_4x4->mv.s_l1_mv.i2_mvy = 0;
3102
8.79M
    ps_nbr_4x4->mv.i1_l0_ref_pic_buf_id = -1;
3103
8.79M
    ps_nbr_4x4->mv.i1_l1_ref_pic_buf_id = -1;
3104
8.79M
    ps_nbr_4x4->mv.i1_l0_ref_idx = -1;
3105
8.79M
    ps_nbr_4x4->mv.i1_l1_ref_idx = -1;
3106
3107
    /* RDOPT copy States :  TU init (best until prev TU) to current */
3108
8.79M
    memcpy(
3109
8.79M
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
3110
8.79M
             .s_cabac_ctxt.au1_ctxt_models[0],
3111
8.79M
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
3112
8.79M
        IHEVC_CAB_COEFFX_PREFIX);
3113
3114
    /* RDOPT copy States :update to init state if 0 cbf */
3115
8.79M
    memcpy(
3116
8.79M
        &au1_intra_nxn_rdopt_ctxt_models[0][0],
3117
8.79M
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
3118
8.79M
        IHEVC_CAB_COEFFX_PREFIX);
3119
8.79M
    memcpy(
3120
8.79M
        &au1_intra_nxn_rdopt_ctxt_models[1][0],
3121
8.79M
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
3122
8.79M
        IHEVC_CAB_COEFFX_PREFIX);
3123
3124
    /* loop for all partitions in CU  blocks */
3125
24.9M
    for(ctr = 0; ctr < num_cu_parts; ctr++)
3126
17.0M
    {
3127
17.0M
        UWORD8 *pu1_curr_mode;
3128
17.0M
        WORD32 cand_ctr;
3129
17.0M
        WORD32 nbr_flags;
3130
3131
        /* for NxN case to track the best mode       */
3132
        /* for other cases zeroth index will be used */
3133
17.0M
        intra_prev_rem_flags_t as_intra_prev_rem[2];
3134
17.0M
        LWORD64 ai8_cand_rdopt_cost[2];
3135
17.0M
        UWORD32 au4_tu_sad[2];
3136
17.0M
        WORD32 ai4_tu_bits[2];
3137
17.0M
        WORD32 ai4_cbf[2];
3138
17.0M
        WORD32 ai4_curr_bytes[2];
3139
17.0M
        WORD32 ai4_zero_col[2];
3140
17.0M
        WORD32 ai4_zero_row[2];
3141
        /* To store the pred, coeff and dequant for TU_EQ_SUBCU case (since mul.
3142
        cand. are there) ping-pong buffer to store the best and current */
3143
17.0M
        UWORD8 au1_cur_pred_data[2][MIN_TU_SIZE * MIN_TU_SIZE];
3144
17.0M
        UWORD8 au1_intra_coeffs[2][MAX_SCAN_COEFFS_BYTES_4x4];
3145
17.0M
        WORD16 ai2_intra_deq_coeffs[2][MIN_TU_SIZE * MIN_TU_SIZE];
3146
        /* Context models stored for RDopt store and restore purpose */
3147
3148
17.0M
        UWORD8 au1_recon_availability[2];
3149
3150
17.0M
        WORD32 best_cand_idx = 0;
3151
17.0M
        LWORD64 best_cand_cost = MAX_COST_64;
3152
        /* counters to toggle b/w best and current */
3153
17.0M
        WORD32 best_intra_buf_idx = 1;
3154
17.0M
        WORD32 curr_intra_buf_idx = 0;
3155
3156
        /* copy the mode pointer to be used in inner loop */
3157
17.0M
        pu1_curr_mode = pu1_luma_mode;
3158
3159
        /* get the neighbour availability flags */
3160
17.0M
        nbr_flags = ihevce_get_nbr_intra(
3161
17.0M
            &s_nbr,
3162
17.0M
            ps_ctxt->pu1_ctb_nbr_map,
3163
17.0M
            ps_ctxt->i4_nbr_map_strd,
3164
17.0M
            cu_pos_x,
3165
17.0M
            cu_pos_y,
3166
17.0M
            num_4x4_in_tu);
3167
3168
        /* copy the nbr flags for chroma reuse */
3169
17.0M
        if(4 != trans_size)
3170
12.8M
        {
3171
12.8M
            *pu4_nbr_flags = nbr_flags;
3172
12.8M
        }
3173
4.22M
        else if(1 == chrm_present_flag)
3174
1.07M
        {
3175
            /* compute the avail flags assuming luma trans is 8x8 */
3176
            /* get the neighbour availability flags */
3177
1.07M
            *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
3178
1.07M
                ps_ctxt->pu1_ctb_nbr_map,
3179
1.07M
                ps_ctxt->i4_nbr_map_strd,
3180
1.07M
                cu_pos_x,
3181
1.07M
                cu_pos_y,
3182
1.07M
                (num_4x4_in_tu << 1),
3183
1.07M
                (num_4x4_in_tu << 1));
3184
1.07M
        }
3185
3186
17.0M
        u1_compute_recon = !u1_compute_spatial_ssd && ((num_cu_parts > 1) && (ctr < 3));
3187
3188
17.0M
        if(!ctr && (u1_compute_spatial_ssd || u1_compute_recon))
3189
5.70M
        {
3190
5.70M
            ps_recon_datastore->u1_is_lumaRecon_available = 1;
3191
5.70M
        }
3192
11.3M
        else if(!ctr)
3193
3.09M
        {
3194
3.09M
            ps_recon_datastore->u1_is_lumaRecon_available = 0;
3195
3.09M
        }
3196
3197
17.0M
        ihevc_intra_pred_luma_ref_substitution_fptr =
3198
17.0M
            ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
3199
3200
        /* call reference array substitution */
3201
17.0M
        ihevc_intra_pred_luma_ref_substitution_fptr(
3202
17.0M
            pu1_top_left,
3203
17.0M
            pu1_top,
3204
17.0M
            pu1_left,
3205
17.0M
            left_strd,
3206
17.0M
            trans_size,
3207
17.0M
            nbr_flags,
3208
17.0M
            (UWORD8 *)ps_ctxt->pv_ref_sub_out,
3209
17.0M
            1);
3210
3211
        /* Intra Mode gating based on MPM cand list and encoder quality preset */
3212
17.0M
        if((ps_ctxt->i1_slice_type != ISLICE) && (TU_EQ_SUBCU == func_proc_mode) &&
3213
759k
           (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
3214
163k
        {
3215
163k
            ihevce_mpm_idx_based_filter_RDOPT_cand(
3216
163k
                ps_ctxt,
3217
163k
                ps_cu_analyse,
3218
163k
                ps_left_nbr_4x4,
3219
163k
                ps_top_nbr_4x4,
3220
163k
                pu1_luma_mode,
3221
163k
                &ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][0]);
3222
163k
        }
3223
3224
17.0M
        if((TU_EQ_SUBCU == func_proc_mode) && (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
3225
1.34M
           (ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr] >= MAX_INTRA_CU_CANDIDATES))
3226
1.34M
        {
3227
1.34M
            WORD32 ai4_mpm_mode_list[3];
3228
1.34M
            WORD32 i;
3229
3230
1.34M
            WORD32 i4_curr_index = ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr];
3231
3232
1.34M
            ihevce_populate_intra_pred_mode(
3233
1.34M
                ps_top_nbr_4x4->b6_luma_intra_mode,
3234
1.34M
                ps_tmp_lt_4x4->b6_luma_intra_mode,
3235
1.34M
                s_nbr.u1_top_avail,
3236
1.34M
                s_nbr.u1_left_avail,
3237
1.34M
                cu_pos_y,
3238
1.34M
                &ai4_mpm_mode_list[0]);
3239
3240
5.36M
            for(i = 0; i < 3; i++)
3241
4.02M
            {
3242
4.02M
                if(ps_cu_analyse->s_cu_intra_cand
3243
4.02M
                       .au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] == 0)
3244
754k
                {
3245
754k
                    ASSERT(ai4_mpm_mode_list[i] < 35);
3246
3247
754k
                    ps_cu_analyse->s_cu_intra_cand
3248
754k
                        .au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] = 1;
3249
754k
                    pu1_luma_mode[i4_curr_index] = ai4_mpm_mode_list[i];
3250
754k
                    ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr]++;
3251
754k
                    i4_curr_index++;
3252
754k
                }
3253
4.02M
            }
3254
3255
1.34M
            pu1_luma_mode[i4_curr_index] = 255;
3256
1.34M
        }
3257
3258
        /* loop over candidates for each partition */
3259
42.8M
        for(cand_ctr = 0; cand_ctr < num_cands; cand_ctr++)
3260
27.4M
        {
3261
27.4M
            WORD32 curr_pred_mode;
3262
27.4M
            WORD32 bits = 0;
3263
27.4M
            LWORD64 curr_cost;
3264
27.4M
            WORD32 luma_pred_func_idx;
3265
27.4M
            UWORD8 *pu1_curr_ecd_data;
3266
27.4M
            WORD16 *pi2_curr_deq_data;
3267
27.4M
            WORD32 curr_deq_data_strd;
3268
27.4M
            WORD32 pred_strd;
3269
27.4M
            UWORD8 *pu1_pred;
3270
3271
            /* if NXN case the recon and ecd data is stored in temp buffers */
3272
27.4M
            if(TU_EQ_SUBCU == func_proc_mode)
3273
12.4M
            {
3274
12.4M
                pu1_pred = &au1_cur_pred_data[curr_intra_buf_idx][0];
3275
12.4M
                pred_strd = trans_size;
3276
12.4M
                pu1_curr_ecd_data = &au1_intra_coeffs[curr_intra_buf_idx][0];
3277
12.4M
                pi2_curr_deq_data = &ai2_intra_deq_coeffs[curr_intra_buf_idx][0];
3278
12.4M
                curr_deq_data_strd = trans_size;
3279
3280
12.4M
                ASSERT(trans_size == MIN_TU_SIZE);
3281
12.4M
            }
3282
14.9M
            else
3283
14.9M
            {
3284
14.9M
                pu1_pred = (UWORD8 *)pv_pred_org;
3285
14.9M
                pred_strd = pred_strd_org;
3286
14.9M
                pu1_curr_ecd_data = pu1_ecd_data;
3287
14.9M
                pi2_curr_deq_data = pi2_deq_data;
3288
14.9M
                curr_deq_data_strd = deq_data_strd;
3289
14.9M
            }
3290
3291
27.4M
            pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[curr_intra_buf_idx]) +
3292
27.4M
                        (ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride;
3293
3294
27.4M
            if(is_sub_pu_in_hq == 1)
3295
0
            {
3296
0
                curr_pred_mode = cand_ctr;
3297
0
            }
3298
27.4M
            else
3299
27.4M
            {
3300
27.4M
                curr_pred_mode = pu1_curr_mode[cand_ctr];
3301
27.4M
            }
3302
3303
            /* If the candidate mode is 255, then break */
3304
27.4M
            if(255 == curr_pred_mode)
3305
1.55M
            {
3306
1.55M
                break;
3307
1.55M
            }
3308
25.8M
            else if(250 == curr_pred_mode)
3309
0
            {
3310
0
                continue;
3311
0
            }
3312
3313
            /* check if this mode needs to be evaluated or not. For 2nx2n cases, this   */
3314
            /* function will be called once per candidate, so this check has been done  */
3315
            /* outside this function call. For NxN case, this function will be called   */
3316
            /* only once, and all the candidates will be evaluated here.                */
3317
25.8M
            if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3)
3318
7.15M
            {
3319
7.15M
                if((TU_EQ_SUBCU == func_proc_mode) &&
3320
2.09M
                   (0 == ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][cand_ctr]))
3321
184k
                {
3322
184k
                    continue;
3323
184k
                }
3324
7.15M
            }
3325
3326
            /* call reference filtering */
3327
25.6M
            ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr(
3328
25.6M
                (UWORD8 *)ps_ctxt->pv_ref_sub_out,
3329
25.6M
                trans_size,
3330
25.6M
                (UWORD8 *)ps_ctxt->pv_ref_filt_out,
3331
25.6M
                curr_pred_mode,
3332
25.6M
                ps_ctxt->i1_strong_intra_smoothing_enable_flag);
3333
3334
            /* use the look up to get the function idx */
3335
25.6M
            luma_pred_func_idx = g_i4_ip_funcs[curr_pred_mode];
3336
3337
            /* call the intra prediction function */
3338
25.6M
            ps_ctxt->apf_lum_ip[luma_pred_func_idx](
3339
25.6M
                (UWORD8 *)ps_ctxt->pv_ref_filt_out,
3340
25.6M
                1,
3341
25.6M
                pu1_pred,
3342
25.6M
                pred_strd,
3343
25.6M
                trans_size,
3344
25.6M
                curr_pred_mode);
3345
3346
            /* populate the coeffs scan idx */
3347
25.6M
            ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
3348
3349
            /* for luma 4x4 and 8x8 transforms, the scan is chosen based on intra pred mode */
3350
25.6M
            if(trans_size < 16)
3351
19.2M
            {
3352
                /* for modes from 22 upto 30 horizontal scan is used */
3353
19.2M
                if((curr_pred_mode > 21) && (curr_pred_mode < 31))
3354
4.96M
                {
3355
4.96M
                    ps_ctxt->i4_scan_idx = SCAN_HORZ;
3356
4.96M
                }
3357
                /* for modes from 6 upto 14 vertical scan is used */
3358
14.2M
                else if((curr_pred_mode > 5) && (curr_pred_mode < 15))
3359
4.48M
                {
3360
4.48M
                    ps_ctxt->i4_scan_idx = SCAN_VERT;
3361
4.48M
                }
3362
19.2M
            }
3363
3364
            /* RDOPT copy States :  TU init (best until prev TU) to current */
3365
25.6M
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3366
25.6M
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
3367
25.6M
                        .s_cabac_ctxt.au1_ctxt_models[0] +
3368
25.6M
                    IHEVC_CAB_COEFFX_PREFIX,
3369
25.6M
                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
3370
25.6M
                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3371
3372
25.6M
            i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
3373
25.6M
            i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
3374
3375
#if DISABLE_RDOQ_INTRA
3376
            i4_perform_rdoq = 0;
3377
#endif
3378
3379
            /* 2 multi-dimensional arrays based on trans size of rounding factor to be added here */
3380
            /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
3381
            /* Currently the complete array will contain only single value*/
3382
            /*The rounding factor is calculated with the formula
3383
            Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
3384
            rounding factor = (1 - DeadZone Val)
3385
3386
            Assumption: Cabac states of All the sub-blocks in the TU are considered independent
3387
            */
3388
25.6M
            if((ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING))
3389
18.7M
            {
3390
18.7M
                if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0))
3391
0
                {
3392
0
                    double i4_lamda_modifier;
3393
3394
0
                    if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
3395
0
                    {
3396
0
                        i4_lamda_modifier =
3397
0
                            ps_ctxt->i4_lamda_modifier *
3398
0
                            CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
3399
0
                    }
3400
0
                    else
3401
0
                    {
3402
0
                        i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
3403
0
                    }
3404
0
                    if(ps_ctxt->i4_use_const_lamda_modifier)
3405
0
                    {
3406
0
                        if(ISLICE == ps_ctxt->i1_slice_type)
3407
0
                        {
3408
0
                            i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
3409
0
                        }
3410
0
                        else
3411
0
                        {
3412
0
                            i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
3413
0
                        }
3414
0
                    }
3415
3416
0
                    ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
3417
0
                        &ps_ctxt->i4_quant_round_tu[0][0];
3418
0
                    ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
3419
0
                        &ps_ctxt->i4_quant_round_tu[1][0];
3420
3421
0
                    memset(
3422
0
                        ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
3423
0
                        0,
3424
0
                        trans_size * trans_size * sizeof(WORD32));
3425
0
                    memset(
3426
0
                        ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
3427
0
                        0,
3428
0
                        trans_size * trans_size * sizeof(WORD32));
3429
3430
0
                    ihevce_quant_rounding_factor_gen(
3431
0
                        trans_size,
3432
0
                        1,
3433
0
                        &ps_ctxt->s_rdopt_entropy_ctxt,
3434
0
                        ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
3435
0
                        ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
3436
0
                        i4_lamda_modifier,
3437
0
                        1);
3438
0
                }
3439
18.7M
                else
3440
18.7M
                {
3441
18.7M
                    ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
3442
18.7M
                        ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3];
3443
18.7M
                    ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
3444
18.7M
                        ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3];
3445
18.7M
                }
3446
18.7M
            }
3447
3448
            /* call T Q IT IQ and recon function */
3449
25.6M
            ai4_cbf[curr_intra_buf_idx] = ihevce_t_q_iq_ssd_scan_fxn(
3450
25.6M
                ps_ctxt,
3451
25.6M
                pu1_pred,
3452
25.6M
                pred_strd,
3453
25.6M
                (UWORD8 *)pv_curr_src,
3454
25.6M
                src_strd,
3455
25.6M
                pi2_curr_deq_data,
3456
25.6M
                curr_deq_data_strd,
3457
25.6M
                pu1_recon,
3458
25.6M
                i4_recon_stride,
3459
25.6M
                pu1_curr_ecd_data,
3460
25.6M
                pu1_csbf_buf,
3461
25.6M
                csbf_strd,
3462
25.6M
                trans_size,
3463
25.6M
                PRED_MODE_INTRA,
3464
25.6M
                &ai8_cand_rdopt_cost[curr_intra_buf_idx],
3465
25.6M
                &ai4_curr_bytes[curr_intra_buf_idx],
3466
25.6M
                &ai4_tu_bits[curr_intra_buf_idx],
3467
25.6M
                &au4_tu_sad[curr_intra_buf_idx],
3468
25.6M
                &ai4_zero_col[curr_intra_buf_idx],
3469
25.6M
                &ai4_zero_row[curr_intra_buf_idx],
3470
25.6M
                &au1_recon_availability[curr_intra_buf_idx],
3471
25.6M
                i4_perform_rdoq,
3472
25.6M
                i4_perform_sbh,
3473
25.6M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
3474
25.6M
                i4_alpha_stim_multiplier,
3475
25.6M
                u1_is_cu_noisy,
3476
25.6M
#endif
3477
25.6M
                u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
3478
25.6M
                1 /*early_cbf */
3479
25.6M
            );
3480
3481
#if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
3482
            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
3483
            {
3484
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
3485
                ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
3486
                    pv_curr_src,
3487
                    src_strd,
3488
                    pu1_pred,
3489
                    pred_strd,
3490
                    ai8_cand_rdopt_cost[curr_intra_buf_idx],
3491
                    i4_alpha_stim_multiplier,
3492
                    trans_size,
3493
                    0,
3494
                    ps_ctxt->u1_enable_psyRDOPT,
3495
                    NULL_PLANE);
3496
#else
3497
                if(u1_compute_spatial_ssd && au1_recon_availability[curr_intra_buf_idx])
3498
                {
3499
                    ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
3500
                        pv_curr_src,
3501
                        src_strd,
3502
                        pu1_recon,
3503
                        i4_recon_stride,
3504
                        ai8_cand_rdopt_cost[curr_intra_buf_idx],
3505
                        i4_alpha_stim_multiplier,
3506
                        trans_size,
3507
                        0,
3508
                        ps_ctxt->u1_enable_psyRDOPT,
3509
                        NULL_PLANE);
3510
                }
3511
                else
3512
                {
3513
                    ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
3514
                        pv_curr_src,
3515
                        src_strd,
3516
                        pu1_pred,
3517
                        pred_strd,
3518
                        ai8_cand_rdopt_cost[curr_intra_buf_idx],
3519
                        i4_alpha_stim_multiplier,
3520
                        trans_size,
3521
                        0,
3522
                        ps_ctxt->u1_enable_psyRDOPT,
3523
                        NULL_PLANE);
3524
                }
3525
#endif
3526
            }
3527
#endif
3528
3529
25.6M
            if(TU_EQ_SUBCU == func_proc_mode)
3530
10.7M
            {
3531
10.7M
                ASSERT(ai4_curr_bytes[curr_intra_buf_idx] < MAX_SCAN_COEFFS_BYTES_4x4);
3532
10.7M
            }
3533
3534
            /* based on CBF/No CBF copy the corresponding state */
3535
25.6M
            if(0 == ai4_cbf[curr_intra_buf_idx])
3536
16.5M
            {
3537
                /* RDOPT copy States :update to init state if 0 cbf */
3538
16.5M
                COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3539
16.5M
                    &au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] +
3540
16.5M
                        IHEVC_CAB_COEFFX_PREFIX,
3541
16.5M
                    &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
3542
16.5M
                    IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3543
16.5M
            }
3544
9.12M
            else
3545
9.12M
            {
3546
                /* RDOPT copy States :update to new state only if CBF is non zero */
3547
9.12M
                COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3548
9.12M
                    &au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] +
3549
9.12M
                        IHEVC_CAB_COEFFX_PREFIX,
3550
9.12M
                    &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
3551
9.12M
                            .s_cabac_ctxt.au1_ctxt_models[0] +
3552
9.12M
                        IHEVC_CAB_COEFFX_PREFIX,
3553
9.12M
                    IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3554
9.12M
            }
3555
3556
            /* call the function which perform intra mode prediction */
3557
25.6M
            ihevce_intra_pred_mode_signaling(
3558
25.6M
                ps_top_nbr_4x4->b6_luma_intra_mode,
3559
25.6M
                ps_tmp_lt_4x4->b6_luma_intra_mode,
3560
25.6M
                s_nbr.u1_top_avail,
3561
25.6M
                s_nbr.u1_left_avail,
3562
25.6M
                cu_pos_y,
3563
25.6M
                curr_pred_mode,
3564
25.6M
                &as_intra_prev_rem[curr_intra_buf_idx]);
3565
            /******************************************************************/
3566
            /* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN
3567
            The bits for these are evaluated for every RDO mode of current subcu
3568
            as they can significantly contribute to RDO cost.  Note that these
3569
            bits are not accounted for here (ai8_cand_rdopt_cost) as they
3570
            are accounted for in encode_cu call later */
3571
3572
            /******************************************************************/
3573
            /* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN
3574
            The bits for these are evaluated for every RDO mode of current subcu
3575
            as they can significantly contribute to RDO cost.  Note that these
3576
            bits are not accounted for here (ai8_cand_rdopt_cost) as they
3577
            are accounted for in encode_cu call later */
3578
3579
            /* Estimate bits to encode prev rem flag  for NXN mode */
3580
25.6M
            {
3581
25.6M
                WORD32 bits_frac = gau2_ihevce_cabac_bin_to_bits
3582
25.6M
                    [u1_prev_flag_cabac_ctxt ^
3583
25.6M
                     as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag];
3584
3585
                /* rounding the fractional bits to nearest integer */
3586
25.6M
                bits = ((bits_frac + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q);
3587
25.6M
            }
3588
3589
            /* based on prev flag all the mpmidx bits and rem bits */
3590
25.6M
            if(1 == as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag)
3591
18.5M
            {
3592
                /* mpm_idx */
3593
18.5M
                bits += as_intra_prev_rem[curr_intra_buf_idx].b2_mpm_idx ? 2 : 1;
3594
18.5M
            }
3595
7.15M
            else
3596
7.15M
            {
3597
                /* rem intra mode */
3598
7.15M
                bits += 5;
3599
7.15M
            }
3600
3601
25.6M
            bits += ai4_tu_bits[curr_intra_buf_idx];
3602
3603
            /* compute the total cost for current candidate */
3604
25.6M
            curr_cost = ai8_cand_rdopt_cost[curr_intra_buf_idx];
3605
3606
            /* get the final ssd cost */
3607
25.6M
            curr_cost +=
3608
25.6M
                COMPUTE_RATE_COST_CLIP30(bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
3609
3610
            /* check of the best candidate cost */
3611
25.6M
            if(curr_cost < best_cand_cost)
3612
18.3M
            {
3613
18.3M
                best_cand_cost = curr_cost;
3614
18.3M
                best_cand_idx = cand_ctr;
3615
18.3M
                best_intra_buf_idx = curr_intra_buf_idx;
3616
18.3M
                curr_intra_buf_idx = !curr_intra_buf_idx;
3617
18.3M
            }
3618
25.6M
        }
3619
3620
        /***************    For TU_EQ_SUBCU case    *****************/
3621
        /* Copy the pred for best cand. to the final pred array     */
3622
        /* Copy the iq-coeff for best cand. to the final array      */
3623
        /* copy the best coeffs data to final buffer                */
3624
17.0M
        if(TU_EQ_SUBCU == func_proc_mode)
3625
2.11M
        {
3626
            /* Copy the pred for best cand. to the final pred array */
3627
3628
2.11M
            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
3629
2.11M
                (UWORD8 *)pv_pred_org,
3630
2.11M
                pred_strd_org,
3631
2.11M
                &au1_cur_pred_data[best_intra_buf_idx][0],
3632
2.11M
                trans_size,
3633
2.11M
                trans_size,
3634
2.11M
                trans_size);
3635
3636
            /* Copy the deq-coeff for best cand. to the final array */
3637
3638
2.11M
            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
3639
2.11M
                (UWORD8 *)pi2_deq_data,
3640
2.11M
                deq_data_strd << 1,
3641
2.11M
                (UWORD8 *)&ai2_intra_deq_coeffs[best_intra_buf_idx][0],
3642
2.11M
                trans_size << 1,
3643
2.11M
                trans_size << 1,
3644
2.11M
                trans_size);
3645
            /* copy the coeffs to final cu ecd bytes buffer */
3646
2.11M
            memcpy(
3647
2.11M
                pu1_ecd_data,
3648
2.11M
                &au1_intra_coeffs[best_intra_buf_idx][0],
3649
2.11M
                ai4_curr_bytes[best_intra_buf_idx]);
3650
3651
2.11M
            pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[best_intra_buf_idx]) +
3652
2.11M
                        (ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride;
3653
2.11M
        }
3654
3655
        /*----------   Calculate Recon for the best INTRA mode     ---------*/
3656
        /* TU_EQ_CU case : No need for recon, otherwise recon is required   */
3657
        /* Compute recon only for the best mode for TU_EQ_SUBCU case        */
3658
17.0M
        if(u1_compute_recon)
3659
5.40M
        {
3660
5.40M
            ihevce_it_recon_fxn(
3661
5.40M
                ps_ctxt,
3662
5.40M
                pi2_deq_data,
3663
5.40M
                deq_data_strd,
3664
5.40M
                (UWORD8 *)pv_pred_org,
3665
5.40M
                pred_strd_org,
3666
5.40M
                pu1_recon,
3667
5.40M
                i4_recon_stride,
3668
5.40M
                pu1_ecd_data,
3669
5.40M
                trans_size,
3670
5.40M
                PRED_MODE_INTRA,
3671
5.40M
                ai4_cbf[best_intra_buf_idx],
3672
5.40M
                ai4_zero_col[best_intra_buf_idx],
3673
5.40M
                ai4_zero_row[best_intra_buf_idx]);
3674
3675
5.40M
            ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx;
3676
5.40M
        }
3677
11.6M
        else if(u1_compute_spatial_ssd && au1_recon_availability[best_intra_buf_idx])
3678
6.80M
        {
3679
6.80M
            ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx;
3680
6.80M
        }
3681
4.82M
        else
3682
4.82M
        {
3683
4.82M
            ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
3684
4.82M
        }
3685
3686
        /* RDOPT copy States :update to best modes state */
3687
17.0M
        COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
3688
17.0M
            &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
3689
17.0M
            &au1_intra_nxn_rdopt_ctxt_models[best_intra_buf_idx][0] + IHEVC_CAB_COEFFX_PREFIX,
3690
17.0M
            IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
3691
3692
        /* copy the prev,mpm_idx and rem modes from best cand */
3693
17.0M
        ps_final_prms->as_intra_prev_rem[ctr] = as_intra_prev_rem[best_intra_buf_idx];
3694
3695
        /* update the cabac context of prev intra pred mode flag */
3696
17.0M
        u1_prev_flag_cabac_ctxt = gau1_ihevc_next_state
3697
17.0M
            [(u1_prev_flag_cabac_ctxt << 1) |
3698
17.0M
             as_intra_prev_rem[best_intra_buf_idx].b1_prev_intra_luma_pred_flag];
3699
3700
        /* accumulate the TU bits into cu bits */
3701
17.0M
        cu_bits += ai4_tu_bits[best_intra_buf_idx];
3702
3703
        /* copy the intra pred mode for chroma reuse */
3704
17.0M
        if(is_sub_pu_in_hq == 0)
3705
17.0M
        {
3706
17.0M
            *pu1_intra_pred_mode = pu1_curr_mode[best_cand_idx];
3707
17.0M
        }
3708
0
        else
3709
0
        {
3710
0
            *pu1_intra_pred_mode = best_cand_idx;
3711
0
        }
3712
3713
        /* Store luma mode as chroma mode. If chroma prcs happens, and
3714
        if a diff. mode wins, it should update this!! */
3715
17.0M
        if(1 == chrm_present_flag)
3716
13.8M
        {
3717
13.8M
            if(is_sub_pu_in_hq == 0)
3718
13.8M
            {
3719
13.8M
                ps_final_prms->u1_chroma_intra_pred_actual_mode =
3720
13.8M
                    ((ps_ctxt->u1_chroma_array_type == 2)
3721
13.8M
                         ? gau1_chroma422_intra_angle_mapping[pu1_curr_mode[best_cand_idx]]
3722
13.8M
                         : pu1_curr_mode[best_cand_idx]);
3723
13.8M
            }
3724
0
            else
3725
0
            {
3726
0
                ps_final_prms->u1_chroma_intra_pred_actual_mode =
3727
0
                    ((ps_ctxt->u1_chroma_array_type == 2)
3728
0
                         ? gau1_chroma422_intra_angle_mapping[best_cand_idx]
3729
0
                         : best_cand_idx);
3730
0
            }
3731
3732
13.8M
            ps_final_prms->u1_chroma_intra_pred_mode = 4;
3733
13.8M
        }
3734
3735
        /*remember the cbf flag to replicate qp for 4x4 neighbour*/
3736
17.0M
        ps_final_prms->u1_is_cu_coded |= ai4_cbf[best_intra_buf_idx];
3737
3738
        /*accumulate ssd over all TU of intra CU*/
3739
17.0M
        ps_final_prms->u4_cu_sad += au4_tu_sad[best_intra_buf_idx];
3740
3741
        /* update the bytes */
3742
17.0M
        ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
3743
17.0M
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed =
3744
17.0M
            ai4_curr_bytes[best_intra_buf_idx];
3745
        /* update the zero_row and col info for the final mode */
3746
17.0M
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col =
3747
17.0M
            ai4_zero_col[best_intra_buf_idx];
3748
17.0M
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row =
3749
17.0M
            ai4_zero_row[best_intra_buf_idx];
3750
3751
17.0M
        ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
3752
3753
        /* update the total bytes cons */
3754
17.0M
        ecd_data_bytes_cons += ai4_curr_bytes[best_intra_buf_idx];
3755
17.0M
        pu1_ecd_data += ai4_curr_bytes[best_intra_buf_idx];
3756
3757
17.0M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = ai4_cbf[best_intra_buf_idx];
3758
17.0M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
3759
17.0M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
3760
17.0M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
3761
17.0M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
3762
17.0M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag;
3763
17.0M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp;
3764
17.0M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0;
3765
17.0M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0;
3766
17.0M
        GETRANGE(tx_size, trans_size);
3767
17.0M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
3768
17.0M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x;
3769
17.0M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y;
3770
3771
        /* repiclate the nbr 4x4 structure for all 4x4 blocks current TU */
3772
17.0M
        ps_nbr_4x4->b1_skip_flag = 0;
3773
17.0M
        ps_nbr_4x4->b1_intra_flag = 1;
3774
17.0M
        ps_nbr_4x4->b1_pred_l0_flag = 0;
3775
17.0M
        ps_nbr_4x4->b1_pred_l1_flag = 0;
3776
3777
17.0M
        if(is_sub_pu_in_hq == 0)
3778
17.0M
        {
3779
17.0M
            ps_nbr_4x4->b6_luma_intra_mode = pu1_curr_mode[best_cand_idx];
3780
17.0M
        }
3781
0
        else
3782
0
        {
3783
0
            ps_nbr_4x4->b6_luma_intra_mode = best_cand_idx;
3784
0
        }
3785
3786
17.0M
        ps_nbr_4x4->b1_y_cbf = ai4_cbf[best_intra_buf_idx];
3787
3788
        /* since tu size can be less than cusize, replication is done with strd */
3789
17.0M
        {
3790
17.0M
            WORD32 i, j;
3791
17.0M
            nbr_4x4_t *ps_tmp_4x4;
3792
3793
17.0M
            ps_tmp_4x4 = ps_nbr_4x4;
3794
3795
66.7M
            for(i = 0; i < num_4x4_in_tu; i++)
3796
49.7M
            {
3797
267M
                for(j = 0; j < num_4x4_in_tu; j++)
3798
217M
                {
3799
217M
                    ps_tmp_4x4[j] = *ps_nbr_4x4;
3800
217M
                }
3801
                /* row level update*/
3802
49.7M
                ps_tmp_4x4 += num_4x4_in_cu;
3803
49.7M
            }
3804
17.0M
        }
3805
3806
17.0M
        if(TU_EQ_SUBCU == func_proc_mode)
3807
2.11M
        {
3808
2.11M
            pu1_luma_mode += ((MAX_INTRA_CU_CANDIDATES * 4) + 2 + 1);
3809
2.11M
        }
3810
3811
17.0M
        if((num_cu_parts > 1) && (ctr < 3))
3812
8.38M
        {
3813
            /* set the neighbour map to 1 */
3814
8.38M
            ihevce_set_nbr_map(
3815
8.38M
                ps_ctxt->pu1_ctb_nbr_map,
3816
8.38M
                ps_ctxt->i4_nbr_map_strd,
3817
8.38M
                cu_pos_x,
3818
8.38M
                cu_pos_y,
3819
8.38M
                trans_size >> 2,
3820
8.38M
                1);
3821
3822
            /* block level updates block number (1 & 3 )*/
3823
8.38M
            pv_curr_src = (UWORD8 *)pv_curr_src + trans_size;
3824
8.38M
            pv_pred_org = (UWORD8 *)pv_pred_org + trans_size;
3825
8.38M
            pi2_deq_data += trans_size;
3826
3827
8.38M
            switch(ctr)
3828
8.38M
            {
3829
2.85M
            case 0:
3830
2.85M
            {
3831
2.85M
                pu1_left = pu1_recon + trans_size - 1;
3832
2.85M
                pu1_top += trans_size;
3833
2.85M
                pu1_top_left = pu1_top - 1;
3834
2.85M
                left_strd = i4_recon_stride;
3835
3836
2.85M
                break;
3837
0
            }
3838
2.78M
            case 1:
3839
2.78M
            {
3840
2.78M
                ASSERT(
3841
2.78M
                    (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 0) ||
3842
2.78M
                    (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 1));
3843
3844
                /* Since the 'lumaRefSubstitution' function expects both Top and */
3845
                /* TopRight recon pixels to be present in the same buffer */
3846
2.78M
                if(ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] !=
3847
2.78M
                   ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1])
3848
156k
                {
3849
156k
                    UWORD8 *pu1_src =
3850
156k
                        ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3851
156k
                             [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) +
3852
156k
                        trans_size;
3853
156k
                    UWORD8 *pu1_dst =
3854
156k
                        ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3855
156k
                             [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) +
3856
156k
                        trans_size;
3857
3858
156k
                    ps_ctxt->s_cmn_opt_func.pf_copy_2d(
3859
156k
                        pu1_dst, i4_recon_stride, pu1_src, i4_recon_stride, trans_size, trans_size);
3860
3861
156k
                    ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] =
3862
156k
                        ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0];
3863
156k
                }
3864
3865
2.78M
                pu1_left = (UWORD8 *)pv_cu_left + trans_size * cu_left_stride;
3866
2.78M
                pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3867
2.78M
                               [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) +
3868
2.78M
                          (trans_size - 1) * i4_recon_stride;
3869
2.78M
                pu1_top_left = pu1_left - cu_left_stride;
3870
2.78M
                left_strd = cu_left_stride;
3871
3872
2.78M
                break;
3873
2.78M
            }
3874
2.75M
            case 2:
3875
2.75M
            {
3876
2.75M
                ASSERT(
3877
2.75M
                    (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 0) ||
3878
2.75M
                    (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 1));
3879
3880
2.75M
                pu1_left = pu1_recon + trans_size - 1;
3881
2.75M
                pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
3882
2.75M
                               [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) +
3883
2.75M
                          (trans_size - 1) * i4_recon_stride + trans_size;
3884
2.75M
                pu1_top_left = pu1_top - 1;
3885
2.75M
                left_strd = i4_recon_stride;
3886
3887
2.75M
                break;
3888
2.75M
            }
3889
8.38M
            }
3890
3891
8.38M
            pu1_csbf_buf += num_4x4_in_tu;
3892
8.38M
            cu_pos_x += num_4x4_in_tu;
3893
8.38M
            ps_nbr_4x4 += num_4x4_in_tu;
3894
8.38M
            ps_top_nbr_4x4 += num_4x4_in_tu;
3895
8.38M
            ps_tmp_lt_4x4 = ps_nbr_4x4 - 1;
3896
3897
8.38M
            pu1_intra_pred_mode++;
3898
3899
            /* after 2 blocks increment the pointers to bottom blocks */
3900
8.38M
            if(1 == ctr)
3901
2.78M
            {
3902
2.78M
                pv_curr_src = (UWORD8 *)pv_curr_src - (trans_size << 1);
3903
2.78M
                pv_curr_src = (UWORD8 *)pv_curr_src + (trans_size * src_strd);
3904
3905
2.78M
                pv_pred_org = (UWORD8 *)pv_pred_org - (trans_size << 1);
3906
2.78M
                pv_pred_org = (UWORD8 *)pv_pred_org + (trans_size * pred_strd_org);
3907
2.78M
                pi2_deq_data -= (trans_size << 1);
3908
2.78M
                pi2_deq_data += (trans_size * deq_data_strd);
3909
3910
2.78M
                pu1_csbf_buf -= (num_4x4_in_tu << 1);
3911
2.78M
                pu1_csbf_buf += (num_4x4_in_tu * csbf_strd);
3912
3913
2.78M
                ps_nbr_4x4 -= (num_4x4_in_tu << 1);
3914
2.78M
                ps_nbr_4x4 += (num_4x4_in_tu * num_4x4_in_cu);
3915
2.78M
                ps_top_nbr_4x4 = ps_nbr_4x4 - num_4x4_in_cu;
3916
2.78M
                ps_tmp_lt_4x4 = ps_left_nbr_4x4 + (num_4x4_in_tu * nbr_4x4_left_strd);
3917
3918
                /* decrement pos x to start */
3919
2.78M
                cu_pos_x -= (num_4x4_in_tu << 1);
3920
2.78M
                cu_pos_y += num_4x4_in_tu;
3921
2.78M
            }
3922
8.38M
        }
3923
3924
17.0M
#if RDOPT_ENABLE
3925
        /* compute the RDOPT cost for the current TU */
3926
17.0M
        ai8_cand_rdopt_cost[best_intra_buf_idx] += COMPUTE_RATE_COST_CLIP30(
3927
17.0M
            ai4_tu_bits[best_intra_buf_idx], ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
3928
17.0M
#endif
3929
3930
        /* accumulate the costs */
3931
17.0M
        total_rdopt_cost += ai8_cand_rdopt_cost[best_intra_buf_idx];
3932
3933
17.0M
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
3934
17.0M
        {
3935
            /* Early exit : If the current running cost exceeds
3936
            the prev. best mode cost, break */
3937
17.0M
            if(total_rdopt_cost > prev_best_rdopt_cost)
3938
853k
            {
3939
853k
                return (total_rdopt_cost);
3940
853k
            }
3941
17.0M
        }
3942
3943
        /* if transfrom size is 4x4 then only first luma 4x4 will have chroma*/
3944
16.1M
        chrm_present_flag = (4 != trans_size) ? 1 : INTRA_PRED_CHROMA_IDX_NONE;
3945
3946
16.1M
        pu4_nbr_flags++;
3947
16.1M
    }
3948
    /* Modify the cost function for this CU. */
3949
    /* loop in for 8x8 blocks */
3950
7.94M
    if(ps_ctxt->u1_enable_psyRDOPT)
3951
0
    {
3952
0
        UWORD8 *pu1_recon_cu;
3953
0
        WORD32 recon_stride;
3954
0
        WORD32 curr_pos_x;
3955
0
        WORD32 curr_pos_y;
3956
0
        WORD32 start_index;
3957
0
        WORD32 num_horz_cu_in_ctb;
3958
0
        WORD32 cu_size;
3959
0
        WORD32 had_block_size;
3960
3961
        /* tODO: sreenivasa ctb size has to be used appropriately */
3962
0
        had_block_size = 8;
3963
0
        cu_size = ps_cu_analyse->u1_cu_size; /* todo */
3964
0
        num_horz_cu_in_ctb = 64 / had_block_size;
3965
3966
0
        curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
3967
0
        curr_pos_y = ps_cu_analyse->b3_cu_pos_y << 3; /* pel units */
3968
0
        recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
3969
0
        pu1_recon_cu =
3970
0
            ((UWORD8 *)ps_final_prms->s_recon_datastore
3971
0
                 .apv_luma_recon_bufs[ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]);
3972
        /* + \  curr_pos_x + curr_pos_y * recon_stride; */
3973
3974
        /* start index to index the source satd of curr cu int he current ctb*/
3975
0
        start_index =
3976
0
            (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
3977
3978
0
        {
3979
0
            total_rdopt_cost += ihevce_psy_rd_cost(
3980
0
                ps_ctxt->ai4_source_satd_8x8,
3981
0
                pu1_recon_cu,
3982
0
                recon_stride,
3983
0
                1,  //
3984
0
                cu_size,
3985
0
                0,  // pic type
3986
0
                0,  //layer id
3987
0
                ps_ctxt->i4_satd_lamda,  // lambda
3988
0
                start_index,
3989
0
                ps_ctxt->u1_is_input_data_hbd,
3990
0
                ps_ctxt->u4_psy_strength,
3991
0
                &ps_ctxt->s_cmn_opt_func
3992
3993
0
            );  // 8 bit
3994
0
        }
3995
0
    }
3996
3997
#if !FORCE_INTRA_TU_DEPTH_TO_0  //RATIONALISE_NUM_RDO_MODES_IN_PQ_AND_HQ
3998
7.94M
    if(TU_EQ_SUBCU == func_proc_mode)
3999
513k
    {
4000
513k
        UWORD8 au1_tu_eq_cu_div2_modes[4];
4001
513k
        UWORD8 au1_freq_of_mode[4];
4002
4003
513k
        WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D(
4004
513k
            ps_final_prms->au1_intra_pred_mode, au1_tu_eq_cu_div2_modes, au1_freq_of_mode, 4);
4005
4006
513k
        if(1 == i4_num_clusters)
4007
120k
        {
4008
120k
            ps_final_prms->u2_num_pus_in_cu = 1;
4009
120k
            ps_final_prms->u1_part_mode = SIZE_2Nx2N;
4010
120k
        }
4011
513k
    }
4012
7.94M
#endif
4013
4014
    /* store the num TUs*/
4015
7.94M
    ps_final_prms->u2_num_tus_in_cu = u2_num_tus_in_cu;
4016
4017
    /* update the bytes consumed */
4018
7.94M
    ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
4019
4020
    /* store the current cu size to final prms */
4021
7.94M
    ps_final_prms->u1_cu_size = ps_cu_analyse->u1_cu_size;
4022
4023
    /* cu bits will be having luma residual bits till this point    */
4024
    /* if zero_cbf eval is disabled then cu bits will be zero       */
4025
7.94M
    ps_final_prms->u4_cu_luma_res_bits = cu_bits;
4026
4027
    /* ------------- Chroma processing -------------- */
4028
    /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEDD preset*/
4029
7.94M
    if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
4030
6.68M
    {
4031
6.68M
        LWORD64 chrm_rdopt_cost;
4032
6.68M
        WORD32 chrm_rdopt_tu_bits;
4033
4034
        /* Store the current RDOPT cost to enable early exit in chrom_prcs */
4035
6.68M
        ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
4036
4037
6.68M
        chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
4038
6.68M
            ps_ctxt,
4039
6.68M
            curr_buf_idx,
4040
6.68M
            func_proc_mode,
4041
6.68M
            ps_chrm_cu_buf_prms->pu1_curr_src,
4042
6.68M
            ps_chrm_cu_buf_prms->i4_chrm_src_stride,
4043
6.68M
            ps_chrm_cu_buf_prms->pu1_cu_left,
4044
6.68M
            ps_chrm_cu_buf_prms->pu1_cu_top,
4045
6.68M
            ps_chrm_cu_buf_prms->pu1_cu_top_left,
4046
6.68M
            ps_chrm_cu_buf_prms->i4_cu_left_stride,
4047
6.68M
            cu_pos_x_8pelunits,
4048
6.68M
            cu_pos_y_8pelunits,
4049
6.68M
            &chrm_rdopt_tu_bits,
4050
6.68M
            i4_alpha_stim_multiplier,
4051
6.68M
            u1_is_cu_noisy);
4052
4053
6.68M
#if WEIGH_CHROMA_COST
4054
6.68M
        chrm_rdopt_cost = (LWORD64)(
4055
6.68M
            (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
4056
6.68M
             (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
4057
6.68M
            CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
4058
6.68M
#endif
4059
4060
6.68M
#if CHROMA_RDOPT_ENABLE
4061
6.68M
        total_rdopt_cost += chrm_rdopt_cost;
4062
6.68M
#endif
4063
6.68M
        cu_bits += chrm_rdopt_tu_bits;
4064
4065
        /* cu bits for chroma residual if chroma rdopt is on       */
4066
        /* if zero_cbf eval is disabled then cu bits will be zero  */
4067
6.68M
        ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
4068
4069
6.68M
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
4070
6.68M
        {
4071
            /* Early exit : If the current running cost exceeds
4072
            the prev. best mode cost, break */
4073
6.68M
            if(total_rdopt_cost > prev_best_rdopt_cost)
4074
650k
            {
4075
650k
                return (total_rdopt_cost);
4076
650k
            }
4077
6.68M
        }
4078
6.68M
    }
4079
1.26M
    else
4080
1.26M
    {}
4081
4082
    /* RDOPT copy States :  Best after all luma TUs to current */
4083
7.29M
    COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4084
7.29M
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4085
7.29M
                .s_cabac_ctxt.au1_ctxt_models[0] +
4086
7.29M
            IHEVC_CAB_COEFFX_PREFIX,
4087
7.29M
        &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4088
7.29M
        IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4089
4090
    /* get the neighbour availability flags for current cu  */
4091
7.29M
    ihevce_get_only_nbr_flag(
4092
7.29M
        &s_nbr,
4093
7.29M
        ps_ctxt->pu1_ctb_nbr_map,
4094
7.29M
        ps_ctxt->i4_nbr_map_strd,
4095
7.29M
        (cu_pos_x_8pelunits << 1),
4096
7.29M
        (cu_pos_y_8pelunits << 1),
4097
7.29M
        (trans_size << 1),
4098
7.29M
        (trans_size << 1));
4099
4100
    /* call the entropy rdo encode to get the bit estimate for current cu */
4101
    /*if ZERO_CBF eval is enabled then this function will return only CU header bits */
4102
7.29M
    {
4103
        /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
4104
7.29M
        WORD32 cbf_bits, header_bits;
4105
4106
7.29M
        header_bits = ihevce_entropy_rdo_encode_cu(
4107
7.29M
            &ps_ctxt->s_rdopt_entropy_ctxt,
4108
7.29M
            ps_final_prms,
4109
7.29M
            cu_pos_x_8pelunits,
4110
7.29M
            cu_pos_y_8pelunits,
4111
7.29M
            ps_cu_analyse->u1_cu_size,
4112
7.29M
            s_nbr.u1_top_avail,
4113
7.29M
            s_nbr.u1_left_avail,
4114
7.29M
            &ps_final_prms->pu1_cu_coeffs[0],
4115
7.29M
            &cbf_bits);
4116
4117
7.29M
        cu_bits += header_bits;
4118
4119
        /* cbf bits are excluded from header bits, instead considered as texture bits */
4120
        /* incase if zero cbf eval is disabled then texture bits gets added here */
4121
7.29M
        ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
4122
7.29M
        ps_final_prms->u4_cu_cbf_bits = cbf_bits;
4123
4124
7.29M
#if RDOPT_ENABLE
4125
        /* add the cost of coding the cu bits */
4126
7.29M
        total_rdopt_cost +=
4127
7.29M
            COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
4128
7.29M
#endif
4129
7.29M
    }
4130
7.29M
    return (total_rdopt_cost);
4131
7.94M
}
4132
/*!
4133
******************************************************************************
4134
* \if Function name : ihevce_inter_rdopt_cu_ntu \endif
4135
*
4136
* \brief
4137
*    Inter coding unit function which performs the TQ, IT, IQ and recon for luma
4138
*
4139
* \param[in] ps_ctxt       enc_loop module ctxt pointer
4140
* \param[in] ps_inter_cand pointer to inter candidate structure
4141
* \param[in] pv_src        pointer to source data buffer
4142
* \param[in] cu_size       Current CU size
4143
* \param[in] cu_pos_x      cu position x w.r.t to ctb
4144
* \param[in] cu_pos_y      cu position y w.r.t to ctb
4145
* \param[in] ps_cu_prms    pointer to CU level params (provides the luma source buffer stride)
4146
* \param[in] curr_buf_idx  buffer index for current output storage
4147
* \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure
4148
*
4149
* \return
4150
*    Rdopt cost
4151
*
4152
* \author
4153
*  Ittiam
4154
*
4155
*****************************************************************************
4156
*/
4157
LWORD64 ihevce_inter_rdopt_cu_ntu(
4158
    ihevce_enc_loop_ctxt_t *ps_ctxt,
4159
    enc_loop_cu_prms_t *ps_cu_prms,
4160
    void *pv_src,
4161
    WORD32 cu_size,
4162
    WORD32 cu_pos_x,
4163
    WORD32 cu_pos_y,
4164
    WORD32 curr_buf_idx,
4165
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
4166
    cu_inter_cand_t *ps_inter_cand,
4167
    cu_analyse_t *ps_cu_analyse,
4168
    WORD32 i4_alpha_stim_multiplier)
4169
797k
{
4170
797k
    enc_loop_cu_final_prms_t *ps_final_prms;
4171
797k
    nbr_4x4_t *ps_nbr_4x4;
4172
797k
    tu_prms_t s_tu_prms[64 * 4];
4173
797k
    tu_prms_t *ps_tu_prms;
4174
4175
797k
    WORD32 i4_perform_rdoq;
4176
797k
    WORD32 i4_perform_sbh;
4177
797k
    WORD32 ai4_tu_split_flags[4];
4178
797k
    WORD32 ai4_tu_early_cbf[4];
4179
797k
    WORD32 num_split_flags = 1;
4180
797k
    WORD32 i;
4181
797k
    UWORD8 u1_tu_size;
4182
797k
    UWORD8 *pu1_pred;
4183
797k
    UWORD8 *pu1_ecd_data;
4184
797k
    WORD16 *pi2_deq_data;
4185
797k
    UWORD8 *pu1_csbf_buf;
4186
797k
    UWORD8 *pu1_tu_sz_sft;
4187
797k
    UWORD8 *pu1_tu_posx;
4188
797k
    UWORD8 *pu1_tu_posy;
4189
797k
    LWORD64 total_rdopt_cost;
4190
797k
    WORD32 ctr;
4191
797k
    WORD32 chrm_ctr;
4192
797k
    WORD32 num_tu_in_cu = 0;
4193
797k
    WORD32 pred_stride;
4194
797k
    WORD32 recon_stride;
4195
797k
    WORD32 trans_size = ps_cu_analyse->u1_cu_size;
4196
797k
    WORD32 csbf_strd;
4197
797k
    WORD32 chrm_present_flag;
4198
797k
    WORD32 ecd_data_bytes_cons;
4199
797k
    WORD32 num_4x4_in_cu;
4200
797k
    WORD32 num_4x4_in_tu;
4201
797k
    WORD32 recon_func_mode;
4202
797k
    WORD32 cu_bits;
4203
797k
    UWORD8 u1_compute_spatial_ssd;
4204
4205
    /* min_trans_size is initialized to some huge number than usual TU sizes */
4206
797k
    WORD32 i4_min_trans_size = 256;
4207
    /* Get the RDOPT cost of the best CU mode for early_exit */
4208
797k
    LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
4209
797k
    WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
4210
4211
    /* model for no residue syntax qt root cbf flag */
4212
797k
    UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX];
4213
4214
    /* backup copy of cabac states for restoration if zero cu reside rdo wins later */
4215
797k
    UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
4216
4217
    /* for skip cases tables are not reqquired */
4218
797k
    UWORD8 u1_skip_tu_sz_sft = 0;
4219
797k
    UWORD8 u1_skip_tu_posx = 0;
4220
797k
    UWORD8 u1_skip_tu_posy = 0;
4221
797k
    UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
4222
4223
    /* get the pointers based on curbuf idx */
4224
797k
    ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
4225
797k
    ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
4226
797k
    pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
4227
797k
    pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
4228
797k
    csbf_strd = ps_ctxt->i4_cu_csbf_strd;
4229
797k
    pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
4230
4231
797k
    pred_stride = ps_inter_cand->i4_pred_data_stride;
4232
797k
    recon_stride = cu_size;
4233
797k
    pu1_pred = ps_inter_cand->pu1_pred_data;
4234
797k
    chrm_ctr = 0;
4235
797k
    ecd_data_bytes_cons = 0;
4236
797k
    total_rdopt_cost = 0;
4237
797k
    num_4x4_in_cu = cu_size >> 2;
4238
797k
    recon_func_mode = PRED_MODE_INTER;
4239
797k
    cu_bits = 0;
4240
4241
    /* get the 4x4 level postion of current cu */
4242
797k
    cu_pos_x = cu_pos_x << 1;
4243
797k
    cu_pos_y = cu_pos_y << 1;
4244
4245
    /* default value for cu coded flag */
4246
797k
    ps_final_prms->u1_is_cu_coded = 0;
4247
4248
    /*init of ssd of CU accuumulated over all TU*/
4249
797k
    ps_final_prms->u4_cu_sad = 0;
4250
4251
    /* populate the coeffs scan idx */
4252
797k
    ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
4253
4254
797k
#if ENABLE_INTER_ZCU_COST
4255
    /* reset cu not coded cost */
4256
797k
    ps_ctxt->i8_cu_not_coded_cost = 0;
4257
4258
    /* backup copy of cabac states for restoration if zero cu reside rdo wins later */
4259
797k
    memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END);
4260
797k
#endif
4261
4262
797k
    if(ps_cu_analyse->u1_cu_size == 64)
4263
25.3k
    {
4264
25.3k
        num_split_flags = 4;
4265
25.3k
        u1_tu_size = 32;
4266
25.3k
    }
4267
772k
    else
4268
772k
    {
4269
772k
        num_split_flags = 1;
4270
772k
        u1_tu_size = ps_cu_analyse->u1_cu_size;
4271
772k
    }
4272
4273
    /* ckeck for skip mode */
4274
797k
    if(1 == ps_final_prms->u1_skip_flag)
4275
271k
    {
4276
271k
        if(64 == cu_size)
4277
8.54k
        {
4278
            /* TU = CU/2 is set but no trnaform is evaluated  */
4279
8.54k
            num_tu_in_cu = 4;
4280
8.54k
            pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
4281
8.54k
            pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
4282
8.54k
            pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
4283
8.54k
        }
4284
262k
        else
4285
262k
        {
4286
            /* TU = CU is set but no trnaform is evaluated  */
4287
262k
            num_tu_in_cu = 1;
4288
262k
            pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
4289
262k
            pu1_tu_posx = &u1_skip_tu_posx;
4290
262k
            pu1_tu_posy = &u1_skip_tu_posy;
4291
262k
        }
4292
4293
271k
        recon_func_mode = PRED_MODE_SKIP;
4294
271k
    }
4295
    /* check for PU part mode being AMP or No AMP */
4296
526k
    else if(ps_final_prms->u1_part_mode < SIZE_2NxnU)
4297
469k
    {
4298
469k
        if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64))
4299
452k
        {
4300
            /* TU= CU is evaluated 2Nx2N inter case */
4301
452k
            num_tu_in_cu = 1;
4302
452k
            pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
4303
452k
            pu1_tu_posx = &u1_skip_tu_posx;
4304
452k
            pu1_tu_posy = &u1_skip_tu_posy;
4305
452k
        }
4306
16.5k
        else
4307
16.5k
        {
4308
            /* currently TU= CU/2 is evaluated for all inter case */
4309
16.5k
            num_tu_in_cu = 4;
4310
16.5k
            pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
4311
16.5k
            pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
4312
16.5k
            pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
4313
16.5k
        }
4314
469k
    }
4315
56.8k
    else
4316
56.8k
    {
4317
        /* for AMP cases one level of TU recurssion is done */
4318
        /* based on oreintation of the partitions           */
4319
56.8k
        num_tu_in_cu = 10;
4320
56.8k
        pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0];
4321
56.8k
        pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
4322
56.8k
        pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
4323
56.8k
    }
4324
4325
797k
    ps_tu_prms = &s_tu_prms[0];
4326
797k
    num_tu_in_cu = 0;
4327
4328
1.67M
    for(i = 0; i < num_split_flags; i++)
4329
873k
    {
4330
873k
        WORD32 i4_x_off = 0, i4_y_off = 0;
4331
4332
873k
        if(i == 1 || i == 3)
4333
50.7k
        {
4334
50.7k
            i4_x_off = 32;
4335
50.7k
        }
4336
4337
873k
        if(i == 2 || i == 3)
4338
50.7k
        {
4339
50.7k
            i4_y_off = 32;
4340
50.7k
        }
4341
4342
873k
        if(1 == ps_final_prms->u1_skip_flag)
4343
297k
        {
4344
297k
            ai4_tu_split_flags[0] = 0;
4345
297k
            ps_inter_cand->ai4_tu_split_flag[i] = 0;
4346
4347
297k
            ai4_tu_early_cbf[0] = 0;
4348
297k
        }
4349
576k
        else
4350
576k
        {
4351
576k
            ai4_tu_split_flags[0] = ps_inter_cand->ai4_tu_split_flag[i];
4352
576k
            ai4_tu_early_cbf[0] = ps_inter_cand->ai4_tu_early_cbf[i];
4353
576k
        }
4354
4355
873k
        ps_tu_prms->u1_tu_size = u1_tu_size;
4356
4357
873k
        ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
4358
873k
            ps_tu_prms,
4359
873k
            &num_tu_in_cu,
4360
873k
            0,
4361
873k
            ai4_tu_split_flags[0],
4362
873k
            ai4_tu_early_cbf[0],
4363
873k
            i4_x_off,
4364
873k
            i4_y_off);
4365
873k
    }
4366
4367
    /* loop for all tu blocks in current cu */
4368
797k
    ps_tu_prms = &s_tu_prms[0];
4369
2.34M
    for(ctr = 0; ctr < num_tu_in_cu; ctr++)
4370
1.54M
    {
4371
1.54M
        trans_size = ps_tu_prms->u1_tu_size;
4372
4373
1.54M
        if(i4_min_trans_size > trans_size)
4374
804k
        {
4375
804k
            i4_min_trans_size = trans_size;
4376
804k
        }
4377
1.54M
        ps_tu_prms++;
4378
1.54M
    }
4379
4380
797k
    if(ps_ctxt->i1_cu_qp_delta_enable)
4381
178k
    {
4382
178k
        ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, i4_min_trans_size, 0);
4383
178k
    }
4384
4385
797k
    if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
4386
0
    {
4387
0
        ps_ctxt->i8_cl_ssd_lambda_qf =
4388
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
4389
0
             100.0f);
4390
0
        ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
4391
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
4392
0
             (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
4393
0
    }
4394
4395
797k
    u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
4396
549k
                             (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
4397
99.9k
                             CONVERT_SSDS_TO_SPATIAL_DOMAIN;
4398
4399
797k
    if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
4400
0
    {
4401
0
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
4402
0
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
4403
0
    }
4404
4405
797k
    if(!u1_compute_spatial_ssd)
4406
697k
    {
4407
697k
        ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
4408
697k
        ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
4409
697k
    }
4410
99.9k
    else
4411
99.9k
    {
4412
99.9k
        ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1;
4413
99.9k
    }
4414
4415
797k
    ps_tu_prms = &s_tu_prms[0];
4416
4417
797k
    ASSERT(num_tu_in_cu <= 256);
4418
4419
    /* RDOPT copy States :  TU init (best until prev TU) to current */
4420
797k
    memcpy(
4421
797k
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4422
797k
             .s_cabac_ctxt.au1_ctxt_models[0],
4423
797k
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
4424
797k
        IHEVC_CAB_COEFFX_PREFIX);
4425
4426
2.25M
    for(ctr = 0; ctr < num_tu_in_cu; ctr++)
4427
1.53M
    {
4428
1.53M
        WORD32 curr_bytes;
4429
1.53M
        WORD32 tx_size;
4430
1.53M
        WORD32 cbf, zero_col, zero_row;
4431
1.53M
        LWORD64 rdopt_cost;
4432
1.53M
        UWORD8 u1_is_recon_available;
4433
4434
1.53M
        WORD32 curr_pos_x;
4435
1.53M
        WORD32 curr_pos_y;
4436
1.53M
        nbr_4x4_t *ps_cur_nbr_4x4;
4437
1.53M
        UWORD8 *pu1_cur_pred;
4438
1.53M
        UWORD8 *pu1_cur_src;
4439
1.53M
        UWORD8 *pu1_cur_recon;
4440
1.53M
        WORD16 *pi2_cur_deq_data;
4441
1.53M
        UWORD32 u4_tu_sad;
4442
1.53M
        WORD32 tu_bits;
4443
4444
1.53M
        WORD32 i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
4445
4446
1.53M
        trans_size = ps_tu_prms->u1_tu_size;
4447
        /* get the current pos x and pos y in pixels */
4448
1.53M
        curr_pos_x = ps_tu_prms->u1_x_off;  //((cu_size >> 2) * pu1_tu_posx[ctr]);
4449
1.53M
        curr_pos_y = ps_tu_prms->u1_y_off;  //((cu_size >> 2) * pu1_tu_posy[ctr]);
4450
4451
1.53M
        num_4x4_in_tu = trans_size >> 2;
4452
4453
#if FORCE_8x8_TFR
4454
        if(cu_size == 64)
4455
        {
4456
            curr_pos_x = ((cu_size >> 3) * pu1_tu_posx[ctr]);
4457
            curr_pos_y = ((cu_size >> 3) * pu1_tu_posy[ctr]);
4458
        }
4459
#endif
4460
4461
        /* increment the pointers to start of current TU  */
4462
1.53M
        pu1_cur_src = ((UWORD8 *)pv_src + curr_pos_x);
4463
1.53M
        pu1_cur_src += (curr_pos_y * src_strd);
4464
1.53M
        pu1_cur_pred = (pu1_pred + curr_pos_x);
4465
1.53M
        pu1_cur_pred += (curr_pos_y * pred_stride);
4466
1.53M
        pi2_cur_deq_data = pi2_deq_data + curr_pos_x;
4467
1.53M
        pi2_cur_deq_data += (curr_pos_y * cu_size);
4468
1.53M
        pu1_cur_recon = ((UWORD8 *)ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0]) +
4469
1.53M
                        curr_pos_x + curr_pos_y * i4_recon_stride;
4470
4471
1.53M
        ps_cur_nbr_4x4 = (ps_nbr_4x4 + (curr_pos_x >> 2));
4472
1.53M
        ps_cur_nbr_4x4 += ((curr_pos_y >> 2) * num_4x4_in_cu);
4473
4474
        /* RDOPT copy States :  TU init (best until prev TU) to current */
4475
1.53M
        COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4476
1.53M
            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4477
1.53M
                    .s_cabac_ctxt.au1_ctxt_models[0] +
4478
1.53M
                IHEVC_CAB_COEFFX_PREFIX,
4479
1.53M
            &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4480
1.53M
            IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4481
4482
1.53M
        i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
4483
1.53M
        i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
4484
4485
        /* 2 multi-dimensional arrays based on trans size of rounding factor to be added here */
4486
        /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
4487
        /* Currently the complete array will contain only single value*/
4488
        /*The rounding factor is calculated with the formula
4489
        Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
4490
        rounding factor = (1 - DeadZone Val)
4491
4492
        Assumption: Cabac states of All the sub-blocks in the TU are considered independent
4493
        */
4494
1.53M
        if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0))
4495
0
        {
4496
0
            double i4_lamda_modifier;
4497
4498
0
            if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
4499
0
            {
4500
0
                i4_lamda_modifier = ps_ctxt->i4_lamda_modifier *
4501
0
                                    CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
4502
0
            }
4503
0
            else
4504
0
            {
4505
0
                i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
4506
0
            }
4507
0
            if(ps_ctxt->i4_use_const_lamda_modifier)
4508
0
            {
4509
0
                if(ISLICE == ps_ctxt->i1_slice_type)
4510
0
                {
4511
0
                    i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
4512
0
                }
4513
0
                else
4514
0
                {
4515
0
                    i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
4516
0
                }
4517
0
            }
4518
0
            ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
4519
0
                &ps_ctxt->i4_quant_round_tu[0][0];
4520
0
            ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
4521
0
                &ps_ctxt->i4_quant_round_tu[1][0];
4522
4523
0
            memset(
4524
0
                ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
4525
0
                0,
4526
0
                trans_size * trans_size * sizeof(WORD32));
4527
0
            memset(
4528
0
                ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
4529
0
                0,
4530
0
                trans_size * trans_size * sizeof(WORD32));
4531
4532
0
            ihevce_quant_rounding_factor_gen(
4533
0
                trans_size,
4534
0
                1,
4535
0
                &ps_ctxt->s_rdopt_entropy_ctxt,
4536
0
                ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
4537
0
                ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
4538
0
                i4_lamda_modifier,
4539
0
                1);
4540
0
        }
4541
1.53M
        else
4542
1.53M
        {
4543
1.53M
            ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
4544
1.53M
                ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3];
4545
1.53M
            ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
4546
1.53M
                ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3];
4547
1.53M
        }
4548
4549
        /* call T Q IT IQ and recon function */
4550
1.53M
        cbf = ihevce_t_q_iq_ssd_scan_fxn(
4551
1.53M
            ps_ctxt,
4552
1.53M
            pu1_cur_pred,
4553
1.53M
            pred_stride,
4554
1.53M
            pu1_cur_src,
4555
1.53M
            src_strd,
4556
1.53M
            pi2_cur_deq_data,
4557
1.53M
            cu_size,
4558
1.53M
            pu1_cur_recon,
4559
1.53M
            i4_recon_stride,
4560
1.53M
            pu1_ecd_data,
4561
1.53M
            pu1_csbf_buf,
4562
1.53M
            csbf_strd,
4563
1.53M
            trans_size,
4564
1.53M
            recon_func_mode,
4565
1.53M
            &rdopt_cost,
4566
1.53M
            &curr_bytes,
4567
1.53M
            &tu_bits,
4568
1.53M
            &u4_tu_sad,
4569
1.53M
            &zero_col,
4570
1.53M
            &zero_row,
4571
1.53M
            &u1_is_recon_available,
4572
1.53M
            i4_perform_rdoq,
4573
1.53M
            i4_perform_sbh,
4574
1.53M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
4575
1.53M
            i4_alpha_stim_multiplier,
4576
1.53M
            u1_is_cu_noisy,
4577
1.53M
#endif
4578
1.53M
            u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
4579
1.53M
            ps_ctxt->u1_use_early_cbf_data ? ps_tu_prms->i4_early_cbf : 1);
4580
4581
#if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
4582
        if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
4583
        {
4584
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
4585
            rdopt_cost = ihevce_inject_stim_into_distortion(
4586
                pu1_cur_src,
4587
                src_strd,
4588
                pu1_cur_pred,
4589
                pred_stride,
4590
                rdopt_cost,
4591
                i4_alpha_stim_multiplier,
4592
                trans_size,
4593
                0,
4594
                ps_ctxt->u1_enable_psyRDOPT,
4595
                NULL_PLANE);
4596
#else
4597
            if(u1_compute_spatial_ssd && u1_is_recon_available)
4598
            {
4599
                rdopt_cost = ihevce_inject_stim_into_distortion(
4600
                    pu1_cur_src,
4601
                    src_strd,
4602
                    pu1_cur_recon,
4603
                    i4_recon_stride,
4604
                    rdopt_cost,
4605
                    i4_alpha_stim_multiplier,
4606
                    trans_size,
4607
                    0,
4608
                    NULL_PLANE);
4609
            }
4610
            else
4611
            {
4612
                rdopt_cost = ihevce_inject_stim_into_distortion(
4613
                    pu1_cur_src,
4614
                    src_strd,
4615
                    pu1_cur_pred,
4616
                    pred_stride,
4617
                    rdopt_cost,
4618
                    i4_alpha_stim_multiplier,
4619
                    trans_size,
4620
                    0,
4621
                    ps_ctxt->u1_enable_psyRDOPT,
4622
                    NULL_PLANE);
4623
            }
4624
#endif
4625
        }
4626
#endif
4627
4628
1.53M
        if(u1_compute_spatial_ssd && u1_is_recon_available)
4629
170k
        {
4630
170k
            ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = 0;
4631
170k
        }
4632
1.36M
        else
4633
1.36M
        {
4634
1.36M
            ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
4635
1.36M
        }
4636
4637
        /* accumulate the TU sad into cu sad */
4638
1.53M
        ps_final_prms->u4_cu_sad += u4_tu_sad;
4639
4640
        /* accumulate the TU bits into cu bits */
4641
1.53M
        cu_bits += tu_bits;
4642
4643
        /* inter cu is coded if any of the tu is coded in it */
4644
1.53M
        ps_final_prms->u1_is_cu_coded |= cbf;
4645
4646
        /* call the entropy function to get the bits */
4647
        /* add that to rd opt cost(SSD)              */
4648
4649
        /* update the bytes */
4650
1.53M
        ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
4651
1.53M
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = curr_bytes;
4652
        /* update the zero_row and col info for the final mode */
4653
1.53M
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col = zero_col;
4654
1.53M
        ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row = zero_row;
4655
4656
        /* update the bytes */
4657
1.53M
        ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
4658
4659
        /* update the total bytes cons */
4660
1.53M
        ecd_data_bytes_cons += curr_bytes;
4661
1.53M
        pu1_ecd_data += curr_bytes;
4662
4663
        /* RDOPT copy States :  New updated after curr TU to TU init */
4664
1.53M
        if(0 != cbf)
4665
250k
        {
4666
            /* update to new state only if CBF is non zero */
4667
250k
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4668
250k
                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4669
250k
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4670
250k
                        .s_cabac_ctxt.au1_ctxt_models[0] +
4671
250k
                    IHEVC_CAB_COEFFX_PREFIX,
4672
250k
                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4673
250k
        }
4674
4675
        /* by default chroma present is set to 1*/
4676
1.53M
        chrm_present_flag = 1;
4677
1.53M
        if(4 == trans_size)
4678
457k
        {
4679
            /* if tusize is 4x4 then only first luma 4x4 will have chroma*/
4680
457k
            if(0 != chrm_ctr)
4681
342k
            {
4682
342k
                chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE;
4683
342k
            }
4684
4685
            /* increment the chrm ctr unconditionally */
4686
457k
            chrm_ctr++;
4687
4688
            /* after ctr reached 4 reset it */
4689
457k
            if(4 == chrm_ctr)
4690
114k
            {
4691
114k
                chrm_ctr = 0;
4692
114k
            }
4693
457k
        }
4694
4695
1.53M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = cbf;
4696
1.53M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
4697
1.53M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
4698
1.53M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
4699
1.53M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
4700
1.53M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag;
4701
1.53M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp;
4702
1.53M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0;
4703
1.53M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0;
4704
1.53M
        GETRANGE(tx_size, trans_size);
4705
1.53M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
4706
1.53M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + (curr_pos_x >> 2);
4707
1.53M
        ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + (curr_pos_y >> 2);
4708
4709
        /* replicate the nbr 4x4 structure for all 4x4 blocks of the current TU */
4710
1.53M
        ps_cur_nbr_4x4->b1_y_cbf = cbf;
4711
        /*copy the cu qp. This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/
4712
1.53M
        ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
4713
4714
        /* Qp and cbf are stored for the all 4x4 in TU */
4715
1.53M
        {
4716
1.53M
            WORD32 i, j;
4717
1.53M
            nbr_4x4_t *ps_tmp_4x4;
4718
1.53M
            ps_tmp_4x4 = ps_cur_nbr_4x4;
4719
4720
6.96M
            for(i = 0; i < num_4x4_in_tu; i++)
4721
5.42M
            {
4722
35.5M
                for(j = 0; j < num_4x4_in_tu; j++)
4723
30.1M
                {
4724
30.1M
                    ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp;
4725
30.1M
                    ps_tmp_4x4[j].b1_y_cbf = cbf;
4726
30.1M
                }
4727
                /* row level update*/
4728
5.42M
                ps_tmp_4x4 += num_4x4_in_cu;
4729
5.42M
            }
4730
1.53M
        }
4731
4732
1.53M
#if RDOPT_ENABLE
4733
        /* compute the rdopt cost */
4734
1.53M
        rdopt_cost +=
4735
1.53M
            COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
4736
1.53M
#endif
4737
        /* accumulate the costs */
4738
1.53M
        total_rdopt_cost += rdopt_cost;
4739
4740
1.53M
        ps_tu_prms++;
4741
4742
1.53M
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
4743
1.53M
        {
4744
            /* Early exit : If the current running cost exceeds
4745
            the prev. best mode cost, break */
4746
1.53M
            if(total_rdopt_cost > prev_best_rdopt_cost)
4747
80.7k
            {
4748
80.7k
                return (total_rdopt_cost);
4749
80.7k
            }
4750
1.53M
        }
4751
1.53M
    }
4752
4753
    /* Modify the cost function for this CU. */
4754
    /* loop in for 8x8 blocks */
4755
717k
    if(ps_ctxt->u1_enable_psyRDOPT)
4756
0
    {
4757
0
        UWORD8 *pu1_recon_cu;
4758
0
        WORD32 recon_stride;
4759
0
        WORD32 curr_pos_x;
4760
0
        WORD32 curr_pos_y;
4761
0
        WORD32 start_index;
4762
0
        WORD32 num_horz_cu_in_ctb;
4763
0
        WORD32 had_block_size;
4764
4765
        /* TODO: sreenivasa ctb size has to be used appropriately */
4766
0
        had_block_size = 8;
4767
0
        num_horz_cu_in_ctb = 64 / had_block_size;
4768
4769
0
        curr_pos_x = cu_pos_x << 2; /* pel units */
4770
0
        curr_pos_y = cu_pos_y << 2; /* pel units */
4771
0
        recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
4772
0
        pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore
4773
0
                            .apv_luma_recon_bufs[0]);  // already pointing to the current CU recon
4774
        //+ \curr_pos_x + curr_pos_y * recon_stride;
4775
4776
        /* start index to index the source satd of curr cu in the current ctb */
4777
0
        start_index =
4778
0
            (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
4779
4780
0
        {
4781
0
            total_rdopt_cost += ihevce_psy_rd_cost(
4782
0
                ps_ctxt->ai4_source_satd_8x8,
4783
0
                pu1_recon_cu,
4784
0
                recon_stride,
4785
0
                1,  //howz stride
4786
0
                cu_size,
4787
0
                0,  // pic type
4788
0
                0,  //layer id
4789
0
                ps_ctxt->i4_satd_lamda,  // lambda
4790
0
                start_index,
4791
0
                ps_ctxt->u1_is_input_data_hbd,
4792
0
                ps_ctxt->u4_psy_strength,
4793
0
                &ps_ctxt->s_cmn_opt_func);  // 8 bit
4794
0
        }
4795
0
    }
4796
4797
    /* store the num TUs*/
4798
717k
    ps_final_prms->u2_num_tus_in_cu = num_tu_in_cu;
4799
4800
    /* update the bytes consumed */
4801
717k
    ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
4802
4803
    /* store the current cu size to final prms */
4804
717k
    ps_final_prms->u1_cu_size = cu_size;
4805
4806
    /* cu bits will be having luma residual bits till this point    */
4807
    /* if zero_cbf eval is disabled then cu bits will be zero       */
4808
717k
    ps_final_prms->u4_cu_luma_res_bits = cu_bits;
4809
4810
    /* ------------- Chroma processing -------------- */
4811
    /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEED preset */
4812
717k
    if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
4813
310k
    {
4814
310k
        LWORD64 chrm_rdopt_cost;
4815
310k
        WORD32 chrm_rdopt_tu_bits;
4816
4817
        /* Store the current RDOPT cost to enable early exit in chrom_prcs */
4818
310k
        ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
4819
4820
310k
        chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
4821
310k
            ps_ctxt,
4822
310k
            curr_buf_idx,
4823
310k
            0, /* TU mode : Don't care in Inter patrh */
4824
310k
            ps_chrm_cu_buf_prms->pu1_curr_src,
4825
310k
            ps_chrm_cu_buf_prms->i4_chrm_src_stride,
4826
310k
            ps_chrm_cu_buf_prms->pu1_cu_left,
4827
310k
            ps_chrm_cu_buf_prms->pu1_cu_top,
4828
310k
            ps_chrm_cu_buf_prms->pu1_cu_top_left,
4829
310k
            ps_chrm_cu_buf_prms->i4_cu_left_stride,
4830
310k
            (cu_pos_x >> 1),
4831
310k
            (cu_pos_y >> 1),
4832
310k
            &chrm_rdopt_tu_bits,
4833
310k
            i4_alpha_stim_multiplier,
4834
310k
            u1_is_cu_noisy);
4835
4836
310k
#if WEIGH_CHROMA_COST
4837
310k
        chrm_rdopt_cost = (LWORD64)(
4838
310k
            (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
4839
310k
             (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
4840
310k
            CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
4841
310k
#endif
4842
4843
310k
#if CHROMA_RDOPT_ENABLE
4844
310k
        total_rdopt_cost += chrm_rdopt_cost;
4845
310k
#endif
4846
310k
        cu_bits += chrm_rdopt_tu_bits;
4847
4848
        /* during chroma evaluation if skip decision was over written     */
4849
        /* then the current skip candidate is set to a non skip candidate */
4850
310k
        ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag;
4851
4852
        /* cu bits for chroma residual if chroma rdopt is on       */
4853
        /* if zero_cbf eval is disabled then cu bits will be zero  */
4854
310k
        ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
4855
4856
310k
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
4857
310k
        {
4858
            /* Early exit : If the current running cost exceeds
4859
            the prev. best mode cost, break */
4860
310k
            if(total_rdopt_cost > prev_best_rdopt_cost)
4861
19.1k
            {
4862
19.1k
                return (total_rdopt_cost);
4863
19.1k
            }
4864
310k
        }
4865
310k
    }
4866
406k
    else
4867
406k
    {}
4868
4869
697k
#if SHRINK_INTER_TUTREE
4870
    /* ------------- Quadtree TU split  optimization ------------  */
4871
697k
    if(ps_final_prms->u1_is_cu_coded)
4872
108k
    {
4873
108k
        ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree(
4874
108k
            &ps_final_prms->as_tu_enc_loop[0],
4875
108k
            &ps_final_prms->as_tu_enc_loop_temp_prms[0],
4876
108k
            &ps_final_prms->s_recon_datastore,
4877
108k
            num_tu_in_cu,
4878
108k
            (ps_ctxt->u1_chroma_array_type == 2));
4879
108k
    }
4880
697k
#endif
4881
4882
    /* RDOPT copy States :  Best after all luma TUs (and chroma,if enabled)to current */
4883
697k
    COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
4884
697k
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
4885
697k
                .s_cabac_ctxt.au1_ctxt_models[0] +
4886
697k
            IHEVC_CAB_COEFFX_PREFIX,
4887
697k
        &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
4888
697k
        IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
4889
4890
    /* -------- Bit estimate for RD opt -------------- */
4891
697k
    {
4892
697k
        nbr_avail_flags_t s_nbr;
4893
        /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
4894
697k
        WORD32 cbf_bits, header_bits;
4895
4896
        /* get the neighbour availability flags for current cu  */
4897
697k
        ihevce_get_only_nbr_flag(
4898
697k
            &s_nbr,
4899
697k
            ps_ctxt->pu1_ctb_nbr_map,
4900
697k
            ps_ctxt->i4_nbr_map_strd,
4901
697k
            cu_pos_x,
4902
697k
            cu_pos_y,
4903
697k
            (cu_size >> 2),
4904
697k
            (cu_size >> 2));
4905
4906
        /* call the entropy rdo encode to get the bit estimate for current cu */
4907
697k
        header_bits = ihevce_entropy_rdo_encode_cu(
4908
697k
            &ps_ctxt->s_rdopt_entropy_ctxt,
4909
697k
            ps_final_prms,
4910
697k
            (cu_pos_x >> 1), /*  back to 8x8 pel units   */
4911
697k
            (cu_pos_y >> 1), /*  back to 8x8 pel units   */
4912
697k
            cu_size,
4913
697k
            ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
4914
697k
                                           : s_nbr.u1_top_avail,
4915
697k
            s_nbr.u1_left_avail,
4916
697k
            &ps_final_prms->pu1_cu_coeffs[0],
4917
697k
            &cbf_bits);
4918
4919
697k
        cu_bits += header_bits;
4920
4921
        /* cbf bits are excluded from header bits, instead considered as texture bits */
4922
        /* incase if zero cbf eval is disabled then texture bits gets added here */
4923
697k
        ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
4924
697k
        ps_final_prms->u4_cu_cbf_bits = cbf_bits;
4925
4926
697k
#if RDOPT_ENABLE
4927
        /* add the cost of coding the header bits */
4928
697k
        total_rdopt_cost +=
4929
697k
            COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
4930
4931
697k
#if ENABLE_INTER_ZCU_COST
4932
        /* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */
4933
697k
        if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level))
4934
108k
        {
4935
108k
            LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost;
4936
4937
108k
            WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) &&
4938
87.6k
                                      (1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag);
4939
4940
108k
            cab_ctxt_t *ps_cab_ctxt =
4941
108k
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt;
4942
4943
            /* Read header bits generated after ihevce_entropy_rdo_encode_cu() call */
4944
108k
            UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12;
4945
4946
            /* account for coding qt_root_cbf = 0 */
4947
            /* First subtract cost for coding as 1 (part of header bits) and then add cost for coding as 0 */
4948
108k
            u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0];
4949
108k
            if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1])
4950
129
                u4_cu_hdr_bits_q12 = 0;
4951
108k
            else
4952
108k
                u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1];
4953
4954
            /* add the cost of coding the header bits */
4955
108k
            i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30(
4956
108k
                u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */,
4957
108k
                ps_ctxt->i8_cl_ssd_lambda_qf,
4958
108k
                (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
4959
4960
108k
            if(ps_ctxt->u1_enable_psyRDOPT)
4961
0
            {
4962
0
                i8_cu_not_coded_cost = total_rdopt_cost + 1;
4963
0
            }
4964
4965
            /* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */
4966
108k
            if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu))
4967
2.05k
            {
4968
2.05k
                WORD32 tx_size;
4969
4970
                /* force cu as not coded and update the cost */
4971
2.05k
                ps_final_prms->u1_is_cu_coded = 0;
4972
2.05k
                ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
4973
2.05k
                ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
4974
4975
2.05k
                total_rdopt_cost = i8_cu_not_coded_cost;
4976
4977
                /* reset num TUs to 1 unless cu size is 64 */
4978
2.05k
                ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1;
4979
2.05k
                trans_size = (64 == cu_size) ? 32 : cu_size;
4980
2.05k
                GETRANGE(tx_size, trans_size);
4981
4982
                /* reset the bytes consumed */
4983
2.05k
                ps_final_prms->i4_num_bytes_ecd_data = 0;
4984
4985
                /* reset texture related bits and roll back header bits*/
4986
2.05k
                ps_final_prms->u4_cu_cbf_bits = 0;
4987
2.05k
                ps_final_prms->u4_cu_luma_res_bits = 0;
4988
2.05k
                ps_final_prms->u4_cu_chroma_res_bits = 0;
4989
2.05k
                ps_final_prms->u4_cu_hdr_bits =
4990
2.05k
                    (u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q;
4991
4992
                /* update cabac model with qtroot cbf = 0 decision */
4993
2.05k
                ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] =
4994
2.05k
                    gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1];
4995
4996
                /* restore untouched cabac models for, tusplit, cbfs, texture etc */
4997
2.05k
                memcpy(
4998
2.05k
                    &ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM],
4999
2.05k
                    &au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM],
5000
2.05k
                    (IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM));
5001
5002
                /* mark all tus as not coded for final eval */
5003
6.23k
                for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++)
5004
4.17k
                {
5005
4.17k
                    WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0;
5006
4.17k
                    WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0;
5007
5008
4.17k
                    nbr_4x4_t *ps_cur_nbr_4x4 =
5009
4.17k
                        ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu);
5010
5011
4.17k
                    num_4x4_in_tu = trans_size >> 2;
5012
5013
4.17k
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0;
5014
4.17k
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0;
5015
4.17k
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0;
5016
5017
4.17k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0;
5018
4.17k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
5019
4.17k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
5020
5021
4.17k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
5022
4.17k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
5023
5024
4.17k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
5025
4.17k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x;
5026
4.17k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y;
5027
5028
                    /* reset cbf for the all 4x4 in TU */
5029
4.17k
                    {
5030
4.17k
                        WORD32 i, j;
5031
4.17k
                        nbr_4x4_t *ps_tmp_4x4;
5032
4.17k
                        ps_tmp_4x4 = ps_cur_nbr_4x4;
5033
5034
33.6k
                        for(i = 0; i < num_4x4_in_tu; i++)
5035
29.4k
                        {
5036
252k
                            for(j = 0; j < num_4x4_in_tu; j++)
5037
222k
                            {
5038
222k
                                ps_tmp_4x4[j].b1_y_cbf = 0;
5039
222k
                            }
5040
                            /* row level update*/
5041
29.4k
                            ps_tmp_4x4 += num_4x4_in_cu;
5042
29.4k
                        }
5043
4.17k
                    }
5044
4.17k
                }
5045
2.05k
            }
5046
108k
        }
5047
697k
#endif /* ENABLE_INTER_ZCU_COST */
5048
5049
697k
#endif /* RDOPT_ENABLE */
5050
697k
    }
5051
5052
697k
    return (total_rdopt_cost);
5053
717k
}
5054
5055
#if ENABLE_RDO_BASED_TU_RECURSION
5056
LWORD64 ihevce_inter_tu_tree_selector_and_rdopt_cost_computer(
5057
    ihevce_enc_loop_ctxt_t *ps_ctxt,
5058
    enc_loop_cu_prms_t *ps_cu_prms,
5059
    void *pv_src,
5060
    WORD32 cu_size,
5061
    WORD32 cu_pos_x,
5062
    WORD32 cu_pos_y,
5063
    WORD32 curr_buf_idx,
5064
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
5065
    cu_inter_cand_t *ps_inter_cand,
5066
    cu_analyse_t *ps_cu_analyse,
5067
    WORD32 i4_alpha_stim_multiplier)
5068
2.72M
{
5069
2.72M
    tu_tree_node_t as_tu_nodes[256 + 64 + 16 + 4 + 1];
5070
2.72M
    buffer_data_for_tu_t s_buffer_data_for_tu;
5071
2.72M
    enc_loop_cu_final_prms_t *ps_final_prms;
5072
2.72M
    nbr_4x4_t *ps_nbr_4x4;
5073
5074
2.72M
    WORD32 num_split_flags = 1;
5075
2.72M
    UWORD8 u1_tu_size;
5076
2.72M
    UWORD8 *pu1_pred;
5077
2.72M
    UWORD8 *pu1_ecd_data;
5078
2.72M
    WORD16 *pi2_deq_data;
5079
2.72M
    UWORD8 *pu1_csbf_buf;
5080
2.72M
    UWORD8 *pu1_tu_sz_sft;
5081
2.72M
    UWORD8 *pu1_tu_posx;
5082
2.72M
    UWORD8 *pu1_tu_posy;
5083
2.72M
    LWORD64 total_rdopt_cost;
5084
2.72M
    WORD32 ctr;
5085
2.72M
    WORD32 chrm_ctr;
5086
2.72M
    WORD32 pred_stride;
5087
2.72M
    WORD32 recon_stride;
5088
2.72M
    WORD32 trans_size = ps_cu_analyse->u1_cu_size;
5089
2.72M
    WORD32 csbf_strd;
5090
2.72M
    WORD32 ecd_data_bytes_cons;
5091
2.72M
    WORD32 num_4x4_in_cu;
5092
2.72M
    WORD32 num_4x4_in_tu;
5093
2.72M
    WORD32 recon_func_mode;
5094
2.72M
    WORD32 cu_bits;
5095
2.72M
    UWORD8 u1_compute_spatial_ssd;
5096
    /* backup copy of cabac states for restoration if zero cu residue rdo wins later */
5097
2.72M
    UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
5098
5099
2.72M
    WORD32 i4_min_trans_size = 256;
5100
2.72M
    LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
5101
2.72M
    WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
5102
    /* model for no residue syntax qt root cbf flag */
5103
2.72M
    UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX];
5104
2.72M
    UWORD8 u1_skip_tu_sz_sft = 0;
5105
2.72M
    UWORD8 u1_skip_tu_posx = 0;
5106
2.72M
    UWORD8 u1_skip_tu_posy = 0;
5107
2.72M
    UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
5108
5109
2.72M
    ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
5110
2.72M
    ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
5111
2.72M
    pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
5112
2.72M
    pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
5113
2.72M
    csbf_strd = ps_ctxt->i4_cu_csbf_strd;
5114
2.72M
    pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
5115
2.72M
    pred_stride = ps_inter_cand->i4_pred_data_stride;
5116
2.72M
    recon_stride = cu_size;
5117
2.72M
    pu1_pred = ps_inter_cand->pu1_pred_data;
5118
2.72M
    chrm_ctr = 0;
5119
2.72M
    ecd_data_bytes_cons = 0;
5120
2.72M
    total_rdopt_cost = 0;
5121
2.72M
    num_4x4_in_cu = cu_size >> 2;
5122
2.72M
    recon_func_mode = PRED_MODE_INTER;
5123
2.72M
    cu_bits = 0;
5124
5125
    /* get the 4x4 level position of current cu */
5126
2.72M
    cu_pos_x = cu_pos_x << 1;
5127
2.72M
    cu_pos_y = cu_pos_y << 1;
5128
5129
2.72M
    ps_final_prms->u1_is_cu_coded = 0;
5130
2.72M
    ps_final_prms->u4_cu_sad = 0;
5131
5132
    /* populate the coeffs scan idx */
5133
2.72M
    ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
5134
5135
2.72M
#if ENABLE_INTER_ZCU_COST
5136
    /* reset cu not coded cost */
5137
2.72M
    ps_ctxt->i8_cu_not_coded_cost = 0;
5138
5139
    /* backup copy of cabac states for restoration if zero cu residue rdo wins later */
5140
2.72M
    memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END);
5141
2.72M
#endif
5142
5143
2.72M
    if(ps_cu_analyse->u1_cu_size == 64)
5144
34.3k
    {
5145
34.3k
        num_split_flags = 4;
5146
34.3k
        u1_tu_size = 32;
5147
34.3k
    }
5148
2.68M
    else
5149
2.68M
    {
5150
2.68M
        num_split_flags = 1;
5151
2.68M
        u1_tu_size = ps_cu_analyse->u1_cu_size;
5152
2.68M
    }
5153
5154
2.72M
    if(1 == ps_final_prms->u1_skip_flag)
5155
803k
    {
5156
803k
        if(64 == cu_size)
5157
9.08k
        {
5158
            /* TU = CU/2 is set but no transform is evaluated  */
5159
9.08k
            pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
5160
9.08k
            pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
5161
9.08k
            pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
5162
9.08k
        }
5163
794k
        else
5164
794k
        {
5165
            /* TU = CU is set but no transform is evaluated  */
5166
794k
            pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
5167
794k
            pu1_tu_posx = &u1_skip_tu_posx;
5168
794k
            pu1_tu_posy = &u1_skip_tu_posy;
5169
794k
        }
5170
5171
803k
        recon_func_mode = PRED_MODE_SKIP;
5172
803k
    }
5173
    /* check for PU part mode being AMP or No AMP */
5174
1.91M
    else if(ps_final_prms->u1_part_mode < SIZE_2NxnU)
5175
1.41M
    {
5176
1.41M
        if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64))
5177
1.31M
        {
5178
            /* TU= CU is evaluated 2Nx2N inter case */
5179
1.31M
            pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
5180
1.31M
            pu1_tu_posx = &u1_skip_tu_posx;
5181
1.31M
            pu1_tu_posy = &u1_skip_tu_posy;
5182
1.31M
        }
5183
100k
        else
5184
100k
        {
5185
            /* currently TU= CU/2 is evaluated for all inter case */
5186
100k
            pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
5187
100k
            pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
5188
100k
            pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
5189
100k
        }
5190
1.41M
    }
5191
504k
    else
5192
504k
    {
5193
        /* for AMP cases one level of TU recursion is done  */
5194
        /* based on orientation of the partitions           */
5195
504k
        pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0];
5196
504k
        pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
5197
504k
        pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
5198
504k
    }
5199
5200
2.72M
    i4_min_trans_size = 4;
5201
5202
2.72M
    if(ps_ctxt->i1_cu_qp_delta_enable)
5203
1.61M
    {
5204
1.61M
        ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, i4_min_trans_size, 0);
5205
1.61M
    }
5206
5207
2.72M
    if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
5208
0
    {
5209
0
        ps_ctxt->i8_cl_ssd_lambda_qf =
5210
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
5211
0
             100.0f);
5212
0
        ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
5213
0
            ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
5214
0
             (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
5215
0
    }
5216
5217
2.72M
    u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
5218
1.59M
                             (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
5219
1.59M
                             CONVERT_SSDS_TO_SPATIAL_DOMAIN;
5220
5221
2.72M
    if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
5222
0
    {
5223
0
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
5224
0
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
5225
0
    }
5226
5227
2.72M
    if(!u1_compute_spatial_ssd)
5228
1.12M
    {
5229
1.12M
        ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
5230
1.12M
        ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
5231
1.12M
    }
5232
1.59M
    else
5233
1.59M
    {
5234
1.59M
        ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1;
5235
5236
1.59M
        if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))
5237
0
        {
5238
0
            ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 1;
5239
0
        }
5240
1.59M
    }
5241
5242
    /* RDOPT copy States :  TU init (best until prev TU) to current */
5243
2.72M
    memcpy(
5244
2.72M
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5245
2.72M
             .s_cabac_ctxt.au1_ctxt_models[0],
5246
2.72M
        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
5247
2.72M
        IHEVC_CAB_COEFFX_PREFIX);
5248
5249
2.72M
    ihevce_tu_tree_init(
5250
2.72M
        as_tu_nodes,
5251
2.72M
        cu_size,
5252
2.72M
        (cu_size == 64) ? !ps_inter_cand->b1_skip_flag : 0,
5253
2.72M
        ps_inter_cand->b1_skip_flag ? 0 : ps_ctxt->u1_max_inter_tr_depth,
5254
2.72M
        INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
5255
2.72M
        ps_ctxt->u1_chroma_array_type == 2);
5256
5257
2.72M
    if(!ps_inter_cand->b1_skip_flag && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
5258
0
    {
5259
0
        ihevce_tuSplitArray_to_tuTree_mapper(
5260
0
            as_tu_nodes,
5261
0
            ps_inter_cand->ai4_tu_split_flag,
5262
0
            cu_size,
5263
0
            cu_size,
5264
0
            MAX(MIN_TU_SIZE, (cu_size >> ps_ctxt->u1_max_inter_tr_depth)),
5265
0
            MIN(MAX_TU_SIZE, cu_size),
5266
0
            ps_inter_cand->b1_skip_flag);
5267
0
    }
5268
5269
2.72M
    ASSERT(ihevce_tu_tree_coverage_in_cu(as_tu_nodes) == cu_size * cu_size);
5270
5271
2.72M
#if ENABLE_INTER_ZCU_COST
5272
2.72M
    ps_ctxt->i8_cu_not_coded_cost = 0;
5273
2.72M
#endif
5274
5275
2.72M
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_src = pv_src;
5276
2.72M
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_pred = pu1_pred;
5277
2.72M
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_recon =
5278
2.72M
        ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0];
5279
2.72M
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_src_stride = src_strd;
5280
2.72M
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_pred_stride = pred_stride;
5281
2.72M
    s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_recon_stride =
5282
2.72M
        ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
5283
2.72M
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_src = ps_chrm_cu_buf_prms->pu1_curr_src;
5284
2.72M
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred =
5285
2.72M
        ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
5286
2.72M
        curr_buf_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) + ((ps_ctxt->u1_chroma_array_type == 2) *
5287
2.72M
                                                              (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
5288
2.72M
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_recon =
5289
2.72M
        ps_final_prms->s_recon_datastore.apv_chroma_recon_bufs[0];
5290
2.72M
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_src_stride =
5291
2.72M
        ps_chrm_cu_buf_prms->i4_chrm_src_stride;
5292
2.72M
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride =
5293
2.72M
        ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
5294
2.72M
    s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_recon_stride =
5295
2.72M
        ps_final_prms->s_recon_datastore.i4_chromaRecon_stride;
5296
2.72M
    s_buffer_data_for_tu.ps_nbr_data_buf = ps_nbr_4x4;
5297
2.72M
    s_buffer_data_for_tu.pi2_deq_data = pi2_deq_data;
5298
2.72M
    s_buffer_data_for_tu.pi2_deq_data_chroma =
5299
2.72M
        pi2_deq_data + ps_final_prms->i4_chrm_deq_coeff_strt_idx;
5300
2.72M
    s_buffer_data_for_tu.i4_nbr_data_buf_stride = num_4x4_in_cu;
5301
2.72M
    s_buffer_data_for_tu.i4_deq_data_stride = cu_size;
5302
2.72M
    s_buffer_data_for_tu.i4_deq_data_stride_chroma = cu_size;
5303
2.72M
    s_buffer_data_for_tu.ppu1_ecd = &pu1_ecd_data;
5304
5305
2.72M
    if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))
5306
0
    {
5307
0
        UWORD8 i;
5308
5309
0
        UWORD8 *pu1_pred = (UWORD8 *)s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred;
5310
5311
0
        for(i = 0; i < (!!ps_inter_cand->b3_part_size) + 1; i++)
5312
0
        {
5313
0
            pu_t *ps_pu;
5314
5315
0
            WORD32 inter_pu_wd;
5316
0
            WORD32 inter_pu_ht;
5317
5318
0
            ps_pu = ps_inter_cand->as_inter_pu + i;
5319
5320
0
            inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
5321
0
            inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
5322
0
            inter_pu_ht <<= (ps_ctxt->u1_chroma_array_type == 2);
5323
0
            ihevce_chroma_inter_pred_pu(
5324
0
                &ps_ctxt->s_mc_ctxt,
5325
0
                ps_pu,
5326
0
                pu1_pred,
5327
0
                s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride);
5328
0
            if(!!ps_inter_cand->b3_part_size)
5329
0
            {
5330
                /* 2Nx__ partition case */
5331
0
                if(inter_pu_wd == cu_size)
5332
0
                {
5333
0
                    pu1_pred +=
5334
0
                        (inter_pu_ht *
5335
0
                         s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride);
5336
0
                }
5337
5338
                /* __x2N partition case */
5339
0
                if(inter_pu_ht == (cu_size >> !(ps_ctxt->u1_chroma_array_type == 2)))
5340
0
                {
5341
0
                    pu1_pred += inter_pu_wd;
5342
0
                }
5343
0
            }
5344
0
        }
5345
0
    }
5346
5347
#if !ENABLE_TOP_DOWN_TU_RECURSION
5348
    total_rdopt_cost = ihevce_tu_tree_selector(
5349
        ps_ctxt,
5350
        as_tu_nodes,
5351
        &s_buffer_data_for_tu,
5352
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5353
             .s_cabac_ctxt.au1_ctxt_models[0],
5354
        recon_func_mode,
5355
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
5356
        i4_alpha_stim_multiplier,
5357
        u1_is_cu_noisy,
5358
#endif
5359
        0,
5360
        ps_ctxt->u1_max_inter_tr_depth,
5361
        ps_inter_cand->b3_part_size,
5362
        u1_compute_spatial_ssd);
5363
#else
5364
2.72M
    total_rdopt_cost = ihevce_topDown_tu_tree_selector(
5365
2.72M
        ps_ctxt,
5366
2.72M
        as_tu_nodes,
5367
2.72M
        &s_buffer_data_for_tu,
5368
2.72M
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5369
2.72M
             .s_cabac_ctxt.au1_ctxt_models[0],
5370
2.72M
        recon_func_mode,
5371
2.72M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
5372
2.72M
        i4_alpha_stim_multiplier,
5373
2.72M
        u1_is_cu_noisy,
5374
2.72M
#endif
5375
2.72M
        0,
5376
2.72M
        ps_ctxt->u1_max_inter_tr_depth,
5377
2.72M
        ps_inter_cand->b3_part_size,
5378
2.72M
        INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
5379
2.72M
        u1_compute_spatial_ssd);
5380
2.72M
#endif
5381
5382
2.72M
    ps_final_prms->u2_num_tus_in_cu = 0;
5383
2.72M
    ps_final_prms->u4_cu_luma_res_bits = 0;
5384
2.72M
    ps_final_prms->u4_cu_sad = 0;
5385
2.72M
    total_rdopt_cost = 0;
5386
2.72M
    ecd_data_bytes_cons = 0;
5387
2.72M
    cu_bits = 0;
5388
2.72M
#if ENABLE_INTER_ZCU_COST
5389
2.72M
    ps_ctxt->i8_cu_not_coded_cost = 0;
5390
2.72M
#endif
5391
2.72M
    ps_final_prms->u1_is_cu_coded = 0;
5392
2.72M
    ps_final_prms->u1_cu_size = cu_size;
5393
5394
2.72M
    ihevce_tu_selector_debriefer(
5395
2.72M
        as_tu_nodes,
5396
2.72M
        ps_final_prms,
5397
2.72M
        &total_rdopt_cost,
5398
2.72M
#if ENABLE_INTER_ZCU_COST
5399
2.72M
        &ps_ctxt->i8_cu_not_coded_cost,
5400
2.72M
#endif
5401
2.72M
        &ecd_data_bytes_cons,
5402
2.72M
        &cu_bits,
5403
2.72M
        &ps_final_prms->u2_num_tus_in_cu,
5404
2.72M
        ps_ctxt->i4_cu_qp,
5405
2.72M
        cu_pos_x * 4,
5406
2.72M
        cu_pos_y * 4,
5407
2.72M
        INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
5408
2.72M
        (ps_ctxt->u1_chroma_array_type == 2),
5409
2.72M
        POS_TL);
5410
5411
2.72M
    if(!(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)))
5412
2.72M
    {
5413
2.72M
        ps_final_prms->i4_chrm_cu_coeff_strt_idx = ecd_data_bytes_cons;
5414
2.72M
    }
5415
5416
    /* Modify the cost function for this CU. */
5417
    /* loop in for 8x8 blocks */
5418
2.72M
    if(ps_ctxt->u1_enable_psyRDOPT)
5419
0
    {
5420
0
        UWORD8 *pu1_recon_cu;
5421
0
        WORD32 recon_stride;
5422
0
        WORD32 curr_pos_x;
5423
0
        WORD32 curr_pos_y;
5424
0
        WORD32 start_index;
5425
0
        WORD32 num_horz_cu_in_ctb;
5426
0
        WORD32 had_block_size;
5427
5428
        /* TODO: sreenivasa ctb size has to be used appropriately */
5429
0
        had_block_size = 8;
5430
0
        num_horz_cu_in_ctb = 64 / had_block_size;
5431
5432
0
        curr_pos_x = cu_pos_x << 2; /* pel units */
5433
0
        curr_pos_y = cu_pos_y << 2; /* pel units */
5434
0
        recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
5435
0
        pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore
5436
0
                            .apv_luma_recon_bufs[0]);  // already pointing to the current CU recon
5437
        //+ \curr_pos_x + curr_pos_y * recon_stride;
5438
5439
        /* start index to index the source satd of curr cu in the current ctb */
5440
0
        start_index =
5441
0
            (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
5442
5443
0
        {
5444
0
            total_rdopt_cost += ihevce_psy_rd_cost(
5445
0
                ps_ctxt->ai4_source_satd_8x8,
5446
0
                pu1_recon_cu,
5447
0
                recon_stride,
5448
0
                1,  //howz stride
5449
0
                cu_size,
5450
0
                0,  // pic type
5451
0
                0,  //layer id
5452
0
                ps_ctxt->i4_satd_lamda,  // lambda
5453
0
                start_index,
5454
0
                ps_ctxt->u1_is_input_data_hbd,
5455
0
                ps_ctxt->u4_psy_strength,
5456
0
                &ps_ctxt->s_cmn_opt_func);  // 8 bit
5457
0
        }
5458
0
    }
5459
5460
2.72M
    ps_final_prms->u1_chroma_intra_pred_mode = 4;
5461
5462
    /* update the bytes consumed */
5463
2.72M
    ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
5464
5465
    /* store the current cu size to final prms */
5466
2.72M
    ps_final_prms->u1_cu_size = cu_size;
5467
    /* ------------- Chroma processing -------------- */
5468
    /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEED preset */
5469
2.72M
    if(ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt &&
5470
2.72M
       !(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)))
5471
2.72M
    {
5472
2.72M
        LWORD64 chrm_rdopt_cost;
5473
2.72M
        WORD32 chrm_rdopt_tu_bits;
5474
5475
        /* Store the current RDOPT cost to enable early exit in chrom_prcs */
5476
2.72M
        ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
5477
5478
2.72M
        chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
5479
2.72M
            ps_ctxt,
5480
2.72M
            curr_buf_idx,
5481
2.72M
            0, /* TU mode : Don't care in Inter patrh */
5482
2.72M
            ps_chrm_cu_buf_prms->pu1_curr_src,
5483
2.72M
            ps_chrm_cu_buf_prms->i4_chrm_src_stride,
5484
2.72M
            ps_chrm_cu_buf_prms->pu1_cu_left,
5485
2.72M
            ps_chrm_cu_buf_prms->pu1_cu_top,
5486
2.72M
            ps_chrm_cu_buf_prms->pu1_cu_top_left,
5487
2.72M
            ps_chrm_cu_buf_prms->i4_cu_left_stride,
5488
2.72M
            (cu_pos_x >> 1),
5489
2.72M
            (cu_pos_y >> 1),
5490
2.72M
            &chrm_rdopt_tu_bits,
5491
2.72M
            i4_alpha_stim_multiplier,
5492
2.72M
            u1_is_cu_noisy);
5493
5494
2.72M
#if WEIGH_CHROMA_COST
5495
2.72M
        chrm_rdopt_cost = (LWORD64)(
5496
2.72M
            (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
5497
2.72M
             (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
5498
2.72M
            CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
5499
2.72M
#endif
5500
5501
2.72M
#if CHROMA_RDOPT_ENABLE
5502
2.72M
        total_rdopt_cost += chrm_rdopt_cost;
5503
2.72M
#endif
5504
2.72M
        cu_bits += chrm_rdopt_tu_bits;
5505
5506
        /* during chroma evaluation if skip decision was over written     */
5507
        /* then the current skip candidate is set to a non skip candidate */
5508
2.72M
        ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag;
5509
5510
        /* cu bits for chroma residual if chroma rdopt is on       */
5511
        /* if zero_cbf eval is disabled then cu bits will be zero  */
5512
2.72M
        ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
5513
5514
2.72M
        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
5515
2.72M
        {
5516
            /* Early exit : If the current running cost exceeds
5517
            the prev. best mode cost, break */
5518
2.72M
            if(total_rdopt_cost > prev_best_rdopt_cost)
5519
375k
            {
5520
375k
                return (total_rdopt_cost);
5521
375k
            }
5522
2.72M
        }
5523
2.72M
    }
5524
0
    else
5525
0
    {}
5526
5527
2.34M
#if SHRINK_INTER_TUTREE
5528
    /* ------------- Quadtree TU split  optimization ------------  */
5529
2.34M
    if(ps_final_prms->u1_is_cu_coded)
5530
330k
    {
5531
330k
        ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree(
5532
330k
            &ps_final_prms->as_tu_enc_loop[0],
5533
330k
            &ps_final_prms->as_tu_enc_loop_temp_prms[0],
5534
330k
            &ps_final_prms->s_recon_datastore,
5535
330k
            ps_final_prms->u2_num_tus_in_cu,
5536
330k
            (ps_ctxt->u1_chroma_array_type == 2));
5537
330k
    }
5538
2.34M
#endif
5539
5540
    /* RDOPT copy States :  Best after all luma TUs (and chroma,if enabled)to current */
5541
2.34M
    COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
5542
2.34M
        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
5543
2.34M
                .s_cabac_ctxt.au1_ctxt_models[0] +
5544
2.34M
            IHEVC_CAB_COEFFX_PREFIX,
5545
2.34M
        &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
5546
2.34M
        IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
5547
5548
    /* -------- Bit estimate for RD opt -------------- */
5549
2.34M
    {
5550
2.34M
        nbr_avail_flags_t s_nbr;
5551
        /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
5552
2.34M
        WORD32 cbf_bits, header_bits;
5553
5554
        /* get the neighbour availability flags for current cu  */
5555
2.34M
        ihevce_get_only_nbr_flag(
5556
2.34M
            &s_nbr,
5557
2.34M
            ps_ctxt->pu1_ctb_nbr_map,
5558
2.34M
            ps_ctxt->i4_nbr_map_strd,
5559
2.34M
            cu_pos_x,
5560
2.34M
            cu_pos_y,
5561
2.34M
            (cu_size >> 2),
5562
2.34M
            (cu_size >> 2));
5563
5564
        /* call the entropy rdo encode to get the bit estimate for current cu */
5565
2.34M
        header_bits = ihevce_entropy_rdo_encode_cu(
5566
2.34M
            &ps_ctxt->s_rdopt_entropy_ctxt,
5567
2.34M
            ps_final_prms,
5568
2.34M
            (cu_pos_x >> 1), /*  back to 8x8 pel units   */
5569
2.34M
            (cu_pos_y >> 1), /*  back to 8x8 pel units   */
5570
2.34M
            cu_size,
5571
2.34M
            ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
5572
2.34M
                                           : s_nbr.u1_top_avail,
5573
2.34M
            s_nbr.u1_left_avail,
5574
2.34M
            &ps_final_prms->pu1_cu_coeffs[0],
5575
2.34M
            &cbf_bits);
5576
5577
2.34M
        cu_bits += header_bits;
5578
5579
        /* cbf bits are excluded from header bits, instead considered as texture bits */
5580
        /* in case zero cbf eval is disabled, texture bits get added here */
5581
2.34M
        ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
5582
2.34M
        ps_final_prms->u4_cu_cbf_bits = cbf_bits;
5583
5584
2.34M
#if RDOPT_ENABLE
5585
        /* add the cost of coding the header bits */
5586
2.34M
        total_rdopt_cost +=
5587
2.34M
            COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
5588
5589
2.34M
#if ENABLE_INTER_ZCU_COST
5590
        /* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */
5591
2.34M
        if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level))
5592
330k
        {
5593
330k
            LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost;
5594
5595
330k
            WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) &&
5596
190k
                                      (1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag);
5597
5598
330k
            cab_ctxt_t *ps_cab_ctxt =
5599
330k
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt;
5600
5601
            /* Read header bits generated after ihevce_entropy_rdo_encode_cu() call  */
5602
330k
            UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12;
5603
5604
            /* account for coding qt_root_cbf = 0 */
5605
            /* First subtract cost for coding as 1 (part of header bits) and then add cost for coding as 0 */
5606
330k
            u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0];
5607
330k
            if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1])
5608
691
                u4_cu_hdr_bits_q12 = 0;
5609
329k
            else
5610
329k
                u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1];
5611
5612
            /* add the cost of coding the header bits */
5613
330k
            i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30(
5614
330k
                u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */,
5615
330k
                ps_ctxt->i8_cl_ssd_lambda_qf,
5616
330k
                (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
5617
5618
330k
            if(ps_ctxt->u1_enable_psyRDOPT)
5619
0
            {
5620
0
                i8_cu_not_coded_cost = total_rdopt_cost + 1;
5621
0
            }
5622
5623
            /* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */
5624
330k
            if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu))
5625
4.05k
            {
5626
4.05k
                WORD32 tx_size;
5627
5628
                /* force cu as not coded and update the cost */
5629
4.05k
                ps_final_prms->u1_is_cu_coded = 0;
5630
4.05k
                ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
5631
4.05k
                ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
5632
5633
4.05k
                total_rdopt_cost = i8_cu_not_coded_cost;
5634
5635
                /* reset num TUs to 1 unless cu size is 64 */
5636
4.05k
                ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1;
5637
4.05k
                trans_size = (64 == cu_size) ? 32 : cu_size;
5638
4.05k
                GETRANGE(tx_size, trans_size);
5639
5640
                /* reset the bytes consumed */
5641
4.05k
                ps_final_prms->i4_num_bytes_ecd_data = 0;
5642
5643
                /* reset texture related bits and roll back header bits*/
5644
4.05k
                ps_final_prms->u4_cu_cbf_bits = 0;
5645
4.05k
                ps_final_prms->u4_cu_luma_res_bits = 0;
5646
4.05k
                ps_final_prms->u4_cu_chroma_res_bits = 0;
5647
4.05k
                ps_final_prms->u4_cu_hdr_bits =
5648
4.05k
                    (u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q;
5649
5650
                /* update cabac model with qtroot cbf = 0 decision */
5651
4.05k
                ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] =
5652
4.05k
                    gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1];
5653
5654
                /* restore untouched cabac models for, tusplit, cbfs, texture etc */
5655
4.05k
                memcpy(
5656
4.05k
                    &ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM],
5657
4.05k
                    &au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM],
5658
4.05k
                    (IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM));
5659
5660
                /* mark all tus as not coded for final eval */
5661
9.76k
                for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++)
5662
5.70k
                {
5663
5.70k
                    WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0;
5664
5.70k
                    WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0;
5665
5666
5.70k
                    nbr_4x4_t *ps_cur_nbr_4x4 =
5667
5.70k
                        ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu);
5668
5669
5.70k
                    num_4x4_in_tu = trans_size >> 2;
5670
5671
5.70k
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0;
5672
5.70k
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0;
5673
5.70k
                    ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0;
5674
5675
5.70k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0;
5676
5.70k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
5677
5.70k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
5678
5679
5.70k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
5680
5.70k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
5681
5682
5.70k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
5683
5.70k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x;
5684
5.70k
                    ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y;
5685
5686
                    /* reset cbf for the all 4x4 in TU */
5687
5.70k
                    {
5688
5.70k
                        WORD32 i, j;
5689
5.70k
                        nbr_4x4_t *ps_tmp_4x4;
5690
5.70k
                        ps_tmp_4x4 = ps_cur_nbr_4x4;
5691
5692
39.7k
                        for(i = 0; i < num_4x4_in_tu; i++)
5693
34.0k
                        {
5694
270k
                            for(j = 0; j < num_4x4_in_tu; j++)
5695
236k
                            {
5696
236k
                                ps_tmp_4x4[j].b1_y_cbf = 0;
5697
236k
                            }
5698
                            /* row level update*/
5699
34.0k
                            ps_tmp_4x4 += num_4x4_in_cu;
5700
34.0k
                        }
5701
5.70k
                    }
5702
5.70k
                }
5703
4.05k
            }
5704
330k
        }
5705
2.34M
#endif /* ENABLE_INTER_ZCU_COST */
5706
5707
2.34M
#endif /* RDOPT_ENABLE */
5708
2.34M
    }
5709
5710
2.34M
    return (total_rdopt_cost);
5711
2.72M
}
5712
#endif
5713
5714
/*!
5715
******************************************************************************
5716
* \if Function name : ihevce_inter_rdopt_cu_mc_mvp \endif
5717
*
5718
* \brief
5719
*    Inter Coding unit funtion which performs MC and MVP calc for RD opt mode
5720
*
5721
* \param[in] ps_ctxt       enc_loop module ctxt pointer
5722
* \param[in] ps_inter_cand pointer to inter candidate structure
5723
* \param[in] cu_size         Current CU size
5724
* \param[in] cu_pos_x        cu position x w.r.t to ctb
5725
* \param[in] cu_pos_y        cu position y w.r.t to ctb
5726
* \param[in] ps_left_nbr_4x4 Left neighbour 4x4 structure pointer
5727
* \param[in] ps_top_nbr_4x4  top neighbour 4x4 structure pointer
5728
* \param[in] ps_topleft_nbr_4x4  top left neighbour 4x4 structure pointer
5729
* \param[in] nbr_4x4_left_strd  left neighbour 4x4 buffer stride
5730
* \param[in] curr_buf_idx Current Buffer index
5731
*
5732
* \return
5733
*    Rdopt cost
5734
*
5735
* \author
5736
*  Ittiam
5737
*
5738
*****************************************************************************
5739
*/
5740
LWORD64 ihevce_inter_rdopt_cu_mc_mvp(
5741
    ihevce_enc_loop_ctxt_t *ps_ctxt,
5742
    cu_inter_cand_t *ps_inter_cand,
5743
    WORD32 cu_size,
5744
    WORD32 cu_pos_x,
5745
    WORD32 cu_pos_y,
5746
    nbr_4x4_t *ps_left_nbr_4x4,
5747
    nbr_4x4_t *ps_top_nbr_4x4,
5748
    nbr_4x4_t *ps_topleft_nbr_4x4,
5749
    WORD32 nbr_4x4_left_strd,
5750
    WORD32 curr_buf_idx)
5751
3.52M
{
5752
    /* local variables */
5753
3.52M
    enc_loop_cu_final_prms_t *ps_final_prms;
5754
3.52M
    nbr_avail_flags_t s_nbr;
5755
3.52M
    nbr_4x4_t *ps_nbr_4x4;
5756
5757
3.52M
    UWORD8 au1_is_top_used[2][MAX_MVP_LIST_CAND];
5758
3.52M
    UWORD8 *pu1_pred;
5759
3.52M
    WORD32 rdopt_cost;
5760
3.52M
    WORD32 ctr;
5761
3.52M
    WORD32 num_cu_part;
5762
3.52M
    WORD32 inter_pu_wd;
5763
3.52M
    WORD32 inter_pu_ht;
5764
3.52M
    WORD32 pred_stride;
5765
5766
    /* get the pointers based on curbuf idx */
5767
3.52M
    ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
5768
3.52M
    ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
5769
3.52M
    pu1_pred = ps_inter_cand->pu1_pred_data;
5770
5771
3.52M
    pred_stride = ps_inter_cand->i4_pred_data_stride;
5772
5773
    /* store the partition mode in final prms */
5774
3.52M
    ps_final_prms->u1_part_mode = ps_inter_cand->b3_part_size;
5775
5776
    /* since encoder does not support NXN part type */
5777
    /* num parts can be either 1 or 2 only          */
5778
3.52M
    ASSERT(SIZE_NxN != ps_inter_cand->b3_part_size);
5779
5780
3.52M
    num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1;
5781
5782
    /* get the 4x4 level position of current cu */
5783
3.52M
    cu_pos_x = cu_pos_x << 1;
5784
3.52M
    cu_pos_y = cu_pos_y << 1;
5785
5786
    /* populate cu level params */
5787
3.52M
    ps_final_prms->u1_intra_flag = PRED_MODE_INTER;
5788
3.52M
    ps_final_prms->u2_num_pus_in_cu = num_cu_part;
5789
5790
    /* run a loop over all the partitons in cu */
5791
7.70M
    for(ctr = 0; ctr < num_cu_part; ctr++)
5792
4.18M
    {
5793
4.18M
        pu_mv_t as_pred_mv[MAX_MVP_LIST_CAND];
5794
4.18M
        pu_t *ps_pu;
5795
4.18M
        WORD32 skip_or_merge_flag;
5796
4.18M
        UWORD8 u1_use_mvp_from_top_row;
5797
5798
4.18M
        ps_pu = &ps_inter_cand->as_inter_pu[ctr];
5799
5800
        /* IF AMP then each partitions can have diff wd ht */
5801
4.18M
        inter_pu_wd = (ps_pu->b4_wd + 1) << 2;
5802
4.18M
        inter_pu_ht = (ps_pu->b4_ht + 1) << 2;
5803
5804
        /* populate reference pic buf id for bs compute */
5805
5806
        /* L0 */
5807
4.18M
        if(-1 != ps_pu->mv.i1_l0_ref_idx)
5808
3.92M
        {
5809
3.92M
            ps_pu->mv.i1_l0_ref_pic_buf_id =
5810
3.92M
                ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx]->i4_buf_id;
5811
3.92M
        }
5812
5813
        /* L1 */
5814
4.18M
        if(-1 != ps_pu->mv.i1_l1_ref_idx)
5815
1.44M
        {
5816
1.44M
            ps_pu->mv.i1_l1_ref_pic_buf_id =
5817
1.44M
                ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx]->i4_buf_id;
5818
1.44M
        }
5819
5820
        /* SKIP or merge check for every part */
5821
4.18M
        skip_or_merge_flag = ps_inter_cand->b1_skip_flag | ps_pu->b1_merge_flag;
5822
5823
        /* ----------- MV Prediction ----------------- */
5824
4.18M
        if(0 == skip_or_merge_flag)
5825
1.03M
        {
5826
            /* get the neighbour availability flags */
5827
1.03M
            ihevce_get_only_nbr_flag(
5828
1.03M
                &s_nbr,
5829
1.03M
                ps_ctxt->pu1_ctb_nbr_map,
5830
1.03M
                ps_ctxt->i4_nbr_map_strd,
5831
1.03M
                cu_pos_x,
5832
1.03M
                cu_pos_y,
5833
1.03M
                inter_pu_wd >> 2,
5834
1.03M
                inter_pu_ht >> 2);
5835
5836
1.03M
            if(ps_ctxt->u1_disable_intra_eval && DISABLE_TOP_SYNC && (ps_pu->b4_pos_y == 0))
5837
0
            {
5838
0
                u1_use_mvp_from_top_row = 0;
5839
0
            }
5840
1.03M
            else
5841
1.03M
            {
5842
1.03M
                u1_use_mvp_from_top_row = 1;
5843
1.03M
            }
5844
5845
1.03M
            if(!u1_use_mvp_from_top_row)
5846
0
            {
5847
0
                if(s_nbr.u1_top_avail || s_nbr.u1_top_lt_avail || s_nbr.u1_top_rt_avail)
5848
0
                {
5849
0
                    if(!s_nbr.u1_left_avail && !s_nbr.u1_bot_lt_avail)
5850
0
                    {
5851
0
                        WORD32 curr_cu_pos_in_row, cu_top_right_offset, cu_top_right_dep_pos;
5852
5853
                        /* Ensure Top Right Sync */
5854
0
                        if(!ps_ctxt->u1_use_top_at_ctb_boundary)
5855
0
                        {
5856
0
                            curr_cu_pos_in_row =
5857
0
                                ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x + (cu_pos_x << 2);
5858
5859
0
                            if(ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y == 0)
5860
0
                            {
5861
                                /* No wait for 1st row */
5862
0
                                cu_top_right_offset = -(MAX_CTB_SIZE);
5863
0
                                {
5864
0
                                    ihevce_tile_params_t *ps_col_tile_params =
5865
0
                                        ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
5866
0
                                         ps_ctxt->i4_tile_col_idx);
5867
5868
                                    /* No wait for 1st row */
5869
0
                                    cu_top_right_offset =
5870
0
                                        -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
5871
0
                                }
5872
0
                                cu_top_right_dep_pos = 0;
5873
0
                            }
5874
0
                            else
5875
0
                            {
5876
0
                                cu_top_right_offset = (cu_size) + 4;
5877
0
                                cu_top_right_dep_pos =
5878
0
                                    (ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y >> 6) - 1;
5879
0
                            }
5880
5881
0
                            ihevce_dmgr_chk_row_row_sync(
5882
0
                                ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
5883
0
                                curr_cu_pos_in_row,
5884
0
                                cu_top_right_offset,
5885
0
                                cu_top_right_dep_pos,
5886
0
                                ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
5887
0
                                ps_ctxt->thrd_id);
5888
0
                        }
5889
5890
0
                        u1_use_mvp_from_top_row = 1;
5891
0
                    }
5892
0
                    else
5893
0
                    {
5894
0
                        s_nbr.u1_top_avail = 0;
5895
0
                        s_nbr.u1_top_lt_avail = 0;
5896
0
                        s_nbr.u1_top_rt_avail = 0;
5897
0
                    }
5898
0
                }
5899
0
                else
5900
0
                {
5901
0
                    u1_use_mvp_from_top_row = 1;
5902
0
                }
5903
0
            }
5904
            /* Call the MV prediction module to get MVP */
5905
1.03M
            ihevce_mv_pred(
5906
1.03M
                &ps_ctxt->s_mv_pred_ctxt,
5907
1.03M
                ps_top_nbr_4x4,
5908
1.03M
                ps_left_nbr_4x4,
5909
1.03M
                ps_topleft_nbr_4x4,
5910
1.03M
                nbr_4x4_left_strd,
5911
1.03M
                &s_nbr,
5912
1.03M
                NULL, /* colocated MV */
5913
1.03M
                ps_pu,
5914
1.03M
                &as_pred_mv[0],
5915
1.03M
                au1_is_top_used);
5916
1.03M
        }
5917
5918
        /* store the nbr 4x4 structure */
5919
4.18M
        ps_nbr_4x4->b1_skip_flag = ps_inter_cand->b1_skip_flag;
5920
4.18M
        ps_nbr_4x4->b1_intra_flag = 0;
5921
4.18M
        ps_nbr_4x4->b1_pred_l0_flag = 0;
5922
4.18M
        ps_nbr_4x4->b1_pred_l1_flag = 0;
5923
5924
        /* DC is default mode for inter cu, required for intra mode signalling */
5925
4.18M
        ps_nbr_4x4->b6_luma_intra_mode = 1;
5926
5927
        /* copy the motion vectors to neighbour structure */
5928
4.18M
        ps_nbr_4x4->mv = ps_pu->mv;
5929
5930
        /* copy the PU to final out pu */
5931
4.18M
        ps_final_prms->as_pu_enc_loop[ctr] = *ps_pu;
5932
5933
        /* copy the PU to chroma */
5934
4.18M
        ps_final_prms->as_pu_chrm_proc[ctr] = *ps_pu;
5935
5936
        /* store the skip flag to final prms */
5937
4.18M
        ps_final_prms->u1_skip_flag = ps_inter_cand->b1_skip_flag;
5938
5939
        /* MVP index & MVD calc is gated on skip/merge flag */
5940
4.18M
        if(0 == skip_or_merge_flag)
5941
1.03M
        {
5942
            /* calculate the MVDs and popluate the MVP idx for L0 */
5943
1.03M
            if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode))
5944
940k
            {
5945
940k
                WORD32 idx0_cost, idx1_cost;
5946
5947
                /* calculate the ABS mvd for cand 0 */
5948
940k
                idx0_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[0].s_l0_mv.i2_mvx);
5949
940k
                idx0_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[0].s_l0_mv.i2_mvy);
5950
5951
                /* calculate the ABS mvd for cand 1 */
5952
940k
                if(u1_use_mvp_from_top_row)
5953
940k
                {
5954
940k
                    idx1_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[1].s_l0_mv.i2_mvx);
5955
940k
                    idx1_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[1].s_l0_mv.i2_mvy);
5956
940k
                }
5957
0
                else
5958
0
                {
5959
0
                    idx1_cost = INT_MAX;
5960
0
                }
5961
5962
                /* based on the least cost choose the mvp idx */
5963
940k
                if(idx0_cost <= idx1_cost)
5964
641k
                {
5965
641k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -=
5966
641k
                        as_pred_mv[0].s_l0_mv.i2_mvx;
5967
641k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -=
5968
641k
                        as_pred_mv[0].s_l0_mv.i2_mvy;
5969
5970
641k
                    ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 0;
5971
641k
                }
5972
299k
                else
5973
299k
                {
5974
299k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -=
5975
299k
                        as_pred_mv[1].s_l0_mv.i2_mvx;
5976
299k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -=
5977
299k
                        as_pred_mv[1].s_l0_mv.i2_mvy;
5978
5979
299k
                    ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 1;
5980
299k
                }
5981
5982
                /* set the pred l0 flag for neighbour storage */
5983
940k
                ps_nbr_4x4->b1_pred_l0_flag = 1;
5984
940k
            }
5985
            /* calculate the MVDs and popluate the MVP idx for L1 */
5986
1.03M
            if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode))
5987
134k
            {
5988
134k
                WORD32 idx0_cost, idx1_cost;
5989
5990
                /* calculate the ABS mvd for cand 0 */
5991
134k
                idx0_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[0].s_l1_mv.i2_mvx);
5992
134k
                idx0_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[0].s_l1_mv.i2_mvy);
5993
5994
                /* calculate the ABS mvd for cand 1 */
5995
134k
                if(u1_use_mvp_from_top_row)
5996
134k
                {
5997
134k
                    idx1_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[1].s_l1_mv.i2_mvx);
5998
134k
                    idx1_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[1].s_l1_mv.i2_mvy);
5999
134k
                }
6000
0
                else
6001
0
                {
6002
0
                    idx1_cost = INT_MAX;
6003
0
                }
6004
6005
                /* based on the least cost choose the mvp idx */
6006
134k
                if(idx0_cost <= idx1_cost)
6007
90.1k
                {
6008
90.1k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -=
6009
90.1k
                        as_pred_mv[0].s_l1_mv.i2_mvx;
6010
90.1k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -=
6011
90.1k
                        as_pred_mv[0].s_l1_mv.i2_mvy;
6012
6013
90.1k
                    ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 0;
6014
90.1k
                }
6015
44.2k
                else
6016
44.2k
                {
6017
44.2k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -=
6018
44.2k
                        as_pred_mv[1].s_l1_mv.i2_mvx;
6019
44.2k
                    ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -=
6020
44.2k
                        as_pred_mv[1].s_l1_mv.i2_mvy;
6021
6022
44.2k
                    ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 1;
6023
44.2k
                }
6024
6025
                /* set the pred l1 flag for neighbour storage */
6026
134k
                ps_nbr_4x4->b1_pred_l1_flag = 1;
6027
134k
            }
6028
6029
            /* set the merge flag to 0 */
6030
1.03M
            ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = 0;
6031
1.03M
            ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = 0;
6032
1.03M
        }
6033
3.14M
        else
6034
3.14M
        {
6035
            /* copy the merge index from candidate */
6036
3.14M
            ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = ps_pu->b1_merge_flag;
6037
6038
3.14M
            ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = ps_pu->b3_merge_idx;
6039
6040
3.14M
            if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode))
6041
2.98M
            {
6042
                /* set the pred l0 flag for neighbour storage */
6043
2.98M
                ps_nbr_4x4->b1_pred_l0_flag = 1;
6044
2.98M
            }
6045
6046
            /* calculate the MVDs and popluate the MVP idx for L1 */
6047
3.14M
            if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode))
6048
603k
            {
6049
                /* set the pred l1 flag for neighbour storage */
6050
603k
                ps_nbr_4x4->b1_pred_l1_flag = 1;
6051
603k
            }
6052
3.14M
        }
6053
6054
        /* RD opt cost computation is part of cu_ntu func hence here it is set to 0 */
6055
4.18M
        rdopt_cost = 0;
6056
6057
        /* copy the MV to colocated Mv structure */
6058
4.18M
        ps_final_prms->as_col_pu_enc_loop[ctr].s_l0_mv = ps_pu->mv.s_l0_mv;
6059
4.18M
        ps_final_prms->as_col_pu_enc_loop[ctr].s_l1_mv = ps_pu->mv.s_l1_mv;
6060
4.18M
        ps_final_prms->as_col_pu_enc_loop[ctr].i1_l0_ref_idx = ps_pu->mv.i1_l0_ref_idx;
6061
4.18M
        ps_final_prms->as_col_pu_enc_loop[ctr].i1_l1_ref_idx = ps_pu->mv.i1_l1_ref_idx;
6062
4.18M
        ps_final_prms->as_col_pu_enc_loop[ctr].b2_pred_mode = ps_pu->b2_pred_mode;
6063
4.18M
        ps_final_prms->as_col_pu_enc_loop[ctr].b1_intra_flag = 0;
6064
6065
        /* replicate neighbour 4x4 strcuture for entire partition */
6066
4.18M
        {
6067
4.18M
            WORD32 i, j;
6068
4.18M
            nbr_4x4_t *ps_tmp_4x4;
6069
6070
4.18M
            ps_tmp_4x4 = ps_nbr_4x4;
6071
6072
20.1M
            for(i = 0; i < (inter_pu_ht >> 2); i++)
6073
16.0M
            {
6074
104M
                for(j = 0; j < (inter_pu_wd >> 2); j++)
6075
88.0M
                {
6076
88.0M
                    ps_tmp_4x4[j] = *ps_nbr_4x4;
6077
88.0M
                }
6078
                /* row level update*/
6079
16.0M
                ps_tmp_4x4 += (cu_size >> 2);
6080
16.0M
            }
6081
4.18M
        }
6082
        /* set the neighbour map to 1 */
6083
4.18M
        ihevce_set_inter_nbr_map(
6084
4.18M
            ps_ctxt->pu1_ctb_nbr_map,
6085
4.18M
            ps_ctxt->i4_nbr_map_strd,
6086
4.18M
            cu_pos_x,
6087
4.18M
            cu_pos_y,
6088
4.18M
            (inter_pu_wd >> 2),
6089
4.18M
            (inter_pu_ht >> 2),
6090
4.18M
            1);
6091
        /* ----------- Motion Compensation for Luma ----------- */
6092
#if !ENABLE_MIXED_INTER_MODE_EVAL
6093
        {
6094
            IV_API_CALL_STATUS_T valid_mv_cand;
6095
6096
            /*If the inter candidate is neither merge cand nor skip cand
6097
            then calculate the mc.*/
6098
            if(0 == skip_or_merge_flag || (ps_ctxt->u1_high_speed_cu_dec_on))
6099
            {
6100
                valid_mv_cand =
6101
                    ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 0);
6102
6103
                /* assert if the MC is given a valid mv candidate */
6104
                ASSERT(valid_mv_cand == IV_SUCCESS);
6105
            }
6106
        }
6107
#endif
6108
4.18M
        if((2 == num_cu_part) && (0 == ctr))
6109
658k
        {
6110
            /* 2Nx__ partion case */
6111
658k
            if(inter_pu_wd == cu_size)
6112
525k
            {
6113
525k
                cu_pos_y += (inter_pu_ht >> 2);
6114
525k
                pu1_pred += (inter_pu_ht * pred_stride);
6115
525k
                ps_nbr_4x4 += (inter_pu_ht >> 2) * (cu_size >> 2);
6116
525k
                ps_left_nbr_4x4 += (inter_pu_ht >> 2) * nbr_4x4_left_strd;
6117
525k
                ps_top_nbr_4x4 = ps_nbr_4x4 - (cu_size >> 2);
6118
525k
                ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - nbr_4x4_left_strd;
6119
525k
            }
6120
6121
            /* __x2N partion case */
6122
658k
            if(inter_pu_ht == cu_size)
6123
133k
            {
6124
133k
                cu_pos_x += (inter_pu_wd >> 2);
6125
133k
                pu1_pred += inter_pu_wd;
6126
133k
                ps_nbr_4x4 += (inter_pu_wd >> 2);
6127
133k
                ps_left_nbr_4x4 = ps_nbr_4x4 - 1;
6128
133k
                ps_top_nbr_4x4 += (inter_pu_wd >> 2);
6129
133k
                ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1;
6130
133k
                nbr_4x4_left_strd = (cu_size >> 2);
6131
133k
            }
6132
658k
        }
6133
4.18M
    }
6134
6135
3.52M
    return (rdopt_cost);
6136
3.52M
}
6137
6138
/*!
6139
******************************************************************************
6140
* \if Function name : ihevce_intra_chroma_pred_mode_selector \endif
6141
*
6142
* \brief
6143
*    Coding unit processing function for chroma special modes (Non-Luma modes)
6144
*
6145
* \param[in] ps_ctxt       enc_loop module ctxt pointer
6146
* \param[in] ps_chrm_cu_buf_prms    ctxt having chroma related prms
6147
* \param[in] ps_cu_analyse      pointer to cu analyse
6148
* \param[in] rd_opt_curr_idx    index in the array of RDopt params
6149
* \param[in] tu_mode            TU_EQ_CU or other case
6150
*
6151
* \return
6152
*    Stores the best SATD mode, its RDOPT cost, CABAC state, TU bits
6153
*
6154
* \author
6155
*  Ittiam
6156
*
6157
*****************************************************************************
6158
*/
6159
UWORD8 ihevce_distortion_based_intra_chroma_mode_selector(
6160
    cu_analyse_t *ps_cu_analyse,
6161
    ihevc_intra_pred_chroma_ref_substitution_ft *pf_ref_substitution,
6162
    pf_intra_pred *ppf_chroma_ip,
6163
    pf_res_trans_luma_had_chroma *ppf_resd_trns_had,
6164
    UWORD8 *pu1_src,
6165
    WORD32 i4_src_stride,
6166
    UWORD8 *pu1_pred,
6167
    WORD32 i4_pred_stride,
6168
    UWORD8 *pu1_ctb_nbr_map,
6169
    WORD32 i4_nbr_map_strd,
6170
    UWORD8 *pu1_ref_sub_out,
6171
    WORD32 i4_alpha_stim_multiplier,
6172
    UWORD8 u1_is_cu_noisy,
6173
    UWORD8 u1_trans_size,
6174
    UWORD8 u1_trans_idx,
6175
    UWORD8 u1_num_tus_in_cu,
6176
    UWORD8 u1_num_4x4_luma_blks_in_tu,
6177
    UWORD8 u1_enable_psyRDOPT,
6178
    UWORD8 u1_is_422)
6179
2.04M
{
6180
2.04M
    UWORD8 u1_chrm_mode;
6181
2.04M
    UWORD8 ctr;
6182
2.04M
    WORD32 i4_subtu_idx;
6183
6184
2.04M
    WORD32 i = 0;
6185
2.04M
    UWORD8 u1_chrm_modes[4] = { 0, 1, 10, 26 };
6186
2.04M
    WORD32 i4_satd_had[4] = { 0 };
6187
2.04M
    WORD32 i4_best_satd_had = INT_MAX;
6188
2.04M
    UWORD8 u1_cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1);
6189
2.04M
    UWORD8 u1_cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1);
6190
2.04M
    WORD32 i4_num_sub_tus = u1_is_422 + 1;
6191
2.04M
    UWORD8 u1_best_chrm_mode = 0;
6192
6193
    /* Get the best satd among all possible modes */
6194
10.2M
    for(i = 0; i < 4; i++)
6195
8.18M
    {
6196
8.18M
        WORD32 left_strd = i4_src_stride;
6197
6198
8.18M
        u1_chrm_mode = (u1_is_422 == 1) ? gau1_chroma422_intra_angle_mapping[u1_chrm_modes[i]]
6199
8.18M
                                        : u1_chrm_modes[i];
6200
6201
        /* loop based on num tus in a cu */
6202
21.2M
        for(ctr = 0; ctr < u1_num_tus_in_cu; ctr++)
6203
13.1M
        {
6204
13.1M
            WORD32 luma_nbr_flags;
6205
13.1M
            WORD32 chrm_pred_func_idx;
6206
6207
13.1M
            WORD32 i4_trans_size_m2 = u1_trans_size << 1;
6208
13.1M
            UWORD8 *pu1_tu_src = pu1_src + ((ctr & 1) * i4_trans_size_m2) +
6209
13.1M
                                 (((ctr > 1) * u1_trans_size * i4_src_stride) << u1_is_422);
6210
13.1M
            UWORD8 *pu1_tu_pred = pu1_pred + ((ctr & 1) * i4_trans_size_m2) +
6211
13.1M
                                  (((ctr > 1) * u1_trans_size * i4_pred_stride) << u1_is_422);
6212
13.1M
            WORD32 i4_curr_tu_pos_x = u1_cu_pos_x + ((ctr & 1) * u1_num_4x4_luma_blks_in_tu);
6213
13.1M
            WORD32 i4_curr_tu_pos_y = u1_cu_pos_y + ((ctr > 1) * u1_num_4x4_luma_blks_in_tu);
6214
6215
13.1M
            luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
6216
13.1M
                pu1_ctb_nbr_map,
6217
13.1M
                i4_nbr_map_strd,
6218
13.1M
                i4_curr_tu_pos_x,
6219
13.1M
                i4_curr_tu_pos_y,
6220
13.1M
                u1_num_4x4_luma_blks_in_tu,
6221
13.1M
                u1_num_4x4_luma_blks_in_tu);
6222
6223
26.2M
            for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++)
6224
13.1M
            {
6225
13.1M
                WORD32 nbr_flags;
6226
6227
13.1M
                UWORD8 *pu1_cur_src =
6228
13.1M
                    pu1_tu_src + ((i4_subtu_idx == 1) * u1_trans_size * i4_src_stride);
6229
13.1M
                UWORD8 *pu1_cur_pred =
6230
13.1M
                    pu1_tu_pred + ((i4_subtu_idx == 1) * u1_trans_size * i4_pred_stride);
6231
13.1M
                UWORD8 *pu1_left = pu1_cur_src - 2;
6232
13.1M
                UWORD8 *pu1_top = pu1_cur_src - i4_src_stride;
6233
13.1M
                UWORD8 *pu1_top_left = pu1_top - 2;
6234
6235
13.1M
                nbr_flags = ihevce_get_intra_chroma_tu_nbr(
6236
13.1M
                    luma_nbr_flags, i4_subtu_idx, u1_trans_size, u1_is_422);
6237
6238
                /* call the chroma reference array substitution */
6239
13.1M
                pf_ref_substitution(
6240
13.1M
                    pu1_top_left,
6241
13.1M
                    pu1_top,
6242
13.1M
                    pu1_left,
6243
13.1M
                    left_strd,
6244
13.1M
                    u1_trans_size,
6245
13.1M
                    nbr_flags,
6246
13.1M
                    pu1_ref_sub_out,
6247
13.1M
                    1);
6248
6249
                /* use the look up to get the function idx */
6250
13.1M
                chrm_pred_func_idx = g_i4_ip_funcs[u1_chrm_mode];
6251
6252
                /* call the intra prediction function */
6253
13.1M
                ppf_chroma_ip[chrm_pred_func_idx](
6254
13.1M
                    pu1_ref_sub_out, 1, pu1_cur_pred, i4_pred_stride, u1_trans_size, u1_chrm_mode);
6255
6256
13.1M
                if(!u1_is_cu_noisy || !i4_alpha_stim_multiplier)
6257
13.1M
                {
6258
                    /* compute Hadamard-transform satd : Cb */
6259
13.1M
                    i4_satd_had[i] += ppf_resd_trns_had[u1_trans_idx - 1](
6260
13.1M
                        pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0);
6261
6262
                    /* compute Hadamard-transform satd : Cr */
6263
13.1M
                    i4_satd_had[i] += ppf_resd_trns_had[u1_trans_idx - 1](
6264
13.1M
                        pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0);
6265
13.1M
                }
6266
0
                else
6267
0
                {
6268
0
                    WORD32 i4_satd;
6269
6270
                    /* compute Hadamard-transform satd : Cb */
6271
0
                    i4_satd = ppf_resd_trns_had[u1_trans_idx - 1](
6272
0
                        pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0);
6273
6274
0
                    i4_satd = ihevce_inject_stim_into_distortion(
6275
0
                        pu1_cur_src,
6276
0
                        i4_src_stride,
6277
0
                        pu1_cur_pred,
6278
0
                        i4_pred_stride,
6279
0
                        i4_satd,
6280
0
                        i4_alpha_stim_multiplier,
6281
0
                        u1_trans_size,
6282
0
                        0,
6283
0
                        u1_enable_psyRDOPT,
6284
0
                        U_PLANE);
6285
6286
0
                    i4_satd_had[i] += i4_satd;
6287
6288
                    /* compute Hadamard-transform satd : Cr */
6289
0
                    i4_satd = ppf_resd_trns_had[u1_trans_idx - 1](
6290
0
                        pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0);
6291
6292
0
                    i4_satd = ihevce_inject_stim_into_distortion(
6293
0
                        pu1_cur_src,
6294
0
                        i4_src_stride,
6295
0
                        pu1_cur_pred,
6296
0
                        i4_pred_stride,
6297
0
                        i4_satd,
6298
0
                        i4_alpha_stim_multiplier,
6299
0
                        u1_trans_size,
6300
0
                        0,
6301
0
                        u1_enable_psyRDOPT,
6302
0
                        V_PLANE);
6303
6304
0
                    i4_satd_had[i] += i4_satd;
6305
0
                }
6306
13.1M
            }
6307
6308
            /* set the neighbour map to 1 */
6309
13.1M
            ihevce_set_nbr_map(
6310
13.1M
                pu1_ctb_nbr_map,
6311
13.1M
                i4_nbr_map_strd,
6312
13.1M
                i4_curr_tu_pos_x,
6313
13.1M
                i4_curr_tu_pos_y,
6314
13.1M
                u1_num_4x4_luma_blks_in_tu,
6315
13.1M
                1);
6316
13.1M
        }
6317
6318
        /* set the neighbour map to 0 */
6319
8.18M
        ihevce_set_nbr_map(
6320
8.18M
            pu1_ctb_nbr_map,
6321
8.18M
            i4_nbr_map_strd,
6322
8.18M
            (ps_cu_analyse->b3_cu_pos_x << 1),
6323
8.18M
            (ps_cu_analyse->b3_cu_pos_y << 1),
6324
8.18M
            (ps_cu_analyse->u1_cu_size >> 2),
6325
8.18M
            0);
6326
6327
        /* Get the least SATD and corresponding mode */
6328
8.18M
        if(i4_best_satd_had > i4_satd_had[i])
6329
2.41M
        {
6330
2.41M
            i4_best_satd_had = i4_satd_had[i];
6331
2.41M
            u1_best_chrm_mode = u1_chrm_mode;
6332
2.41M
        }
6333
8.18M
    }
6334
6335
2.04M
    return u1_best_chrm_mode;
6336
2.04M
}
6337
6338
void ihevce_intra_chroma_pred_mode_selector(
6339
    ihevce_enc_loop_ctxt_t *ps_ctxt,
6340
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
6341
    cu_analyse_t *ps_cu_analyse,
6342
    WORD32 rd_opt_curr_idx,
6343
    WORD32 tu_mode,
6344
    WORD32 i4_alpha_stim_multiplier,
6345
    UWORD8 u1_is_cu_noisy)
6346
2.04M
{
6347
2.04M
    chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt;
6348
6349
2.04M
    ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
6350
6351
2.04M
    UWORD8 *pu1_pred;
6352
2.04M
    WORD32 trans_size;
6353
2.04M
    WORD32 num_tus_in_cu;
6354
2.04M
    WORD32 pred_strd;
6355
2.04M
    WORD32 ctr;
6356
2.04M
    WORD32 i4_subtu_idx;
6357
2.04M
    WORD32 i4_num_sub_tus;
6358
2.04M
    WORD32 trans_idx;
6359
2.04M
    WORD32 scan_idx;
6360
2.04M
    WORD32 num_4x4_luma_in_tu;
6361
2.04M
    WORD32 cu_pos_x;
6362
2.04M
    WORD32 cu_pos_y;
6363
6364
2.04M
    recon_datastore_t *aps_recon_datastore[2] = { &ps_ctxt->as_cu_prms[0].s_recon_datastore,
6365
2.04M
                                                  &ps_ctxt->as_cu_prms[1].s_recon_datastore };
6366
6367
2.04M
    LWORD64 chrm_cod_cost = 0;
6368
2.04M
    WORD32 chrm_tu_bits = 0;
6369
2.04M
    WORD32 best_chrm_mode = DM_CHROMA_IDX;
6370
2.04M
    UWORD8 *pu1_chrm_src = ps_chrm_cu_buf_prms->pu1_curr_src;
6371
2.04M
    WORD32 chrm_src_stride = ps_chrm_cu_buf_prms->i4_chrm_src_stride;
6372
2.04M
    UWORD8 *pu1_cu_left = ps_chrm_cu_buf_prms->pu1_cu_left;
6373
2.04M
    UWORD8 *pu1_cu_top = ps_chrm_cu_buf_prms->pu1_cu_top;
6374
2.04M
    UWORD8 *pu1_cu_top_left = ps_chrm_cu_buf_prms->pu1_cu_top_left;
6375
2.04M
    WORD32 cu_left_stride = ps_chrm_cu_buf_prms->i4_cu_left_stride;
6376
2.04M
    WORD32 cu_size = ps_cu_analyse->u1_cu_size;
6377
2.04M
    WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
6378
2.04M
    WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
6379
2.04M
    UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
6380
6381
2.04M
    ihevc_intra_pred_chroma_ref_substitution_fptr =
6382
2.04M
        ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
6383
2.04M
    i4_num_sub_tus = (u1_is_422 == 1) + 1;
6384
6385
#if DISABLE_RDOQ_INTRA
6386
    i4_perform_rdoq = 0;
6387
#endif
6388
6389
2.04M
    if(TU_EQ_CU == tu_mode)
6390
1.63M
    {
6391
1.63M
        num_tus_in_cu = 1;
6392
1.63M
        trans_size = cu_size >> 1;
6393
1.63M
        num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/
6394
1.63M
        ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode];
6395
1.63M
    }
6396
410k
    else
6397
410k
    {
6398
410k
        num_tus_in_cu = 4;
6399
410k
        trans_size = cu_size >> 2;
6400
410k
        num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/
6401
6402
        /* For 8x8 CU only one TU */
6403
410k
        if(MIN_TU_SIZE > trans_size)
6404
0
        {
6405
0
            trans_size = MIN_TU_SIZE;
6406
0
            num_tus_in_cu = 1;
6407
            /* chroma nbr avail. is derived based on luma.
6408
            for 4x4 chrm use 8x8 luma's size */
6409
0
            num_4x4_luma_in_tu = num_4x4_luma_in_tu << 1;
6410
0
        }
6411
6412
410k
        ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode];
6413
410k
    }
6414
6415
    /* Can't be TU_EQ_SUBCU case */
6416
2.04M
    ASSERT(TU_EQ_SUBCU != tu_mode);
6417
6418
    /* translate the transform size to index */
6419
2.04M
    trans_idx = trans_size >> 2;
6420
6421
2.04M
    pu1_pred = (UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data;
6422
6423
2.04M
    pred_strd = ps_chr_intra_satd_ctxt->i4_pred_stride;
6424
6425
    /* for 16x16 cases */
6426
2.04M
    if(16 == trans_size)
6427
330k
    {
6428
330k
        trans_idx = 3;
6429
330k
    }
6430
6431
2.04M
    best_chrm_mode = ihevce_distortion_based_intra_chroma_mode_selector(
6432
2.04M
        ps_cu_analyse,
6433
2.04M
        ihevc_intra_pred_chroma_ref_substitution_fptr,
6434
2.04M
        ps_ctxt->apf_chrm_ip,
6435
2.04M
        ps_ctxt->apf_chrm_resd_trns_had,
6436
2.04M
        pu1_chrm_src,
6437
2.04M
        chrm_src_stride,
6438
2.04M
        pu1_pred,
6439
2.04M
        pred_strd,
6440
2.04M
        ps_ctxt->pu1_ctb_nbr_map,
6441
2.04M
        ps_ctxt->i4_nbr_map_strd,
6442
2.04M
        (UWORD8 *)ps_ctxt->pv_ref_sub_out,
6443
2.04M
        i4_alpha_stim_multiplier,
6444
2.04M
        u1_is_cu_noisy,
6445
2.04M
        trans_size,
6446
2.04M
        trans_idx,
6447
2.04M
        num_tus_in_cu,
6448
2.04M
        num_4x4_luma_in_tu,
6449
2.04M
        ps_ctxt->u1_enable_psyRDOPT,
6450
2.04M
        u1_is_422);
6451
6452
    /* Store the best chroma mode */
6453
2.04M
    ps_chr_intra_satd_ctxt->u1_best_cr_mode = best_chrm_mode;
6454
6455
    /* evaluate RDOPT cost for the Best mode */
6456
2.04M
    {
6457
2.04M
        WORD32 i4_subtu_pos_x;
6458
2.04M
        WORD32 i4_subtu_pos_y;
6459
2.04M
        UWORD8 u1_compute_spatial_ssd;
6460
6461
2.04M
        WORD32 ai4_total_bytes_offset_cb[2] = { 0, 0 };
6462
2.04M
        WORD32 ai4_total_bytes_offset_cr[2] = { 0, 0 };
6463
        /* State for prefix bin of chroma intra pred mode before CU encode */
6464
2.04M
        UWORD8 u1_chroma_intra_mode_prefix_state =
6465
2.04M
            ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_CHROMA_PRED_MODE];
6466
2.04M
        WORD32 luma_trans_size = trans_size << 1;
6467
2.04M
        WORD32 calc_recon = 0;
6468
2.04M
        UWORD8 *pu1_left = pu1_cu_left;
6469
2.04M
        UWORD8 *pu1_top = pu1_cu_top;
6470
2.04M
        UWORD8 *pu1_top_left = pu1_cu_top_left;
6471
2.04M
        WORD32 left_strd = cu_left_stride;
6472
6473
2.04M
        if(ps_ctxt->i1_cu_qp_delta_enable)
6474
1.02M
        {
6475
1.02M
            ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, luma_trans_size, 1);
6476
1.02M
        }
6477
6478
2.04M
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
6479
1.24M
                                 (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
6480
1.24M
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
6481
6482
2.04M
        if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
6483
0
        {
6484
0
            u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
6485
0
                                     CONVERT_SSDS_TO_SPATIAL_DOMAIN;
6486
0
        }
6487
6488
        /* get the 4x4 level position of current cu */
6489
2.04M
        cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1);
6490
2.04M
        cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1);
6491
6492
2.04M
        calc_recon = !u1_compute_spatial_ssd && ((4 == num_tus_in_cu) || (u1_is_422 == 1));
6493
6494
2.04M
        if(calc_recon || u1_compute_spatial_ssd)
6495
1.37M
        {
6496
1.37M
            aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1;
6497
1.37M
            aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1;
6498
1.37M
        }
6499
674k
        else
6500
674k
        {
6501
674k
            aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0;
6502
674k
            aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0;
6503
674k
        }
6504
6505
        /* loop based on num tus in a cu */
6506
5.32M
        for(ctr = 0; ctr < num_tus_in_cu; ctr++)
6507
3.27M
        {
6508
3.27M
            WORD16 *pi2_cur_deq_data_cb;
6509
3.27M
            WORD16 *pi2_cur_deq_data_cr;
6510
6511
3.27M
            WORD32 deq_data_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride;
6512
3.27M
            WORD32 luma_nbr_flags = 0;
6513
6514
3.27M
            luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
6515
3.27M
                ps_ctxt->pu1_ctb_nbr_map,
6516
3.27M
                ps_ctxt->i4_nbr_map_strd,
6517
3.27M
                (ctr & 1) * (luma_trans_size >> 2) + cu_pos_x,
6518
3.27M
                (ctr > 1) * (luma_trans_size >> 2) + cu_pos_y,
6519
3.27M
                (luma_trans_size >> 2),
6520
3.27M
                (luma_trans_size >> 2));
6521
6522
6.55M
            for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++)
6523
3.27M
            {
6524
3.27M
                WORD32 cbf, num_bytes;
6525
3.27M
                LWORD64 trans_ssd_u, trans_ssd_v;
6526
3.27M
                UWORD8 u1_is_recon_available;
6527
6528
3.27M
                WORD32 trans_size_m2 = trans_size << 1;
6529
3.27M
                UWORD8 *pu1_cur_src = pu1_chrm_src + ((ctr & 1) * trans_size_m2) +
6530
3.27M
                                      (((ctr > 1) * trans_size * chrm_src_stride) << u1_is_422) +
6531
3.27M
                                      (i4_subtu_idx * trans_size * chrm_src_stride);
6532
3.27M
                UWORD8 *pu1_cur_pred = pu1_pred + ((ctr & 1) * trans_size_m2) +
6533
3.27M
                                       (((ctr > 1) * trans_size * pred_strd) << u1_is_422) +
6534
3.27M
                                       (i4_subtu_idx * trans_size * pred_strd);
6535
3.27M
                WORD32 i4_recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride;
6536
3.27M
                UWORD8 *pu1_cur_recon = ((UWORD8 *)aps_recon_datastore[0]
6537
3.27M
                                             ->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)]) +
6538
3.27M
                                        ((ctr & 1) * trans_size_m2) +
6539
3.27M
                                        (((ctr > 1) * trans_size * i4_recon_stride) << u1_is_422) +
6540
3.27M
                                        (i4_subtu_idx * trans_size * i4_recon_stride);
6541
6542
                /* Use Chroma coeff/iq buf of the cur. intra cand. Not rememb.
6543
                chroma coeff/iq for high quality intra SATD special modes. Will
6544
                be over written by coeff of luma mode in chroma_rdopt call */
6545
3.27M
                UWORD8 *pu1_ecd_data_cb =
6546
3.27M
                    &ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0];
6547
3.27M
                UWORD8 *pu1_ecd_data_cr =
6548
3.27M
                    &ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0];
6549
6550
3.27M
                WORD32 chrm_pred_func_idx = 0;
6551
3.27M
                LWORD64 curr_cb_cod_cost = 0;
6552
3.27M
                LWORD64 curr_cr_cod_cost = 0;
6553
3.27M
                WORD32 nbr_flags = 0;
6554
6555
3.27M
                i4_subtu_pos_x = (((ctr & 1) * trans_size_m2) >> 2);
6556
3.27M
                i4_subtu_pos_y = (((ctr > 1) * trans_size) >> (!u1_is_422 + 1)) +
6557
3.27M
                                 ((i4_subtu_idx * trans_size) >> 2);
6558
3.27M
                pi2_cur_deq_data_cb = &ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] +
6559
3.27M
                                      ((ctr & 1) * trans_size) +
6560
3.27M
                                      (((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) +
6561
3.27M
                                      (i4_subtu_idx * trans_size * deq_data_strd);
6562
3.27M
                pi2_cur_deq_data_cr = &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] +
6563
3.27M
                                      ((ctr & 1) * trans_size) +
6564
3.27M
                                      (((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) +
6565
3.27M
                                      (i4_subtu_idx * trans_size * deq_data_strd);
6566
6567
                /* left cu boundary */
6568
3.27M
                if(0 == i4_subtu_pos_x)
6569
2.45M
                {
6570
2.45M
                    left_strd = cu_left_stride;
6571
2.45M
                    pu1_left = pu1_cu_left + (i4_subtu_pos_y << 2) * left_strd;
6572
2.45M
                }
6573
821k
                else
6574
821k
                {
6575
821k
                    pu1_left = pu1_cur_recon - 2;
6576
821k
                    left_strd = i4_recon_stride;
6577
821k
                }
6578
6579
                /* top cu boundary */
6580
3.27M
                if(0 == i4_subtu_pos_y)
6581
2.45M
                {
6582
2.45M
                    pu1_top = pu1_cu_top + (i4_subtu_pos_x << 2);
6583
2.45M
                }
6584
821k
                else
6585
821k
                {
6586
821k
                    pu1_top = pu1_cur_recon - i4_recon_stride;
6587
821k
                }
6588
6589
                /* by default top left is set to cu top left */
6590
3.27M
                pu1_top_left = pu1_cu_top_left;
6591
6592
                /* top left based on position */
6593
3.27M
                if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x))
6594
410k
                {
6595
410k
                    pu1_top_left = pu1_left - left_strd;
6596
410k
                }
6597
2.86M
                else if(0 != i4_subtu_pos_x)
6598
821k
                {
6599
821k
                    pu1_top_left = pu1_top - 2;
6600
821k
                }
6601
6602
                /* populate the coeffs scan idx */
6603
3.27M
                scan_idx = SCAN_DIAG_UPRIGHT;
6604
6605
                /* RDOPT copy States :  TU init (best until prev TU) to current */
6606
3.27M
                COPY_CABAC_STATES(
6607
3.27M
                    &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6608
3.27M
                         .s_cabac_ctxt.au1_ctxt_models[0],
6609
3.27M
                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6610
3.27M
                    IHEVC_CAB_CTXT_END);
6611
6612
                /* for 4x4 transforms the scan is chosen based on the intra pred mode */
6613
3.27M
                if(4 == trans_size)
6614
1.56M
                {
6615
                    /* for modes from 22 upto 30 horizontal scan is used */
6616
1.56M
                    if((best_chrm_mode > 21) && (best_chrm_mode < 31))
6617
46.3k
                    {
6618
46.3k
                        scan_idx = SCAN_HORZ;
6619
46.3k
                    }
6620
                    /* for modes from 6 upto 14 vertical scan is used */
6621
1.51M
                    else if((best_chrm_mode > 5) && (best_chrm_mode < 15))
6622
111k
                    {
6623
111k
                        scan_idx = SCAN_VERT;
6624
111k
                    }
6625
1.56M
                }
6626
6627
3.27M
                nbr_flags = ihevce_get_intra_chroma_tu_nbr(
6628
3.27M
                    luma_nbr_flags, i4_subtu_idx, trans_size, u1_is_422);
6629
6630
                /* call the chroma reference array substitution */
6631
3.27M
                ihevc_intra_pred_chroma_ref_substitution_fptr(
6632
3.27M
                    pu1_top_left,
6633
3.27M
                    pu1_top,
6634
3.27M
                    pu1_left,
6635
3.27M
                    left_strd,
6636
3.27M
                    trans_size,
6637
3.27M
                    nbr_flags,
6638
3.27M
                    (UWORD8 *)ps_ctxt->pv_ref_sub_out,
6639
3.27M
                    1);
6640
6641
                /* use the look up to get the function idx */
6642
3.27M
                chrm_pred_func_idx = g_i4_ip_funcs[best_chrm_mode];
6643
6644
                /* call the intra prediction function */
6645
3.27M
                ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
6646
3.27M
                    (UWORD8 *)ps_ctxt->pv_ref_sub_out,
6647
3.27M
                    1,
6648
3.27M
                    pu1_cur_pred,
6649
3.27M
                    pred_strd,
6650
3.27M
                    trans_size,
6651
3.27M
                    best_chrm_mode);
6652
6653
                /* UPLANE RDOPT Loop */
6654
3.27M
                {
6655
3.27M
                    WORD32 tu_bits;
6656
6657
3.27M
                    cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
6658
3.27M
                        ps_ctxt,
6659
3.27M
                        pu1_cur_pred,
6660
3.27M
                        pred_strd,
6661
3.27M
                        pu1_cur_src,
6662
3.27M
                        chrm_src_stride,
6663
3.27M
                        pi2_cur_deq_data_cb,
6664
3.27M
                        deq_data_strd,
6665
3.27M
                        pu1_cur_recon,
6666
3.27M
                        i4_recon_stride,
6667
3.27M
                        pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx],
6668
3.27M
                        ps_ctxt->au1_cu_csbf,
6669
3.27M
                        ps_ctxt->i4_cu_csbf_strd,
6670
3.27M
                        trans_size,
6671
3.27M
                        scan_idx,
6672
3.27M
                        1,
6673
3.27M
                        &num_bytes,
6674
3.27M
                        &tu_bits,
6675
3.27M
                        &ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr],
6676
3.27M
                        &ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr],
6677
3.27M
                        &u1_is_recon_available,
6678
3.27M
                        i4_perform_sbh,
6679
3.27M
                        i4_perform_rdoq,
6680
3.27M
                        &trans_ssd_u,
6681
3.27M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
6682
3.27M
                        i4_alpha_stim_multiplier,
6683
3.27M
                        u1_is_cu_noisy,
6684
3.27M
#endif
6685
3.27M
                        0,
6686
3.27M
                        u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
6687
3.27M
                        U_PLANE);
6688
6689
#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL
6690
                    if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
6691
                    {
6692
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
6693
                        trans_ssd_u = ihevce_inject_stim_into_distortion(
6694
                            pu1_cur_src,
6695
                            chrm_src_stride,
6696
                            pu1_cur_pred,
6697
                            pred_strd,
6698
                            trans_ssd_u,
6699
                            i4_alpha_stim_multiplier,
6700
                            trans_size,
6701
                            0,
6702
                            ps_ctxt->u1_enable_psyRDOPT,
6703
                            U_PLANE);
6704
#else
6705
                        if(u1_compute_spatial_ssd && u1_is_recon_available)
6706
                        {
6707
                            trans_ssd_u = ihevce_inject_stim_into_distortion(
6708
                                pu1_cur_src,
6709
                                chrm_src_stride,
6710
                                pu1_cur_recon,
6711
                                i4_recon_stride,
6712
                                trans_ssd_u,
6713
                                i4_alpha_stim_multiplier,
6714
                                trans_size,
6715
                                0,
6716
                                ps_ctxt->u1_enable_psyRDOPT,
6717
                                U_PLANE);
6718
                        }
6719
                        else
6720
                        {
6721
                            trans_ssd_u = ihevce_inject_stim_into_distortion(
6722
                                pu1_cur_src,
6723
                                chrm_src_stride,
6724
                                pu1_cur_pred,
6725
                                pred_strd,
6726
                                trans_ssd_u,
6727
                                i4_alpha_stim_multiplier,
6728
                                trans_size,
6729
                                0,
6730
                                ps_ctxt->u1_enable_psyRDOPT,
6731
                                U_PLANE);
6732
                        }
6733
#endif
6734
                    }
6735
#endif
6736
6737
                    /* RDOPT copy States :  New updated after curr TU to TU init */
6738
3.27M
                    if(0 != cbf)
6739
373k
                    {
6740
373k
                        memcpy(
6741
373k
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6742
373k
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6743
373k
                                 .s_cabac_ctxt.au1_ctxt_models[0],
6744
373k
                            IHEVC_CAB_CTXT_END);
6745
373k
                    }
6746
                    /* RDOPT copy States :  Restoring back the Cb init state to Cr */
6747
2.90M
                    else
6748
2.90M
                    {
6749
2.90M
                        memcpy(
6750
2.90M
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6751
2.90M
                                 .s_cabac_ctxt.au1_ctxt_models[0],
6752
2.90M
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6753
2.90M
                            IHEVC_CAB_CTXT_END);
6754
2.90M
                    }
6755
6756
3.27M
                    if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd))
6757
503k
                    {
6758
503k
                        ihevce_chroma_it_recon_fxn(
6759
503k
                            ps_ctxt,
6760
503k
                            pi2_cur_deq_data_cb,
6761
503k
                            deq_data_strd,
6762
503k
                            pu1_cur_pred,
6763
503k
                            pred_strd,
6764
503k
                            pu1_cur_recon,
6765
503k
                            i4_recon_stride,
6766
503k
                            (pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx]),
6767
503k
                            trans_size,
6768
503k
                            cbf,
6769
503k
                            ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr],
6770
503k
                            ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr],
6771
503k
                            U_PLANE);
6772
503k
                    }
6773
6774
3.27M
                    ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr] = cbf;
6775
3.27M
                    curr_cb_cod_cost =
6776
3.27M
                        trans_ssd_u +
6777
3.27M
                        COMPUTE_RATE_COST_CLIP30(
6778
3.27M
                            tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
6779
3.27M
                    chrm_tu_bits += tu_bits;
6780
3.27M
                    ai4_total_bytes_offset_cb[i4_subtu_idx] += num_bytes;
6781
3.27M
                    ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr] =
6782
3.27M
                        num_bytes;
6783
3.27M
                }
6784
6785
                /* VPLANE RDOPT Loop */
6786
3.27M
                {
6787
3.27M
                    WORD32 tu_bits;
6788
6789
3.27M
                    cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
6790
3.27M
                        ps_ctxt,
6791
3.27M
                        pu1_cur_pred,
6792
3.27M
                        pred_strd,
6793
3.27M
                        pu1_cur_src,
6794
3.27M
                        chrm_src_stride,
6795
3.27M
                        pi2_cur_deq_data_cr,
6796
3.27M
                        deq_data_strd,
6797
3.27M
                        pu1_cur_recon,
6798
3.27M
                        i4_recon_stride,
6799
3.27M
                        pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx],
6800
3.27M
                        ps_ctxt->au1_cu_csbf,
6801
3.27M
                        ps_ctxt->i4_cu_csbf_strd,
6802
3.27M
                        trans_size,
6803
3.27M
                        scan_idx,
6804
3.27M
                        1,
6805
3.27M
                        &num_bytes,
6806
3.27M
                        &tu_bits,
6807
3.27M
                        &ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr],
6808
3.27M
                        &ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr],
6809
3.27M
                        &u1_is_recon_available,
6810
3.27M
                        i4_perform_sbh,
6811
3.27M
                        i4_perform_rdoq,
6812
3.27M
                        &trans_ssd_v,
6813
3.27M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
6814
3.27M
                        i4_alpha_stim_multiplier,
6815
3.27M
                        u1_is_cu_noisy,
6816
3.27M
#endif
6817
3.27M
                        0,
6818
3.27M
                        u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
6819
3.27M
                        V_PLANE);
6820
6821
#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL
6822
                    if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
6823
                    {
6824
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
6825
                        trans_ssd_v = ihevce_inject_stim_into_distortion(
6826
                            pu1_cur_src,
6827
                            chrm_src_stride,
6828
                            pu1_cur_pred,
6829
                            pred_strd,
6830
                            trans_ssd_v,
6831
                            i4_alpha_stim_multiplier,
6832
                            trans_size,
6833
                            0,
6834
                            ps_ctxt->u1_enable_psyRDOPT,
6835
                            V_PLANE);
6836
#else
6837
                        if(u1_compute_spatial_ssd && u1_is_recon_available)
6838
                        {
6839
                            trans_ssd_v = ihevce_inject_stim_into_distortion(
6840
                                pu1_cur_src,
6841
                                chrm_src_stride,
6842
                                pu1_cur_recon,
6843
                                i4_recon_stride,
6844
                                trans_ssd_v,
6845
                                i4_alpha_stim_multiplier,
6846
                                trans_size,
6847
                                0,
6848
                                ps_ctxt->u1_enable_psyRDOPT,
6849
                                V_PLANE);
6850
                        }
6851
                        else
6852
                        {
6853
                            trans_ssd_v = ihevce_inject_stim_into_distortion(
6854
                                pu1_cur_src,
6855
                                chrm_src_stride,
6856
                                pu1_cur_pred,
6857
                                pred_strd,
6858
                                trans_ssd_v,
6859
                                i4_alpha_stim_multiplier,
6860
                                trans_size,
6861
                                0,
6862
                                ps_ctxt->u1_enable_psyRDOPT,
6863
                                V_PLANE);
6864
                        }
6865
#endif
6866
                    }
6867
#endif
6868
6869
                    /* RDOPT copy States :  New updated after curr TU to TU init */
6870
3.27M
                    if(0 != cbf)
6871
361k
                    {
6872
361k
                        COPY_CABAC_STATES(
6873
361k
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6874
361k
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6875
361k
                                 .s_cabac_ctxt.au1_ctxt_models[0],
6876
361k
                            IHEVC_CAB_CTXT_END);
6877
361k
                    }
6878
                    /* RDOPT copy States :  Restoring back the Cb init state to Cr */
6879
2.91M
                    else
6880
2.91M
                    {
6881
2.91M
                        COPY_CABAC_STATES(
6882
2.91M
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
6883
2.91M
                                 .s_cabac_ctxt.au1_ctxt_models[0],
6884
2.91M
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
6885
2.91M
                            IHEVC_CAB_CTXT_END);
6886
2.91M
                    }
6887
6888
3.27M
                    if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd))
6889
503k
                    {
6890
503k
                        ihevce_chroma_it_recon_fxn(
6891
503k
                            ps_ctxt,
6892
503k
                            pi2_cur_deq_data_cr,
6893
503k
                            deq_data_strd,
6894
503k
                            pu1_cur_pred,
6895
503k
                            pred_strd,
6896
503k
                            pu1_cur_recon,
6897
503k
                            i4_recon_stride,
6898
503k
                            (pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx]),
6899
503k
                            trans_size,
6900
503k
                            cbf,
6901
503k
                            ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr],
6902
503k
                            ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr],
6903
503k
                            V_PLANE);
6904
503k
                    }
6905
6906
3.27M
                    ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr] = cbf;
6907
3.27M
                    curr_cr_cod_cost =
6908
3.27M
                        trans_ssd_v +
6909
3.27M
                        COMPUTE_RATE_COST_CLIP30(
6910
3.27M
                            tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
6911
3.27M
                    chrm_tu_bits += tu_bits;
6912
3.27M
                    ai4_total_bytes_offset_cr[i4_subtu_idx] += num_bytes;
6913
3.27M
                    ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr] =
6914
3.27M
                        num_bytes;
6915
3.27M
                }
6916
6917
3.27M
                chrm_cod_cost += curr_cb_cod_cost;
6918
3.27M
                chrm_cod_cost += curr_cr_cod_cost;
6919
3.27M
            }
6920
6921
            /* set the neighbour map to 1 */
6922
3.27M
            ihevce_set_nbr_map(
6923
3.27M
                ps_ctxt->pu1_ctb_nbr_map,
6924
3.27M
                ps_ctxt->i4_nbr_map_strd,
6925
3.27M
                (ctr & 1) * (luma_trans_size >> 2) + cu_pos_x,
6926
3.27M
                (ctr > 1) * (luma_trans_size >> 2) + cu_pos_y,
6927
3.27M
                (luma_trans_size >> 2),
6928
3.27M
                1);
6929
3.27M
        }
6930
6931
        /* set the neighbour map to 0 */
6932
2.04M
        ihevce_set_nbr_map(
6933
2.04M
            ps_ctxt->pu1_ctb_nbr_map,
6934
2.04M
            ps_ctxt->i4_nbr_map_strd,
6935
2.04M
            (ps_cu_analyse->b3_cu_pos_x << 1),
6936
2.04M
            (ps_cu_analyse->b3_cu_pos_y << 1),
6937
2.04M
            (ps_cu_analyse->u1_cu_size >> 2),
6938
2.04M
            0);
6939
6940
        /* Account for coding b3_chroma_intra_pred_mode prefix and suffix bins */
6941
        /* This is done by adding the bits for signalling chroma mode (0-3)    */
6942
        /* and subtracting the bits for chroma mode same as luma mode (4)      */
6943
2.04M
#if CHROMA_RDOPT_ENABLE
6944
2.04M
        {
6945
            /* Estimate bits to encode prefix bin as 1 for b3_chroma_intra_pred_mode */
6946
2.04M
            WORD32 bits_frac_1 =
6947
2.04M
                gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 1];
6948
6949
2.04M
            WORD32 bits_for_mode_0to3 = (2 << CABAC_FRAC_BITS_Q) + bits_frac_1;
6950
6951
            /* Estimate bits to encode prefix bin as 0 for b3_chroma_intra_pred_mode */
6952
2.04M
            WORD32 bits_for_mode4 =
6953
2.04M
                gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 0];
6954
6955
            /* accumulate into final rd cost for chroma */
6956
2.04M
            ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode = COMPUTE_RATE_COST_CLIP30(
6957
2.04M
                (bits_for_mode_0to3 - bits_for_mode4),
6958
2.04M
                ps_ctxt->i8_cl_ssd_lambda_chroma_qf,
6959
2.04M
                (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
6960
6961
2.04M
            chrm_cod_cost += ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode;
6962
2.04M
        }
6963
2.04M
#endif
6964
6965
2.04M
        if(ps_ctxt->u1_enable_psyRDOPT)
6966
0
        {
6967
0
            UWORD8 *pu1_recon_cu;
6968
0
            WORD32 recon_stride;
6969
0
            WORD32 curr_pos_x;
6970
0
            WORD32 curr_pos_y;
6971
0
            WORD32 start_index;
6972
0
            WORD32 num_horz_cu_in_ctb;
6973
0
            WORD32 had_block_size;
6974
6975
            /* tODO: sreenivasa ctb size has to be used appropriately */
6976
0
            had_block_size = 8;
6977
0
            num_horz_cu_in_ctb = 2 * 64 / had_block_size;
6978
0
            curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
6979
0
            curr_pos_y = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
6980
0
            recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride;
6981
0
            pu1_recon_cu =
6982
0
                aps_recon_datastore[0]->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)];  //
6983
6984
            /* start index to index the source satd of curr cu int he current ctb*/
6985
0
            start_index = 2 * (curr_pos_x / had_block_size) +
6986
0
                          (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
6987
6988
0
            {
6989
0
                chrm_cod_cost += ihevce_psy_rd_cost_croma(
6990
0
                    ps_ctxt->ai4_source_chroma_satd,
6991
0
                    pu1_recon_cu,
6992
0
                    recon_stride,
6993
0
                    1,  //
6994
0
                    cu_size,
6995
0
                    0,  // pic type
6996
0
                    0,  //layer id
6997
0
                    ps_ctxt->i4_satd_lamda,  // lambda
6998
0
                    start_index,
6999
0
                    ps_ctxt->u1_is_input_data_hbd,  // 8 bit
7000
0
                    ps_ctxt->u1_chroma_array_type,
7001
0
                    &ps_ctxt->s_cmn_opt_func
7002
7003
0
                );  // chroma subsampling 420
7004
0
            }
7005
0
        }
7006
7007
2.04M
        ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt = chrm_cod_cost;
7008
2.04M
        ps_chr_intra_satd_ctxt->i4_chrm_tu_bits = chrm_tu_bits;
7009
7010
2.04M
        memcpy(
7011
2.04M
            &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0],
7012
2.04M
            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7013
2.04M
            IHEVC_CAB_CTXT_END);
7014
2.04M
    }
7015
2.04M
}
7016
7017
/*!
7018
******************************************************************************
7019
* \if Function name : ihevce_chroma_cu_prcs_rdopt \endif
7020
*
7021
* \brief
7022
*    Coding unit processing function for chroma
7023
*
7024
* \param[in] ps_ctxt    enc_loop module ctxt pointer
7025
* \param[in] rd_opt_curr_idx index in the array of RDopt params
7026
* \param[in] func_proc_mode TU_EQ_CU or other case
7027
* \param[in] pu1_chrm_src  pointer to source data buffer
7028
* \param[in] chrm_src_stride   source buffer stride
7029
* \param[in] pu1_cu_left pointer to left recon data buffer
7030
* \param[in] pu1_cu_top  pointer to top recon data buffer
7031
* \param[in] pu1_cu_top_left pointer to top left recon data buffer
7032
* \param[in] cu_left_stride left recon buffer stride
7033
* \param[in] cu_pos_x position x of current CU in CTB
7034
* \param[in] cu_pos_y position y of current CU in CTB
7035
* \param[out] pi4_chrm_tu_bits pointer to store the total chroma bits
7036
*
7037
* \return
7038
*    Chroma coding cost (Cb and Cr included)
7039
*
7040
* \author
7041
*  Ittiam
7042
*
7043
*****************************************************************************
7044
*/
7045
LWORD64 ihevce_chroma_cu_prcs_rdopt(
7046
    ihevce_enc_loop_ctxt_t *ps_ctxt,
7047
    WORD32 rd_opt_curr_idx,
7048
    WORD32 func_proc_mode,
7049
    UWORD8 *pu1_chrm_src,
7050
    WORD32 chrm_src_stride,
7051
    UWORD8 *pu1_cu_left,
7052
    UWORD8 *pu1_cu_top,
7053
    UWORD8 *pu1_cu_top_left,
7054
    WORD32 cu_left_stride,
7055
    WORD32 cu_pos_x,
7056
    WORD32 cu_pos_y,
7057
    WORD32 *pi4_chrm_tu_bits,
7058
    WORD32 i4_alpha_stim_multiplier,
7059
    UWORD8 u1_is_cu_noisy)
7060
9.71M
{
7061
9.71M
    tu_enc_loop_out_t *ps_tu;
7062
9.71M
    tu_enc_loop_temp_prms_t *ps_tu_temp_prms;
7063
7064
9.71M
    ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
7065
7066
9.71M
    UWORD8 *pu1_pred;
7067
9.71M
    UWORD8 *pu1_recon;
7068
9.71M
    WORD32 i4_recon_stride;
7069
9.71M
    WORD32 cu_size, trans_size = 0;
7070
9.71M
    WORD32 pred_strd;
7071
9.71M
    WORD32 ctr, i4_subtu_idx;
7072
9.71M
    WORD32 scan_idx;
7073
9.71M
    WORD32 u1_is_cu_coded_old;
7074
9.71M
    WORD32 init_bytes_offset;
7075
7076
9.71M
    enc_loop_cu_final_prms_t *ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_curr_idx];
7077
9.71M
    recon_datastore_t *ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore;
7078
7079
9.71M
    WORD32 total_bytes_offset = 0;
7080
9.71M
    LWORD64 chrm_cod_cost = 0;
7081
9.71M
    WORD32 chrm_tu_bits = 0;
7082
9.71M
    WORD32 chrm_pred_mode = DM_CHROMA_IDX, luma_pred_mode = 35;
7083
9.71M
    LWORD64 i8_ssd_cb = 0;
7084
9.71M
    WORD32 i4_bits_cb = 0;
7085
9.71M
    LWORD64 i8_ssd_cr = 0;
7086
9.71M
    WORD32 i4_bits_cr = 0;
7087
9.71M
    UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
7088
9.71M
    UWORD8 u1_num_tus =
7089
        /* NumChromaTU's = 1, if TUSize = 4 and CUSize = 8 */
7090
9.71M
        (!ps_best_cu_prms->as_tu_enc_loop[0].s_tu.b3_size && ps_best_cu_prms->u1_intra_flag)
7091
9.71M
            ? 1
7092
9.71M
            : ps_best_cu_prms->u2_num_tus_in_cu;
7093
9.71M
    UWORD8 u1_num_subtus_in_tu = u1_is_422 + 1;
7094
9.71M
    UWORD8 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
7095
5.80M
                                    (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
7096
5.26M
                                    CONVERT_SSDS_TO_SPATIAL_DOMAIN;
7097
    /* Get the RDOPT cost of the best CU mode for early_exit */
7098
9.71M
    LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!rd_opt_curr_idx].i8_best_rdopt_cost;
7099
    /* Get the current running RDOPT (Luma RDOPT) for early_exit */
7100
9.71M
    LWORD64 curr_rdopt_cost = ps_ctxt->as_cu_prms[rd_opt_curr_idx].i8_curr_rdopt_cost;
7101
9.71M
    WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
7102
9.71M
    WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
7103
7104
9.71M
    ihevc_intra_pred_chroma_ref_substitution_fptr =
7105
9.71M
        ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
7106
7107
9.71M
    if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
7108
0
    {
7109
0
        u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
7110
0
                                 CONVERT_SSDS_TO_SPATIAL_DOMAIN;
7111
0
    }
7112
7113
    /* Store the init bytes offset from luma */
7114
9.71M
    init_bytes_offset = ps_best_cu_prms->i4_num_bytes_ecd_data;
7115
7116
    /* Unused pred buffer in merge_skip_pred_data_t structure is used as
7117
    Chroma pred storage buf. for final_recon function.
7118
    The buffer is split into two and used as a ping-pong buffer */
7119
9.71M
    pu1_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
7120
9.71M
               rd_opt_curr_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) +
7121
9.71M
                                  (u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
7122
7123
9.71M
    pred_strd = ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
7124
7125
9.71M
    pu1_recon = (UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs[0];
7126
9.71M
    i4_recon_stride = ps_recon_datastore->i4_chromaRecon_stride;
7127
9.71M
    cu_size = ps_best_cu_prms->u1_cu_size;
7128
9.71M
    chrm_tu_bits = 0;
7129
7130
    /* get the first TU pointer */
7131
9.71M
    ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
7132
    /* get the first TU enc_loop temp prms pointer */
7133
9.71M
    ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
7134
7135
9.71M
    if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag)
7136
6.68M
    {
7137
        /* Mode signalled by intra prediction for luma */
7138
6.68M
        luma_pred_mode = ps_best_cu_prms->au1_intra_pred_mode[0];
7139
7140
#if DISABLE_RDOQ_INTRA
7141
        i4_perform_rdoq = 0;
7142
#endif
7143
6.68M
    }
7144
7145
3.03M
    else
7146
3.03M
    {
7147
3.03M
        UWORD8 *pu1_pred_org = pu1_pred;
7148
7149
        /* ------ Motion Compensation for Chroma -------- */
7150
6.72M
        for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++)
7151
3.68M
        {
7152
3.68M
            pu_t *ps_pu;
7153
3.68M
            WORD32 inter_pu_wd;
7154
3.68M
            WORD32 inter_pu_ht;
7155
7156
3.68M
            ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr];
7157
7158
3.68M
            inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
7159
3.68M
            inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
7160
3.68M
            inter_pu_ht <<= u1_is_422;
7161
7162
3.68M
            ihevce_chroma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_strd);
7163
7164
3.68M
            if(2 == ps_best_cu_prms->u2_num_pus_in_cu)
7165
1.31M
            {
7166
                /* 2Nx__ partion case */
7167
1.31M
                if(inter_pu_wd == cu_size)
7168
1.04M
                {
7169
1.04M
                    pu1_pred += (inter_pu_ht * pred_strd);
7170
1.04M
                }
7171
7172
                /* __x2N partion case */
7173
1.31M
                if(inter_pu_ht == (cu_size >> (u1_is_422 == 0)))
7174
264k
                {
7175
264k
                    pu1_pred += inter_pu_wd;
7176
264k
                }
7177
1.31M
            }
7178
3.68M
        }
7179
7180
        /* restore the pred pointer to start for transform loop */
7181
3.03M
        pu1_pred = pu1_pred_org;
7182
3.03M
    }
7183
7184
    /* Used to store back only the luma based info. if SATD based chorma
7185
    mode also comes */
7186
9.71M
    u1_is_cu_coded_old = ps_best_cu_prms->u1_is_cu_coded;
7187
7188
    /* evaluate chroma candidates (same as luma) and
7189
    if INTRA & HIGH_QUALITY compare with best SATD mode */
7190
9.71M
    {
7191
9.71M
        WORD32 calc_recon = 0, deq_data_strd;
7192
9.71M
        WORD16 *pi2_deq_data;
7193
9.71M
        UWORD8 *pu1_ecd_data;
7194
9.71M
        UWORD8 u1_is_mode_eq_chroma_satd_mode = 0;
7195
7196
9.71M
        pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0];
7197
9.71M
        pi2_deq_data += ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx;
7198
9.71M
        deq_data_strd = cu_size;
7199
        /* update ecd buffer for storing coeff. */
7200
9.71M
        pu1_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0];
7201
9.71M
        pu1_ecd_data += init_bytes_offset;
7202
        /* store chroma starting index */
7203
9.71M
        ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx = init_bytes_offset;
7204
7205
        /* get the first TU pointer */
7206
9.71M
        ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
7207
9.71M
        ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
7208
7209
        /* Reset total_bytes_offset for each candidate */
7210
9.71M
        chrm_pred_mode = (u1_is_422 == 1) ? gau1_chroma422_intra_angle_mapping[luma_pred_mode]
7211
9.71M
                                          : luma_pred_mode;
7212
7213
9.71M
        total_bytes_offset = 0;
7214
7215
9.71M
        if(TU_EQ_SUBCU == func_proc_mode)
7216
405k
        {
7217
405k
            func_proc_mode = TU_EQ_CU_DIV2;
7218
405k
        }
7219
7220
        /* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and
7221
        TU_EQ_CU_DIV2 and  TU_EQ_SUBCU case */
7222
9.71M
        if(8 == cu_size)
7223
3.75M
        {
7224
3.75M
            func_proc_mode = TU_EQ_CU;
7225
3.75M
        }
7226
7227
        /* loop based on num tus in a cu */
7228
9.71M
        if(!ps_best_cu_prms->u1_intra_flag || !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd ||
7229
5.85M
           (ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd &&
7230
5.85M
            (chrm_pred_mode !=
7231
5.85M
             ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode].u1_best_cr_mode)))
7232
7.97M
        {
7233
            /* loop based on num tus in a cu */
7234
19.1M
            for(ctr = 0; ctr < u1_num_tus; ctr++)
7235
12.1M
            {
7236
12.1M
                WORD32 num_bytes = 0;
7237
12.1M
                LWORD64 curr_cb_cod_cost = 0;
7238
12.1M
                LWORD64 curr_cr_cod_cost = 0;
7239
12.1M
                WORD32 chrm_pred_func_idx = 0;
7240
12.1M
                UWORD8 u1_is_early_exit_condition_satisfied = 0;
7241
7242
                /* Default cb and cr offset initializatio for b3_chroma_intra_mode_idx=7   */
7243
                /* FIX for TU tree shrinkage caused by ecd data copies in final mode recon */
7244
12.1M
                ps_tu->s_tu.b1_cb_cbf = ps_tu->s_tu.b1_cr_cbf = 0;
7245
12.1M
                ps_tu->s_tu.b1_cb_cbf_subtu1 = ps_tu->s_tu.b1_cr_cbf_subtu1 = 0;
7246
12.1M
                ps_tu->ai4_cb_coeff_offset[0] = total_bytes_offset + init_bytes_offset;
7247
12.1M
                ps_tu->ai4_cr_coeff_offset[0] = total_bytes_offset + init_bytes_offset;
7248
12.1M
                ps_tu->ai4_cb_coeff_offset[1] = total_bytes_offset + init_bytes_offset;
7249
12.1M
                ps_tu->ai4_cr_coeff_offset[1] = total_bytes_offset + init_bytes_offset;
7250
12.1M
                ps_tu_temp_prms->ai2_cb_bytes_consumed[0] = 0;
7251
12.1M
                ps_tu_temp_prms->ai2_cr_bytes_consumed[0] = 0;
7252
12.1M
                ps_tu_temp_prms->ai2_cb_bytes_consumed[1] = 0;
7253
12.1M
                ps_tu_temp_prms->ai2_cr_bytes_consumed[1] = 0;
7254
7255
                /* TU level inits */
7256
                /* check if chroma present flag is set */
7257
12.1M
                if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
7258
11.4M
                {
7259
                    /* RDOPT copy States :  TU init (best until prev TU) to current */
7260
11.4M
                    COPY_CABAC_STATES(
7261
11.4M
                        &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
7262
11.4M
                             .s_cabac_ctxt.au1_ctxt_models[0],
7263
11.4M
                        &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7264
11.4M
                        IHEVC_CAB_CTXT_END);
7265
7266
                    /* get the current transform size */
7267
11.4M
                    trans_size = ps_tu->s_tu.b3_size;
7268
11.4M
                    trans_size = (1 << (trans_size + 1)); /* in chroma units */
7269
7270
                    /* since 2x2 transform is not allowed for chroma*/
7271
11.4M
                    if(2 == trans_size)
7272
852k
                    {
7273
852k
                        trans_size = 4;
7274
852k
                    }
7275
11.4M
                }
7276
7277
23.3M
                for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++)
7278
12.1M
                {
7279
12.1M
                    WORD32 cbf;
7280
12.1M
                    UWORD8 u1_is_recon_available;
7281
7282
12.1M
                    WORD32 nbr_flags = 0;
7283
12.1M
                    WORD32 zero_cols = 0;
7284
12.1M
                    WORD32 zero_rows = 0;
7285
7286
                    /* check if chroma present flag is set */
7287
12.1M
                    if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
7288
11.4M
                    {
7289
11.4M
                        UWORD8 *pu1_cur_pred;
7290
11.4M
                        UWORD8 *pu1_cur_recon;
7291
11.4M
                        UWORD8 *pu1_cur_src;
7292
11.4M
                        WORD16 *pi2_cur_deq_data;
7293
11.4M
                        WORD32 curr_pos_x, curr_pos_y;
7294
11.4M
                        LWORD64 trans_ssd_u, trans_ssd_v;
7295
7296
                        /* get the current sub-tu posx and posy w.r.t to cu */
7297
11.4M
                        curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3);
7298
11.4M
                        curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) +
7299
11.4M
                                     (i4_subtu_idx * trans_size);
7300
7301
                        /* 420sp case only vertical height will be half */
7302
11.4M
                        if(u1_is_422 == 0)
7303
11.4M
                        {
7304
11.4M
                            curr_pos_y >>= 1;
7305
11.4M
                        }
7306
7307
                        /* increment the pointers to start of current Sub-TU */
7308
11.4M
                        pu1_cur_recon = (pu1_recon + curr_pos_x);
7309
11.4M
                        pu1_cur_recon += (curr_pos_y * i4_recon_stride);
7310
11.4M
                        pu1_cur_src = (pu1_chrm_src + curr_pos_x);
7311
11.4M
                        pu1_cur_src += (curr_pos_y * chrm_src_stride);
7312
11.4M
                        pu1_cur_pred = (pu1_pred + curr_pos_x);
7313
11.4M
                        pu1_cur_pred += (curr_pos_y * pred_strd);
7314
11.4M
                        pi2_cur_deq_data = pi2_deq_data + curr_pos_x;
7315
11.4M
                        pi2_cur_deq_data += (curr_pos_y * deq_data_strd);
7316
7317
                        /* populate the coeffs scan idx */
7318
11.4M
                        scan_idx = SCAN_DIAG_UPRIGHT;
7319
7320
                        /* perform intra prediction only for Intra case */
7321
11.4M
                        if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag)
7322
7.62M
                        {
7323
7.62M
                            UWORD8 *pu1_top_left;
7324
7.62M
                            UWORD8 *pu1_top;
7325
7.62M
                            UWORD8 *pu1_left;
7326
7.62M
                            WORD32 left_strd;
7327
7328
7.62M
                            calc_recon = !u1_compute_spatial_ssd &&
7329
3.58M
                                         ((4 == u1_num_tus) || (u1_is_422 == 1)) &&
7330
1.50M
                                         (((u1_num_tus == 1) && (0 == i4_subtu_idx)) ||
7331
1.50M
                                          ((ctr == 3) && (0 == i4_subtu_idx) && (u1_is_422 == 1)) ||
7332
1.50M
                                          ((u1_num_tus == 4) && (ctr < 3)));
7333
7334
                            /* left cu boundary */
7335
7.62M
                            if(0 == curr_pos_x)
7336
5.83M
                            {
7337
5.83M
                                pu1_left = pu1_cu_left + curr_pos_y * cu_left_stride;
7338
5.83M
                                left_strd = cu_left_stride;
7339
5.83M
                            }
7340
1.78M
                            else
7341
1.78M
                            {
7342
1.78M
                                pu1_left = pu1_cur_recon - 2;
7343
1.78M
                                left_strd = i4_recon_stride;
7344
1.78M
                            }
7345
7346
                            /* top cu boundary */
7347
7.62M
                            if(0 == curr_pos_y)
7348
5.84M
                            {
7349
5.84M
                                pu1_top = pu1_cu_top + curr_pos_x;
7350
5.84M
                            }
7351
1.77M
                            else
7352
1.77M
                            {
7353
1.77M
                                pu1_top = pu1_cur_recon - i4_recon_stride;
7354
1.77M
                            }
7355
7356
                            /* by default top left is set to cu top left */
7357
7.62M
                            pu1_top_left = pu1_cu_top_left;
7358
7359
                            /* top left based on position */
7360
7.62M
                            if((0 != curr_pos_y) && (0 == curr_pos_x))
7361
896k
                            {
7362
896k
                                pu1_top_left = pu1_left - cu_left_stride;
7363
896k
                            }
7364
6.72M
                            else if(0 != curr_pos_x)
7365
1.78M
                            {
7366
1.78M
                                pu1_top_left = pu1_top - 2;
7367
1.78M
                            }
7368
7369
                            /* for 4x4 transforms based on intra pred mode scan is choosen*/
7370
7.62M
                            if(4 == trans_size)
7371
4.11M
                            {
7372
                                /* for modes from 22 upto 30 horizontal scan is used */
7373
4.11M
                                if((chrm_pred_mode > 21) && (chrm_pred_mode < 31))
7374
1.40M
                                {
7375
1.40M
                                    scan_idx = SCAN_HORZ;
7376
1.40M
                                }
7377
                                /* for modes from 6 upto 14 horizontal scan is used */
7378
2.70M
                                else if((chrm_pred_mode > 5) && (chrm_pred_mode < 15))
7379
738k
                                {
7380
738k
                                    scan_idx = SCAN_VERT;
7381
738k
                                }
7382
4.11M
                            }
7383
7384
7.62M
                            nbr_flags = ihevce_get_intra_chroma_tu_nbr(
7385
7.62M
                                ps_best_cu_prms->au4_nbr_flags[ctr],
7386
7.62M
                                i4_subtu_idx,
7387
7.62M
                                trans_size,
7388
7.62M
                                u1_is_422);
7389
7390
                            /* call the chroma reference array substitution */
7391
7.62M
                            ihevc_intra_pred_chroma_ref_substitution_fptr(
7392
7.62M
                                pu1_top_left,
7393
7.62M
                                pu1_top,
7394
7.62M
                                pu1_left,
7395
7.62M
                                left_strd,
7396
7.62M
                                trans_size,
7397
7.62M
                                nbr_flags,
7398
7.62M
                                (UWORD8 *)ps_ctxt->pv_ref_sub_out,
7399
7.62M
                                1);
7400
7401
                            /* use the look up to get the function idx */
7402
7.62M
                            chrm_pred_func_idx = g_i4_ip_funcs[chrm_pred_mode];
7403
7404
                            /* call the intra prediction function */
7405
7.62M
                            ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
7406
7.62M
                                (UWORD8 *)ps_ctxt->pv_ref_sub_out,
7407
7.62M
                                1,
7408
7.62M
                                pu1_cur_pred,
7409
7.62M
                                pred_strd,
7410
7.62M
                                trans_size,
7411
7.62M
                                chrm_pred_mode);
7412
7.62M
                        }
7413
7414
11.4M
                        if(!ctr && !i4_subtu_idx && (u1_compute_spatial_ssd || calc_recon))
7415
4.55M
                        {
7416
4.55M
                            ps_recon_datastore->au1_is_chromaRecon_available[0] =
7417
4.55M
                                !ps_best_cu_prms->u1_skip_flag;
7418
4.55M
                        }
7419
6.87M
                        else if(!ctr && !i4_subtu_idx)
7420
3.42M
                        {
7421
3.42M
                            ps_recon_datastore->au1_is_chromaRecon_available[0] = 0;
7422
3.42M
                        }
7423
                        /************************************************************/
7424
                        /* recon loop is done for all cases including skip cu       */
7425
                        /* This is because skipping chroma reisdual based on luma   */
7426
                        /* skip decision can lead to chroma artifacts               */
7427
                        /************************************************************/
7428
                        /************************************************************/
7429
                        /*In the high quality and medium speed modes, wherein chroma*/
7430
                        /*and luma costs are included in the total cost calculation */
7431
                        /*the cost is just a ssd cost, and not that obtained through*/
7432
                        /*iq_it path                                                */
7433
                        /************************************************************/
7434
11.4M
                        if(ps_best_cu_prms->u1_skip_flag == 0)
7435
10.5M
                        {
7436
10.5M
                            WORD32 tu_bits;
7437
7438
10.5M
                            cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
7439
10.5M
                                ps_ctxt,
7440
10.5M
                                pu1_cur_pred,
7441
10.5M
                                pred_strd,
7442
10.5M
                                pu1_cur_src,
7443
10.5M
                                chrm_src_stride,
7444
10.5M
                                pi2_cur_deq_data,
7445
10.5M
                                deq_data_strd,
7446
10.5M
                                pu1_cur_recon,
7447
10.5M
                                i4_recon_stride,
7448
10.5M
                                pu1_ecd_data + total_bytes_offset,
7449
10.5M
                                ps_ctxt->au1_cu_csbf,
7450
10.5M
                                ps_ctxt->i4_cu_csbf_strd,
7451
10.5M
                                trans_size,
7452
10.5M
                                scan_idx,
7453
10.5M
                                PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag,
7454
10.5M
                                &num_bytes,
7455
10.5M
                                &tu_bits,
7456
10.5M
                                &zero_cols,
7457
10.5M
                                &zero_rows,
7458
10.5M
                                &u1_is_recon_available,
7459
10.5M
                                i4_perform_sbh,
7460
10.5M
                                i4_perform_rdoq,
7461
10.5M
                                &trans_ssd_u,
7462
10.5M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7463
10.5M
                                i4_alpha_stim_multiplier,
7464
10.5M
                                u1_is_cu_noisy,
7465
10.5M
#endif
7466
10.5M
                                ps_best_cu_prms->u1_skip_flag,
7467
10.5M
                                u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
7468
10.5M
                                U_PLANE);
7469
7470
10.5M
                            if(u1_compute_spatial_ssd && u1_is_recon_available)
7471
5.65M
                            {
7472
5.65M
                                ps_recon_datastore
7473
5.65M
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7474
5.65M
                                                                        [i4_subtu_idx] = 0;
7475
5.65M
                            }
7476
4.88M
                            else
7477
4.88M
                            {
7478
4.88M
                                ps_recon_datastore
7479
4.88M
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7480
4.88M
                                                                        [i4_subtu_idx] = UCHAR_MAX;
7481
4.88M
                            }
7482
7483
#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7484
                            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7485
                            {
7486
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
7487
                                trans_ssd_u = ihevce_inject_stim_into_distortion(
7488
                                    pu1_cur_src,
7489
                                    chrm_src_stride,
7490
                                    pu1_cur_pred,
7491
                                    pred_strd,
7492
                                    trans_ssd_u,
7493
                                    i4_alpha_stim_multiplier,
7494
                                    trans_size,
7495
                                    0,
7496
                                    ps_ctxt->u1_enable_psyRDOPT,
7497
                                    U_PLANE);
7498
#else
7499
                                if(u1_compute_spatial_ssd && u1_is_recon_available)
7500
                                {
7501
                                    trans_ssd_u = ihevce_inject_stim_into_distortion(
7502
                                        pu1_cur_src,
7503
                                        chrm_src_stride,
7504
                                        pu1_cur_recon,
7505
                                        i4_recon_stride,
7506
                                        trans_ssd_u,
7507
                                        i4_alpha_stim_multiplier,
7508
                                        trans_size,
7509
                                        0,
7510
                                        ps_ctxt->u1_enable_psyRDOPT,
7511
                                        U_PLANE);
7512
                                }
7513
                                else
7514
                                {
7515
                                    trans_ssd_u = ihevce_inject_stim_into_distortion(
7516
                                        pu1_cur_src,
7517
                                        chrm_src_stride,
7518
                                        pu1_cur_pred,
7519
                                        pred_strd,
7520
                                        trans_ssd_u,
7521
                                        i4_alpha_stim_multiplier,
7522
                                        trans_size,
7523
                                        0,
7524
                                        ps_ctxt->u1_enable_psyRDOPT,
7525
                                        U_PLANE);
7526
                                }
7527
#endif
7528
                            }
7529
#endif
7530
7531
10.5M
                            curr_cb_cod_cost =
7532
10.5M
                                trans_ssd_u +
7533
10.5M
                                COMPUTE_RATE_COST_CLIP30(
7534
10.5M
                                    tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
7535
7536
10.5M
                            chrm_tu_bits += tu_bits;
7537
10.5M
                            i4_bits_cb += tu_bits;
7538
7539
                            /* RDOPT copy States :  New updated after curr TU to TU init */
7540
10.5M
                            if(0 != cbf)
7541
1.81M
                            {
7542
1.81M
                                COPY_CABAC_STATES(
7543
1.81M
                                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7544
1.81M
                                    &ps_ctxt->s_rdopt_entropy_ctxt
7545
1.81M
                                         .as_cu_entropy_ctxt[rd_opt_curr_idx]
7546
1.81M
                                         .s_cabac_ctxt.au1_ctxt_models[0],
7547
1.81M
                                    IHEVC_CAB_CTXT_END);
7548
1.81M
                            }
7549
                            /* RDOPT copy States :  Restoring back the Cb init state to Cr */
7550
8.72M
                            else
7551
8.72M
                            {
7552
8.72M
                                COPY_CABAC_STATES(
7553
8.72M
                                    &ps_ctxt->s_rdopt_entropy_ctxt
7554
8.72M
                                         .as_cu_entropy_ctxt[rd_opt_curr_idx]
7555
8.72M
                                         .s_cabac_ctxt.au1_ctxt_models[0],
7556
8.72M
                                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7557
8.72M
                                    IHEVC_CAB_CTXT_END);
7558
8.72M
                            }
7559
7560
                            /* If Intra and TU=CU/2, need recon for next TUs */
7561
10.5M
                            if(calc_recon)
7562
1.14M
                            {
7563
1.14M
                                ihevce_chroma_it_recon_fxn(
7564
1.14M
                                    ps_ctxt,
7565
1.14M
                                    pi2_cur_deq_data,
7566
1.14M
                                    deq_data_strd,
7567
1.14M
                                    pu1_cur_pred,
7568
1.14M
                                    pred_strd,
7569
1.14M
                                    pu1_cur_recon,
7570
1.14M
                                    i4_recon_stride,
7571
1.14M
                                    (pu1_ecd_data + total_bytes_offset),
7572
1.14M
                                    trans_size,
7573
1.14M
                                    cbf,
7574
1.14M
                                    zero_cols,
7575
1.14M
                                    zero_rows,
7576
1.14M
                                    U_PLANE);
7577
7578
1.14M
                                ps_recon_datastore
7579
1.14M
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7580
1.14M
                                                                        [i4_subtu_idx] = 0;
7581
1.14M
                            }
7582
9.39M
                            else
7583
9.39M
                            {
7584
9.39M
                                ps_recon_datastore
7585
9.39M
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7586
9.39M
                                                                        [i4_subtu_idx] = UCHAR_MAX;
7587
9.39M
                            }
7588
10.5M
                        }
7589
894k
                        else
7590
894k
                        {
7591
                            /* num bytes is set to 0 */
7592
894k
                            num_bytes = 0;
7593
7594
                            /* cbf is returned as 0 */
7595
894k
                            cbf = 0;
7596
7597
894k
                            curr_cb_cod_cost = trans_ssd_u =
7598
7599
894k
                                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
7600
894k
                                    pu1_cur_pred,
7601
894k
                                    pu1_cur_src,
7602
894k
                                    pred_strd,
7603
894k
                                    chrm_src_stride,
7604
894k
                                    trans_size,
7605
894k
                                    trans_size,
7606
894k
                                    U_PLANE);
7607
7608
894k
                            if(u1_compute_spatial_ssd)
7609
501k
                            {
7610
                                /* buffer copy from pred to recon */
7611
7612
501k
                                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
7613
501k
                                    pu1_cur_pred,
7614
501k
                                    pred_strd,
7615
501k
                                    pu1_cur_recon,
7616
501k
                                    i4_recon_stride,
7617
501k
                                    trans_size,
7618
501k
                                    trans_size,
7619
501k
                                    U_PLANE);
7620
7621
501k
                                ps_recon_datastore
7622
501k
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
7623
501k
                                                                        [i4_subtu_idx] = 0;
7624
501k
                            }
7625
7626
894k
                            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7627
0
                            {
7628
0
                                trans_ssd_u = ihevce_inject_stim_into_distortion(
7629
0
                                    pu1_cur_src,
7630
0
                                    chrm_src_stride,
7631
0
                                    pu1_cur_pred,
7632
0
                                    pred_strd,
7633
0
                                    trans_ssd_u,
7634
0
                                    i4_alpha_stim_multiplier,
7635
0
                                    trans_size,
7636
0
                                    0,
7637
0
                                    ps_ctxt->u1_enable_psyRDOPT,
7638
0
                                    U_PLANE);
7639
0
                            }
7640
7641
894k
#if ENABLE_INTER_ZCU_COST
7642
#if !WEIGH_CHROMA_COST
7643
                            /* cbf = 0, accumulate cu not coded cost */
7644
                            ps_ctxt->i8_cu_not_coded_cost += curr_cb_cod_cost;
7645
#else
7646
                            /* cbf = 0, accumulate cu not coded cost */
7647
7648
894k
                            ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
7649
894k
                                (curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7650
894k
                                 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7651
894k
                                CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7652
894k
#endif
7653
894k
#endif
7654
894k
                        }
7655
7656
#if !WEIGH_CHROMA_COST
7657
                        curr_rdopt_cost += curr_cb_cod_cost;
7658
#else
7659
11.4M
                        curr_rdopt_cost +=
7660
11.4M
                            ((curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7661
11.4M
                              (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7662
11.4M
                             CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7663
11.4M
#endif
7664
11.4M
                        chrm_cod_cost += curr_cb_cod_cost;
7665
11.4M
                        i8_ssd_cb += trans_ssd_u;
7666
7667
11.4M
                        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
7668
11.4M
                        {
7669
                            /* Early exit : If the current running cost exceeds
7670
                            the prev. best mode cost, break */
7671
11.4M
                            if(curr_rdopt_cost > prev_best_rdopt_cost)
7672
563k
                            {
7673
563k
                                u1_is_early_exit_condition_satisfied = 1;
7674
563k
                                break;
7675
563k
                            }
7676
11.4M
                        }
7677
7678
                        /* inter cu is coded if any of the tu is coded in it */
7679
10.8M
                        ps_best_cu_prms->u1_is_cu_coded |= cbf;
7680
7681
                        /* update CB related params */
7682
10.8M
                        ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] =
7683
10.8M
                            total_bytes_offset + init_bytes_offset;
7684
7685
10.8M
                        if(0 == i4_subtu_idx)
7686
10.8M
                        {
7687
10.8M
                            ps_tu->s_tu.b1_cb_cbf = cbf;
7688
10.8M
                        }
7689
0
                        else
7690
0
                        {
7691
0
                            ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf;
7692
0
                        }
7693
7694
10.8M
                        total_bytes_offset += num_bytes;
7695
7696
10.8M
                        ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] = zero_cols;
7697
10.8M
                        ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] = zero_rows;
7698
10.8M
                        ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes;
7699
7700
                        /* recon loop is done for non skip cases */
7701
10.8M
                        if(ps_best_cu_prms->u1_skip_flag == 0)
7702
10.1M
                        {
7703
10.1M
                            WORD32 tu_bits;
7704
7705
10.1M
                            cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
7706
10.1M
                                ps_ctxt,
7707
10.1M
                                pu1_cur_pred,
7708
10.1M
                                pred_strd,
7709
10.1M
                                pu1_cur_src,
7710
10.1M
                                chrm_src_stride,
7711
10.1M
                                pi2_cur_deq_data + trans_size,
7712
10.1M
                                deq_data_strd,
7713
10.1M
                                pu1_cur_recon,
7714
10.1M
                                i4_recon_stride,
7715
10.1M
                                pu1_ecd_data + total_bytes_offset,
7716
10.1M
                                ps_ctxt->au1_cu_csbf,
7717
10.1M
                                ps_ctxt->i4_cu_csbf_strd,
7718
10.1M
                                trans_size,
7719
10.1M
                                scan_idx,
7720
10.1M
                                PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag,
7721
10.1M
                                &num_bytes,
7722
10.1M
                                &tu_bits,
7723
10.1M
                                &zero_cols,
7724
10.1M
                                &zero_rows,
7725
10.1M
                                &u1_is_recon_available,
7726
10.1M
                                i4_perform_sbh,
7727
10.1M
                                i4_perform_rdoq,
7728
10.1M
                                &trans_ssd_v,
7729
10.1M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7730
10.1M
                                i4_alpha_stim_multiplier,
7731
10.1M
                                u1_is_cu_noisy,
7732
10.1M
#endif
7733
10.1M
                                ps_best_cu_prms->u1_skip_flag,
7734
10.1M
                                u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
7735
10.1M
                                V_PLANE);
7736
7737
10.1M
                            if(u1_compute_spatial_ssd && u1_is_recon_available)
7738
5.53M
                            {
7739
5.53M
                                ps_recon_datastore
7740
5.53M
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7741
5.53M
                                                                        [i4_subtu_idx] = 0;
7742
5.53M
                            }
7743
4.59M
                            else
7744
4.59M
                            {
7745
4.59M
                                ps_recon_datastore
7746
4.59M
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7747
4.59M
                                                                        [i4_subtu_idx] = UCHAR_MAX;
7748
4.59M
                            }
7749
7750
#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
7751
                            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7752
                            {
7753
#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
7754
                                trans_ssd_v = ihevce_inject_stim_into_distortion(
7755
                                    pu1_cur_src,
7756
                                    chrm_src_stride,
7757
                                    pu1_cur_pred,
7758
                                    pred_strd,
7759
                                    trans_ssd_v,
7760
                                    i4_alpha_stim_multiplier,
7761
                                    trans_size,
7762
                                    0,
7763
                                    ps_ctxt->u1_enable_psyRDOPT,
7764
                                    V_PLANE);
7765
#else
7766
                                if(u1_compute_spatial_ssd && u1_is_recon_available)
7767
                                {
7768
                                    trans_ssd_v = ihevce_inject_stim_into_distortion(
7769
                                        pu1_cur_src,
7770
                                        chrm_src_stride,
7771
                                        pu1_cur_recon,
7772
                                        i4_recon_stride,
7773
                                        trans_ssd_v,
7774
                                        i4_alpha_stim_multiplier,
7775
                                        trans_size,
7776
                                        0,
7777
                                        ps_ctxt->u1_enable_psyRDOPT,
7778
                                        V_PLANE);
7779
                                }
7780
                                else
7781
                                {
7782
                                    trans_ssd_v = ihevce_inject_stim_into_distortion(
7783
                                        pu1_cur_src,
7784
                                        chrm_src_stride,
7785
                                        pu1_cur_pred,
7786
                                        pred_strd,
7787
                                        trans_ssd_v,
7788
                                        i4_alpha_stim_multiplier,
7789
                                        trans_size,
7790
                                        0,
7791
                                        ps_ctxt->u1_enable_psyRDOPT,
7792
                                        V_PLANE);
7793
                                }
7794
#endif
7795
                            }
7796
#endif
7797
7798
10.1M
                            curr_cr_cod_cost =
7799
10.1M
                                trans_ssd_v +
7800
10.1M
                                COMPUTE_RATE_COST_CLIP30(
7801
10.1M
                                    tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
7802
10.1M
                            chrm_tu_bits += tu_bits;
7803
10.1M
                            i4_bits_cr += tu_bits;
7804
7805
                            /* RDOPT copy States :  New updated after curr TU to TU init */
7806
10.1M
                            if(0 != cbf)
7807
1.56M
                            {
7808
1.56M
                                COPY_CABAC_STATES(
7809
1.56M
                                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7810
1.56M
                                    &ps_ctxt->s_rdopt_entropy_ctxt
7811
1.56M
                                         .as_cu_entropy_ctxt[rd_opt_curr_idx]
7812
1.56M
                                         .s_cabac_ctxt.au1_ctxt_models[0],
7813
1.56M
                                    IHEVC_CAB_CTXT_END);
7814
1.56M
                            }
7815
                            /* RDOPT copy States :  Restoring back the Cb init state to Cr */
7816
8.57M
                            else
7817
8.57M
                            {
7818
8.57M
                                COPY_CABAC_STATES(
7819
8.57M
                                    &ps_ctxt->s_rdopt_entropy_ctxt
7820
8.57M
                                         .as_cu_entropy_ctxt[rd_opt_curr_idx]
7821
8.57M
                                         .s_cabac_ctxt.au1_ctxt_models[0],
7822
8.57M
                                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
7823
8.57M
                                    IHEVC_CAB_CTXT_END);
7824
8.57M
                            }
7825
7826
                            /* If Intra and TU=CU/2, need recon for next TUs */
7827
10.1M
                            if(calc_recon)
7828
1.12M
                            {
7829
1.12M
                                ihevce_chroma_it_recon_fxn(
7830
1.12M
                                    ps_ctxt,
7831
1.12M
                                    (pi2_cur_deq_data + trans_size),
7832
1.12M
                                    deq_data_strd,
7833
1.12M
                                    pu1_cur_pred,
7834
1.12M
                                    pred_strd,
7835
1.12M
                                    pu1_cur_recon,
7836
1.12M
                                    i4_recon_stride,
7837
1.12M
                                    (pu1_ecd_data + total_bytes_offset),
7838
1.12M
                                    trans_size,
7839
1.12M
                                    cbf,
7840
1.12M
                                    zero_cols,
7841
1.12M
                                    zero_rows,
7842
1.12M
                                    V_PLANE);
7843
7844
1.12M
                                ps_recon_datastore
7845
1.12M
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7846
1.12M
                                                                        [i4_subtu_idx] = 0;
7847
1.12M
                            }
7848
9.01M
                            else
7849
9.01M
                            {
7850
9.01M
                                ps_recon_datastore
7851
9.01M
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7852
9.01M
                                                                        [i4_subtu_idx] = UCHAR_MAX;
7853
9.01M
                            }
7854
10.1M
                        }
7855
733k
                        else
7856
733k
                        {
7857
                            /* num bytes is set to 0 */
7858
733k
                            num_bytes = 0;
7859
7860
                            /* cbf is returned as 0 */
7861
733k
                            cbf = 0;
7862
7863
733k
                            curr_cr_cod_cost = trans_ssd_v =
7864
7865
733k
                                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
7866
733k
                                    pu1_cur_pred,
7867
733k
                                    pu1_cur_src,
7868
733k
                                    pred_strd,
7869
733k
                                    chrm_src_stride,
7870
733k
                                    trans_size,
7871
733k
                                    trans_size,
7872
733k
                                    V_PLANE);
7873
7874
733k
                            if(u1_compute_spatial_ssd)
7875
421k
                            {
7876
                                /* buffer copy from pred to recon */
7877
421k
                                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
7878
421k
                                    pu1_cur_pred,
7879
421k
                                    pred_strd,
7880
421k
                                    pu1_cur_recon,
7881
421k
                                    i4_recon_stride,
7882
421k
                                    trans_size,
7883
421k
                                    trans_size,
7884
421k
                                    V_PLANE);
7885
7886
421k
                                ps_recon_datastore
7887
421k
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
7888
421k
                                                                        [i4_subtu_idx] = 0;
7889
421k
                            }
7890
7891
733k
                            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
7892
0
                            {
7893
0
                                trans_ssd_v = ihevce_inject_stim_into_distortion(
7894
0
                                    pu1_cur_src,
7895
0
                                    chrm_src_stride,
7896
0
                                    pu1_cur_pred,
7897
0
                                    pred_strd,
7898
0
                                    trans_ssd_v,
7899
0
                                    i4_alpha_stim_multiplier,
7900
0
                                    trans_size,
7901
0
                                    0,
7902
0
                                    ps_ctxt->u1_enable_psyRDOPT,
7903
0
                                    V_PLANE);
7904
0
                            }
7905
7906
733k
#if ENABLE_INTER_ZCU_COST
7907
#if !WEIGH_CHROMA_COST
7908
                            /* cbf = 0, accumulate cu not coded cost */
7909
                            ps_ctxt->i8_cu_not_coded_cost += curr_cr_cod_cost;
7910
#else
7911
                            /* cbf = 0, accumulate cu not coded cost */
7912
7913
733k
                            ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
7914
733k
                                (curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7915
733k
                                 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7916
733k
                                CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7917
733k
#endif
7918
733k
#endif
7919
733k
                        }
7920
7921
#if !WEIGH_CHROMA_COST
7922
                        curr_rdopt_cost += curr_cr_cod_cost;
7923
#else
7924
10.8M
                        curr_rdopt_cost +=
7925
10.8M
                            ((curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
7926
10.8M
                              (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
7927
10.8M
                             CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
7928
10.8M
#endif
7929
7930
10.8M
                        chrm_cod_cost += curr_cr_cod_cost;
7931
10.8M
                        i8_ssd_cr += trans_ssd_v;
7932
7933
10.8M
                        if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
7934
10.8M
                        {
7935
                            /* Early exit : If the current running cost exceeds
7936
                            the prev. best mode cost, break */
7937
10.8M
                            if(curr_rdopt_cost > prev_best_rdopt_cost)
7938
460k
                            {
7939
460k
                                u1_is_early_exit_condition_satisfied = 1;
7940
460k
                                break;
7941
460k
                            }
7942
10.8M
                        }
7943
7944
                        /* inter cu is coded if any of the tu is coded in it */
7945
10.4M
                        ps_best_cu_prms->u1_is_cu_coded |= cbf;
7946
7947
                        /* update CR related params */
7948
10.4M
                        ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] =
7949
10.4M
                            total_bytes_offset + init_bytes_offset;
7950
7951
10.4M
                        if(0 == i4_subtu_idx)
7952
10.4M
                        {
7953
10.4M
                            ps_tu->s_tu.b1_cr_cbf = cbf;
7954
10.4M
                        }
7955
0
                        else
7956
0
                        {
7957
0
                            ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf;
7958
0
                        }
7959
7960
10.4M
                        total_bytes_offset += num_bytes;
7961
7962
10.4M
                        ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] = zero_cols;
7963
10.4M
                        ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] = zero_rows;
7964
10.4M
                        ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes;
7965
10.4M
                    }
7966
733k
                    else
7967
733k
                    {
7968
733k
                        ps_recon_datastore
7969
733k
                            ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx] =
7970
733k
                            UCHAR_MAX;
7971
733k
                        ps_recon_datastore
7972
733k
                            ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx] =
7973
733k
                            UCHAR_MAX;
7974
733k
                    }
7975
12.1M
                }
7976
7977
12.1M
                if(u1_is_early_exit_condition_satisfied)
7978
1.02M
                {
7979
1.02M
                    break;
7980
1.02M
                }
7981
7982
                /* loop increments */
7983
11.1M
                ps_tu++;
7984
11.1M
                ps_tu_temp_prms++;
7985
11.1M
            }
7986
7987
            /* Signal as luma mode. HIGH_QUALITY may update it */
7988
7.97M
            ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
7989
7990
            /* modify the cost chrm_cod_cost */
7991
7.97M
            if(ps_ctxt->u1_enable_psyRDOPT)
7992
0
            {
7993
0
                UWORD8 *pu1_recon_cu;
7994
0
                WORD32 recon_stride;
7995
0
                WORD32 curr_pos_x;
7996
0
                WORD32 curr_pos_y;
7997
0
                WORD32 start_index;
7998
0
                WORD32 num_horz_cu_in_ctb;
7999
0
                WORD32 had_block_size;
8000
                /* TODO: sreenivasa ctb size has to be used appropriately */
8001
0
                had_block_size = 8;
8002
0
                num_horz_cu_in_ctb = 2 * 64 / had_block_size;
8003
8004
0
                curr_pos_x = cu_pos_x << 3; /* pel units */
8005
0
                curr_pos_y = cu_pos_y << 3; /* pel units */
8006
0
                recon_stride = i4_recon_stride;
8007
0
                pu1_recon_cu = pu1_recon;
8008
8009
                /* start index to index the source satd of curr cu in the current ctb */
8010
0
                start_index = 2 * (curr_pos_x / had_block_size) +
8011
0
                              (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
8012
8013
0
                {
8014
0
                    chrm_cod_cost += ihevce_psy_rd_cost_croma(
8015
0
                        ps_ctxt->ai4_source_chroma_satd,
8016
0
                        pu1_recon,
8017
0
                        recon_stride,
8018
0
                        1,  //
8019
0
                        cu_size,
8020
0
                        0,  // pic type
8021
0
                        0,  //layer id
8022
0
                        ps_ctxt->i4_satd_lamda,  // lambda
8023
0
                        start_index,
8024
0
                        ps_ctxt->u1_is_input_data_hbd,  // 8 bit
8025
0
                        ps_ctxt->u1_chroma_array_type,
8026
0
                        &ps_ctxt->s_cmn_opt_func
8027
8028
0
                    );  // chroma subsampling 420
8029
0
                }
8030
0
            }
8031
7.97M
        }
8032
1.73M
        else
8033
1.73M
        {
8034
1.73M
            u1_is_mode_eq_chroma_satd_mode = 1;
8035
1.73M
            chrm_cod_cost = MAX_COST_64;
8036
1.73M
        }
8037
8038
        /* If Intra Block and preset is HIGH QUALITY, then compare with best SATD mode */
8039
9.71M
        if((PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag) &&
8040
6.68M
           (1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd))
8041
5.85M
        {
8042
5.85M
            if(64 == cu_size)
8043
119k
            {
8044
119k
                ASSERT(TU_EQ_CU != func_proc_mode);
8045
119k
            }
8046
8047
5.85M
            if(ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode]
8048
5.85M
                   .i8_chroma_best_rdopt < chrm_cod_cost)
8049
2.13M
            {
8050
2.13M
                UWORD8 *pu1_src;
8051
2.13M
                UWORD8 *pu1_ecd_data_src_cb;
8052
2.13M
                UWORD8 *pu1_ecd_data_src_cr;
8053
8054
2.13M
                chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt =
8055
2.13M
                    &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode];
8056
8057
2.13M
                UWORD8 *pu1_dst = &ps_ctxt->au1_rdopt_init_ctxt_models[0];
8058
2.13M
                WORD32 ai4_ecd_data_cb_offset[2] = { 0, 0 };
8059
2.13M
                WORD32 ai4_ecd_data_cr_offset[2] = { 0, 0 };
8060
8061
2.13M
                pu1_src = &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0];
8062
2.13M
                chrm_cod_cost = ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt;
8063
2.13M
                chrm_pred_mode = ps_chr_intra_satd_ctxt->u1_best_cr_mode;
8064
2.13M
                chrm_tu_bits = ps_chr_intra_satd_ctxt->i4_chrm_tu_bits;
8065
8066
2.13M
                if(u1_is_mode_eq_chroma_satd_mode)
8067
1.73M
                {
8068
1.73M
                    chrm_cod_cost -= ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode;
8069
1.73M
                }
8070
8071
                /* Resetting total_bytes_offset to 0 */
8072
2.13M
                total_bytes_offset = 0;
8073
8074
                /* Update the CABAC state corresponding to chroma only */
8075
                /* Chroma Cbf */
8076
2.13M
                memcpy(pu1_dst + IHEVC_CAB_CBCR_IDX, pu1_src + IHEVC_CAB_CBCR_IDX, 2);
8077
                /* Chroma transform skip */
8078
2.13M
                memcpy(pu1_dst + IHEVC_CAB_TFM_SKIP12, pu1_src + IHEVC_CAB_TFM_SKIP12, 1);
8079
                /* Chroma last coeff x prefix */
8080
2.13M
                memcpy(
8081
2.13M
                    pu1_dst + IHEVC_CAB_COEFFX_PREFIX + 15,
8082
2.13M
                    pu1_src + IHEVC_CAB_COEFFX_PREFIX + 15,
8083
2.13M
                    3);
8084
                /* Chroma last coeff y prefix */
8085
2.13M
                memcpy(
8086
2.13M
                    pu1_dst + IHEVC_CAB_COEFFY_PREFIX + 15,
8087
2.13M
                    pu1_src + IHEVC_CAB_COEFFY_PREFIX + 15,
8088
2.13M
                    3);
8089
                /* Chroma csbf */
8090
2.13M
                memcpy(
8091
2.13M
                    pu1_dst + IHEVC_CAB_CODED_SUBLK_IDX + 2,
8092
2.13M
                    pu1_src + IHEVC_CAB_CODED_SUBLK_IDX + 2,
8093
2.13M
                    2);
8094
                /* Chroma sig coeff flags */
8095
2.13M
                memcpy(
8096
2.13M
                    pu1_dst + IHEVC_CAB_COEFF_FLAG + 27, pu1_src + IHEVC_CAB_COEFF_FLAG + 27, 15);
8097
                /* Chroma absgt1 flags */
8098
2.13M
                memcpy(
8099
2.13M
                    pu1_dst + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16,
8100
2.13M
                    pu1_src + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16,
8101
2.13M
                    8);
8102
                /* Chroma absgt2 flags */
8103
2.13M
                memcpy(
8104
2.13M
                    pu1_dst + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4,
8105
2.13M
                    pu1_src + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4,
8106
2.13M
                    2);
8107
8108
2.13M
                ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
8109
2.13M
                ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
8110
8111
                /* update to luma decision as we update chroma in final mode */
8112
2.13M
                ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded_old;
8113
8114
5.46M
                for(ctr = 0; ctr < u1_num_tus; ctr++)
8115
3.32M
                {
8116
6.65M
                    for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++)
8117
3.32M
                    {
8118
3.32M
                        WORD32 cbf;
8119
3.32M
                        WORD32 num_bytes;
8120
8121
3.32M
                        pu1_ecd_data_src_cb =
8122
3.32M
                            &ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0];
8123
3.32M
                        pu1_ecd_data_src_cr =
8124
3.32M
                            &ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0];
8125
8126
                        /* check if chroma present flag is set */
8127
3.32M
                        if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
8128
3.32M
                        {
8129
3.32M
                            UWORD8 *pu1_cur_pred_dest;
8130
3.32M
                            UWORD8 *pu1_cur_pred_src;
8131
3.32M
                            WORD32 pred_src_strd;
8132
3.32M
                            WORD16 *pi2_cur_deq_data_dest;
8133
3.32M
                            WORD16 *pi2_cur_deq_data_src_cb;
8134
3.32M
                            WORD16 *pi2_cur_deq_data_src_cr;
8135
3.32M
                            WORD32 deq_src_strd;
8136
8137
3.32M
                            WORD32 curr_pos_x, curr_pos_y;
8138
8139
3.32M
                            trans_size = ps_tu->s_tu.b3_size;
8140
3.32M
                            trans_size = (1 << (trans_size + 1)); /* in chroma units */
8141
8142
                            /*Deriving stride values*/
8143
3.32M
                            pred_src_strd = ps_chr_intra_satd_ctxt->i4_pred_stride;
8144
3.32M
                            deq_src_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride;
8145
8146
                            /* since 2x2 transform is not allowed for chroma*/
8147
3.32M
                            if(2 == trans_size)
8148
216k
                            {
8149
216k
                                trans_size = 4;
8150
216k
                            }
8151
8152
                            /* get the current tu posx and posy w.r.t to cu */
8153
3.32M
                            curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3);
8154
3.32M
                            curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) +
8155
3.32M
                                         (i4_subtu_idx * trans_size);
8156
8157
                            /* 420sp case only vertical height will be half */
8158
3.32M
                            if(0 == u1_is_422)
8159
3.32M
                            {
8160
3.32M
                                curr_pos_y >>= 1;
8161
3.32M
                            }
8162
8163
                            /* increment the pointers to start of current TU  */
8164
3.32M
                            pu1_cur_pred_src =
8165
3.32M
                                ((UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data + curr_pos_x);
8166
3.32M
                            pu1_cur_pred_src += (curr_pos_y * pred_src_strd);
8167
3.32M
                            pu1_cur_pred_dest = (pu1_pred + curr_pos_x);
8168
3.32M
                            pu1_cur_pred_dest += (curr_pos_y * pred_strd);
8169
8170
3.32M
                            pi2_cur_deq_data_src_cb =
8171
3.32M
                                &ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] + (curr_pos_x >> 1);
8172
3.32M
                            pi2_cur_deq_data_src_cr =
8173
3.32M
                                &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] + (curr_pos_x >> 1);
8174
3.32M
                            pi2_cur_deq_data_src_cb += (curr_pos_y * deq_src_strd);
8175
3.32M
                            pi2_cur_deq_data_src_cr += (curr_pos_y * deq_src_strd);
8176
3.32M
                            pi2_cur_deq_data_dest = pi2_deq_data + curr_pos_x;
8177
3.32M
                            pi2_cur_deq_data_dest += (curr_pos_y * deq_data_strd);
8178
8179
                            /*Overwriting deq data with that belonging to the winning special mode
8180
                            (luma mode !=  chroma mode)
8181
                            ihevce_copy_2d takes source and dest arguments as UWORD8 *. We have to
8182
                            correspondingly manipulate to copy WORD16 data*/
8183
8184
3.32M
                            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
8185
3.32M
                                (UWORD8 *)pi2_cur_deq_data_dest,
8186
3.32M
                                (deq_data_strd << 1),
8187
3.32M
                                (UWORD8 *)pi2_cur_deq_data_src_cb,
8188
3.32M
                                (deq_src_strd << 1),
8189
3.32M
                                (trans_size << 1),
8190
3.32M
                                trans_size);
8191
8192
3.32M
                            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
8193
3.32M
                                (UWORD8 *)(pi2_cur_deq_data_dest + trans_size),
8194
3.32M
                                (deq_data_strd << 1),
8195
3.32M
                                (UWORD8 *)pi2_cur_deq_data_src_cr,
8196
3.32M
                                (deq_src_strd << 1),
8197
3.32M
                                (trans_size << 1),
8198
3.32M
                                trans_size);
8199
8200
                            /*Overwriting pred data with that belonging to the winning special mode
8201
                            (luma mode !=  chroma mode)*/
8202
8203
3.32M
                            ps_ctxt->s_cmn_opt_func.pf_copy_2d(
8204
3.32M
                                pu1_cur_pred_dest,
8205
3.32M
                                pred_strd,
8206
3.32M
                                pu1_cur_pred_src,
8207
3.32M
                                pred_src_strd,
8208
3.32M
                                (trans_size << 1),
8209
3.32M
                                trans_size);
8210
8211
3.32M
                            num_bytes = ps_chr_intra_satd_ctxt
8212
3.32M
                                            ->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr];
8213
3.32M
                            cbf = ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr];
8214
                            /* inter cu is coded if any of the tu is coded in it */
8215
3.32M
                            ps_best_cu_prms->u1_is_cu_coded |= cbf;
8216
8217
                            /* update CB related params */
8218
3.32M
                            ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] =
8219
3.32M
                                total_bytes_offset + init_bytes_offset;
8220
8221
3.32M
                            if(0 == i4_subtu_idx)
8222
3.32M
                            {
8223
3.32M
                                ps_tu->s_tu.b1_cb_cbf = cbf;
8224
3.32M
                            }
8225
0
                            else
8226
0
                            {
8227
0
                                ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf;
8228
0
                            }
8229
8230
                            /*Overwriting the cb ecd data corresponding to the special mode*/
8231
3.32M
                            if(0 != num_bytes)
8232
506k
                            {
8233
506k
                                memcpy(
8234
506k
                                    (pu1_ecd_data + total_bytes_offset),
8235
506k
                                    pu1_ecd_data_src_cb + ai4_ecd_data_cb_offset[i4_subtu_idx],
8236
506k
                                    num_bytes);
8237
506k
                            }
8238
8239
3.32M
                            total_bytes_offset += num_bytes;
8240
3.32M
                            ai4_ecd_data_cb_offset[i4_subtu_idx] += num_bytes;
8241
3.32M
                            ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes;
8242
8243
3.32M
                            num_bytes = ps_chr_intra_satd_ctxt
8244
3.32M
                                            ->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr];
8245
3.32M
                            cbf = ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr];
8246
                            /* inter cu is coded if any of the tu is coded in it */
8247
3.32M
                            ps_best_cu_prms->u1_is_cu_coded |= cbf;
8248
8249
                            /*Overwriting the cr ecd data corresponding to the special mode*/
8250
3.32M
                            if(0 != num_bytes)
8251
479k
                            {
8252
479k
                                memcpy(
8253
479k
                                    (pu1_ecd_data + total_bytes_offset),
8254
479k
                                    pu1_ecd_data_src_cr + ai4_ecd_data_cr_offset[i4_subtu_idx],
8255
479k
                                    num_bytes);
8256
479k
                            }
8257
8258
                            /* update CR related params */
8259
3.32M
                            ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] =
8260
3.32M
                                total_bytes_offset + init_bytes_offset;
8261
8262
3.32M
                            if(0 == i4_subtu_idx)
8263
3.32M
                            {
8264
3.32M
                                ps_tu->s_tu.b1_cr_cbf = cbf;
8265
3.32M
                            }
8266
0
                            else
8267
0
                            {
8268
0
                                ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf;
8269
0
                            }
8270
8271
3.32M
                            total_bytes_offset += num_bytes;
8272
3.32M
                            ai4_ecd_data_cr_offset[i4_subtu_idx] += num_bytes;
8273
8274
                            /*Updating zero rows and zero cols*/
8275
3.32M
                            ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] =
8276
3.32M
                                ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr];
8277
3.32M
                            ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] =
8278
3.32M
                                ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr];
8279
3.32M
                            ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] =
8280
3.32M
                                ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr];
8281
3.32M
                            ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] =
8282
3.32M
                                ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr];
8283
8284
3.32M
                            ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes;
8285
8286
3.32M
                            if((u1_num_tus > 1) &&
8287
1.59M
                               ps_recon_datastore->au1_is_chromaRecon_available[2])
8288
1.59M
                            {
8289
1.59M
                                ps_recon_datastore
8290
1.59M
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
8291
1.59M
                                                                        [i4_subtu_idx] = 2;
8292
1.59M
                                ps_recon_datastore
8293
1.59M
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
8294
1.59M
                                                                        [i4_subtu_idx] = 2;
8295
1.59M
                            }
8296
1.73M
                            else if(
8297
1.73M
                                (1 == u1_num_tus) &&
8298
1.73M
                                ps_recon_datastore->au1_is_chromaRecon_available[1])
8299
965k
                            {
8300
965k
                                ps_recon_datastore
8301
965k
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
8302
965k
                                                                        [i4_subtu_idx] = 1;
8303
965k
                                ps_recon_datastore
8304
965k
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
8305
965k
                                                                        [i4_subtu_idx] = 1;
8306
965k
                            }
8307
771k
                            else
8308
771k
                            {
8309
771k
                                ps_recon_datastore
8310
771k
                                    ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
8311
771k
                                                                        [i4_subtu_idx] = UCHAR_MAX;
8312
771k
                                ps_recon_datastore
8313
771k
                                    ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
8314
771k
                                                                        [i4_subtu_idx] = UCHAR_MAX;
8315
771k
                            }
8316
3.32M
                        }
8317
3.32M
                    }
8318
8319
                    /* loop increments */
8320
3.32M
                    ps_tu++;
8321
3.32M
                    ps_tu_temp_prms++;
8322
3.32M
                }
8323
2.13M
            }
8324
8325
5.85M
            if(!u1_is_422)
8326
5.85M
            {
8327
5.85M
                if(chrm_pred_mode == luma_pred_mode)
8328
5.46M
                {
8329
5.46M
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
8330
5.46M
                }
8331
396k
                else if(chrm_pred_mode == 0)
8332
72.0k
                {
8333
72.0k
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 0;
8334
72.0k
                }
8335
324k
                else if(chrm_pred_mode == 1)
8336
96.8k
                {
8337
96.8k
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 3;
8338
96.8k
                }
8339
227k
                else if(chrm_pred_mode == 10)
8340
167k
                {
8341
167k
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 2;
8342
167k
                }
8343
60.1k
                else if(chrm_pred_mode == 26)
8344
60.1k
                {
8345
60.1k
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 1;
8346
60.1k
                }
8347
0
                else
8348
0
                {
8349
0
                    ASSERT(0); /*Should not come here*/
8350
0
                }
8351
5.85M
            }
8352
0
            else
8353
0
            {
8354
0
                if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[luma_pred_mode])
8355
0
                {
8356
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
8357
0
                }
8358
0
                else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[0])
8359
0
                {
8360
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 0;
8361
0
                }
8362
0
                else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[1])
8363
0
                {
8364
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 3;
8365
0
                }
8366
0
                else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[10])
8367
0
                {
8368
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 2;
8369
0
                }
8370
0
                else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[26])
8371
0
                {
8372
0
                    ps_best_cu_prms->u1_chroma_intra_pred_mode = 1;
8373
0
                }
8374
0
                else
8375
0
                {
8376
0
                    ASSERT(0); /*Should not come here*/
8377
0
                }
8378
0
            }
8379
5.85M
        }
8380
8381
        /* Store the actual chroma mode */
8382
9.71M
        ps_best_cu_prms->u1_chroma_intra_pred_actual_mode = chrm_pred_mode;
8383
9.71M
    }
8384
8385
    /* update the total bytes produced */
8386
9.71M
    ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes_offset + init_bytes_offset;
8387
8388
    /* store the final chrm bits accumulated */
8389
9.71M
    *pi4_chrm_tu_bits = chrm_tu_bits;
8390
8391
9.71M
    return (chrm_cod_cost);
8392
9.71M
}
8393
8394
/*!
8395
******************************************************************************
8396
* \if Function name : ihevce_final_rdopt_mode_prcs \endif
8397
*
8398
* \brief
8399
*    Final RDOPT mode process function. Performs Recon computation for the
8400
*    final mode. Re-use or Compute pred, iq-data, coeff based on the flags.
8401
*
8402
* \param[in] ps_ctxt : pointer to enc_loop module
8403
* \param[in] ps_prms : pointer to struct containing requisite parameters
8404
*
8405
* \return
8406
*    None
8407
*
8408
* \author
8409
*  Ittiam
8410
*
8411
*****************************************************************************
8412
*/
8413
void ihevce_final_rdopt_mode_prcs(
8414
    ihevce_enc_loop_ctxt_t *ps_ctxt, final_mode_process_prms_t *ps_prms)
8415
2.48M
{
8416
2.48M
    enc_loop_cu_final_prms_t *ps_best_cu_prms;
8417
2.48M
    tu_enc_loop_out_t *ps_tu_enc_loop;
8418
2.48M
    tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms;
8419
2.48M
    nbr_avail_flags_t s_nbr;
8420
2.48M
    recon_datastore_t *ps_recon_datastore;
8421
8422
2.48M
    ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
8423
2.48M
    ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
8424
2.48M
    ihevc_intra_pred_ref_filtering_ft *ihevc_intra_pred_ref_filtering_fptr;
8425
8426
2.48M
    WORD32 num_tu_in_cu;
8427
2.48M
    LWORD64 rd_opt_cost;
8428
2.48M
    WORD32 ctr;
8429
2.48M
    WORD32 i4_subtu_idx;
8430
2.48M
    WORD32 cu_size;
8431
2.48M
    WORD32 cu_pos_x, cu_pos_y;
8432
2.48M
    WORD32 chrm_present_flag = 1;
8433
2.48M
    WORD32 num_bytes, total_bytes = 0;
8434
2.48M
    WORD32 chrm_ctr = 0;
8435
2.48M
    WORD32 u1_is_cu_coded;
8436
2.48M
    UWORD8 *pu1_old_ecd_data;
8437
2.48M
    UWORD8 *pu1_chrm_old_ecd_data;
8438
2.48M
    UWORD8 *pu1_cur_pred;
8439
2.48M
    WORD16 *pi2_deq_data;
8440
2.48M
    WORD16 *pi2_chrm_deq_data;
8441
2.48M
    WORD16 *pi2_cur_deq_data;
8442
2.48M
    WORD16 *pi2_cur_deq_data_chrm;
8443
2.48M
    UWORD8 *pu1_cur_luma_recon;
8444
2.48M
    UWORD8 *pu1_cur_chroma_recon;
8445
2.48M
    UWORD8 *pu1_cur_src;
8446
2.48M
    UWORD8 *pu1_cur_src_chrm;
8447
2.48M
    UWORD8 *pu1_cur_pred_chrm;
8448
2.48M
    UWORD8 *pu1_intra_pred_mode;
8449
2.48M
    UWORD32 *pu4_nbr_flags;
8450
2.48M
    LWORD64 i8_ssd;
8451
8452
2.48M
    cu_nbr_prms_t *ps_cu_nbr_prms = ps_prms->ps_cu_nbr_prms;
8453
2.48M
    cu_inter_cand_t *ps_best_inter_cand = ps_prms->ps_best_inter_cand;
8454
2.48M
    enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms = ps_prms->ps_chrm_cu_buf_prms;
8455
8456
2.48M
    WORD32 packed_pred_mode = ps_prms->packed_pred_mode;
8457
2.48M
    WORD32 rd_opt_best_idx = ps_prms->rd_opt_best_idx;
8458
2.48M
    UWORD8 *pu1_src = (UWORD8 *)ps_prms->pv_src;
8459
2.48M
    WORD32 src_strd = ps_prms->src_strd;
8460
2.48M
    UWORD8 *pu1_pred = (UWORD8 *)ps_prms->pv_pred;
8461
2.48M
    WORD32 pred_strd = ps_prms->pred_strd;
8462
2.48M
    UWORD8 *pu1_pred_chrm = (UWORD8 *)ps_prms->pv_pred_chrm;
8463
2.48M
    WORD32 pred_chrm_strd = ps_prms->pred_chrm_strd;
8464
2.48M
    UWORD8 *pu1_final_ecd_data = ps_prms->pu1_final_ecd_data;
8465
2.48M
    UWORD8 *pu1_csbf_buf = ps_prms->pu1_csbf_buf;
8466
2.48M
    WORD32 csbf_strd = ps_prms->csbf_strd;
8467
2.48M
    UWORD8 *pu1_luma_recon = (UWORD8 *)ps_prms->pv_luma_recon;
8468
2.48M
    WORD32 recon_luma_strd = ps_prms->recon_luma_strd;
8469
2.48M
    UWORD8 *pu1_chrm_recon = (UWORD8 *)ps_prms->pv_chrm_recon;
8470
2.48M
    WORD32 recon_chrma_strd = ps_prms->recon_chrma_strd;
8471
2.48M
    UWORD8 u1_cu_pos_x = ps_prms->u1_cu_pos_x;
8472
2.48M
    UWORD8 u1_cu_pos_y = ps_prms->u1_cu_pos_y;
8473
2.48M
    UWORD8 u1_cu_size = ps_prms->u1_cu_size;
8474
2.48M
    WORD8 i1_cu_qp = ps_prms->i1_cu_qp;
8475
2.48M
    UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
8476
2.48M
    UWORD8 u1_num_subtus = (u1_is_422 == 1) + 1;
8477
    /* Get the Chroma pointer and parameters */
8478
2.48M
    UWORD8 *pu1_src_chrm = ps_chrm_cu_buf_prms->pu1_curr_src;
8479
2.48M
    WORD32 src_chrm_strd = ps_chrm_cu_buf_prms->i4_chrm_src_stride;
8480
2.48M
    UWORD8 u1_compute_spatial_ssd_luma = 0;
8481
2.48M
    UWORD8 u1_compute_spatial_ssd_chroma = 0;
8482
    /* Get the pointer for function selector */
8483
2.48M
    ihevc_intra_pred_luma_ref_substitution_fptr =
8484
2.48M
        ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
8485
8486
2.48M
    ihevc_intra_pred_ref_filtering_fptr =
8487
2.48M
        ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr;
8488
8489
2.48M
    ihevc_intra_pred_chroma_ref_substitution_fptr =
8490
2.48M
        ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
8491
8492
    /* Get the best CU parameters */
8493
2.48M
    ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
8494
2.48M
    num_tu_in_cu = ps_best_cu_prms->u2_num_tus_in_cu;
8495
2.48M
    cu_size = ps_best_cu_prms->u1_cu_size;
8496
2.48M
    cu_pos_x = u1_cu_pos_x;
8497
2.48M
    cu_pos_y = u1_cu_pos_y;
8498
2.48M
    pu1_intra_pred_mode = &ps_best_cu_prms->au1_intra_pred_mode[0];
8499
2.48M
    pu4_nbr_flags = &ps_best_cu_prms->au4_nbr_flags[0];
8500
2.48M
    ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore;
8501
8502
    /* get the first TU pointer */
8503
2.48M
    ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0];
8504
    /* get the first TU only enc_loop prms pointer */
8505
2.48M
    ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
8506
    /*modify quant related param in ctxt based on current cu qp*/
8507
2.48M
    if((ps_ctxt->i1_cu_qp_delta_enable))
8508
1.02M
    {
8509
        /*recompute quant related param at every cu level*/
8510
1.02M
        ihevce_compute_quant_rel_param(ps_ctxt, i1_cu_qp);
8511
8512
        /* get frame level lambda params */
8513
1.02M
        ihevce_get_cl_cu_lambda_prms(
8514
1.02M
            ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? i1_cu_qp : ps_ctxt->i4_frame_qp);
8515
1.02M
    }
8516
8517
2.48M
    ps_best_cu_prms->i8_cu_ssd = 0;
8518
2.48M
    ps_best_cu_prms->u4_cu_open_intra_sad = 0;
8519
8520
    /* For skip case : Set TU_size = CU_size and make cbf = 0
8521
    so that same TU loop can be used for all modes */
8522
2.48M
    if(PRED_MODE_SKIP == packed_pred_mode)
8523
243k
    {
8524
489k
        for(ctr = 0; ctr < num_tu_in_cu; ctr++)
8525
245k
        {
8526
245k
            ps_tu_enc_loop->s_tu.b1_y_cbf = 0;
8527
8528
245k
            ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = 0;
8529
8530
245k
            ps_tu_enc_loop++;
8531
245k
            ps_tu_enc_loop_temp_prms++;
8532
245k
        }
8533
8534
        /* go back to the first TU pointer */
8535
243k
        ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0];
8536
243k
        ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
8537
243k
    }
8538
    /**   For inter case, pred calculation is outside the loop     **/
8539
2.48M
    if(PRED_MODE_INTRA != packed_pred_mode)
8540
1.08M
    {
8541
        /**------------- Compute pred data if required --------------**/
8542
1.08M
        if((1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data))
8543
0
        {
8544
0
            nbr_4x4_t *ps_topleft_nbr_4x4;
8545
0
            nbr_4x4_t *ps_left_nbr_4x4;
8546
0
            nbr_4x4_t *ps_top_nbr_4x4;
8547
0
            WORD32 nbr_4x4_left_strd;
8548
8549
0
            ps_best_inter_cand->pu1_pred_data = pu1_pred;
8550
0
            ps_best_inter_cand->i4_pred_data_stride = pred_strd;
8551
8552
            /* Get the CU nbr information */
8553
0
            ps_topleft_nbr_4x4 = ps_cu_nbr_prms->ps_topleft_nbr_4x4;
8554
0
            ps_left_nbr_4x4 = ps_cu_nbr_prms->ps_left_nbr_4x4;
8555
0
            ps_top_nbr_4x4 = ps_cu_nbr_prms->ps_top_nbr_4x4;
8556
0
            nbr_4x4_left_strd = ps_cu_nbr_prms->nbr_4x4_left_strd;
8557
8558
            /* MVP ,MVD calc and Motion compensation */
8559
0
            rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)(
8560
0
                ps_ctxt,
8561
0
                ps_best_inter_cand,
8562
0
                u1_cu_size,
8563
0
                cu_pos_x,
8564
0
                cu_pos_y,
8565
0
                ps_left_nbr_4x4,
8566
0
                ps_top_nbr_4x4,
8567
0
                ps_topleft_nbr_4x4,
8568
0
                nbr_4x4_left_strd,
8569
0
                rd_opt_best_idx);
8570
0
        }
8571
8572
        /** ------ Motion Compensation for Chroma -------- **/
8573
1.08M
        if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data)
8574
243k
        {
8575
243k
            UWORD8 *pu1_cur_pred;
8576
243k
            pu1_cur_pred = pu1_pred_chrm;
8577
8578
            /* run a loop over all the partitions in cu */
8579
489k
            for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++)
8580
246k
            {
8581
246k
                pu_t *ps_pu;
8582
246k
                WORD32 inter_pu_wd, inter_pu_ht;
8583
8584
246k
                ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr];
8585
8586
                /* IF AMP then each partitions can have diff wd ht */
8587
246k
                inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
8588
246k
                inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
8589
246k
                inter_pu_ht <<= u1_is_422;
8590
                /* chroma mc func */
8591
246k
                ihevce_chroma_inter_pred_pu(
8592
246k
                    &ps_ctxt->s_mc_ctxt, ps_pu, pu1_cur_pred, pred_chrm_strd);
8593
246k
                if(2 == ps_best_cu_prms->u2_num_pus_in_cu)
8594
5.85k
                {
8595
                    /* 2Nx__ partition case */
8596
5.85k
                    if(inter_pu_wd == ps_best_cu_prms->u1_cu_size)
8597
3.26k
                    {
8598
3.26k
                        pu1_cur_pred += (inter_pu_ht * pred_chrm_strd);
8599
3.26k
                    }
8600
                    /* __x2N partition case */
8601
5.85k
                    if(inter_pu_ht == (ps_best_cu_prms->u1_cu_size >> (u1_is_422 == 0)))
8602
2.59k
                    {
8603
2.59k
                        pu1_cur_pred += inter_pu_wd;
8604
2.59k
                    }
8605
5.85k
                }
8606
246k
            }
8607
243k
        }
8608
1.08M
    }
8609
2.48M
    pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0];
8610
2.48M
    pi2_chrm_deq_data =
8611
2.48M
        &ps_best_cu_prms->pi2_cu_deq_coeffs[0] + ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx;
8612
2.48M
    pu1_old_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0];
8613
2.48M
    pu1_chrm_old_ecd_data =
8614
2.48M
        &ps_best_cu_prms->pu1_cu_coeffs[0] + ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx;
8615
8616
    /* default value for cu coded flag */
8617
2.48M
    u1_is_cu_coded = 0;
8618
8619
    /* If we are re-computing coeff, set sad to 0 and start accumulating */
8620
    /* else use the best cand. sad from RDOPT stage                    */
8621
2.48M
    if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data)
8622
0
    {
8623
        /*init of sad of CU accumulated over all TU*/
8624
0
        ps_best_cu_prms->u4_cu_sad = 0;
8625
8626
        /* reset the luma residual bits */
8627
0
        ps_best_cu_prms->u4_cu_luma_res_bits = 0;
8628
0
    }
8629
8630
2.48M
    if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data)
8631
590k
    {
8632
        /* reset the chroma residual bits */
8633
590k
        ps_best_cu_prms->u4_cu_chroma_res_bits = 0;
8634
590k
    }
8635
8636
2.48M
    if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data) ||
8637
2.48M
       (1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data))
8638
590k
    {
8639
        /*Header bits have to be reevaluated if luma and chroma reevaluation is done, as
8640
        the quantized coefficients might be changed.
8641
        We are copying only those states which correspond to the header from the cabac state
8642
        of the previous CU, because the header is going to be recomputed for this condition*/
8643
590k
        ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1;
8644
590k
        memcpy(
8645
590k
            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
8646
590k
            &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
8647
590k
            IHEVC_CAB_COEFFX_PREFIX);
8648
8649
590k
        if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data))
8650
0
        {
8651
0
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
8652
0
                (&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX),
8653
0
                (&ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0] +
8654
0
                 IHEVC_CAB_COEFFX_PREFIX),
8655
0
                (IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX));
8656
0
        }
8657
590k
        else
8658
590k
        {
8659
590k
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
8660
590k
                (&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX),
8661
590k
                (&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
8662
590k
                      .s_cabac_ctxt.au1_ctxt_models[0] +
8663
590k
                 IHEVC_CAB_COEFFX_PREFIX),
8664
590k
                (IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX));
8665
590k
        }
8666
590k
        ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_best_idx;
8667
590k
    }
8668
1.89M
    else
8669
1.89M
    {
8670
1.89M
        ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0;
8671
1.89M
    }
8672
8673
    /* Zero cbf tool is disabled for intra CUs */
8674
2.48M
    if(PRED_MODE_INTRA == packed_pred_mode)
8675
1.40M
    {
8676
#if ENABLE_ZERO_CBF_IN_INTRA
8677
        ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
8678
#else
8679
1.40M
        ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
8680
1.40M
#endif
8681
1.40M
    }
8682
1.08M
    else
8683
1.08M
    {
8684
#if DISABLE_ZERO_ZBF_IN_INTER
8685
        ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
8686
#else
8687
1.08M
        ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
8688
1.08M
#endif
8689
1.08M
    }
8690
8691
    /** Loop for all tu blocks in current cu and do reconstruction **/
8692
6.99M
    for(ctr = 0; ctr < num_tu_in_cu; ctr++)
8693
4.51M
    {
8694
4.51M
        tu_t *ps_tu;
8695
4.51M
        WORD32 trans_size, num_4x4_in_tu;
8696
4.51M
        WORD32 cbf, zero_rows, zero_cols;
8697
4.51M
        WORD32 cu_pos_x_in_4x4, cu_pos_y_in_4x4;
8698
4.51M
        WORD32 cu_pos_x_in_pix, cu_pos_y_in_pix;
8699
4.51M
        WORD32 luma_pred_mode, chroma_pred_mode = 0;
8700
4.51M
        UWORD8 au1_is_recon_available[2];
8701
8702
4.51M
        ps_tu = &(ps_tu_enc_loop->s_tu); /* Points to the TU property ctxt */
8703
8704
4.51M
        u1_compute_spatial_ssd_luma = 0;
8705
4.51M
        u1_compute_spatial_ssd_chroma = 0;
8706
8707
4.51M
        trans_size = 1 << (ps_tu->b3_size + 2);
8708
4.51M
        num_4x4_in_tu = (trans_size >> 2);
8709
4.51M
        cu_pos_x_in_4x4 = ps_tu->b4_pos_x;
8710
4.51M
        cu_pos_y_in_4x4 = ps_tu->b4_pos_y;
8711
8712
        /* populate the coeffs scan idx */
8713
4.51M
        ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
8714
8715
        /* get the current pos x and pos y in pixels */
8716
4.51M
        cu_pos_x_in_pix = (cu_pos_x_in_4x4 << 2) - (cu_pos_x << 3);
8717
4.51M
        cu_pos_y_in_pix = (cu_pos_y_in_4x4 << 2) - (cu_pos_y << 3);
8718
8719
        /* Update pointers based on the location */
8720
4.51M
        pu1_cur_src = pu1_src + cu_pos_x_in_pix;
8721
4.51M
        pu1_cur_src += (cu_pos_y_in_pix * src_strd);
8722
4.51M
        pu1_cur_pred = pu1_pred + cu_pos_x_in_pix;
8723
4.51M
        pu1_cur_pred += (cu_pos_y_in_pix * pred_strd);
8724
8725
4.51M
        pu1_cur_luma_recon = pu1_luma_recon + cu_pos_x_in_pix;
8726
4.51M
        pu1_cur_luma_recon += (cu_pos_y_in_pix * recon_luma_strd);
8727
8728
4.51M
        pi2_cur_deq_data = pi2_deq_data + cu_pos_x_in_pix;
8729
4.51M
        pi2_cur_deq_data += cu_pos_y_in_pix * cu_size;
8730
8731
4.51M
        pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix;
8732
4.51M
        pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) +
8733
4.51M
                            (u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd));
8734
8735
4.51M
        pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix;
8736
4.51M
        pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) +
8737
4.51M
                             (u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd));
8738
8739
4.51M
        pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix;
8740
4.51M
        pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) +
8741
4.51M
                                (u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd));
8742
8743
4.51M
        pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix;
8744
4.51M
        pi2_cur_deq_data_chrm +=
8745
4.51M
            ((cu_pos_y_in_pix >> 1) * cu_size) + (u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size));
8746
8747
        /* if transform size is 4x4 then only first luma 4x4 will have chroma*/
8748
4.51M
        chrm_present_flag = 1; /* by default chroma present is set to 1*/
8749
8750
4.51M
        if(4 == trans_size)
8751
1.64M
        {
8752
            /* if tusize is 4x4 then only first luma 4x4 will have chroma*/
8753
1.64M
            if(0 != chrm_ctr)
8754
1.23M
            {
8755
1.23M
                chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE;
8756
1.23M
            }
8757
8758
            /* increment the chrm ctr unconditionally */
8759
1.64M
            chrm_ctr++;
8760
            /* after ctr reached 4 reset it */
8761
1.64M
            if(4 == chrm_ctr)
8762
412k
            {
8763
412k
                chrm_ctr = 0;
8764
412k
            }
8765
1.64M
        }
8766
8767
        /**------------- Compute pred data if required --------------**/
8768
4.51M
        if(PRED_MODE_INTRA == packed_pred_mode) /* Inter pred calc. is done outside loop */
8769
2.59M
        {
8770
            /* Get the pred mode for scan idx calculation, even if pred is not required */
8771
2.59M
            luma_pred_mode = *pu1_intra_pred_mode;
8772
8773
2.59M
            if((ps_ctxt->i4_rc_pass == 1) ||
8774
2.59M
               (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data))
8775
0
            {
8776
0
                WORD32 nbr_flags;
8777
0
                WORD32 luma_pred_func_idx;
8778
0
                UWORD8 *pu1_left;
8779
0
                UWORD8 *pu1_top;
8780
0
                UWORD8 *pu1_top_left;
8781
0
                WORD32 left_strd;
8782
8783
                /* left cu boundary */
8784
0
                if(0 == cu_pos_x_in_pix)
8785
0
                {
8786
0
                    left_strd = ps_cu_nbr_prms->cu_left_stride;
8787
0
                    pu1_left = ps_cu_nbr_prms->pu1_cu_left + cu_pos_y_in_pix * left_strd;
8788
0
                }
8789
0
                else
8790
0
                {
8791
0
                    pu1_left = pu1_cur_luma_recon - 1;
8792
0
                    left_strd = recon_luma_strd;
8793
0
                }
8794
8795
                /* top cu boundary */
8796
0
                if(0 == cu_pos_y_in_pix)
8797
0
                {
8798
0
                    pu1_top = ps_cu_nbr_prms->pu1_cu_top + cu_pos_x_in_pix;
8799
0
                }
8800
0
                else
8801
0
                {
8802
0
                    pu1_top = pu1_cur_luma_recon - recon_luma_strd;
8803
0
                }
8804
8805
                /* by default top left is set to cu top left */
8806
0
                pu1_top_left = ps_cu_nbr_prms->pu1_cu_top_left;
8807
8808
                /* top left based on position */
8809
0
                if((0 != cu_pos_y_in_pix) && (0 == cu_pos_x_in_pix))
8810
0
                {
8811
0
                    pu1_top_left = pu1_left - left_strd;
8812
0
                }
8813
0
                else if(0 != cu_pos_x_in_pix)
8814
0
                {
8815
0
                    pu1_top_left = pu1_top - 1;
8816
0
                }
8817
8818
                /* get the neighbour availability flags */
8819
0
                nbr_flags = ihevce_get_nbr_intra(
8820
0
                    &s_nbr,
8821
0
                    ps_ctxt->pu1_ctb_nbr_map,
8822
0
                    ps_ctxt->i4_nbr_map_strd,
8823
0
                    cu_pos_x_in_4x4,
8824
0
                    cu_pos_y_in_4x4,
8825
0
                    num_4x4_in_tu);
8826
8827
0
                if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data)
8828
0
                {
8829
                    /* copy the nbr flags for chroma reuse */
8830
0
                    if(4 != trans_size)
8831
0
                    {
8832
0
                        *pu4_nbr_flags = nbr_flags;
8833
0
                    }
8834
0
                    else if(1 == chrm_present_flag)
8835
0
                    {
8836
                        /* compute the avail flags assuming luma trans is 8x8 */
8837
                        /* get the neighbour availability flags */
8838
0
                        *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
8839
0
                            ps_ctxt->pu1_ctb_nbr_map,
8840
0
                            ps_ctxt->i4_nbr_map_strd,
8841
0
                            cu_pos_x_in_4x4,
8842
0
                            cu_pos_y_in_4x4,
8843
0
                            (num_4x4_in_tu << 1),
8844
0
                            (num_4x4_in_tu << 1));
8845
0
                    }
8846
8847
                    /* call reference array substitution */
8848
0
                    ihevc_intra_pred_luma_ref_substitution_fptr(
8849
0
                        pu1_top_left,
8850
0
                        pu1_top,
8851
0
                        pu1_left,
8852
0
                        left_strd,
8853
0
                        trans_size,
8854
0
                        nbr_flags,
8855
0
                        (UWORD8 *)ps_ctxt->pv_ref_sub_out,
8856
0
                        1);
8857
8858
                    /* call reference filtering */
8859
0
                    ihevc_intra_pred_ref_filtering_fptr(
8860
0
                        (UWORD8 *)ps_ctxt->pv_ref_sub_out,
8861
0
                        trans_size,
8862
0
                        (UWORD8 *)ps_ctxt->pv_ref_filt_out,
8863
0
                        luma_pred_mode,
8864
0
                        ps_ctxt->i1_strong_intra_smoothing_enable_flag);
8865
8866
                    /* use the look up to get the function idx */
8867
0
                    luma_pred_func_idx = g_i4_ip_funcs[luma_pred_mode];
8868
8869
                    /* call the intra prediction function */
8870
0
                    ps_ctxt->apf_lum_ip[luma_pred_func_idx](
8871
0
                        (UWORD8 *)ps_ctxt->pv_ref_filt_out,
8872
0
                        1,
8873
0
                        pu1_cur_pred,
8874
0
                        pred_strd,
8875
0
                        trans_size,
8876
0
                        luma_pred_mode);
8877
0
                }
8878
0
            }
8879
2.59M
            else if(
8880
2.59M
                (1 == chrm_present_flag) &&
8881
1.73M
                (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data))
8882
439k
            {
8883
439k
                WORD32 temp_num_4x4_in_tu = num_4x4_in_tu;
8884
8885
439k
                if(4 == trans_size) /* compute the avail flags assuming luma trans is 8x8 */
8886
80.6k
                {
8887
80.6k
                    temp_num_4x4_in_tu = num_4x4_in_tu << 1;
8888
80.6k
                }
8889
8890
439k
                *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
8891
439k
                    ps_ctxt->pu1_ctb_nbr_map,
8892
439k
                    ps_ctxt->i4_nbr_map_strd,
8893
439k
                    cu_pos_x_in_4x4,
8894
439k
                    cu_pos_y_in_4x4,
8895
439k
                    temp_num_4x4_in_tu,
8896
439k
                    temp_num_4x4_in_tu);
8897
439k
            }
8898
8899
            /* Get the pred mode for scan idx calculation, even if pred is not required */
8900
2.59M
            chroma_pred_mode = ps_best_cu_prms->u1_chroma_intra_pred_actual_mode;
8901
2.59M
        }
8902
8903
4.51M
        if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data)
8904
0
        {
8905
0
            WORD32 temp_bits;
8906
0
            LWORD64 temp_cost;
8907
0
            UWORD32 u4_tu_sad;
8908
0
            WORD32 perform_sbh, perform_rdoq;
8909
8910
0
            if(PRED_MODE_INTRA == packed_pred_mode)
8911
0
            {
8912
                /* for luma 4x4 and 8x8 transforms based on intra pred mode scan is chosen*/
8913
0
                if(trans_size < 16)
8914
0
                {
8915
                    /* for modes from 22 upto 30 horizontal scan is used */
8916
0
                    if((luma_pred_mode > 21) && (luma_pred_mode < 31))
8917
0
                    {
8918
0
                        ps_ctxt->i4_scan_idx = SCAN_HORZ;
8919
0
                    }
8920
                    /* for modes from 6 upto 14 vertical scan is used */
8921
0
                    else if((luma_pred_mode > 5) && (luma_pred_mode < 15))
8922
0
                    {
8923
0
                        ps_ctxt->i4_scan_idx = SCAN_VERT;
8924
0
                    }
8925
0
                }
8926
0
            }
8927
8928
            /* RDOPT copy States :  TU init (best until prev TU) to current */
8929
0
            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
8930
0
                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
8931
0
                        .s_cabac_ctxt.au1_ctxt_models[0] +
8932
0
                    IHEVC_CAB_COEFFX_PREFIX,
8933
0
                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
8934
0
                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
8935
8936
0
            if(ps_prms->u1_recompute_sbh_and_rdoq)
8937
0
            {
8938
0
                perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH);
8939
0
                perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ);
8940
0
            }
8941
0
            else
8942
0
            {
8943
                /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
8944
0
                perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh;
8945
                /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
8946
                we would have to do RDOQ again.*/
8947
0
                perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq;
8948
0
            }
8949
8950
#if DISABLE_RDOQ_INTRA
8951
            if(PRED_MODE_INTRA == packed_pred_mode)
8952
            {
8953
                perform_rdoq = 0;
8954
            }
8955
#endif
8956
            /*If BEST candidate RDOQ is enabled, either no coef level rdoq or CU level rdoq has to be enabled
8957
            so that all candidates and best candidate are quantized with same rounding factor  */
8958
0
            if(1 == perform_rdoq)
8959
0
            {
8960
0
                ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING);
8961
0
            }
8962
8963
0
            cbf = ihevce_t_q_iq_ssd_scan_fxn(
8964
0
                ps_ctxt,
8965
0
                pu1_cur_pred,
8966
0
                pred_strd,
8967
0
                pu1_cur_src,
8968
0
                src_strd,
8969
0
                pi2_cur_deq_data,
8970
0
                cu_size, /*deq_data stride is cu_size*/
8971
0
                pu1_cur_luma_recon,
8972
0
                recon_luma_strd,
8973
0
                pu1_final_ecd_data,
8974
0
                pu1_csbf_buf,
8975
0
                csbf_strd,
8976
0
                trans_size,
8977
0
                packed_pred_mode,
8978
0
                &temp_cost,
8979
0
                &num_bytes,
8980
0
                &temp_bits,
8981
0
                &u4_tu_sad,
8982
0
                &zero_cols,
8983
0
                &zero_rows,
8984
0
                &au1_is_recon_available[0],
8985
0
                perform_rdoq,  //(BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level),
8986
0
                perform_sbh,
8987
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
8988
0
                !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
8989
0
                                       : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
8990
0
                                          (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
8991
0
                                             100.0,
8992
0
                ps_prms->u1_is_cu_noisy,
8993
0
#endif
8994
0
                u1_compute_spatial_ssd_luma ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
8995
0
                1 /*early cbf*/
8996
0
            );  //(BEST_CAND_SBH == ps_ctxt->i4_sbh_level));
8997
8998
            /* Accumulate luma residual bits */
8999
0
            ps_best_cu_prms->u4_cu_luma_res_bits += temp_bits;
9000
9001
            /* RDOPT copy States :  New updated after curr TU to TU init */
9002
0
            if(0 != cbf)
9003
0
            {
9004
                /* update to new state only if CBF is non zero */
9005
0
                COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9006
0
                    &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9007
0
                    &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9008
0
                            .s_cabac_ctxt.au1_ctxt_models[0] +
9009
0
                        IHEVC_CAB_COEFFX_PREFIX,
9010
0
                    IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9011
0
            }
9012
9013
            /* accumulate the TU sad into cu sad */
9014
0
            ps_best_cu_prms->u4_cu_sad += u4_tu_sad;
9015
0
            ps_tu->b1_y_cbf = cbf;
9016
0
            ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = num_bytes;
9017
9018
            /* If somebody updates cbf (RDOQ or SBH), update in nbr str. for BS */
9019
0
            if((ps_prms->u1_will_cabac_state_change) && (!ps_prms->u1_is_first_pass))
9020
0
            {
9021
0
                WORD32 num_4x4_in_cu = u1_cu_size >> 2;
9022
0
                nbr_4x4_t *ps_cur_nbr_4x4 = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0];
9023
0
                ps_cur_nbr_4x4 = (ps_cur_nbr_4x4 + (cu_pos_x_in_pix >> 2));
9024
0
                ps_cur_nbr_4x4 += ((cu_pos_y_in_pix >> 2) * num_4x4_in_cu);
9025
                /* replicate the nbr 4x4 structure for all 4x4 blocks in the current TU */
9026
0
                ps_cur_nbr_4x4->b1_y_cbf = cbf;
9027
                /*copy the cu qp. This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/
9028
0
                ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
9029
                /* Qp and cbf are stored for the all 4x4 in TU */
9030
0
                {
9031
0
                    WORD32 i, j;
9032
0
                    nbr_4x4_t *ps_tmp_4x4;
9033
0
                    ps_tmp_4x4 = ps_cur_nbr_4x4;
9034
9035
0
                    for(i = 0; i < num_4x4_in_tu; i++)
9036
0
                    {
9037
0
                        for(j = 0; j < num_4x4_in_tu; j++)
9038
0
                        {
9039
0
                            ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp;
9040
0
                            ps_tmp_4x4[j].b1_y_cbf = cbf;
9041
0
                        }
9042
                        /* row level update*/
9043
0
                        ps_tmp_4x4 += num_4x4_in_cu;
9044
0
                    }
9045
0
                }
9046
0
            }
9047
0
        }
9048
4.51M
        else
9049
4.51M
        {
9050
4.51M
            zero_cols = ps_tu_enc_loop_temp_prms->u4_luma_zero_col;
9051
4.51M
            zero_rows = ps_tu_enc_loop_temp_prms->u4_luma_zero_row;
9052
9053
4.51M
            if(ps_prms->u1_will_cabac_state_change)
9054
4.51M
            {
9055
4.51M
                num_bytes = ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed;
9056
4.51M
            }
9057
0
            else
9058
0
            {
9059
0
                num_bytes = 0;
9060
0
            }
9061
9062
            /* copy luma ecd data to final buffer */
9063
4.51M
            memcpy(pu1_final_ecd_data, pu1_old_ecd_data, num_bytes);
9064
9065
4.51M
            pu1_old_ecd_data += num_bytes;
9066
9067
4.51M
            au1_is_recon_available[0] = 0;
9068
4.51M
        }
9069
9070
        /**-------- Compute Recon data (Do IT & Recon) : Luma  -----------**/
9071
4.51M
        if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9072
4.49M
           (!u1_compute_spatial_ssd_luma ||
9073
0
            (!au1_is_recon_available[0] && u1_compute_spatial_ssd_luma)))
9074
4.49M
        {
9075
4.49M
            if(!ps_recon_datastore->u1_is_lumaRecon_available ||
9076
2.75M
               (ps_recon_datastore->u1_is_lumaRecon_available &&
9077
2.75M
                (UCHAR_MAX == ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr])))
9078
2.04M
            {
9079
2.04M
                ihevce_it_recon_fxn(
9080
2.04M
                    ps_ctxt,
9081
2.04M
                    pi2_cur_deq_data,
9082
2.04M
                    cu_size,
9083
2.04M
                    pu1_cur_pred,
9084
2.04M
                    pred_strd,
9085
2.04M
                    pu1_cur_luma_recon,
9086
2.04M
                    recon_luma_strd,
9087
2.04M
                    pu1_final_ecd_data,
9088
2.04M
                    trans_size,
9089
2.04M
                    packed_pred_mode,
9090
2.04M
                    ps_tu->b1_y_cbf,
9091
2.04M
                    zero_cols,
9092
2.04M
                    zero_rows);
9093
2.04M
            }
9094
2.44M
            else if(
9095
2.44M
                ps_recon_datastore->u1_is_lumaRecon_available &&
9096
2.44M
                (UCHAR_MAX != ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]))
9097
2.44M
            {
9098
2.44M
                UWORD8 *pu1_recon_src =
9099
2.44M
                    ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
9100
2.44M
                         [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]]) +
9101
2.44M
                    cu_pos_x_in_pix + cu_pos_y_in_pix * ps_recon_datastore->i4_lumaRecon_stride;
9102
9103
2.44M
                ps_ctxt->s_cmn_opt_func.pf_copy_2d(
9104
2.44M
                    pu1_cur_luma_recon,
9105
2.44M
                    recon_luma_strd,
9106
2.44M
                    pu1_recon_src,
9107
2.44M
                    ps_recon_datastore->i4_lumaRecon_stride,
9108
2.44M
                    trans_size,
9109
2.44M
                    trans_size);
9110
2.44M
            }
9111
4.49M
        }
9112
9113
4.51M
        if(ps_prms->u1_will_cabac_state_change)
9114
4.51M
        {
9115
4.51M
            ps_tu_enc_loop->i4_luma_coeff_offset = total_bytes;
9116
4.51M
        }
9117
9118
4.51M
        pu1_final_ecd_data += num_bytes;
9119
        /* update total bytes consumed */
9120
4.51M
        total_bytes += num_bytes;
9121
9122
4.51M
        u1_is_cu_coded |= ps_tu->b1_y_cbf;
9123
9124
        /***************** Compute T,Q,IQ,IT & Recon for Chroma ********************/
9125
4.51M
        if(1 == chrm_present_flag)
9126
3.28M
        {
9127
3.28M
            pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix;
9128
3.28M
            pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) +
9129
3.28M
                                (u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd));
9130
9131
3.28M
            pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix;
9132
3.28M
            pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) +
9133
3.28M
                                 (u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd));
9134
9135
3.28M
            pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix;
9136
3.28M
            pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) +
9137
3.28M
                                    (u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd));
9138
9139
3.28M
            pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix;
9140
3.28M
            pi2_cur_deq_data_chrm += ((cu_pos_y_in_pix >> 1) * cu_size) +
9141
3.28M
                                     (u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size));
9142
9143
3.28M
            if(INCLUDE_CHROMA_DURING_TU_RECURSION &&
9144
0
               (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0) &&
9145
0
               (PRED_MODE_INTRA != packed_pred_mode))
9146
0
            {
9147
0
                WORD32 i4_num_bytes;
9148
0
                UWORD8 *pu1_chroma_pred;
9149
0
                UWORD8 *pu1_chroma_recon;
9150
0
                WORD16 *pi2_chroma_deq;
9151
0
                UWORD32 u4_zero_col;
9152
0
                UWORD32 u4_zero_row;
9153
9154
0
                for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
9155
0
                {
9156
0
                    WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
9157
0
                    WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
9158
0
                    WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
9159
9160
0
                    if(0 == u1_is_422)
9161
0
                    {
9162
0
                        i4_subtu_pos_y >>= 1;
9163
0
                    }
9164
9165
0
                    pu1_chroma_pred =
9166
0
                        pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
9167
0
                    pu1_chroma_recon = pu1_cur_chroma_recon +
9168
0
                                       (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
9169
0
                    pi2_chroma_deq =
9170
0
                        pi2_cur_deq_data_chrm + (i4_subtu_idx * chroma_trans_size * cu_size);
9171
9172
0
                    u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx];
9173
0
                    u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx];
9174
9175
0
                    if(ps_prms->u1_will_cabac_state_change)
9176
0
                    {
9177
0
                        i4_num_bytes =
9178
0
                            ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx];
9179
0
                    }
9180
0
                    else
9181
0
                    {
9182
0
                        i4_num_bytes = 0;
9183
0
                    }
9184
9185
0
                    memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes);
9186
9187
0
                    pu1_old_ecd_data += i4_num_bytes;
9188
9189
0
                    au1_is_recon_available[U_PLANE] = 0;
9190
9191
0
                    if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9192
0
                       (!u1_compute_spatial_ssd_chroma ||
9193
0
                        (!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma)))
9194
0
                    {
9195
0
                        if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9196
0
                           (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9197
0
                            (UCHAR_MAX ==
9198
0
                             ps_recon_datastore
9199
0
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])))
9200
0
                        {
9201
0
                            ihevce_chroma_it_recon_fxn(
9202
0
                                ps_ctxt,
9203
0
                                pi2_chroma_deq,
9204
0
                                cu_size,
9205
0
                                pu1_chroma_pred,
9206
0
                                pred_chrm_strd,
9207
0
                                pu1_chroma_recon,
9208
0
                                recon_chrma_strd,
9209
0
                                pu1_final_ecd_data,
9210
0
                                chroma_trans_size,
9211
0
                                (i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1,
9212
0
                                u4_zero_col,
9213
0
                                u4_zero_row,
9214
0
                                U_PLANE);
9215
0
                        }
9216
0
                        else if(
9217
0
                            ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9218
0
                            (UCHAR_MAX !=
9219
0
                             ps_recon_datastore
9220
0
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))
9221
0
                        {
9222
0
                            UWORD8 *pu1_recon_src =
9223
0
                                ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9224
0
                                     [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9225
0
                                          [U_PLANE][ctr][i4_subtu_idx]]) +
9226
0
                                i4_subtu_pos_x +
9227
0
                                i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9228
9229
0
                            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9230
0
                                pu1_recon_src,
9231
0
                                ps_recon_datastore->i4_lumaRecon_stride,
9232
0
                                pu1_chroma_recon,
9233
0
                                recon_chrma_strd,
9234
0
                                chroma_trans_size,
9235
0
                                chroma_trans_size,
9236
0
                                U_PLANE);
9237
0
                        }
9238
0
                    }
9239
9240
0
                    u1_is_cu_coded |=
9241
0
                        ((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf);
9242
9243
0
                    pu1_final_ecd_data += i4_num_bytes;
9244
0
                    total_bytes += i4_num_bytes;
9245
0
                }
9246
9247
0
                for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
9248
0
                {
9249
0
                    WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
9250
0
                    WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
9251
0
                    WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
9252
9253
0
                    if(0 == u1_is_422)
9254
0
                    {
9255
0
                        i4_subtu_pos_y >>= 1;
9256
0
                    }
9257
9258
0
                    pu1_chroma_pred =
9259
0
                        pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
9260
0
                    pu1_chroma_recon = pu1_cur_chroma_recon +
9261
0
                                       (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
9262
0
                    pi2_chroma_deq = pi2_cur_deq_data_chrm +
9263
0
                                     (i4_subtu_idx * chroma_trans_size * cu_size) +
9264
0
                                     chroma_trans_size;
9265
9266
0
                    u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx];
9267
0
                    u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx];
9268
9269
0
                    if(ps_prms->u1_will_cabac_state_change)
9270
0
                    {
9271
0
                        i4_num_bytes =
9272
0
                            ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx];
9273
0
                    }
9274
0
                    else
9275
0
                    {
9276
0
                        i4_num_bytes = 0;
9277
0
                    }
9278
9279
0
                    memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes);
9280
9281
0
                    pu1_old_ecd_data += i4_num_bytes;
9282
9283
0
                    au1_is_recon_available[V_PLANE] = 0;
9284
9285
0
                    if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9286
0
                       (!u1_compute_spatial_ssd_chroma ||
9287
0
                        (!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma)))
9288
0
                    {
9289
0
                        if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9290
0
                           (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9291
0
                            (UCHAR_MAX ==
9292
0
                             ps_recon_datastore
9293
0
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])))
9294
0
                        {
9295
0
                            ihevce_chroma_it_recon_fxn(
9296
0
                                ps_ctxt,
9297
0
                                pi2_chroma_deq,
9298
0
                                cu_size,
9299
0
                                pu1_chroma_pred,
9300
0
                                pred_chrm_strd,
9301
0
                                pu1_chroma_recon,
9302
0
                                recon_chrma_strd,
9303
0
                                pu1_final_ecd_data,
9304
0
                                chroma_trans_size,
9305
0
                                (i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1,
9306
0
                                u4_zero_col,
9307
0
                                u4_zero_row,
9308
0
                                V_PLANE);
9309
0
                        }
9310
0
                        else if(
9311
0
                            ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9312
0
                            (UCHAR_MAX !=
9313
0
                             ps_recon_datastore
9314
0
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))
9315
0
                        {
9316
0
                            UWORD8 *pu1_recon_src =
9317
0
                                ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9318
0
                                     [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9319
0
                                          [V_PLANE][ctr][i4_subtu_idx]]) +
9320
0
                                i4_subtu_pos_x +
9321
0
                                i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9322
9323
0
                            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9324
0
                                pu1_recon_src,
9325
0
                                ps_recon_datastore->i4_lumaRecon_stride,
9326
0
                                pu1_chroma_recon,
9327
0
                                recon_chrma_strd,
9328
0
                                chroma_trans_size,
9329
0
                                chroma_trans_size,
9330
0
                                V_PLANE);
9331
0
                        }
9332
0
                    }
9333
9334
0
                    u1_is_cu_coded |=
9335
0
                        ((1 == i4_subtu_idx) ? ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf);
9336
9337
0
                    pu1_final_ecd_data += i4_num_bytes;
9338
0
                    total_bytes += i4_num_bytes;
9339
0
                }
9340
0
            }
9341
3.28M
            else
9342
3.28M
            {
9343
3.28M
                WORD32 cb_zero_col, cb_zero_row, cr_zero_col, cr_zero_row;
9344
9345
6.56M
                for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
9346
3.28M
                {
9347
3.28M
                    WORD32 cb_cbf, cr_cbf;
9348
3.28M
                    WORD32 cb_num_bytes, cr_num_bytes;
9349
9350
3.28M
                    WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
9351
9352
3.28M
                    WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
9353
3.28M
                    WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
9354
9355
3.28M
                    if(0 == u1_is_422)
9356
3.28M
                    {
9357
3.28M
                        i4_subtu_pos_y >>= 1;
9358
3.28M
                    }
9359
9360
3.28M
                    pu1_cur_src_chrm += (i4_subtu_idx * chroma_trans_size * src_chrm_strd);
9361
3.28M
                    pu1_cur_pred_chrm += (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
9362
3.28M
                    pu1_cur_chroma_recon += (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
9363
3.28M
                    pi2_cur_deq_data_chrm += (i4_subtu_idx * chroma_trans_size * cu_size);
9364
9365
3.28M
                    if((PRED_MODE_INTRA == packed_pred_mode) &&
9366
1.73M
                       (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data))
9367
439k
                    {
9368
439k
                        WORD32 nbr_flags, left_strd_chrm, chrm_pred_func_idx;
9369
439k
                        UWORD8 *pu1_left_chrm;
9370
439k
                        UWORD8 *pu1_top_chrm;
9371
439k
                        UWORD8 *pu1_top_left_chrm;
9372
9373
439k
                        nbr_flags = ihevce_get_intra_chroma_tu_nbr(
9374
439k
                            *pu4_nbr_flags, i4_subtu_idx, chroma_trans_size, u1_is_422);
9375
9376
                        /* left cu boundary */
9377
439k
                        if(0 == i4_subtu_pos_x)
9378
377k
                        {
9379
377k
                            left_strd_chrm = ps_chrm_cu_buf_prms->i4_cu_left_stride;
9380
377k
                            pu1_left_chrm =
9381
377k
                                ps_chrm_cu_buf_prms->pu1_cu_left + i4_subtu_pos_y * left_strd_chrm;
9382
377k
                        }
9383
62.0k
                        else
9384
62.0k
                        {
9385
62.0k
                            pu1_left_chrm = pu1_cur_chroma_recon - 2;
9386
62.0k
                            left_strd_chrm = recon_chrma_strd;
9387
62.0k
                        }
9388
9389
                        /* top cu boundary */
9390
439k
                        if(0 == i4_subtu_pos_y)
9391
377k
                        {
9392
377k
                            pu1_top_chrm = ps_chrm_cu_buf_prms->pu1_cu_top + i4_subtu_pos_x;
9393
377k
                        }
9394
62.0k
                        else
9395
62.0k
                        {
9396
62.0k
                            pu1_top_chrm = pu1_cur_chroma_recon - recon_chrma_strd;
9397
62.0k
                        }
9398
9399
                        /* by default top left is set to cu top left */
9400
439k
                        pu1_top_left_chrm = ps_chrm_cu_buf_prms->pu1_cu_top_left;
9401
9402
                        /* top left based on position */
9403
439k
                        if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x))
9404
31.0k
                        {
9405
31.0k
                            pu1_top_left_chrm = pu1_left_chrm - left_strd_chrm;
9406
31.0k
                        }
9407
408k
                        else if(0 != i4_subtu_pos_x)
9408
62.0k
                        {
9409
62.0k
                            pu1_top_left_chrm = pu1_top_chrm - 2;
9410
62.0k
                        }
9411
9412
                        /* call the chroma reference array substitution */
9413
439k
                        ihevc_intra_pred_chroma_ref_substitution_fptr(
9414
439k
                            pu1_top_left_chrm,
9415
439k
                            pu1_top_chrm,
9416
439k
                            pu1_left_chrm,
9417
439k
                            left_strd_chrm,
9418
439k
                            chroma_trans_size,
9419
439k
                            nbr_flags,
9420
439k
                            (UWORD8 *)ps_ctxt->pv_ref_sub_out,
9421
439k
                            1);
9422
9423
                        /* use the look up to get the function idx */
9424
439k
                        chrm_pred_func_idx = g_i4_ip_funcs[chroma_pred_mode];
9425
9426
                        /* call the intra prediction function */
9427
439k
                        ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
9428
439k
                            (UWORD8 *)ps_ctxt->pv_ref_sub_out,
9429
439k
                            1,
9430
439k
                            pu1_cur_pred_chrm,
9431
439k
                            pred_chrm_strd,
9432
439k
                            chroma_trans_size,
9433
439k
                            chroma_pred_mode);
9434
439k
                    }
9435
9436
                    /**---------- Compute iq&coeff data if required : Chroma ------------**/
9437
3.28M
                    if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data)
9438
865k
                    {
9439
865k
                        WORD32 perform_sbh, perform_rdoq, temp_bits;
9440
9441
865k
                        if(ps_prms->u1_recompute_sbh_and_rdoq)
9442
0
                        {
9443
0
                            perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH);
9444
0
                            perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ);
9445
0
                        }
9446
865k
                        else
9447
865k
                        {
9448
                            /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
9449
865k
                            perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh;
9450
                            /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
9451
                        we would have to do RDOQ again.*/
9452
865k
                            perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq;
9453
865k
                        }
9454
9455
                        /* populate the coeffs scan idx */
9456
865k
                        ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
9457
9458
865k
                        if(PRED_MODE_INTRA == packed_pred_mode)
9459
439k
                        {
9460
                            /* for 4x4 transforms, the scan is chosen based on the intra pred mode */
9461
439k
                            if(4 == chroma_trans_size)
9462
242k
                            {
9463
                                /* for modes from 22 upto 30 horizontal scan is used */
9464
242k
                                if((chroma_pred_mode > 21) && (chroma_pred_mode < 31))
9465
23.5k
                                {
9466
23.5k
                                    ps_ctxt->i4_scan_idx = SCAN_HORZ;
9467
23.5k
                                }
9468
                                /* for modes from 6 up to 14 vertical scan is used */
9469
219k
                                else if((chroma_pred_mode > 5) && (chroma_pred_mode < 15))
9470
81.5k
                                {
9471
81.5k
                                    ps_ctxt->i4_scan_idx = SCAN_VERT;
9472
81.5k
                                }
9473
242k
                            }
9474
439k
                        }
9475
9476
#if DISABLE_RDOQ_INTRA
9477
                        if(PRED_MODE_INTRA == packed_pred_mode)
9478
                        {
9479
                            perform_rdoq = 0;
9480
                        }
9481
#endif
9482
9483
                        /* RDOPT copy States :  TU init (best until prev TU) to current */
9484
865k
                        COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9485
865k
                            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9486
865k
                                    .s_cabac_ctxt.au1_ctxt_models[0] +
9487
865k
                                IHEVC_CAB_COEFFX_PREFIX,
9488
865k
                            &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9489
865k
                            IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9490
9491
865k
                        ASSERT(rd_opt_best_idx == ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx);
9492
                        /*If BEST candidate RDOQ is enabled, either no coef level rdoq or CU level rdoq has to be enabled
9493
                    so that all candidates and best candidate are quantized with same rounding factor  */
9494
865k
                        if(1 == perform_rdoq)
9495
0
                        {
9496
0
                            ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING);
9497
0
                        }
9498
9499
865k
                        if(!ps_best_cu_prms->u1_skip_flag ||
9500
5.11k
                           !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
9501
865k
                        {
9502
                            /* Cb */
9503
865k
                            cb_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
9504
865k
                                ps_ctxt,
9505
865k
                                pu1_cur_pred_chrm,
9506
865k
                                pred_chrm_strd,
9507
865k
                                pu1_cur_src_chrm,
9508
865k
                                src_chrm_strd,
9509
865k
                                pi2_cur_deq_data_chrm,
9510
865k
                                cu_size,
9511
865k
                                pu1_chrm_recon,
9512
865k
                                recon_chrma_strd,
9513
865k
                                pu1_final_ecd_data,
9514
865k
                                pu1_csbf_buf,
9515
865k
                                csbf_strd,
9516
865k
                                chroma_trans_size,
9517
865k
                                ps_ctxt->i4_scan_idx,
9518
865k
                                (PRED_MODE_INTRA == packed_pred_mode),
9519
865k
                                &cb_num_bytes,
9520
865k
                                &temp_bits,
9521
865k
                                &cb_zero_col,
9522
865k
                                &cb_zero_row,
9523
865k
                                &au1_is_recon_available[U_PLANE],
9524
865k
                                perform_sbh,
9525
865k
                                perform_rdoq,
9526
865k
                                &i8_ssd,
9527
865k
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
9528
865k
                                !ps_ctxt->u1_is_refPic
9529
865k
                                    ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
9530
865k
                                    : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
9531
828k
                                       (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
9532
828k
                                          100.0,
9533
865k
                                ps_prms->u1_is_cu_noisy,
9534
865k
#endif
9535
865k
                                ps_best_cu_prms->u1_skip_flag &&
9536
5.11k
                                    ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt,
9537
865k
                                u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD
9538
865k
                                                              : FREQUENCY_DOMAIN_SSD,
9539
865k
                                U_PLANE);
9540
865k
                        }
9541
0
                        else
9542
0
                        {
9543
0
                            cb_cbf = 0;
9544
0
                            temp_bits = 0;
9545
0
                            cb_num_bytes = 0;
9546
0
                            au1_is_recon_available[U_PLANE] = 0;
9547
0
                            cb_zero_col = 0;
9548
0
                            cb_zero_row = 0;
9549
0
                        }
9550
9551
                        /* Accumulate chroma residual bits */
9552
865k
                        ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits;
9553
9554
                        /* RDOPT copy States :  New updated after curr TU to TU init */
9555
865k
                        if(0 != cb_cbf)
9556
227k
                        {
9557
227k
                            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9558
227k
                                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9559
227k
                                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9560
227k
                                        .s_cabac_ctxt.au1_ctxt_models[0] +
9561
227k
                                    IHEVC_CAB_COEFFX_PREFIX,
9562
227k
                                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9563
227k
                        }
9564
                        /* RDOPT copy States :  Restoring back the Cb init state to Cr */
9565
638k
                        else
9566
638k
                        {
9567
638k
                            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9568
638k
                                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9569
638k
                                        .s_cabac_ctxt.au1_ctxt_models[0] +
9570
638k
                                    IHEVC_CAB_COEFFX_PREFIX,
9571
638k
                                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9572
638k
                                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9573
638k
                        }
9574
9575
865k
                        if(!ps_best_cu_prms->u1_skip_flag ||
9576
5.11k
                           !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
9577
865k
                        {
9578
                            /* Cr */
9579
865k
                            cr_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
9580
865k
                                ps_ctxt,
9581
865k
                                pu1_cur_pred_chrm,
9582
865k
                                pred_chrm_strd,
9583
865k
                                pu1_cur_src_chrm,
9584
865k
                                src_chrm_strd,
9585
865k
                                pi2_cur_deq_data_chrm + chroma_trans_size,
9586
865k
                                cu_size,
9587
865k
                                pu1_chrm_recon,
9588
865k
                                recon_chrma_strd,
9589
865k
                                pu1_final_ecd_data + cb_num_bytes,
9590
865k
                                pu1_csbf_buf,
9591
865k
                                csbf_strd,
9592
865k
                                chroma_trans_size,
9593
865k
                                ps_ctxt->i4_scan_idx,
9594
865k
                                (PRED_MODE_INTRA == packed_pred_mode),
9595
865k
                                &cr_num_bytes,
9596
865k
                                &temp_bits,
9597
865k
                                &cr_zero_col,
9598
865k
                                &cr_zero_row,
9599
865k
                                &au1_is_recon_available[V_PLANE],
9600
865k
                                perform_sbh,
9601
865k
                                perform_rdoq,
9602
865k
                                &i8_ssd,
9603
865k
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
9604
865k
                                !ps_ctxt->u1_is_refPic
9605
865k
                                    ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
9606
865k
                                    : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
9607
828k
                                       (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
9608
828k
                                          100.0,
9609
865k
                                ps_prms->u1_is_cu_noisy,
9610
865k
#endif
9611
865k
                                ps_best_cu_prms->u1_skip_flag &&
9612
5.11k
                                    ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt,
9613
865k
                                u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD
9614
865k
                                                              : FREQUENCY_DOMAIN_SSD,
9615
865k
                                V_PLANE);
9616
865k
                        }
9617
0
                        else
9618
0
                        {
9619
0
                            cr_cbf = 0;
9620
0
                            temp_bits = 0;
9621
0
                            cr_num_bytes = 0;
9622
0
                            au1_is_recon_available[V_PLANE] = 0;
9623
0
                            cr_zero_col = 0;
9624
0
                            cr_zero_row = 0;
9625
0
                        }
9626
9627
                        /* Accumulate chroma residual bits */
9628
865k
                        ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits;
9629
9630
                        /* RDOPT copy States :  New updated after curr TU to TU init */
9631
865k
                        if(0 != cr_cbf)
9632
224k
                        {
9633
224k
                            COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
9634
224k
                                &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
9635
224k
                                &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9636
224k
                                        .s_cabac_ctxt.au1_ctxt_models[0] +
9637
224k
                                    IHEVC_CAB_COEFFX_PREFIX,
9638
224k
                                IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
9639
224k
                        }
9640
9641
865k
                        if(0 == i4_subtu_idx)
9642
865k
                        {
9643
865k
                            ps_tu->b1_cb_cbf = cb_cbf;
9644
865k
                            ps_tu->b1_cr_cbf = cr_cbf;
9645
865k
                        }
9646
0
                        else
9647
0
                        {
9648
0
                            ps_tu->b1_cb_cbf_subtu1 = cb_cbf;
9649
0
                            ps_tu->b1_cr_cbf_subtu1 = cr_cbf;
9650
0
                        }
9651
865k
                    }
9652
2.41M
                    else
9653
2.41M
                    {
9654
2.41M
                        cb_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx];
9655
2.41M
                        cb_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx];
9656
2.41M
                        cr_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx];
9657
2.41M
                        cr_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx];
9658
9659
2.41M
                        if(ps_prms->u1_will_cabac_state_change)
9660
2.41M
                        {
9661
2.41M
                            cb_num_bytes =
9662
2.41M
                                ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx];
9663
2.41M
                        }
9664
0
                        else
9665
0
                        {
9666
0
                            cb_num_bytes = 0;
9667
0
                        }
9668
9669
2.41M
                        if(ps_prms->u1_will_cabac_state_change)
9670
2.41M
                        {
9671
2.41M
                            cr_num_bytes =
9672
2.41M
                                ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx];
9673
2.41M
                        }
9674
0
                        else
9675
0
                        {
9676
0
                            cr_num_bytes = 0;
9677
0
                        }
9678
9679
                        /* copy cb ecd data to final buffer */
9680
2.41M
                        memcpy(pu1_final_ecd_data, pu1_chrm_old_ecd_data, cb_num_bytes);
9681
9682
2.41M
                        pu1_chrm_old_ecd_data += cb_num_bytes;
9683
9684
                        /* copy cr ecd data to final buffer (placed right after the cb data) */
9685
2.41M
                        memcpy(
9686
2.41M
                            (pu1_final_ecd_data + cb_num_bytes),
9687
2.41M
                            pu1_chrm_old_ecd_data,
9688
2.41M
                            cr_num_bytes);
9689
9690
2.41M
                        pu1_chrm_old_ecd_data += cr_num_bytes;
9691
9692
2.41M
                        au1_is_recon_available[U_PLANE] = 0;
9693
2.41M
                        au1_is_recon_available[V_PLANE] = 0;
9694
2.41M
                    }
9695
9696
                    /**-------- Compute Recon data (Do IT & Recon) : Chroma  -----------**/
9697
3.28M
                    if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9698
3.25M
                       (!u1_compute_spatial_ssd_chroma ||
9699
0
                        (!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma)))
9700
3.25M
                    {
9701
3.25M
                        if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9702
929k
                           (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9703
929k
                            (UCHAR_MAX ==
9704
929k
                             ps_recon_datastore
9705
929k
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])))
9706
3.09M
                        {
9707
3.09M
                            ihevce_chroma_it_recon_fxn(
9708
3.09M
                                ps_ctxt,
9709
3.09M
                                pi2_cur_deq_data_chrm,
9710
3.09M
                                cu_size,
9711
3.09M
                                pu1_cur_pred_chrm,
9712
3.09M
                                pred_chrm_strd,
9713
3.09M
                                pu1_cur_chroma_recon,
9714
3.09M
                                recon_chrma_strd,
9715
3.09M
                                pu1_final_ecd_data,
9716
3.09M
                                chroma_trans_size,
9717
3.09M
                                (i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1,
9718
3.09M
                                cb_zero_col,
9719
3.09M
                                cb_zero_row,
9720
3.09M
                                U_PLANE);
9721
3.09M
                        }
9722
160k
                        else if(
9723
160k
                            ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9724
160k
                            (UCHAR_MAX !=
9725
160k
                             ps_recon_datastore
9726
160k
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))
9727
160k
                        {
9728
160k
                            UWORD8 *pu1_recon_src =
9729
160k
                                ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9730
160k
                                     [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9731
160k
                                          [U_PLANE][ctr][i4_subtu_idx]]) +
9732
160k
                                i4_subtu_pos_x +
9733
160k
                                i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9734
9735
160k
                            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9736
160k
                                pu1_recon_src,
9737
160k
                                ps_recon_datastore->i4_lumaRecon_stride,
9738
160k
                                pu1_cur_chroma_recon,
9739
160k
                                recon_chrma_strd,
9740
160k
                                chroma_trans_size,
9741
160k
                                chroma_trans_size,
9742
160k
                                U_PLANE);
9743
160k
                        }
9744
3.25M
                    }
9745
9746
3.28M
                    u1_is_cu_coded |=
9747
3.28M
                        ((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf);
9748
9749
3.28M
                    if(ps_prms->u1_will_cabac_state_change)
9750
3.28M
                    {
9751
3.28M
                        ps_tu_enc_loop->ai4_cb_coeff_offset[i4_subtu_idx] = total_bytes;
9752
3.28M
                    }
9753
9754
3.28M
                    pu1_final_ecd_data += cb_num_bytes;
9755
                    /* update total bytes consumed */
9756
3.28M
                    total_bytes += cb_num_bytes;
9757
9758
3.28M
                    if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
9759
3.25M
                       (!u1_compute_spatial_ssd_chroma ||
9760
0
                        (!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma)))
9761
3.25M
                    {
9762
3.25M
                        if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
9763
929k
                           (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9764
929k
                            (UCHAR_MAX ==
9765
929k
                             ps_recon_datastore
9766
929k
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])))
9767
3.09M
                        {
9768
3.09M
                            ihevce_chroma_it_recon_fxn(
9769
3.09M
                                ps_ctxt,
9770
3.09M
                                pi2_cur_deq_data_chrm + chroma_trans_size,
9771
3.09M
                                cu_size,
9772
3.09M
                                pu1_cur_pred_chrm,
9773
3.09M
                                pred_chrm_strd,
9774
3.09M
                                pu1_cur_chroma_recon,
9775
3.09M
                                recon_chrma_strd,
9776
3.09M
                                pu1_final_ecd_data,
9777
3.09M
                                chroma_trans_size,
9778
3.09M
                                (i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1,
9779
3.09M
                                cr_zero_col,
9780
3.09M
                                cr_zero_row,
9781
3.09M
                                V_PLANE);
9782
3.09M
                        }
9783
160k
                        else if(
9784
160k
                            ps_recon_datastore->au1_is_chromaRecon_available[0] &&
9785
160k
                            (UCHAR_MAX !=
9786
160k
                             ps_recon_datastore
9787
160k
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))
9788
160k
                        {
9789
160k
                            UWORD8 *pu1_recon_src =
9790
160k
                                ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
9791
160k
                                     [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
9792
160k
                                          [V_PLANE][ctr][i4_subtu_idx]]) +
9793
160k
                                i4_subtu_pos_x +
9794
160k
                                i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
9795
9796
160k
                            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
9797
160k
                                pu1_recon_src,
9798
160k
                                ps_recon_datastore->i4_lumaRecon_stride,
9799
160k
                                pu1_cur_chroma_recon,
9800
160k
                                recon_chrma_strd,
9801
160k
                                chroma_trans_size,
9802
160k
                                chroma_trans_size,
9803
160k
                                V_PLANE);
9804
160k
                        }
9805
3.25M
                    }
9806
9807
3.28M
                    u1_is_cu_coded |=
9808
3.28M
                        ((1 == i4_subtu_idx) ? ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf);
9809
9810
3.28M
                    if(ps_prms->u1_will_cabac_state_change)
9811
3.28M
                    {
9812
3.28M
                        ps_tu_enc_loop->ai4_cr_coeff_offset[i4_subtu_idx] = total_bytes;
9813
3.28M
                    }
9814
9815
3.28M
                    pu1_final_ecd_data += cr_num_bytes;
9816
                    /* update total bytes consumed */
9817
3.28M
                    total_bytes += cr_num_bytes;
9818
3.28M
                }
9819
3.28M
            }
9820
3.28M
        }
9821
1.23M
        else
9822
1.23M
        {
9823
1.23M
            ps_tu_enc_loop->ai4_cb_coeff_offset[0] = total_bytes;
9824
1.23M
            ps_tu_enc_loop->ai4_cr_coeff_offset[0] = total_bytes;
9825
1.23M
            ps_tu_enc_loop->ai4_cb_coeff_offset[1] = total_bytes;
9826
1.23M
            ps_tu_enc_loop->ai4_cr_coeff_offset[1] = total_bytes;
9827
1.23M
            ps_tu->b1_cb_cbf = 0;
9828
1.23M
            ps_tu->b1_cr_cbf = 0;
9829
1.23M
            ps_tu->b1_cb_cbf_subtu1 = 0;
9830
1.23M
            ps_tu->b1_cr_cbf_subtu1 = 0;
9831
1.23M
        }
9832
9833
        /* Update to next TU */
9834
4.51M
        ps_tu_enc_loop++;
9835
4.51M
        ps_tu_enc_loop_temp_prms++;
9836
9837
4.51M
        pu4_nbr_flags++;
9838
4.51M
        pu1_intra_pred_mode++;
9839
9840
        /* Do not set the nbr map for the last TU in the CU */
9841
4.51M
        if((num_tu_in_cu - 1) != ctr)
9842
2.03M
        {
9843
            /* set the neighbour map to 1 */
9844
2.03M
            ihevce_set_nbr_map(
9845
2.03M
                ps_ctxt->pu1_ctb_nbr_map,
9846
2.03M
                ps_ctxt->i4_nbr_map_strd,
9847
2.03M
                cu_pos_x_in_4x4,
9848
2.03M
                cu_pos_y_in_4x4,
9849
2.03M
                (trans_size >> 2),
9850
2.03M
                1);
9851
2.03M
        }
9852
4.51M
    }
9853
9854
2.48M
    if(ps_prms->u1_will_cabac_state_change)
9855
2.48M
    {
9856
2.48M
        ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded;
9857
9858
        /* Modify skip flag, if luma is skipped & Chroma is coded */
9859
2.48M
        if((1 == u1_is_cu_coded) && (PRED_MODE_SKIP == packed_pred_mode))
9860
1.89k
        {
9861
1.89k
            ps_best_cu_prms->u1_skip_flag = 0;
9862
1.89k
        }
9863
2.48M
    }
9864
9865
    /* during chroma evaluation if skip decision was over written     */
9866
    /* then the current skip candidate is set to a non skip candidate */
9867
2.48M
    if(PRED_MODE_INTRA != packed_pred_mode)
9868
1.08M
    {
9869
1.08M
        ps_best_inter_cand->b1_skip_flag = ps_best_cu_prms->u1_skip_flag;
9870
1.08M
    }
9871
9872
    /**------------- Compute header data if required --------------**/
9873
2.48M
    if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data)
9874
590k
    {
9875
590k
        WORD32 cbf_bits;
9876
590k
        WORD32 cu_bits;
9877
590k
        WORD32 unit_4x4_size = cu_size >> 2;
9878
9879
        /*Restoring the running reference into the best rdopt_ctxt cabac states which will then
9880
        be copied as the base reference for the next cu
9881
        Assumption : We are ensuring that the u1_eval_header_data flag is set to 1 only if either
9882
        luma and chroma are being reevaluated*/
9883
590k
        COPY_CABAC_STATES(
9884
590k
            &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
9885
590k
                 .s_cabac_ctxt.au1_ctxt_models[0],
9886
590k
            &ps_ctxt->au1_rdopt_init_ctxt_models[0],
9887
590k
            IHEVC_CAB_CTXT_END);
9888
9889
        /* get the neighbour availability flags for current cu  */
9890
590k
        ihevce_get_only_nbr_flag(
9891
590k
            &s_nbr,
9892
590k
            ps_ctxt->pu1_ctb_nbr_map,
9893
590k
            ps_ctxt->i4_nbr_map_strd,
9894
590k
            (cu_pos_x << 1),
9895
590k
            (cu_pos_y << 1),
9896
590k
            unit_4x4_size,
9897
590k
            unit_4x4_size);
9898
9899
590k
        cu_bits = ihevce_entropy_rdo_encode_cu(
9900
590k
            &ps_ctxt->s_rdopt_entropy_ctxt,
9901
590k
            ps_best_cu_prms,
9902
590k
            cu_pos_x,
9903
590k
            cu_pos_y,
9904
590k
            cu_size,
9905
590k
            ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
9906
590k
                                           : s_nbr.u1_top_avail,
9907
590k
            s_nbr.u1_left_avail,
9908
590k
            (pu1_final_ecd_data - total_bytes),
9909
590k
            &cbf_bits);
9910
9911
        /* cbf bits are excluded from header bits, instead considered as texture bits */
9912
590k
        ps_best_cu_prms->u4_cu_hdr_bits = cu_bits - cbf_bits;
9913
590k
        ps_best_cu_prms->u4_cu_cbf_bits = cbf_bits;
9914
590k
    }
9915
9916
2.48M
    if(ps_prms->u1_will_cabac_state_change)
9917
2.48M
    {
9918
2.48M
        ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes;
9919
2.48M
    }
9920
2.48M
}
9921
9922
/*!
9923
******************************************************************************
9924
* \if Function name : ihevce_set_eval_flags \endif
9925
*
9926
* \brief
9927
*    Function which decides which eval flags have to be set based on present
9928
*    and RDOQ conditions
9929
*
9930
* \param[in] ps_ctxt : encoder ctxt pointer
9931
* \param[in] ps_enc_loop_bestprms : pointer to final cu params
9932
*
9933
* \return
9934
*    None
9935
*
9936
* \author
9937
*  Ittiam
9938
*
9939
*****************************************************************************
9940
*/
9941
void ihevce_set_eval_flags(
9942
    ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_final_prms_t *ps_enc_loop_bestprms)
9943
2.48M
{
9944
2.48M
    WORD32 count = 0;
9945
9946
2.48M
    ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0;
9947
9948
2.48M
    ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data =
9949
2.48M
        !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
9950
9951
2.48M
    if(ps_ctxt->u1_disable_intra_eval && (!(ps_ctxt->i4_deblk_pad_hpel_cur_pic & 0x1)))
9952
13.9k
    {
9953
13.9k
        ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 0;
9954
13.9k
    }
9955
2.46M
    else
9956
2.46M
    {
9957
2.46M
        ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1;
9958
2.46M
    }
9959
9960
2.48M
    if((1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq) ||
9961
2.48M
       (1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh))
9962
0
    {
9963
        /* When rdoq is enabled only for the best candidate, in case of in Intra nTU
9964
        RDOQ might have altered the coeffs of the neighbour CU. As a result, the pred
9965
        for the current CU will change. Therefore, we need to reevaluate the pred data*/
9966
0
        if((ps_enc_loop_bestprms->u2_num_tus_in_cu > 1) &&
9967
0
           (ps_enc_loop_bestprms->u1_intra_flag == 1))
9968
0
        {
9969
0
            ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 1;
9970
0
            ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data = 1;
9971
0
        }
9972
0
        if(ps_enc_loop_bestprms->u1_skip_flag == 1)
9973
0
        {
9974
0
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
9975
0
            {
9976
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
9977
0
                    .b1_eval_luma_iq_and_coeff_data = 0;
9978
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
9979
0
                    .b1_eval_chroma_iq_and_coeff_data = 0;
9980
0
            }
9981
0
        }
9982
0
        else
9983
0
        {
9984
0
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
9985
0
            {
9986
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
9987
0
                    .b1_eval_luma_iq_and_coeff_data = 1;
9988
0
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
9989
0
                    .b1_eval_chroma_iq_and_coeff_data = 1;
9990
0
            }
9991
0
        }
9992
0
    }
9993
2.48M
    else
9994
2.48M
    {
9995
2.48M
        switch(ps_ctxt->i4_quality_preset)
9996
2.48M
        {
9997
1.53M
        case IHEVCE_QUALITY_P0:
9998
1.68M
        case IHEVCE_QUALITY_P2:
9999
1.89M
        case IHEVCE_QUALITY_P3:
10000
1.89M
        {
10001
5.18M
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
10002
3.29M
            {
10003
3.29M
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10004
3.29M
                    .b1_eval_luma_iq_and_coeff_data = 0;
10005
3.29M
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10006
3.29M
                    .b1_eval_chroma_iq_and_coeff_data =
10007
3.29M
                    !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10008
3.29M
            }
10009
10010
1.89M
            break;
10011
1.68M
        }
10012
110k
        case IHEVCE_QUALITY_P4:
10013
245k
        case IHEVCE_QUALITY_P5:
10014
245k
        {
10015
788k
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
10016
543k
            {
10017
543k
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10018
543k
                    .b1_eval_luma_iq_and_coeff_data = 0;
10019
543k
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10020
543k
                    .b1_eval_chroma_iq_and_coeff_data =
10021
543k
                    !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10022
543k
            }
10023
10024
245k
            break;
10025
110k
        }
10026
344k
        case IHEVCE_QUALITY_P6:
10027
344k
        {
10028
1.02M
            for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
10029
683k
            {
10030
683k
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10031
683k
                    .b1_eval_luma_iq_and_coeff_data = 0;
10032
683k
#if !ENABLE_CHROMA_TRACKING_OF_LUMA_CBF_IN_XS25
10033
683k
                ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10034
683k
                    .b1_eval_chroma_iq_and_coeff_data =
10035
683k
                    !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10036
#else
10037
                if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_temporal_layer_id > 1) &&
10038
                   (ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b3_size >= 2))
10039
                {
10040
                    ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10041
                        .b1_eval_chroma_iq_and_coeff_data =
10042
                        ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b1_y_cbf;
10043
                }
10044
                else
10045
                {
10046
                    ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
10047
                        .b1_eval_chroma_iq_and_coeff_data =
10048
                        !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
10049
                }
10050
#endif
10051
683k
            }
10052
10053
344k
            break;
10054
110k
        }
10055
0
        default:
10056
0
        {
10057
0
            break;
10058
110k
        }
10059
2.48M
        }
10060
2.48M
    }
10061
10062
    /* Not recomputing Luma pred-data and header data for any preset now */
10063
2.48M
    ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1;
10064
2.48M
}
10065
10066
/**
10067
******************************************************************************
10068
*
10069
*  @brief Shrink's TU tree of inter CUs by merging redundnant child nodes
10070
*         (not coded children) into a parent node(not coded).
10071
*
10072
*  @par   Description
10073
*         This is required post RDO evaluation as TU decisions are
10074
*         pre-determined(pre RDO) based on recursive SATD,
10075
*         while the quad children TU's can be skipped during RDO
10076
*
10077
*         The shrink process is applied iteratively till there are no
10078
*         more modes to shrink
10079
*
10080
*  @param[inout]   ps_tu_enc_loop
10081
*       pointer to tu enc loop params of inter cu
10082
*
10083
*  @param[inout]   ps_tu_enc_loop_temp_prms
10084
*       pointer to temp tu enc loop params of inter cu
10085
*
10086
*  @param[in]   num_tu_in_cu
10087
*       number of tus in cu
10088
*
10089
*  @return      modified number of tus in cu
10090
*
10091
******************************************************************************
10092
*/
10093
WORD32 ihevce_shrink_inter_tu_tree(
10094
    tu_enc_loop_out_t *ps_tu_enc_loop,
10095
    tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms,
10096
    recon_datastore_t *ps_recon_datastore,
10097
    WORD32 num_tu_in_cu,
10098
    UWORD8 u1_is_422)
10099
439k
{
10100
439k
    WORD32 recurse = 1;
10101
439k
    WORD32 ctr;
10102
10103
    /* ------------- Quadtree TU Split Transform flag optimization ------------  */
10104
    /* Post RDO, if all 4 child nodes are not coded the overheads of split TU    */
10105
    /* flags and cbf flags are saved by merging to parent node and marking       */
10106
    /* parent TU as not coded                                                    */
10107
    /*                                                                           */
10108
    /*                               ParentTUSplit=1                             */
10109
    /*                                      |                                    */
10110
    /*       ---------------------------------------------------------           */
10111
    /*       |C0(Not coded) | C1(Not coded) | C2(Not coded) | C3(Not coded)      */
10112
    /*                                     ||                                    */
10113
    /*                                     \/                                    */
10114
    /*                                                                           */
10115
    /*                              ParentTUSplit=0 (Not Coded)                  */
10116
    /*                                                                           */
10117
    /* ------------- Quadtree TU Split Transform flag optimization ------------  */
10118
546k
    while((num_tu_in_cu > 4) && recurse)
10119
107k
    {
10120
107k
        recurse = 0;
10121
10122
        /* Validate inter CU */
10123
        //ASSERT(ps_tu_enc_loop[0].s_tu.s_tu.b1_intra_flag == 0); /*b1_intra_flag no longer a member of tu structure */
10124
10125
        /* loop for all tu blocks in current cu */
10126
740k
        for(ctr = 0; ctr < num_tu_in_cu;)
10127
633k
        {
10128
            /* Get current tu posx, posy and size */
10129
633k
            WORD32 curr_pos_x = ps_tu_enc_loop[ctr].s_tu.b4_pos_x << 2;
10130
633k
            WORD32 curr_pos_y = ps_tu_enc_loop[ctr].s_tu.b4_pos_y << 2;
10131
            /* +1 is for parents size */
10132
633k
            WORD32 parent_tu_size = 1 << (ps_tu_enc_loop[ctr].s_tu.b3_size + 2 + 1);
10133
10134
            /* eval merge if leaf nodes reached i.e all child tus are of same size and first tu pos is same as parent pos */
10135
633k
            WORD32 eval_merge = ((curr_pos_x & (parent_tu_size - 1)) == 0);
10136
633k
            eval_merge &= ((curr_pos_y & (parent_tu_size - 1)) == 0);
10137
10138
            /* As TUs are published in encode order (Z SCAN),                      */
10139
            /* Four consecutive TUS of same size implies we have hit leaf nodes.   */
10140
633k
            if(((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 1].s_tu.b3_size)) &&
10141
446k
               ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 2].s_tu.b3_size)) &&
10142
315k
               ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 3].s_tu.b3_size)) &&
10143
220k
               eval_merge)
10144
184k
            {
10145
184k
                WORD32 merge_parent = 1;
10146
10147
                /* If any leaf noded is coded, it cannot be merged to parent */
10148
184k
                if((ps_tu_enc_loop[ctr].s_tu.b1_y_cbf) || (ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf) ||
10149
17.3k
                   (ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf) ||
10150
10151
15.2k
                   (ps_tu_enc_loop[ctr + 1].s_tu.b1_y_cbf) ||
10152
11.7k
                   (ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf) ||
10153
11.7k
                   (ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf) ||
10154
10155
11.7k
                   (ps_tu_enc_loop[ctr + 2].s_tu.b1_y_cbf) ||
10156
7.85k
                   (ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf) ||
10157
7.82k
                   (ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf) ||
10158
10159
7.80k
                   (ps_tu_enc_loop[ctr + 3].s_tu.b1_y_cbf) ||
10160
6.61k
                   (ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf) ||
10161
6.61k
                   (ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf))
10162
178k
                {
10163
178k
                    merge_parent = 0;
10164
178k
                }
10165
10166
184k
                if(u1_is_422)
10167
0
                {
10168
0
                    if((ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1) ||
10169
0
                       (ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1) ||
10170
10171
0
                       (ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf_subtu1) ||
10172
0
                       (ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf_subtu1) ||
10173
10174
0
                       (ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf_subtu1) ||
10175
0
                       (ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf_subtu1) ||
10176
10177
0
                       (ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf_subtu1) ||
10178
0
                       (ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf_subtu1))
10179
0
                    {
10180
0
                        merge_parent = 0;
10181
0
                    }
10182
0
                }
10183
10184
184k
                if(merge_parent)
10185
6.60k
                {
10186
                    /* Merge all the children (ctr,ctr+1,ctr+2,ctr+3) to parent (ctr) */
10187
10188
6.60k
                    if(ps_recon_datastore->u1_is_lumaRecon_available)
10189
151
                    {
10190
151
                        ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
10191
10192
151
                        memmove(
10193
151
                            &ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 1],
10194
151
                            &ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 4],
10195
151
                            (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10196
151
                    }
10197
10198
6.60k
                    if(ps_recon_datastore->au1_is_chromaRecon_available[0])
10199
151
                    {
10200
151
                        ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][0] =
10201
151
                            UCHAR_MAX;
10202
151
                        ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][0] =
10203
151
                            UCHAR_MAX;
10204
10205
151
                        memmove(
10206
151
                            &ps_recon_datastore
10207
151
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][0],
10208
151
                            &ps_recon_datastore
10209
151
                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][0],
10210
151
                            (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10211
10212
151
                        memmove(
10213
151
                            &ps_recon_datastore
10214
151
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][0],
10215
151
                            &ps_recon_datastore
10216
151
                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][0],
10217
151
                            (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10218
10219
151
                        if(u1_is_422)
10220
0
                        {
10221
0
                            ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][1] =
10222
0
                                UCHAR_MAX;
10223
0
                            ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][1] =
10224
0
                                UCHAR_MAX;
10225
10226
0
                            memmove(
10227
0
                                &ps_recon_datastore
10228
0
                                     ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][1],
10229
0
                                &ps_recon_datastore
10230
0
                                     ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][1],
10231
0
                                (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10232
10233
0
                            memmove(
10234
0
                                &ps_recon_datastore
10235
0
                                     ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][1],
10236
0
                                &ps_recon_datastore
10237
0
                                     ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][1],
10238
0
                                (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
10239
0
                        }
10240
151
                    }
10241
10242
                    /* Parent node size is one more than that of child */
10243
6.60k
                    ps_tu_enc_loop[ctr].s_tu.b3_size++;
10244
10245
6.60k
                    ctr++;
10246
10247
                    /* move the subsequent TUs to next element */
10248
6.60k
                    ASSERT(num_tu_in_cu >= (ctr + 3));
10249
6.60k
                    memmove(
10250
6.60k
                        (void *)(ps_tu_enc_loop + ctr),
10251
6.60k
                        (void *)(ps_tu_enc_loop + ctr + 3),
10252
6.60k
                        (num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_out_t));
10253
10254
                    /* Also memmove the temp TU params */
10255
6.60k
                    memmove(
10256
6.60k
                        (void *)(ps_tu_enc_loop_temp_prms + ctr),
10257
6.60k
                        (void *)(ps_tu_enc_loop_temp_prms + ctr + 3),
10258
6.60k
                        (num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_temp_prms_t));
10259
10260
                    /* Number of TUs in CU are now less by 3 */
10261
6.60k
                    num_tu_in_cu -= 3;
10262
10263
                    /* Recurse again as new parent also be can be merged later */
10264
6.60k
                    recurse = 1;
10265
6.60k
                }
10266
178k
                else
10267
178k
                {
10268
                    /* Go to next set of leaf nodes */
10269
178k
                    ctr += 4;
10270
178k
                }
10271
184k
            }
10272
448k
            else
10273
448k
            {
10274
448k
                ctr++;
10275
448k
            }
10276
633k
        }
10277
107k
    }
10278
10279
    /* return the modified num TUs*/
10280
439k
    ASSERT(num_tu_in_cu > 0);
10281
439k
    return (num_tu_in_cu);
10282
439k
}
10283
10284
/**
******************************************************************************
*
*  @brief Appends the adjacent modes (mode - 1 and mode + 1) of the leading
*         intra candidates, followed by INTRA_PLANAR and INTRA_DC, to the
*         mode array, skipping every mode already flagged in the hash table.
*
*  @param[inout]   pu1_mode_array
*       mode list; its first MAX_INTRA_CU_CANDIDATES entries are scanned and
*       new modes are written starting at index u1_num_ipe_modes
*
*  @param[inout]   pu1_hash_table
*       per-mode flag, non-zero when the mode is already in the list;
*       set to 1 for every mode appended here
*
*  @param[in]   u1_num_ipe_modes
*       number of modes in the list on entry
*
*  @return      number of modes in the list after the additions
*
******************************************************************************
*/
UWORD8 ihevce_intra_mode_nxn_hash_updater(
    UWORD8 *pu1_mode_array, UWORD8 *pu1_hash_table, UWORD8 u1_num_ipe_modes)
{
    WORD32 cand_idx;

    for(cand_idx = 0; cand_idx < MAX_INTRA_CU_CANDIDATES; cand_idx++)
    {
        WORD32 i4_adj_mode;

        /* Entries of 35 and above are ignored */
        if(pu1_mode_array[cand_idx] >= 35)
        {
            continue;
        }

        /* Lower neighbour; mode 0 has none */
        if(0 != pu1_mode_array[cand_idx])
        {
            i4_adj_mode = pu1_mode_array[cand_idx] - 1;

            if(0 == pu1_hash_table[i4_adj_mode])
            {
                pu1_hash_table[i4_adj_mode] = 1;
                pu1_mode_array[u1_num_ipe_modes++] = i4_adj_mode;
            }
        }

        /* Upper neighbour; mode 34 has none. The entry is read again on  */
        /* purpose, since the append above writes into the same array.    */
        if(34 != pu1_mode_array[cand_idx])
        {
            i4_adj_mode = pu1_mode_array[cand_idx] + 1;

            if(0 == pu1_hash_table[i4_adj_mode])
            {
                pu1_hash_table[i4_adj_mode] = 1;
                pu1_mode_array[u1_num_ipe_modes++] = i4_adj_mode;
            }
        }
    }

    /* Planar and DC are always part of the final list */
    if(0 == pu1_hash_table[INTRA_PLANAR])
    {
        pu1_hash_table[INTRA_PLANAR] = 1;
        pu1_mode_array[u1_num_ipe_modes++] = INTRA_PLANAR;
    }

    if(0 == pu1_hash_table[INTRA_DC])
    {
        pu1_hash_table[INTRA_DC] = 1;
        pu1_mode_array[u1_num_ipe_modes++] = INTRA_DC;
    }

    return u1_num_ipe_modes;
}
10336
10337
#if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
/**
******************************************************************************
*
*  @brief Runs the recursive-TU SATD evaluator matching the CU size on the
*         candidate's prediction and returns the value it produces. The TU
*         split flags are written into ps_cu_data->ai4_tu_split_flag.
*
*  @param[inout]   ps_cu_data        inter candidate (pred data in, split flags out)
*  @param[in]      ps_func_selector  forwarded to the evaluator
*  @param[in]      pi2_scratch_mem   working memory handed to the evaluator
*  @param[in]      pu1_inp           source pixels
*  @param[in]      i4_inp_stride     source stride
*  @param[in]      i4_lambda         forwarded to the evaluator
*  @param[in]      u1_lambda_q_shift forwarded to the evaluator
*  @param[in]      u1_cu_size        CU size; selects the evaluator
*  @param[in]      u1_max_tr_depth   maximum transform depth (capped at 1 for 64x64)
*
*  @return      value returned by the selected evaluator
*
******************************************************************************
*/
WORD32 ihevce_determine_tu_tree_distribution(
    cu_inter_cand_t *ps_cu_data,
    me_func_selector_t *ps_func_selector,
    WORD16 *pi2_scratch_mem,
    UWORD8 *pu1_inp,
    WORD32 i4_inp_stride,
    WORD32 i4_lambda,
    UWORD8 u1_lambda_q_shift,
    UWORD8 u1_cu_size,
    UWORD8 u1_max_tr_depth)
{
    PF_SAD_FXN_TU_REC apf_tu_rec_satd[4];
    err_prms_t s_err_prms;
    WORD32 i4_satd;

    /* One evaluator per CU size */
    apf_tu_rec_satd[CU_64x64] = hme_evalsatd_pt_pu_64x64_tu_rec;
    apf_tu_rec_satd[CU_32x32] = hme_evalsatd_pt_pu_32x32_tu_rec;
    apf_tu_rec_satd[CU_16x16] = hme_evalsatd_pt_pu_16x16_tu_rec;
    apf_tu_rec_satd[CU_8x8] = hme_evalsatd_pt_pu_8x8_tu_rec;

    /* Source / prediction description */
    s_err_prms.pu1_inp = pu1_inp;
    s_err_prms.i4_inp_stride = i4_inp_stride;
    s_err_prms.pu1_ref = ps_cu_data->pu1_pred_data;
    s_err_prms.i4_ref_stride = ps_cu_data->i4_pred_data_stride;

    /* Outputs and working memory */
    s_err_prms.pi4_sad_grid = &i4_satd;
    s_err_prms.pi4_tu_split_flags = ps_cu_data->ai4_tu_split_flag;
    s_err_prms.pu1_wkg_mem = (UWORD8 *)pi2_scratch_mem;

    /* A 64x64 CU is limited to a transform depth of at most 1 */
    s_err_prms.u1_max_tr_depth = (64 == u1_cu_size) ? MIN(1, u1_max_tr_depth) : u1_max_tr_depth;

    /* NOTE(review): the table index relies on hme_get_range(size) - 4 */
    /* matching the CU_* constants - confirm against their definitions */
    i4_satd = apf_tu_rec_satd[hme_get_range(u1_cu_size) - 4](
        &s_err_prms, i4_lambda, u1_lambda_q_shift, 0, ps_func_selector);

    /* With depth 0 and a part size other than 0, the root split flag */
    /* is forced for every CU size except 64                           */
    if((u1_cu_size != 64) && (0 == u1_max_tr_depth) && (ps_cu_data->b3_part_size != 0))
    {
        ps_cu_data->ai4_tu_split_flag[0] = 1;
    }

    return i4_satd;
}
#endif
10388
10389
void ihevce_populate_nbr_4x4_with_pu_data(
10390
    nbr_4x4_t *ps_nbr_4x4, pu_t *ps_pu, WORD32 i4_nbr_buf_stride)
10391
558k
{
10392
558k
    WORD32 i, j;
10393
10394
558k
    nbr_4x4_t *ps_tmp_4x4 = ps_nbr_4x4;
10395
10396
558k
    WORD32 ht = (ps_pu->b4_ht + 1);
10397
558k
    WORD32 wd = (ps_pu->b4_wd + 1);
10398
10399
558k
    ps_nbr_4x4->b1_intra_flag = 0;
10400
558k
    ps_nbr_4x4->b1_pred_l0_flag = !(ps_pu->b2_pred_mode & 1);
10401
558k
    ps_nbr_4x4->b1_pred_l1_flag = (ps_pu->b2_pred_mode > PRED_L0);
10402
558k
    ps_nbr_4x4->mv = ps_pu->mv;
10403
10404
1.75M
    for(i = 0; i < ht; i++)
10405
1.20M
    {
10406
6.81M
        for(j = 0; j < wd; j++)
10407
5.61M
        {
10408
5.61M
            ps_tmp_4x4[j] = *ps_nbr_4x4;
10409
5.61M
        }
10410
10411
1.20M
        ps_tmp_4x4 += i4_nbr_buf_stride;
10412
1.20M
    }
10413
558k
}
10414
10415
void ihevce_call_luma_inter_pred_rdopt_pass1(
10416
    ihevce_enc_loop_ctxt_t *ps_ctxt, cu_inter_cand_t *ps_inter_cand, WORD32 cu_size)
10417
0
{
10418
0
    pu_t *ps_pu;
10419
0
    UWORD8 *pu1_pred;
10420
0
    WORD32 pred_stride, ctr, num_cu_part, skip_or_merge_flag = 0;
10421
0
    WORD32 inter_pu_wd, inter_pu_ht;
10422
10423
0
    pu1_pred = ps_inter_cand->pu1_pred_data_scr;
10424
0
    pred_stride = ps_inter_cand->i4_pred_data_stride;
10425
0
    num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1;
10426
10427
0
    for(ctr = 0; ctr < num_cu_part; ctr++)
10428
0
    {
10429
0
        ps_pu = &ps_inter_cand->as_inter_pu[ctr];
10430
10431
        /* IF AMP then each partitions can have diff wd ht */
10432
0
        inter_pu_wd = (ps_pu->b4_wd + 1) << 2;
10433
0
        inter_pu_ht = (ps_pu->b4_ht + 1) << 2;
10434
10435
0
        skip_or_merge_flag = ps_inter_cand->b1_skip_flag | ps_pu->b1_merge_flag;
10436
        //if(0 == skip_or_merge_flag)
10437
0
        {
10438
0
            ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 1);
10439
0
        }
10440
0
        if((2 == num_cu_part) && (0 == ctr))
10441
0
        {
10442
            /* 2Nx__ partion case */
10443
0
            if(inter_pu_wd == cu_size)
10444
0
            {
10445
0
                pu1_pred += (inter_pu_ht * pred_stride);
10446
0
            }
10447
10448
            /* __x2N partion case */
10449
0
            if(inter_pu_ht == cu_size)
10450
0
            {
10451
0
                pu1_pred += inter_pu_wd;
10452
0
            }
10453
0
        }
10454
0
    }
10455
0
}
10456
10457
LWORD64 ihevce_it_recon_ssd(
10458
    ihevce_enc_loop_ctxt_t *ps_ctxt,
10459
    UWORD8 *pu1_src,
10460
    WORD32 i4_src_strd,
10461
    UWORD8 *pu1_pred,
10462
    WORD32 i4_pred_strd,
10463
    WORD16 *pi2_deq_data,
10464
    WORD32 i4_deq_data_strd,
10465
    UWORD8 *pu1_recon,
10466
    WORD32 i4_recon_stride,
10467
    UWORD8 *pu1_ecd_data,
10468
    UWORD8 u1_trans_size,
10469
    UWORD8 u1_pred_mode,
10470
    WORD32 i4_cbf,
10471
    WORD32 i4_zero_col,
10472
    WORD32 i4_zero_row,
10473
    CHROMA_PLANE_ID_T e_chroma_plane)
10474
27.5M
{
10475
27.5M
    if(NULL_PLANE == e_chroma_plane)
10476
12.1M
    {
10477
12.1M
        ihevce_it_recon_fxn(
10478
12.1M
            ps_ctxt,
10479
12.1M
            pi2_deq_data,
10480
12.1M
            i4_deq_data_strd,
10481
12.1M
            pu1_pred,
10482
12.1M
            i4_pred_strd,
10483
12.1M
            pu1_recon,
10484
12.1M
            i4_recon_stride,
10485
12.1M
            pu1_ecd_data,
10486
12.1M
            u1_trans_size,
10487
12.1M
            u1_pred_mode,
10488
12.1M
            i4_cbf,
10489
12.1M
            i4_zero_col,
10490
12.1M
            i4_zero_row);
10491
10492
12.1M
        return ps_ctxt->s_cmn_opt_func.pf_ssd_calculator(
10493
12.1M
            pu1_recon, pu1_src, i4_recon_stride, i4_src_strd, u1_trans_size, u1_trans_size,
10494
12.1M
            e_chroma_plane);
10495
12.1M
    }
10496
15.3M
    else
10497
15.3M
    {
10498
15.3M
        ihevce_chroma_it_recon_fxn(
10499
15.3M
            ps_ctxt,
10500
15.3M
            pi2_deq_data,
10501
15.3M
            i4_deq_data_strd,
10502
15.3M
            pu1_pred,
10503
15.3M
            i4_pred_strd,
10504
15.3M
            pu1_recon,
10505
15.3M
            i4_recon_stride,
10506
15.3M
            pu1_ecd_data,
10507
15.3M
            u1_trans_size,
10508
15.3M
            i4_cbf,
10509
15.3M
            i4_zero_col,
10510
15.3M
            i4_zero_row,
10511
15.3M
            e_chroma_plane);
10512
10513
15.3M
        return ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
10514
15.3M
            pu1_recon,
10515
15.3M
            pu1_src,
10516
15.3M
            i4_recon_stride,
10517
15.3M
            i4_src_strd,
10518
15.3M
            u1_trans_size,
10519
15.3M
            u1_trans_size,
10520
15.3M
            e_chroma_plane);
10521
15.3M
    }
10522
27.5M
}
10523
10524
/*!
10525
******************************************************************************
10526
* \if Function name : ihevce_chroma_t_q_iq_ssd_scan_fxn \endif
10527
*
10528
* \brief
10529
*    Transform unit level (Chroma) enc_loop function
10530
*
10531
* \param[in] ps_ctxt    enc_loop module ctxt pointer
10532
* \param[in] pu1_pred       pointer to predicted data buffer
10533
* \param[in] pred_strd      predicted buffer stride
10534
* \param[in] pu1_src    pointer to source data buffer
10535
* \param[in] src_strd   source buffer stride
10536
* \param[in] pi2_deq_data   pointer to store iq data
10537
* \param[in] deq_data_strd  iq data buffer stride
10538
* \param[out] pu1_ecd_data  pointer coeff output buffer (input to ent cod)
10539
* \param[out] pu1_csbf_buf  pointer to store the csbf for all 4x4 in a current
10540
*                           block
10541
* \param[out] csbf_strd     csbf buffer stride
10542
* \param[in] trans_size     transform size (4, 8, 16)
10543
* \param[in] intra_flag     0:Inter/Skip 1:Intra
10544
* \param[out] pi4_coeff_off pointer to store the number of bytes produced in
10545
*                           coeff buffer
10546
*                           the current TU in RDopt Mode
10547
* \param[out] pi4_zero_col  pointer to store the zero_col info for the TU
10548
* \param[out] pi4_zero_row  pointer to store the zero_row info for the TU
10549
*
10550
* \return
10551
*    CBF of the current block
10552
*
10553
* \author
10554
*  Ittiam
10555
*
10556
*****************************************************************************
10557
*/
10558
WORD32 ihevce_chroma_t_q_iq_ssd_scan_fxn(
10559
    ihevce_enc_loop_ctxt_t *ps_ctxt,
10560
    UWORD8 *pu1_pred,
10561
    WORD32 pred_strd,
10562
    UWORD8 *pu1_src,
10563
    WORD32 src_strd,
10564
    WORD16 *pi2_deq_data,
10565
    WORD32 deq_data_strd,
10566
    UWORD8 *pu1_recon,
10567
    WORD32 i4_recon_stride,
10568
    UWORD8 *pu1_ecd_data,
10569
    UWORD8 *pu1_csbf_buf,
10570
    WORD32 csbf_strd,
10571
    WORD32 trans_size,
10572
    WORD32 i4_scan_idx,
10573
    WORD32 intra_flag,
10574
    WORD32 *pi4_coeff_off,
10575
    WORD32 *pi4_tu_bits,
10576
    WORD32 *pi4_zero_col,
10577
    WORD32 *pi4_zero_row,
10578
    UWORD8 *pu1_is_recon_available,
10579
    WORD32 i4_perform_sbh,
10580
    WORD32 i4_perform_rdoq,
10581
    LWORD64 *pi8_cost,
10582
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
10583
    WORD32 i4_alpha_stim_multiplier,
10584
    UWORD8 u1_is_cu_noisy,
10585
#endif
10586
    UWORD8 u1_is_skip,
10587
    SSD_TYPE_T e_ssd_type,
10588
    CHROMA_PLANE_ID_T e_chroma_plane)
10589
28.9M
{
10590
28.9M
    WORD32 trans_idx, cbf, u4_blk_sad;
10591
28.9M
    WORD16 *pi2_quant_coeffs;
10592
28.9M
    WORD16 *pi2_trans_values;
10593
28.9M
    WORD32 quant_scale_mat_offset;
10594
28.9M
    WORD32 *pi4_trans_scratch;
10595
28.9M
    WORD32 *pi4_subBlock2csbfId_map = NULL;
10596
10597
#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
10598
    WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i;
10599
#endif
10600
10601
28.9M
    rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt;
10602
10603
28.9M
    WORD32 i4_perform_zcbf = (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE) ||
10604
22.4M
                             (!intra_flag && ENABLE_INTER_ZCU_COST);
10605
28.9M
    WORD32 i4_perform_coeff_level_rdoq =
10606
28.9M
        (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING) &&
10607
24.2M
        (ps_ctxt->i4_chroma_quant_rounding_level == CHROMA_QUANT_ROUNDING);
10608
10609
28.9M
    ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE));
10610
28.9M
    ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW);
10611
10612
28.9M
    *pi4_coeff_off = 0;
10613
28.9M
    *pi4_tu_bits = 0;
10614
28.9M
    pu1_is_recon_available[0] = 0;
10615
10616
28.9M
    pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0];
10617
28.9M
    pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
10618
28.9M
    pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2);
10619
10620
28.9M
    if(2 == trans_size)
10621
0
    {
10622
0
        trans_size = 4;
10623
0
    }
10624
10625
    /* translate the transform size to index */
10626
28.9M
    trans_idx = trans_size >> 2;
10627
10628
28.9M
    if(16 == trans_size)
10629
4.01M
    {
10630
4.01M
        trans_idx = 3;
10631
4.01M
    }
10632
10633
28.9M
    if(u1_is_skip)
10634
0
    {
10635
0
        pi8_cost[0] = ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
10636
0
            pu1_pred,
10637
0
            pu1_src,
10638
0
            pred_strd,
10639
0
            src_strd,
10640
0
            trans_size,
10641
0
            trans_size,
10642
0
            e_chroma_plane);
10643
10644
0
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
10645
0
        {
10646
            /* buffer copy fromp pred to recon */
10647
0
            ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
10648
0
                pu1_pred,
10649
0
                pred_strd,
10650
0
                pu1_recon,
10651
0
                i4_recon_stride,
10652
0
                trans_size,
10653
0
                trans_size,
10654
0
                e_chroma_plane);
10655
10656
0
            pu1_is_recon_available[0] = 1;
10657
0
        }
10658
10659
0
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
10660
0
        if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
10661
0
        {
10662
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
10663
0
                pu1_src,
10664
0
                src_strd,
10665
0
                pu1_pred,
10666
0
                pred_strd,
10667
0
                pi8_cost[0],
10668
0
                i4_alpha_stim_multiplier,
10669
0
                trans_size,
10670
0
                0,
10671
0
                ps_ctxt->u1_enable_psyRDOPT,
10672
0
                e_chroma_plane);
10673
0
        }
10674
0
#endif
10675
10676
0
#if ENABLE_INTER_ZCU_COST
10677
#if !WEIGH_CHROMA_COST
10678
        /* cbf = 0, accumulate cu not coded cost */
10679
        ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
10680
#else
10681
0
        ps_ctxt->i8_cu_not_coded_cost += (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor +
10682
0
                                          (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
10683
0
                                         CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT;
10684
0
#endif
10685
0
#endif
10686
10687
0
        return 0;
10688
0
    }
10689
10690
28.9M
    if(intra_flag == 1)
10691
22.4M
    {
10692
22.4M
        quant_scale_mat_offset = 0;
10693
10694
#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
10695
        ai4_quant_rounding_factors[0][0] =
10696
            MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3);
10697
10698
        for(i = 0; i < trans_size * trans_size; i++)
10699
        {
10700
            ai4_quant_rounding_factors[1][i] =
10701
                MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3][i],
10702
                    (1 << QUANT_ROUND_FACTOR_Q) / 3);
10703
            ai4_quant_rounding_factors[2][i] =
10704
                MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3][i],
10705
                    (1 << QUANT_ROUND_FACTOR_Q) / 3);
10706
        }
10707
#endif
10708
22.4M
    }
10709
6.53M
    else
10710
6.53M
    {
10711
6.53M
        quant_scale_mat_offset = NUM_TRANS_TYPES;
10712
6.53M
    }
10713
10714
28.9M
    switch(trans_size)
10715
28.9M
    {
10716
14.7M
    case 4:
10717
14.7M
    {
10718
14.7M
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU;
10719
10720
14.7M
        break;
10721
0
    }
10722
10.2M
    case 8:
10723
10.2M
    {
10724
10.2M
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU;
10725
10726
10.2M
        break;
10727
0
    }
10728
4.01M
    case 16:
10729
4.01M
    {
10730
4.01M
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU;
10731
10732
4.01M
        break;
10733
0
    }
10734
0
    case 32:
10735
0
    {
10736
0
        pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU;
10737
10738
0
        break;
10739
0
    }
10740
28.9M
    }
10741
10742
    /* ---------- call residue and transform block ------- */
10743
28.9M
    u4_blk_sad = ps_ctxt->apf_chrm_resd_trns[trans_idx - 1](
10744
28.9M
        pu1_src,
10745
28.9M
        pu1_pred,
10746
28.9M
        pi4_trans_scratch,
10747
28.9M
        pi2_trans_values,
10748
28.9M
        src_strd,
10749
28.9M
        pred_strd,
10750
28.9M
        trans_size,
10751
28.9M
        e_chroma_plane);
10752
28.9M
    (void)u4_blk_sad;
10753
    /* -------- calculate SSD in Transform Domain ------ */
10754
10755
28.9M
    cbf = ps_ctxt->apf_quant_iquant_ssd
10756
28.9M
              [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2]
10757
10758
28.9M
          (pi2_trans_values,
10759
28.9M
           ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset],
10760
28.9M
           pi2_quant_coeffs,
10761
28.9M
           pi2_deq_data,
10762
28.9M
           trans_size,
10763
28.9M
           ps_ctxt->i4_chrm_cu_qp_div6,
10764
28.9M
           ps_ctxt->i4_chrm_cu_qp_mod6,
10765
28.9M
#if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
10766
28.9M
           ps_ctxt->i4_quant_rnd_factor[intra_flag],
10767
28.9M
           ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3],
10768
28.9M
           ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3],
10769
#else
10770
           intra_flag ? ai4_quant_rounding_factors[0][0] : ps_ctxt->i4_quant_rnd_factor[intra_flag],
10771
           intra_flag ? ai4_quant_rounding_factors[1]
10772
                      : ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3],
10773
           intra_flag ? ai4_quant_rounding_factors[2]
10774
                      : ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3],
10775
#endif
10776
28.9M
           trans_size,
10777
28.9M
           trans_size,
10778
28.9M
           deq_data_strd,
10779
28.9M
           pu1_csbf_buf,
10780
28.9M
           csbf_strd,
10781
28.9M
           pi4_zero_col,
10782
28.9M
           pi4_zero_row,
10783
28.9M
           ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset],
10784
28.9M
           pi8_cost);
10785
10786
28.9M
    if(e_ssd_type != FREQUENCY_DOMAIN_SSD)
10787
15.3M
    {
10788
15.3M
        pi8_cost[0] = UINT_MAX;
10789
15.3M
    }
10790
10791
28.9M
    if(0 != cbf)
10792
4.66M
    {
10793
4.66M
        if(i4_perform_sbh || i4_perform_rdoq)
10794
3.37M
        {
10795
3.37M
            ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd;
10796
3.37M
            ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size;
10797
10798
3.37M
            ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_chrm_cu_qp_div6;
10799
3.37M
            ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_chrm_cu_qp_mod6;
10800
3.37M
            ps_rdoq_sbh_ctxt->i4_scan_idx = i4_scan_idx;
10801
3.37M
            ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
10802
3.37M
            ps_rdoq_sbh_ctxt->i4_trans_size = trans_size;
10803
10804
3.37M
            ps_rdoq_sbh_ctxt->pi2_dequant_coeff =
10805
3.37M
                ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset];
10806
3.37M
            ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data;
10807
3.37M
            ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs;
10808
3.37M
            ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values;
10809
3.37M
            ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf;
10810
3.37M
            ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map;
10811
10812
3.37M
            if((!i4_perform_rdoq))
10813
1.82M
            {
10814
1.82M
                ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
10815
10816
1.82M
                pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
10817
1.82M
            }
10818
3.37M
        }
10819
10820
        /* ------- call coeffs scan function ------- */
10821
4.66M
        *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
10822
4.66M
            pi2_quant_coeffs,
10823
4.66M
            pi4_subBlock2csbfId_map,
10824
4.66M
            i4_scan_idx,
10825
4.66M
            trans_size,
10826
4.66M
            pu1_ecd_data,
10827
4.66M
            pu1_csbf_buf,
10828
4.66M
            csbf_strd);
10829
4.66M
    }
10830
10831
    /*  Normalize Cost. Note : trans_idx, not (trans_idx-1) */
10832
28.9M
    pi8_cost[0] >>= ga_trans_shift[trans_idx];
10833
10834
28.9M
#if RDOPT_ZERO_CBF_ENABLE
10835
28.9M
    if((0 != cbf))
10836
4.66M
    {
10837
4.66M
        WORD32 tu_bits;
10838
4.66M
        LWORD64 zero_cbf_cost_u, curr_cb_cod_cost;
10839
10840
4.66M
        zero_cbf_cost_u = 0;
10841
10842
        /* Populating the fields of rdoq_ctxt structure */
10843
4.66M
        if(i4_perform_rdoq)
10844
1.54M
        {
10845
            //memset(ps_rdoq_sbh_ctxt,0,sizeof(rdoq_sbh_ctxt_t));
10846
            /* transform size to log2transform size */
10847
1.54M
            GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size);
10848
1.54M
            ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1;
10849
10850
1.54M
            ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_chroma_qf;
10851
1.54M
            ps_rdoq_sbh_ctxt->i4_is_luma = 0;
10852
1.54M
            ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx];
10853
1.54M
            ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td =
10854
1.54M
                (1 << (ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td - 1));
10855
1.54M
            ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0;
10856
1.54M
            ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col;
10857
1.54M
            ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row;
10858
1.54M
        }
10859
3.11M
        else if(i4_perform_zcbf)
10860
1.00M
        {
10861
            /* cost of zero cbf encoding */
10862
1.00M
            zero_cbf_cost_u =
10863
10864
1.00M
                ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
10865
1.00M
                    pu1_pred,
10866
1.00M
                    pu1_src,
10867
1.00M
                    pred_strd,
10868
1.00M
                    src_strd,
10869
1.00M
                    trans_size,
10870
1.00M
                    trans_size,
10871
1.00M
                    e_chroma_plane);
10872
1.00M
        }
10873
10874
        /************************************************************************/
10875
        /* call the entropy rdo encode to get the bit estimate for current tu   */
10876
        /* note that tu includes only residual coding bits and does not include */
10877
        /* tu split, cbf and qp delta encoding bits for a TU                    */
10878
        /************************************************************************/
10879
4.66M
        if(i4_perform_rdoq)
10880
1.54M
        {
10881
1.54M
            tu_bits = ihevce_entropy_rdo_encode_tu_rdoq(
10882
1.54M
                &ps_ctxt->s_rdopt_entropy_ctxt,
10883
1.54M
                pu1_ecd_data,
10884
1.54M
                trans_size,
10885
1.54M
                0,
10886
1.54M
                ps_rdoq_sbh_ctxt,
10887
1.54M
                pi8_cost,
10888
1.54M
                &zero_cbf_cost_u,
10889
1.54M
                0);
10890
            //Currently, we are not accounting for sign bit in RDOPT bits calculation when RDOQ is turned on
10891
10892
1.54M
            if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0)
10893
75.9k
            {
10894
75.9k
                cbf = 0;
10895
10896
                /* num bytes is set to 0 */
10897
75.9k
                *pi4_coeff_off = 0;
10898
75.9k
            }
10899
10900
1.54M
            (*pi4_tu_bits) += tu_bits;
10901
10902
1.54M
            if((i4_perform_sbh) && (0 != cbf))
10903
1.47M
            {
10904
1.47M
                ps_rdoq_sbh_ctxt->i8_ssd_cost = pi8_cost[0];
10905
10906
1.47M
                ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
10907
10908
1.47M
                pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
10909
1.47M
            }
10910
10911
            /*Add round value before normalizing*/
10912
1.54M
            pi8_cost[0] += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td;
10913
1.54M
            pi8_cost[0] >>= ga_trans_shift[trans_idx];
10914
10915
1.54M
            if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1)
10916
1.47M
            {
10917
1.47M
                *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
10918
1.47M
                    pi2_quant_coeffs,
10919
1.47M
                    pi4_subBlock2csbfId_map,
10920
1.47M
                    i4_scan_idx,
10921
1.47M
                    trans_size,
10922
1.47M
                    pu1_ecd_data,
10923
1.47M
                    ps_rdoq_sbh_ctxt->pu1_csbf_buf,
10924
1.47M
                    csbf_strd);
10925
1.47M
            }
10926
1.54M
        }
10927
3.11M
        else
10928
3.11M
        {
10929
            /************************************************************************/
10930
            /* call the entropy rdo encode to get the bit estimate for current tu   */
10931
            /* note that tu includes only residual coding bits and does not include */
10932
            /* tu split, cbf and qp delta encoding bits for a TU                    */
10933
            /************************************************************************/
10934
3.11M
            tu_bits = ihevce_entropy_rdo_encode_tu(
10935
3.11M
                &ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 0, i4_perform_sbh);
10936
10937
3.11M
            (*pi4_tu_bits) += tu_bits;
10938
3.11M
        }
10939
10940
4.66M
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
10941
1.44M
        {
10942
1.44M
            pi8_cost[0] = ihevce_it_recon_ssd(
10943
1.44M
                ps_ctxt,
10944
1.44M
                pu1_src,
10945
1.44M
                src_strd,
10946
1.44M
                pu1_pred,
10947
1.44M
                pred_strd,
10948
1.44M
                pi2_deq_data,
10949
1.44M
                deq_data_strd,
10950
1.44M
                pu1_recon,
10951
1.44M
                i4_recon_stride,
10952
1.44M
                pu1_ecd_data,
10953
1.44M
                trans_size,
10954
1.44M
                PRED_MODE_INTRA,
10955
1.44M
                cbf,
10956
1.44M
                pi4_zero_col[0],
10957
1.44M
                pi4_zero_row[0],
10958
1.44M
                e_chroma_plane);
10959
10960
1.44M
            pu1_is_recon_available[0] = 1;
10961
1.44M
        }
10962
10963
4.66M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
10964
4.66M
        if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
10965
0
        {
10966
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
10967
0
                pu1_src,
10968
0
                src_strd,
10969
0
                pu1_recon,
10970
0
                i4_recon_stride,
10971
0
                pi8_cost[0],
10972
0
                i4_alpha_stim_multiplier,
10973
0
                trans_size,
10974
0
                0,
10975
0
                ps_ctxt->u1_enable_psyRDOPT,
10976
0
                e_chroma_plane);
10977
0
        }
10978
4.66M
        else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
10979
0
        {
10980
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
10981
0
                pu1_src,
10982
0
                src_strd,
10983
0
                pu1_pred,
10984
0
                pred_strd,
10985
0
                pi8_cost[0],
10986
0
                i4_alpha_stim_multiplier,
10987
0
                trans_size,
10988
0
                0,
10989
0
                ps_ctxt->u1_enable_psyRDOPT,
10990
0
                e_chroma_plane);
10991
0
        }
10992
4.66M
#endif
10993
10994
4.66M
        curr_cb_cod_cost = pi8_cost[0];
10995
10996
        /* add the SSD cost to bits estimate given by ECD */
10997
4.66M
        curr_cb_cod_cost +=
10998
4.66M
            COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
10999
11000
4.66M
        if(i4_perform_zcbf)
11001
1.52M
        {
11002
1.52M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
11003
1.52M
            if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
11004
0
            {
11005
0
                zero_cbf_cost_u = ihevce_inject_stim_into_distortion(
11006
0
                    pu1_src,
11007
0
                    src_strd,
11008
0
                    pu1_pred,
11009
0
                    pred_strd,
11010
0
                    zero_cbf_cost_u,
11011
0
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
11012
0
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
11013
0
                                              (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
11014
0
                                                 100.0,
11015
0
                    trans_size,
11016
0
                    0,
11017
0
                    ps_ctxt->u1_enable_psyRDOPT,
11018
0
                    e_chroma_plane);
11019
0
            }
11020
1.52M
#endif
11021
            /* force the tu as zero cbf if zero_cbf_cost is lower */
11022
1.52M
            if(zero_cbf_cost_u < curr_cb_cod_cost)
11023
21.3k
            {
11024
21.3k
                *pi4_coeff_off = 0;
11025
21.3k
                cbf = 0;
11026
21.3k
                (*pi4_tu_bits) = 0;
11027
21.3k
                pi8_cost[0] = zero_cbf_cost_u;
11028
11029
21.3k
                pu1_is_recon_available[0] = 0;
11030
11031
21.3k
                if(e_ssd_type == SPATIAL_DOMAIN_SSD)
11032
8.24k
                {
11033
8.24k
                    ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
11034
8.24k
                        pu1_pred,
11035
8.24k
                        pred_strd,
11036
8.24k
                        pu1_recon,
11037
8.24k
                        i4_recon_stride,
11038
8.24k
                        trans_size,
11039
8.24k
                        trans_size,
11040
8.24k
                        e_chroma_plane);
11041
11042
8.24k
                    pu1_is_recon_available[0] = 1;
11043
8.24k
                }
11044
21.3k
            }
11045
11046
1.52M
#if ENABLE_INTER_ZCU_COST
11047
1.52M
            if(!intra_flag)
11048
1.52M
            {
11049
#if !WEIGH_CHROMA_COST
11050
                ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost_u;
11051
#else
11052
1.52M
                ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
11053
1.52M
                    (zero_cbf_cost_u * ps_ctxt->u4_chroma_cost_weighing_factor +
11054
1.52M
                     (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
11055
1.52M
                    CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
11056
1.52M
#endif
11057
1.52M
            }
11058
1.52M
#endif
11059
1.52M
        }
11060
4.66M
    }
11061
24.2M
    else
11062
24.2M
    {
11063
24.2M
        if(e_ssd_type == SPATIAL_DOMAIN_SSD)
11064
13.9M
        {
11065
13.9M
            pi8_cost[0] = ihevce_it_recon_ssd(
11066
13.9M
                ps_ctxt,
11067
13.9M
                pu1_src,
11068
13.9M
                src_strd,
11069
13.9M
                pu1_pred,
11070
13.9M
                pred_strd,
11071
13.9M
                pi2_deq_data,
11072
13.9M
                deq_data_strd,
11073
13.9M
                pu1_recon,
11074
13.9M
                i4_recon_stride,
11075
13.9M
                pu1_ecd_data,
11076
13.9M
                trans_size,
11077
13.9M
                PRED_MODE_INTRA,
11078
13.9M
                cbf,
11079
13.9M
                pi4_zero_col[0],
11080
13.9M
                pi4_zero_row[0],
11081
13.9M
                e_chroma_plane);
11082
11083
13.9M
            pu1_is_recon_available[0] = 1;
11084
13.9M
        }
11085
11086
24.2M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
11087
24.2M
        if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
11088
0
        {
11089
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
11090
0
                pu1_src,
11091
0
                src_strd,
11092
0
                pu1_recon,
11093
0
                i4_recon_stride,
11094
0
                pi8_cost[0],
11095
0
                !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
11096
0
                                       : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
11097
0
                                          (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
11098
0
                                             100.0,
11099
0
                trans_size,
11100
0
                0,
11101
0
                ps_ctxt->u1_enable_psyRDOPT,
11102
0
                e_chroma_plane);
11103
0
        }
11104
24.2M
        else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
11105
0
        {
11106
0
            pi8_cost[0] = ihevce_inject_stim_into_distortion(
11107
0
                pu1_src,
11108
0
                src_strd,
11109
0
                pu1_pred,
11110
0
                pred_strd,
11111
0
                pi8_cost[0],
11112
0
                !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
11113
0
                                       : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
11114
0
                                          (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
11115
0
                                             100.0,
11116
0
                trans_size,
11117
0
                0,
11118
0
                ps_ctxt->u1_enable_psyRDOPT,
11119
0
                e_chroma_plane);
11120
0
        }
11121
24.2M
#endif
11122
11123
24.2M
#if ENABLE_INTER_ZCU_COST
11124
24.2M
        if(!intra_flag)
11125
5.01M
        {
11126
#if !WEIGH_CHROMA_COST
11127
            /* cbf = 0, accumulate cu not coded cost */
11128
            ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
11129
#else
11130
            /* cbf = 0, accumulate cu not coded cost */
11131
11132
5.01M
            ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
11133
5.01M
                (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor +
11134
5.01M
                 (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
11135
5.01M
                CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
11136
5.01M
#endif
11137
5.01M
        }
11138
24.2M
#endif
11139
24.2M
    }
11140
28.9M
#endif /* RDOPT_ZERO_CBF_ENABLE */
11141
11142
28.9M
    return (cbf);
11143
28.9M
}