Coverage Report

Created: 2025-07-11 06:43

/src/libhevc/encoder/ihevce_enc_cu_recursion.c
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
21
/**
22
******************************************************************************
23
* \file ihevce_enc_cu_recursion.c
24
*
25
* \brief
26
*    This file contains Encoder normative loop pass related functions
27
*
28
* \date
29
*    18/09/2012
30
*
31
* \author
32
*    Ittiam
33
*
34
*
35
* List of Functions
36
*
37
*
38
******************************************************************************
39
*/
40
41
/*****************************************************************************/
42
/* File Includes                                                             */
43
/*****************************************************************************/
44
/* System include files */
45
#include <stdio.h>
46
#include <string.h>
47
#include <stdlib.h>
48
#include <assert.h>
49
#include <stdarg.h>
50
#include <math.h>
51
52
/* User include files */
53
#include "ihevc_typedefs.h"
54
#include "itt_video_api.h"
55
#include "ihevce_api.h"
56
57
#include "rc_cntrl_param.h"
58
#include "rc_frame_info_collector.h"
59
#include "rc_look_ahead_params.h"
60
61
#include "ihevc_defs.h"
62
#include "ihevc_macros.h"
63
#include "ihevc_debug.h"
64
#include "ihevc_structs.h"
65
#include "ihevc_platform_macros.h"
66
#include "ihevc_deblk.h"
67
#include "ihevc_itrans_recon.h"
68
#include "ihevc_chroma_itrans_recon.h"
69
#include "ihevc_chroma_intra_pred.h"
70
#include "ihevc_intra_pred.h"
71
#include "ihevc_inter_pred.h"
72
#include "ihevc_mem_fns.h"
73
#include "ihevc_padding.h"
74
#include "ihevc_weighted_pred.h"
75
#include "ihevc_sao.h"
76
#include "ihevc_resi_trans.h"
77
#include "ihevc_quant_iquant_ssd.h"
78
#include "ihevc_cabac_tables.h"
79
80
#include "ihevce_defs.h"
81
#include "ihevce_hle_interface.h"
82
#include "ihevce_lap_enc_structs.h"
83
#include "ihevce_multi_thrd_structs.h"
84
#include "ihevce_multi_thrd_funcs.h"
85
#include "ihevce_me_common_defs.h"
86
#include "ihevce_had_satd.h"
87
#include "ihevce_error_codes.h"
88
#include "ihevce_bitstream.h"
89
#include "ihevce_cabac.h"
90
#include "ihevce_rdoq_macros.h"
91
#include "ihevce_function_selector.h"
92
#include "ihevce_enc_structs.h"
93
#include "ihevce_entropy_structs.h"
94
#include "ihevce_cmn_utils_instr_set_router.h"
95
#include "ihevce_ipe_instr_set_router.h"
96
#include "ihevce_decomp_pre_intra_structs.h"
97
#include "ihevce_decomp_pre_intra_pass.h"
98
#include "ihevce_enc_loop_structs.h"
99
#include "ihevce_global_tables.h"
100
#include "ihevce_nbr_avail.h"
101
#include "ihevce_enc_loop_utils.h"
102
#include "ihevce_bs_compute_ctb.h"
103
#include "ihevce_cabac_rdo.h"
104
#include "ihevce_dep_mngr_interface.h"
105
#include "ihevce_enc_loop_pass.h"
106
#include "ihevce_rc_enc_structs.h"
107
#include "ihevce_enc_cu_recursion.h"
108
#include "ihevce_stasino_helpers.h"
109
110
#include "cast_types.h"
111
#include "osal.h"
112
#include "osal_defaults.h"
113
114
/*****************************************************************************/
115
/* Macros                                                                    */
116
/*****************************************************************************/
117
0
/* NOTE(review): no use of this macro is visible in the reviewed part of the   */
/* file; presumably the number of quant rounding entries kept per CTB -        */
/* confirm against its users before changing the value.                        */
#define NUM_CTB_QUANT_ROUNDING 6
118
119
/*****************************************************************************/
120
/* Function Definitions                                                      */
121
/*****************************************************************************/
122
123
/**
124
*********************************************************************************
125
* Function name : ihevce_store_cu_final
126
*
127
* \brief
128
*    This function store cu info to the enc loop cu context
129
*
130
* \param[in] ps_ctxt : pointer to enc loop context structure
131
* \param[in] ps_cu_final  : pointer to enc loop output CU structure
132
* \param[in] pu1_ecd_data : ecd data pointer
133
* \param[in] ps_enc_out_ctxt : pointer to CU information structure
134
* \param[in] ps_cu_prms : pointer to  cu level parameters for SATD / RDOPT
135
*
136
* \return
137
*    None
138
*
139
**********************************************************************************/
140
void ihevce_store_cu_final(
141
    ihevce_enc_loop_ctxt_t *ps_ctxt,
142
    cu_enc_loop_out_t *ps_cu_final,
143
    UWORD8 *pu1_ecd_data,
144
    ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt,
145
    enc_loop_cu_prms_t *ps_cu_prms)
146
2.00M
{
147
2.00M
    enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
148
2.00M
    WORD32 i4_8x8_blks_in_cu;
149
2.00M
    WORD32 i4_br_id, i4_enc_frm_id;
150
151
2.00M
    WORD32 u4_tex_bits, u4_hdr_bits;
152
2.00M
    WORD32 i4_qscale, i4_qscale_ctb;
153
2.00M
    ps_enc_loop_bestprms = ps_enc_out_ctxt->ps_cu_prms;
154
2.00M
    i4_qscale = ((ps_ctxt->ps_rc_quant_ctxt->pi4_qp_to_qscale
155
2.00M
                      [ps_enc_out_ctxt->i1_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]));
156
2.00M
    i4_qscale_ctb = ((
157
2.00M
        ps_ctxt->ps_rc_quant_ctxt
158
2.00M
            ->pi4_qp_to_qscale[ps_ctxt->i4_frame_mod_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]));
159
160
    /* All texture bits accumulated */
161
2.00M
    u4_tex_bits = ps_enc_loop_bestprms->u4_cu_luma_res_bits +
162
2.00M
                  ps_enc_loop_bestprms->u4_cu_chroma_res_bits +
163
2.00M
                  ps_enc_loop_bestprms->u4_cu_cbf_bits;
164
165
2.00M
    u4_hdr_bits = ps_enc_loop_bestprms->u4_cu_hdr_bits;
166
167
2.00M
    i4_br_id = ps_ctxt->i4_bitrate_instance_num;
168
2.00M
    i4_enc_frm_id = ps_ctxt->i4_enc_frm_id;
169
170
2.00M
    i4_8x8_blks_in_cu = ((ps_enc_out_ctxt->u1_cu_size >> 3) * (ps_enc_out_ctxt->u1_cu_size >> 3));
171
172
2.00M
    ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_open_loop_ssd +=
173
2.00M
        ps_enc_loop_bestprms
174
2.00M
            ->i8_cu_ssd;  // + (((float)(ps_ctxt->i8_cl_ssd_lambda_qf/ (1<< LAMBDA_Q_SHIFT))) * ps_enc_loop_bestprms->u4_cu_hdr_bits);
175
176
2.00M
    ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_open_loop_intra_sad +=
177
2.00M
        (UWORD32)(
178
2.00M
            ps_enc_loop_bestprms->u4_cu_open_intra_sad +
179
2.00M
            (((float)(ps_ctxt->i4_sad_lamda) / (1 << LAMBDA_Q_SHIFT)) *
180
2.00M
             ps_enc_loop_bestprms->u4_cu_hdr_bits));
181
182
2.00M
    if(1 == ps_enc_loop_bestprms->u1_intra_flag)
183
1.08M
    {
184
1.08M
        ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_intra_sad_acc +=
185
1.08M
            ps_enc_loop_bestprms->u4_cu_sad;
186
1.08M
        ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_intra_cost_acc +=
187
1.08M
            ps_enc_loop_bestprms->i8_best_rdopt_cost;
188
1.08M
    }
189
921k
    else
190
921k
    {
191
921k
        ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_inter_sad_acc +=
192
921k
            ps_enc_loop_bestprms->u4_cu_sad;
193
921k
        ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_inter_cost_acc +=
194
921k
            ps_enc_loop_bestprms->i8_best_rdopt_cost;
195
921k
    }
196
    /*accumulating the frame level stats across frame*/
197
2.00M
    ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_sad_acc +=
198
2.00M
        ps_enc_loop_bestprms->u4_cu_sad;
199
200
2.00M
    ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_cost_acc +=
201
2.00M
        ps_enc_loop_bestprms->i8_best_rdopt_cost;
202
203
2.00M
    ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_bits +=
204
2.00M
        (u4_tex_bits + u4_hdr_bits);
205
206
    /*Total bits and header bits accumalted here for CTB*/
207
2.00M
    ps_ctxt->u4_total_cu_bits += (u4_tex_bits + u4_hdr_bits);
208
2.00M
    ps_ctxt->u4_total_cu_bits_mul_qs +=
209
2.00M
        ((ULWORD64)((u4_tex_bits + u4_hdr_bits) * (i4_qscale_ctb)) + (1 << (QSCALE_Q_FAC_3 - 1))) >>
210
2.00M
        QSCALE_Q_FAC_3;
211
2.00M
    ps_ctxt->u4_total_cu_hdr_bits += u4_hdr_bits;
212
2.00M
    ps_ctxt->u4_cu_tot_bits_into_qscale +=
213
2.00M
        ((ULWORD64)((u4_tex_bits + u4_hdr_bits) * (i4_qscale)) + (1 << (QSCALE_Q_FAC_3 - 1))) >>
214
2.00M
        QSCALE_Q_FAC_3;
215
2.00M
    ps_ctxt->u4_cu_tot_bits += (u4_tex_bits + u4_hdr_bits);
216
217
2.00M
    ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_header_bits +=
218
2.00M
        u4_hdr_bits;
219
220
2.00M
    ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
221
2.00M
        ->i8_sad_by_qscale[ps_enc_loop_bestprms->u1_intra_flag] +=
222
2.00M
        ((((LWORD64)ps_enc_loop_bestprms->u4_cu_sad) << SAD_BY_QSCALE_Q) / i4_qscale);
223
224
2.00M
    ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
225
2.00M
        ->i4_qp_normalized_8x8_cu_sum[ps_enc_loop_bestprms->u1_intra_flag] +=
226
2.00M
        (i4_8x8_blks_in_cu * i4_qscale);
227
228
2.00M
    ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
229
2.00M
        ->i4_8x8_cu_sum[ps_enc_loop_bestprms->u1_intra_flag] += i4_8x8_blks_in_cu;
230
231
    /* PCM not supported */
232
2.00M
    ps_cu_final->b1_pcm_flag = 0;
233
2.00M
    ps_cu_final->b1_pred_mode_flag = ps_enc_loop_bestprms->u1_intra_flag;
234
235
2.00M
    ps_cu_final->b1_skip_flag = ps_enc_loop_bestprms->u1_skip_flag;
236
2.00M
    ps_cu_final->b1_tq_bypass_flag = 0;
237
2.00M
    ps_cu_final->b3_part_mode = ps_enc_loop_bestprms->u1_part_mode;
238
239
2.00M
    ps_cu_final->pv_coeff = pu1_ecd_data;
240
241
2.00M
    ps_cu_final->i1_cu_qp = ps_enc_out_ctxt->i1_cu_qp;
242
2.00M
    if(ps_enc_loop_bestprms->u1_is_cu_coded)
243
668k
    {
244
668k
        ps_ctxt->i4_last_cu_qp_from_prev_ctb = ps_enc_out_ctxt->i1_cu_qp;
245
668k
    }
246
1.33M
    else
247
1.33M
    {
248
1.33M
        ps_ctxt->i4_last_cu_qp_from_prev_ctb = ps_ctxt->i4_pred_qp;
249
1.33M
    }
250
2.00M
    ps_cu_final->b1_first_cu_in_qg = ps_enc_out_ctxt->b1_first_cu_in_qg;
251
252
    /* Update the no residue flag. Needed for inter cu. */
253
    /* Needed for deblocking inter/intra both           */
254
    //if(ps_cu_final->b1_pred_mode_flag == PRED_MODE_INTER)
255
2.00M
    {
256
2.00M
        ps_cu_final->b1_no_residual_syntax_flag = !ps_enc_loop_bestprms->u1_is_cu_coded;
257
2.00M
    }
258
259
    /* store the number of TUs */
260
2.00M
    ps_cu_final->u2_num_tus_in_cu = ps_enc_loop_bestprms->u2_num_tus_in_cu;
261
262
    /* ---- copy the TUs to final structure ----- */
263
2.00M
    memcpy(
264
2.00M
        ps_cu_final->ps_enc_tu,
265
2.00M
        &ps_enc_loop_bestprms->as_tu_enc_loop[0],
266
2.00M
        ps_enc_loop_bestprms->u2_num_tus_in_cu * sizeof(tu_enc_loop_out_t));
267
268
    /* ---- copy the PUs to final structure ----- */
269
2.00M
    memcpy(
270
2.00M
        ps_cu_final->ps_pu,
271
2.00M
        &ps_enc_loop_bestprms->as_pu_enc_loop[0],
272
2.00M
        ps_enc_loop_bestprms->u2_num_pus_in_cu * sizeof(pu_t));
273
274
    /* --- copy reminder and prev_flags ----- */
275
    /* only required for intra */
276
2.00M
    if(PRED_MODE_INTRA == ps_cu_final->b1_pred_mode_flag)
277
1.08M
    {
278
1.08M
        memcpy(
279
1.08M
            &ps_cu_final->as_prev_rem[0],
280
1.08M
            &ps_enc_loop_bestprms->as_intra_prev_rem[0],
281
1.08M
            ps_enc_loop_bestprms->u2_num_tus_in_cu * sizeof(intra_prev_rem_flags_t));
282
283
1.08M
        ps_cu_final->b3_chroma_intra_pred_mode = ps_enc_loop_bestprms->u1_chroma_intra_pred_mode;
284
1.08M
    }
285
286
    /* --------------------------------------------------- */
287
    /* ---- Boundary Strength Calculation at CU level ---- */
288
    /* --------------------------------------------------- */
289
2.00M
    if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
290
1.93M
    {
291
1.93M
        WORD32 num_4x4_in_ctb;
292
1.93M
        nbr_4x4_t *ps_left_nbr_4x4;
293
1.93M
        nbr_4x4_t *ps_top_nbr_4x4;
294
1.93M
        nbr_4x4_t *ps_curr_nbr_4x4;
295
1.93M
        WORD32 nbr_4x4_left_strd;
296
297
1.93M
        num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
298
299
1.93M
        ps_curr_nbr_4x4 = &ps_ctxt->as_ctb_nbr_arr[0];
300
1.93M
        ps_curr_nbr_4x4 += (ps_enc_out_ctxt->b3_cu_pos_x << 1);
301
1.93M
        ps_curr_nbr_4x4 += ((ps_enc_out_ctxt->b3_cu_pos_y << 1) * num_4x4_in_ctb);
302
303
        /* CU left */
304
1.93M
        if(0 == ps_enc_out_ctxt->b3_cu_pos_x)
305
727k
        {
306
727k
            ps_left_nbr_4x4 = &ps_ctxt->as_left_col_nbr[0];
307
727k
            ps_left_nbr_4x4 += ps_enc_out_ctxt->b3_cu_pos_y << 1;
308
727k
            nbr_4x4_left_strd = 1;
309
727k
        }
310
1.20M
        else
311
1.20M
        {
312
            /* inside CTB */
313
1.20M
            ps_left_nbr_4x4 = ps_curr_nbr_4x4 - 1;
314
1.20M
            nbr_4x4_left_strd = num_4x4_in_ctb;
315
1.20M
        }
316
317
        /* CU top */
318
1.93M
        if(0 == ps_enc_out_ctxt->b3_cu_pos_y)
319
719k
        {
320
            /* CTB boundary */
321
719k
            ps_top_nbr_4x4 = ps_ctxt->ps_top_row_nbr;
322
719k
            ps_top_nbr_4x4 += (ps_cu_prms->i4_ctb_pos * (ps_cu_prms->i4_ctb_size >> 2));
323
719k
            ps_top_nbr_4x4 += (ps_enc_out_ctxt->b3_cu_pos_x << 1);
324
719k
        }
325
1.21M
        else
326
1.21M
        {
327
            /* inside CTB */
328
1.21M
            ps_top_nbr_4x4 = ps_curr_nbr_4x4 - num_4x4_in_ctb;
329
1.21M
        }
330
331
1.93M
        ihevce_bs_compute_cu(
332
1.93M
            ps_cu_final,
333
1.93M
            ps_top_nbr_4x4,
334
1.93M
            ps_left_nbr_4x4,
335
1.93M
            ps_curr_nbr_4x4,
336
1.93M
            nbr_4x4_left_strd,
337
1.93M
            num_4x4_in_ctb,
338
1.93M
            &ps_ctxt->s_deblk_bs_prms);
339
1.93M
    }
340
2.00M
}
341
342
/**
343
*********************************************************************************
344
* Function name : ihevce_store_cu_results
345
*
346
* \brief
347
*    This function store cu result to cu info context
348
*
349
* \param[in] ps_ctxt : pointer to enc loop context structure
350
* \param[out] ps_cu_prms : pointer to  cu level parameters for SATD / RDOPT
351
*
352
* \return
353
*    None
354
*
355
**********************************************************************************/
356
void ihevce_store_cu_results(
357
    ihevce_enc_loop_ctxt_t *ps_ctxt,
358
    enc_loop_cu_prms_t *ps_cu_prms,
359
    final_mode_state_t *ps_final_state)
360
3.06M
{
361
3.06M
    ihevce_enc_cu_node_ctxt_t *ps_enc_tmp_out_ctxt;
362
3.06M
    nbr_4x4_t *ps_nbr_4x4, *ps_tmp_nbr_4x4, *ps_curr_nbr_4x4;
363
364
3.06M
    UWORD8 *pu1_recon, *pu1_final_recon;
365
3.06M
    WORD32 num_4x4_in_ctb, ctr;
366
3.06M
    WORD32 num_4x4_in_cu;
367
3.06M
    UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
368
3.06M
    WORD32 cu_depth, log2_ctb_size, log2_cu_size;
369
370
3.06M
    ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt;
371
3.06M
    (void)ps_final_state;
372
#if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
373
    {
374
        /* ---- copy the child luma recon back to curr. recon -------- */
375
        pu1_recon = (UWORD8 *)ps_ctxt->pv_cu_luma_recon;
376
377
        /* based on CU position derive the luma pointers */
378
        pu1_final_recon = ps_cu_prms->pu1_luma_recon + (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 3);
379
380
        pu1_final_recon +=
381
            ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_recon_stride);
382
383
        ps_ctxt->s_cmn_opt_func.pf_copy_2d(
384
            pu1_final_recon,
385
            ps_cu_prms->i4_luma_recon_stride,
386
            pu1_recon,
387
            ps_enc_tmp_out_ctxt->u1_cu_size,
388
            ps_enc_tmp_out_ctxt->u1_cu_size,
389
            ps_enc_tmp_out_ctxt->u1_cu_size);
390
391
        /* ---- copy the child chroma recon back to curr. recon -------- */
392
        pu1_recon = (UWORD8 *)ps_ctxt->pv_cu_chrma_recon;
393
394
        /* based on CU position derive the chroma pointers */
395
        pu1_final_recon = ps_cu_prms->pu1_chrm_recon + (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 3);
396
397
        pu1_final_recon +=
398
            ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << (u1_is_422 + 2)) *
399
             ps_cu_prms->i4_chrm_recon_stride);
400
401
        /* Cb and Cr pixel interleaved */
402
        ps_ctxt->s_cmn_opt_func.pf_copy_2d(
403
            pu1_final_recon,
404
            ps_cu_prms->i4_chrm_recon_stride,
405
            pu1_recon,
406
            ps_enc_tmp_out_ctxt->u1_cu_size,
407
            ps_enc_tmp_out_ctxt->u1_cu_size,
408
            (ps_enc_tmp_out_ctxt->u1_cu_size >> (0 == u1_is_422)));
409
    }
410
#else
411
3.06M
    if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
412
1.67M
    {
413
        /* ---- copy the child luma recon back to curr. recon -------- */
414
1.67M
        pu1_recon = (UWORD8 *)ps_ctxt->pv_cu_luma_recon;
415
416
        /* based on CU position derive the luma pointers */
417
1.67M
        pu1_final_recon = ps_cu_prms->pu1_luma_recon + (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 3);
418
419
1.67M
        pu1_final_recon +=
420
1.67M
            ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_recon_stride);
421
422
1.67M
        ps_ctxt->s_cmn_opt_func.pf_copy_2d(
423
1.67M
            pu1_final_recon,
424
1.67M
            ps_cu_prms->i4_luma_recon_stride,
425
1.67M
            pu1_recon,
426
1.67M
            ps_enc_tmp_out_ctxt->u1_cu_size,
427
1.67M
            ps_enc_tmp_out_ctxt->u1_cu_size,
428
1.67M
            ps_enc_tmp_out_ctxt->u1_cu_size);
429
430
        /* ---- copy the child chroma recon back to curr. recon -------- */
431
1.67M
        pu1_recon = (UWORD8 *)ps_ctxt->pv_cu_chrma_recon;
432
433
        /* based on CU position derive the chroma pointers */
434
1.67M
        pu1_final_recon = ps_cu_prms->pu1_chrm_recon + (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 3);
435
436
1.67M
        pu1_final_recon +=
437
1.67M
            ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << (u1_is_422 + 2)) *
438
1.67M
             ps_cu_prms->i4_chrm_recon_stride);
439
440
1.67M
        ps_ctxt->s_cmn_opt_func.pf_copy_2d(
441
1.67M
            pu1_final_recon,
442
1.67M
            ps_cu_prms->i4_chrm_recon_stride,
443
1.67M
            pu1_recon,
444
1.67M
            ps_enc_tmp_out_ctxt->u1_cu_size,
445
1.67M
            ps_enc_tmp_out_ctxt->u1_cu_size,
446
1.67M
            (ps_enc_tmp_out_ctxt->u1_cu_size >> (0 == u1_is_422)));
447
1.67M
    }
448
3.06M
#endif
449
    /*copy qp for qg*/
450
3.06M
    {
451
3.06M
        WORD32 i4_num_8x8, i4_x, i4_y;
452
3.06M
        WORD32 i4_cu_pos_x, i4_cu_pox_y;
453
3.06M
        i4_num_8x8 = ps_enc_tmp_out_ctxt->u1_cu_size >> 3;
454
3.06M
        i4_cu_pos_x = ps_enc_tmp_out_ctxt->b3_cu_pos_x;
455
3.06M
        i4_cu_pox_y = ps_enc_tmp_out_ctxt->b3_cu_pos_y;
456
9.94M
        for(i4_y = 0; i4_y < i4_num_8x8; i4_y++)
457
6.88M
        {
458
29.0M
            for(i4_x = 0; i4_x < i4_num_8x8; i4_x++)
459
22.1M
            {
460
22.1M
                if(ps_enc_tmp_out_ctxt->ps_cu_prms->u1_is_cu_coded)
461
2.97M
                {
462
2.97M
                    ps_ctxt->ai4_qp_qg[((i4_cu_pox_y + i4_y) * 8) + (i4_cu_pos_x + i4_x)] =
463
2.97M
                        ps_ctxt->i4_cu_qp;
464
2.97M
                }
465
19.2M
                else
466
19.2M
                {
467
19.2M
                    ps_ctxt->ai4_qp_qg[((i4_cu_pox_y + i4_y) * 8) + (i4_cu_pos_x + i4_x)] =
468
19.2M
                        ps_ctxt->i4_pred_qp;
469
19.2M
                }
470
22.1M
            }
471
6.88M
        }
472
3.06M
    }
473
474
    /* ------ copy the nbr 4x4 to final output ------ */
475
3.06M
    num_4x4_in_cu = ps_enc_tmp_out_ctxt->u1_cu_size >> 2;
476
3.06M
    num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
477
478
3.06M
    ps_curr_nbr_4x4 = &ps_ctxt->as_ctb_nbr_arr[0];
479
3.06M
    ps_curr_nbr_4x4 += (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 1);
480
3.06M
    ps_curr_nbr_4x4 += ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << 1) * num_4x4_in_ctb);
481
3.06M
    ps_tmp_nbr_4x4 = ps_curr_nbr_4x4;
482
483
3.06M
    ps_nbr_4x4 = ps_ctxt->ps_cu_recur_nbr;
484
485
3.06M
    GETRANGE(log2_ctb_size, ps_cu_prms->i4_ctb_size);
486
3.06M
    GETRANGE(log2_cu_size, ps_enc_tmp_out_ctxt->u1_cu_size);
487
3.06M
    cu_depth = log2_ctb_size - log2_cu_size;
488
489
3.06M
    ASSERT(cu_depth <= 3);
490
3.06M
    ASSERT(cu_depth >= 0);
491
492
    /*assign qp for all 4x4 nbr blocks*/
493
91.8M
    for(ctr = 0; ctr < num_4x4_in_cu * num_4x4_in_cu; ctr++, ps_nbr_4x4++)
494
88.7M
    {
495
88.7M
        ps_nbr_4x4->b1_skip_flag = ps_enc_tmp_out_ctxt->s_cu_prms.u1_skip_flag;
496
88.7M
        ps_nbr_4x4->b2_cu_depth = cu_depth;
497
88.7M
        ps_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
498
88.7M
    }
499
500
3.06M
    ps_nbr_4x4 = ps_ctxt->ps_cu_recur_nbr;
501
502
16.8M
    for(ctr = 0; ctr < num_4x4_in_cu; ctr++)
503
13.7M
    {
504
13.7M
        memcpy(ps_tmp_nbr_4x4, ps_nbr_4x4, num_4x4_in_cu * sizeof(nbr_4x4_t));
505
506
13.7M
        ps_tmp_nbr_4x4 += num_4x4_in_ctb;
507
13.7M
        ps_nbr_4x4 += num_4x4_in_cu;
508
13.7M
    }
509
3.06M
}
510
511
/**
512
*********************************************************************************
513
* Function name : ihevce_populate_cu_struct
514
*
515
* \brief
516
*    This function populates the CU struct
517
*
518
* \param[in] ps_ctxt : pointer to enc loop context structure
519
* \param[in] ps_cur_ipe_ctb : pointer to  IPE L0 analyze structure
520
* \param[in] ps_cu_tree_analyse : pointer to  Structure for CU recursion
521
* \param[in] ps_best_results : pointer to structure containing the results for the partition types of the CU
522
* \param[in] ps_cu_out : pointer to structure containing mode analysis info
523
* \param[in] i4_32x32_id : noise estimation id
524
* \param[in] u1_num_best_results : num best result value
525
*
526
* \return
527
*    None
528
*
529
**********************************************************************************/
530
void ihevce_populate_cu_struct(
531
    ihevce_enc_loop_ctxt_t *ps_ctxt,
532
    ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
533
    cur_ctb_cu_tree_t *ps_cu_tree_analyse,
534
    part_type_results_t *ps_best_results,
535
    cu_analyse_t *ps_cu_out,
536
    WORD32 i4_32x32_id,
537
#if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
538
    UWORD8 u1_is_cu_noisy,
539
#endif
540
    UWORD8 u1_num_best_results)
541
1.85M
{
542
1.85M
    cu_inter_cand_t *ps_cu_candt;
543
544
1.85M
    WORD32 j;
545
    /* open loop intra cost by IPE */
546
1.85M
    WORD32 intra_cost_ol;
547
    /* closed loop intra cost based on empirical coding noise estimate */
548
1.85M
    WORD32 intra_cost_cl_est = 0;
549
    /* closed loop intra coding noise estimate */
550
1.85M
    WORD32 intra_noise_cl_est;
551
1.85M
    WORD32 num_results_to_copy = 0;
552
553
1.85M
    WORD32 found_intra = 0;
554
1.85M
    WORD32 quality_preset = ps_ctxt->i4_quality_preset;
555
1.85M
    WORD32 frm_qp = ps_ctxt->i4_frame_qp;
556
1.85M
    WORD32 frm_qstep_multiplier = gau4_frame_qstep_multiplier[frm_qp - 1];
557
1.85M
    WORD32 frm_qstep = ps_ctxt->i4_frame_qstep;
558
1.85M
    UWORD8 u1_cu_size = ps_cu_tree_analyse->u1_cu_size;
559
1.85M
    UWORD8 u1_x_off = ps_cu_tree_analyse->b3_cu_pos_x << 3;
560
1.85M
    UWORD8 u1_y_off = ps_cu_tree_analyse->b3_cu_pos_y << 3;
561
1.85M
    UWORD8 u1_threshold_multi;
562
1.85M
    switch(quality_preset)
563
1.85M
    {
564
1.06M
    case IHEVCE_QUALITY_P0:
565
1.15M
    case IHEVCE_QUALITY_P2:
566
1.15M
    {
567
1.15M
        num_results_to_copy =
568
1.15M
            MIN(MAX_NUMBER_OF_INTER_RDOPT_CANDS_IN_PQ_AND_HQ, u1_num_best_results);
569
1.15M
        break;
570
1.06M
    }
571
153k
    case IHEVCE_QUALITY_P3:
572
153k
    {
573
153k
        num_results_to_copy = MIN(MAX_NUMBER_OF_INTER_RDOPT_CANDS_IN_MS, u1_num_best_results);
574
153k
        break;
575
1.06M
    }
576
64.8k
    case IHEVCE_QUALITY_P4:
577
163k
    case IHEVCE_QUALITY_P5:
578
546k
    case IHEVCE_QUALITY_P6:
579
546k
    {
580
546k
        num_results_to_copy =
581
546k
            MIN(MAX_NUMBER_OF_INTER_RDOPT_CANDS_IN_HS_AND_XS, u1_num_best_results);
582
546k
        break;
583
163k
    }
584
1.85M
    }
585
586
1.85M
    ps_cu_out->u1_num_inter_cands = 0;
587
588
    /***************************************************************/
589
    /* Depending CU size that has won in ME,                       */
590
    /*     Estimate the closed loop intra cost for enabling intra  */
591
    /*     evaluation in rdopt stage based on preset               */
592
    /***************************************************************/
593
1.85M
    switch(u1_cu_size)
594
1.85M
    {
595
29.2k
    case 64:
596
29.2k
    {
597
        /* coding noise estimate for intra closed loop cost */
598
29.2k
        intra_cost_ol = ps_cur_ipe_ctb->i4_best64x64_intra_cost - frm_qstep * 256;
599
600
29.2k
        intra_noise_cl_est = (frm_qstep * frm_qstep_multiplier) + (intra_cost_ol >> 4);
601
602
29.2k
        intra_noise_cl_est = MIN(intra_noise_cl_est, (frm_qstep * 16)) * 16;
603
604
29.2k
        intra_cost_cl_est = intra_cost_ol + intra_noise_cl_est;
605
29.2k
        break;
606
0
    }
607
402k
    case 32:
608
402k
    {
609
        /* coding noise estimate for intra closed loop cost */
610
402k
        intra_cost_ol = ps_cur_ipe_ctb->ai4_best32x32_intra_cost[i4_32x32_id] - frm_qstep * 64;
611
612
402k
        intra_noise_cl_est = (frm_qstep * frm_qstep_multiplier) + (intra_cost_ol >> 4);
613
614
402k
        intra_noise_cl_est = MIN(intra_noise_cl_est, (frm_qstep * 16)) * 4;
615
616
402k
        intra_cost_cl_est = intra_cost_ol + intra_noise_cl_est;
617
402k
        break;
618
0
    }
619
807k
    case 16:
620
807k
    {
621
        /* coding noise estimate for intra closed loop cost */
622
807k
        intra_cost_ol =
623
807k
            ps_cur_ipe_ctb->ai4_best16x16_intra_cost[(u1_x_off >> 4) + ((u1_y_off >> 4) << 2)] -
624
807k
            frm_qstep * 16;
625
626
807k
        intra_noise_cl_est = (frm_qstep * frm_qstep_multiplier) + (intra_cost_ol >> 4);
627
628
807k
        intra_noise_cl_est = MIN(intra_noise_cl_est, (frm_qstep * 16));
629
630
807k
        intra_cost_cl_est = intra_cost_ol + intra_noise_cl_est;
631
807k
        break;
632
0
    }
633
613k
    case 8:
634
613k
    {
635
        /* coding noise estimate for intra closed loop cost */
636
613k
        intra_cost_ol =
637
613k
            ps_cur_ipe_ctb->ai4_best8x8_intra_cost[(u1_x_off >> 3) + u1_y_off] - frm_qstep * 4;
638
639
613k
        intra_noise_cl_est = (frm_qstep * frm_qstep_multiplier) + (intra_cost_ol >> 4);
640
641
613k
        intra_noise_cl_est = MIN(intra_noise_cl_est, (frm_qstep * 16)) >> 2;
642
643
613k
        intra_cost_cl_est = intra_cost_ol + intra_noise_cl_est;
644
613k
        break;
645
0
    }
646
1.85M
    }
647
#if DISABLE_INTER_CANDIDATES
648
    return;
649
#endif
650
651
1.85M
    u1_threshold_multi = 1;
652
#if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
653
    if(u1_is_cu_noisy)
654
    {
655
        intra_cost_cl_est = INT_MAX;
656
    }
657
#endif
658
659
1.85M
    ps_cu_candt = ps_cu_out->as_cu_inter_cand;
660
661
    /* Check if the first best candidate is inter or intra */
662
1.85M
    if(ps_best_results[0].as_pu_results[0].pu.b1_intra_flag)
663
158k
    {
664
158k
        ps_cu_out->u1_best_is_intra = 1;
665
158k
    }
666
1.69M
    else
667
1.69M
    {
668
1.69M
        ps_cu_out->u1_best_is_intra = 0;
669
1.69M
    }
670
671
2.85M
    for(j = 0; j < u1_num_best_results; j++)
672
2.70M
    {
673
2.70M
        part_type_results_t *ps_best = &ps_best_results[j];
674
675
2.70M
        if(ps_best->as_pu_results[0].pu.b1_intra_flag)
676
160k
        {
677
160k
            found_intra = 1;
678
160k
        }
679
2.53M
        else
680
2.53M
        {
681
            /* populate the TU split flags, 4 flags copied as max cu can be 64 */
682
2.53M
            memcpy(ps_cu_candt->ai4_tu_split_flag, ps_best->ai4_tu_split_flag, 4 * sizeof(WORD32));
683
684
            /* populate the TU early CBF flags, 4 flags copied as max cu can be 64 */
685
2.53M
            memcpy(ps_cu_candt->ai4_tu_early_cbf, ps_best->ai4_tu_early_cbf, 4 * sizeof(WORD32));
686
687
            /* Note: the enums of part size and me part types shall match */
688
2.53M
            ps_cu_candt->b3_part_size = ps_best->u1_part_type;
689
690
            /* ME will always set the skip flag to 0            */
691
            /* in closed loop skip will be added as a candidate */
692
2.53M
            ps_cu_candt->b1_skip_flag = 0;
693
694
            /* copy the inter pus : Note: assuming NxN part type is not supported */
695
2.53M
            ps_cu_candt->as_inter_pu[0] = ps_best->as_pu_results[0].pu;
696
697
2.53M
            ps_cu_candt->as_inter_pu[0].b1_merge_flag = 0;
698
699
            /* Copy the total cost of the CU candt */
700
2.53M
            ps_cu_candt->i4_total_cost = ps_best->i4_tot_cost;
701
702
2.53M
            ps_cu_out->ai4_mv_cost[ps_cu_out->u1_num_inter_cands][0] =
703
2.53M
                ps_best->as_pu_results[0].i4_mv_cost;
704
705
#if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
706
            ps_cu_out->ai4_err_metric[ps_cu_out->u1_num_inter_cands][0] =
707
                ps_best->as_pu_results[0].i4_tot_cost - ps_best->as_pu_results[0].i4_mv_cost;
708
#endif
709
710
2.53M
            if(ps_best->u1_part_type)
711
755k
            {
712
755k
                ps_cu_candt->as_inter_pu[1] = ps_best->as_pu_results[1].pu;
713
755k
                ps_cu_out->ai4_mv_cost[ps_cu_out->u1_num_inter_cands][1] =
714
755k
                    ps_best->as_pu_results[1].i4_mv_cost;
715
#if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
716
                ps_cu_out->ai4_err_metric[ps_cu_out->u1_num_inter_cands][1] =
717
                    ps_best->as_pu_results[1].i4_tot_cost - ps_best->as_pu_results[1].i4_mv_cost;
718
#endif
719
720
755k
                ps_cu_candt->as_inter_pu[1].b1_merge_flag = 0;
721
755k
            }
722
723
2.53M
            ps_cu_candt++;
724
2.53M
            ps_cu_out->u1_num_inter_cands++;
725
2.53M
            if(intra_cost_cl_est < ((ps_best->i4_tot_cost * u1_threshold_multi) >> 0))
726
1.91M
            {
727
                /* The rationale - */
728
                /* Artefacts were being observed in some sequences, */
729
                /* Brooklyn_1080p in particular - where it was readily */
730
                /* apparent. The cause was coding of CU's as inter CU's */
731
                /* when they actually needed to be coded as intra CU's. */
732
                /* This was observed during either fade-outs or flashes. */
733
                /* After tinkering with the magnitude of the coding noise */
734
                /* factor that was added to the intra cost to see when the */
735
                /* artefacts in Brooklyn vanished, it was observed that the */
736
                /* factor multiplied with the frame_qstep followed a pattern. */
737
                /* When the pattern was subjected to a regression analysis, the */
738
                /* formula seen below emerged. Also note the fact that the coding */
739
                /* noise factor is the product of the frame_qstep and a constant */
740
                /* multiplier */
741
742
                /*UWORD32 frm_qstep_multiplier =
743
                -3.346 * log((float)frm_qstep) + 15.925;*/
744
1.91M
                found_intra = 1;
745
1.91M
            }
746
747
2.53M
            if(ps_cu_out->u1_num_inter_cands >= num_results_to_copy)
748
1.69M
            {
749
1.69M
                break;
750
1.69M
            }
751
2.53M
        }
752
2.70M
    }
753
754
1.85M
    if(quality_preset < IHEVCE_QUALITY_P4)
755
1.30M
    {
756
1.30M
        found_intra = 1;
757
1.30M
    }
758
759
1.85M
    if(!found_intra)
760
300k
    {
761
        /* rdopt evaluation of intra disabled as inter is clear winner */
762
300k
        ps_cu_out->u1_num_intra_rdopt_cands = 0;
763
764
        /* all the modes invalidated */
765
300k
        ps_cu_out->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
766
300k
        ps_cu_out->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
767
300k
        ps_cu_out->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][0] = 255;
768
300k
        ps_cu_out->u1_chroma_intra_pred_mode = 255;
769
770
        /* no intra candt to verify */
771
300k
        ps_cu_out->s_cu_intra_cand.b6_num_intra_cands = 0;
772
300k
    }
773
1.85M
}
774
775
/**
776
*********************************************************************************
777
* Function name : ihevce_create_child_nodes_cu_tree
778
*
779
* \brief
780
*    This function create child node from cu tree
781
*
782
* \param[in] ps_cu_tree_root : pointer to Structure for CU recursion
783
* \param[out] ps_cu_tree_cur_node : pointer to  Structure for CU recursion
784
* \param[in] ai4_child_node_enable : child node enable flag
785
* \param[in] nodes_already_created : already created node value
786
* \return
787
*    None
788
*
789
**********************************************************************************/
790
WORD32 ihevce_create_child_nodes_cu_tree(
791
    cur_ctb_cu_tree_t *ps_cu_tree_root,
792
    cur_ctb_cu_tree_t *ps_cu_tree_cur_node,
793
    WORD32 *ai4_child_node_enable,
794
    WORD32 nodes_already_created)
795
474k
{
796
474k
    cur_ctb_cu_tree_t *ps_tl;
797
474k
    cur_ctb_cu_tree_t *ps_tr;
798
474k
    cur_ctb_cu_tree_t *ps_bl;
799
474k
    cur_ctb_cu_tree_t *ps_br;
800
801
474k
    ps_tl = ps_cu_tree_root + nodes_already_created;
802
474k
    ps_tr = ps_tl + 1;
803
474k
    ps_bl = ps_tr + 1;
804
474k
    ps_br = ps_bl + 1;
805
806
474k
    if(1 == ps_cu_tree_cur_node->is_node_valid)
807
137k
    {
808
137k
        ps_tl = (ai4_child_node_enable[0]) ? ps_tl : NULL;
809
137k
        ps_tr = (ai4_child_node_enable[1]) ? ps_tr : NULL;
810
137k
        ps_bl = (ai4_child_node_enable[2]) ? ps_bl : NULL;
811
137k
        ps_br = (ai4_child_node_enable[3]) ? ps_br : NULL;
812
813
        /* In incomplete CTB, if any of the child nodes are assigned to NULL */
814
        /* then parent node ceases to be valid */
815
137k
        if((ps_tl == NULL) || (ps_tr == NULL) || (ps_br == NULL) || (ps_bl == NULL))
816
0
        {
817
0
            ps_cu_tree_cur_node->is_node_valid = 0;
818
0
        }
819
137k
    }
820
474k
    ps_cu_tree_cur_node->ps_child_node_tl = ps_tl;
821
474k
    ps_cu_tree_cur_node->ps_child_node_tr = ps_tr;
822
474k
    ps_cu_tree_cur_node->ps_child_node_bl = ps_bl;
823
474k
    ps_cu_tree_cur_node->ps_child_node_br = ps_br;
824
825
474k
    return 4;
826
474k
}
827
828
/**
829
*********************************************************************************
830
* Function name : ihevce_populate_cu_tree
831
*
832
* \brief
833
*    This function create child node from cu tree
834
*
835
* \param[in] ps_cur_ipe_ctb : pointer to Structure for CU recursion
836
* \param[out] ps_cu_tree : pointer to  Structure for CU recursion
837
* \param[in] tree_depth : child node enable flag
838
* \param[in] e_quality_preset : already created node value
839
* \param[in] e_grandparent_blk_pos : already created node value
840
* \param[in] e_parent_blk_pos : already created node value
841
* \param[in] e_cur_blk_pos : already created node value
842
*
843
* \return
844
*    None
845
*
846
**********************************************************************************/
847
void ihevce_populate_cu_tree(
848
    ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
849
    cur_ctb_cu_tree_t *ps_cu_tree,
850
    WORD32 tree_depth,
851
    IHEVCE_QUALITY_CONFIG_T e_quality_preset,
852
    CU_POS_T e_grandparent_blk_pos,
853
    CU_POS_T e_parent_blk_pos,
854
    CU_POS_T e_cur_blk_pos)
855
2.04M
{
856
2.04M
    WORD32 ai4_child_enable[4];
857
2.04M
    WORD32 children_nodes_required = 0;
858
2.04M
    WORD32 cu_pos_x = 0;
859
2.04M
    WORD32 cu_pos_y = 0;
860
2.04M
    WORD32 cu_size = 0;
861
2.04M
    WORD32 i;
862
2.04M
    WORD32 node_validity = 0;
863
864
2.04M
    if(NULL == ps_cu_tree)
865
0
    {
866
0
        return;
867
0
    }
868
869
2.04M
    switch(tree_depth)
870
2.04M
    {
871
145k
    case 0:
872
145k
    {
873
        /* 64x64 block */
874
145k
        intra32_analyse_t *ps_intra32_analyse = ps_cur_ipe_ctb->as_intra32_analyse;
875
876
145k
        children_nodes_required = 1;
877
145k
        cu_size = 64;
878
145k
        cu_pos_x = 0;
879
145k
        cu_pos_y = 0;
880
881
145k
        node_validity = !ps_cur_ipe_ctb->u1_split_flag;
882
883
145k
        if(e_quality_preset >= IHEVCE_QUALITY_P2)
884
95.1k
        {
885
95.1k
            if(node_validity == 1)
886
40.1k
            {
887
40.1k
                children_nodes_required = 0;
888
40.1k
            }
889
95.1k
        }
890
891
727k
        for(i = 0; i < 4; i++)
892
581k
        {
893
581k
            ai4_child_enable[i] = ps_intra32_analyse[i].b1_valid_cu;
894
581k
        }
895
896
145k
        break;
897
0
    }
898
421k
    case 1:
899
421k
    {
900
        /* 32x32 block */
901
421k
        WORD32 valid_flag_32 = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_valid_cu);
902
903
421k
        intra16_analyse_t *ps_intra16_analyse =
904
421k
            ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].as_intra16_analyse;
905
906
421k
        cu_size = 32;
907
908
        /* Explanation for logic below - */
909
        /* * pos_x and pos_y are in units of 8x8 CU's */
910
        /* * pos_x = 0 for TL and BL children */
911
        /* * pos_x = 4 for TR and BR children */
912
        /* * pos_y = 0 for TL and TR children */
913
        /* * pos_y = 4 for BL and BR children */
914
421k
        cu_pos_x = (e_cur_blk_pos & 1) << 2;
915
421k
        cu_pos_y = (e_cur_blk_pos & 2) << 1;
916
917
421k
        {
918
421k
            node_validity = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_merge_flag);
919
920
421k
            if(e_quality_preset >= IHEVCE_QUALITY_P2)
921
220k
            {
922
220k
                node_validity = (!ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_split_flag);
923
220k
            }
924
925
421k
            node_validity = node_validity && valid_flag_32;
926
421k
            children_nodes_required = !node_validity || ps_cur_ipe_ctb->u1_split_flag;
927
421k
        }
928
929
421k
        if(e_quality_preset >= IHEVCE_QUALITY_P2)
930
220k
        {
931
220k
            if(node_validity == 1)
932
134k
            {
933
134k
                children_nodes_required = 0;
934
134k
            }
935
85.4k
            else
936
85.4k
            {
937
85.4k
                children_nodes_required =
938
85.4k
                    (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_split_flag);
939
85.4k
            }
940
220k
        }
941
942
2.10M
        for(i = 0; i < 4; i++)
943
1.68M
        {
944
1.68M
            ai4_child_enable[i] = ps_intra16_analyse[i].b1_valid_cu;
945
1.68M
        }
946
947
421k
        break;
948
0
    }
949
602k
    case 2:
950
602k
    {
951
        /* 16x16 block */
952
602k
        WORD32 cu_pos_x_parent;
953
602k
        WORD32 cu_pos_y_parent;
954
602k
        WORD32 merge_flag_16;
955
602k
        WORD32 merge_flag_32;
956
957
602k
        intra8_analyse_t *ps_intra8_analyse = ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
958
602k
                                                  .as_intra16_analyse[e_cur_blk_pos]
959
602k
                                                  .as_intra8_analyse;
960
961
602k
        WORD32 valid_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
962
602k
                                    .as_intra16_analyse[e_cur_blk_pos]
963
602k
                                    .b1_valid_cu);
964
965
602k
        cu_size = 16;
966
967
        /* Explanation for logic below - */
968
        /* See similar explanation above */
969
602k
        cu_pos_x_parent = (e_parent_blk_pos & 1) << 2;
970
602k
        cu_pos_y_parent = (e_parent_blk_pos & 2) << 1;
971
602k
        cu_pos_x = cu_pos_x_parent + ((e_cur_blk_pos & 1) << 1);
972
602k
        cu_pos_y = cu_pos_y_parent + (e_cur_blk_pos & 2);
973
974
602k
        merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
975
602k
                             .as_intra16_analyse[e_cur_blk_pos]
976
602k
                             .b1_merge_flag);
977
602k
        merge_flag_32 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos].b1_merge_flag);
978
979
602k
#if !ENABLE_UNIFORM_CU_SIZE_8x8
980
602k
        node_validity = (merge_flag_16) || ((ps_cur_ipe_ctb->u1_split_flag) && (!merge_flag_32));
981
#else
982
        node_validity = 0;
983
#endif
984
985
602k
        node_validity = (merge_flag_16) || ((ps_cur_ipe_ctb->u1_split_flag) && (!merge_flag_32));
986
987
602k
        if(e_quality_preset >= IHEVCE_QUALITY_P2)
988
292k
        {
989
292k
            node_validity = (!ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
990
292k
                                  .as_intra16_analyse[e_cur_blk_pos]
991
292k
                                  .b1_split_flag);
992
292k
        }
993
994
602k
        node_validity = node_validity && valid_flag_16;
995
996
602k
        children_nodes_required = ((ps_cur_ipe_ctb->u1_split_flag) && (!merge_flag_32)) ||
997
602k
                                  !merge_flag_16;
998
999
602k
        if(e_quality_preset >= IHEVCE_QUALITY_P2)
1000
292k
        {
1001
292k
            children_nodes_required = !node_validity;
1002
292k
        }
1003
1004
3.01M
        for(i = 0; i < 4; i++)
1005
2.40M
        {
1006
2.40M
            ai4_child_enable[i] = ps_intra8_analyse[i].b1_valid_cu;
1007
2.40M
        }
1008
602k
        break;
1009
0
    }
1010
874k
    case 3:
1011
874k
    {
1012
        /* 8x8 block */
1013
874k
        WORD32 cu_pos_x_grandparent;
1014
874k
        WORD32 cu_pos_y_grandparent;
1015
1016
874k
        WORD32 cu_pos_x_parent;
1017
874k
        WORD32 cu_pos_y_parent;
1018
1019
874k
        WORD32 valid_flag_8 = (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos]
1020
874k
                                   .as_intra16_analyse[e_parent_blk_pos]
1021
874k
                                   .as_intra8_analyse[e_cur_blk_pos]
1022
874k
                                   .b1_valid_cu);
1023
1024
874k
        cu_size = 8;
1025
1026
874k
        cu_pos_x_grandparent = (e_grandparent_blk_pos & 1) << 2;
1027
874k
        cu_pos_y_grandparent = (e_grandparent_blk_pos & 2) << 1;
1028
874k
        cu_pos_x_parent = cu_pos_x_grandparent + ((e_parent_blk_pos & 1) << 1);
1029
874k
        cu_pos_y_parent = cu_pos_y_grandparent + (e_parent_blk_pos & 2);
1030
874k
        cu_pos_x = cu_pos_x_parent + (e_cur_blk_pos & 1);
1031
874k
        cu_pos_y = cu_pos_y_parent + ((e_cur_blk_pos & 2) >> 1);
1032
1033
874k
        node_validity = 1 && valid_flag_8;
1034
1035
874k
        children_nodes_required = 0;
1036
1037
874k
        break;
1038
0
    }
1039
2.04M
    }
1040
1041
    /* Fill the current cu_tree node */
1042
2.04M
    ps_cu_tree->is_node_valid = node_validity;
1043
2.04M
    ps_cu_tree->u1_cu_size = cu_size;
1044
2.04M
    ps_cu_tree->b3_cu_pos_x = cu_pos_x;
1045
2.04M
    ps_cu_tree->b3_cu_pos_y = cu_pos_y;
1046
1047
2.04M
    if(children_nodes_required)
1048
474k
    {
1049
474k
        tree_depth++;
1050
1051
474k
        ps_cur_ipe_ctb->nodes_created_in_cu_tree += ihevce_create_child_nodes_cu_tree(
1052
474k
            ps_cur_ipe_ctb->ps_cu_tree_root,
1053
474k
            ps_cu_tree,
1054
474k
            ai4_child_enable,
1055
474k
            ps_cur_ipe_ctb->nodes_created_in_cu_tree);
1056
1057
474k
        ihevce_populate_cu_tree(
1058
474k
            ps_cur_ipe_ctb,
1059
474k
            ps_cu_tree->ps_child_node_tl,
1060
474k
            tree_depth,
1061
474k
            e_quality_preset,
1062
474k
            e_parent_blk_pos,
1063
474k
            e_cur_blk_pos,
1064
474k
            POS_TL);
1065
1066
474k
        ihevce_populate_cu_tree(
1067
474k
            ps_cur_ipe_ctb,
1068
474k
            ps_cu_tree->ps_child_node_tr,
1069
474k
            tree_depth,
1070
474k
            e_quality_preset,
1071
474k
            e_parent_blk_pos,
1072
474k
            e_cur_blk_pos,
1073
474k
            POS_TR);
1074
1075
474k
        ihevce_populate_cu_tree(
1076
474k
            ps_cur_ipe_ctb,
1077
474k
            ps_cu_tree->ps_child_node_bl,
1078
474k
            tree_depth,
1079
474k
            e_quality_preset,
1080
474k
            e_parent_blk_pos,
1081
474k
            e_cur_blk_pos,
1082
474k
            POS_BL);
1083
1084
474k
        ihevce_populate_cu_tree(
1085
474k
            ps_cur_ipe_ctb,
1086
474k
            ps_cu_tree->ps_child_node_br,
1087
474k
            tree_depth,
1088
474k
            e_quality_preset,
1089
474k
            e_parent_blk_pos,
1090
474k
            e_cur_blk_pos,
1091
474k
            POS_BR);
1092
474k
    }
1093
1.56M
    else
1094
1.56M
    {
1095
1.56M
        ps_cu_tree->ps_child_node_tl = NULL;
1096
1.56M
        ps_cu_tree->ps_child_node_tr = NULL;
1097
1.56M
        ps_cu_tree->ps_child_node_bl = NULL;
1098
1.56M
        ps_cu_tree->ps_child_node_br = NULL;
1099
1.56M
    }
1100
2.04M
}
1101
1102
/**
1103
*********************************************************************************
1104
* Function name : ihevce_intra_mode_populator
1105
*
1106
* \brief
1107
*    This function populates the intra mode info into the CU intra candidate struct
1108
*
1109
* \param[out] ps_cu_intra_cand : pointer to structure that receives the cu intra candidate info
1110
* \param[in] ps_ipe_data : pointer to IPE L0 analyze structure
1111
* \param[in] ps_cu_tree_data : pointer to cu recursive struct
1112
* \param[in] i1_slice_type : contain slice type value
1113
* \param[in] i4_quality_preset : contain quality preset value
1114
*
1115
* \return
1116
*    None
1117
*
1118
**********************************************************************************/
1119
static void ihevce_intra_mode_populator(
1120
    cu_intra_cand_t *ps_cu_intra_cand,
1121
    ipe_l0_ctb_analyse_for_me_t *ps_ipe_data,
1122
    cur_ctb_cu_tree_t *ps_cu_tree_data,
1123
    WORD8 i1_slice_type,
1124
    WORD32 i4_quality_preset)
1125
2.78M
{
1126
2.78M
    WORD32 i4_32x32_id, i4_16x16_id, i4_8x8_id;
1127
1128
2.78M
    UWORD8 u1_cu_pos_x = ps_cu_tree_data->b3_cu_pos_x;
1129
2.78M
    UWORD8 u1_cu_pos_y = ps_cu_tree_data->b3_cu_pos_y;
1130
1131
2.78M
    i4_32x32_id = ((u1_cu_pos_x & 4) >> 2) + ((u1_cu_pos_y & 4) >> 1);
1132
1133
2.78M
    i4_16x16_id = ((u1_cu_pos_x & 2) >> 1) + ((u1_cu_pos_y & 2));
1134
1135
2.78M
    i4_8x8_id = (u1_cu_pos_x & 1) + ((u1_cu_pos_y & 1) << 1);
1136
1137
2.78M
    if(i4_quality_preset < IHEVCE_QUALITY_P3)
1138
1.97M
    {
1139
1.97M
        switch(ps_cu_tree_data->u1_cu_size)
1140
1.97M
        {
1141
61.4k
        case 64:
1142
61.4k
        {
1143
61.4k
            memcpy(
1144
61.4k
                ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1145
61.4k
                ps_ipe_data->au1_best_modes_32x32_tu,
1146
61.4k
                MAX_INTRA_CU_CANDIDATES + 1);
1147
1148
61.4k
            break;
1149
0
        }
1150
330k
        case 32:
1151
330k
        {
1152
330k
            intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
1153
1154
330k
            memcpy(
1155
330k
                ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
1156
330k
                ps_32x32_ipe_analyze->au1_best_modes_32x32_tu,
1157
330k
                MAX_INTRA_CU_CANDIDATES + 1);
1158
1159
330k
            if((i1_slice_type != ISLICE) && (i4_quality_preset == IHEVCE_QUALITY_P0))
1160
123k
            {
1161
123k
                ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
1162
123k
            }
1163
207k
            else if((i1_slice_type == ISLICE) && (i4_quality_preset == IHEVCE_QUALITY_P0))
1164
170k
            {
1165
170k
                if((ps_cu_tree_data->ps_child_node_bl != NULL) &&
1166
170k
                   (ps_cu_tree_data->ps_child_node_bl->is_node_valid))
1167
46.5k
                {
1168
46.5k
                    ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
1169
46.5k
                }
1170
123k
                else
1171
123k
                {
1172
123k
                    memcpy(
1173
123k
                        ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1174
123k
                        ps_32x32_ipe_analyze->au1_best_modes_16x16_tu,
1175
123k
                        MAX_INTRA_CU_CANDIDATES + 1);
1176
123k
                }
1177
170k
            }
1178
37.0k
            else
1179
37.0k
            {
1180
37.0k
                memcpy(
1181
37.0k
                    ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1182
37.0k
                    ps_32x32_ipe_analyze->au1_best_modes_16x16_tu,
1183
37.0k
                    MAX_INTRA_CU_CANDIDATES + 1);
1184
37.0k
            }
1185
1186
330k
            break;
1187
0
        }
1188
794k
        case 16:
1189
794k
        {
1190
            /* Copy best 16x16 CU modes */
1191
794k
            intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
1192
1193
794k
            intra16_analyse_t *ps_16x16_ipe_analyze =
1194
794k
                &ps_32x32_ipe_analyze->as_intra16_analyse[i4_16x16_id];
1195
1196
794k
            memcpy(
1197
794k
                ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
1198
794k
                ps_16x16_ipe_analyze->au1_best_modes_16x16_tu,
1199
794k
                MAX_INTRA_CU_CANDIDATES + 1);
1200
1201
794k
            if((i1_slice_type != ISLICE) && (i4_quality_preset == IHEVCE_QUALITY_P0))
1202
495k
            {
1203
495k
                ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
1204
495k
            }
1205
299k
            else if((i1_slice_type == ISLICE) && (i4_quality_preset == IHEVCE_QUALITY_P0))
1206
246k
            {
1207
246k
                if((ps_cu_tree_data->ps_child_node_bl != NULL) &&
1208
246k
                   (ps_cu_tree_data->ps_child_node_bl->is_node_valid))
1209
59.9k
                {
1210
59.9k
                    ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
1211
59.9k
                }
1212
186k
                else
1213
186k
                {
1214
186k
                    memcpy(
1215
186k
                        ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1216
186k
                        ps_16x16_ipe_analyze->au1_best_modes_8x8_tu,
1217
186k
                        MAX_INTRA_CU_CANDIDATES + 1);
1218
186k
                }
1219
246k
            }
1220
52.9k
            else
1221
52.9k
            {
1222
52.9k
                memcpy(
1223
52.9k
                    ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1224
52.9k
                    ps_16x16_ipe_analyze->au1_best_modes_8x8_tu,
1225
52.9k
                    MAX_INTRA_CU_CANDIDATES + 1);
1226
52.9k
            }
1227
1228
794k
            break;
1229
0
        }
1230
788k
        case 8:
1231
788k
        {
1232
788k
            intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
1233
1234
788k
            intra16_analyse_t *ps_16x16_ipe_analyze =
1235
788k
                &ps_32x32_ipe_analyze->as_intra16_analyse[i4_16x16_id];
1236
1237
788k
            intra8_analyse_t *ps_8x8_ipe_analyze =
1238
788k
                &ps_16x16_ipe_analyze->as_intra8_analyse[i4_8x8_id];
1239
1240
788k
            memcpy(
1241
788k
                ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
1242
788k
                ps_8x8_ipe_analyze->au1_best_modes_8x8_tu,
1243
788k
                MAX_INTRA_CU_CANDIDATES + 1);
1244
1245
788k
            ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
1246
1247
            /* Initialise the hash */
1248
788k
            {
1249
788k
                WORD32 i, j;
1250
1251
3.94M
                for(i = 0; i < NUM_PU_PARTS; i++)
1252
3.15M
                {
1253
3.15M
                    ps_cu_intra_cand->au1_num_modes_added[i] = 0;
1254
1255
113M
                    for(j = 0; j < MAX_INTRA_CANDIDATES; j++)
1256
110M
                    {
1257
110M
                        ps_cu_intra_cand->au1_intra_luma_mode_nxn_hash[i][j] = 0;
1258
110M
                    }
1259
3.15M
                }
1260
1261
3.94M
                for(i = 0; i < NUM_PU_PARTS; i++)
1262
3.15M
                {
1263
7.31M
                    for(j = 0; j < MAX_INTRA_CU_CANDIDATES; j++)
1264
5.92M
                    {
1265
5.92M
                        if(ps_8x8_ipe_analyze->au1_4x4_best_modes[i][j] == 255)
1266
1.76M
                        {
1267
1.76M
                            ps_cu_intra_cand->au1_intra_luma_modes_nxn[i][j] = 255;
1268
1.76M
                            break;
1269
1.76M
                        }
1270
1271
4.16M
                        ps_cu_intra_cand->au1_intra_luma_modes_nxn[i][j] =
1272
4.16M
                            ps_8x8_ipe_analyze->au1_4x4_best_modes[i][j];
1273
1274
4.16M
                        ps_cu_intra_cand->au1_intra_luma_mode_nxn_hash
1275
4.16M
                            [i][ps_8x8_ipe_analyze->au1_4x4_best_modes[i][j]] = 1;
1276
1277
4.16M
                        ps_cu_intra_cand->au1_num_modes_added[i]++;
1278
4.16M
                    }
1279
1280
3.15M
                    if(ps_cu_intra_cand->au1_num_modes_added[i] == MAX_INTRA_CU_CANDIDATES)
1281
1.38M
                    {
1282
1.38M
                        if(i1_slice_type != BSLICE)
1283
1.25M
                        {
1284
1.25M
                            ps_cu_intra_cand->au1_num_modes_added[i] =
1285
1.25M
                                ihevce_intra_mode_nxn_hash_updater(
1286
1.25M
                                    ps_cu_intra_cand->au1_intra_luma_modes_nxn[i],
1287
1.25M
                                    ps_cu_intra_cand->au1_intra_luma_mode_nxn_hash[i],
1288
1.25M
                                    ps_cu_intra_cand->au1_num_modes_added[i]);
1289
1.25M
                        }
1290
1.38M
                    }
1291
3.15M
                }
1292
788k
            }
1293
1294
788k
            break;
1295
0
        }
1296
1.97M
        }
1297
1.97M
    }
1298
809k
    else if(i4_quality_preset == IHEVCE_QUALITY_P6)
1299
331k
    {
1300
331k
        switch(ps_cu_tree_data->u1_cu_size)
1301
331k
        {
1302
6.66k
        case 64:
1303
6.66k
        {
1304
6.66k
            memcpy(
1305
6.66k
                ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1306
6.66k
                ps_ipe_data->au1_best_modes_32x32_tu,
1307
6.66k
                (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1308
1309
6.66k
            ps_cu_intra_cand->b1_eval_tx_cusize = 0;
1310
6.66k
            ps_cu_intra_cand->b1_eval_tx_cusize_by2 = 1;
1311
6.66k
            ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
1312
1313
6.66k
#if ENABLE_INTRA_MODE_FILTERING_IN_XS25
1314
6.66k
            ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2
1315
6.66k
                [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
1316
6.66k
#endif
1317
1318
6.66k
            break;
1319
0
        }
1320
70.1k
        case 32:
1321
70.1k
        {
1322
70.1k
            intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
1323
1324
70.1k
            memcpy(
1325
70.1k
                ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
1326
70.1k
                ps_32x32_ipe_analyze->au1_best_modes_32x32_tu,
1327
70.1k
                (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1328
1329
70.1k
            memcpy(
1330
70.1k
                ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1331
70.1k
                ps_32x32_ipe_analyze->au1_best_modes_16x16_tu,
1332
70.1k
                (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1333
1334
70.1k
#if ENABLE_INTRA_MODE_FILTERING_IN_XS25
1335
70.1k
            ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu
1336
70.1k
                [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
1337
70.1k
            ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2
1338
70.1k
                [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
1339
70.1k
#endif
1340
1341
70.1k
            break;
1342
0
        }
1343
142k
        case 16:
1344
142k
        {
1345
            /* Copy best 16x16 CU modes */
1346
142k
            intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
1347
1348
142k
            intra16_analyse_t *ps_16x16_ipe_analyze =
1349
142k
                &ps_32x32_ipe_analyze->as_intra16_analyse[i4_16x16_id];
1350
1351
142k
            memcpy(
1352
142k
                ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
1353
142k
                ps_16x16_ipe_analyze->au1_best_modes_16x16_tu,
1354
142k
                (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1355
1356
142k
            memcpy(
1357
142k
                ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1358
142k
                ps_16x16_ipe_analyze->au1_best_modes_8x8_tu,
1359
142k
                (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1360
1361
142k
#if ENABLE_INTRA_MODE_FILTERING_IN_XS25
1362
142k
            ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu
1363
142k
                [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
1364
142k
            ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2
1365
142k
                [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
1366
142k
#endif
1367
1368
142k
            break;
1369
0
        }
1370
112k
        case 8:
1371
112k
        {
1372
112k
            WORD32 i;
1373
1374
112k
            intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
1375
1376
112k
            intra16_analyse_t *ps_16x16_ipe_analyze =
1377
112k
                &ps_32x32_ipe_analyze->as_intra16_analyse[i4_16x16_id];
1378
1379
112k
            intra8_analyse_t *ps_8x8_ipe_analyze =
1380
112k
                &ps_16x16_ipe_analyze->as_intra8_analyse[i4_8x8_id];
1381
1382
112k
            memcpy(
1383
112k
                ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
1384
112k
                ps_8x8_ipe_analyze->au1_best_modes_8x8_tu,
1385
112k
                (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1386
1387
#if !ENABLE_INTRA_MODE_FILTERING_IN_XS25
1388
            memcpy(
1389
                ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1390
                ps_8x8_ipe_analyze->au1_best_modes_4x4_tu,
1391
                (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1392
1393
            for(i = 0; i < 4; i++)
1394
            {
1395
                memcpy(
1396
                    ps_cu_intra_cand->au1_intra_luma_modes_nxn[i],
1397
                    ps_8x8_ipe_analyze->au1_4x4_best_modes[i],
1398
                    (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1399
1400
                ps_cu_intra_cand->au1_intra_luma_modes_nxn[i][MAX_INTRA_CU_CANDIDATES] = 255;
1401
            }
1402
#else
1403
112k
            if(255 == ps_8x8_ipe_analyze->au1_4x4_best_modes[0][0])
1404
21.1k
            {
1405
21.1k
                memcpy(
1406
21.1k
                    ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1407
21.1k
                    ps_8x8_ipe_analyze->au1_best_modes_4x4_tu,
1408
21.1k
                    (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1409
1410
21.1k
                ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2
1411
21.1k
                    [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
1412
21.1k
            }
1413
90.8k
            else
1414
90.8k
            {
1415
454k
                for(i = 0; i < 4; i++)
1416
363k
                {
1417
363k
                    memcpy(
1418
363k
                        ps_cu_intra_cand->au1_intra_luma_modes_nxn[i],
1419
363k
                        ps_8x8_ipe_analyze->au1_4x4_best_modes[i],
1420
363k
                        (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1421
1422
363k
                    ps_cu_intra_cand->au1_intra_luma_modes_nxn
1423
363k
                        [i][MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
1424
363k
                }
1425
90.8k
            }
1426
1427
112k
            ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu
1428
112k
                [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
1429
112k
#endif
1430
1431
#if FORCE_NXN_MODE_BASED_ON_OL_IPE
1432
            if((i4_quality_preset == IHEVCE_QUALITY_P6) && (i1_slice_type != ISLICE))
1433
            {
1434
                /*Evaluate nxn mode for 8x8 if ol ipe wins for nxn over cu=tu and cu=4tu.*/
1435
                /*Disbale CU=TU and CU=4TU modes */
1436
                if(ps_8x8_ipe_analyze->b1_enable_nxn == 1)
1437
                {
1438
                    ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
1439
                    ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
1440
                    ps_cu_intra_cand->au1_intra_luma_modes_nxn[0][1] = 255;
1441
                    ps_cu_intra_cand->au1_intra_luma_modes_nxn[1][1] = 255;
1442
                    ps_cu_intra_cand->au1_intra_luma_modes_nxn[2][1] = 255;
1443
                    ps_cu_intra_cand->au1_intra_luma_modes_nxn[3][1] = 255;
1444
                }
1445
            }
1446
#endif
1447
1448
112k
            break;
1449
0
        }
1450
331k
        }
1451
331k
    }
1452
478k
    else
1453
478k
    {
1454
478k
        switch(ps_cu_tree_data->u1_cu_size)
1455
478k
        {
1456
27.1k
        case 64:
1457
27.1k
        {
1458
27.1k
            memcpy(
1459
27.1k
                ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1460
27.1k
                ps_ipe_data->au1_best_modes_32x32_tu,
1461
27.1k
                (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1462
1463
27.1k
            ps_cu_intra_cand->b1_eval_tx_cusize = 0;
1464
27.1k
            ps_cu_intra_cand->b1_eval_tx_cusize_by2 = 1;
1465
27.1k
            ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
1466
1467
27.1k
            break;
1468
0
        }
1469
114k
        case 32:
1470
114k
        {
1471
114k
            intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
1472
1473
114k
            memcpy(
1474
114k
                ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
1475
114k
                ps_32x32_ipe_analyze->au1_best_modes_32x32_tu,
1476
114k
                (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1477
1478
114k
            memcpy(
1479
114k
                ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1480
114k
                ps_32x32_ipe_analyze->au1_best_modes_16x16_tu,
1481
114k
                (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1482
1483
114k
            break;
1484
0
        }
1485
146k
        case 16:
1486
146k
        {
1487
            /* Copy best 16x16 CU modes */
1488
146k
            intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
1489
1490
146k
            intra16_analyse_t *ps_16x16_ipe_analyze =
1491
146k
                &ps_32x32_ipe_analyze->as_intra16_analyse[i4_16x16_id];
1492
1493
146k
            memcpy(
1494
146k
                ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
1495
146k
                ps_16x16_ipe_analyze->au1_best_modes_16x16_tu,
1496
146k
                (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1497
1498
146k
            memcpy(
1499
146k
                ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1500
146k
                ps_16x16_ipe_analyze->au1_best_modes_8x8_tu,
1501
146k
                (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1502
1503
146k
            break;
1504
0
        }
1505
190k
        case 8:
1506
190k
        {
1507
190k
            WORD32 i;
1508
1509
190k
            intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
1510
1511
190k
            intra16_analyse_t *ps_16x16_ipe_analyze =
1512
190k
                &ps_32x32_ipe_analyze->as_intra16_analyse[i4_16x16_id];
1513
1514
190k
            intra8_analyse_t *ps_8x8_ipe_analyze =
1515
190k
                &ps_16x16_ipe_analyze->as_intra8_analyse[i4_8x8_id];
1516
1517
190k
            memcpy(
1518
190k
                ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
1519
190k
                ps_8x8_ipe_analyze->au1_best_modes_8x8_tu,
1520
190k
                (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1521
1522
190k
            memcpy(
1523
190k
                ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
1524
190k
                ps_8x8_ipe_analyze->au1_best_modes_4x4_tu,
1525
190k
                (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1526
1527
954k
            for(i = 0; i < 4; i++)
1528
763k
            {
1529
763k
                memcpy(
1530
763k
                    ps_cu_intra_cand->au1_intra_luma_modes_nxn[i],
1531
763k
                    ps_8x8_ipe_analyze->au1_4x4_best_modes[i],
1532
763k
                    (NUM_BEST_MODES + 1) * sizeof(UWORD8));
1533
1534
763k
                ps_cu_intra_cand->au1_intra_luma_modes_nxn[i][MAX_INTRA_CU_CANDIDATES] = 255;
1535
763k
            }
1536
1537
190k
            break;
1538
0
        }
1539
478k
        }
1540
478k
    }
1541
2.78M
}
1542
/**
1543
******************************************************************************
1544
* \if Function name : ihevce_compute_rdo \endif
1545
*
1546
* \brief
1547
*    Coding Unit mode decide function. Performs RD opt and decides the best mode
1548
*
1549
* \param[in] pv_ctxt : pointer to enc_loop module
1550
* \param[in] ps_cu_prms  : pointer to coding unit params (position, buffer pointers)
1551
* \param[in] ps_cu_analyse : pointer to cu analyse
1552
* \param[out] ps_cu_final : pointer to cu final
1553
* \param[out] pu1_ecd_data :pointer to store coeff data for ECD
1554
* \param[out]ps_row_col_pu; colocated pu buffer pointer
1555
* \param[out]pu1_row_pu_map; colocated pu map buffer pointer
1556
* \param[in]col_start_pu_idx : pu index start value
1557
*
1558
* \return
1559
*    None
1560
*
1561
*
1562
* \author
1563
*  Ittiam
1564
*
1565
*****************************************************************************
1566
*/
1567
LWORD64 ihevce_compute_rdo(
1568
    ihevce_enc_loop_ctxt_t *ps_ctxt,
1569
    enc_loop_cu_prms_t *ps_cu_prms,
1570
    cur_ctb_cu_tree_t *ps_cu_tree_analyse,
1571
    ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
1572
    me_ctb_data_t *ps_cu_me_data,
1573
    pu_col_mv_t *ps_col_pu,
1574
    final_mode_state_t *ps_final_mode_state,
1575
    UWORD8 *pu1_col_pu_map,
1576
    UWORD8 *pu1_ecd_data,
1577
    WORD32 col_start_pu_idx,
1578
    WORD32 i4_ctb_x_off,
1579
    WORD32 i4_ctb_y_off)
1580
3.18M
{
1581
    /* Populate the rdo candiates to the structure */
1582
3.18M
    cu_analyse_t s_cu_analyse;
1583
3.18M
    LWORD64 rdopt_best_cost;
1584
    /* Populate candidates of child nodes to CU analyse struct for further evaluation */
1585
3.18M
    cu_analyse_t *ps_cu_analyse;
1586
3.18M
    WORD32 curr_cu_pos_in_row;
1587
3.18M
    WORD32 cu_top_right_offset, cu_top_right_dep_pos;
1588
3.18M
    WORD32 is_first_cu_in_ctb, is_ctb_level_quant_rounding, is_nctb_level_quant_rounding;
1589
1590
3.18M
    WORD32 cu_pos_x = ps_cu_tree_analyse->b3_cu_pos_x;
1591
3.18M
    WORD32 cu_pos_y = ps_cu_tree_analyse->b3_cu_pos_y;
1592
1593
    /*Derive the indices of 32*32, 16*16 and 8*8 blocks*/
1594
3.18M
    WORD32 i4_32x32_id = ((cu_pos_x & 4) >> 2) + ((cu_pos_y & 4) >> 1);
1595
1596
3.18M
    WORD32 i4_16x16_id = ((cu_pos_x & 2) >> 1) + ((cu_pos_y & 2));
1597
1598
3.18M
    WORD32 i4_8x8_id = (cu_pos_x & 1) + ((cu_pos_y & 1) << 1);
1599
3.18M
    if(i4_ctb_y_off == 0)
1600
2.49M
    {
1601
        /* No wait for 1st row */
1602
2.49M
        cu_top_right_offset = -(MAX_CTB_SIZE);
1603
2.49M
        {
1604
2.49M
            ihevce_tile_params_t *ps_col_tile_params =
1605
2.49M
                ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base + ps_ctxt->i4_tile_col_idx);
1606
1607
2.49M
            cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
1608
2.49M
        }
1609
1610
2.49M
        cu_top_right_dep_pos = 0;
1611
2.49M
    }
1612
687k
    else
1613
687k
    {
1614
687k
        cu_top_right_offset = ps_cu_tree_analyse->u1_cu_size << 1;
1615
687k
        cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
1616
687k
    }
1617
3.18M
    ps_cu_analyse = &s_cu_analyse;
1618
1619
3.18M
    ps_cu_analyse->b3_cu_pos_x = cu_pos_x;
1620
3.18M
    ps_cu_analyse->b3_cu_pos_y = cu_pos_y;
1621
3.18M
    ps_cu_analyse->u1_cu_size = ps_cu_tree_analyse->u1_cu_size;
1622
1623
    /* Default initializations */
1624
3.18M
    ps_cu_analyse->u1_num_intra_rdopt_cands = MAX_INTRA_CU_CANDIDATES;
1625
3.18M
    ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][0] = 255;
1626
3.18M
    ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
1627
3.18M
    ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
1628
1629
3.18M
    ps_cu_analyse->s_cu_intra_cand.b1_eval_tx_cusize = 1;
1630
3.18M
    ps_cu_analyse->s_cu_intra_cand.b1_eval_tx_cusize_by2 = 1;
1631
1632
3.18M
    switch(ps_cu_tree_analyse->u1_cu_size)
1633
3.18M
    {
1634
100k
    case 64:
1635
100k
    {
1636
100k
        memcpy(
1637
100k
            ps_cu_analyse[0].i4_act_factor,
1638
100k
            ps_cur_ipe_ctb->i4_64x64_act_factor,
1639
100k
            4 * 2 * sizeof(WORD32));
1640
1641
100k
        ps_cu_analyse[0].s_cu_intra_cand.b1_eval_tx_cusize = 0;
1642
100k
        ps_cu_analyse[0].s_cu_intra_cand.b1_eval_tx_cusize_by2 = 1;
1643
100k
        ps_cu_analyse[0].s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
1644
1645
100k
        break;
1646
0
    }
1647
708k
    case 32:
1648
708k
    {
1649
708k
        memcpy(
1650
708k
            ps_cu_analyse[0].i4_act_factor,
1651
708k
            ps_cur_ipe_ctb->i4_32x32_act_factor[i4_32x32_id],
1652
708k
            3 * 2 * sizeof(WORD32));
1653
1654
708k
        break;
1655
0
    }
1656
1.25M
    case 16:
1657
1.25M
    {
1658
1.25M
        memcpy(
1659
1.25M
            ps_cu_analyse[0].i4_act_factor,
1660
1.25M
            ps_cur_ipe_ctb->i4_16x16_act_factor[(i4_32x32_id << 2) + i4_16x16_id],
1661
1.25M
            2 * 2 * sizeof(WORD32));
1662
1663
1.25M
        break;
1664
0
    }
1665
1.12M
    case 8:
1666
1.12M
    {
1667
1.12M
        memcpy(
1668
1.12M
            ps_cu_analyse[0].i4_act_factor,
1669
1.12M
            ps_cur_ipe_ctb->i4_16x16_act_factor[(i4_32x32_id << 2) + i4_16x16_id],
1670
1.12M
            2 * 2 * sizeof(WORD32));
1671
1672
1.12M
        break;
1673
0
    }
1674
3.18M
    }
1675
1676
    /* Populate the me data in cu_analyse struct */
1677
    /* For CU size 32 and 64, add me data to array of cu analyse struct */
1678
3.18M
    if(ISLICE != ps_ctxt->i1_slice_type)
1679
1.85M
    {
1680
1.85M
        if((ps_cu_tree_analyse->u1_cu_size >= 32) && (ps_cu_tree_analyse->u1_inter_eval_enable))
1681
431k
        {
1682
431k
            if(32 == ps_cu_tree_analyse->u1_cu_size)
1683
402k
            {
1684
402k
                ihevce_populate_cu_struct(
1685
402k
                    ps_ctxt,
1686
402k
                    ps_cur_ipe_ctb,
1687
402k
                    ps_cu_tree_analyse,
1688
402k
                    ps_cu_me_data->as_32x32_block_data[i4_32x32_id].as_best_results,
1689
402k
                    ps_cu_analyse,
1690
402k
                    i4_32x32_id,
1691
#if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
1692
                    ps_cu_prms->u1_is_cu_noisy,
1693
#endif
1694
402k
                    ps_cu_me_data->as_32x32_block_data[i4_32x32_id].num_best_results);
1695
402k
            }
1696
29.2k
            else
1697
29.2k
            {
1698
29.2k
                ihevce_populate_cu_struct(
1699
29.2k
                    ps_ctxt,
1700
29.2k
                    ps_cur_ipe_ctb,
1701
29.2k
                    ps_cu_tree_analyse,
1702
29.2k
                    ps_cu_me_data->s_64x64_block_data.as_best_results,
1703
29.2k
                    ps_cu_analyse,
1704
29.2k
                    i4_32x32_id,
1705
#if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
1706
                    ps_cu_prms->u1_is_cu_noisy,
1707
#endif
1708
29.2k
                    ps_cu_me_data->s_64x64_block_data.num_best_results);
1709
29.2k
            }
1710
431k
        }
1711
1.42M
        else if(ps_cu_tree_analyse->u1_cu_size < 32)
1712
1.42M
        {
1713
1.42M
            i4_8x8_id += (i4_32x32_id << 4) + (i4_16x16_id << 2);
1714
1.42M
            i4_16x16_id += (i4_32x32_id << 2);
1715
1716
1.42M
            if(16 == ps_cu_tree_analyse->u1_cu_size)
1717
807k
            {
1718
807k
                block_data_16x16_t *ps_data = &ps_cu_me_data->as_block_data[i4_16x16_id];
1719
1720
807k
                if(ps_cu_tree_analyse->u1_inter_eval_enable)
1721
807k
                {
1722
807k
                    ihevce_populate_cu_struct(
1723
807k
                        ps_ctxt,
1724
807k
                        ps_cur_ipe_ctb,
1725
807k
                        ps_cu_tree_analyse,
1726
807k
                        ps_data->as_best_results,
1727
807k
                        ps_cu_analyse,
1728
807k
                        i4_32x32_id,
1729
#if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
1730
                        ps_cu_prms->u1_is_cu_noisy,
1731
#endif
1732
807k
                        ps_data->num_best_results);
1733
807k
                }
1734
0
                else
1735
0
                {
1736
0
                    ps_cu_analyse->u1_num_inter_cands = 0;
1737
0
                    ps_cu_analyse->u1_best_is_intra = 1;
1738
0
                }
1739
807k
            }
1740
613k
            else /* If CU size is 8 */
1741
613k
            {
1742
613k
                block_data_8x8_t *ps_data = &ps_cu_me_data->as_8x8_block_data[i4_8x8_id];
1743
1744
613k
                if(ps_cu_tree_analyse->u1_inter_eval_enable)
1745
613k
                {
1746
613k
                    ihevce_populate_cu_struct(
1747
613k
                        ps_ctxt,
1748
613k
                        ps_cur_ipe_ctb,
1749
613k
                        ps_cu_tree_analyse,
1750
613k
                        ps_data->as_best_results,
1751
613k
                        ps_cu_analyse,
1752
613k
                        i4_32x32_id,
1753
#if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
1754
                        ps_cu_prms->u1_is_cu_noisy,
1755
#endif
1756
613k
                        ps_data->num_best_results);
1757
613k
                }
1758
0
                else
1759
0
                {
1760
0
                    ps_cu_analyse->u1_num_inter_cands = 0;
1761
0
                    ps_cu_analyse->u1_best_is_intra = 1;
1762
0
                }
1763
613k
            }
1764
1.42M
        }
1765
1.09k
        else
1766
1.09k
        {
1767
1.09k
            ps_cu_analyse->u1_num_inter_cands = 0;
1768
1.09k
            ps_cu_analyse->u1_best_is_intra = 1;
1769
1.09k
        }
1770
1.85M
    }
1771
1.32M
    else
1772
1.32M
    {
1773
1.32M
        ps_cu_analyse->u1_num_inter_cands = 0;
1774
1.32M
        ps_cu_analyse->u1_best_is_intra = 1;
1775
1.32M
    }
1776
1777
3.18M
    if(!ps_ctxt->i1_cu_qp_delta_enable)
1778
2.11M
    {
1779
2.11M
        ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_frame_qp;
1780
1781
        /*cu qp must be populated in cu_analyse_t struct*/
1782
2.11M
        ps_ctxt->i4_cu_qp = ps_cu_analyse->i1_cu_qp;
1783
2.11M
    }
1784
1.06M
    else
1785
1.06M
    {
1786
1.06M
        ASSERT(ps_cu_analyse->i4_act_factor[0] > 0);
1787
1.06M
        ASSERT(
1788
1.06M
            ((ps_cu_analyse->i4_act_factor[1] > 0) && (ps_cu_analyse->u1_cu_size != 8)) ||
1789
1.06M
            ((ps_cu_analyse->u1_cu_size == 8)));
1790
1.06M
        ASSERT(
1791
1.06M
            ((ps_cu_analyse->i4_act_factor[2] > 0) && (ps_cu_analyse->u1_cu_size == 32)) ||
1792
1.06M
            ((ps_cu_analyse->u1_cu_size != 32)));
1793
1.06M
    }
1794
1795
3.18M
    if(ps_ctxt->u1_disable_intra_eval)
1796
83.9k
    {
1797
        /* rdopt evaluation of intra disabled as inter is clear winner */
1798
83.9k
        ps_cu_analyse->u1_num_intra_rdopt_cands = 0;
1799
1800
        /* all the modes invalidated */
1801
83.9k
        ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
1802
83.9k
        ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
1803
83.9k
        ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][0] = 255;
1804
83.9k
        ps_cu_analyse->u1_chroma_intra_pred_mode = 255;
1805
1806
        /* no intra candt to verify */
1807
83.9k
        ps_cu_analyse->s_cu_intra_cand.b6_num_intra_cands = 0;
1808
83.9k
    }
1809
1810
3.18M
#if DISABLE_L2_IPE_IN_PB_L1_IN_B
1811
3.18M
    if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) && (ps_cu_analyse->u1_cu_size == 32) &&
1812
3.18M
       (ps_ctxt->i1_slice_type != ISLICE))
1813
168k
    {
1814
        /* rdopt evaluation of intra disabled as inter is clear winner */
1815
168k
        ps_cu_analyse->u1_num_intra_rdopt_cands = 0;
1816
1817
        /* all the modes invalidated */
1818
168k
        ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
1819
168k
        ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
1820
168k
        ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][0] = 255;
1821
168k
        ps_cu_analyse->u1_chroma_intra_pred_mode = 255;
1822
1823
        /* no intra candt to verify */
1824
168k
        ps_cu_analyse->s_cu_intra_cand.b6_num_intra_cands = 0;
1825
168k
    }
1826
3.18M
#endif
1827
1828
3.18M
    if(DISABLE_INTRA_WHEN_NOISY && ps_cu_prms->u1_is_cu_noisy)
1829
0
    {
1830
0
        ps_cu_analyse->u1_num_intra_rdopt_cands = 0;
1831
0
    }
1832
1833
3.18M
    if(ps_cu_analyse->u1_num_intra_rdopt_cands || ps_cu_tree_analyse->u1_intra_eval_enable)
1834
2.78M
    {
1835
2.78M
        ihevce_intra_mode_populator(
1836
2.78M
            &ps_cu_analyse->s_cu_intra_cand,
1837
2.78M
            ps_cur_ipe_ctb,
1838
2.78M
            ps_cu_tree_analyse,
1839
2.78M
            ps_ctxt->i1_slice_type,
1840
2.78M
            ps_ctxt->i4_quality_preset);
1841
1842
2.78M
        ps_cu_analyse->u1_num_intra_rdopt_cands = 1;
1843
2.78M
    }
1844
1845
3.18M
    ASSERT(!!ps_cu_analyse->u1_num_intra_rdopt_cands || ps_cu_analyse->u1_num_inter_cands);
1846
1847
3.18M
    if(ps_ctxt->u1_use_top_at_ctb_boundary)
1848
1.97M
    {
1849
        /* Wait till top data is ready          */
1850
        /* Currently checking till top right CU */
1851
1.97M
        curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
1852
1853
1.97M
        if(0 == ps_cu_analyse->b3_cu_pos_y)
1854
603k
        {
1855
603k
            ihevce_dmgr_chk_row_row_sync(
1856
603k
                ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
1857
603k
                curr_cu_pos_in_row,
1858
603k
                cu_top_right_offset,
1859
603k
                cu_top_right_dep_pos,
1860
603k
                ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1861
603k
                ps_ctxt->thrd_id);
1862
603k
        }
1863
1.97M
    }
1864
1865
3.18M
#if !DISABLE_TOP_SYNC
1866
3.18M
    {
1867
3.18M
        if(0 == ps_cu_analyse->b3_cu_pos_y)
1868
1.06M
        {
1869
1.06M
            if((0 == i4_ctb_x_off) && (i4_ctb_y_off != 0))
1870
70.9k
            {
1871
70.9k
                if(ps_cu_analyse->b3_cu_pos_x == 0)
1872
24.8k
                {
1873
24.8k
                    if(!ps_ctxt->u1_use_top_at_ctb_boundary)
1874
8.59k
                    {
1875
                        /* Wait till top data is ready          */
1876
                        /* Currently checking till top right CU */
1877
8.59k
                        curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
1878
1879
8.59k
                        if(0 == ps_cu_analyse->b3_cu_pos_y)
1880
8.59k
                        {
1881
8.59k
                            ihevce_dmgr_chk_row_row_sync(
1882
8.59k
                                ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
1883
8.59k
                                curr_cu_pos_in_row,
1884
8.59k
                                cu_top_right_offset,
1885
8.59k
                                cu_top_right_dep_pos,
1886
8.59k
                                ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1887
8.59k
                                ps_ctxt->thrd_id);
1888
8.59k
                        }
1889
8.59k
                    }
1890
1891
24.8k
                    ihevce_entropy_rdo_copy_states(
1892
24.8k
                        &ps_ctxt->s_rdopt_entropy_ctxt,
1893
24.8k
                        ps_ctxt->pu1_top_rt_cabac_state,
1894
24.8k
                        UPDATE_ENT_SYNC_RDO_STATE);
1895
24.8k
                }
1896
70.9k
            }
1897
1.06M
        }
1898
3.18M
    }
1899
#else
1900
    {
1901
        if((0 == ps_cu_analyse->b3_cu_pos_y) && (IHEVCE_QUALITY_P6 != ps_ctxt->i4_quality_preset))
1902
        {
1903
            if((0 == i4_ctb_x_off) && (i4_ctb_y_off != 0))
1904
            {
1905
                if(ps_cu_analyse->b3_cu_pos_x == 0)
1906
                {
1907
                    if(!ps_ctxt->u1_use_top_at_ctb_boundary)
1908
                    {
1909
                        /* Wait till top data is ready          */
1910
                        /* Currently checking till top right CU */
1911
                        curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
1912
1913
                        if(0 == ps_cu_analyse->b3_cu_pos_y)
1914
                        {
1915
                            ihevce_dmgr_chk_row_row_sync(
1916
                                ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
1917
                                curr_cu_pos_in_row,
1918
                                cu_top_right_offset,
1919
                                cu_top_right_dep_pos,
1920
                                ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1921
                                ps_ctxt->thrd_id);
1922
                        }
1923
                    }
1924
1925
                    ihevce_entropy_rdo_copy_states(
1926
                        &ps_ctxt->s_rdopt_entropy_ctxt,
1927
                        ps_ctxt->pu1_top_rt_cabac_state,
1928
                        UPDATE_ENT_SYNC_RDO_STATE);
1929
                }
1930
            }
1931
        }
1932
        else if((0 == ps_cu_analyse->b3_cu_pos_y) && (IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset))
1933
        {
1934
            UWORD8 u1_cabac_init_idc;
1935
            WORD8 i1_cabac_init_flag =
1936
                ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt->ps_slice_hdr->i1_cabac_init_flag;
1937
1938
            if(ps_ctxt->i1_slice_type == ISLICE)
1939
            {
1940
                u1_cabac_init_idc = 0;
1941
            }
1942
            else if(ps_ctxt->i1_slice_type == PSLICE)
1943
            {
1944
                u1_cabac_init_idc = i1_cabac_init_flag ? 2 : 1;
1945
            }
1946
            else
1947
            {
1948
                u1_cabac_init_idc = i1_cabac_init_flag ? 1 : 2;
1949
            }
1950
1951
            ihevce_entropy_rdo_copy_states(
1952
                &ps_ctxt->s_rdopt_entropy_ctxt,
1953
                (UWORD8 *)gau1_ihevc_cab_ctxts[u1_cabac_init_idc][ps_ctxt->i4_frame_qp],
1954
                UPDATE_ENT_SYNC_RDO_STATE);
1955
        }
1956
    }
1957
#endif
1958
1959
    /*2 Multi- dimensinal array based on trans size  of rounding factor to be added here */
1960
    /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
1961
    /* Currently the complete array will contain only single value*/
1962
    /*The rounding factor is calculated with the formula
1963
    Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
1964
    rounding factor = (1 - DeadZone Val)
1965
1966
    Assumption: Cabac states of All the sub-blocks in the TU are considered independent
1967
    */
1968
1969
    /*As long as coef level rdoq is enabled perform this operation */
1970
3.18M
    is_first_cu_in_ctb = ((0 == ps_cu_analyse->b3_cu_pos_x) && (0 == ps_cu_analyse->b3_cu_pos_y));
1971
3.18M
    is_ctb_level_quant_rounding =
1972
3.18M
        ((ps_ctxt->i4_quant_rounding_level == CTB_LEVEL_QUANT_ROUNDING) &&
1973
3.18M
         (1 == is_first_cu_in_ctb));
1974
3.18M
    is_nctb_level_quant_rounding =
1975
3.18M
        ((ps_ctxt->i4_quant_rounding_level == NCTB_LEVEL_QUANT_ROUNDING) &&
1976
3.18M
         (1 == is_first_cu_in_ctb) && (((i4_ctb_x_off >> 6) % NUM_CTB_QUANT_ROUNDING) == 0));
1977
1978
3.18M
    if((ps_ctxt->i4_quant_rounding_level == CU_LEVEL_QUANT_ROUNDING) ||
1979
3.18M
       (ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) ||
1980
3.18M
       (1 == is_ctb_level_quant_rounding) || (1 == is_nctb_level_quant_rounding))
1981
1.97M
    {
1982
1.97M
        double i4_lamda_modifier, i4_lamda_modifier_uv;
1983
1.97M
        WORD32 trans_size, trans_size_cr;
1984
1.97M
        trans_size = ps_cu_analyse->u1_cu_size;
1985
1986
1.97M
        if((1 == is_ctb_level_quant_rounding) || (1 == is_nctb_level_quant_rounding))
1987
0
        {
1988
0
            trans_size = MAX_TU_SIZE;
1989
0
        }
1990
1.97M
        else
1991
1.97M
        {
1992
1.97M
            if(ps_cu_analyse->u1_cu_size == 64)
1993
61.4k
            {
1994
61.4k
                trans_size >>= 1;
1995
61.4k
            }
1996
1.97M
        }
1997
1998
        /*Chroma trans size = half of luma trans size */
1999
1.97M
        trans_size_cr = trans_size >> 1;
2000
2001
1.97M
        if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
2002
307k
        {
2003
307k
            i4_lamda_modifier = ps_ctxt->i4_lamda_modifier *
2004
307k
                                CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
2005
307k
            i4_lamda_modifier_uv =
2006
307k
                ps_ctxt->i4_uv_lamda_modifier *
2007
307k
                CLIP3((((double)(ps_ctxt->i4_chrm_cu_qp - 12)) / 6.0), 2.00, 4.00);
2008
307k
        }
2009
1.66M
        else
2010
1.66M
        {
2011
1.66M
            i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
2012
1.66M
            i4_lamda_modifier_uv = ps_ctxt->i4_uv_lamda_modifier;
2013
1.66M
        }
2014
1.97M
        if(ps_ctxt->i4_use_const_lamda_modifier)
2015
0
        {
2016
0
            if(ISLICE == ps_ctxt->i1_slice_type)
2017
0
            {
2018
0
                i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
2019
0
                i4_lamda_modifier_uv = ps_ctxt->f_i_pic_lamda_modifier;
2020
0
            }
2021
0
            else
2022
0
            {
2023
0
                i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
2024
0
                i4_lamda_modifier_uv = CONST_LAMDA_MOD_VAL;
2025
0
            }
2026
0
        }
2027
2028
1.97M
        do
2029
5.53M
        {
2030
5.53M
            memset(
2031
5.53M
                ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3],
2032
5.53M
                0,
2033
5.53M
                trans_size * trans_size * sizeof(WORD32));
2034
5.53M
            memset(
2035
5.53M
                ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3],
2036
5.53M
                0,
2037
5.53M
                trans_size * trans_size * sizeof(WORD32));
2038
2039
            /*ps_ctxt->i4_quant_rnd_factor[intra_flag], is currently not used */
2040
5.53M
            ihevce_quant_rounding_factor_gen(
2041
5.53M
                trans_size,
2042
5.53M
                1,  //is_luma = 1
2043
5.53M
                &ps_ctxt->s_rdopt_entropy_ctxt,
2044
5.53M
                ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3],
2045
5.53M
                ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3],
2046
5.53M
                i4_lamda_modifier,
2047
5.53M
                0);  //is_tu_level_quant rounding = 0
2048
2049
5.53M
            trans_size = trans_size >> 1;
2050
2051
5.53M
        } while(trans_size >= 4);
2052
2053
        /*CHROMA Quant Rounding is to be enabled with CU/TU/CTB/NCTB Luma rounding */
2054
        /*Please note chroma is calcualted only for 1st TU at TU level Rounding */
2055
1.97M
        if(ps_ctxt->i4_chroma_quant_rounding_level == CHROMA_QUANT_ROUNDING)
2056
1.97M
        {
2057
1.97M
            do
2058
3.55M
            {
2059
3.55M
                memset(
2060
3.55M
                    ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size_cr >> 3],
2061
3.55M
                    0,
2062
3.55M
                    trans_size_cr * trans_size_cr * sizeof(WORD32));
2063
3.55M
                memset(
2064
3.55M
                    ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size_cr >> 3],
2065
3.55M
                    0,
2066
3.55M
                    trans_size_cr * trans_size_cr * sizeof(WORD32));
2067
2068
3.55M
                ihevce_quant_rounding_factor_gen(
2069
3.55M
                    trans_size_cr,
2070
3.55M
                    0,  //is_luma = 0
2071
3.55M
                    &ps_ctxt->s_rdopt_entropy_ctxt,
2072
3.55M
                    ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size_cr >> 3],
2073
3.55M
                    ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size_cr >> 3],
2074
3.55M
                    i4_lamda_modifier_uv,
2075
3.55M
                    0);  //is_tu_level_quant rounding = 0
2076
2077
3.55M
                trans_size_cr = trans_size_cr >> 1;
2078
2079
3.55M
            } while(trans_size_cr >= 4);
2080
1.97M
        }
2081
1.97M
    }
2082
2083
#if DISABLE_INTRAS_IN_BPIC
2084
    if((ps_ctxt->i1_slice_type == BSLICE) && (ps_cu_analyse->u1_num_inter_cands))
2085
    {
2086
        ps_cu_analyse->u1_num_intra_rdopt_cands = 0;
2087
    }
2088
#endif
2089
2090
3.18M
    rdopt_best_cost = ihevce_cu_mode_decide(
2091
3.18M
        ps_ctxt,
2092
3.18M
        ps_cu_prms,
2093
3.18M
        ps_cu_analyse,
2094
3.18M
        ps_final_mode_state,
2095
3.18M
        pu1_ecd_data,
2096
3.18M
        ps_col_pu,
2097
3.18M
        pu1_col_pu_map,
2098
3.18M
        col_start_pu_idx);
2099
2100
3.18M
    return rdopt_best_cost;
2101
3.18M
}
2102
2103
/**
2104
******************************************************************************
2105
* \if Function name : ihevce_enc_loop_cu_bot_copy \endif
2106
*
2107
* \brief
2108
*    This function copy the bottom data at CU level to row buffers
2109
*
2110
* \date
2111
*    18/09/2012
2112
*
2113
* \author
2114
*    Ittiam
2115
*
2116
* \return
2117
*
2118
* List of Functions
2119
*
2120
*
2121
******************************************************************************
2122
*/
2123
void ihevce_enc_loop_cu_bot_copy(
2124
    ihevce_enc_loop_ctxt_t *ps_ctxt,
2125
    enc_loop_cu_prms_t *ps_cu_prms,
2126
    ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt,
2127
    WORD32 curr_cu_pos_in_row,
2128
    WORD32 curr_cu_pos_in_ctb)
2129
598k
{
2130
    /* ---------------------------------------------- */
2131
    /* copy the bottom row  data to the row buffers   */
2132
    /* ---------------------------------------------- */
2133
598k
    nbr_4x4_t *ps_top_nbr;
2134
598k
    UWORD8 *pu1_buff;
2135
598k
    UWORD8 *pu1_luma_top, *pu1_chrm_top;
2136
598k
    WORD32 nbr_strd;
2137
2138
598k
    WORD32 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
2139
2140
    /* derive the appropraite pointers */
2141
598k
    pu1_luma_top = (UWORD8 *)ps_ctxt->pv_bot_row_luma + curr_cu_pos_in_row;
2142
598k
    pu1_chrm_top = (UWORD8 *)ps_ctxt->pv_bot_row_chroma + curr_cu_pos_in_row;
2143
598k
    ps_top_nbr = ps_ctxt->ps_bot_row_nbr + (curr_cu_pos_in_row >> 2);
2144
598k
    nbr_strd = ps_cu_prms->i4_ctb_size >> 2;
2145
2146
    /* copy bottom luma data */
2147
598k
    pu1_buff = ps_cu_prms->pu1_luma_recon +
2148
598k
               (ps_cu_prms->i4_luma_recon_stride * (ps_cu_prms->i4_ctb_size - 1));
2149
2150
598k
    pu1_buff += curr_cu_pos_in_ctb;
2151
2152
598k
    memcpy(pu1_luma_top, pu1_buff, ps_enc_out_ctxt->u1_cu_size);
2153
2154
    /* copy bottom chroma data cb and cr pixel interleaved */
2155
598k
    pu1_buff = ps_cu_prms->pu1_chrm_recon + (ps_cu_prms->i4_chrm_recon_stride *
2156
598k
                                             ((ps_cu_prms->i4_ctb_size >> (0 == u1_is_422)) - 1));
2157
2158
598k
    pu1_buff += curr_cu_pos_in_ctb;
2159
2160
598k
    memcpy(pu1_chrm_top, pu1_buff, ps_enc_out_ctxt->u1_cu_size);
2161
2162
    /* store the nbr 4x4 data at cu level */
2163
598k
    {
2164
598k
        nbr_4x4_t *ps_nbr;
2165
2166
        /* copy bottom nbr data */
2167
598k
        ps_nbr = &ps_ctxt->as_ctb_nbr_arr[0];
2168
598k
        ps_nbr += ((ps_cu_prms->i4_ctb_size >> 2) - 1) * nbr_strd;
2169
2170
598k
        ps_nbr += (curr_cu_pos_in_ctb >> 2);
2171
2172
598k
        memcpy(ps_top_nbr, ps_nbr, (ps_enc_out_ctxt->u1_cu_size >> 2) * sizeof(nbr_4x4_t));
2173
598k
    }
2174
598k
    return;
2175
598k
}
2176
2177
/**
******************************************************************************
* \if Function name : ihevce_update_final_cu_results \endif
*
* \brief
*    Commits one coded CU to the final output structures.
*    Fills position and size of the current cu_enc_loop_out_t entry, records
*    the current row PU/TU pointers in it, calls ihevce_store_cu_final, and
*    then advances the row-level cursors (PU, TU, ECD data, CU output entry,
*    CU count, PU count) past this CU.
*    If the CU's bottom edge coincides with ps_ctxt->u4_cur_ctb_ht, the CU's
*    bottom data is copied to the row buffers through the
*    pv_enc_loop_cu_bot_copy function pointer and the top-right dependency
*    manager is updated via ihevce_dmgr_set_row_row_sync.
*
* \param[in,out] ps_ctxt : enc loop context; supplies u4_cur_ctb_ht, the
*                bottom-copy function pointer, the top-right dependency
*                manager handle and the tile column index
* \param[in] ps_enc_out_ctxt : CU node whose position, size, PU count and
*                ECD byte count are committed
* \param[in] ps_cu_prms : CU level params; i4_ctb_size is read here and the
*                structure is forwarded to the callees
* \param[in,out] pps_row_col_pu : optional (may be NULL) pointer to a
*                pu_col_mv_t cursor; advanced by the number of PUs in the CU
* \param[in,out] pi4_col_pu_map_idx : optional (may be NULL) index; advanced
*                by the number of PUs in the CU
* \param[in,out] ps_cu_update_prms : bundle of row-level cursors that are
*                read and advanced by this function
* \param[in] ctb_ctr : CTB counter; multiplied by i4_ctb_size to form the
*                CU's position in the row
* \param[in] vert_ctb_ctr : forwarded unchanged to
*                ihevce_dmgr_set_row_row_sync
*
* \return
*    None
*
* \author
*  Ittiam
*
*****************************************************************************
*/
2191
void ihevce_update_final_cu_results(
2192
    ihevce_enc_loop_ctxt_t *ps_ctxt,
2193
    ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt,
2194
    enc_loop_cu_prms_t *ps_cu_prms,
2195
    pu_col_mv_t **pps_row_col_pu,
2196
    WORD32 *pi4_col_pu_map_idx,
2197
    cu_final_update_prms *ps_cu_update_prms,
2198
    WORD32 ctb_ctr,
2199
    WORD32 vert_ctb_ctr)
2200
2.00M
{
2201
2.00M
    WORD32 curr_cu_pos_in_row;
2202
2203
2.00M
    /* local aliases of the row-level cursors held in ps_cu_update_prms; */
    /* ps_cu_final is the output entry that this call fills              */
    cu_enc_loop_out_t *ps_cu_final = *ps_cu_update_prms->pps_cu_final;
2204
2.00M
    pu_t **pps_row_pu = ps_cu_update_prms->pps_row_pu;
2205
2.00M
    tu_enc_loop_out_t **pps_row_tu = ps_cu_update_prms->pps_row_tu;
2206
2.00M
    UWORD8 **ppu1_row_ecd_data = ps_cu_update_prms->ppu1_row_ecd_data;
2207
2.00M
    WORD32 *pi4_num_pus_in_ctb = ps_cu_update_prms->pi4_num_pus_in_ctb;
2208
2.00M
    UWORD32 u4_cu_size = ps_enc_out_ctxt->u1_cu_size;
2209
2.00M
    ps_cu_final->b3_cu_pos_x = ps_enc_out_ctxt->b3_cu_pos_x;
2210
2.00M
    ps_cu_final->b3_cu_pos_y = ps_enc_out_ctxt->b3_cu_pos_y;
2211
2212
2.00M
    /* CU size is stored as u1_cu_size divided by 8 */
    ps_cu_final->b4_cu_size = ps_enc_out_ctxt->u1_cu_size >> 3;
2213
2214
    /* store the current pu and tu pointers */
2215
2.00M
    ps_cu_final->ps_pu = *pps_row_pu;
2216
2.00M
    ps_cu_final->ps_enc_tu = *pps_row_tu;
2217
2.00M
    /* position of this CU within the row: CTB offset plus the CU x */
    /* position scaled by 8                                         */
    curr_cu_pos_in_row = ctb_ctr * ps_cu_prms->i4_ctb_size + (ps_cu_final->b3_cu_pos_x << 3);
2218
2219
2.00M
    /* NOTE(review): presumably fills the remaining fields of ps_cu_final */
    /* (u2_num_tus_in_cu is read from it below) - confirm against         */
    /* ihevce_store_cu_final                                              */
    ihevce_store_cu_final(ps_ctxt, ps_cu_final, *ppu1_row_ecd_data, ps_enc_out_ctxt, ps_cu_prms);
2220
2221
2.00M
    /* the collocated PU cursor and map index are optional; each is */
    /* advanced by the PU count of this CU only when supplied       */
    if(NULL != pps_row_col_pu)
2222
1.39M
    {
2223
1.39M
        (*pps_row_col_pu) += ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2224
1.39M
    }
2225
2.00M
    if(NULL != pi4_col_pu_map_idx)
2226
1.39M
    {
2227
1.39M
        (*pi4_col_pu_map_idx) += ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2228
1.39M
    }
2229
2.00M
    /* advance the row-level PU count, TU/PU cursors and ECD data cursor */
    /* past the data belonging to this CU                                */
    (*pi4_num_pus_in_ctb) += ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2230
2.00M
    (*pps_row_tu) += ps_cu_final->u2_num_tus_in_cu;
2231
2.00M
    (*pps_row_pu) += ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2232
2.00M
    (*ppu1_row_ecd_data) += ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
2233
2234
2.00M
    /* step to the next output CU entry and count this CU; the local */
    /* ps_cu_final still refers to the entry just written            */
    (*ps_cu_update_prms->pps_cu_final)++;
2235
2.00M
    (*ps_cu_update_prms->pu1_num_cus_in_ctb_out)++;
2236
2237
    /* Updated for each CU in bottom row  of CTB */
2238
2.00M
    /* true when the CU's bottom edge (y position scaled by 8, plus CU */
    /* size) equals the current CTB height                             */
    if(((ps_cu_final->b3_cu_pos_y << 3) + u4_cu_size) == ps_ctxt->u4_cur_ctb_ht)
2239
598k
    {
2240
        /* copy the bottom data to row buffers */
2241
598k
        /* the copy routine is stored as a void pointer in the context */
        /* and cast to its function pointer type at the call site      */
        ((pf_enc_loop_cu_bot_copy)ps_ctxt->pv_enc_loop_cu_bot_copy)(
2242
598k
            ps_ctxt,
2243
598k
            ps_cu_prms,
2244
598k
            ps_enc_out_ctxt,
2245
598k
            curr_cu_pos_in_row,
2246
598k
            (ps_enc_out_ctxt->b3_cu_pos_x << 3));
2247
2248
        /* Setting Dependency for CU TopRight */
2249
598k
        /* the position passed is the right edge of this CU in the row */
        ihevce_dmgr_set_row_row_sync(
2250
598k
            ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
2251
598k
            (curr_cu_pos_in_row + ps_enc_out_ctxt->u1_cu_size),
2252
598k
            vert_ctb_ctr,
2253
598k
            ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2254
2255
        /* Setting Dependency for Entropy to consume is made at CTB level */
2256
598k
    }
2257
2.00M
}
2258
2259
/**
2260
******************************************************************************
2261
* \if Function name : ihevce_cu_recurse_decide \endif
2262
*
2263
* \brief
2264
*    Coding Unit mode decide function. Performs RD opt and decides the best mode
2265
*
2266
* \param[in] pv_ctxt : pointer to enc_loop module
2267
* \param[in] ps_cu_prms  : pointer to coding unit params (position, buffer pointers)
2268
* \param[in] ps_cu_analyse : pointer to cu analyse
2269
* \param[out] ps_cu_final : pointer to cu final
2270
* \param[out] pu1_ecd_data : pointer to store coeff data for ECD
2271
* \param[out] ps_row_col_pu : colocated pu buffer pointer
2272
* \param[out] pu1_row_pu_map : colocated pu map buffer pointer
2273
* \param[in] col_start_pu_idx : pu index start value
2274
*
2275
* \return
2276
*    None
2277
*
2278
*
2279
* \author
2280
*  Ittiam
2281
*
2282
*****************************************************************************
2283
*/
2284
WORD32 ihevce_cu_recurse_decide(
2285
    ihevce_enc_loop_ctxt_t *ps_ctxt,
2286
    enc_loop_cu_prms_t *ps_cu_prms,
2287
    cur_ctb_cu_tree_t *ps_cu_tree_analyse,
2288
    cur_ctb_cu_tree_t *ps_cu_tree_analyse_parent,
2289
    ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
2290
    me_ctb_data_t *ps_cu_me_data,
2291
    pu_col_mv_t **pps_col_pu,
2292
    cu_final_update_prms *ps_cu_update_prms,
2293
    UWORD8 *pu1_col_pu_map,
2294
    WORD32 *pi4_col_start_pu_idx,
2295
    WORD32 i4_tree_depth,
2296
    WORD32 i4_ctb_x_off,
2297
    WORD32 i4_ctb_y_off,
2298
    WORD32 cur_ctb_ht)
2299
6.19M
{
2300
6.19M
    cur_ctb_cu_tree_t *ps_cu_tree_analyse_child[4];
2301
6.19M
    final_mode_state_t s_final_mode_state;
2302
2303
6.19M
    WORD32 i;
2304
6.19M
    WORD32 child_nodes_null;
2305
6.19M
    LWORD64 i8_least_child_cost;
2306
2307
6.19M
    WORD32 num_children_encoded = 0;
2308
2309
    /* Take backup of collocated start PU index for parent node rdo for PQ */
2310
6.19M
    WORD32 i4_col_pu_idx_bkup = *pi4_col_start_pu_idx;
2311
6.19M
    pu_col_mv_t *ps_col_mv_bkup = *pps_col_pu;
2312
2313
6.19M
#if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
2314
6.19M
    WORD32 x0_frm = i4_ctb_x_off + (ps_cu_tree_analyse->b3_cu_pos_x << 3);
2315
6.19M
    WORD32 y0_frm = i4_ctb_y_off + (ps_cu_tree_analyse->b3_cu_pos_y << 3);
2316
6.19M
    WORD32 pic_wd = ps_ctxt->s_sao_ctxt_t.ps_sps->i2_pic_width_in_luma_samples;
2317
6.19M
    WORD32 pic_ht = ps_ctxt->s_sao_ctxt_t.ps_sps->i2_pic_height_in_luma_samples;
2318
6.19M
    WORD32 log2_min_cb_size = ps_ctxt->s_sao_ctxt_t.ps_sps->i1_log2_min_coding_block_size;
2319
6.19M
    WORD32 cu_size = ps_cu_tree_analyse->u1_cu_size;
2320
2321
    /* bits for coding split_cu_flag = 1 */
2322
6.19M
    WORD32 split_cu1_bits_q12 = 0;
2323
2324
    /* bits for coding split_cu_flag = 0 */
2325
6.19M
    WORD32 split_cu0_bits_q12 = 0;
2326
6.19M
#endif
2327
2328
6.19M
    UWORD8 u1_is_cu_noisy = ps_ctxt->u1_is_stasino_enabled
2329
6.19M
                                ? ihevce_determine_cu_noise_based_on_8x8Blk_data(
2330
0
                                      ps_cu_prms->pu1_is_8x8Blk_noisy,
2331
0
                                      ((ps_cu_tree_analyse->b3_cu_pos_x << 3) >> 4) << 4,
2332
0
                                      ((ps_cu_tree_analyse->b3_cu_pos_y << 3) >> 4) << 4,
2333
0
                                      MAX(16, ps_cu_tree_analyse->u1_cu_size))
2334
6.19M
                                : 0;
2335
2336
6.19M
#if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
2337
6.19M
    LWORD64 i8_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
2338
6.19M
#endif
2339
2340
6.19M
    (void)ps_cu_tree_analyse_parent;
2341
2342
6.19M
#if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
2343
6.19M
    if(!ps_ctxt->u1_enable_psyRDOPT && u1_is_cu_noisy)
2344
0
    {
2345
0
        ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
2346
0
        ps_ctxt->i8_cl_ssd_lambda_chroma_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
2347
0
    }
2348
6.19M
#endif
2349
2350
6.19M
    if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
2351
0
    {
2352
0
        i8_lambda_qf = ((float)i8_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
2353
0
    }
2354
2355
6.19M
    ps_cu_tree_analyse_child[0] = ps_cu_tree_analyse->ps_child_node_tl;
2356
6.19M
    ps_cu_tree_analyse_child[1] = ps_cu_tree_analyse->ps_child_node_tr;
2357
6.19M
    ps_cu_tree_analyse_child[2] = ps_cu_tree_analyse->ps_child_node_bl;
2358
6.19M
    ps_cu_tree_analyse_child[3] = ps_cu_tree_analyse->ps_child_node_br;
2359
2360
6.19M
    child_nodes_null =
2361
6.19M
        ((ps_cu_tree_analyse_child[0] == NULL) + (ps_cu_tree_analyse_child[1] == NULL) +
2362
6.19M
         (ps_cu_tree_analyse_child[2] == NULL) + (ps_cu_tree_analyse_child[3] == NULL));
2363
2364
6.19M
#if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
2365
6.19M
#if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
2366
6.19M
    if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2367
3.82M
#endif
2368
3.82M
    {
2369
        /*----------------------------------------------*/
2370
        /* ---------- CU Depth Bit Estimation --------- */
2371
        /*----------------------------------------------*/
2372
2373
        /* Encode cu split flags based on following conditions; See section 7.3.8*/
2374
3.82M
        if(((x0_frm + cu_size) <= pic_wd) && ((y0_frm + cu_size) <= pic_ht) &&
2375
3.82M
           (cu_size > (1 << log2_min_cb_size))) /* &&(ps_entropy_ctxt->i1_ctb_num_pcm_blks == 0)) */
2376
1.12M
        {
2377
1.12M
            WORD32 left_cu_depth = 0;
2378
1.12M
            WORD32 top_cu_depth = 0;
2379
1.12M
            WORD32 pos_x_4x4 = ps_cu_tree_analyse->b3_cu_pos_x << 1;
2380
1.12M
            WORD32 pos_y_4x4 = ps_cu_tree_analyse->b3_cu_pos_y << 1;
2381
1.12M
            WORD32 num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
2382
1.12M
            WORD32 cur_4x4_in_ctb = pos_x_4x4 + (pos_y_4x4 * num_4x4_in_ctb);
2383
1.12M
            UWORD8 u1_split_cu_flag_cab_model;
2384
1.12M
            WORD32 split_cu_ctxt_inc;
2385
2386
            /* Left and Top CU depth is required for cabac context */
2387
2388
            /* CU left */
2389
1.12M
            if(0 == pos_x_4x4)
2390
418k
            {
2391
                /* CTB boundary */
2392
418k
                if(i4_ctb_x_off)
2393
141k
                {
2394
141k
                    left_cu_depth = ps_ctxt->as_left_col_nbr[pos_y_4x4].b2_cu_depth;
2395
141k
                }
2396
418k
            }
2397
703k
            else
2398
703k
            {
2399
                /* inside CTB */
2400
703k
                left_cu_depth = ps_ctxt->as_ctb_nbr_arr[cur_4x4_in_ctb - 1].b2_cu_depth;
2401
703k
            }
2402
2403
            /* CU top */
2404
1.12M
            if(0 == pos_y_4x4)
2405
423k
            {
2406
                /* CTB boundary */
2407
423k
                if(i4_ctb_y_off)
2408
113k
                {
2409
                    /* Wait till top cu depth is available */
2410
113k
                    ihevce_dmgr_chk_row_row_sync(
2411
113k
                        ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
2412
113k
                        (i4_ctb_x_off) + (pos_x_4x4 << 2),
2413
113k
                        4,
2414
113k
                        ((i4_ctb_y_off >> 6) - 1),
2415
113k
                        ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
2416
113k
                        ps_ctxt->thrd_id);
2417
2418
113k
                    top_cu_depth =
2419
113k
                        ps_ctxt->ps_top_row_nbr[(i4_ctb_x_off >> 2) + pos_x_4x4].b2_cu_depth;
2420
113k
                }
2421
423k
            }
2422
698k
            else
2423
698k
            {
2424
                /* inside CTB */
2425
698k
                top_cu_depth = ps_ctxt->as_ctb_nbr_arr[cur_4x4_in_ctb - num_4x4_in_ctb].b2_cu_depth;
2426
698k
            }
2427
2428
1.12M
            split_cu_ctxt_inc = IHEVC_CAB_SPLIT_CU_FLAG + (left_cu_depth > i4_tree_depth) +
2429
1.12M
                                (top_cu_depth > i4_tree_depth);
2430
2431
1.12M
            u1_split_cu_flag_cab_model =
2432
1.12M
                ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][split_cu_ctxt_inc];
2433
2434
            /* bits for coding split_cu_flag = 1 */
2435
1.12M
            split_cu1_bits_q12 = gau2_ihevce_cabac_bin_to_bits[u1_split_cu_flag_cab_model ^ 1];
2436
2437
            /* bits for coding split_cu_flag = 0 */
2438
1.12M
            split_cu0_bits_q12 = gau2_ihevce_cabac_bin_to_bits[u1_split_cu_flag_cab_model ^ 0];
2439
2440
            /* update the cu split cabac context of all child nodes before evaluating child */
2441
2.69M
            for(i = (i4_tree_depth + 1); i < 4; i++)
2442
1.57M
            {
2443
1.57M
                ps_ctxt->au1_rdopt_recur_ctxt_models[i][split_cu_ctxt_inc] =
2444
1.57M
                    gau1_ihevc_next_state[(u1_split_cu_flag_cab_model << 1) | 1];
2445
1.57M
            }
2446
2447
            /* update the cu split cabac context of the parent node with split flag = 0 */
2448
1.12M
            ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][split_cu_ctxt_inc] =
2449
1.12M
                gau1_ihevc_next_state[(u1_split_cu_flag_cab_model << 1) | 0];
2450
1.12M
        }
2451
3.82M
    }
2452
6.19M
#endif
2453
2454
    /* If all the child nodes are null, then do rdo for this node and return the cost */
2455
6.19M
    if((1 == ps_cu_tree_analyse->is_node_valid) && (4 == child_nodes_null))
2456
2.41M
    {
2457
2.41M
        WORD32 i4_num_bytes_ecd_data;
2458
2459
#if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2460
        COPY_CABAC_STATES(
2461
            &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2462
            &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
2463
            IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2464
#else
2465
2.41M
        if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2466
1.02M
        {
2467
1.02M
            COPY_CABAC_STATES(
2468
1.02M
                &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2469
1.02M
                &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
2470
1.02M
                IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2471
1.02M
        }
2472
2.41M
#endif
2473
2474
2.41M
        ps_cu_prms->u1_is_cu_noisy = u1_is_cu_noisy;
2475
2.41M
        ihevce_update_pred_qp(
2476
2.41M
            ps_ctxt, ps_cu_tree_analyse->b3_cu_pos_x, ps_cu_tree_analyse->b3_cu_pos_y);
2477
        /* DO rdo for current node here */
2478
        /* return rdo cost for current node*/
2479
2.41M
        ps_cu_tree_analyse->i8_best_rdopt_cost = ihevce_compute_rdo(
2480
2.41M
            ps_ctxt,
2481
2.41M
            ps_cu_prms,
2482
2.41M
            ps_cu_tree_analyse,
2483
2.41M
            ps_cur_ipe_ctb,
2484
2.41M
            ps_cu_me_data,
2485
2.41M
            *pps_col_pu,
2486
2.41M
            &s_final_mode_state,
2487
2.41M
            pu1_col_pu_map,
2488
2.41M
            *ps_cu_update_prms->ppu1_row_ecd_data,
2489
2.41M
            *pi4_col_start_pu_idx,
2490
2.41M
            i4_ctb_x_off,
2491
2.41M
            i4_ctb_y_off);
2492
2493
2.41M
        if((((ps_cu_tree_analyse->b3_cu_pos_y << 3) + ps_cu_tree_analyse->u1_cu_size) ==
2494
2.41M
            cur_ctb_ht) &&
2495
2.41M
           (ps_cu_tree_analyse->b3_cu_pos_x == 0) && (i4_ctb_x_off == 0))
2496
169k
        {
2497
            /* copy the state to row level context after 1st Cu, in the Last CU row of CTB */
2498
            /* copy current ctb CU states into a entropy sync state */
2499
            /* to be used for next row                              */
2500
169k
            COPY_CABAC_STATES(
2501
169k
                ps_ctxt->pu1_curr_row_cabac_state,
2502
169k
                &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2503
169k
                IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2504
169k
        }
2505
2506
#if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2507
        {
2508
#if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
2509
            /* Add parent split cu = 0 cost signalling */
2510
            ps_cu_tree_analyse->i8_best_rdopt_cost += COMPUTE_RATE_COST_CLIP30(
2511
                split_cu0_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
2512
#endif
2513
            for(i = (i4_tree_depth); i < 4; i++)
2514
            {
2515
                COPY_CABAC_STATES(
2516
                    &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2517
                    &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2518
                    IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2519
            }
2520
        }
2521
#else
2522
2.41M
        if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2523
1.02M
        {
2524
1.02M
#if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
2525
            /* Add parent split cu = 0 cost signalling */
2526
1.02M
            ps_cu_tree_analyse->i8_best_rdopt_cost += COMPUTE_RATE_COST_CLIP30(
2527
1.02M
                split_cu0_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
2528
1.02M
#endif
2529
2530
2.47M
            for(i = (i4_tree_depth); i < 4; i++)
2531
1.45M
            {
2532
1.45M
                COPY_CABAC_STATES(
2533
1.45M
                    &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2534
1.45M
                    &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2535
1.45M
                    IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2536
1.45M
            }
2537
1.02M
        }
2538
2.41M
#endif
2539
2540
2.41M
        ((pf_store_cu_results)ps_ctxt->pv_store_cu_results)(
2541
2.41M
            ps_ctxt, ps_cu_prms, &s_final_mode_state);
2542
2543
2.41M
#if(!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2544
2.41M
        if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P2)
2545
1.39M
        {
2546
1.39M
            ihevce_update_final_cu_results(
2547
1.39M
                ps_ctxt,
2548
1.39M
                ps_ctxt->ps_enc_out_ctxt,
2549
1.39M
                ps_cu_prms,
2550
1.39M
                pps_col_pu,
2551
1.39M
                pi4_col_start_pu_idx,
2552
1.39M
                ps_cu_update_prms,
2553
1.39M
                i4_ctb_x_off >> 6,
2554
1.39M
                i4_ctb_y_off >> 6);
2555
1.39M
        }
2556
1.02M
        else
2557
1.02M
        {
2558
            /* ---- copy the luma & chroma coeffs to final output -------- */
2559
1.02M
            i4_num_bytes_ecd_data = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
2560
2561
1.02M
            if(0 != i4_num_bytes_ecd_data)
2562
219k
            {
2563
219k
                memcpy(
2564
219k
                    ps_ctxt->pu1_ecd_data,
2565
219k
                    &ps_ctxt->pu1_cu_recur_coeffs[0],
2566
219k
                    i4_num_bytes_ecd_data * sizeof(UWORD8));
2567
2568
219k
                ps_ctxt->pu1_ecd_data += i4_num_bytes_ecd_data;
2569
219k
            }
2570
2571
            /* Collocated PU updates */
2572
1.02M
            *pps_col_pu += ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2573
1.02M
            *pi4_col_start_pu_idx += ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2574
1.02M
        }
2575
#else
2576
        /* ---- copy the luma & chroma coeffs to final output -------- */
2577
        i4_num_bytes_ecd_data = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
2578
        if(0 != i4_num_bytes_ecd_data)
2579
        {
2580
            memcpy(
2581
                ps_ctxt->pu1_ecd_data,
2582
                &ps_ctxt->pu1_cu_recur_coeffs[0],
2583
                i4_num_bytes_ecd_data * sizeof(UWORD8));
2584
2585
            ps_ctxt->pu1_ecd_data += i4_num_bytes_ecd_data;
2586
        }
2587
2588
        /* Collocated PU updates */
2589
        *pps_col_pu += ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2590
        *pi4_col_start_pu_idx += ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2591
#endif
2592
2593
2.41M
        ps_ctxt->ps_enc_out_ctxt++;
2594
2.41M
        num_children_encoded++;
2595
2.41M
    }
2596
3.77M
    else
2597
3.77M
    {
2598
3.77M
        i8_least_child_cost = 0;
2599
2600
18.8M
        for(i = 0; i < 4; i++)
2601
15.1M
        {
2602
15.1M
            if(ps_cu_tree_analyse_child[i] != NULL)
2603
5.89M
            {
2604
5.89M
                num_children_encoded += ihevce_cu_recurse_decide(
2605
5.89M
                    ps_ctxt,
2606
5.89M
                    ps_cu_prms,
2607
5.89M
                    ps_cu_tree_analyse_child[i],
2608
5.89M
                    ps_cu_tree_analyse,
2609
5.89M
                    ps_cur_ipe_ctb,
2610
5.89M
                    ps_cu_me_data,
2611
5.89M
                    pps_col_pu,
2612
5.89M
                    ps_cu_update_prms,
2613
5.89M
                    pu1_col_pu_map,
2614
5.89M
                    pi4_col_start_pu_idx,
2615
5.89M
                    i4_tree_depth + 1,
2616
5.89M
                    i4_ctb_x_off,
2617
5.89M
                    i4_ctb_y_off,
2618
5.89M
                    cur_ctb_ht);
2619
2620
                /* In case of incomplete ctb, */
2621
                //if(MAX_COST != ps_cu_tree_analyse_child[i]->i4_best_rdopt_cost)
2622
5.89M
                if(((ULWORD64)(
2623
5.89M
                       i8_least_child_cost + ps_cu_tree_analyse_child[i]->i8_best_rdopt_cost)) >
2624
5.89M
                   MAX_COST_64)
2625
1.96M
                {
2626
1.96M
                    i8_least_child_cost = MAX_COST_64;
2627
1.96M
                }
2628
3.93M
                else
2629
3.93M
                {
2630
3.93M
                    i8_least_child_cost += ps_cu_tree_analyse_child[i]->i8_best_rdopt_cost;
2631
3.93M
                }
2632
5.89M
            }
2633
9.20M
            else
2634
9.20M
            {
2635
                /* If the child node is NULL, return MAX_COST*/
2636
9.20M
                i8_least_child_cost = MAX_COST_64;
2637
9.20M
            }
2638
15.1M
        }
2639
2640
3.77M
        if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2641
2.79M
        {
2642
#if !ENABLE_4CTB_EVALUATION
2643
            if((ps_cu_tree_analyse->u1_cu_size == 64) && (num_children_encoded > 10) &&
2644
               (ps_ctxt->i1_slice_type != ISLICE))
2645
            {
2646
                ps_cu_tree_analyse->is_node_valid = 0;
2647
            }
2648
#endif
2649
2.79M
        }
2650
2651
        /* If current CU node is valid, do rdo for the node and decide btwn child nodes and parent nodes  */
2652
3.77M
        if(ps_cu_tree_analyse->is_node_valid)
2653
768k
        {
2654
768k
            UWORD8 au1_cu_pu_map[(MAX_CTB_SIZE / MIN_PU_SIZE) * (MAX_CTB_SIZE / MIN_PU_SIZE)];
2655
768k
            pu_col_mv_t as_col_mv[2]; /* Max of 2 PUs only per CU */
2656
2657
768k
            WORD32 i4_col_pu_idx_start = i4_col_pu_idx_bkup;
2658
2659
            /* Copy the collocated PU map to the local array */
2660
768k
            memcpy(
2661
768k
                au1_cu_pu_map,
2662
768k
                pu1_col_pu_map,
2663
768k
                (MAX_CTB_SIZE / MIN_PU_SIZE) * (MAX_CTB_SIZE / MIN_PU_SIZE));
2664
2665
#if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2666
            COPY_CABAC_STATES(
2667
                &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2668
                &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
2669
                IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2670
2671
            /* Reset the nbr maps while computing Parent CU node ()*/
2672
            /* set the neighbour map to 0 */
2673
            ihevce_set_nbr_map(
2674
                ps_ctxt->pu1_ctb_nbr_map,
2675
                ps_ctxt->i4_nbr_map_strd,
2676
                (ps_cu_tree_analyse->b3_cu_pos_x << 1),
2677
                (ps_cu_tree_analyse->b3_cu_pos_y << 1),
2678
                (ps_cu_tree_analyse->u1_cu_size >> 2),
2679
                0);
2680
#else
2681
768k
            if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2682
768k
            {
2683
768k
                COPY_CABAC_STATES(
2684
768k
                    &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2685
768k
                    &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
2686
768k
                    IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2687
2688
                /* Reset the nbr maps while computing Parent CU node ()*/
2689
                /* set the neighbour map to 0 */
2690
768k
                ihevce_set_nbr_map(
2691
768k
                    ps_ctxt->pu1_ctb_nbr_map,
2692
768k
                    ps_ctxt->i4_nbr_map_strd,
2693
768k
                    (ps_cu_tree_analyse->b3_cu_pos_x << 1),
2694
768k
                    (ps_cu_tree_analyse->b3_cu_pos_y << 1),
2695
768k
                    (ps_cu_tree_analyse->u1_cu_size >> 2),
2696
768k
                    0);
2697
768k
            }
2698
768k
#endif
2699
2700
            /* Do rdo for the parent node */
2701
            /* Compare parent node cost vs child node costs */
2702
768k
            ps_ctxt->is_parent_cu_rdopt = 1;
2703
2704
768k
            ps_cu_prms->u1_is_cu_noisy = u1_is_cu_noisy;
2705
2706
768k
            ihevce_update_pred_qp(
2707
768k
                ps_ctxt, ps_cu_tree_analyse->b3_cu_pos_x, ps_cu_tree_analyse->b3_cu_pos_y);
2708
2709
768k
            ps_cu_tree_analyse->i8_best_rdopt_cost = ihevce_compute_rdo(
2710
768k
                ps_ctxt,
2711
768k
                ps_cu_prms,
2712
768k
                ps_cu_tree_analyse,
2713
768k
                ps_cur_ipe_ctb,
2714
768k
                ps_cu_me_data,
2715
768k
                as_col_mv,
2716
768k
                &s_final_mode_state,
2717
768k
                au1_cu_pu_map,
2718
768k
                *ps_cu_update_prms->ppu1_row_ecd_data,
2719
768k
                i4_col_pu_idx_start,
2720
768k
                i4_ctb_x_off,
2721
768k
                i4_ctb_y_off);
2722
2723
768k
            ps_ctxt->is_parent_cu_rdopt = 0;
2724
2725
#if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2726
            /* Add parent split cu cost signalling */
2727
            ps_cu_tree_analyse->i8_best_rdopt_cost += COMPUTE_RATE_COST_CLIP30(
2728
                split_cu0_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
2729
2730
            COPY_CABAC_STATES(
2731
                &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
2732
                &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2733
                IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2734
2735
            /* i8_least_child_cost += (num_children_encoded * ps_ctxt->i4_sad_lamda\
2736
            + ((1 << (LAMBDA_Q_SHIFT)))) >> (LAMBDA_Q_SHIFT + 1) */
2737
            ;
2738
            /* bits for coding cu split flag as  1 */
2739
            i8_least_child_cost += COMPUTE_RATE_COST_CLIP30(
2740
                split_cu1_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
2741
#else
2742
768k
#if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
2743
768k
            if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2744
768k
            {
2745
                /* Add parent split cu cost signalling */
2746
768k
                ps_cu_tree_analyse->i8_best_rdopt_cost += COMPUTE_RATE_COST_CLIP30(
2747
768k
                    split_cu0_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
2748
2749
768k
                COPY_CABAC_STATES(
2750
768k
                    &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
2751
768k
                    &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2752
768k
                    IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2753
2754
                /* i8_least_child_cost += (num_children_encoded * ps_ctxt->i4_sad_lamda\
2755
                + ((1 << (LAMBDA_Q_SHIFT)))) >> (LAMBDA_Q_SHIFT + 1) */
2756
768k
                ;
2757
                /* bits for coding cu split flag as  1 */
2758
768k
                i8_least_child_cost += COMPUTE_RATE_COST_CLIP30(
2759
768k
                    split_cu1_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
2760
768k
            }
2761
#else
2762
            i8_least_child_cost +=
2763
                (num_children_encoded * ps_ctxt->i4_sad_lamda + ((1 << (LAMBDA_Q_SHIFT)))) >>
2764
                (LAMBDA_Q_SHIFT + 1);
2765
#endif
2766
768k
#endif
2767
2768
            /* If child modes win over parent, discard parent enc ctxt */
2769
            /* else discard child ctxt */
2770
768k
            if(ps_cu_tree_analyse->i8_best_rdopt_cost > i8_least_child_cost)
2771
114k
            {
2772
#if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2773
                /* Store child node Models for evalution of next CU */
2774
                for(i = (i4_tree_depth); i < 4; i++)
2775
                {
2776
                    COPY_CABAC_STATES(
2777
                        &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2778
                        &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
2779
                        IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2780
                }
2781
                /* Reset cabac states if child has won */
2782
                COPY_CABAC_STATES(
2783
                    &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2784
                    &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
2785
                    IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2786
#else
2787
114k
                if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2788
114k
                {
2789
406k
                    for(i = i4_tree_depth; i < 4; i++)
2790
291k
                    {
2791
291k
                        COPY_CABAC_STATES(
2792
291k
                            &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2793
291k
                            &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
2794
291k
                            IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2795
291k
                    }
2796
                    /* Reset cabac states if child has won */
2797
114k
                    COPY_CABAC_STATES(
2798
114k
                        &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2799
114k
                        &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
2800
114k
                        IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2801
114k
                }
2802
114k
#endif
2803
114k
                ps_cu_tree_analyse->i8_best_rdopt_cost = i8_least_child_cost;
2804
114k
                ps_cu_tree_analyse->is_node_valid = 0;
2805
114k
            }
2806
653k
            else
2807
653k
            {
2808
                /* Parent node wins over child node */
2809
653k
                ihevce_enc_cu_node_ctxt_t *ps_enc_tmp_out_ctxt;
2810
653k
                WORD32 i4_num_bytes_ecd_data;
2811
653k
                WORD32 num_child_nodes = 0;
2812
653k
                WORD32 i4_num_pus_in_cu;
2813
2814
653k
                if((((ps_cu_tree_analyse->b3_cu_pos_y << 3) + ps_cu_tree_analyse->u1_cu_size) ==
2815
653k
                    cur_ctb_ht) &&
2816
653k
                   (ps_cu_tree_analyse->b3_cu_pos_x == 0) && (i4_ctb_x_off == 0))
2817
69.1k
                {
2818
                    /* copy the state to row level context after 1st Cu, in the Last CU row of CTB */
2819
                    /* copy current ctb CU states into a entropy sync state */
2820
                    /* to be used for next row                              */
2821
69.1k
                    COPY_CABAC_STATES(
2822
69.1k
                        ps_ctxt->pu1_curr_row_cabac_state,
2823
69.1k
                        &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2824
69.1k
                        IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2825
69.1k
                }
2826
2827
#if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2828
                /* Store parent node Models for evalution of next CU */
2829
                for(i = (i4_tree_depth + 1); i < 4; i++)
2830
                {
2831
                    COPY_CABAC_STATES(
2832
                        &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2833
                        &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
2834
                        IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2835
                }
2836
#else
2837
653k
                if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2838
653k
                {
2839
1.49M
                    for(i = (i4_tree_depth + 1); i < 4; i++)
2840
846k
                    {
2841
846k
                        COPY_CABAC_STATES(
2842
846k
                            &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2843
846k
                            &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
2844
846k
                            IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2845
846k
                    }
2846
653k
                }
2847
653k
#endif
2848
653k
                ((pf_store_cu_results)ps_ctxt->pv_store_cu_results)(
2849
653k
                    ps_ctxt, ps_cu_prms, &s_final_mode_state);
2850
2851
653k
#if(!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2852
653k
                if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P2)
2853
0
                {
2854
0
                    ihevce_update_final_cu_results(
2855
0
                        ps_ctxt,
2856
0
                        ps_ctxt->ps_enc_out_ctxt,
2857
0
                        ps_cu_prms,
2858
0
                        pps_col_pu,
2859
0
                        pi4_col_start_pu_idx,
2860
0
                        ps_cu_update_prms,
2861
0
                        i4_ctb_x_off >> 6,
2862
0
                        i4_ctb_y_off >> 6);
2863
2864
0
                    ps_ctxt->ps_enc_out_ctxt++;
2865
0
                }
2866
653k
                else
2867
653k
                {
2868
653k
                    ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt;
2869
2870
653k
                    num_child_nodes = num_children_encoded;
2871
2872
                    /* ---- copy the luma & chroma coeffs to final output -------- */
2873
1.71M
                    for(i = 0; i < num_child_nodes; i++)
2874
1.06M
                    {
2875
1.06M
                        i4_num_bytes_ecd_data =
2876
1.06M
                            (ps_ctxt->ps_enc_out_ctxt - i - 1)->ps_cu_prms->i4_num_bytes_ecd_data;
2877
1.06M
                        ps_ctxt->pu1_ecd_data -= i4_num_bytes_ecd_data;
2878
1.06M
                    }
2879
2880
653k
                    i4_num_bytes_ecd_data =
2881
653k
                        ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
2882
653k
                    if(0 != i4_num_bytes_ecd_data)
2883
36.3k
                    {
2884
36.3k
                        memcpy(
2885
36.3k
                            ps_ctxt->pu1_ecd_data,
2886
36.3k
                            &ps_ctxt->pu1_cu_recur_coeffs[0],
2887
36.3k
                            i4_num_bytes_ecd_data);
2888
2889
36.3k
                        ps_ctxt->pu1_ecd_data += i4_num_bytes_ecd_data;
2890
36.3k
                    }
2891
2892
653k
                    ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt - num_child_nodes;
2893
2894
653k
                    memcpy(
2895
653k
                        ps_enc_tmp_out_ctxt,
2896
653k
                        ps_ctxt->ps_enc_out_ctxt,
2897
653k
                        sizeof(ihevce_enc_cu_node_ctxt_t));
2898
653k
                    ps_enc_tmp_out_ctxt->ps_cu_prms = &ps_enc_tmp_out_ctxt->s_cu_prms;
2899
2900
                    /* Collocated PU updates */
2901
653k
                    i4_num_pus_in_cu = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2902
                    /* Copy the collocated MVs and the PU map to frame buffers */
2903
653k
                    memcpy(ps_col_mv_bkup, as_col_mv, sizeof(pu_col_mv_t) * i4_num_pus_in_cu);
2904
653k
                    memcpy(
2905
653k
                        pu1_col_pu_map,
2906
653k
                        au1_cu_pu_map,
2907
653k
                        (MAX_CTB_SIZE / MIN_PU_SIZE) * (MAX_CTB_SIZE / MIN_PU_SIZE));
2908
                    /* Update the frame buffer pointer and the map index */
2909
653k
                    *pps_col_pu = ps_col_mv_bkup + i4_num_pus_in_cu;
2910
653k
                    *pi4_col_start_pu_idx = i4_col_pu_idx_bkup + i4_num_pus_in_cu;
2911
2912
653k
                    ps_ctxt->ps_enc_out_ctxt = ps_enc_tmp_out_ctxt + 1;
2913
653k
                }
2914
#else
2915
2916
                ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt;
2917
2918
                num_child_nodes = num_children_encoded;
2919
2920
                /* ---- copy the luma & chroma coeffs to final output -------- */
2921
                for(i = 0; i < num_child_nodes; i++)
2922
                {
2923
                    i4_num_bytes_ecd_data =
2924
                        (ps_ctxt->ps_enc_out_ctxt - i - 1)->ps_cu_prms->i4_num_bytes_ecd_data;
2925
                    ps_ctxt->pu1_ecd_data -= i4_num_bytes_ecd_data;
2926
                }
2927
2928
                i4_num_bytes_ecd_data = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
2929
                if(0 != i4_num_bytes_ecd_data)
2930
                {
2931
                    memcpy(
2932
                        ps_ctxt->pu1_ecd_data,
2933
                        &ps_ctxt->pu1_cu_recur_coeffs[0],
2934
                        i4_num_bytes_ecd_data * sizeof(UWORD8));
2935
2936
                    ps_ctxt->pu1_ecd_data += i4_num_bytes_ecd_data;
2937
                }
2938
2939
                ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt - num_child_nodes;
2940
2941
                memcpy(
2942
                    ps_enc_tmp_out_ctxt,
2943
                    ps_ctxt->ps_enc_out_ctxt,
2944
                    sizeof(ihevce_enc_cu_node_ctxt_t));
2945
2946
                ps_enc_tmp_out_ctxt->ps_cu_prms = &ps_enc_tmp_out_ctxt->s_cu_prms;
2947
2948
                /* Collocated PU updates */
2949
                i4_num_pus_in_cu = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
2950
                /* Copy the collocated MVs and the PU map to frame buffers */
2951
                memcpy(ps_col_mv_bkup, as_col_mv, sizeof(pu_col_mv_t) * i4_num_pus_in_cu);
2952
                memcpy(
2953
                    pu1_col_pu_map,
2954
                    au1_cu_pu_map,
2955
                    (MAX_CTB_SIZE / MIN_PU_SIZE) * (MAX_CTB_SIZE / MIN_PU_SIZE));
2956
                /* Update the frame buffer pointer and the map index */
2957
                *pps_col_pu = ps_col_mv_bkup + i4_num_pus_in_cu;
2958
                *pi4_col_start_pu_idx = i4_col_pu_idx_bkup + i4_num_pus_in_cu;
2959
2960
                ps_ctxt->ps_enc_out_ctxt = ps_enc_tmp_out_ctxt + 1;
2961
#endif
2962
2963
653k
                num_children_encoded = 1;
2964
653k
                DISABLE_THE_CHILDREN_NODES(ps_cu_tree_analyse);
2965
653k
            }
2966
768k
        }
2967
3.00M
        else /* if(ps_cu_tree_analyse->is_node_valid) */
2968
3.00M
        {
2969
3.00M
            ps_cu_tree_analyse->i8_best_rdopt_cost = i8_least_child_cost;
2970
2971
            /* Tree depth of four will occur for Incomplete CTB */
2972
3.00M
            if((i8_least_child_cost > 0) && (i4_tree_depth != 3))
2973
713k
            {
2974
#if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
2975
                /* Store child node Models for evalution of next CU */
2976
                for(i = i4_tree_depth; i < 4; i++)
2977
                {
2978
                    COPY_CABAC_STATES(
2979
                        &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2980
                        &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
2981
                        IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2982
                }
2983
#else
2984
713k
                if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2985
164k
                {
2986
607k
                    for(i = (i4_tree_depth); i < 4; i++)
2987
443k
                    {
2988
443k
                        COPY_CABAC_STATES(
2989
443k
                            &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2990
443k
                            &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
2991
443k
                            IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2992
443k
                    }
2993
164k
                }
2994
713k
#endif
2995
713k
            }
2996
3.00M
        }
2997
3.77M
    }
2998
2999
6.19M
    return num_children_encoded;
3000
6.19M
}
3001
3002
static UWORD8 ihevce_intraData_availability_extractor(
3003
    WORD8 *pi1_8x8CULevel_intraData_availability_indicator,
3004
    UWORD8 u1_cu_size,
3005
    UWORD8 u1_x_8x8CU_units,
3006
    UWORD8 u1_y_8x8CU_units)
3007
0
{
3008
0
    if(8 == u1_cu_size)
3009
0
    {
3010
0
        return (!pi1_8x8CULevel_intraData_availability_indicator
3011
0
                    [u1_x_8x8CU_units + MAX_CU_IN_CTB_ROW * u1_y_8x8CU_units]);
3012
0
    }
3013
0
    else
3014
0
    {
3015
0
        UWORD8 u1_data_availability = 0;
3016
0
        UWORD8 u1_child_cu_size = u1_cu_size / 2;
3017
3018
0
        u1_data_availability |= ihevce_intraData_availability_extractor(
3019
0
            pi1_8x8CULevel_intraData_availability_indicator,
3020
0
            u1_child_cu_size,
3021
0
            u1_x_8x8CU_units,
3022
0
            u1_y_8x8CU_units);
3023
3024
0
        u1_data_availability |= ihevce_intraData_availability_extractor(
3025
0
            pi1_8x8CULevel_intraData_availability_indicator,
3026
0
            u1_child_cu_size,
3027
0
            u1_x_8x8CU_units + u1_child_cu_size / 8,
3028
0
            u1_y_8x8CU_units);
3029
3030
0
        u1_data_availability |= ihevce_intraData_availability_extractor(
3031
0
            pi1_8x8CULevel_intraData_availability_indicator,
3032
0
            u1_child_cu_size,
3033
0
            u1_x_8x8CU_units,
3034
0
            u1_y_8x8CU_units + u1_child_cu_size / 8);
3035
3036
0
        u1_data_availability |= ihevce_intraData_availability_extractor(
3037
0
            pi1_8x8CULevel_intraData_availability_indicator,
3038
0
            u1_child_cu_size,
3039
0
            u1_x_8x8CU_units + u1_child_cu_size / 8,
3040
0
            u1_y_8x8CU_units + u1_child_cu_size / 8);
3041
3042
0
        return u1_data_availability;
3043
0
    }
3044
0
}
3045
3046
void ihevce_intra_and_inter_cuTree_merger(
3047
    cur_ctb_cu_tree_t *ps_merged_tree,
3048
    cur_ctb_cu_tree_t *ps_intra_tree,
3049
    cur_ctb_cu_tree_t *ps_inter_tree,
3050
    WORD8 *pi1_8x8CULevel_intraData_availability_indicator)
3051
0
{
3052
    /* 0 => Intra and inter children valid */
3053
    /* 1 => Only Intra valid */
3054
    /* 2 => Only Inter valid */
3055
    /* 3 => Neither */
3056
0
    UWORD8 au1_children_recursive_call_type[4];
3057
3058
0
    if(NULL != ps_intra_tree)
3059
0
    {
3060
0
        ps_intra_tree->is_node_valid =
3061
0
            ps_intra_tree->is_node_valid &
3062
0
            ihevce_intraData_availability_extractor(
3063
0
                pi1_8x8CULevel_intraData_availability_indicator,
3064
0
                ps_intra_tree->u1_cu_size,
3065
0
                ps_intra_tree->b3_cu_pos_x & ((8 == ps_intra_tree->u1_cu_size) ? 0xfe : 0xff),
3066
0
                ps_intra_tree->b3_cu_pos_y & ((8 == ps_intra_tree->u1_cu_size) ? 0xfe : 0xff));
3067
0
    }
3068
3069
0
    switch(((NULL == ps_intra_tree) << 1) | (NULL == ps_inter_tree))
3070
0
    {
3071
0
    case 0:
3072
0
    {
3073
0
        ps_merged_tree->is_node_valid = ps_intra_tree->is_node_valid ||
3074
0
                                        ps_inter_tree->is_node_valid;
3075
0
        ps_merged_tree->u1_inter_eval_enable = ps_inter_tree->is_node_valid;
3076
0
        ps_merged_tree->u1_intra_eval_enable = ps_intra_tree->is_node_valid;
3077
3078
0
        au1_children_recursive_call_type[POS_TL] =
3079
0
            ((NULL == ps_intra_tree->ps_child_node_tl) << 1) |
3080
0
            (NULL == ps_inter_tree->ps_child_node_tl);
3081
0
        au1_children_recursive_call_type[POS_TR] =
3082
0
            ((NULL == ps_intra_tree->ps_child_node_tr) << 1) |
3083
0
            (NULL == ps_inter_tree->ps_child_node_tr);
3084
0
        au1_children_recursive_call_type[POS_BL] =
3085
0
            ((NULL == ps_intra_tree->ps_child_node_bl) << 1) |
3086
0
            (NULL == ps_inter_tree->ps_child_node_bl);
3087
0
        au1_children_recursive_call_type[POS_BR] =
3088
0
            ((NULL == ps_intra_tree->ps_child_node_br) << 1) |
3089
0
            (NULL == ps_inter_tree->ps_child_node_br);
3090
3091
0
        break;
3092
0
    }
3093
0
    case 1:
3094
0
    {
3095
0
        ps_merged_tree->is_node_valid = ps_intra_tree->is_node_valid;
3096
0
        ps_merged_tree->u1_inter_eval_enable = 0;
3097
0
        ps_merged_tree->u1_intra_eval_enable = ps_intra_tree->is_node_valid;
3098
3099
0
        au1_children_recursive_call_type[POS_TL] =
3100
0
            ((NULL == ps_intra_tree->ps_child_node_tl) << 1) + 1;
3101
0
        au1_children_recursive_call_type[POS_TR] =
3102
0
            ((NULL == ps_intra_tree->ps_child_node_tr) << 1) + 1;
3103
0
        au1_children_recursive_call_type[POS_BL] =
3104
0
            ((NULL == ps_intra_tree->ps_child_node_bl) << 1) + 1;
3105
0
        au1_children_recursive_call_type[POS_BR] =
3106
0
            ((NULL == ps_intra_tree->ps_child_node_br) << 1) + 1;
3107
3108
0
        break;
3109
0
    }
3110
0
    case 2:
3111
0
    {
3112
0
        ps_merged_tree->is_node_valid = ps_inter_tree->is_node_valid;
3113
0
        ps_merged_tree->u1_inter_eval_enable = ps_inter_tree->is_node_valid;
3114
0
        ps_merged_tree->u1_intra_eval_enable = 0;
3115
3116
0
        au1_children_recursive_call_type[POS_TL] = 2 + (NULL == ps_inter_tree->ps_child_node_tl);
3117
0
        au1_children_recursive_call_type[POS_TR] = 2 + (NULL == ps_inter_tree->ps_child_node_tr);
3118
0
        au1_children_recursive_call_type[POS_BL] = 2 + (NULL == ps_inter_tree->ps_child_node_bl);
3119
0
        au1_children_recursive_call_type[POS_BR] = 2 + (NULL == ps_inter_tree->ps_child_node_br);
3120
3121
0
        break;
3122
0
    }
3123
0
    case 3:
3124
0
    {
3125
        /* The swamps of Dagobah! */
3126
0
        ASSERT(0);
3127
3128
0
        break;
3129
0
    }
3130
0
    }
3131
3132
0
    switch(au1_children_recursive_call_type[POS_TL])
3133
0
    {
3134
0
    case 0:
3135
0
    {
3136
0
        ihevce_intra_and_inter_cuTree_merger(
3137
0
            ps_merged_tree->ps_child_node_tl,
3138
0
            ps_intra_tree->ps_child_node_tl,
3139
0
            ps_inter_tree->ps_child_node_tl,
3140
0
            pi1_8x8CULevel_intraData_availability_indicator);
3141
3142
0
        break;
3143
0
    }
3144
0
    case 2:
3145
0
    {
3146
0
        ihevce_intra_and_inter_cuTree_merger(
3147
0
            ps_merged_tree->ps_child_node_tl,
3148
0
            NULL,
3149
0
            ps_inter_tree->ps_child_node_tl,
3150
0
            pi1_8x8CULevel_intraData_availability_indicator);
3151
3152
0
        break;
3153
0
    }
3154
0
    case 1:
3155
0
    {
3156
0
        ihevce_intra_and_inter_cuTree_merger(
3157
0
            ps_merged_tree->ps_child_node_tl,
3158
0
            ps_intra_tree->ps_child_node_tl,
3159
0
            NULL,
3160
0
            pi1_8x8CULevel_intraData_availability_indicator);
3161
3162
0
        break;
3163
0
    }
3164
0
    }
3165
3166
0
    switch(au1_children_recursive_call_type[POS_TR])
3167
0
    {
3168
0
    case 0:
3169
0
    {
3170
0
        ihevce_intra_and_inter_cuTree_merger(
3171
0
            ps_merged_tree->ps_child_node_tr,
3172
0
            ps_intra_tree->ps_child_node_tr,
3173
0
            ps_inter_tree->ps_child_node_tr,
3174
0
            pi1_8x8CULevel_intraData_availability_indicator);
3175
3176
0
        break;
3177
0
    }
3178
0
    case 2:
3179
0
    {
3180
0
        ihevce_intra_and_inter_cuTree_merger(
3181
0
            ps_merged_tree->ps_child_node_tr,
3182
0
            NULL,
3183
0
            ps_inter_tree->ps_child_node_tr,
3184
0
            pi1_8x8CULevel_intraData_availability_indicator);
3185
3186
0
        break;
3187
0
    }
3188
0
    case 1:
3189
0
    {
3190
0
        ihevce_intra_and_inter_cuTree_merger(
3191
0
            ps_merged_tree->ps_child_node_tr,
3192
0
            ps_intra_tree->ps_child_node_tr,
3193
0
            NULL,
3194
0
            pi1_8x8CULevel_intraData_availability_indicator);
3195
3196
0
        break;
3197
0
    }
3198
0
    }
3199
3200
0
    switch(au1_children_recursive_call_type[POS_BL])
3201
0
    {
3202
0
    case 0:
3203
0
    {
3204
0
        ihevce_intra_and_inter_cuTree_merger(
3205
0
            ps_merged_tree->ps_child_node_bl,
3206
0
            ps_intra_tree->ps_child_node_bl,
3207
0
            ps_inter_tree->ps_child_node_bl,
3208
0
            pi1_8x8CULevel_intraData_availability_indicator);
3209
3210
0
        break;
3211
0
    }
3212
0
    case 2:
3213
0
    {
3214
0
        ihevce_intra_and_inter_cuTree_merger(
3215
0
            ps_merged_tree->ps_child_node_bl,
3216
0
            NULL,
3217
0
            ps_inter_tree->ps_child_node_bl,
3218
0
            pi1_8x8CULevel_intraData_availability_indicator);
3219
3220
0
        break;
3221
0
    }
3222
0
    case 1:
3223
0
    {
3224
0
        ihevce_intra_and_inter_cuTree_merger(
3225
0
            ps_merged_tree->ps_child_node_bl,
3226
0
            ps_intra_tree->ps_child_node_bl,
3227
0
            NULL,
3228
0
            pi1_8x8CULevel_intraData_availability_indicator);
3229
3230
0
        break;
3231
0
    }
3232
0
    }
3233
3234
0
    switch(au1_children_recursive_call_type[POS_BR])
3235
0
    {
3236
0
    case 0:
3237
0
    {
3238
0
        ihevce_intra_and_inter_cuTree_merger(
3239
0
            ps_merged_tree->ps_child_node_br,
3240
0
            ps_intra_tree->ps_child_node_br,
3241
0
            ps_inter_tree->ps_child_node_br,
3242
0
            pi1_8x8CULevel_intraData_availability_indicator);
3243
3244
0
        break;
3245
0
    }
3246
0
    case 2:
3247
0
    {
3248
0
        ihevce_intra_and_inter_cuTree_merger(
3249
0
            ps_merged_tree->ps_child_node_br,
3250
0
            NULL,
3251
0
            ps_inter_tree->ps_child_node_br,
3252
0
            pi1_8x8CULevel_intraData_availability_indicator);
3253
3254
0
        break;
3255
0
    }
3256
0
    case 1:
3257
0
    {
3258
0
        ihevce_intra_and_inter_cuTree_merger(
3259
0
            ps_merged_tree->ps_child_node_br,
3260
0
            ps_intra_tree->ps_child_node_br,
3261
0
            NULL,
3262
0
            pi1_8x8CULevel_intraData_availability_indicator);
3263
3264
0
        break;
3265
0
    }
3266
0
    }
3267
0
}