Coverage Report

Created: 2025-12-08 07:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libhevc/encoder/ihevce_enc_loop_pass.c
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Copyright (C) 2018 The Android Open Source Project
4
 *
5
 * Licensed under the Apache License, Version 2.0 (the "License");
6
 * you may not use this file except in compliance with the License.
7
 * You may obtain a copy of the License at:
8
 *
9
 * http://www.apache.org/licenses/LICENSE-2.0
10
 *
11
 * Unless required by applicable law or agreed to in writing, software
12
 * distributed under the License is distributed on an "AS IS" BASIS,
13
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 * See the License for the specific language governing permissions and
15
 * limitations under the License.
16
 *
17
 *****************************************************************************
18
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19
*/
20
21
/*!
22
******************************************************************************
23
* \file ihevce_enc_loop_pass.c
24
*
25
* \brief
26
*    This file contains Encoder normative loop pass related functions
27
*
28
* \date
29
*    18/09/2012
30
*
31
* \author
32
*    Ittiam
33
*
34
*
35
* List of Functions
36
*
37
*
38
******************************************************************************
39
*/
40
41
/*****************************************************************************/
42
/* File Includes                                                             */
43
/*****************************************************************************/
44
/* System include files */
45
#include <stdio.h>
46
#include <string.h>
47
#include <stdlib.h>
48
#include <assert.h>
49
#include <stdarg.h>
50
#include <math.h>
51
#include <limits.h>
52
53
/* User include files */
54
#include "ihevc_typedefs.h"
55
#include "itt_video_api.h"
56
#include "ihevce_api.h"
57
58
#include "rc_cntrl_param.h"
59
#include "rc_frame_info_collector.h"
60
#include "rc_look_ahead_params.h"
61
62
#include "ihevc_defs.h"
63
#include "ihevc_macros.h"
64
#include "ihevc_debug.h"
65
#include "ihevc_structs.h"
66
#include "ihevc_platform_macros.h"
67
#include "ihevc_deblk.h"
68
#include "ihevc_itrans_recon.h"
69
#include "ihevc_chroma_itrans_recon.h"
70
#include "ihevc_chroma_intra_pred.h"
71
#include "ihevc_intra_pred.h"
72
#include "ihevc_inter_pred.h"
73
#include "ihevc_mem_fns.h"
74
#include "ihevc_padding.h"
75
#include "ihevc_weighted_pred.h"
76
#include "ihevc_sao.h"
77
#include "ihevc_resi_trans.h"
78
#include "ihevc_quant_iquant_ssd.h"
79
#include "ihevc_cabac_tables.h"
80
#include "ihevc_common_tables.h"
81
#include "ihevc_quant_tables.h"
82
83
#include "ihevce_defs.h"
84
#include "ihevce_hle_interface.h"
85
#include "ihevce_lap_enc_structs.h"
86
#include "ihevce_multi_thrd_structs.h"
87
#include "ihevce_multi_thrd_funcs.h"
88
#include "ihevce_me_common_defs.h"
89
#include "ihevce_had_satd.h"
90
#include "ihevce_error_codes.h"
91
#include "ihevce_bitstream.h"
92
#include "ihevce_cabac.h"
93
#include "ihevce_rdoq_macros.h"
94
#include "ihevce_function_selector.h"
95
#include "ihevce_enc_structs.h"
96
#include "ihevce_entropy_structs.h"
97
#include "ihevce_cmn_utils_instr_set_router.h"
98
#include "ihevce_ipe_instr_set_router.h"
99
#include "ihevce_decomp_pre_intra_structs.h"
100
#include "ihevce_decomp_pre_intra_pass.h"
101
#include "ihevce_enc_loop_structs.h"
102
#include "ihevce_nbr_avail.h"
103
#include "ihevce_enc_loop_utils.h"
104
#include "ihevce_sub_pic_rc.h"
105
#include "ihevce_global_tables.h"
106
#include "ihevce_bs_compute_ctb.h"
107
#include "ihevce_cabac_rdo.h"
108
#include "ihevce_deblk.h"
109
#include "ihevce_frame_process.h"
110
#include "ihevce_rc_enc_structs.h"
111
#include "hme_datatype.h"
112
#include "hme_interface.h"
113
#include "hme_common_defs.h"
114
#include "hme_defs.h"
115
#include "ihevce_me_instr_set_router.h"
116
#include "ihevce_enc_subpel_gen.h"
117
#include "ihevce_inter_pred.h"
118
#include "ihevce_mv_pred.h"
119
#include "ihevce_mv_pred_merge.h"
120
#include "ihevce_enc_loop_inter_mode_sifter.h"
121
#include "ihevce_enc_cu_recursion.h"
122
#include "ihevce_enc_loop_pass.h"
123
#include "ihevce_common_utils.h"
124
#include "ihevce_dep_mngr_interface.h"
125
#include "ihevce_sao.h"
126
#include "ihevce_tile_interface.h"
127
#include "ihevce_profile.h"
128
129
#include "cast_types.h"
130
#include "osal.h"
131
#include "osal_defaults.h"
132
133
/*****************************************************************************/
134
/* Globals                                                                   */
135
/*****************************************************************************/
136
extern PART_ID_T ge_part_type_to_part_id[MAX_PART_TYPES][MAX_NUM_PARTS];
137
138
extern UWORD8 gau1_num_parts_in_part_type[MAX_PART_TYPES];
139
140
/*****************************************************************************/
141
/* Constant Macros                                                           */
142
/*****************************************************************************/
143
94.8k
#define UPDATE_QP_AT_CTB 6
144
21.4k
#define INTRAPRED_SIMD_LEFT_PADDING 16
145
14.2k
#define INTRAPRED_SIMD_RIGHT_PADDING 8
146
147
/*****************************************************************************/
148
/* Function Definitions                                                      */
149
/*****************************************************************************/
150
151
/*!
152
******************************************************************************
153
* \if Function name : ihevce_enc_loop_ctb_left_copy \endif
154
*
155
* \brief
156
*    This function copy the right data of CTB to context buffers
157
*
158
* \date
159
*    18/09/2012
160
*
161
* \author
162
*    Ittiam
163
*
164
* \return
165
*
166
* List of Functions
167
*
168
*
169
******************************************************************************
170
*/
171
void ihevce_enc_loop_ctb_left_copy(ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_prms_t *ps_cu_prms)
172
131k
{
173
    /* ------------------------------------------------------------------ */
174
    /* copy the right coloum data to the context buffers                  */
175
    /* ------------------------------------------------------------------ */
176
177
131k
    nbr_4x4_t *ps_left_nbr;
178
131k
    nbr_4x4_t *ps_nbr;
179
131k
    UWORD8 *pu1_buff;
180
131k
    WORD32 num_pels;
181
131k
    UWORD8 *pu1_luma_left, *pu1_chrm_left;
182
183
131k
    UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
184
185
131k
    pu1_luma_left = (UWORD8 *)ps_ctxt->pv_left_luma_data;
186
131k
    pu1_chrm_left = (UWORD8 *)ps_ctxt->pv_left_chrm_data;
187
131k
    ps_left_nbr = &ps_ctxt->as_left_col_nbr[0];
188
189
    /* copy right luma data */
190
131k
    pu1_buff = ps_cu_prms->pu1_luma_recon + ps_cu_prms->i4_ctb_size - 1;
191
192
8.51M
    for(num_pels = 0; num_pels < ps_cu_prms->i4_ctb_size; num_pels++)
193
8.38M
    {
194
8.38M
        WORD32 i4_indx = ps_cu_prms->i4_luma_recon_stride * num_pels;
195
196
8.38M
        pu1_luma_left[num_pels] = pu1_buff[i4_indx];
197
8.38M
    }
198
199
    /* copy right chroma data */
200
131k
    pu1_buff = ps_cu_prms->pu1_chrm_recon + ps_cu_prms->i4_ctb_size - 2;
201
202
4.32M
    for(num_pels = 0; num_pels < (ps_cu_prms->i4_ctb_size >> (0 == u1_is_422)); num_pels++)
203
4.19M
    {
204
4.19M
        WORD32 i4_indx = ps_cu_prms->i4_chrm_recon_stride * num_pels;
205
206
4.19M
        *pu1_chrm_left++ = pu1_buff[i4_indx];
207
4.19M
        *pu1_chrm_left++ = pu1_buff[i4_indx + 1];
208
4.19M
    }
209
210
    /* store the nbr 4x4 data at ctb level */
211
131k
    {
212
131k
        WORD32 ctr;
213
131k
        WORD32 nbr_strd;
214
215
131k
        nbr_strd = ps_cu_prms->i4_ctb_size >> 2;
216
217
        /* copy right nbr data */
218
131k
        ps_nbr = &ps_ctxt->as_ctb_nbr_arr[0];
219
131k
        ps_nbr += ((ps_cu_prms->i4_ctb_size >> 2) - 1);
220
221
2.22M
        for(ctr = 0; ctr < (ps_cu_prms->i4_ctb_size >> 2); ctr++)
222
2.09M
        {
223
2.09M
            WORD32 i4_indx = nbr_strd * ctr;
224
225
2.09M
            ps_left_nbr[ctr] = ps_nbr[i4_indx];
226
2.09M
        }
227
131k
    }
228
131k
    return;
229
131k
}
230
231
/*!
232
******************************************************************************
233
* \if Function name : ihevce_mark_all_modes_to_evaluate \endif
234
*
235
* \brief
236
*   Mark all modes for inter/intra for evaluation. This function will be
237
*   called by ref instance
238
*
239
* \param[in] pv_ctxt : pointer to enc_loop module
240
* \param[in] ps_cu_analyse : pointer to cu analyse
241
*
242
* \return
243
*    None
244
*
245
* \author
246
*  Ittiam
247
*
248
*****************************************************************************
249
*/
250
void ihevce_mark_all_modes_to_evaluate(void *pv_ctxt, cu_analyse_t *ps_cu_analyse)
251
1.99M
{
252
1.99M
    UWORD8 ctr;
253
1.99M
    WORD32 i4_part;
254
255
1.99M
    (void)pv_ctxt;
256
    /* run a loop over all Inter cands */
257
9.97M
    for(ctr = 0; ctr < MAX_INTER_CU_CANDIDATES; ctr++)
258
7.97M
    {
259
7.97M
        ps_cu_analyse->as_cu_inter_cand[ctr].b1_eval_mark = 1;
260
7.97M
    }
261
262
    /* run a loop over all intra candidates */
263
1.99M
    if(0 != ps_cu_analyse->u1_num_intra_rdopt_cands)
264
1.80M
    {
265
9.03M
        for(ctr = 0; ctr < MAX_INTRA_CU_CANDIDATES + 1; ctr++)
266
7.22M
        {
267
7.22M
            ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[ctr] = 1;
268
7.22M
            ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[ctr] = 1;
269
270
36.1M
            for(i4_part = 0; i4_part < NUM_PU_PARTS; i4_part++)
271
28.9M
            {
272
28.9M
                ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[i4_part][ctr] = 1;
273
28.9M
            }
274
7.22M
        }
275
1.80M
    }
276
1.99M
}
277
278
/*!
279
******************************************************************************
280
* \if Function name : ihevce_cu_mode_decide \endif
281
*
282
* \brief
283
*    Coding Unit mode decide function. Performs RD opt and decides the best mode
284
*
285
* \param[in] ps_ctxt : pointer to enc_loop module
286
* \param[in] ps_cu_prms  : pointer to coding unit params (position, buffer pointers)
287
* \param[in] ps_cu_analyse : pointer to cu analyse
288
* \param[out] ps_final_mode_state : pointer to final mode state
289
* \param[out] pu1_ecd_data : pointer to store coeff data for ECD
290
* \param[out] ps_col_pu : colocated pu buffer pointer
291
* \param[out] pu1_col_pu_map : colocated pu map buffer pointer
292
* \param[in] col_start_pu_idx : pu index start value
293
*
294
* \return
295
*    None
296
*
297
*
298
* \author
299
*  Ittiam
300
*
301
*****************************************************************************
302
*/
303
LWORD64 ihevce_cu_mode_decide(
304
    ihevce_enc_loop_ctxt_t *ps_ctxt,
305
    enc_loop_cu_prms_t *ps_cu_prms,
306
    cu_analyse_t *ps_cu_analyse,
307
    final_mode_state_t *ps_final_mode_state,
308
    UWORD8 *pu1_ecd_data,
309
    pu_col_mv_t *ps_col_pu,
310
    UWORD8 *pu1_col_pu_map,
311
    WORD32 col_start_pu_idx)
312
1.99M
{
313
1.99M
    enc_loop_chrm_cu_buf_prms_t s_chrm_cu_buf_prms;
314
1.99M
    cu_nbr_prms_t s_cu_nbr_prms;
315
1.99M
    inter_cu_mode_info_t s_inter_cu_mode_info;
316
1.99M
    cu_inter_cand_t *ps_best_inter_cand = NULL;
317
1.99M
    UWORD8 *pu1_cu_top;
318
1.99M
    UWORD8 *pu1_cu_top_left;
319
1.99M
    UWORD8 *pu1_cu_left;
320
1.99M
    UWORD8 *pu1_final_recon = NULL;
321
1.99M
    UWORD8 *pu1_curr_src = NULL;
322
1.99M
    void *pv_curr_src = NULL;
323
1.99M
    void *pv_cu_left = NULL;
324
1.99M
    void *pv_cu_top = NULL;
325
1.99M
    void *pv_cu_top_left = NULL;
326
327
1.99M
    WORD32 cu_left_stride = 0;
328
1.99M
    WORD32 ctr;
329
1.99M
    WORD32 rd_opt_best_idx;
330
1.99M
    LWORD64 rd_opt_least_cost;
331
1.99M
    WORD32 rd_opt_curr_idx;
332
1.99M
    WORD32 num_4x4_in_ctb;
333
1.99M
    WORD32 nbr_4x4_left_strd = 0;
334
335
1.99M
    nbr_4x4_t *ps_topleft_nbr_4x4;
336
1.99M
    nbr_4x4_t *ps_left_nbr_4x4 = NULL;
337
1.99M
    nbr_4x4_t *ps_top_nbr_4x4 = NULL;
338
1.99M
    nbr_4x4_t *ps_curr_nbr_4x4;
339
1.99M
    WORD32 enable_intra_eval_flag;
340
1.99M
    WORD32 i4_best_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1;
341
1.99M
    WORD32 curr_cu_pos_in_row;
342
1.99M
    WORD32 cu_top_right_offset;
343
1.99M
    WORD32 cu_top_right_dep_pos;
344
1.99M
    WORD32 i4_ctb_x_off, i4_ctb_y_off;
345
346
1.99M
    UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
347
1.99M
    (void)ps_final_mode_state;
348
    /* default init */
349
1.99M
    rd_opt_least_cost = MAX_COST_64;
350
1.99M
    ps_ctxt->as_cu_prms[0].i8_best_rdopt_cost = MAX_COST_64;
351
1.99M
    ps_ctxt->as_cu_prms[1].i8_best_rdopt_cost = MAX_COST_64;
352
353
    /* Zero cbf tool is enabled by default for all presets */
354
1.99M
    ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
355
356
1.99M
    rd_opt_best_idx = 1;
357
1.99M
    rd_opt_curr_idx = 0;
358
1.99M
    enable_intra_eval_flag = 1;
359
360
    /* CU params in enc ctxt*/
361
1.99M
    ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
362
1.99M
    ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
363
1.99M
    ps_ctxt->ps_enc_out_ctxt->u1_cu_size = ps_cu_analyse->u1_cu_size;
364
365
1.99M
    num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
366
1.99M
    ps_curr_nbr_4x4 = &ps_ctxt->as_ctb_nbr_arr[0];
367
1.99M
    ps_curr_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1);
368
1.99M
    ps_curr_nbr_4x4 += ((ps_cu_analyse->b3_cu_pos_y << 1) * num_4x4_in_ctb);
369
370
    /* CB and Cr are pixel interleaved */
371
1.99M
    s_chrm_cu_buf_prms.i4_chrm_recon_stride = ps_cu_prms->i4_chrm_recon_stride;
372
373
1.99M
    s_chrm_cu_buf_prms.i4_chrm_src_stride = ps_cu_prms->i4_chrm_src_stride;
374
375
1.99M
    if(!ps_ctxt->u1_is_input_data_hbd)
376
1.99M
    {
377
        /* --------------------------------------- */
378
        /* ----- Luma Pointers Derivation -------- */
379
        /* --------------------------------------- */
380
381
        /* based on CU position derive the pointers */
382
1.99M
        pu1_final_recon = ps_cu_prms->pu1_luma_recon + (ps_cu_analyse->b3_cu_pos_x << 3);
383
384
1.99M
        pu1_curr_src = ps_cu_prms->pu1_luma_src + (ps_cu_analyse->b3_cu_pos_x << 3);
385
386
1.99M
        pu1_final_recon += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_recon_stride);
387
388
1.99M
        pu1_curr_src += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_src_stride);
389
390
1.99M
        pv_curr_src = pu1_curr_src;
391
392
        /* CU left */
393
1.99M
        if(0 == ps_cu_analyse->b3_cu_pos_x)
394
594k
        {
395
            /* CTB boundary */
396
594k
            pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_luma_data;
397
594k
            pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << 3);
398
594k
            cu_left_stride = 1;
399
400
594k
            ps_left_nbr_4x4 = &ps_ctxt->as_left_col_nbr[0];
401
594k
            ps_left_nbr_4x4 += ps_cu_analyse->b3_cu_pos_y << 1;
402
594k
            nbr_4x4_left_strd = 1;
403
594k
        }
404
1.40M
        else
405
1.40M
        {
406
            /* inside CTB */
407
1.40M
            pu1_cu_left = pu1_final_recon - 1;
408
1.40M
            cu_left_stride = ps_cu_prms->i4_luma_recon_stride;
409
410
1.40M
            ps_left_nbr_4x4 = ps_curr_nbr_4x4 - 1;
411
1.40M
            nbr_4x4_left_strd = num_4x4_in_ctb;
412
1.40M
        }
413
414
1.99M
        pv_cu_left = pu1_cu_left;
415
416
        /* CU top */
417
1.99M
        if(0 == ps_cu_analyse->b3_cu_pos_y)
418
579k
        {
419
            /* CTB boundary */
420
579k
            pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_luma;
421
579k
            pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size;
422
579k
            pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3);
423
424
579k
            ps_top_nbr_4x4 = ps_ctxt->ps_top_row_nbr;
425
579k
            ps_top_nbr_4x4 += (ps_cu_prms->i4_ctb_pos * (ps_cu_prms->i4_ctb_size >> 2));
426
579k
            ps_top_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1);
427
579k
        }
428
1.41M
        else
429
1.41M
        {
430
            /* inside CTB */
431
1.41M
            pu1_cu_top = pu1_final_recon - ps_cu_prms->i4_luma_recon_stride;
432
433
1.41M
            ps_top_nbr_4x4 = ps_curr_nbr_4x4 - num_4x4_in_ctb;
434
1.41M
        }
435
436
1.99M
        pv_cu_top = pu1_cu_top;
437
438
        /* CU top left */
439
1.99M
        if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y))
440
405k
        {
441
            /* left ctb boundary but not first row */
442
405k
            pu1_cu_top_left = pu1_cu_left - 1; /* stride is 1 */
443
405k
            ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - 1; /* stride is 1 */
444
405k
        }
445
1.58M
        else
446
1.58M
        {
447
            /* rest all cases topleft is top -1 */
448
1.58M
            pu1_cu_top_left = pu1_cu_top - 1;
449
1.58M
            ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1;
450
1.58M
        }
451
452
1.99M
        pv_cu_top_left = pu1_cu_top_left;
453
454
        /* Store the CU nbr information in the ctxt for final reconstruction fun. */
455
1.99M
        s_cu_nbr_prms.nbr_4x4_left_strd = nbr_4x4_left_strd;
456
1.99M
        s_cu_nbr_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4;
457
1.99M
        s_cu_nbr_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4;
458
1.99M
        s_cu_nbr_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4;
459
1.99M
        s_cu_nbr_prms.pu1_cu_left = pu1_cu_left;
460
1.99M
        s_cu_nbr_prms.pu1_cu_top = pu1_cu_top;
461
1.99M
        s_cu_nbr_prms.pu1_cu_top_left = pu1_cu_top_left;
462
1.99M
        s_cu_nbr_prms.cu_left_stride = cu_left_stride;
463
464
        /* ------------------------------------------------------------ */
465
        /* -- Initialize the number of neigbour skip cu count for rdo --*/
466
        /* ------------------------------------------------------------ */
467
1.99M
        {
468
1.99M
            nbr_avail_flags_t s_nbr;
469
1.99M
            WORD32 i4_num_nbr_skip_cus = 0;
470
471
            /* get the neighbour availability flags for current cu  */
472
1.99M
            ihevce_get_nbr_intra(
473
1.99M
                &s_nbr,
474
1.99M
                ps_ctxt->pu1_ctb_nbr_map,
475
1.99M
                ps_ctxt->i4_nbr_map_strd,
476
1.99M
                (ps_cu_analyse->b3_cu_pos_x << 1),
477
1.99M
                (ps_cu_analyse->b3_cu_pos_y << 1),
478
1.99M
                (ps_cu_analyse->u1_cu_size >> 2));
479
1.99M
            if(s_nbr.u1_top_avail)
480
1.52M
            {
481
1.52M
                i4_num_nbr_skip_cus += ps_top_nbr_4x4->b1_skip_flag;
482
1.52M
            }
483
484
1.99M
            if(s_nbr.u1_left_avail)
485
1.53M
            {
486
1.53M
                i4_num_nbr_skip_cus += ps_left_nbr_4x4->b1_skip_flag;
487
1.53M
            }
488
1.99M
            ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0].i4_num_nbr_skip_cus =
489
1.99M
                i4_num_nbr_skip_cus;
490
1.99M
            ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1].i4_num_nbr_skip_cus =
491
1.99M
                i4_num_nbr_skip_cus;
492
1.99M
        }
493
494
        /* --------------------------------------- */
495
        /* --- Chroma Pointers Derivation -------- */
496
        /* --------------------------------------- */
497
498
        /* based on CU position derive the pointers */
499
1.99M
        s_chrm_cu_buf_prms.pu1_final_recon =
500
1.99M
            ps_cu_prms->pu1_chrm_recon + (ps_cu_analyse->b3_cu_pos_x << 3);
501
502
1.99M
        s_chrm_cu_buf_prms.pu1_curr_src =
503
1.99M
            ps_cu_prms->pu1_chrm_src + (ps_cu_analyse->b3_cu_pos_x << 3);
504
505
1.99M
        s_chrm_cu_buf_prms.pu1_final_recon +=
506
1.99M
            ((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_recon_stride);
507
508
1.99M
        s_chrm_cu_buf_prms.pu1_curr_src +=
509
1.99M
            ((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_src_stride);
510
511
        /* CU left */
512
1.99M
        if(0 == ps_cu_analyse->b3_cu_pos_x)
513
594k
        {
514
            /* CTB boundary */
515
594k
            s_chrm_cu_buf_prms.pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_chrm_data;
516
594k
            s_chrm_cu_buf_prms.pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 3));
517
594k
            s_chrm_cu_buf_prms.i4_cu_left_stride = 2;
518
594k
        }
519
1.40M
        else
520
1.40M
        {
521
            /* inside CTB */
522
1.40M
            s_chrm_cu_buf_prms.pu1_cu_left = s_chrm_cu_buf_prms.pu1_final_recon - 2;
523
1.40M
            s_chrm_cu_buf_prms.i4_cu_left_stride = ps_cu_prms->i4_chrm_recon_stride;
524
1.40M
        }
525
526
        /* CU top */
527
1.99M
        if(0 == ps_cu_analyse->b3_cu_pos_y)
528
579k
        {
529
            /* CTB boundary */
530
579k
            s_chrm_cu_buf_prms.pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_chroma;
531
579k
            s_chrm_cu_buf_prms.pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size;
532
579k
            s_chrm_cu_buf_prms.pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3);
533
579k
        }
534
1.41M
        else
535
1.41M
        {
536
            /* inside CTB */
537
1.41M
            s_chrm_cu_buf_prms.pu1_cu_top =
538
1.41M
                s_chrm_cu_buf_prms.pu1_final_recon - ps_cu_prms->i4_chrm_recon_stride;
539
1.41M
        }
540
541
        /* CU top left */
542
1.99M
        if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y))
543
405k
        {
544
            /* left ctb boundary but not first row */
545
405k
            s_chrm_cu_buf_prms.pu1_cu_top_left =
546
405k
                s_chrm_cu_buf_prms.pu1_cu_left - 2; /* stride is 1 (2 pixels) */
547
405k
        }
548
1.58M
        else
549
1.58M
        {
550
            /* rest all cases topleft is top -2 */
551
1.58M
            s_chrm_cu_buf_prms.pu1_cu_top_left = s_chrm_cu_buf_prms.pu1_cu_top - 2;
552
1.58M
        }
553
1.99M
    }
554
555
    /* Set Variables for Dep. Checking and Setting */
556
1.99M
    i4_ctb_x_off = (ps_cu_prms->i4_ctb_pos << 6);
557
558
1.99M
    i4_ctb_y_off = ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y;
559
1.99M
    ps_ctxt->i4_satd_buf_idx = rd_opt_curr_idx;
560
561
    /* Set the pred pointer count for ME/intra to 0 to start */
562
1.99M
    ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count = 0;
563
564
1.99M
    ASSERT(
565
1.99M
        (ps_cu_analyse->u1_num_inter_cands > 0) || (ps_cu_analyse->u1_num_intra_rdopt_cands > 0));
566
567
1.99M
    ASSERT(ps_cu_analyse->u1_num_inter_cands <= MAX_INTER_CU_CANDIDATES);
568
1.99M
    s_inter_cu_mode_info.u1_num_inter_cands = 0;
569
1.99M
    s_inter_cu_mode_info.u1_idx_of_worst_cost_in_cost_array = 0;
570
1.99M
    s_inter_cu_mode_info.u1_idx_of_worst_cost_in_pred_buf_array = 0;
571
572
1.99M
    ps_ctxt->s_cu_inter_merge_skip.u1_num_merge_cands = 0;
573
1.99M
    ps_ctxt->s_cu_inter_merge_skip.u1_num_skip_cands = 0;
574
1.99M
    ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type0_cands = 0;
575
1.99M
    ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type1_cands = 0;
576
1.99M
    ps_ctxt->s_pred_buf_data.i4_pred_stride = ps_cu_analyse->u1_cu_size;
577
1.99M
    if(0 != ps_cu_analyse->u1_num_inter_cands)
578
1.07M
    {
579
1.07M
        ihevce_inter_cand_sifter_prms_t s_prms;
580
581
1.07M
        UWORD8 u1_enable_top_row_sync;
582
583
1.07M
        if(ps_ctxt->u1_disable_intra_eval)
584
20.1k
        {
585
20.1k
            u1_enable_top_row_sync = !DISABLE_TOP_SYNC;
586
20.1k
        }
587
1.05M
        else
588
1.05M
        {
589
1.05M
            u1_enable_top_row_sync = 1;
590
1.05M
        }
591
592
1.07M
        if((!ps_ctxt->u1_use_top_at_ctb_boundary) && u1_enable_top_row_sync)
593
306k
        {
594
            /* Wait till top data is ready          */
595
            /* Currently checking till top right CU */
596
306k
            curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
597
598
306k
            if(i4_ctb_y_off == 0)
599
266k
            {
600
                /* No wait for 1st row */
601
266k
                cu_top_right_offset = -(MAX_CTB_SIZE);
602
266k
                {
603
266k
                    ihevce_tile_params_t *ps_col_tile_params =
604
266k
                        ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
605
266k
                         ps_ctxt->i4_tile_col_idx);
606
                    /* No wait for 1st row */
607
266k
                    cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
608
266k
                }
609
266k
                cu_top_right_dep_pos = 0;
610
266k
            }
611
39.7k
            else
612
39.7k
            {
613
39.7k
                cu_top_right_offset = (ps_cu_analyse->u1_cu_size) + 4;
614
39.7k
                cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
615
39.7k
            }
616
617
306k
            if(0 == ps_cu_analyse->b3_cu_pos_y)
618
117k
            {
619
117k
                ihevce_dmgr_chk_row_row_sync(
620
117k
                    ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
621
117k
                    curr_cu_pos_in_row,
622
117k
                    cu_top_right_offset,
623
117k
                    cu_top_right_dep_pos,
624
117k
                    ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
625
117k
                    ps_ctxt->thrd_id);
626
117k
            }
627
306k
        }
628
629
1.07M
        if(ps_ctxt->i1_cu_qp_delta_enable)
630
495k
        {
631
495k
            ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, 4, 0);
632
495k
        }
633
634
1.07M
        s_prms.i4_ctb_nbr_map_stride = ps_ctxt->i4_nbr_map_strd;
635
1.07M
        s_prms.i4_max_num_inter_rdopt_cands = ps_ctxt->i4_max_num_inter_rdopt_cands;
636
1.07M
        s_prms.i4_nbr_4x4_left_strd = nbr_4x4_left_strd;
637
1.07M
        s_prms.i4_src_strd = ps_cu_prms->i4_luma_src_stride;
638
1.07M
        s_prms.ps_cu_inter_merge_skip = &ps_ctxt->s_cu_inter_merge_skip;
639
1.07M
        s_prms.aps_cu_nbr_buf[0] = &ps_ctxt->as_cu_nbr[ps_ctxt->i4_satd_buf_idx][0];
640
1.07M
        s_prms.aps_cu_nbr_buf[1] = &ps_ctxt->as_cu_nbr[!ps_ctxt->i4_satd_buf_idx][0];
641
1.07M
        s_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4;
642
1.07M
        s_prms.ps_mc_ctxt = &ps_ctxt->s_mc_ctxt;
643
1.07M
        s_prms.ps_me_cands = ps_cu_analyse->as_cu_inter_cand;
644
1.07M
        s_prms.ps_mixed_modes_datastore = &ps_ctxt->s_mixed_mode_inter_cu;
645
1.07M
        s_prms.ps_mv_pred_ctxt = &ps_ctxt->s_mv_pred_ctxt;
646
1.07M
        s_prms.ps_pred_buf_data = &ps_ctxt->s_pred_buf_data;
647
1.07M
        s_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4;
648
1.07M
        s_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4;
649
1.07M
        s_prms.pu1_ctb_nbr_map = ps_ctxt->pu1_ctb_nbr_map;
650
1.07M
        s_prms.pv_src = pv_curr_src;
651
1.07M
        s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x << 3;
652
1.07M
        s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y << 3;
653
1.07M
        s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size;
654
1.07M
        s_prms.u1_max_merge_candidates = ps_ctxt->i4_max_merge_candidates;
655
1.07M
        s_prms.u1_num_me_cands = ps_cu_analyse->u1_num_inter_cands;
656
1.07M
        s_prms.u1_use_satd_for_merge_eval = ps_ctxt->i4_use_satd_for_merge_eval;
657
1.07M
        s_prms.u1_quality_preset = ps_ctxt->i4_quality_preset;
658
1.07M
        s_prms.i1_slice_type = ps_ctxt->i1_slice_type;
659
1.07M
        s_prms.ps_cu_me_intra_pred_prms = &ps_ctxt->s_cu_me_intra_pred_prms;
660
1.07M
        s_prms.u1_is_hbd = (ps_ctxt->u1_bit_depth > 8);
661
1.07M
        s_prms.ps_inter_cu_mode_info = &s_inter_cu_mode_info;
662
1.07M
        s_prms.pai4_mv_cost = ps_cu_analyse->ai4_mv_cost;
663
1.07M
        s_prms.i4_lambda_qf = ps_ctxt->i4_sad_lamda;
664
1.07M
        s_prms.u1_use_merge_cand_from_top_row =
665
1.07M
            (u1_enable_top_row_sync || (s_prms.u1_cu_pos_y > 0));
666
1.07M
        s_prms.u1_merge_idx_cabac_model =
667
1.07M
            ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[IHEVC_CAB_MERGE_IDX_EXT];
668
#if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
669
        s_prms.pai4_me_err_metric = ps_cu_analyse->ai4_err_metric;
670
        s_prms.u1_reuse_me_sad = 1;
671
#else
672
1.07M
        s_prms.u1_reuse_me_sad = 0;
673
1.07M
#endif
674
675
1.07M
        if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_type != PSLICE)
676
229k
        {
677
229k
            if(ps_ctxt->i4_temporal_layer == 1)
678
82.3k
            {
679
82.3k
                s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_BREF;
680
82.3k
            }
681
147k
            else
682
147k
            {
683
147k
                s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME;
684
147k
            }
685
229k
        }
686
848k
        else
687
848k
        {
688
848k
            s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_P;
689
848k
        }
690
1.07M
        s_prms.u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
691
692
1.07M
        if(s_prms.u1_is_cu_noisy)
693
0
        {
694
0
            s_prms.i4_lambda_qf =
695
0
                ((float)s_prms.i4_lambda_qf) * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f;
696
0
        }
697
1.07M
        s_prms.pf_luma_inter_pred_pu = ihevce_luma_inter_pred_pu;
698
699
1.07M
        s_prms.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func;
700
701
1.07M
        s_prms.pf_evalsad_pt_npu_mxn_8bit = (FT_SAD_EVALUATOR *)ps_ctxt->pv_evalsad_pt_npu_mxn_8bit;
702
1.07M
        ihevce_inter_cand_sifter(&s_prms);
703
1.07M
    }
704
1.99M
    if(u1_is_422)
705
0
    {
706
0
        UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX - 1];
707
0
        UWORD8 u1_num_bufs_allocated;
708
709
0
        u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices(
710
0
            au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX - 1);
711
712
0
        ASSERT(u1_num_bufs_allocated == (NUM_CU_ME_INTRA_PRED_IDX - 1));
713
714
0
        for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated;
715
0
            ctr++)
716
0
        {
717
0
            {
718
0
                ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
719
0
                    (UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]];
720
0
            }
721
722
0
            ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
723
724
0
            ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
725
0
        }
726
727
0
        {
728
0
            ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
729
0
                (UWORD8 *)ps_ctxt->pv_422_chroma_intra_pred_buf;
730
0
        }
731
732
0
        ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
733
734
0
        ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
735
0
    }
736
1.99M
    else
737
1.99M
    {
738
1.99M
        UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX];
739
1.99M
        UWORD8 u1_num_bufs_allocated;
740
741
1.99M
        u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices(
742
1.99M
            au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX);
743
744
1.99M
        ASSERT(u1_num_bufs_allocated == NUM_CU_ME_INTRA_PRED_IDX);
745
746
7.97M
        for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated;
747
5.98M
            ctr++)
748
5.98M
        {
749
5.98M
            {
750
5.98M
                ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
751
5.98M
                    (UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]];
752
5.98M
            }
753
754
5.98M
            ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
755
756
5.98M
            ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
757
5.98M
        }
758
1.99M
    }
759
760
1.99M
    ihevce_mark_all_modes_to_evaluate(ps_ctxt, ps_cu_analyse);
761
762
1.99M
    ps_ctxt->as_cu_prms[0].s_recon_datastore.u1_is_lumaRecon_available = 0;
763
1.99M
    ps_ctxt->as_cu_prms[1].s_recon_datastore.u1_is_lumaRecon_available = 0;
764
1.99M
    ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
765
1.99M
    ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
766
1.99M
    ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[1] = 0;
767
1.99M
    ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[1] = 0;
768
1.99M
    ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[2] = 0;
769
1.99M
    ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[2] = 0;
770
    /* --------------------------------------- */
771
    /* ------ Inter RD OPT stage ------------- */
772
    /* --------------------------------------- */
773
1.99M
    if(0 != s_inter_cu_mode_info.u1_num_inter_cands)
774
1.07M
    {
775
1.07M
        UWORD8 u1_ssd_bit_info_ctr = 0;
776
777
        /* -- run a loop over all Inter rd opt cands ------ */
778
4.32M
        for(ctr = 0; ctr < s_inter_cu_mode_info.u1_num_inter_cands; ctr++)
779
3.24M
        {
780
3.24M
            cu_inter_cand_t *ps_inter_cand;
781
782
3.24M
            LWORD64 rd_opt_cost = 0;
783
784
3.24M
            ps_inter_cand = s_inter_cu_mode_info.aps_cu_data[ctr];
785
786
3.24M
            if((ps_inter_cand->b1_skip_flag) || (ps_inter_cand->as_inter_pu[0].b1_merge_flag) ||
787
831k
               (ps_inter_cand->b3_part_size && ps_inter_cand->as_inter_pu[1].b1_merge_flag))
788
2.47M
            {
789
2.47M
                ps_inter_cand->b1_eval_mark = 1;
790
2.47M
            }
791
792
            /****************************************************************/
793
            /* This check is only valid for derived instances.              */
794
            /* check if this mode needs to be evaluated or not.             */
795
            /* if it is a skip candidate, go ahead and evaluate it even if  */
796
            /* it has not been marked while sorting.                        */
797
            /****************************************************************/
798
3.24M
            if((0 == ps_inter_cand->b1_eval_mark) && (0 == ps_inter_cand->b1_skip_flag))
799
0
            {
800
0
                continue;
801
0
            }
802
803
            /* RDOPT related copies and settings */
804
3.24M
            ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
805
806
            /* RDOPT copy States : Prev Cu best to current init */
807
3.24M
            COPY_CABAC_STATES(
808
3.24M
                &ps_ctxt->au1_rdopt_init_ctxt_models[0],
809
3.24M
                &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
810
3.24M
                IHEVC_CAB_CTXT_END * sizeof(UWORD8));
811
            /* MVP ,MVD calc and Motion compensation */
812
3.24M
            rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)(
813
3.24M
                ps_ctxt,
814
3.24M
                ps_inter_cand,
815
3.24M
                ps_cu_analyse->u1_cu_size,
816
3.24M
                ps_cu_analyse->b3_cu_pos_x,
817
3.24M
                ps_cu_analyse->b3_cu_pos_y,
818
3.24M
                ps_left_nbr_4x4,
819
3.24M
                ps_top_nbr_4x4,
820
3.24M
                ps_topleft_nbr_4x4,
821
3.24M
                nbr_4x4_left_strd,
822
3.24M
                rd_opt_curr_idx);
823
824
#if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
825
            if((ps_ctxt->u1_bit_depth == 8) && (!ps_inter_cand->b1_skip_flag))
826
            {
827
                ihevce_determine_tu_tree_distribution(
828
                    ps_inter_cand,
829
                    (me_func_selector_t *)ps_ctxt->pv_err_func_selector,
830
                    ps_ctxt->ai2_scratch,
831
                    (UWORD8 *)pv_curr_src,
832
                    ps_cu_prms->i4_luma_src_stride,
833
                    ps_ctxt->i4_satd_lamda,
834
                    LAMBDA_Q_SHIFT,
835
                    ps_cu_analyse->u1_cu_size,
836
                    ps_ctxt->u1_max_tr_depth);
837
            }
838
#endif
839
#if DISABLE_ZERO_ZBF_IN_INTER
840
            ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
841
#else
842
3.24M
            ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
843
3.24M
#endif
844
            /* Recon loop with different TUs based on partition type*/
845
3.24M
            rd_opt_cost += ((pf_inter_rdopt_cu_ntu)ps_ctxt->pv_inter_rdopt_cu_ntu)(
846
3.24M
                ps_ctxt,
847
3.24M
                ps_cu_prms,
848
3.24M
                pv_curr_src,
849
3.24M
                ps_cu_analyse->u1_cu_size,
850
3.24M
                ps_cu_analyse->b3_cu_pos_x,
851
3.24M
                ps_cu_analyse->b3_cu_pos_y,
852
3.24M
                rd_opt_curr_idx,
853
3.24M
                &s_chrm_cu_buf_prms,
854
3.24M
                ps_inter_cand,
855
3.24M
                ps_cu_analyse,
856
3.24M
                !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
857
3.24M
                                       : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
858
2.71M
                                          (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
859
2.71M
                                             100.0);
860
861
3.24M
#if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
862
3.24M
            if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
863
0
            {
864
0
                ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
865
0
                ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
866
0
                    ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
867
0
            }
868
3.24M
#endif
869
870
            /* based on the rd opt cost choose the best and current index */
871
3.24M
            if(rd_opt_cost < rd_opt_least_cost)
872
1.83M
            {
873
                /* swap the best and current indx */
874
1.83M
                rd_opt_best_idx = !rd_opt_best_idx;
875
1.83M
                rd_opt_curr_idx = !rd_opt_curr_idx;
876
877
1.83M
                ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost;
878
1.83M
                rd_opt_least_cost = rd_opt_cost;
879
1.83M
                i4_best_cu_qp = ps_ctxt->i4_cu_qp;
880
881
                /* Store the best Inter cand. for final_recon function */
882
1.83M
                ps_best_inter_cand = ps_inter_cand;
883
1.83M
            }
884
885
            /* set the neighbour map to 0 */
886
3.24M
            ihevce_set_nbr_map(
887
3.24M
                ps_ctxt->pu1_ctb_nbr_map,
888
3.24M
                ps_ctxt->i4_nbr_map_strd,
889
3.24M
                (ps_cu_analyse->b3_cu_pos_x << 1),
890
3.24M
                (ps_cu_analyse->b3_cu_pos_y << 1),
891
3.24M
                (ps_cu_analyse->u1_cu_size >> 2),
892
3.24M
                0);
893
894
3.24M
        } /* end of loop for all the Inter RD OPT cand */
895
1.07M
    }
896
    /* --------------------------------------- */
897
    /* ---- Conditional Eval of Intra -------- */
898
    /* --------------------------------------- */
899
1.99M
    {
900
1.99M
        enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
901
1.99M
        ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
902
903
        /* check if inter candidates are valid */
904
1.99M
        if(0 != ps_cu_analyse->u1_num_inter_cands)
905
1.07M
        {
906
            /* if skip or no residual inter candidates has won then */
907
            /* evaluation of intra candidates is disabled           */
908
1.07M
            if((1 == ps_enc_loop_bestprms->u1_skip_flag) ||
909
847k
               (0 == ps_enc_loop_bestprms->u1_is_cu_coded))
910
842k
            {
911
842k
                enable_intra_eval_flag = 0;
912
842k
            }
913
1.07M
        }
914
        /* Disable Intra Gating for HIGH QUALITY PRESET */
915
1.99M
#if !ENABLE_INTRA_GATING_FOR_HQ
916
1.99M
        if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset)
917
1.28M
        {
918
1.28M
            enable_intra_eval_flag = 1;
919
920
1.28M
#if DISABLE_LARGE_INTRA_PQ
921
1.28M
            if((IHEVCE_QUALITY_P0 == ps_ctxt->i4_quality_preset) && (ps_cu_prms->u1_is_cu_noisy) &&
922
0
               (ps_ctxt->i1_slice_type != ISLICE) && (0 != s_inter_cu_mode_info.u1_num_inter_cands))
923
0
            {
924
0
                if(ps_cu_analyse->u1_cu_size > 16)
925
0
                {
926
                    /* Disable 32x32 / 64x64 Intra in PQ P and B pics */
927
0
                    enable_intra_eval_flag = 0;
928
0
                }
929
0
                else if(ps_cu_analyse->u1_cu_size == 16)
930
0
                {
931
                    /* Disable tu equal to cu mode in 16x16 Intra in PQ P and B pics */
932
0
                    ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
933
0
                }
934
0
            }
935
1.28M
#endif
936
1.28M
        }
937
1.99M
#endif
938
1.99M
    }
939
940
    /* --------------------------------------- */
941
    /* ------ Intra RD OPT stage ------------- */
942
    /* --------------------------------------- */
943
944
    /* -- run a loop over all Intra rd opt cands ------ */
945
1.99M
    if((0 != ps_cu_analyse->u1_num_intra_rdopt_cands) && (1 == enable_intra_eval_flag))
946
1.71M
    {
947
1.71M
        LWORD64 rd_opt_cost;
948
1.71M
        WORD32 end_flag = 0;
949
1.71M
        WORD32 cu_eval_done = 0;
950
1.71M
        WORD32 subcu_eval_done = 0;
951
1.71M
        WORD32 subpu_eval_done = 0;
952
1.71M
        WORD32 max_trans_size;
953
1.71M
        WORD32 sync_wait_stride;
954
1.71M
        max_trans_size = MIN(MAX_TU_SIZE, (ps_cu_analyse->u1_cu_size));
955
1.71M
        sync_wait_stride = (ps_cu_analyse->u1_cu_size) + max_trans_size;
956
957
1.71M
        if(!ps_ctxt->u1_use_top_at_ctb_boundary)
958
431k
        {
959
            /* Wait till top data is ready          */
960
            /* Currently checking till top right CU */
961
431k
            curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
962
963
431k
            if(i4_ctb_y_off == 0)
964
299k
            {
965
                /* No wait for 1st row */
966
299k
                cu_top_right_offset = -(MAX_CTB_SIZE);
967
299k
                {
968
299k
                    ihevce_tile_params_t *ps_col_tile_params =
969
299k
                        ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
970
299k
                         ps_ctxt->i4_tile_col_idx);
971
                    /* No wait for 1st row */
972
299k
                    cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
973
299k
                }
974
299k
                cu_top_right_dep_pos = 0;
975
299k
            }
976
131k
            else
977
131k
            {
978
131k
                cu_top_right_offset = sync_wait_stride;
979
131k
                cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
980
131k
            }
981
982
431k
            if(0 == ps_cu_analyse->b3_cu_pos_y)
983
131k
            {
984
131k
                ihevce_dmgr_chk_row_row_sync(
985
131k
                    ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
986
131k
                    curr_cu_pos_in_row,
987
131k
                    cu_top_right_offset,
988
131k
                    cu_top_right_dep_pos,
989
131k
                    ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
990
131k
                    ps_ctxt->thrd_id);
991
131k
            }
992
431k
        }
993
1.71M
        ctr = 0;
994
995
        /* Zero cbf tool is disabled for intra CUs */
996
#if ENABLE_ZERO_CBF_IN_INTRA
997
        ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
998
#else
999
1.71M
        ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
1000
1.71M
#endif
1001
1002
        /* Intra Mode gating based on MPM cand list and encoder quality preset */
1003
1.71M
        if((ps_ctxt->i1_slice_type != ISLICE) && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
1004
115k
        {
1005
115k
            ihevce_mpm_idx_based_filter_RDOPT_cand(
1006
115k
                ps_ctxt,
1007
115k
                ps_cu_analyse,
1008
115k
                ps_left_nbr_4x4,
1009
115k
                ps_top_nbr_4x4,
1010
115k
                &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0],
1011
115k
                &ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[0]);
1012
1013
115k
            ihevce_mpm_idx_based_filter_RDOPT_cand(
1014
115k
                ps_ctxt,
1015
115k
                ps_cu_analyse,
1016
115k
                ps_left_nbr_4x4,
1017
115k
                ps_top_nbr_4x4,
1018
115k
                &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0],
1019
115k
                &ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[0]);
1020
115k
        }
1021
1022
        /* Call Chroma SATD function for curr_func_mode in HIGH QUALITY mode */
1023
1.71M
        if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd)
1024
1.28M
        {
1025
            /* For cu_size = 64, there won't be any TU_EQ_CU case */
1026
1.28M
            if(64 != ps_cu_analyse->u1_cu_size)
1027
1.27M
            {
1028
                /* RDOPT copy States : Prev Cu best to current init */
1029
1.27M
                COPY_CABAC_STATES(
1030
1.27M
                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
1031
1.27M
                    &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1032
1.27M
                    IHEVC_CAB_CTXT_END);
1033
1034
                /* RDOPT related copies and settings */
1035
1.27M
                ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
1036
1037
                /* Calc. best SATD mode for TU_EQ_CU case */
1038
1.27M
                ((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)(
1039
1.27M
                    ps_ctxt,
1040
1.27M
                    &s_chrm_cu_buf_prms,
1041
1.27M
                    ps_cu_analyse,
1042
1.27M
                    rd_opt_curr_idx,
1043
1.27M
                    TU_EQ_CU,
1044
1.27M
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
1045
1.27M
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
1046
1.15M
                                              (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
1047
1.15M
                                                 100.0,
1048
1.27M
                    ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY);
1049
1050
1.27M
#if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1051
1.27M
                if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1052
0
                {
1053
0
                    ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1054
0
                    ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1055
0
                        ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1056
0
                }
1057
1.27M
#endif
1058
1.27M
            }
1059
1060
            /* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and
1061
            TU_EQ_CU_DIV2 case */
1062
1063
1.28M
            if((ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] !=
1064
1.28M
                255) &&
1065
191k
               (8 != ps_cu_analyse->u1_cu_size))
1066
191k
            {
1067
                /* RDOPT copy States : Prev Cu best to current init */
1068
191k
                COPY_CABAC_STATES(
1069
191k
                    &ps_ctxt->au1_rdopt_init_ctxt_models[0],
1070
191k
                    &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1071
191k
                    IHEVC_CAB_CTXT_END);
1072
1073
                /* RDOPT related copies and settings */
1074
191k
                ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
1075
1076
                /* Calc. best SATD mode for TU_EQ_CU_DIV2 case */
1077
191k
                ((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)(
1078
191k
                    ps_ctxt,
1079
191k
                    &s_chrm_cu_buf_prms,
1080
191k
                    ps_cu_analyse,
1081
191k
                    rd_opt_curr_idx,
1082
191k
                    TU_EQ_CU_DIV2,
1083
191k
                    !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
1084
191k
                                           : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
1085
188k
                                              (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
1086
188k
                                                 100.0,
1087
191k
                    ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY);
1088
1089
191k
#if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1090
191k
                if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1091
0
                {
1092
0
                    ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1093
0
                    ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1094
0
                        ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1095
0
                }
1096
191k
#endif
1097
191k
            }
1098
1.28M
        }
1099
1100
8.48M
        while(0 == end_flag)
1101
8.48M
        {
1102
8.48M
            UWORD8 *pu1_mode = NULL;
1103
8.48M
            WORD32 curr_func_mode = 0;
1104
8.48M
            void *pv_pred;
1105
1106
8.48M
            ASSERT(ctr < 36);
1107
1108
            /* TU equal to CU size evaluation of different modes */
1109
8.48M
            if(0 == cu_eval_done)
1110
6.47M
            {
1111
                /* check if the all the modes have been evaluated */
1112
6.47M
                if(255 == ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr])
1113
1.66M
                {
1114
1.66M
                    cu_eval_done = 1;
1115
1.66M
                    ctr = 0;
1116
1.66M
                }
1117
4.80M
                else if(
1118
4.80M
                    (1 == ctr) &&
1119
1.69M
                    ((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) ||
1120
1.63M
                     (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) &&
1121
232k
                    (ps_ctxt->i1_slice_type != ISLICE))
1122
47.8k
                {
1123
47.8k
                    ctr = 0;
1124
47.8k
                    cu_eval_done = 1;
1125
47.8k
                    subcu_eval_done = 1;
1126
47.8k
                    subpu_eval_done = 1;
1127
47.8k
                }
1128
4.76M
                else
1129
4.76M
                {
1130
4.76M
                    if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[ctr])
1131
55.8k
                    {
1132
55.8k
                        ctr++;
1133
55.8k
                        continue;
1134
55.8k
                    }
1135
1136
4.70M
                    pu1_mode =
1137
4.70M
                        &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr];
1138
4.70M
                    ctr++;
1139
4.70M
                    curr_func_mode = TU_EQ_CU;
1140
4.70M
                }
1141
6.47M
            }
1142
            /* Sub CU (NXN) mode evaluation of different pred modes */
1143
8.42M
            if((0 == subpu_eval_done) && (1 == cu_eval_done))
1144
2.12M
            {
1145
                /*For NxN modes evaluation all candidates for all PU parts are evaluated */
1146
                /*inside the ihevce_intra_rdopt_cu_ntu function, so the subpu_eval_done is set to 1 */
1147
2.12M
                {
1148
2.12M
                    pu1_mode = &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][ctr];
1149
1150
2.12M
                    curr_func_mode = TU_EQ_SUBCU;
1151
                    /* check if the any modes have to be evaluated */
1152
2.12M
                    if(255 == *pu1_mode)
1153
1.20M
                    {
1154
1.20M
                        subpu_eval_done = 1;
1155
1.20M
                        ctr = 0;
1156
1.20M
                    }
1157
921k
                    else if(ctr != 0) /* If the modes have to be evaluated, then terminate, as all modes are already evaluated */
1158
460k
                    {
1159
460k
                        subpu_eval_done = 1;
1160
460k
                        ctr = 0;
1161
460k
                    }
1162
460k
                    else
1163
460k
                    {
1164
460k
                        ctr++;
1165
460k
                    }
1166
2.12M
                }
1167
2.12M
            }
1168
1169
            /* TU size equal to CU div2 mode evaluation of different pred modes */
1170
8.42M
            if((0 == subcu_eval_done) && (1 == subpu_eval_done) && (1 == cu_eval_done))
1171
3.21M
            {
1172
                /* check if the all the modes have been evaluated */
1173
3.21M
                if(255 ==
1174
3.21M
                   ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr])
1175
1.66M
                {
1176
1.66M
                    subcu_eval_done = 1;
1177
1.66M
                }
1178
1.54M
                else if(
1179
1.54M
                    (1 == ctr) &&
1180
551k
                    ((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) ||
1181
507k
                     (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) &&
1182
144k
                    (ps_ctxt->i1_slice_type != ISLICE) && (ps_cu_analyse->u1_cu_size == 64))
1183
6
                {
1184
6
                    subcu_eval_done = 1;
1185
6
                }
1186
1.54M
                else
1187
1.54M
                {
1188
1.54M
                    if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[ctr])
1189
55.8k
                    {
1190
55.8k
                        ctr++;
1191
55.8k
                        continue;
1192
55.8k
                    }
1193
1194
1.48M
                    pu1_mode = &ps_cu_analyse->s_cu_intra_cand
1195
1.48M
                                    .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr];
1196
1197
1.48M
                    ctr++;
1198
1.48M
                    curr_func_mode = TU_EQ_CU_DIV2;
1199
1.48M
                }
1200
3.21M
            }
1201
1202
            /* check if all CU options have been evaluated */
1203
8.36M
            if((1 == cu_eval_done) && (1 == subcu_eval_done) && (1 == subpu_eval_done))
1204
1.71M
            {
1205
1.71M
                break;
1206
1.71M
            }
1207
1208
            /* RDOPT related copies and settings */
1209
6.65M
            ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
1210
1211
            /* Assign ME/Intra pred buf. to the current intra cand. since we
1212
            are storing pred data for final_recon function */
1213
6.65M
            {
1214
6.65M
                pv_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_curr_idx];
1215
6.65M
            }
1216
1217
            /* RDOPT copy States : Prev Cu best to current init */
1218
6.65M
            COPY_CABAC_STATES(
1219
6.65M
                &ps_ctxt->au1_rdopt_init_ctxt_models[0],
1220
6.65M
                &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
1221
6.65M
                IHEVC_CAB_CTXT_END);
1222
1223
            /* call the function which performs the normative Intra encode */
1224
6.65M
            rd_opt_cost = ((pf_intra_rdopt_cu_ntu)ps_ctxt->pv_intra_rdopt_cu_ntu)(
1225
6.65M
                ps_ctxt,
1226
6.65M
                ps_cu_prms,
1227
6.65M
                pv_pred,
1228
6.65M
                ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_curr_idx],
1229
6.65M
                &s_chrm_cu_buf_prms,
1230
6.65M
                pu1_mode,
1231
6.65M
                ps_cu_analyse,
1232
6.65M
                pv_curr_src,
1233
6.65M
                pv_cu_left,
1234
6.65M
                pv_cu_top,
1235
6.65M
                pv_cu_top_left,
1236
6.65M
                ps_left_nbr_4x4,
1237
6.65M
                ps_top_nbr_4x4,
1238
6.65M
                nbr_4x4_left_strd,
1239
6.65M
                cu_left_stride,
1240
6.65M
                rd_opt_curr_idx,
1241
6.65M
                curr_func_mode,
1242
6.65M
                !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
1243
6.65M
                                       : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
1244
6.24M
                                          (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
1245
6.24M
                                             100.0);
1246
1247
6.65M
#if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1248
6.65M
            if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1249
0
            {
1250
0
                ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1251
0
                ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1252
0
                    ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1253
0
            }
1254
6.65M
#endif
1255
1256
            /* based on the rd opt cost choose the best and current index */
1257
6.65M
            if(rd_opt_cost < rd_opt_least_cost)
1258
1.70M
            {
1259
                /* swap the best and current indx */
1260
1.70M
                rd_opt_best_idx = !rd_opt_best_idx;
1261
1.70M
                rd_opt_curr_idx = !rd_opt_curr_idx;
1262
1.70M
                i4_best_cu_qp = ps_ctxt->i4_cu_qp;
1263
1264
1.70M
                rd_opt_least_cost = rd_opt_cost;
1265
1.70M
                ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost;
1266
1.70M
            }
1267
1268
6.65M
            if((TU_EQ_SUBCU == curr_func_mode) &&
1269
460k
               (ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_intra_flag) &&
1270
374k
               (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P2) && !FORCE_INTRA_TU_DEPTH_TO_0)
1271
214k
            {
1272
214k
                UWORD8 au1_tu_eq_cu_div2_modes[4];
1273
214k
                UWORD8 au1_freq_of_mode[4];
1274
1275
214k
                if(ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_part_mode == SIZE_2Nx2N)
1276
79.7k
                {
1277
79.7k
                    ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1278
79.7k
                        255;  //ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode[0];
1279
79.7k
                    ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] =
1280
79.7k
                        255;
1281
79.7k
                }
1282
134k
                else
1283
134k
                {
1284
134k
                    WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D(
1285
134k
                        ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode,
1286
134k
                        au1_tu_eq_cu_div2_modes,
1287
134k
                        au1_freq_of_mode,
1288
134k
                        4);
1289
1290
134k
                    if(2 == i4_num_clusters)
1291
51.9k
                    {
1292
51.9k
                        if(au1_freq_of_mode[0] == 3)
1293
22.9k
                        {
1294
22.9k
                            ps_cu_analyse->s_cu_intra_cand
1295
22.9k
                                .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1296
22.9k
                                au1_tu_eq_cu_div2_modes[0];
1297
22.9k
                            ps_cu_analyse->s_cu_intra_cand
1298
22.9k
                                .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255;
1299
22.9k
                        }
1300
28.9k
                        else if(au1_freq_of_mode[1] == 3)
1301
4.93k
                        {
1302
4.93k
                            ps_cu_analyse->s_cu_intra_cand
1303
4.93k
                                .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1304
4.93k
                                au1_tu_eq_cu_div2_modes[1];
1305
4.93k
                            ps_cu_analyse->s_cu_intra_cand
1306
4.93k
                                .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255;
1307
4.93k
                        }
1308
24.0k
                        else
1309
24.0k
                        {
1310
24.0k
                            ps_cu_analyse->s_cu_intra_cand
1311
24.0k
                                .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
1312
24.0k
                                au1_tu_eq_cu_div2_modes[0];
1313
24.0k
                            ps_cu_analyse->s_cu_intra_cand
1314
24.0k
                                .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] =
1315
24.0k
                                au1_tu_eq_cu_div2_modes[1];
1316
24.0k
                            ps_cu_analyse->s_cu_intra_cand
1317
24.0k
                                .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[2] = 255;
1318
24.0k
                        }
1319
51.9k
                    }
1320
134k
                }
1321
214k
            }
1322
1323
            /* set the neighbour map to 0 */
1324
6.65M
            ihevce_set_nbr_map(
1325
6.65M
                ps_ctxt->pu1_ctb_nbr_map,
1326
6.65M
                ps_ctxt->i4_nbr_map_strd,
1327
6.65M
                (ps_cu_analyse->b3_cu_pos_x << 1),
1328
6.65M
                (ps_cu_analyse->b3_cu_pos_y << 1),
1329
6.65M
                (ps_cu_analyse->u1_cu_size >> 2),
1330
6.65M
                0);
1331
6.65M
        }
1332
1333
1.71M
    } /* end of Intra RD OPT cand evaluation */
1334
1335
1.99M
    ASSERT(i4_best_cu_qp > (ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1));
1336
1.99M
    ps_ctxt->i4_cu_qp = i4_best_cu_qp;
1337
1.99M
    ps_cu_analyse->i1_cu_qp = i4_best_cu_qp;
1338
1339
    /* --------------------------------------- */
1340
    /* --------Final mode Recon ---------- */
1341
    /* --------------------------------------- */
1342
1.99M
    {
1343
1.99M
        enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
1344
1.99M
        void *pv_final_pred = NULL;
1345
1.99M
        WORD32 final_pred_strd = 0;
1346
1.99M
        void *pv_final_pred_chrm = NULL;
1347
1.99M
        WORD32 final_pred_strd_chrm = 0;
1348
1.99M
        WORD32 packed_pred_mode;
1349
1350
1.99M
#if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1351
1.99M
        if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
1352
1.18M
        {
1353
1.18M
            pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0];
1354
1.18M
        }
1355
#else
1356
        pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0];
1357
#endif
1358
1359
1.99M
        ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
1360
1.99M
        packed_pred_mode =
1361
1.99M
            ps_enc_loop_bestprms->u1_intra_flag + (ps_enc_loop_bestprms->u1_skip_flag) * 2;
1362
1363
1.99M
        if(!ps_ctxt->u1_is_input_data_hbd)
1364
1.99M
        {
1365
1.99M
            if(ps_enc_loop_bestprms->u1_intra_flag)
1366
995k
            {
1367
995k
                pv_final_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_best_idx];
1368
995k
                final_pred_strd =
1369
995k
                    ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_best_idx];
1370
995k
            }
1371
999k
            else
1372
999k
            {
1373
999k
                pv_final_pred = ps_best_inter_cand->pu1_pred_data;
1374
999k
                final_pred_strd = ps_best_inter_cand->i4_pred_data_stride;
1375
999k
            }
1376
1377
1.99M
            pv_final_pred_chrm =
1378
1.99M
                ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
1379
1.99M
                rd_opt_best_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) +
1380
1.99M
                                   (u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
1381
1.99M
            final_pred_strd_chrm =
1382
1.99M
                ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
1383
1.99M
        }
1384
1385
1.99M
        ihevce_set_eval_flags(ps_ctxt, ps_enc_loop_bestprms);
1386
1387
1.99M
        {
1388
1.99M
            final_mode_process_prms_t s_prms;
1389
1390
1.99M
            void *pv_cu_luma_recon;
1391
1.99M
            void *pv_cu_chroma_recon;
1392
1.99M
            WORD32 luma_stride, chroma_stride;
1393
1394
1.99M
            if(!ps_ctxt->u1_is_input_data_hbd)
1395
1.99M
            {
1396
1.99M
#if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1397
1.99M
                if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
1398
1.18M
                {
1399
1.18M
                    pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon;
1400
1.18M
                    pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon;
1401
1.18M
                    luma_stride = ps_cu_analyse->u1_cu_size;
1402
1.18M
                    chroma_stride = ps_cu_analyse->u1_cu_size;
1403
1.18M
                }
1404
814k
                else
1405
814k
                {
1406
                    /* based on CU position derive the luma pointers */
1407
814k
                    pv_cu_luma_recon = pu1_final_recon;
1408
1409
                    /* based on CU position derive the chroma pointers */
1410
814k
                    pv_cu_chroma_recon = s_chrm_cu_buf_prms.pu1_final_recon;
1411
1412
814k
                    luma_stride = ps_cu_prms->i4_luma_recon_stride;
1413
1414
814k
                    chroma_stride = ps_cu_prms->i4_chrm_recon_stride;
1415
814k
                }
1416
#else
1417
                pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon;
1418
                pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon;
1419
                luma_stride = ps_cu_analyse->u1_cu_size;
1420
                chroma_stride = ps_cu_analyse->u1_cu_size;
1421
#endif
1422
1423
1.99M
                s_prms.ps_cu_nbr_prms = &s_cu_nbr_prms;
1424
1.99M
                s_prms.ps_best_inter_cand = ps_best_inter_cand;
1425
1.99M
                s_prms.ps_chrm_cu_buf_prms = &s_chrm_cu_buf_prms;
1426
1.99M
                s_prms.packed_pred_mode = packed_pred_mode;
1427
1.99M
                s_prms.rd_opt_best_idx = rd_opt_best_idx;
1428
1.99M
                s_prms.pv_src = pu1_curr_src;
1429
1.99M
                s_prms.src_strd = ps_cu_prms->i4_luma_src_stride;
1430
1.99M
                s_prms.pv_pred = pv_final_pred;
1431
1.99M
                s_prms.pred_strd = final_pred_strd;
1432
1.99M
                s_prms.pv_pred_chrm = pv_final_pred_chrm;
1433
1.99M
                s_prms.pred_chrm_strd = final_pred_strd_chrm;
1434
1.99M
                s_prms.pu1_final_ecd_data = pu1_ecd_data;
1435
1.99M
                s_prms.pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
1436
1.99M
                s_prms.csbf_strd = ps_ctxt->i4_cu_csbf_strd;
1437
1.99M
                s_prms.pv_luma_recon = pv_cu_luma_recon;
1438
1.99M
                s_prms.recon_luma_strd = luma_stride;
1439
1.99M
                s_prms.pv_chrm_recon = pv_cu_chroma_recon;
1440
1.99M
                s_prms.recon_chrma_strd = chroma_stride;
1441
1.99M
                s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
1442
1.99M
                s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
1443
1.99M
                s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size;
1444
1.99M
                s_prms.i1_cu_qp = ps_cu_analyse->i1_cu_qp;
1445
1.99M
                s_prms.u1_will_cabac_state_change = 1;
1446
1.99M
                s_prms.u1_recompute_sbh_and_rdoq = 0;
1447
1.99M
                s_prms.u1_is_first_pass = 1;
1448
1.99M
            }
1449
1450
1.99M
#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
1451
1.99M
            s_prms.u1_is_cu_noisy = !ps_enc_loop_bestprms->u1_intra_flag
1452
1.99M
                                        ? ps_cu_prms->u1_is_cu_noisy
1453
1.99M
                                        : ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY;
1454
1.99M
#endif
1455
1456
1.99M
            ((pf_final_rdopt_mode_prcs)ps_ctxt->pv_final_rdopt_mode_prcs)(ps_ctxt, &s_prms);
1457
1458
1.99M
#if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
1459
1.99M
            if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
1460
0
            {
1461
0
                ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
1462
0
                ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
1463
0
                    ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
1464
0
            }
1465
1.99M
#endif
1466
1.99M
        }
1467
1.99M
    }
1468
1469
    /* --------------------------------------- */
1470
    /* --------Populate CU out prms ---------- */
1471
    /* --------------------------------------- */
1472
1.99M
    {
1473
1.99M
        enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
1474
1.99M
        UWORD8 *pu1_pu_map;
1475
1.99M
        ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
1476
1477
        /* Corner case : If Part is 2Nx2N and Merge has all TU with zero cbf */
1478
        /* then it has to be coded as skip CU */
1479
1.99M
        if((SIZE_2Nx2N == ps_enc_loop_bestprms->u1_part_mode) &&
1480
1.76M
           (1 == ps_enc_loop_bestprms->as_pu_enc_loop[0].b1_merge_flag) &&
1481
841k
           (0 == ps_enc_loop_bestprms->u1_skip_flag) && (0 == ps_enc_loop_bestprms->u1_is_cu_coded))
1482
535k
        {
1483
535k
            ps_enc_loop_bestprms->u1_skip_flag = 1;
1484
535k
        }
1485
1486
        /* update number PUs in CU */
1487
1.99M
        ps_cu_prms->i4_num_pus_in_cu = ps_enc_loop_bestprms->u2_num_pus_in_cu;
1488
1489
        /* ---- populate the colocated pu map index --- */
1490
4.04M
        for(ctr = 0; ctr < ps_enc_loop_bestprms->u2_num_pus_in_cu; ctr++)
1491
2.04M
        {
1492
2.04M
            WORD32 i;
1493
2.04M
            WORD32 vert_ht;
1494
2.04M
            WORD32 horz_wd;
1495
1496
2.04M
            if(ps_enc_loop_bestprms->u1_intra_flag)
1497
995k
            {
1498
995k
                ps_enc_loop_bestprms->as_col_pu_enc_loop[ctr].b1_intra_flag = 1;
1499
995k
                vert_ht = ps_cu_analyse->u1_cu_size >> 2;
1500
995k
                horz_wd = ps_cu_analyse->u1_cu_size >> 2;
1501
995k
            }
1502
1.04M
            else
1503
1.04M
            {
1504
1.04M
                vert_ht = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_ht + 1) << 2) >> 2);
1505
1.04M
                horz_wd = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_wd + 1) << 2) >> 2);
1506
1.04M
            }
1507
1508
2.04M
            pu1_pu_map = pu1_col_pu_map + ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_x;
1509
2.04M
            pu1_pu_map += (ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_y * num_4x4_in_ctb);
1510
1511
10.0M
            for(i = 0; i < vert_ht; i++)
1512
7.99M
            {
1513
7.99M
                memset(pu1_pu_map, col_start_pu_idx, horz_wd);
1514
7.99M
                pu1_pu_map += num_4x4_in_ctb;
1515
7.99M
            }
1516
            /* increment the index */
1517
2.04M
            col_start_pu_idx++;
1518
2.04M
        }
1519
        /* ---- copy the colocated PUs to frm pu ----- */
1520
1.99M
        memcpy(
1521
1.99M
            ps_col_pu,
1522
1.99M
            &ps_enc_loop_bestprms->as_col_pu_enc_loop[0],
1523
1.99M
            ps_enc_loop_bestprms->u2_num_pus_in_cu * sizeof(pu_col_mv_t));
1524
1525
        /*---populate qp for 4x4 nbr array based on skip and cbf zero flag---*/
1526
1.99M
        {
1527
1.99M
            entropy_context_t *ps_entropy_ctxt;
1528
1529
1.99M
            WORD32 diff_cu_qp_delta_depth, log2_ctb_size;
1530
1531
1.99M
            WORD32 log2_min_cu_qp_delta_size;
1532
1.99M
            UWORD32 block_addr_align;
1533
1.99M
            ps_entropy_ctxt = ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt;
1534
1535
1.99M
            log2_ctb_size = ps_entropy_ctxt->i1_log2_ctb_size;
1536
1.99M
            diff_cu_qp_delta_depth = ps_entropy_ctxt->ps_pps->i1_diff_cu_qp_delta_depth;
1537
1538
1.99M
            log2_min_cu_qp_delta_size = log2_ctb_size - diff_cu_qp_delta_depth;
1539
1.99M
            block_addr_align = 15 << (log2_min_cu_qp_delta_size - 3);
1540
1541
1.99M
            ps_entropy_ctxt->i4_qg_pos_x = ps_cu_analyse->b3_cu_pos_x & block_addr_align;
1542
1.99M
            ps_entropy_ctxt->i4_qg_pos_y = ps_cu_analyse->b3_cu_pos_y & block_addr_align;
1543
            /*Update the Qp value used. It will not have a valid value iff
1544
            current CU is (skipped/no_cbf). In that case the Qp needed for
1545
            deblocking is calculated from top/left/previous coded CU*/
1546
1547
1.99M
            ps_ctxt->ps_enc_out_ctxt->i1_cu_qp = ps_cu_analyse->i1_cu_qp;
1548
1549
1.99M
            if(ps_entropy_ctxt->i4_qg_pos_x == ps_cu_analyse->b3_cu_pos_x &&
1550
1.99M
               ps_entropy_ctxt->i4_qg_pos_y == ps_cu_analyse->b3_cu_pos_y)
1551
1.99M
            {
1552
1.99M
                ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 1;
1553
1.99M
            }
1554
0
            else
1555
0
            {
1556
0
                ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 0;
1557
0
            }
1558
1.99M
        }
1559
1560
        /* -- at the end of CU set the neighbour map to 1 -- */
1561
1.99M
        ihevce_set_nbr_map(
1562
1.99M
            ps_ctxt->pu1_ctb_nbr_map,
1563
1.99M
            ps_ctxt->i4_nbr_map_strd,
1564
1.99M
            (ps_cu_analyse->b3_cu_pos_x << 1),
1565
1.99M
            (ps_cu_analyse->b3_cu_pos_y << 1),
1566
1.99M
            (ps_cu_analyse->u1_cu_size >> 2),
1567
1.99M
            1);
1568
1569
        /* -- at the end of CU update best cabac rdopt states -- */
1570
        /* -- and also set the top row skip flags  ------------- */
1571
1.99M
        ihevce_entropy_update_best_cu_states(
1572
1.99M
            &ps_ctxt->s_rdopt_entropy_ctxt,
1573
1.99M
            ps_cu_analyse->b3_cu_pos_x,
1574
1.99M
            ps_cu_analyse->b3_cu_pos_y,
1575
1.99M
            ps_cu_analyse->u1_cu_size,
1576
1.99M
            0,
1577
1.99M
            rd_opt_best_idx);
1578
1.99M
    }
1579
1580
    /* Store Output struct */
1581
#if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
1582
    {
1583
        {
1584
            memcpy(
1585
                &ps_ctxt->ps_enc_out_ctxt->s_cu_prms,
1586
                &ps_ctxt->as_cu_prms[rd_opt_best_idx],
1587
                sizeof(enc_loop_cu_final_prms_t));
1588
        }
1589
1590
        memcpy(
1591
            &ps_ctxt->as_cu_recur_nbr[0],
1592
            &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0],
1593
            sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) *
1594
                (ps_cu_analyse->u1_cu_size >> 2));
1595
1596
        ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms;
1597
1598
        ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0];
1599
    }
1600
#else
1601
1.99M
    if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P2)
1602
814k
    {
1603
814k
        ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
1604
1605
814k
        ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0];
1606
1607
814k
        if(ps_ctxt->u1_disable_intra_eval && ps_ctxt->i4_deblk_pad_hpel_cur_pic)
1608
7.16k
        {
1609
            /* Wait till top data is ready          */
1610
            /* Currently checking till top right CU */
1611
7.16k
            curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
1612
1613
7.16k
            if(i4_ctb_y_off == 0)
1614
7.05k
            {
1615
                /* No wait for 1st row */
1616
7.05k
                cu_top_right_offset = -(MAX_CTB_SIZE);
1617
7.05k
                {
1618
7.05k
                    ihevce_tile_params_t *ps_col_tile_params =
1619
7.05k
                        ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
1620
7.05k
                         ps_ctxt->i4_tile_col_idx);
1621
1622
                    /* No wait for 1st row */
1623
7.05k
                    cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
1624
7.05k
                }
1625
7.05k
                cu_top_right_dep_pos = 0;
1626
7.05k
            }
1627
109
            else
1628
109
            {
1629
109
                cu_top_right_offset = (ps_cu_analyse->u1_cu_size);
1630
109
                cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
1631
109
            }
1632
1633
7.16k
            if(0 == ps_cu_analyse->b3_cu_pos_y)
1634
3.17k
            {
1635
3.17k
                ihevce_dmgr_chk_row_row_sync(
1636
3.17k
                    ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
1637
3.17k
                    curr_cu_pos_in_row,
1638
3.17k
                    cu_top_right_offset,
1639
3.17k
                    cu_top_right_dep_pos,
1640
3.17k
                    ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1641
3.17k
                    ps_ctxt->thrd_id);
1642
3.17k
            }
1643
7.16k
        }
1644
814k
    }
1645
1.18M
    else
1646
1.18M
    {
1647
1.18M
        {
1648
1.18M
            memcpy(
1649
1.18M
                &ps_ctxt->ps_enc_out_ctxt->s_cu_prms,
1650
1.18M
                &ps_ctxt->as_cu_prms[rd_opt_best_idx],
1651
1.18M
                sizeof(enc_loop_cu_final_prms_t));
1652
1.18M
        }
1653
1654
1.18M
        memcpy(
1655
1.18M
            &ps_ctxt->as_cu_recur_nbr[0],
1656
1.18M
            &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0],
1657
1.18M
            sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) *
1658
1.18M
                (ps_cu_analyse->u1_cu_size >> 2));
1659
1660
1.18M
        ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms;
1661
1662
1.18M
        ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0];
1663
1.18M
    }
1664
1.99M
#endif
1665
1666
1.99M
    ps_ctxt->s_pred_buf_data.u4_is_buf_in_use &=
1667
1.99M
        ~((1 << (ps_ctxt->i4_max_num_inter_rdopt_cands + 4)) - 1);
1668
1669
1.99M
    return rd_opt_least_cost;
1670
1.99M
}
1671
1672
/*!
1673
******************************************************************************
1674
* \if Function name : ihevce_enc_loop_process_row \endif
1675
*
1676
* \brief
1677
*    Row level enc_loop pass function
1678
*
1679
* \param[in] pv_ctxt : pointer to enc_loop module
1680
* \param[in] ps_curr_src_bufs  : pointer to input yuv buffer (row buffer)
1681
* \param[out] ps_curr_recon_bufs : pointer recon picture structure pointer (row buffer)
1682
* \param[in] ps_ctb_in : pointer CTB structure (output of ME/IPE) (row buffer)
1683
* \param[out] ps_ctb_out : pointer CTB output structure (row buffer)
1684
* \param[out] ps_cu_out : pointer CU output structure (row buffer)
1685
* \param[out] ps_tu_out : pointer TU output structure (row buffer)
1686
* \param[out] pi2_frm_coeffs : pointer coeff output (row buffer)
1687
* \param[in] i4_poc : current poc. Needed to send recon in dist-client mode
1688
*
1689
* \return
1690
*    None
1691
*
1692
* Note : Currently the frame level calculations assume that
1693
*        the frame width of the input/recon is an exact multiple of ctbsize
1694
*
1695
* \author
1696
*  Ittiam
1697
*
1698
*****************************************************************************
1699
*/
1700
void ihevce_enc_loop_process_row(
1701
    ihevce_enc_loop_ctxt_t *ps_ctxt,
1702
    iv_enc_yuv_buf_t *ps_curr_src_bufs,
1703
    iv_enc_yuv_buf_t *ps_curr_recon_bufs,
1704
    iv_enc_yuv_buf_src_t *ps_curr_recon_bufs_src,
1705
    UWORD8 **ppu1_y_subpel_planes,
1706
    ctb_analyse_t *ps_ctb_in,
1707
    ctb_enc_loop_out_t *ps_ctb_out,
1708
    ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse,
1709
    cur_ctb_cu_tree_t *ps_row_cu_tree,
1710
    cu_enc_loop_out_t *ps_row_cu,
1711
    tu_enc_loop_out_t *ps_row_tu,
1712
    pu_t *ps_row_pu,
1713
    pu_col_mv_t *ps_row_col_pu,
1714
    UWORD16 *pu2_num_pu_map,
1715
    UWORD8 *pu1_row_pu_map,
1716
    UWORD8 *pu1_row_ecd_data,
1717
    UWORD32 *pu4_pu_offsets,
1718
    frm_ctb_ctxt_t *ps_frm_ctb_prms,
1719
    WORD32 vert_ctr,
1720
    recon_pic_buf_t *ps_frm_recon,
1721
    void *pv_dep_mngr_encloop_dep_me,
1722
    pad_interp_recon_frm_t *ps_pad_interp_recon,
1723
    WORD32 i4_pass,
1724
    multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
1725
    ihevce_tile_params_t *ps_tile_params)
1726
104k
{
1727
104k
    enc_loop_cu_prms_t s_cu_prms;
1728
104k
    ctb_enc_loop_out_t *ps_ctb_out_dblk;
1729
1730
104k
    WORD32 ctb_ctr, ctb_start, ctb_end;
1731
104k
    WORD32 col_pu_map_idx;
1732
104k
    WORD32 num_ctbs_horz_pic;
1733
104k
    WORD32 ctb_size;
1734
104k
    WORD32 last_ctb_row_flag;
1735
104k
    WORD32 last_ctb_col_flag;
1736
104k
    WORD32 last_hz_ctb_wd;
1737
104k
    WORD32 last_vt_ctb_ht;
1738
104k
    void *pv_dep_mngr_enc_loop_dblk = ps_ctxt->pv_dep_mngr_enc_loop_dblk;
1739
104k
    void *pv_dep_mngr_enc_loop_sao = ps_ctxt->pv_dep_mngr_enc_loop_sao;
1740
104k
    void *pv_dep_mngr_enc_loop_cu_top_right = ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right;
1741
104k
    WORD32 dblk_offset, dblk_check_dep_pos;
1742
104k
    WORD32 sao_offset, sao_check_dep_pos;
1743
104k
    WORD32 aux_offset, aux_check_dep_pos;
1744
104k
    void *pv_dep_mngr_me_dep_encloop;
1745
104k
    ctb_enc_loop_out_t *ps_ctb_out_sao;
1746
    /*Structure to store deblocking parameters at CTB-row level*/
1747
104k
    deblk_ctbrow_prms_t s_deblk_ctb_row_params;
1748
104k
    UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2);
1749
1750
104k
    pv_dep_mngr_me_dep_encloop = (void *)ps_frm_recon->pv_dep_mngr_recon;
1751
104k
    num_ctbs_horz_pic = ps_frm_ctb_prms->i4_num_ctbs_horz;
1752
104k
    ctb_size = ps_frm_ctb_prms->i4_ctb_size;
1753
1754
    /* Store the num_ctb_horz in sao context*/
1755
104k
    ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_horz = ps_frm_ctb_prms->i4_num_ctbs_horz;
1756
104k
    ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_vert = ps_frm_ctb_prms->i4_num_ctbs_vert;
1757
1758
    /* Set Variables for Dep. Checking and Setting */
1759
104k
    aux_check_dep_pos = vert_ctr;
1760
104k
    aux_offset = 2; /* Should be there for 0th row also */
1761
104k
    if(vert_ctr > 0)
1762
9.64k
    {
1763
9.64k
        dblk_check_dep_pos = vert_ctr - 1;
1764
9.64k
        dblk_offset = 2;
1765
9.64k
    }
1766
94.8k
    else
1767
94.8k
    {
1768
        /* First row should run without waiting */
1769
94.8k
        dblk_check_dep_pos = 0;
1770
94.8k
        dblk_offset = -(ps_tile_params->i4_first_sample_x + 1);
1771
94.8k
    }
1772
1773
    /* Set sao_offset and sao_check_dep_pos */
1774
104k
    if(vert_ctr > 1)
1775
4.95k
    {
1776
4.95k
        sao_check_dep_pos = vert_ctr - 2;
1777
4.95k
        sao_offset = 2;
1778
4.95k
    }
1779
99.5k
    else
1780
99.5k
    {
1781
        /* First row should run without waiting */
1782
99.5k
        sao_check_dep_pos = 0;
1783
99.5k
        sao_offset = -(ps_tile_params->i4_first_sample_x + 1);
1784
99.5k
    }
1785
1786
    /* check if the current row being processed is the last CTB row */
1787
104k
    last_ctb_row_flag = (vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1));
1788
1789
    /* Valid Width (pixels) in the last CTB in every row (padding cases) */
1790
104k
    last_hz_ctb_wd = ps_frm_ctb_prms->i4_cu_aligned_pic_wd - ((num_ctbs_horz_pic - 1) * ctb_size);
1791
1792
    /* Valid Height (pixels) in the last CTB row (padding cases) */
1793
104k
    last_vt_ctb_ht = ps_frm_ctb_prms->i4_cu_aligned_pic_ht -
1794
104k
                     ((ps_frm_ctb_prms->i4_num_ctbs_vert - 1) * ctb_size);
1795
    /* reset the states copied flag */
1796
104k
    ps_ctxt->u1_cabac_states_next_row_copied_flag = 0;
1797
104k
    ps_ctxt->u1_cabac_states_first_cu_copied_flag = 0;
1798
1799
    /* populate the cu prms which are common for entire ctb row */
1800
104k
    s_cu_prms.i4_luma_src_stride = ps_curr_src_bufs->i4_y_strd;
1801
104k
    s_cu_prms.i4_chrm_src_stride = ps_curr_src_bufs->i4_uv_strd;
1802
104k
    s_cu_prms.i4_luma_recon_stride = ps_curr_recon_bufs->i4_y_strd;
1803
104k
    s_cu_prms.i4_chrm_recon_stride = ps_curr_recon_bufs->i4_uv_strd;
1804
104k
    s_cu_prms.i4_ctb_size = ctb_size;
1805
1806
104k
    ps_ctxt->i4_is_first_cu_qg_coded = 0;
1807
1808
    /* Initialize the number of PUs for the first CTB to 0 */
1809
104k
    *pu2_num_pu_map = 0;
1810
1811
    /*Getting the address of BS and Qp arrays and other info*/
1812
104k
    memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t));
1813
104k
    {
1814
104k
        WORD32 num_ctbs_horz_tile;
1815
        /* Update the pointers which are accessed not by using ctb_ctr
1816
        to the tile start here! */
1817
104k
        ps_ctb_in += ps_tile_params->i4_first_ctb_x;
1818
104k
        ps_ctb_out += ps_tile_params->i4_first_ctb_x;
1819
1820
104k
        ps_row_cu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_cus_in_ctb);
1821
104k
        ps_row_tu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_tus_in_ctb);
1822
104k
        ps_row_pu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb);
1823
104k
        pu1_row_pu_map += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb);
1824
104k
        pu1_row_ecd_data +=
1825
104k
            (ps_tile_params->i4_first_ctb_x *
1826
104k
             ((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_num_tus_in_ctb << 1)
1827
104k
                                : ((ps_frm_ctb_prms->i4_num_tus_in_ctb * 3) >> 1)) *
1828
104k
             MAX_SCAN_COEFFS_BYTES_4x4);
1829
1830
        /* Update the pointers to the tile start */
1831
104k
        s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
1832
104k
            (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3));  //one vertical edge per 8x8 block
1833
104k
        s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
1834
104k
            (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3));  //one horizontal edge per 8x8 block
1835
104k
        s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2));
1836
1837
104k
        num_ctbs_horz_tile = ps_tile_params->i4_curr_tile_wd_in_ctb_unit;
1838
1839
104k
        ctb_start = ps_tile_params->i4_first_ctb_x;
1840
104k
        ctb_end = ps_tile_params->i4_first_ctb_x + num_ctbs_horz_tile;
1841
104k
    }
1842
104k
    ps_ctb_out_dblk = ps_ctb_out;
1843
1844
104k
    ps_ctxt->i4_last_cu_qp_from_prev_ctb = ps_ctxt->i4_frame_qp;
1845
1846
    /* --------- Loop over all the CTBs in a row --------------- */
1847
235k
    for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
1848
131k
    {
1849
131k
        cu_final_update_prms s_cu_update_prms;
1850
1851
131k
        cur_ctb_cu_tree_t *ps_cu_tree_analyse;
1852
131k
        me_ctb_data_t *ps_cu_me_data;
1853
131k
        ipe_l0_ctb_analyse_for_me_t *ps_ctb_ipe_analyse;
1854
131k
        cu_enc_loop_out_t *ps_cu_final;
1855
131k
        pu_col_mv_t *ps_ctb_col_pu;
1856
1857
131k
        WORD32 cur_ctb_ht, cur_ctb_wd;
1858
131k
        WORD32 last_cu_pos_in_ctb;
1859
131k
        WORD32 last_cu_size;
1860
131k
        WORD32 num_pus_in_ctb;
1861
131k
        UWORD8 u1_is_ctb_noisy;
1862
131k
        ps_ctb_col_pu = ps_row_col_pu + ctb_ctr * ps_frm_ctb_prms->i4_num_pus_in_ctb;
1863
1864
131k
        if(ctb_ctr)
1865
26.5k
        {
1866
26.5k
            ps_ctxt->i4_prev_QP = ps_ctxt->i4_last_cu_qp_from_prev_ctb;
1867
26.5k
        }
1868
        /* If sub-pic level RC is enabled */
1869
131k
        if(ps_ctxt->i4_sub_pic_level_rc)
1870
0
        {
1871
0
            ihevce_sub_pic_rc_scale_query((void *)ps_multi_thrd_ctxt, (void *)ps_ctxt);
1872
0
        }
1873
        /* check if the current CTB is the last CTB column of the picture */
1874
131k
        last_ctb_col_flag = (ctb_ctr == (num_ctbs_horz_pic - 1));
1875
131k
        if(1 == last_ctb_col_flag)
1876
104k
        {
1877
104k
            cur_ctb_wd = last_hz_ctb_wd;
1878
104k
        }
1879
26.5k
        else
1880
26.5k
        {
1881
26.5k
            cur_ctb_wd = ctb_size;
1882
26.5k
        }
1883
1884
        /* If it's the last CTB, get the actual ht of CTB */
1885
131k
        if(1 == last_ctb_row_flag)
1886
108k
        {
1887
108k
            cur_ctb_ht = last_vt_ctb_ht;
1888
108k
        }
1889
23.0k
        else
1890
23.0k
        {
1891
23.0k
            cur_ctb_ht = ctb_size;
1892
23.0k
        }
1893
1894
131k
        ps_ctxt->u4_cur_ctb_ht = cur_ctb_ht;
1895
131k
        ps_ctxt->u4_cur_ctb_wd = cur_ctb_wd;
1896
1897
        /* Wait till reference frame recon is available */
1898
1899
        /* ------------ Wait till current data is ready from ME -------------- */
1900
1901
        /*only for ref instance and Non I pics */
1902
131k
        if((ps_ctxt->i4_bitrate_instance_num == 0) &&
1903
131k
           ((ISLICE != ps_ctxt->i1_slice_type) || L0ME_IN_OPENLOOP_MODE))
1904
80.4k
        {
1905
80.4k
            if(ctb_ctr < (num_ctbs_horz_pic))
1906
80.4k
            {
1907
80.4k
                ihevce_dmgr_chk_row_row_sync(
1908
80.4k
                    pv_dep_mngr_encloop_dep_me,
1909
80.4k
                    ctb_ctr,
1910
80.4k
                    1,
1911
80.4k
                    vert_ctr,
1912
80.4k
                    ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
1913
80.4k
                    ps_ctxt->thrd_id);
1914
80.4k
            }
1915
80.4k
        }
1916
1917
        /* store the cu pointer for current ctb out */
1918
131k
        ps_ctb_out->ps_enc_cu = ps_row_cu;
1919
131k
        ps_cu_final = ps_row_cu;
1920
1921
        /* Get the base point of CU recursion tree */
1922
131k
        if(ISLICE != ps_ctxt->i1_slice_type)
1923
80.4k
        {
1924
80.4k
            ps_cu_tree_analyse = ps_ctb_in->ps_cu_tree;
1925
80.4k
            ASSERT(ps_ctb_in->ps_cu_tree == (ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE)));
1926
80.4k
        }
1927
50.6k
        else
1928
50.6k
        {
1929
            /* Initialize ptr to current CTB */
1930
50.6k
            ps_cu_tree_analyse = ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE);
1931
50.6k
        }
1932
1933
        /* Get the ME data pointer for 16x16 block data in ctb */
1934
131k
        ps_cu_me_data = ps_ctb_in->ps_me_ctb_data;
1935
131k
        u1_is_ctb_noisy = ps_ctb_in->s_ctb_noise_params.i4_noise_present;
1936
131k
        s_cu_prms.u1_is_cu_noisy = u1_is_ctb_noisy;
1937
131k
        s_cu_prms.pu1_is_8x8Blk_noisy = ps_ctb_in->s_ctb_noise_params.au1_is_8x8Blk_noisy;
1938
1939
        /* store the ctb level prms in cu prms */
1940
131k
        s_cu_prms.i4_ctb_pos = ctb_ctr;
1941
1942
131k
        s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size;
1943
131k
        s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size;
1944
1945
131k
        {
1946
131k
            s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size;
1947
131k
            s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size;
1948
131k
        }
1949
1950
131k
        s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size;
1951
1952
131k
        s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size;
1953
1954
131k
        s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size;
1955
1956
        /* Initialize ptr to current CTB */
1957
131k
        ps_ctb_ipe_analyse = ps_row_ipe_analyse + ctb_ctr;  // * ctb_size;
1958
1959
        /* reset the map idx for current ctb */
1960
131k
        col_pu_map_idx = 0;
1961
131k
        num_pus_in_ctb = 0;
1962
1963
        /* reset the map buffer to 0*/
1964
1965
131k
        memset(
1966
131k
            &ps_ctxt->au1_nbr_ctb_map[0][0],
1967
131k
            0,
1968
131k
            (MAX_PU_IN_CTB_ROW + 1 + 8) * (MAX_PU_IN_CTB_ROW + 1 + 8));
1969
1970
        /* set the CTB neighbour availability flags */
1971
131k
        ihevce_set_ctb_nbr(
1972
131k
            &ps_ctb_out->s_ctb_nbr_avail_flags,
1973
131k
            ps_ctxt->pu1_ctb_nbr_map,
1974
131k
            ps_ctxt->i4_nbr_map_strd,
1975
131k
            ctb_ctr,
1976
131k
            vert_ctr,
1977
131k
            ps_frm_ctb_prms);
1978
1979
        /* -------- update the cur CTB offsets for inter prediction-------- */
1980
131k
        ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = ctb_ctr * ctb_size;
1981
131k
        ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = vert_ctr * ctb_size;
1982
1983
        /* -------- update the cur CTB offsets for MV prediction-------- */
1984
131k
        ps_ctxt->s_mv_pred_ctxt.i4_ctb_x = ctb_ctr;
1985
131k
        ps_ctxt->s_mv_pred_ctxt.i4_ctb_y = vert_ctr;
1986
1987
        /* -------------- Boundary Strength Initialization ----------- */
1988
131k
        if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
1989
127k
        {
1990
127k
            ihevce_bs_init_ctb(&ps_ctxt->s_deblk_bs_prms, ps_frm_ctb_prms, ctb_ctr, vert_ctr);
1991
127k
        }
1992
1993
        /* -------- update cur CTB offsets for entropy rdopt context------- */
1994
131k
        ihevce_entropy_rdo_ctb_init(&ps_ctxt->s_rdopt_entropy_ctxt, ctb_ctr, vert_ctr);
1995
1996
        /* --------- CU Recursion --------------- */
1997
1998
131k
        {
1999
#if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
2000
            WORD32 i4_max_tree_depth = 4;
2001
#endif
2002
131k
            WORD32 i4_tree_depth = 0;
2003
            /* Init no. of CU in CTB to 0*/
2004
131k
            ps_ctb_out->u1_num_cus_in_ctb = 0;
2005
2006
#if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
2007
            if(ps_ctxt->i4_bitrate_instance_num == 0)
2008
            {
2009
                WORD32 i4_max_tree_depth = 4;
2010
                WORD32 i;
2011
                for(i = 0; i < i4_max_tree_depth; i++)
2012
                {
2013
                    COPY_CABAC_STATES(
2014
                        &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2015
                        &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2016
                        IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2017
                }
2018
            }
2019
#else
2020
131k
            if(ps_ctxt->i4_bitrate_instance_num == 0)
2021
131k
            {
2022
131k
                if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2023
38.4k
                {
2024
38.4k
                    WORD32 i4_max_tree_depth = 4;
2025
38.4k
                    WORD32 i;
2026
192k
                    for(i = 0; i < i4_max_tree_depth; i++)
2027
153k
                    {
2028
153k
                        COPY_CABAC_STATES(
2029
153k
                            &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
2030
153k
                            &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
2031
153k
                            IHEVC_CAB_CTXT_END * sizeof(UWORD8));
2032
153k
                    }
2033
38.4k
                }
2034
131k
            }
2035
2036
131k
#endif
2037
131k
            if(ps_ctxt->i4_bitrate_instance_num == 0)
2038
131k
            {
2039
                /* FOR I- PIC populate the curr_ctb accordingly */
2040
131k
                if(ISLICE == ps_ctxt->i1_slice_type)
2041
50.6k
                {
2042
50.6k
                    ps_ctb_ipe_analyse->ps_cu_tree_root = ps_cu_tree_analyse;
2043
50.6k
                    ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1;
2044
2045
50.6k
                    ihevce_populate_cu_tree(
2046
50.6k
                        ps_ctb_ipe_analyse,
2047
50.6k
                        ps_cu_tree_analyse,
2048
50.6k
                        0,
2049
50.6k
                        (IHEVCE_QUALITY_CONFIG_T)ps_ctxt->i4_quality_preset,
2050
50.6k
                        POS_NA,
2051
50.6k
                        POS_NA,
2052
50.6k
                        POS_NA);
2053
50.6k
                }
2054
131k
            }
2055
131k
            ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1;
2056
131k
            ps_ctxt->ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
2057
131k
            ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
2058
2059
131k
            s_cu_update_prms.ppu1_row_ecd_data = &pu1_row_ecd_data;
2060
131k
            s_cu_update_prms.pi4_last_cu_pos_in_ctb = &last_cu_pos_in_ctb;
2061
131k
            s_cu_update_prms.pi4_last_cu_size = &last_cu_size;
2062
131k
            s_cu_update_prms.pi4_num_pus_in_ctb = &num_pus_in_ctb;
2063
131k
            s_cu_update_prms.pps_cu_final = &ps_cu_final;
2064
131k
            s_cu_update_prms.pps_row_pu = &ps_row_pu;
2065
131k
            s_cu_update_prms.pps_row_tu = &ps_row_tu;
2066
131k
            s_cu_update_prms.pu1_num_cus_in_ctb_out = &ps_ctb_out->u1_num_cus_in_ctb;
2067
2068
            // source satd computation
2069
            /* compute the source 8x8 SATD for the current CTB */
2070
            /* populate  pui4_source_satd in some structure and pass it inside */
2071
131k
            if(ps_ctxt->u1_enable_psyRDOPT)
2072
0
            {
2073
                /* declare local variables */
2074
0
                WORD32 i;
2075
0
                WORD32 ctb_size;
2076
0
                WORD32 num_comp_had_blocks;
2077
0
                UWORD8 *pu1_l0_block;
2078
0
                WORD32 block_ht;
2079
0
                WORD32 block_wd;
2080
0
                WORD32 ht_offset;
2081
0
                WORD32 wd_offset;
2082
2083
0
                WORD32 num_horz_blocks;
2084
0
                WORD32 had_block_size;
2085
0
                WORD32 total_had_block_size;
2086
0
                WORD16 pi2_residue_had_zscan[64];
2087
0
                UWORD8 ai1_zeros_buffer[64];
2088
2089
0
                WORD32 index_satd;
2090
0
                WORD32 is_hbd;
2091
                /* initialize the variables */
2092
0
                block_ht = cur_ctb_ht;
2093
0
                block_wd = cur_ctb_wd;
2094
2095
0
                is_hbd = ps_ctxt->u1_is_input_data_hbd;
2096
2097
0
                had_block_size = 8;
2098
0
                total_had_block_size = had_block_size * had_block_size;
2099
2100
0
                for(i = 0; i < total_had_block_size; i++)
2101
0
                {
2102
0
                    ai1_zeros_buffer[i] = 0;
2103
0
                }
2104
2105
0
                ctb_size = block_ht * block_wd;  //ctb_width * ctb_height;
2106
0
                num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);
2107
2108
0
                num_horz_blocks = block_wd / had_block_size;  //ctb_width / had_block_size;
2109
0
                ht_offset = -had_block_size;
2110
0
                wd_offset = -had_block_size;
2111
2112
0
                index_satd = 0;
2113
                /* Loop over all 8x8 blocks in the CTB */
2114
0
                for(i = 0; i < num_comp_had_blocks; i++)
2115
0
                {
2116
0
                    if(i % num_horz_blocks == 0)
2117
0
                    {
2118
0
                        wd_offset = -had_block_size;
2119
0
                        ht_offset += had_block_size;
2120
0
                    }
2121
0
                    wd_offset += had_block_size;
2122
2123
0
                    if(!is_hbd)
2124
0
                    {
2125
                        /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
2126
0
                        pu1_l0_block = s_cu_prms.pu1_luma_src +
2127
0
                                       ps_curr_src_bufs->i4_y_strd * ht_offset + wd_offset;
2128
2129
0
                        ps_ctxt->ai4_source_satd_8x8[index_satd] =
2130
2131
0
                            ps_ctxt->s_cmn_opt_func.pf_AC_HAD_8x8_8bit(
2132
0
                                pu1_l0_block,
2133
0
                                ps_curr_src_bufs->i4_y_strd,
2134
0
                                ai1_zeros_buffer,
2135
0
                                had_block_size,
2136
0
                                pi2_residue_had_zscan,
2137
0
                                had_block_size);
2138
0
                    }
2139
0
                    index_satd++;
2140
0
                }
2141
0
            }
2142
2143
131k
            if(ps_ctxt->u1_enable_psyRDOPT)
2144
0
            {
2145
                /* declare local variables */
2146
0
                WORD32 i;
2147
0
                WORD32 ctb_size;
2148
0
                WORD32 num_comp_had_blocks;
2149
0
                UWORD8 *pu1_l0_block;
2150
0
                UWORD8 *pu1_l0_block_prev = NULL;
2151
0
                WORD32 block_ht;
2152
0
                WORD32 block_wd;
2153
0
                WORD32 ht_offset;
2154
0
                WORD32 wd_offset;
2155
2156
0
                WORD32 num_horz_blocks;
2157
0
                WORD32 had_block_size;
2158
0
                WORD16 pi2_residue_had[64];
2159
0
                UWORD8 ai1_zeros_buffer[64];
2160
0
                WORD32 index_satd = 0;
2161
2162
0
                WORD32 is_hbd;
2163
0
                is_hbd = ps_ctxt->u1_is_input_data_hbd;  // 8 bit
2164
2165
                /* initialize the variables */
2166
                /* change this based on the bit depth */
2167
                // ps_ctxt->u1_chroma_array_type
2168
0
                if(ps_ctxt->u1_chroma_array_type == 1)
2169
0
                {
2170
0
                    block_ht = cur_ctb_ht / 2;
2171
0
                    block_wd = cur_ctb_wd / 2;
2172
0
                }
2173
0
                else
2174
0
                {
2175
0
                    block_ht = cur_ctb_ht;
2176
0
                    block_wd = cur_ctb_wd / 2;
2177
0
                }
2178
2179
0
                had_block_size = 4;
2180
0
                memset(ai1_zeros_buffer, 0, 64 * sizeof(UWORD8));
2181
2182
0
                ctb_size = block_ht * block_wd;  //ctb_width * ctb_height;
2183
0
                num_comp_had_blocks = 2 * ctb_size / (had_block_size * had_block_size);
2184
2185
0
                num_horz_blocks = 2 * block_wd / had_block_size;  //ctb_width / had_block_size;
2186
0
                ht_offset = -had_block_size;
2187
0
                wd_offset = -had_block_size;
2188
2189
0
                if(!is_hbd)
2190
0
                {
2191
                    /* loop over for every 4x4 blocks in the CU for Cb */
2192
0
                    for(i = 0; i < num_comp_had_blocks; i++)
2193
0
                    {
2194
0
                        if(i % num_horz_blocks == 0)
2195
0
                        {
2196
0
                            wd_offset = -had_block_size;
2197
0
                            ht_offset += had_block_size;
2198
0
                        }
2199
0
                        wd_offset += had_block_size;
2200
2201
                        /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
2202
0
                        if(i % 2 != 0)
2203
0
                        {
2204
0
                            if(!is_hbd)
2205
0
                            {
2206
0
                                pu1_l0_block = pu1_l0_block_prev + 1;
2207
0
                            }
2208
0
                        }
2209
0
                        else
2210
0
                        {
2211
0
                            if(!is_hbd)
2212
0
                            {
2213
0
                                pu1_l0_block = s_cu_prms.pu1_chrm_src +
2214
0
                                               s_cu_prms.i4_chrm_src_stride * ht_offset + wd_offset;
2215
0
                                pu1_l0_block_prev = pu1_l0_block;
2216
0
                            }
2217
0
                        }
2218
2219
0
                        if(had_block_size == 4)
2220
0
                        {
2221
0
                            if(!is_hbd)
2222
0
                            {
2223
0
                                ps_ctxt->ai4_source_chroma_satd[index_satd] =
2224
0
                                    ps_ctxt->s_cmn_opt_func.pf_chroma_AC_HAD_4x4_8bit(
2225
0
                                        pu1_l0_block,
2226
0
                                        s_cu_prms.i4_chrm_src_stride,
2227
0
                                        ai1_zeros_buffer,
2228
0
                                        had_block_size,
2229
0
                                        pi2_residue_had,
2230
0
                                        had_block_size);
2231
0
                            }
2232
2233
0
                            index_satd++;
2234
2235
0
                        }  // block size of 4x4
2236
2237
0
                    }  // for all blocks
2238
2239
0
                }  // is hbd check
2240
0
            }
2241
2242
131k
            ihevce_cu_recurse_decide(
2243
131k
                ps_ctxt,
2244
131k
                &s_cu_prms,
2245
131k
                ps_cu_tree_analyse,
2246
131k
                ps_cu_tree_analyse,
2247
131k
                ps_ctb_ipe_analyse,
2248
131k
                ps_cu_me_data,
2249
131k
                &ps_ctb_col_pu,
2250
131k
                &s_cu_update_prms,
2251
131k
                pu1_row_pu_map,
2252
131k
                &col_pu_map_idx,
2253
131k
                i4_tree_depth,
2254
131k
                ctb_ctr << 6,
2255
131k
                vert_ctr << 6,
2256
131k
                cur_ctb_ht);
2257
2258
131k
            if(ps_ctxt->i1_slice_type != ISLICE)
2259
80.4k
            {
2260
80.4k
                ASSERT(
2261
80.4k
                    (cur_ctb_wd * cur_ctb_ht) <=
2262
80.4k
                    ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree_analyse));
2263
80.4k
            }
2264
            /* If sub-pic level RC is enabled */
2265
131k
            if(1 == ps_ctxt->i4_sub_pic_level_rc)
2266
0
            {
2267
                /*In a row, after the required CTB is reached, send data and query scale from Bit Control thread */
2268
0
                ihevce_sub_pic_rc_in_data(
2269
0
                    (void *)ps_multi_thrd_ctxt,
2270
0
                    (void *)ps_ctxt,
2271
0
                    (void *)ps_ctb_ipe_analyse,
2272
0
                    (void *)ps_frm_ctb_prms);
2273
0
            }
2274
2275
131k
            ps_ctxt->ps_enc_out_ctxt->u1_cu_size = 128;
2276
2277
131k
        } /* End of CU recursion block */
2278
2279
#if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
2280
        {
2281
            ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
2282
            enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms;
2283
            ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
2284
2285
            do
2286
            {
2287
                ihevce_update_final_cu_results(
2288
                    ps_ctxt,
2289
                    ps_enc_out_ctxt,
2290
                    ps_cu_prms,
2291
                    NULL, /* &ps_ctb_col_pu */
2292
                    NULL, /* &col_pu_map_idx */
2293
                    &s_cu_update_prms,
2294
                    ctb_ctr,
2295
                    vert_ctr);
2296
2297
                ps_enc_out_ctxt++;
2298
2299
                ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE);
2300
2301
            } while(ps_enc_out_ctxt->u1_cu_size != 128);
2302
        }
2303
#else
2304
131k
        if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
2305
38.4k
        {
2306
38.4k
            ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
2307
38.4k
            enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms;
2308
38.4k
            ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
2309
2310
38.4k
            do
2311
444k
            {
2312
444k
                ihevce_update_final_cu_results(
2313
444k
                    ps_ctxt,
2314
444k
                    ps_enc_out_ctxt,
2315
444k
                    ps_cu_prms,
2316
444k
                    NULL, /* &ps_ctb_col_pu */
2317
444k
                    NULL, /* &col_pu_map_idx */
2318
444k
                    &s_cu_update_prms,
2319
444k
                    ctb_ctr,
2320
444k
                    vert_ctr);
2321
2322
444k
                ps_enc_out_ctxt++;
2323
2324
444k
                ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE);
2325
2326
444k
            } while(ps_enc_out_ctxt->u1_cu_size != 128);
2327
38.4k
        }
2328
131k
#endif
2329
2330
        /* --- ctb level copy of data to left buffers--*/
2331
131k
        ((pf_enc_loop_ctb_left_copy)ps_ctxt->pv_enc_loop_ctb_left_copy)(ps_ctxt, &s_cu_prms);
2332
2333
131k
        if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2334
127k
        {
2335
            /* For the Unaligned CTB, make the invalid edge boundary strength 0 */
2336
127k
            ihevce_bs_clear_invalid(
2337
127k
                &ps_ctxt->s_deblk_bs_prms,
2338
127k
                last_ctb_row_flag,
2339
127k
                (ctb_ctr == (num_ctbs_horz_pic - 1)),
2340
127k
                last_hz_ctb_wd,
2341
127k
                last_vt_ctb_ht);
2342
2343
            /* -----------------Read boundary strengths for current CTB------------- */
2344
2345
127k
            if((0 == ps_ctxt->i4_deblock_type) && (ps_ctxt->i4_deblk_pad_hpel_cur_pic))
2346
92.1k
            {
2347
                /*Storing boundary strengths of current CTB*/
2348
92.1k
                UWORD32 *pu4_bs_horz = &ps_ctxt->s_deblk_bs_prms.au4_horz_bs[0];
2349
92.1k
                UWORD32 *pu4_bs_vert = &ps_ctxt->s_deblk_bs_prms.au4_vert_bs[0];
2350
2351
92.1k
                memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_vert, pu4_bs_vert, (ctb_size * 4) / 8);
2352
92.1k
                memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_horz, pu4_bs_horz, (ctb_size * 4) / 8);
2353
92.1k
            }
2354
            //Increment for storing next CTB info
2355
127k
            s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
2356
127k
                (ctb_size >> 3);  //one vertical edge per 8x8 block
2357
127k
            s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
2358
127k
                (ctb_size >> 3);  //one horizontal edge per 8x8 block
2359
127k
        }
2360
2361
        /* -------------- ctb level updates ----------------- */
2362
131k
        ps_row_cu += ps_ctb_out->u1_num_cus_in_ctb;
2363
2364
131k
        pu1_row_pu_map += (ctb_size >> 2) * (ctb_size >> 2);
2365
2366
        /* first ctb offset will be populated by the caller */
2367
131k
        if(0 != ctb_ctr)
2368
26.5k
        {
2369
26.5k
            pu4_pu_offsets[ctb_ctr] = pu4_pu_offsets[ctb_ctr - 1] + num_pus_in_ctb;
2370
26.5k
        }
2371
131k
        pu2_num_pu_map[ctb_ctr] = num_pus_in_ctb;
2372
131k
        ASSERT(ps_ctb_out->u1_num_cus_in_ctb != 0);
2373
2374
131k
        ps_ctb_in++;
2375
131k
        ps_ctb_out++;
2376
131k
    }
2377
2378
    /* ---------- Encloop end of row updates ----------------- */
2379
2380
    /* at the end of row processing cu pixel counter is set to */
2381
    /* (num ctb * ctbsize) + ctb size                          */
2382
    /* this is to set the dependency for right most cu of last */
2383
    /* ctb's top right data dependency                         */
2384
    /* this even takes care of entropy dependency for          */
2385
    /* incomplete ctb as well                                  */
2386
104k
    ihevce_dmgr_set_row_row_sync(
2387
104k
        pv_dep_mngr_enc_loop_cu_top_right,
2388
104k
        (ctb_ctr * ctb_size + ctb_size),
2389
104k
        vert_ctr,
2390
104k
        ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2391
2392
104k
    ps_ctxt->s_sao_ctxt_t.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func;
2393
2394
    /* Restore structure.
2395
    Getting the address of stored-BS and Qp-map and other info */
2396
104k
    memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t));
2397
104k
    {
2398
        /* Update the pointers to the tile start */
2399
104k
        s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
2400
104k
            (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3));  //one vertical edge per 8x8 block
2401
104k
        s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
2402
104k
            (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3));  //one horizontal edge per 8x8 block
2403
104k
        s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2));
2404
104k
    }
2405
2406
#if PROFILE_ENC_REG_DATA
2407
    s_profile.u8_enc_reg_data[vert_ctr] = 0;
2408
#endif
2409
2410
    /* -- Loop over all the CTBs in a row for Deblocking and Subpel gen --- */
2411
104k
    if(!ps_ctxt->u1_is_input_data_hbd)
2412
104k
    {
2413
104k
        WORD32 last_col_pic, last_col_tile;
2414
2415
235k
        for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2416
131k
        {
2417
            /* store the ctb level prms in cu prms */
2418
131k
            s_cu_prms.i4_ctb_pos = ctb_ctr;
2419
131k
            s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size;
2420
131k
            s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size;
2421
2422
131k
            s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size;
2423
131k
            s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size;
2424
131k
            s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size;
2425
2426
131k
            s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size;
2427
2428
131k
            s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size;
2429
2430
            /* If last ctb in the horizontal row */
2431
131k
            if(ctb_ctr == (num_ctbs_horz_pic - 1))
2432
104k
            {
2433
104k
                last_col_pic = 1;
2434
104k
            }
2435
26.5k
            else
2436
26.5k
            {
2437
26.5k
                last_col_pic = 0;
2438
26.5k
            }
2439
2440
            /* If last ctb in the tile row */
2441
131k
            if(ctb_ctr == (ctb_end - 1))
2442
104k
            {
2443
104k
                last_col_tile = 1;
2444
104k
            }
2445
26.5k
            else
2446
26.5k
            {
2447
26.5k
                last_col_tile = 0;
2448
26.5k
            }
2449
2450
131k
            if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2451
127k
            {
2452
                /* for last ctb of a row check top instead of top right */
2453
127k
                if(((ctb_ctr + 1) == ctb_end) && (vert_ctr > 0))
2454
9.58k
                {
2455
9.58k
                    dblk_offset = 1;
2456
9.58k
                }
2457
                /* Wait till top neighbour CTB has done its deblocking */
2458
127k
                ihevce_dmgr_chk_row_row_sync(
2459
127k
                    pv_dep_mngr_enc_loop_dblk,
2460
127k
                    ctb_ctr,
2461
127k
                    dblk_offset,
2462
127k
                    dblk_check_dep_pos,
2463
127k
                    ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
2464
127k
                    ps_ctxt->thrd_id);
2465
2466
127k
                if((0 == ps_ctxt->i4_deblock_type))
2467
92.1k
                {
2468
                    /* Populate Qp-map */
2469
92.1k
                    if(ctb_start == ctb_ctr)
2470
73.8k
                    {
2471
73.8k
                        ihevce_deblk_populate_qp_map(
2472
73.8k
                            ps_ctxt,
2473
73.8k
                            &s_deblk_ctb_row_params,
2474
73.8k
                            ps_ctb_out_dblk,
2475
73.8k
                            vert_ctr,
2476
73.8k
                            ps_frm_ctb_prms,
2477
73.8k
                            ps_tile_params);
2478
73.8k
                    }
2479
92.1k
                    ps_ctxt->s_deblk_prms.i4_ctb_size = ctb_size;
2480
2481
                    /* recon pointers and stride */
2482
92.1k
                    ps_ctxt->s_deblk_prms.pu1_ctb_y = s_cu_prms.pu1_luma_recon;
2483
92.1k
                    ps_ctxt->s_deblk_prms.pu1_ctb_uv = s_cu_prms.pu1_chrm_recon;
2484
92.1k
                    ps_ctxt->s_deblk_prms.i4_luma_pic_stride = s_cu_prms.i4_luma_recon_stride;
2485
92.1k
                    ps_ctxt->s_deblk_prms.i4_chroma_pic_stride = s_cu_prms.i4_chrm_recon_stride;
2486
2487
92.1k
                    ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge = (0 == vert_ctr) ? 0 : 1;
2488
92.1k
                    {
2489
92.1k
                        ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge =
2490
92.1k
                            (ps_tile_params->i4_first_ctb_y == vert_ctr) ? 0 : 1;
2491
92.1k
                    }
2492
92.1k
                    ps_ctxt->s_deblk_prms.i4_deblock_left_ctb_edge = (ctb_start == ctb_ctr) ? 0 : 1;
2493
                    //or according to slice boundary. Support yet to be added !!!!
2494
2495
92.1k
                    ihevce_deblk_ctb(
2496
92.1k
                        &ps_ctxt->s_deblk_prms, last_col_tile, &s_deblk_ctb_row_params);
2497
2498
                    //Increment for storing next CTB info
2499
92.1k
                    s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
2500
92.1k
                        (ctb_size >> 3);  //one vertical edge per 8x8 block
2501
92.1k
                    s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
2502
92.1k
                        (ctb_size >> 3);  //one horizontal edge per 8x8 block
2503
92.1k
                    s_deblk_ctb_row_params.pi1_ctb_row_qp +=
2504
92.1k
                        (ctb_size >> 2);  //one qp per 4x4 block.
2505
92.1k
                }
2506
127k
            }  // end of if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2507
2508
            /* update the number of ctbs deblocked for this row */
2509
131k
            ihevce_dmgr_set_row_row_sync(
2510
131k
                pv_dep_mngr_enc_loop_dblk,
2511
131k
                (ctb_ctr + 1),
2512
131k
                vert_ctr,
2513
131k
                ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2514
2515
131k
        }  //end of loop over CTBs in current CTB-row
2516
2517
        /* Apply SAO over the previous CTB-row */
2518
235k
        for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2519
131k
        {
2520
131k
            if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2521
60.7k
               ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2522
70.3k
            {
2523
70.3k
                sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
2524
2525
70.3k
                if(vert_ctr > ps_tile_params->i4_first_ctb_y)
2526
16.6k
                {
2527
                    /*For last ctb check top dep only*/
2528
16.6k
                    if((vert_ctr > 1) && ((ctb_ctr + 1) == ctb_end))
2529
3.98k
                    {
2530
3.98k
                        sao_offset = 1;
2531
3.98k
                    }
2532
2533
16.6k
                    ihevce_dmgr_chk_row_row_sync(
2534
16.6k
                        pv_dep_mngr_enc_loop_sao,
2535
16.6k
                        ctb_ctr,
2536
16.6k
                        sao_offset,
2537
16.6k
                        sao_check_dep_pos,
2538
16.6k
                        ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
2539
16.6k
                        ps_ctxt->thrd_id);
2540
2541
                    /* Call the sao function to do sao for the current ctb*/
2542
2543
                    /* Register the curr ctb's x pos in sao context*/
2544
16.6k
                    ps_sao_ctxt->i4_ctb_x = ctb_ctr;
2545
2546
                    /* Register the curr ctb's y pos in sao context*/
2547
16.6k
                    ps_sao_ctxt->i4_ctb_y = vert_ctr - 1;
2548
2549
16.6k
                    ps_ctb_out_sao = ps_sao_ctxt->ps_ctb_out +
2550
16.6k
                                     (vert_ctr - 1) * ps_frm_ctb_prms->i4_num_ctbs_horz + ctb_ctr;
2551
16.6k
                    ps_sao_ctxt->ps_sao = &ps_ctb_out_sao->s_sao;
2552
16.6k
                    ps_sao_ctxt->i4_sao_blk_wd = ctb_size;
2553
16.6k
                    ps_sao_ctxt->i4_sao_blk_ht = ctb_size;
2554
2555
16.6k
                    ps_sao_ctxt->i4_is_last_ctb_row = 0;
2556
16.6k
                    ps_sao_ctxt->i4_is_last_ctb_col = 0;
2557
2558
16.6k
                    if((ctb_ctr + 1) == ctb_end)
2559
6.90k
                    {
2560
6.90k
                        ps_sao_ctxt->i4_is_last_ctb_col = 1;
2561
6.90k
                        ps_sao_ctxt->i4_sao_blk_wd =
2562
6.90k
                            ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) -
2563
6.90k
                                        ps_tile_params->i4_curr_tile_width);
2564
6.90k
                    }
2565
2566
                    /* Calculate the recon buf pointer and stride for the current ctb */
2567
16.6k
                    ps_sao_ctxt->pu1_cur_luma_recon_buf =
2568
16.6k
                        ps_sao_ctxt->pu1_frm_luma_recon_buf +
2569
16.6k
                        (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2570
16.6k
                        (ps_sao_ctxt->i4_ctb_x * ctb_size);
2571
2572
16.6k
                    ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
2573
2574
16.6k
                    ps_sao_ctxt->pu1_cur_chroma_recon_buf =
2575
16.6k
                        ps_sao_ctxt->pu1_frm_chroma_recon_buf +
2576
16.6k
                        (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
2577
16.6k
                         (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2578
16.6k
                        (ps_sao_ctxt->i4_ctb_x * ctb_size);
2579
2580
16.6k
                    ps_sao_ctxt->i4_cur_chroma_recon_stride =
2581
16.6k
                        ps_sao_ctxt->i4_frm_chroma_recon_stride;
2582
2583
16.6k
                    ps_sao_ctxt->pu1_cur_luma_src_buf =
2584
16.6k
                        ps_sao_ctxt->pu1_frm_luma_src_buf +
2585
16.6k
                        (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2586
16.6k
                        (ps_sao_ctxt->i4_ctb_x * ctb_size);
2587
2588
16.6k
                    ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
2589
2590
16.6k
                    ps_sao_ctxt->pu1_cur_chroma_src_buf =
2591
16.6k
                        ps_sao_ctxt->pu1_frm_chroma_src_buf +
2592
16.6k
                        (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
2593
16.6k
                         (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2594
16.6k
                        (ps_sao_ctxt->i4_ctb_x * ctb_size);
2595
2596
16.6k
                    ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
2597
2598
                    /* Calculate the pointer to buff to store the (x,y)th sao
2599
                    * for the top merge of (x,y+1)th ctb
2600
                    */
2601
16.6k
                    ps_sao_ctxt->ps_top_ctb_sao =
2602
16.6k
                        &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
2603
16.6k
                                                         [ps_sao_ctxt->i4_ctb_x +
2604
16.6k
                                                          (ps_sao_ctxt->i4_ctb_y) *
2605
16.6k
                                                              ps_frm_ctb_prms->i4_num_ctbs_horz +
2606
16.6k
                                                          (ps_ctxt->i4_bitrate_instance_num *
2607
16.6k
                                                           ps_sao_ctxt->i4_num_ctb_units)];
2608
2609
                    /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2610
16.6k
                    ps_sao_ctxt->pu1_curr_sao_src_top_luma =
2611
16.6k
                        ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
2612
16.6k
                        (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
2613
16.6k
                        ps_sao_ctxt->i4_ctb_x * ctb_size +
2614
16.6k
                        ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2615
16.6k
                                                            ps_sao_ctxt->i4_top_chroma_buf_size);
2616
2617
                    /* Calculate the pointer to buff to store the top chroma pixels of curr ctb */
2618
16.6k
                    ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
2619
16.6k
                        ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
2620
16.6k
                        (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
2621
16.6k
                        ps_sao_ctxt->i4_ctb_x * ctb_size +
2622
16.6k
                        ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2623
16.6k
                                                            ps_sao_ctxt->i4_top_chroma_buf_size);
2624
2625
16.6k
                    {
2626
16.6k
                        UWORD32 u4_ctb_sao_bits;
2627
2628
16.6k
                        ihevce_sao_analyse(
2629
16.6k
                            &ps_ctxt->s_sao_ctxt_t,
2630
16.6k
                            ps_ctb_out_sao,
2631
16.6k
                            &u4_ctb_sao_bits,
2632
16.6k
                            ps_tile_params);
2633
16.6k
                        ps_ctxt
2634
16.6k
                            ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2635
16.6k
                                                     [ps_ctxt->i4_bitrate_instance_num]
2636
16.6k
                            ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
2637
16.6k
                        ps_ctxt
2638
16.6k
                            ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2639
16.6k
                                                     [ps_ctxt->i4_bitrate_instance_num]
2640
16.6k
                            ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
2641
16.6k
                    }
2642
                    /** Subpel generation not done for non-ref picture **/
2643
16.6k
                    if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2644
16.6k
                    {
2645
                        /* Recon Padding */
2646
16.6k
                        ihevce_recon_padding(
2647
16.6k
                            ps_pad_interp_recon,
2648
16.6k
                            ctb_ctr,
2649
16.6k
                            vert_ctr - 1,
2650
16.6k
                            ps_frm_ctb_prms,
2651
16.6k
                            ps_ctxt->ps_func_selector);
2652
16.6k
                    }
2653
                    /* update the number of SAO ctbs for this row */
2654
16.6k
                    ihevce_dmgr_set_row_row_sync(
2655
16.6k
                        pv_dep_mngr_enc_loop_sao,
2656
16.6k
                        ctb_ctr + 1,
2657
16.6k
                        vert_ctr - 1,
2658
16.6k
                        ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
2659
16.6k
                }
2660
70.3k
            }
2661
60.7k
            else  //SAO Disabled
2662
60.7k
            {
2663
60.7k
                if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2664
57.1k
                {
2665
                    /* Recon Padding */
2666
57.1k
                    ihevce_recon_padding(
2667
57.1k
                        ps_pad_interp_recon,
2668
57.1k
                        ctb_ctr,
2669
57.1k
                        vert_ctr,
2670
57.1k
                        ps_frm_ctb_prms,
2671
57.1k
                        ps_ctxt->ps_func_selector);
2672
57.1k
                }
2673
60.7k
            }
2674
131k
        }  // end of SAO for loop
2675
2676
        /* Call the sao function again for the last ctb row of frame */
2677
104k
        if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2678
51.4k
           ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2679
53.0k
        {
2680
53.0k
            sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
2681
2682
53.0k
            if(vert_ctr ==
2683
53.0k
               (ps_tile_params->i4_first_ctb_y + ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1))
2684
46.1k
            {
2685
99.9k
                for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2686
53.7k
                {
2687
                    /* Register the curr ctb's x pos in sao context*/
2688
53.7k
                    ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr;
2689
2690
                    /* Register the curr ctb's y pos in sao context*/
2691
53.7k
                    ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr;
2692
2693
53.7k
                    ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out +
2694
53.7k
                                     vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz + ctb_ctr;
2695
2696
53.7k
                    ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao;
2697
2698
53.7k
                    ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd = ps_ctxt->s_sao_ctxt_t.i4_ctb_size;
2699
53.7k
                    ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 0;
2700
2701
53.7k
                    if((ctb_ctr + 1) == ctb_end)
2702
46.1k
                    {
2703
46.1k
                        ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 1;
2704
46.1k
                        ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd =
2705
46.1k
                            ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) -
2706
46.1k
                                        ps_tile_params->i4_curr_tile_width);
2707
46.1k
                    }
2708
2709
53.7k
                    ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht =
2710
53.7k
                        ctb_size - ((ps_tile_params->i4_curr_tile_ht_in_ctb_unit * ctb_size) -
2711
53.7k
                                    ps_tile_params->i4_curr_tile_height);
2712
2713
53.7k
                    ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 1;
2714
2715
                    /* Calculate the recon buf pointer and stride for teh current ctb */
2716
53.7k
                    ps_sao_ctxt->pu1_cur_luma_recon_buf =
2717
53.7k
                        ps_sao_ctxt->pu1_frm_luma_recon_buf +
2718
53.7k
                        (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2719
53.7k
                        (ps_sao_ctxt->i4_ctb_x * ctb_size);
2720
2721
53.7k
                    ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
2722
2723
53.7k
                    ps_sao_ctxt->pu1_cur_chroma_recon_buf =
2724
53.7k
                        ps_sao_ctxt->pu1_frm_chroma_recon_buf +
2725
53.7k
                        (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
2726
53.7k
                         (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2727
53.7k
                        (ps_sao_ctxt->i4_ctb_x * ctb_size);
2728
2729
53.7k
                    ps_sao_ctxt->i4_cur_chroma_recon_stride =
2730
53.7k
                        ps_sao_ctxt->i4_frm_chroma_recon_stride;
2731
2732
53.7k
                    ps_sao_ctxt->pu1_cur_luma_src_buf =
2733
53.7k
                        ps_sao_ctxt->pu1_frm_luma_src_buf +
2734
53.7k
                        (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
2735
53.7k
                        (ps_sao_ctxt->i4_ctb_x * ctb_size);
2736
2737
53.7k
                    ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
2738
2739
53.7k
                    ps_sao_ctxt->pu1_cur_chroma_src_buf =
2740
53.7k
                        ps_sao_ctxt->pu1_frm_chroma_src_buf +
2741
53.7k
                        (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
2742
53.7k
                         (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
2743
53.7k
                        (ps_sao_ctxt->i4_ctb_x * ctb_size);
2744
2745
53.7k
                    ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
2746
2747
                    /* Calculate the pointer to buff to store the (x,y)th sao
2748
                    * for the top merge of (x,y+1)th ctb
2749
                    */
2750
53.7k
                    ps_sao_ctxt->ps_top_ctb_sao =
2751
53.7k
                        &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
2752
53.7k
                                                         [ps_sao_ctxt->i4_ctb_x +
2753
53.7k
                                                          (ps_sao_ctxt->i4_ctb_y) *
2754
53.7k
                                                              ps_frm_ctb_prms->i4_num_ctbs_horz +
2755
53.7k
                                                          (ps_ctxt->i4_bitrate_instance_num *
2756
53.7k
                                                           ps_sao_ctxt->i4_num_ctb_units)];
2757
2758
                    /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2759
53.7k
                    ps_sao_ctxt->pu1_curr_sao_src_top_luma =
2760
53.7k
                        ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
2761
53.7k
                        (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
2762
53.7k
                        ps_sao_ctxt->i4_ctb_x * ctb_size +
2763
53.7k
                        ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2764
53.7k
                                                            ps_sao_ctxt->i4_top_chroma_buf_size);
2765
2766
                    /* Calculate the pointer to buff to store the top pixels of curr ctb*/
2767
53.7k
                    ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
2768
53.7k
                        ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
2769
53.7k
                        (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
2770
53.7k
                        ps_sao_ctxt->i4_ctb_x * ctb_size +
2771
53.7k
                        ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
2772
53.7k
                                                            ps_sao_ctxt->i4_top_chroma_buf_size);
2773
2774
53.7k
                    {
2775
53.7k
                        UWORD32 u4_ctb_sao_bits;
2776
53.7k
                        ihevce_sao_analyse(
2777
53.7k
                            &ps_ctxt->s_sao_ctxt_t,
2778
53.7k
                            ps_ctb_out_sao,
2779
53.7k
                            &u4_ctb_sao_bits,
2780
53.7k
                            ps_tile_params);
2781
53.7k
                        ps_ctxt
2782
53.7k
                            ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2783
53.7k
                                                     [ps_ctxt->i4_bitrate_instance_num]
2784
53.7k
                            ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
2785
53.7k
                        ps_ctxt
2786
53.7k
                            ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
2787
53.7k
                                                     [ps_ctxt->i4_bitrate_instance_num]
2788
53.7k
                            ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
2789
53.7k
                    }
2790
                    /** Subpel generation not done for non-ref picture **/
2791
53.7k
                    if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2792
53.7k
                    {
2793
                        /* Recon Padding */
2794
53.7k
                        ihevce_recon_padding(
2795
53.7k
                            ps_pad_interp_recon,
2796
53.7k
                            ctb_ctr,
2797
53.7k
                            vert_ctr,
2798
53.7k
                            ps_frm_ctb_prms,
2799
53.7k
                            ps_ctxt->ps_func_selector);
2800
53.7k
                    }
2801
53.7k
                }
2802
46.1k
            }  //end of loop over CTBs in current CTB-row
2803
53.0k
        }
2804
2805
        /* Subpel Plane Generation*/
2806
235k
        for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2807
131k
        {
2808
131k
            if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2809
60.7k
               ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2810
70.3k
            {
2811
70.3k
                if(0 != vert_ctr)
2812
16.6k
                {
2813
                    /** Subpel generation not done for non-ref picture **/
2814
16.6k
                    if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2815
16.6k
                    {
2816
                        /* Padding and Subpel Plane Generation */
2817
16.6k
                        ihevce_pad_interp_recon_ctb(
2818
16.6k
                            ps_pad_interp_recon,
2819
16.6k
                            ctb_ctr,
2820
16.6k
                            vert_ctr - 1,
2821
16.6k
                            ps_ctxt->i4_quality_preset,
2822
16.6k
                            ps_frm_ctb_prms,
2823
16.6k
                            ps_ctxt->ai2_scratch,
2824
16.6k
                            ps_ctxt->i4_bitrate_instance_num,
2825
16.6k
                            ps_ctxt->ps_func_selector);
2826
16.6k
                    }
2827
16.6k
                }
2828
70.3k
            }
2829
60.7k
            else
2830
60.7k
            {  // SAO Disabled
2831
60.7k
                if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2832
57.1k
                {
2833
                    /* Padding and Subpel Plane Generation */
2834
57.1k
                    ihevce_pad_interp_recon_ctb(
2835
57.1k
                        ps_pad_interp_recon,
2836
57.1k
                        ctb_ctr,
2837
57.1k
                        vert_ctr,
2838
57.1k
                        ps_ctxt->i4_quality_preset,
2839
57.1k
                        ps_frm_ctb_prms,
2840
57.1k
                        ps_ctxt->ai2_scratch,
2841
57.1k
                        ps_ctxt->i4_bitrate_instance_num,
2842
57.1k
                        ps_ctxt->ps_func_selector);
2843
57.1k
                }
2844
60.7k
            }
2845
131k
        }
2846
2847
104k
        {
2848
104k
            if(!ps_ctxt->i4_bitrate_instance_num)
2849
104k
            {
2850
104k
                if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2851
51.4k
                   ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2852
53.0k
                {
2853
                    /* If SAO is on, then signal completion of previous CTB row */
2854
53.0k
                    if(0 != vert_ctr)
2855
6.90k
                    {
2856
6.90k
                        {
2857
6.90k
                            WORD32 post_ctb_ctr;
2858
2859
23.5k
                            for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
2860
16.6k
                            {
2861
16.6k
                                ihevce_dmgr_map_set_sync(
2862
16.6k
                                    pv_dep_mngr_me_dep_encloop,
2863
16.6k
                                    post_ctb_ctr,
2864
16.6k
                                    (vert_ctr - 1),
2865
16.6k
                                    MAP_CTB_COMPLETE);
2866
16.6k
                            }
2867
6.90k
                        }
2868
6.90k
                    }
2869
53.0k
                }
2870
51.4k
                else
2871
51.4k
                {
2872
51.4k
                    {
2873
51.4k
                        WORD32 post_ctb_ctr;
2874
2875
112k
                        for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
2876
60.7k
                        {
2877
60.7k
                            ihevce_dmgr_map_set_sync(
2878
60.7k
                                pv_dep_mngr_me_dep_encloop,
2879
60.7k
                                post_ctb_ctr,
2880
60.7k
                                vert_ctr,
2881
60.7k
                                MAP_CTB_COMPLETE);
2882
60.7k
                        }
2883
51.4k
                    }
2884
51.4k
                }
2885
104k
            }
2886
104k
        }
2887
2888
        /*process last ctb row*/
2889
104k
        if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
2890
51.4k
           ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
2891
53.0k
        {
2892
53.0k
            sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
2893
2894
53.0k
            if(vert_ctr ==
2895
53.0k
               (ps_tile_params->i4_first_ctb_y + ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1))
2896
46.1k
            {
2897
99.9k
                for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
2898
53.7k
                {
2899
53.7k
                    if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
2900
53.7k
                    {
2901
                        /* Padding and Subpel Plane Generation */
2902
53.7k
                        ihevce_pad_interp_recon_ctb(
2903
53.7k
                            ps_pad_interp_recon,
2904
53.7k
                            ctb_ctr,
2905
53.7k
                            vert_ctr,
2906
53.7k
                            ps_ctxt->i4_quality_preset,
2907
53.7k
                            ps_frm_ctb_prms,
2908
53.7k
                            ps_ctxt->ai2_scratch,
2909
53.7k
                            ps_ctxt->i4_bitrate_instance_num,
2910
53.7k
                            ps_ctxt->ps_func_selector);
2911
53.7k
                    }
2912
53.7k
                }
2913
46.1k
            }
2914
            /* If SAO is on, then signal completion of the last CTB row of frame */
2915
53.0k
            {
2916
53.0k
                if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1))
2917
46.1k
                {
2918
46.1k
                    if(!ps_ctxt->i4_bitrate_instance_num)
2919
46.1k
                    {
2920
46.1k
                        {
2921
46.1k
                            WORD32 post_ctb_ctr;
2922
2923
99.9k
                            for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
2924
53.7k
                            {
2925
53.7k
                                ihevce_dmgr_map_set_sync(
2926
53.7k
                                    pv_dep_mngr_me_dep_encloop,
2927
53.7k
                                    post_ctb_ctr,
2928
53.7k
                                    vert_ctr,
2929
53.7k
                                    MAP_CTB_COMPLETE);
2930
53.7k
                            }
2931
46.1k
                        }
2932
46.1k
                    }
2933
46.1k
                }
2934
53.0k
            }
2935
53.0k
        }
2936
104k
    }
2937
2938
104k
    return;
2939
104k
}
2940
2941
/*!
2942
******************************************************************************
2943
* \if Function name : ihevce_enc_loop_process \endif
2944
*
2945
* \brief
2946
*    Frame level enc_loop pass function
2947
*
2948
* \param[in] pv_ctxt : pointer to enc_loop module
2949
* \param[in] ps_frm_lamda : Frame level Lambda params
2950
* \param[in] ps_inp  : pointer to input yuv buffer (frame buffer)
2951
* \param[in] ps_ctb_in : pointer CTB structure (output of ME/IPE) (frame buffer)
2952
* \param[out] ps_frm_recon : pointer recon picture structure pointer (frame buffer)
2953
* \param[out] ps_ctb_out : pointer CTB output structure (frame buffer)
2954
* \param[out] ps_cu_out : pointer CU output structure (frame buffer)
2955
* \param[out] ps_tu_out : pointer TU output structure (frame buffer)
2956
* \param[out] pi2_frm_coeffs : pointer to coeff output (frame buffer)
2957
*
2958
* \return
2959
*    None
2960
*
2961
* Note : Currently the frame level calculations done assume that the
2961
*        frame width of the input / recon is an exact multiple of ctbsize
2963
*
2964
* \author
2965
*  Ittiam
2966
*
2967
*****************************************************************************
2968
*/
2969
void ihevce_enc_loop_process(
2970
    void *pv_ctxt,
2971
    ihevce_lap_enc_buf_t *ps_curr_inp,
2972
    ctb_analyse_t *ps_ctb_in,
2973
    ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse,
2974
    recon_pic_buf_t *ps_frm_recon,
2975
    cur_ctb_cu_tree_t *ps_cu_tree_out,
2976
    ctb_enc_loop_out_t *ps_ctb_out,
2977
    cu_enc_loop_out_t *ps_cu_out,
2978
    tu_enc_loop_out_t *ps_tu_out,
2979
    pu_t *ps_pu_out,
2980
    UWORD8 *pu1_frm_ecd_data,
2981
    frm_ctb_ctxt_t *ps_frm_ctb_prms,
2982
    frm_lambda_ctxt_t *ps_frm_lamda,
2983
    multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
2984
    WORD32 thrd_id,
2985
    WORD32 i4_enc_frm_id,
2986
    WORD32 i4_pass)
2987
94.8k
{
2988
94.8k
    WORD32 vert_ctr;
2989
94.8k
    WORD32 tile_col_idx;
2990
94.8k
    iv_enc_yuv_buf_t s_curr_src_bufs;
2991
94.8k
    iv_enc_yuv_buf_t s_curr_recon_bufs;
2992
94.8k
    iv_enc_yuv_buf_src_t s_curr_recon_bufs_src;
2993
94.8k
    UWORD32 *pu4_pu_offsets;
2994
94.8k
    WORD32 end_of_frame;
2995
94.8k
    UWORD8 *apu1_y_sub_pel_planes[3];
2996
94.8k
    pad_interp_recon_frm_t s_pad_interp_recon;
2997
94.8k
    ihevce_enc_loop_master_ctxt_t *ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_ctxt;
2998
2999
94.8k
    ihevce_enc_loop_ctxt_t *ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[thrd_id];
3000
3001
94.8k
    WORD32 i4_bitrate_instance_num = ps_ctxt->i4_bitrate_instance_num;
3002
3003
    /* initialize the closed loop lambda for the current frame */
3004
94.8k
    ps_ctxt->i8_cl_ssd_lambda_qf = ps_frm_lamda->i8_cl_ssd_lambda_qf;
3005
94.8k
    ps_ctxt->i8_cl_ssd_lambda_chroma_qf = ps_frm_lamda->i8_cl_ssd_lambda_chroma_qf;
3006
94.8k
    ps_ctxt->u4_chroma_cost_weighing_factor = ps_frm_lamda->u4_chroma_cost_weighing_factor;
3007
94.8k
    ps_ctxt->i4_satd_lamda = ps_frm_lamda->i4_cl_satd_lambda_qf;
3008
94.8k
    ps_ctxt->i4_sad_lamda = ps_frm_lamda->i4_cl_sad_type2_lambda_qf;
3009
94.8k
    ps_ctxt->thrd_id = thrd_id;
3010
94.8k
    ps_ctxt->u1_is_refPic = ps_curr_inp->s_lap_out.i4_is_ref_pic;
3011
3012
94.8k
#if DISABLE_SAO_WHEN_NOISY
3013
94.8k
    ps_ctxt->s_sao_ctxt_t.ps_ctb_data = ps_ctb_in;
3014
94.8k
    ps_ctxt->s_sao_ctxt_t.i4_ctb_data_stride = ps_frm_ctb_prms->i4_num_ctbs_horz;
3015
94.8k
#endif
3016
3017
#if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
3018
    ps_ctxt->pv_err_func_selector = ps_func_selector;
3019
#endif
3020
3021
94.8k
    ps_ctxt->i4_deblk_pad_hpel_cur_pic =
3022
94.8k
        ps_frm_recon->i4_deblk_pad_hpel_cur_pic ||
3023
7.51k
        ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
3024
3.07k
        ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag;
3025
3026
    /* Share all reference pictures with nbr clients. This flag will be used only
3027
    in case of dist-enc mode */
3028
94.8k
    ps_ctxt->i4_share_flag = (ps_frm_recon->i4_is_reference != 0);
3029
94.8k
    ps_ctxt->pv_frm_recon = (void *)ps_frm_recon;
3030
3031
    /* Register the frame level ssd lamda for both luma and chroma*/
3032
94.8k
    ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf = ps_frm_lamda->i8_cl_ssd_lambda_qf;
3033
94.8k
    ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf = ps_frm_lamda->i8_cl_ssd_lambda_chroma_qf;
3034
3035
94.8k
    ihevce_populate_cl_cu_lambda_prms(
3036
94.8k
        ps_ctxt,
3037
94.8k
        ps_frm_lamda,
3038
94.8k
        (WORD32)ps_ctxt->i1_slice_type,
3039
94.8k
        ps_curr_inp->s_lap_out.i4_temporal_lyr_id,
3040
94.8k
        ENC_LOOP_LAMBDA_TYPE);
3041
3042
94.8k
    ps_ctxt->u1_disable_intra_eval = DISABLE_INTRA_IN_BPICS &&
3043
94.8k
                                     (IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) &&
3044
31.0k
                                     (ps_ctxt->i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE);
3045
3046
94.8k
    end_of_frame = 0;
3047
3048
    /* ----------------------------------------------------- */
3049
    /* store the stride and dimensions of source and recon   */
3050
    /* buffer pointers will be over written at every CTB row */
3051
    /* ----------------------------------------------------- */
3052
94.8k
    memcpy(&s_curr_src_bufs, &ps_curr_inp->s_lap_out.s_input_buf, sizeof(iv_enc_yuv_buf_t));
3053
3054
94.8k
    memcpy(&s_curr_recon_bufs, &ps_frm_recon->s_yuv_buf_desc, sizeof(iv_enc_yuv_buf_t));
3055
3056
94.8k
    memcpy(&s_curr_recon_bufs_src, &ps_frm_recon->s_yuv_buf_desc_src, sizeof(iv_enc_yuv_buf_src_t));
3057
3058
    /* get the frame level pu offset pointer*/
3059
94.8k
    pu4_pu_offsets = ps_frm_recon->pu4_pu_off;
3060
3061
94.8k
    s_pad_interp_recon.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
3062
3063
    /* ------------ Loop over all the CTB rows --------------- */
3064
294k
    while(0 == end_of_frame)
3065
199k
    {
3066
199k
        UWORD8 *pu1_tmp;
3067
199k
        UWORD8 *pu1_row_pu_map;
3068
199k
        UWORD8 *pu1_row_ecd_data;
3069
199k
        ctb_analyse_t *ps_ctb_row_in;
3070
199k
        ctb_enc_loop_out_t *ps_ctb_row_out;
3071
199k
        cu_enc_loop_out_t *ps_row_cu;
3072
199k
        tu_enc_loop_out_t *ps_row_tu;
3073
199k
        pu_t *ps_row_pu;
3074
199k
        pu_col_mv_t *ps_row_col_pu;
3075
199k
        job_queue_t *ps_job;
3076
199k
        UWORD32 *pu4_pu_row_offsets;
3077
199k
        UWORD16 *pu2_num_pu_row;
3078
3079
199k
        ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse;
3080
199k
        cur_ctb_cu_tree_t *ps_row_cu_tree;
3081
199k
        UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2);
3082
3083
        /* Get the current row from the job queue */
3084
199k
        ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job(
3085
199k
            ps_multi_thrd_ctxt, ENC_LOOP_JOB + i4_bitrate_instance_num, 1, i4_enc_frm_id);
3086
3087
        /* Register the pointer to ctb out of the current frame*/
3088
199k
        ps_ctxt->s_sao_ctxt_t.ps_ctb_out = ps_ctb_out;
3089
3090
        /* If all rows are done, set the end of process flag to 1, */
3091
        /* and the current row to -1 */
3092
199k
        if(NULL == ps_job)
3093
94.8k
        {
3094
94.8k
            vert_ctr = -1;
3095
94.8k
            tile_col_idx = -1;
3096
94.8k
            end_of_frame = 1;
3097
94.8k
        }
3098
104k
        else
3099
104k
        {
3100
104k
            ihevce_tile_params_t *ps_col_tile_params_temp;
3101
104k
            ihevce_tile_params_t *ps_tile_params;
3102
104k
            WORD32 i4_tile_id;
3103
3104
104k
            ASSERT((ENC_LOOP_JOB + i4_bitrate_instance_num) == ps_job->i4_task_type);
3105
            /* set the output dependency */
3106
104k
            ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_enc_frm_id);
3107
3108
            /* Obtain the current row's details from the job */
3109
104k
            vert_ctr = ps_job->s_job_info.s_enc_loop_job_info.i4_ctb_row_no;
3110
104k
            {
3111
                /* Obtain the current colum tile index from the job */
3112
104k
                tile_col_idx = ps_job->s_job_info.s_enc_loop_job_info.i4_tile_col_idx;
3113
3114
                /* The tile parameter for the col. idx. Use only the properties
3115
                which is same for all the bottom tiles like width, start_x, etc.
3116
                Don't use height, start_y, etc.                                  */
3117
104k
                ps_col_tile_params_temp =
3118
104k
                    ((ihevce_tile_params_t *)ps_master_ctxt->pv_tile_params_base + tile_col_idx);
3119
3120
                /* Derive actual tile_id based on vert_ctr */
3121
104k
                i4_tile_id =
3122
104k
                    *(ps_frm_ctb_prms->pi4_tile_id_map +
3123
104k
                      vert_ctr * ps_frm_ctb_prms->i4_tile_id_ctb_map_stride +
3124
104k
                      ps_col_tile_params_temp->i4_first_ctb_x);
3125
                /* Derive pointer to current tile prms */
3126
104k
                ps_tile_params =
3127
104k
                    ((ihevce_tile_params_t *)ps_master_ctxt->pv_tile_params_base + i4_tile_id);
3128
104k
            }
3129
3130
104k
            ps_ctxt->i4_tile_col_idx = tile_col_idx;
3131
            /* derive the current ctb row pointers */
3132
3133
            /* luma src */
3134
104k
            pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf +
3135
104k
                      (ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_y *
3136
104k
                       ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd) +
3137
104k
                      ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_x;
3138
3139
104k
            pu1_tmp +=
3140
104k
                (vert_ctr * ps_frm_ctb_prms->i4_ctb_size *
3141
104k
                 ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd);
3142
3143
104k
            s_curr_src_bufs.pv_y_buf = pu1_tmp;
3144
3145
104k
            if(!ps_ctxt->u1_is_input_data_hbd)
3146
104k
            {
3147
                /* cb src */
3148
104k
                pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf;
3149
104k
                pu1_tmp +=
3150
104k
                    (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> ((is_inp_422 == 1) ? 0 : 1)) *
3151
104k
                     ps_curr_inp->s_lap_out.s_input_buf.i4_uv_strd);
3152
3153
104k
                s_curr_src_bufs.pv_u_buf = pu1_tmp;
3154
104k
            }
3155
3156
            /* luma recon */
3157
104k
            pu1_tmp = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
3158
104k
            pu1_tmp +=
3159
104k
                (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3160
3161
104k
            s_curr_recon_bufs.pv_y_buf = pu1_tmp;
3162
104k
            s_pad_interp_recon.pu1_luma_recon = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
3163
104k
            s_pad_interp_recon.i4_luma_recon_stride = ps_frm_recon->s_yuv_buf_desc.i4_y_strd;
3164
104k
            if(!ps_ctxt->u1_is_input_data_hbd)
3165
104k
            {
3166
                /* cb recon */
3167
104k
                pu1_tmp = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
3168
104k
                pu1_tmp +=
3169
104k
                    (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> ((is_inp_422 == 1) ? 0 : 1)) *
3170
104k
                     ps_frm_recon->s_yuv_buf_desc.i4_uv_strd);
3171
3172
104k
                s_curr_recon_bufs.pv_u_buf = pu1_tmp;
3173
104k
                s_pad_interp_recon.pu1_chrm_recon = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
3174
104k
                s_pad_interp_recon.i4_chrm_recon_stride = ps_frm_recon->s_yuv_buf_desc.i4_uv_strd;
3175
3176
104k
                s_pad_interp_recon.i4_ctb_size = ps_frm_ctb_prms->i4_ctb_size;
3177
3178
                /* Register the source buffer pointers in sao context*/
3179
104k
                ps_ctxt->s_sao_ctxt_t.pu1_frm_luma_src_buf =
3180
104k
                    (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf +
3181
104k
                    (ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_y *
3182
104k
                     ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd) +
3183
104k
                    ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_x;
3184
3185
104k
                ps_ctxt->s_sao_ctxt_t.i4_frm_luma_src_stride =
3186
104k
                    ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd;
3187
3188
104k
                ps_ctxt->s_sao_ctxt_t.pu1_frm_chroma_src_buf =
3189
104k
                    (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf;
3190
3191
104k
                ps_ctxt->s_sao_ctxt_t.i4_frm_chroma_src_stride =
3192
104k
                    ps_curr_inp->s_lap_out.s_input_buf.i4_uv_strd;
3193
104k
            }
3194
3195
            /* Subpel planes hxfy, fxhy, hxhy*/
3196
104k
            pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[0];
3197
104k
            pu1_tmp +=
3198
104k
                (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3199
104k
            apu1_y_sub_pel_planes[0] = pu1_tmp;
3200
104k
            s_pad_interp_recon.pu1_sbpel_hxfy = ps_frm_recon->apu1_y_sub_pel_planes[0];
3201
3202
104k
            pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[1];
3203
104k
            pu1_tmp +=
3204
104k
                (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3205
104k
            apu1_y_sub_pel_planes[1] = pu1_tmp;
3206
104k
            s_pad_interp_recon.pu1_sbpel_fxhy = ps_frm_recon->apu1_y_sub_pel_planes[1];
3207
3208
104k
            pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[2];
3209
104k
            pu1_tmp +=
3210
104k
                (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
3211
104k
            apu1_y_sub_pel_planes[2] = pu1_tmp;
3212
104k
            s_pad_interp_recon.pu1_sbpel_hxhy = ps_frm_recon->apu1_y_sub_pel_planes[2];
3213
3214
            /* row level coeffs buffer */
3215
104k
            pu1_row_ecd_data =
3216
104k
                pu1_frm_ecd_data +
3217
104k
                (vert_ctr *
3218
104k
                 ((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_max_tus_in_row << 1)
3219
104k
                                    : ((ps_frm_ctb_prms->i4_max_tus_in_row * 3) >> 1)) *
3220
104k
                 MAX_SCAN_COEFFS_BYTES_4x4);
3221
3222
            /* Row level CU buffer */
3223
104k
            ps_row_cu = ps_cu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_cus_in_row);
3224
3225
            /* Row level TU buffer */
3226
104k
            ps_row_tu = ps_tu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_tus_in_row);
3227
3228
            /* Row level PU buffer */
3229
104k
            ps_row_pu = ps_pu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_pus_in_row);
3230
3231
            /* Row level colocated PU buffer */
3232
            /* ps_frm_col_mv has (i4_num_ctbs_horz + 1) CTBs for stride */
3233
104k
            ps_row_col_pu =
3234
104k
                ps_frm_recon->ps_frm_col_mv + (vert_ctr * (ps_frm_ctb_prms->i4_num_ctbs_horz + 1) *
3235
104k
                                               ps_frm_ctb_prms->i4_num_pus_in_ctb);
3236
            /* Row level col PU map buffer */
3237
            /* pu1_frm_pu_map has (i4_num_ctbs_horz + 1) CTBs for stride */
3238
104k
            pu1_row_pu_map =
3239
104k
                ps_frm_recon->pu1_frm_pu_map + (vert_ctr * (ps_frm_ctb_prms->i4_num_ctbs_horz + 1) *
3240
104k
                                                ps_frm_ctb_prms->i4_num_pus_in_ctb);
3241
            /* row ctb in pointer  */
3242
104k
            ps_ctb_row_in = ps_ctb_in + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3243
3244
            /* row ctb out pointer  */
3245
104k
            ps_ctb_row_out = ps_ctb_out + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3246
3247
            /* row number of PUs map pointer */
3248
104k
            pu2_num_pu_row =
3249
104k
                ps_frm_recon->pu2_num_pu_map + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3250
3251
            /* row pu offsets pointer  */
3252
104k
            pu4_pu_row_offsets = pu4_pu_offsets + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
3253
            /* store the first CTB pu offset pointer */
3254
104k
            *pu4_pu_row_offsets = vert_ctr * ps_frm_ctb_prms->i4_max_pus_in_row;
3255
            /* Initialize ptr to current IPE row */
3256
104k
            ps_row_ipe_analyse = ps_ipe_analyse + (vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz);
3257
3258
            /* Initialize ptr to current row */
3259
104k
            ps_row_cu_tree = ps_cu_tree_out +
3260
104k
                             (vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE);
3261
3262
            /* Get the EncLoop Top-Right CU Dep Mngr */
3263
104k
            ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right =
3264
104k
                ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[ps_ctxt->i4_enc_frm_id]
3265
104k
                                                                   [i4_bitrate_instance_num];
3266
            /* Get the EncLoop Deblock Dep Mngr */
3267
104k
            ps_ctxt->pv_dep_mngr_enc_loop_dblk =
3268
104k
                ps_master_ctxt
3269
104k
                    ->aapv_dep_mngr_enc_loop_dblk[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num];
3270
            /* Get the EncLoop Sao Dep Mngr */
3271
104k
            ps_ctxt->pv_dep_mngr_enc_loop_sao =
3272
104k
                ps_master_ctxt
3273
104k
                    ->aapv_dep_mngr_enc_loop_sao[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num];
3274
3275
104k
            ps_ctxt->pu1_curr_row_cabac_state = &ps_master_ctxt->au1_ctxt_models[vert_ctr][0];
3276
3277
104k
            {
3278
                /* derive the pointers of top row buffers */
3279
104k
                ps_ctxt->pv_top_row_luma =
3280
104k
                    (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[ps_ctxt->i4_enc_frm_id] +
3281
104k
                    (ps_ctxt->i4_frm_top_row_luma_size * ps_ctxt->i4_bitrate_instance_num) +
3282
104k
                    (vert_ctr - 1) * ps_ctxt->i4_top_row_luma_stride;
3283
3284
104k
                ps_ctxt->pv_top_row_chroma =
3285
104k
                    (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[ps_ctxt->i4_enc_frm_id] +
3286
104k
                    (ps_ctxt->i4_frm_top_row_chroma_size * ps_ctxt->i4_bitrate_instance_num) +
3287
104k
                    (vert_ctr - 1) * ps_ctxt->i4_top_row_chroma_stride;
3288
3289
                /* derive the pointers of bottom row buffers to update current row data */
3290
104k
                ps_ctxt->pv_bot_row_luma =
3291
104k
                    (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[ps_ctxt->i4_enc_frm_id] +
3292
104k
                    (ps_ctxt->i4_frm_top_row_luma_size * ps_ctxt->i4_bitrate_instance_num) +
3293
104k
                    (vert_ctr)*ps_ctxt->i4_top_row_luma_stride;
3294
3295
104k
                ps_ctxt->pv_bot_row_chroma =
3296
104k
                    (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[ps_ctxt->i4_enc_frm_id] +
3297
104k
                    (ps_ctxt->i4_frm_top_row_chroma_size * ps_ctxt->i4_bitrate_instance_num) +
3298
104k
                    (vert_ctr)*ps_ctxt->i4_top_row_chroma_stride;
3299
3300
                /* Register the buffer pointers in sao context*/
3301
104k
                ps_ctxt->s_sao_ctxt_t.pu1_frm_luma_recon_buf =
3302
104k
                    (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
3303
104k
                ps_ctxt->s_sao_ctxt_t.i4_frm_luma_recon_stride =
3304
104k
                    ps_frm_recon->s_yuv_buf_desc.i4_y_strd;
3305
3306
104k
                ps_ctxt->s_sao_ctxt_t.pu1_frm_chroma_recon_buf =
3307
104k
                    (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
3308
104k
                ps_ctxt->s_sao_ctxt_t.i4_frm_chroma_recon_stride =
3309
104k
                    ps_frm_recon->s_yuv_buf_desc.i4_uv_strd;
3310
3311
104k
                ps_ctxt->s_sao_ctxt_t.ps_rdopt_entropy_ctxt = &ps_ctxt->s_rdopt_entropy_ctxt;
3312
3313
104k
                ps_ctxt->s_sao_ctxt_t.i4_frm_top_luma_buf_stride =
3314
104k
                    ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd + 1;
3315
3316
104k
                ps_ctxt->s_sao_ctxt_t.i4_frm_top_chroma_buf_stride =
3317
104k
                    ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd + 2;
3318
104k
            }
3319
3320
104k
            ps_ctxt->ps_top_row_nbr =
3321
104k
                ps_ctxt->aps_frm_top_row_nbr[ps_ctxt->i4_enc_frm_id] +
3322
104k
                (ps_ctxt->i4_frm_top_row_nbr_size * ps_ctxt->i4_bitrate_instance_num) +
3323
104k
                (vert_ctr - 1) * ps_ctxt->i4_top_row_nbr_stride;
3324
3325
104k
            ps_ctxt->ps_bot_row_nbr =
3326
104k
                ps_ctxt->aps_frm_top_row_nbr[ps_ctxt->i4_enc_frm_id] +
3327
104k
                (ps_ctxt->i4_frm_top_row_nbr_size * ps_ctxt->i4_bitrate_instance_num) +
3328
104k
                (vert_ctr)*ps_ctxt->i4_top_row_nbr_stride;
3329
3330
104k
            if(vert_ctr > 0)
3331
9.64k
            {
3332
9.64k
                ps_ctxt->pu1_top_rt_cabac_state = &ps_master_ctxt->au1_ctxt_models[vert_ctr - 1][0];
3333
9.64k
            }
3334
94.8k
            else
3335
94.8k
            {
3336
94.8k
                ps_ctxt->pu1_top_rt_cabac_state = NULL;
3337
94.8k
            }
3338
3339
104k
            ASSERT(
3340
104k
                ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0]
3341
104k
                    .ps_pps->i1_sign_data_hiding_flag ==
3342
104k
                ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1]
3343
104k
                    .ps_pps->i1_sign_data_hiding_flag);
3344
3345
            /* call the row level processing function */
3346
104k
            ihevce_enc_loop_process_row(
3347
104k
                ps_ctxt,
3348
104k
                &s_curr_src_bufs,
3349
104k
                &s_curr_recon_bufs,
3350
104k
                &s_curr_recon_bufs_src,
3351
104k
                &apu1_y_sub_pel_planes[0],
3352
104k
                ps_ctb_row_in,
3353
104k
                ps_ctb_row_out,
3354
104k
                ps_row_ipe_analyse,
3355
104k
                ps_row_cu_tree,
3356
104k
                ps_row_cu,
3357
104k
                ps_row_tu,
3358
104k
                ps_row_pu,
3359
104k
                ps_row_col_pu,
3360
104k
                pu2_num_pu_row,
3361
104k
                pu1_row_pu_map,
3362
104k
                pu1_row_ecd_data,
3363
104k
                pu4_pu_row_offsets,
3364
104k
                ps_frm_ctb_prms,
3365
104k
                vert_ctr,
3366
104k
                ps_frm_recon,
3367
104k
                ps_ctxt->pv_dep_mngr_encloop_dep_me,
3368
104k
                &s_pad_interp_recon,
3369
104k
                i4_pass,
3370
104k
                ps_multi_thrd_ctxt,
3371
104k
                ps_tile_params);
3372
104k
        }
3373
199k
    }
3374
94.8k
}
3375
3376
/*!
3377
******************************************************************************
3378
* \if Function name : ihevce_enc_loop_dblk_get_prms_dep_mngr \endif
3379
*
3380
* \brief Returns to the caller key attributes relevant for dependency manager,
3381
*        ie, the number of vertical units in l0 layer
3382
*
3383
* \par Description:
3384
*
3385
* \param[in] i4_ht    : height (ht) from which the number of vertical units is derived
3386
* \param[out] pi4_num_vert_units_in_lyr : Pointer to store num vertical units
3387
*                                         for deblocking
3388
*
3389
* \return
3390
*    None
3391
*
3392
* \author
3393
*  Ittiam
3394
*
3395
*****************************************************************************
3396
*/
3397
void ihevce_enc_loop_dblk_get_prms_dep_mngr(WORD32 i4_ht, WORD32 *pi4_num_vert_units_in_lyr)
{
    /* Height of one vertical unit; fixed here to a CTB size of 64 */
    const WORD32 i4_unit_ht = 64;

    /* Number of units needed to cover i4_ht, rounding a partial unit up */
    WORD32 i4_num_units = (i4_ht + i4_unit_ht - 1) / i4_unit_ht;

    /* Hand the result back through the output pointer */
    *pi4_num_vert_units_in_lyr = i4_num_units;
}
3409
3410
/*!
3411
******************************************************************************
3412
* \if Function name : ihevce_enc_loop_get_num_mem_recs \endif
3413
*
3414
* \brief
3415
*    Number of memory records are returned for enc_loop module
3416
* Note : Include TOT MEM. req. for ENC.LOOP + TOT MEM. req. for Dep Mngr for Dblk
3417
*
3418
* \return
3419
*    Total number of memory records (enc loop records + dependency manager records)
3420
*
3421
* \author
3422
*  Ittiam
3423
*
3424
*****************************************************************************
3425
*/
3426
WORD32
3427
    ihevce_enc_loop_get_num_mem_recs(WORD32 i4_num_bitrate_inst, WORD32 i4_num_enc_loop_frm_pllel)
3428
7.14k
{
3429
7.14k
    WORD32 enc_loop_mem_recs = NUM_ENC_LOOP_MEM_RECS;
3430
7.14k
    WORD32 enc_loop_dblk_dep_mngr_mem_recs =
3431
7.14k
        i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
3432
7.14k
    WORD32 enc_loop_sao_dep_mngr_mem_recs =
3433
7.14k
        i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
3434
7.14k
    WORD32 enc_loop_cu_top_right_dep_mngr_mem_recs =
3435
7.14k
        i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
3436
7.14k
    WORD32 enc_loop_aux_br_dep_mngr_mem_recs =
3437
7.14k
        i4_num_enc_loop_frm_pllel * (i4_num_bitrate_inst - 1) * ihevce_dmgr_get_num_mem_recs();
3438
3439
7.14k
    return (
3440
7.14k
        (enc_loop_mem_recs + enc_loop_dblk_dep_mngr_mem_recs + enc_loop_sao_dep_mngr_mem_recs +
3441
7.14k
         enc_loop_cu_top_right_dep_mngr_mem_recs + enc_loop_aux_br_dep_mngr_mem_recs));
3442
7.14k
}
3443
/*!
3444
******************************************************************************
3445
* \if Function name : ihevce_enc_loop_get_mem_recs \endif
3446
*
3447
* \brief
3448
*    Memory requirements are returned for ENC_LOOP.
3449
*
3450
* \param[in,out]  ps_mem_tab : pointer to memory descriptors table
3451
* \param[in] ps_init_prms : Create time static parameters
3452
* \param[in] i4_num_proc_thrds : Number of processing threads for this module
3453
* \param[in] i4_mem_space : memspace in which memory request should be done
3454
*
3455
* \return
3456
*    None
3457
*
3458
* \author
3459
*  Ittiam
3460
*
3461
*****************************************************************************
3462
*/
3463
WORD32 ihevce_enc_loop_get_mem_recs(
3464
    iv_mem_rec_t *ps_mem_tab,
3465
    ihevce_static_cfg_params_t *ps_init_prms,
3466
    WORD32 i4_num_proc_thrds,
3467
    WORD32 i4_num_bitrate_inst,
3468
    WORD32 i4_num_enc_loop_frm_pllel,
3469
    WORD32 i4_mem_space,
3470
    WORD32 i4_resolution_id)
3471
3.57k
{
3472
3.57k
    UWORD32 u4_width, u4_height, n_tabs;
3473
3.57k
    UWORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame;
3474
3.57k
    WORD32 ctr;
3475
3.57k
    WORD32 i4_chroma_format = ps_init_prms->s_src_prms.i4_chr_format;
3476
3477
    /* derive frame dimensions */
3478
    /*width of the input YUV to be encoded */
3479
3.57k
    u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
3480
    /*making the width a multiple of CTB size*/
3481
3.57k
    u4_width += SET_CTB_ALIGN(
3482
3.57k
        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE);
3483
3484
    /*height of the input YUV to be encoded */
3485
3.57k
    u4_height = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
3486
    /*making the height a multiple of CTB size*/
3487
3.57k
    u4_height += SET_CTB_ALIGN(
3488
3.57k
        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, MAX_CTB_SIZE);
3489
3.57k
    u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE);
3490
3.57k
    u4_ctb_rows_in_a_frame = (u4_height / MAX_CTB_SIZE);
3491
    /* memories should be requested assuming worst case requirememnts */
3492
3493
    /* Module context structure */
3494
3.57k
    ps_mem_tab[ENC_LOOP_CTXT].i4_mem_size = sizeof(ihevce_enc_loop_master_ctxt_t);
3495
3496
3.57k
    ps_mem_tab[ENC_LOOP_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3497
3498
3.57k
    ps_mem_tab[ENC_LOOP_CTXT].i4_mem_alignment = 8;
3499
3500
    /* Thread context structure */
3501
3.57k
    ps_mem_tab[ENC_LOOP_THRDS_CTXT].i4_mem_size =
3502
3.57k
        i4_num_proc_thrds * sizeof(ihevce_enc_loop_ctxt_t);
3503
3504
3.57k
    ps_mem_tab[ENC_LOOP_THRDS_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3505
3506
3.57k
    ps_mem_tab[ENC_LOOP_THRDS_CTXT].i4_mem_alignment = 16;
3507
3508
    /* Scale matrices */
3509
3.57k
    ps_mem_tab[ENC_LOOP_SCALE_MAT].i4_mem_size = 2 * MAX_TU_SIZE * MAX_TU_SIZE * sizeof(WORD16);
3510
3511
3.57k
    ps_mem_tab[ENC_LOOP_SCALE_MAT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3512
3513
3.57k
    ps_mem_tab[ENC_LOOP_SCALE_MAT].i4_mem_alignment = 8;
3514
3515
    /* Rescale matrices */
3516
3.57k
    ps_mem_tab[ENC_LOOP_RESCALE_MAT].i4_mem_size = 2 * MAX_TU_SIZE * MAX_TU_SIZE * sizeof(WORD16);
3517
3518
3.57k
    ps_mem_tab[ENC_LOOP_RESCALE_MAT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3519
3520
3.57k
    ps_mem_tab[ENC_LOOP_RESCALE_MAT].i4_mem_alignment = 8;
3521
3522
    /* top row luma one row of pixel data per CTB row */
3523
3.57k
    if(ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8)
3524
0
    {
3525
0
        ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_size = (u4_ctb_rows_in_a_frame + 1) *
3526
0
                                                    (u4_width + MAX_CU_SIZE + 1) * sizeof(UWORD16) *
3527
0
                                                    i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3528
0
    }
3529
3.57k
    else
3530
3.57k
    {
3531
3.57k
        ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_size = (u4_ctb_rows_in_a_frame + 1) *
3532
3.57k
                                                    (u4_width + MAX_CU_SIZE + 1) * sizeof(UWORD8) *
3533
3.57k
                                                    i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3534
3.57k
    }
3535
3536
3.57k
    ps_mem_tab[ENC_LOOP_TOP_LUMA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3537
3538
3.57k
    ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_alignment = 8;
3539
3540
    /* top row chroma */
3541
3.57k
    if(ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8)
3542
0
    {
3543
0
        ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_size =
3544
0
            (u4_ctb_rows_in_a_frame + 1) * (u4_width + MAX_CU_SIZE + 2) * sizeof(UWORD16) *
3545
0
            i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3546
0
    }
3547
3.57k
    else
3548
3.57k
    {
3549
3.57k
        ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_size =
3550
3.57k
            (u4_ctb_rows_in_a_frame + 1) * (u4_width + MAX_CU_SIZE + 2) * sizeof(UWORD8) *
3551
3.57k
            i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3552
3.57k
    }
3553
3554
3.57k
    ps_mem_tab[ENC_LOOP_TOP_CHROMA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3555
3556
3.57k
    ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_alignment = 8;
3557
3558
    /* top row neighbour 4x4 */
3559
3.57k
    ps_mem_tab[ENC_LOOP_TOP_NBR4X4].i4_mem_size =
3560
3.57k
        (u4_ctb_rows_in_a_frame + 1) * (((u4_width + MAX_CU_SIZE) >> 2) + 1) * sizeof(nbr_4x4_t) *
3561
3.57k
        i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
3562
3563
3.57k
    ps_mem_tab[ENC_LOOP_TOP_NBR4X4].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3564
3565
3.57k
    ps_mem_tab[ENC_LOOP_TOP_NBR4X4].i4_mem_alignment = 8;
3566
3567
    /* memory to dump rate control parameters by each thread for each bit-rate instance */
3568
    /* RC params collated by each thread for each bit-rate instance separately */
3569
3.57k
    ps_mem_tab[ENC_LOOP_RC_PARAMS].i4_mem_size = i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel *
3570
3.57k
                                                 i4_num_proc_thrds * sizeof(enc_loop_rc_params_t);
3571
3572
3.57k
    ps_mem_tab[ENC_LOOP_RC_PARAMS].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3573
3574
3.57k
    ps_mem_tab[ENC_LOOP_RC_PARAMS].i4_mem_alignment = 8;
3575
    /* Memory required for deblocking */
3576
3.57k
    {
3577
        /* Memory to store Qp of top4x4 blocks for each CTB row.
3578
        This memory is allocated at frame level and shared across
3579
        all cores. The Qp values are needed to form Qp-map(described
3580
        in the ENC_LOOP_DEBLOCKING section below)*/
3581
3582
3.57k
        UWORD32 u4_size_bs_memory, u4_size_qp_memory;
3583
3.57k
        UWORD32 u4_size_top_4x4_qp_memory;
3584
3585
        /*Memory required to store Qp of top4x4 blocks for a CTB row for entire frame*/
3586
        /*Space required per CTB*/
3587
3.57k
        u4_size_top_4x4_qp_memory = (MAX_CTB_SIZE / 4);
3588
        /*Space required for entire CTB row*/
3589
3.57k
        u4_size_top_4x4_qp_memory *= u4_ctb_in_a_row;
3590
        /*Space required for entire frame*/
3591
3.57k
        u4_size_top_4x4_qp_memory *= u4_ctb_rows_in_a_frame;
3592
        /*Space required for multiple bitrate*/
3593
3.57k
        u4_size_top_4x4_qp_memory *= i4_num_bitrate_inst;
3594
        /*Space required for multiple frames in parallel*/
3595
3.57k
        u4_size_top_4x4_qp_memory *= i4_num_enc_loop_frm_pllel;
3596
3597
3.57k
        ps_mem_tab[ENC_LOOP_QP_TOP_4X4].i4_mem_size = u4_size_top_4x4_qp_memory;
3598
3.57k
        ps_mem_tab[ENC_LOOP_QP_TOP_4X4].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3599
3.57k
        ps_mem_tab[ENC_LOOP_QP_TOP_4X4].i4_mem_alignment = 8;
3600
3601
        /* Memory allocation of BS and Qp-map for deblocking at CTB-row level:
3602
        ## Boundary Strength(Vertical):
3603
        BS stored per CTB at one stretch i.e. for a 64x CTB first 8 entries belongs to first CTB
3604
        of the row followed by 8 entries of second CTB and so on.
3605
        8 entries: Includes left edge of current CTB and excludes right edge.
3606
        ## Boundary Strength(Horizontal):
3607
        Same as Vertical.
3608
        8 entries:  Includes top edge of current CTB and excludes bottom edge.
3609
3610
        ## Qp-map storage:
3611
        T0 T1 T2 T3 T4 T5 ..........to the end of the CTB row
3612
        00 01 02 03 04 05 ..........to the end of the CTB row
3613
        10 11 12 13 14 15 ..........to the end of the CTB row
3614
        20 21 22 23 24 25 ..........to the end of the CTB row
3615
        30 31 32 33 34 35 ..........to the end of the CTB row
3616
        40 41 42 43 44 45 ..........to the end of the CTB row
3617
        ............................to the end of the CTB row
3618
        upto height_of_CTB..........to the end of the CTB row
3619
3620
        Qp is stored for each "4x4 block" in a proper 2-D array format (One entry for each 4x4).
3621
        A 2-D array of height= (height_of_CTB +1), and width = (width_of_CTB).
3622
        where,
3623
        => height_of_CTB = number of 4x4 blocks in a CTB  vertically,
3624
        => +1 is done to store Qp of lowest 4x4-block layer of top-CTB
3625
        in order to deblock top edge of current CTB.
3626
        => width_of_CTB  = number of 4x4 blocks in a CTB  horizontally,
3627
        */
3628
3629
        /*Memory(in bytes) required for storing Boundary Strength for entire CTB row*/
3630
        /*1 vertical edge per 8 pixel*/
3631
3.57k
        u4_size_bs_memory = (MAX_CTB_SIZE >> 3);
3632
        /*Vertical edges for entire width of CTB row*/
3633
3.57k
        u4_size_bs_memory *= u4_ctb_in_a_row;
3634
        /*Each vertical edge of CTB row is 4 bytes*/
3635
3.57k
        u4_size_bs_memory = u4_size_bs_memory << 2;
3636
        /*Adding Memory required for storing horizontal BS by doubling*/
3637
3.57k
        u4_size_bs_memory = u4_size_bs_memory << 1;
3638
3639
        /*Memory(in bytes) required for storing Qp at 4x4 level for entire CTB row*/
3640
        /*Number of 4x4 blocks in the width of a CTB*/
3641
3.57k
        u4_size_qp_memory = (MAX_CTB_SIZE >> 2);
3642
        /*Number of 4x4 blocks in the height of a CTB. Adding 1 to store Qp of lowest
3643
        4x4-block layer of top-CTB in order to deblock top edge of current CTB*/
3644
3.57k
        u4_size_qp_memory *= ((MAX_CTB_SIZE >> 2) + 1);
3645
        /*Storage for entire CTB row*/
3646
3.57k
        u4_size_qp_memory *= u4_ctb_in_a_row;
3647
3648
        /*Multiplying by i4_num_proc_thrds to assign memory for each core*/
3649
3.57k
        ps_mem_tab[ENC_LOOP_DEBLOCKING].i4_mem_size =
3650
3.57k
            i4_num_proc_thrds * (u4_size_bs_memory + u4_size_qp_memory);
3651
3652
3.57k
        ps_mem_tab[ENC_LOOP_DEBLOCKING].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3653
3654
3.57k
        ps_mem_tab[ENC_LOOP_DEBLOCKING].i4_mem_alignment = 8;
3655
3.57k
    }
3656
3657
    /* Memory required to store pred for 422 chroma */
3658
3.57k
    ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].i4_mem_size =
3659
3.57k
        i4_num_proc_thrds * MAX_CTB_SIZE * MAX_CTB_SIZE * 2 *
3660
3.57k
        (i4_chroma_format == IV_YUV_422SP_UV) *
3661
3.57k
        ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3662
3663
3.57k
    ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3664
3665
3.57k
    ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].i4_mem_alignment = 8;
3666
3667
    /* Memory for inter pred buffers */
3668
3.57k
    {
3669
3.57k
        WORD32 i4_num_bufs_per_thread = 0;
3670
3671
3.57k
        WORD32 i4_buf_size_per_cand =
3672
3.57k
            (MAX_CTB_SIZE) * (MAX_CTB_SIZE) *
3673
3.57k
            ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3674
3.57k
        WORD32 i4_quality_preset =
3675
3.57k
            ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
3676
3.57k
        switch(i4_quality_preset)
3677
3.57k
        {
3678
1.72k
        case IHEVCE_QUALITY_P0:
3679
1.72k
        {
3680
1.72k
            i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_PQ;
3681
1.72k
            break;
3682
0
        }
3683
280
        case IHEVCE_QUALITY_P2:
3684
280
        {
3685
280
            i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_HQ;
3686
280
            break;
3687
0
        }
3688
396
        case IHEVCE_QUALITY_P3:
3689
396
        {
3690
396
            i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_MS;
3691
396
            break;
3692
0
        }
3693
253
        case IHEVCE_QUALITY_P4:
3694
253
        {
3695
253
            i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_HS;
3696
253
            break;
3697
0
        }
3698
299
        case IHEVCE_QUALITY_P5:
3699
677
        case IHEVCE_QUALITY_P6:
3700
919
        case IHEVCE_QUALITY_P7:
3701
919
        {
3702
919
            i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_ES;
3703
919
            break;
3704
677
        }
3705
0
        default:
3706
0
        {
3707
0
            ASSERT(0);
3708
0
        }
3709
3.57k
        }
3710
3711
3.57k
        i4_num_bufs_per_thread += 4;
3712
3713
3.57k
        ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_size =
3714
3.57k
            i4_num_bufs_per_thread * i4_num_proc_thrds * i4_buf_size_per_cand;
3715
3716
3.57k
        ps_mem_tab[ENC_LOOP_INTER_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3717
3718
3.57k
        ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_alignment = 8;
3719
3.57k
    }
3720
3721
    /* Memory required to store chroma intra pred */
3722
0
    ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].i4_mem_size =
3723
3.57k
        i4_num_proc_thrds * (MAX_TU_SIZE) * (MAX_TU_SIZE)*2 * NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD *
3724
3.57k
        ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
3725
3.57k
        ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3726
3727
3.57k
    ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3728
3729
3.57k
    ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].i4_mem_alignment = 8;
3730
3731
    /* Memory required to store pred for reference substitution output */
3732
    /* While (MAX_TU_SIZE * 2 * 2) + 1 is the actual size needed,
3733
       allocate 16 bytes to the left and 7 bytes to the right to facilitate
3734
       SIMD access */
3735
3.57k
    ps_mem_tab[ENC_LOOP_REF_SUB_OUT].i4_mem_size =
3736
3.57k
        i4_num_proc_thrds * (((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING)
3737
3.57k
        + INTRAPRED_SIMD_LEFT_PADDING)*
3738
3.57k
        ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3739
3740
3.57k
    ps_mem_tab[ENC_LOOP_REF_SUB_OUT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3741
3742
3.57k
    ps_mem_tab[ENC_LOOP_REF_SUB_OUT].i4_mem_alignment = 8;
3743
3744
    /* Memory required to store pred for reference filtering output */
3745
    /* While (MAX_TU_SIZE * 2 * 2) + 1 is the actual size needed,
3746
       allocate 16 bytes to the left and 7 bytes to the right to facilitate
3747
       SIMD access */
3748
3.57k
    ps_mem_tab[ENC_LOOP_REF_FILT_OUT].i4_mem_size =
3749
3.57k
        i4_num_proc_thrds * (((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING)
3750
3.57k
        + INTRAPRED_SIMD_LEFT_PADDING)*
3751
3.57k
        ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3752
3753
3.57k
    ps_mem_tab[ENC_LOOP_REF_FILT_OUT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3754
3755
3.57k
    ps_mem_tab[ENC_LOOP_REF_FILT_OUT].i4_mem_alignment = 8;
3756
3757
3.57k
#if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3758
3.57k
    if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset == 0)
3759
1.72k
#endif
3760
1.72k
    {
3761
        /* Memory assignments for recon storage during CU Recursion */
3762
1.72k
        ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_size =
3763
1.72k
            i4_num_proc_thrds * (MAX_CU_SIZE * MAX_CU_SIZE) *
3764
1.72k
            ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3765
3766
1.72k
        ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3767
3768
1.72k
        ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_alignment = 8;
3769
3770
1.72k
        ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_size =
3771
1.72k
            i4_num_proc_thrds * (MAX_CU_SIZE * (MAX_CU_SIZE >> 1)) *
3772
1.72k
            ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
3773
1.72k
            ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3774
3775
1.72k
        ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3776
3777
1.72k
        ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_alignment = 8;
3778
1.72k
    }
3779
1.84k
#if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3780
1.84k
    else
3781
1.84k
    {
3782
        /* Memory assignments for recon storage during CU Recursion */
3783
1.84k
        ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_size = 0;
3784
3785
1.84k
        ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3786
3787
1.84k
        ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_alignment = 8;
3788
3789
1.84k
        ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_size = 0;
3790
3791
1.84k
        ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3792
3793
1.84k
        ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_alignment = 8;
3794
1.84k
    }
3795
3.57k
#endif
3796
3797
3.57k
#if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3798
3.57k
    if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset == 0)
3799
1.72k
#endif
3800
1.72k
    {
3801
        /* Memory assignments for pred storage during CU Recursion */
3802
1.72k
        ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_size =
3803
1.72k
            i4_num_proc_thrds * (MAX_CU_SIZE * MAX_CU_SIZE) *
3804
1.72k
            ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3805
3806
1.72k
        ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3807
3808
1.72k
        ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_alignment = 8;
3809
3810
1.72k
        ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_size =
3811
1.72k
            i4_num_proc_thrds * (MAX_CU_SIZE * (MAX_CU_SIZE >> 1)) *
3812
1.72k
            ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
3813
1.72k
            ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3814
3815
1.72k
        ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3816
3817
1.72k
        ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_alignment = 8;
3818
1.72k
    }
3819
1.84k
#if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
3820
1.84k
    else
3821
1.84k
    {
3822
        /* Memory assignments for pred storage during CU Recursion */
3823
1.84k
        ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_size = 0;
3824
3825
1.84k
        ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3826
3827
1.84k
        ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_alignment = 8;
3828
3829
1.84k
        ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_size = 0;
3830
3831
1.84k
        ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3832
3833
1.84k
        ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_alignment = 8;
3834
1.84k
    }
3835
3.57k
#endif
3836
3837
    /* Memory assignments for CTB left luma data storage */
3838
3.57k
    ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].i4_mem_size =
3839
3.57k
        i4_num_proc_thrds * (MAX_CTB_SIZE + MAX_TU_SIZE) *
3840
3.57k
        ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3841
3842
3.57k
    ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3843
3844
3.57k
    ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].i4_mem_alignment = 8;
3845
3846
    /* Memory assignments for CTB left chroma data storage */
3847
3.57k
    ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_size =
3848
3.57k
        i4_num_proc_thrds * (MAX_CTB_SIZE + MAX_TU_SIZE) *
3849
3.57k
        ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
3850
3.57k
    ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_size <<=
3851
3.57k
        ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0);
3852
3853
3.57k
    ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3854
3855
3.57k
    ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_alignment = 8;
3856
3857
    /* Memory required for SAO */
3858
3.57k
    {
3859
3.57k
        WORD32 num_vert_units;
3860
3.57k
        WORD32 num_horz_units;
3861
3.57k
        WORD32 ctb_aligned_ht, ctb_aligned_wd;
3862
3.57k
        WORD32 luma_buf, chroma_buf;
3863
3864
3.57k
        num_vert_units = u4_height / MAX_CTB_SIZE;
3865
3.57k
        num_horz_units = u4_width / MAX_CTB_SIZE;
3866
3867
3.57k
        ctb_aligned_ht = u4_height;
3868
3.57k
        ctb_aligned_wd = u4_width;
3869
3870
        /* Memory for top buffer. 1 extra width is required for top buf ptr for row 0
3871
        * and 1 extra location is required for top left buf ptr for row 0
3872
        * Also 1 extra byte is required for every row for top left pixel if
3873
        * the top left ptr is to be passed to leaf level unconditionally
3874
        */
3875
3.57k
        luma_buf = (ctb_aligned_ht + (ctb_aligned_wd + 1) * (num_vert_units + 1)) *
3876
3.57k
                   ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3877
3.57k
        chroma_buf = (ctb_aligned_ht + (ctb_aligned_wd + 2) * (num_vert_units + 1)) *
3878
3.57k
                     ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3879
3880
3.57k
        ps_mem_tab[ENC_LOOP_SAO].i4_mem_size =
3881
3.57k
            (luma_buf + chroma_buf) * (i4_num_bitrate_inst) * (i4_num_enc_loop_frm_pllel);
3882
3883
        /* Add the memory required to store the sao information of top ctb for top merge
3884
        * This is frame level buffer.
3885
        */
3886
3.57k
        ps_mem_tab[ENC_LOOP_SAO].i4_mem_size +=
3887
3.57k
            ((num_horz_units * sizeof(sao_enc_t)) * num_vert_units) * (i4_num_bitrate_inst) *
3888
3.57k
            (i4_num_enc_loop_frm_pllel);
3889
3890
3.57k
        ps_mem_tab[ENC_LOOP_SAO].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3891
3892
3.57k
        ps_mem_tab[ENC_LOOP_SAO].i4_mem_alignment = 8;
3893
3.57k
    }
3894
3895
    /* Memory for CU level Coeff data buffer */
3896
3.57k
    {
3897
        /* 16 additional bytes are required to ensure alignment */
3898
3.57k
        {
3899
3.57k
            ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].i4_mem_size =
3900
3.57k
                i4_num_proc_thrds *
3901
3.57k
                (((MAX_LUMA_COEFFS_CTB +
3902
3.57k
                   (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) +
3903
3.57k
                  16) *
3904
3.57k
                 (2) * sizeof(UWORD8));
3905
3.57k
        }
3906
3907
3.57k
        ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3908
3909
3.57k
        ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].i4_mem_alignment = 16;
3910
3911
3.57k
        ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].i4_mem_size =
3912
3.57k
            i4_num_proc_thrds *
3913
3.57k
            (MAX_LUMA_COEFFS_CTB +
3914
3.57k
             (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) *
3915
3.57k
            sizeof(UWORD8);
3916
3917
3.57k
        ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3918
3919
3.57k
        ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].i4_mem_alignment = 16;
3920
3.57k
    }
3921
3922
    /* Memory for CU dequant data buffer */
3923
3.57k
    {
3924
        /* 16 additional bytes are required to ensure alignment */
3925
3.57k
        {
3926
3.57k
            ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].i4_mem_size =
3927
3.57k
                i4_num_proc_thrds *
3928
3.57k
                (((i4_chroma_format == IV_YUV_422SP_UV) ? (MAX_CU_SIZE * (MAX_CU_SIZE << 1))
3929
3.57k
                                                        : (MAX_CU_SIZE * (MAX_CU_SIZE >> 1) * 3)) +
3930
3.57k
                 8) *
3931
3.57k
                (2) * sizeof(WORD16);
3932
3.57k
        }
3933
3934
3.57k
        ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
3935
3936
3.57k
        ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].i4_mem_alignment = 16;
3937
3.57k
    }
3938
3939
    /* Memory for Recon Datastore (Used around and within the RDOPT loop) */
3940
3.57k
    {
3941
3.57k
        WORD32 i4_memSize_perThread;
3942
3943
3.57k
        WORD32 i4_chroma_memSize_perThread = 0;
3944
        /* 2 bufs each allocated to the two 'enc_loop_cu_final_prms_t' structs */
3945
        /* used in RDOPT to store cur and best modes' data */
3946
3.57k
        WORD32 i4_luma_memSize_perThread =
3947
3.57k
            4 * MAX_CU_SIZE * MAX_CU_SIZE *
3948
3.57k
            ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3949
3950
        /* 'Glossary' for comments in the following codeBlock */
3951
        /* 1 - 2 Bufs for storing recons of the best modes determined in the */
3952
        /* function 'ihevce_intra_chroma_pred_mode_selector' */
3953
        /* 2 - 1 buf each allocated to the two 'enc_loop_cu_final_prms_t' structs */
3954
        /* used in RDOPT to store cur and best modes' data */
3955
3.57k
        if(i4_chroma_format == IV_YUV_422SP_UV)
3956
0
        {
3957
0
            WORD32 i4_quality_preset =
3958
0
                ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
3959
0
            switch(i4_quality_preset)
3960
0
            {
3961
0
            case IHEVCE_QUALITY_P0:
3962
0
            {
3963
                /* 1 */
3964
0
                i4_chroma_memSize_perThread +=
3965
0
                    2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_PQ *
3966
0
                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3967
3968
                /* 2 */
3969
0
                i4_chroma_memSize_perThread +=
3970
0
                    2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ *
3971
0
                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3972
3973
0
                break;
3974
0
            }
3975
0
            case IHEVCE_QUALITY_P2:
3976
0
            {
3977
                /* 1 */
3978
0
                i4_chroma_memSize_perThread +=
3979
0
                    2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_HQ *
3980
0
                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3981
3982
                /* 2 */
3983
0
                i4_chroma_memSize_perThread +=
3984
0
                    2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ *
3985
0
                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3986
3987
0
                break;
3988
0
            }
3989
0
            case IHEVCE_QUALITY_P3:
3990
0
            {
3991
                /* 1 */
3992
0
                i4_chroma_memSize_perThread +=
3993
0
                    2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_MS *
3994
0
                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
3995
3996
                /* 2 */
3997
0
                i4_chroma_memSize_perThread +=
3998
0
                    2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS *
3999
0
                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4000
4001
0
                break;
4002
0
            }
4003
0
            case IHEVCE_QUALITY_P4:
4004
0
            {
4005
                /* 1 */
4006
0
                i4_chroma_memSize_perThread +=
4007
0
                    2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_HS *
4008
0
                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4009
4010
                /* 2 */
4011
0
                i4_chroma_memSize_perThread +=
4012
0
                    2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS *
4013
0
                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4014
4015
0
                break;
4016
0
            }
4017
0
            case IHEVCE_QUALITY_P5:
4018
0
            {
4019
                /* 1 */
4020
0
                i4_chroma_memSize_perThread +=
4021
0
                    2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_XS *
4022
0
                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4023
4024
                /* 2 */
4025
0
                i4_chroma_memSize_perThread +=
4026
0
                    2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS *
4027
0
                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4028
4029
0
                break;
4030
0
            }
4031
0
            case IHEVCE_QUALITY_P6:
4032
0
            case IHEVCE_QUALITY_P7:
4033
0
            {
4034
                /* 1 */
4035
0
                i4_chroma_memSize_perThread +=
4036
0
                    2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_XS6 *
4037
0
                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4038
4039
                /* 2 */
4040
0
                i4_chroma_memSize_perThread +=
4041
0
                    2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS6 *
4042
0
                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4043
4044
0
                break;
4045
0
            }
4046
0
            }
4047
0
        }
4048
3.57k
        else
4049
3.57k
        {
4050
3.57k
            WORD32 i4_quality_preset =
4051
3.57k
                ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
4052
3.57k
            switch(i4_quality_preset)
4053
3.57k
            {
4054
1.72k
            case IHEVCE_QUALITY_P0:
4055
1.72k
            {
4056
                /* 1 */
4057
1.72k
                i4_chroma_memSize_perThread +=
4058
1.72k
                    2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_PQ *
4059
1.72k
                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4060
4061
                /* 2 */
4062
1.72k
                i4_chroma_memSize_perThread +=
4063
1.72k
                    2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4064
1.72k
                    ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ *
4065
1.72k
                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4066
4067
1.72k
                break;
4068
0
            }
4069
280
            case IHEVCE_QUALITY_P2:
4070
280
            {
4071
                /* 1 */
4072
280
                i4_chroma_memSize_perThread +=
4073
280
                    2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_HQ *
4074
280
                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4075
4076
                /* 2 */
4077
280
                i4_chroma_memSize_perThread +=
4078
280
                    2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4079
280
                    ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ *
4080
280
                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4081
4082
280
                break;
4083
0
            }
4084
396
            case IHEVCE_QUALITY_P3:
4085
396
            {
4086
                /* 1 */
4087
396
                i4_chroma_memSize_perThread +=
4088
396
                    2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_MS *
4089
396
                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4090
4091
                /* 2 */
4092
396
                i4_chroma_memSize_perThread +=
4093
396
                    2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4094
396
                    ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS *
4095
396
                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4096
4097
396
                break;
4098
0
            }
4099
253
            case IHEVCE_QUALITY_P4:
4100
253
            {
4101
                /* 1 */
4102
253
                i4_chroma_memSize_perThread +=
4103
253
                    2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_HS *
4104
253
                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4105
4106
                /* 2 */
4107
253
                i4_chroma_memSize_perThread +=
4108
253
                    2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4109
253
                    ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS *
4110
253
                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4111
4112
253
                break;
4113
0
            }
4114
299
            case IHEVCE_QUALITY_P5:
4115
299
            {
4116
                /* 1 */
4117
299
                i4_chroma_memSize_perThread +=
4118
299
                    2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_XS *
4119
299
                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4120
4121
                /* 2 */
4122
299
                i4_chroma_memSize_perThread +=
4123
299
                    2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4124
299
                    ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS *
4125
299
                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4126
4127
299
                break;
4128
0
            }
4129
378
            case IHEVCE_QUALITY_P6:
4130
620
            case IHEVCE_QUALITY_P7:
4131
620
            {
4132
                /* 1 */
4133
620
                i4_chroma_memSize_perThread +=
4134
620
                    2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_XS6 *
4135
620
                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4136
4137
                /* 2 */
4138
620
                i4_chroma_memSize_perThread +=
4139
620
                    2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
4140
620
                    ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS6 *
4141
620
                    ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
4142
4143
620
                break;
4144
378
            }
4145
3.57k
            }
4146
3.57k
        }
4147
4148
3.57k
        i4_memSize_perThread = i4_luma_memSize_perThread + i4_chroma_memSize_perThread;
4149
4150
3.57k
        ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_size =
4151
3.57k
            i4_num_proc_thrds * i4_memSize_perThread * sizeof(UWORD8);
4152
4153
3.57k
        ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
4154
4155
3.57k
        ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_alignment = 16;
4156
3.57k
    }
4157
4158
0
    n_tabs = NUM_ENC_LOOP_MEM_RECS;
4159
4160
    /*************************************************************************/
4161
    /* --- EncLoop Deblock and SAO sync Dep Mngr Mem requests --                     */
4162
    /*************************************************************************/
4163
4164
    /* Fill the memtabs for  EncLoop Deblock Dep Mngr */
4165
3.57k
    {
4166
3.57k
        WORD32 count;
4167
3.57k
        WORD32 num_vert_units;
4168
3.57k
        WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4169
4170
3.57k
        ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
4171
3.57k
        ASSERT(num_vert_units > 0);
4172
7.14k
        for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4173
3.57k
        {
4174
7.14k
            for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++)
4175
3.57k
            {
4176
3.57k
                n_tabs += ihevce_dmgr_get_mem_recs(
4177
3.57k
                    &ps_mem_tab[n_tabs],
4178
3.57k
                    DEP_MNGR_ROW_ROW_SYNC,
4179
3.57k
                    num_vert_units,
4180
3.57k
                    ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4181
3.57k
                    i4_num_proc_thrds,
4182
3.57k
                    i4_mem_space);
4183
3.57k
            }
4184
3.57k
        }
4185
4186
        /* Fill the memtabs for  EncLoop SAO Dep Mngr */
4187
7.14k
        for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4188
3.57k
        {
4189
7.14k
            for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++)
4190
3.57k
            {
4191
3.57k
                n_tabs += ihevce_dmgr_get_mem_recs(
4192
3.57k
                    &ps_mem_tab[n_tabs],
4193
3.57k
                    DEP_MNGR_ROW_ROW_SYNC,
4194
3.57k
                    num_vert_units,
4195
3.57k
                    ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4196
3.57k
                    i4_num_proc_thrds,
4197
3.57k
                    i4_mem_space);
4198
3.57k
            }
4199
3.57k
        }
4200
3.57k
    }
4201
4202
    /*************************************************************************/
4203
    /* --- EncLoop Top-Right CU sync Dep Mngr Mem requests --                */
4204
    /*************************************************************************/
4205
4206
    /* Fill the memtabs for  Top-Right CU sync Dep Mngr */
4207
3.57k
    {
4208
3.57k
        WORD32 count;
4209
3.57k
        WORD32 num_vert_units;
4210
3.57k
        WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4211
3.57k
        ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
4212
3.57k
        ASSERT(num_vert_units > 0);
4213
4214
7.14k
        for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4215
3.57k
        {
4216
7.14k
            for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++)
4217
3.57k
            {
4218
3.57k
                n_tabs += ihevce_dmgr_get_mem_recs(
4219
3.57k
                    &ps_mem_tab[n_tabs],
4220
3.57k
                    DEP_MNGR_ROW_ROW_SYNC,
4221
3.57k
                    num_vert_units,
4222
3.57k
                    ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4223
3.57k
                    i4_num_proc_thrds,
4224
3.57k
                    i4_mem_space);
4225
3.57k
            }
4226
3.57k
        }
4227
3.57k
    }
4228
4229
    /*************************************************************************/
4230
    /* --- EncLoop Aux. on Ref. bitrate sync Dep Mngr Mem requests --        */
4231
    /*************************************************************************/
4232
4233
    /* Fill the memtabs for  EncLoop Aux. on Ref. bitrate Dep Mngr */
4234
3.57k
    {
4235
3.57k
        WORD32 count;
4236
3.57k
        WORD32 num_vert_units;
4237
3.57k
        WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4238
4239
3.57k
        ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
4240
3.57k
        ASSERT(num_vert_units > 0);
4241
4242
7.14k
        for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
4243
3.57k
        {
4244
3.57k
            for(ctr = 1; ctr < i4_num_bitrate_inst; ctr++)
4245
0
            {
4246
0
                n_tabs += ihevce_dmgr_get_mem_recs(
4247
0
                    &ps_mem_tab[n_tabs],
4248
0
                    DEP_MNGR_ROW_ROW_SYNC,
4249
0
                    num_vert_units,
4250
0
                    ps_init_prms->s_app_tile_params.i4_num_tile_cols,
4251
0
                    i4_num_proc_thrds,
4252
0
                    i4_mem_space);
4253
0
            }
4254
3.57k
        }
4255
3.57k
    }
4256
4257
3.57k
    return (n_tabs);
4258
3.57k
}
4259
4260
/*!
4261
******************************************************************************
4262
* \if Function name : ihevce_enc_loop_init \endif
4263
*
4264
* \brief
4265
*    Initialization for ENC_LOOP context state structure.
4266
*
4267
* \param[in] ps_mem_tab : pointer to memory descriptors table
4268
* \param[in] ps_init_prms : Create time static parameters
4269
* \param[in] pv_osal_handle : Osal handle
4270
*
4271
* \return
4272
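*    Pointer returned as void * (presumably the ENC_LOOP master context handle; confirm against the return statement)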
4273
*
4274
* \author
4275
*  Ittiam
4276
*
4277
*****************************************************************************
4278
*/
4279
void *ihevce_enc_loop_init(
4280
    iv_mem_rec_t *ps_mem_tab,
4281
    ihevce_static_cfg_params_t *ps_init_prms,
4282
    WORD32 i4_num_proc_thrds,
4283
    void *pv_osal_handle,
4284
    func_selector_t *ps_func_selector,
4285
    rc_quant_t *ps_rc_quant_ctxt,
4286
    ihevce_tile_params_t *ps_tile_params_base,
4287
    WORD32 i4_resolution_id,
4288
    WORD32 i4_num_enc_loop_frm_pllel,
4289
    UWORD8 u1_is_popcnt_available)
4290
3.57k
{
4291
3.57k
    ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
4292
3.57k
    ihevce_enc_loop_ctxt_t *ps_ctxt;
4293
3.57k
    WORD32 ctr, n_tabs;
4294
3.57k
    UWORD32 u4_width, u4_height;
4295
3.57k
    UWORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame;
4296
3.57k
    UWORD32 u4_size_bs_memory, u4_size_qp_memory;
4297
3.57k
    UWORD8 *pu1_deblk_base; /*Store the base address of deblcoking memory*/
4298
3.57k
    WORD32 i;
4299
3.57k
    WORD32 i4_num_bitrate_inst =
4300
3.57k
        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_num_bitrate_instances;
4301
3.57k
    enc_loop_rc_params_t *ps_enc_loop_rc_params;
4302
3.57k
    UWORD8 *pu1_sao_base; /* store the base address of sao*/
4303
3.57k
    UWORD32 u4_ctb_aligned_wd, ctb_size, u4_ctb_aligned_ht, num_vert_units;
4304
3.57k
    WORD32 i4_chroma_format = ps_init_prms->s_src_prms.i4_chr_format;
4305
3.57k
    WORD32 is_hbd_mode = (ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8);
4306
3.57k
    WORD32 i4_enc_frm_id;
4307
3.57k
    WORD32 num_cu_in_ctb;
4308
3.57k
    WORD32 i4_num_tile_cols = 1;  //Default value is 1
4309
4310
    /* ENC_LOOP state structure */
4311
3.57k
    ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)ps_mem_tab[ENC_LOOP_CTXT].pv_base;
4312
4313
3.57k
    ps_master_ctxt->i4_num_proc_thrds = i4_num_proc_thrds;
4314
4315
3.57k
    ps_ctxt = (ihevce_enc_loop_ctxt_t *)ps_mem_tab[ENC_LOOP_THRDS_CTXT].pv_base;
4316
3.57k
    ps_enc_loop_rc_params = (enc_loop_rc_params_t *)ps_mem_tab[ENC_LOOP_RC_PARAMS].pv_base;
4317
3.57k
    ps_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt;
4318
    /*Calculation of memory sizes for deblocking*/
4319
3.57k
    {
4320
        /*width of the input YUV to be encoded. */
4321
3.57k
        u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
4322
        /*making the width a multiple of CTB size*/
4323
3.57k
        u4_width += SET_CTB_ALIGN(
4324
3.57k
            ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE);
4325
4326
3.57k
        u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE);
4327
4328
        /*height of the input YUV to be encoded */
4329
3.57k
        u4_height = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
4330
        /*making the height a multiple of CTB size*/
4331
3.57k
        u4_height += SET_CTB_ALIGN(
4332
3.57k
            ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, MAX_CTB_SIZE);
4333
4334
3.57k
        u4_ctb_rows_in_a_frame = (u4_height / MAX_CTB_SIZE);
4335
4336
        /*Memory(in bytes) required for storing Boundary Strength for entire CTB row*/
4337
        /*1 vertical edge per 8 pixel*/
4338
3.57k
        u4_size_bs_memory = (MAX_CTB_SIZE >> 3);
4339
        /*Vertical edges for entire width of CTB row*/
4340
3.57k
        u4_size_bs_memory *= u4_ctb_in_a_row;
4341
        /*Each vertical edge of CTB row is 4 bytes*/
4342
3.57k
        u4_size_bs_memory = u4_size_bs_memory << 2;
4343
        /*Adding Memory required for storing horizontal BS by doubling*/
4344
3.57k
        u4_size_bs_memory = u4_size_bs_memory << 1;
4345
4346
        /*Memory(in bytes) required for storing Qp at 4x4 level for entire CTB row*/
4347
        /*Number of 4x4 blocks in the width of a CTB*/
4348
3.57k
        u4_size_qp_memory = (MAX_CTB_SIZE >> 2);
4349
        /*Number of 4x4 blocks in the height of a CTB. Adding 1 to store Qp of lowest
4350
        4x4-block layer of top-CTB in order to deblock top edge of current CTB*/
4351
3.57k
        u4_size_qp_memory *= ((MAX_CTB_SIZE >> 2) + 1);
4352
        /*Storage for entire CTB row*/
4353
3.57k
        u4_size_qp_memory *= u4_ctb_in_a_row;
4354
4355
3.57k
        pu1_deblk_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_DEBLOCKING].pv_base;
4356
3.57k
    }
4357
4358
    /*Derive the base pointer of sao*/
4359
3.57k
    pu1_sao_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_SAO].pv_base;
4360
3.57k
    ctb_size = (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size);
4361
3.57k
    u4_ctb_aligned_wd = u4_width;
4362
3.57k
    u4_ctb_aligned_ht = u4_height;
4363
3.57k
    num_vert_units = (u4_height) / ctb_size;
4364
4365
7.14k
    for(ctr = 0; ctr < ps_master_ctxt->i4_num_proc_thrds; ctr++)
4366
3.57k
    {
4367
3.57k
        ps_master_ctxt->aps_enc_loop_thrd_ctxt[ctr] = ps_ctxt;
4368
        /* Store Tile params base into EncLoop context */
4369
3.57k
        ps_ctxt->pv_tile_params_base = (void *)ps_tile_params_base;
4370
3.57k
        ihevce_cmn_utils_instr_set_router(
4371
3.57k
            &ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type);
4372
3.57k
        ihevce_sifter_sad_fxn_assigner(
4373
3.57k
            (FT_SAD_EVALUATOR **)(&ps_ctxt->pv_evalsad_pt_npu_mxn_8bit), ps_init_prms->e_arch_type);
4374
3.57k
        ps_ctxt->i4_max_search_range_horizontal =
4375
3.57k
            ps_init_prms->s_config_prms.i4_max_search_range_horz;
4376
3.57k
        ps_ctxt->i4_max_search_range_vertical =
4377
3.57k
            ps_init_prms->s_config_prms.i4_max_search_range_vert;
4378
4379
3.57k
        ps_ctxt->i4_quality_preset =
4380
3.57k
            ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
4381
4382
3.57k
        if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P7)
4383
242
        {
4384
242
            ps_ctxt->i4_quality_preset = IHEVCE_QUALITY_P6;
4385
242
        }
4386
4387
3.57k
        ps_ctxt->i4_num_proc_thrds = ps_master_ctxt->i4_num_proc_thrds;
4388
4389
3.57k
        ps_ctxt->i4_rc_pass = ps_init_prms->s_pass_prms.i4_pass;
4390
4391
3.57k
        ps_ctxt->u1_chroma_array_type = (i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1;
4392
4393
3.57k
        ps_ctxt->s_deblk_prms.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
4394
4395
3.57k
        ps_ctxt->pi2_scal_mat = (WORD16 *)ps_mem_tab[ENC_LOOP_SCALE_MAT].pv_base;
4396
4397
3.57k
        ps_ctxt->pi2_rescal_mat = (WORD16 *)ps_mem_tab[ENC_LOOP_RESCALE_MAT].pv_base;
4398
4399
3.57k
        if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
4400
1.72k
        {
4401
1.72k
            ps_ctxt->i4_use_ctb_level_lamda = 0;
4402
1.72k
        }
4403
1.84k
        else
4404
1.84k
        {
4405
1.84k
            ps_ctxt->i4_use_ctb_level_lamda = 0;
4406
1.84k
        }
4407
4408
        /** Register the function selector pointer*/
4409
3.57k
        ps_ctxt->ps_func_selector = ps_func_selector;
4410
4411
3.57k
        ps_ctxt->s_mc_ctxt.ps_func_selector = ps_func_selector;
4412
4413
        /* Initialization for non-distributed mode */
4414
3.57k
        ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[0] = 0;
4415
3.57k
        ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[1] = 0;
4416
3.57k
        ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[2] = 0;
4417
3.57k
        ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[3] = 0;
4418
4419
3.57k
        ps_ctxt->s_deblk_prms.ps_func_selector = ps_func_selector;
4420
3.57k
        ps_ctxt->i4_top_row_luma_stride = (u4_width + MAX_CU_SIZE + 1);
4421
4422
3.57k
        ps_ctxt->i4_frm_top_row_luma_size =
4423
3.57k
            ps_ctxt->i4_top_row_luma_stride * (u4_ctb_rows_in_a_frame + 1);
4424
4425
3.57k
        ps_ctxt->i4_top_row_chroma_stride = (u4_width + MAX_CU_SIZE + 2);
4426
4427
3.57k
        ps_ctxt->i4_frm_top_row_chroma_size =
4428
3.57k
            ps_ctxt->i4_top_row_chroma_stride * (u4_ctb_rows_in_a_frame + 1);
4429
4430
3.57k
        {
4431
7.14k
            for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4432
3.57k
            {
4433
                /* +1 is to provision top left pel */
4434
3.57k
                ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] =
4435
3.57k
                    (UWORD8 *)ps_mem_tab[ENC_LOOP_TOP_LUMA].pv_base + 1 +
4436
3.57k
                    (ps_ctxt->i4_frm_top_row_luma_size * i4_enc_frm_id * i4_num_bitrate_inst);
4437
4438
                /* pointer incremented by 1 row to avoid OOB access in 0th row */
4439
3.57k
                ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] =
4440
3.57k
                    (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] +
4441
3.57k
                    ps_ctxt->i4_top_row_luma_stride;
4442
4443
                /* +2 is to provision top left pel */
4444
3.57k
                ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] =
4445
3.57k
                    (UWORD8 *)ps_mem_tab[ENC_LOOP_TOP_CHROMA].pv_base + 2 +
4446
3.57k
                    (ps_ctxt->i4_frm_top_row_chroma_size * i4_enc_frm_id * i4_num_bitrate_inst);
4447
4448
                /* pointer incremented by 1 row to avoid OOB access in 0th row */
4449
3.57k
                ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] =
4450
3.57k
                    (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] +
4451
3.57k
                    ps_ctxt->i4_top_row_chroma_stride;
4452
3.57k
            }
4453
3.57k
        }
4454
4455
        /* +1 is to provision top left nbr */
4456
3.57k
        ps_ctxt->i4_top_row_nbr_stride = (((u4_width + MAX_CU_SIZE) >> 2) + 1);
4457
3.57k
        ps_ctxt->i4_frm_top_row_nbr_size =
4458
3.57k
            ps_ctxt->i4_top_row_nbr_stride * (u4_ctb_rows_in_a_frame + 1);
4459
7.14k
        for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4460
3.57k
        {
4461
3.57k
            ps_ctxt->aps_frm_top_row_nbr[i4_enc_frm_id] =
4462
3.57k
                (nbr_4x4_t *)ps_mem_tab[ENC_LOOP_TOP_NBR4X4].pv_base + 1 +
4463
3.57k
                (ps_ctxt->i4_frm_top_row_nbr_size * i4_enc_frm_id * i4_num_bitrate_inst);
4464
3.57k
            ps_ctxt->aps_frm_top_row_nbr[i4_enc_frm_id] += ps_ctxt->i4_top_row_nbr_stride;
4465
3.57k
        }
4466
4467
3.57k
        num_cu_in_ctb = ctb_size / MIN_CU_SIZE;
4468
3.57k
        num_cu_in_ctb *= num_cu_in_ctb;
4469
4470
        /* pointer incremented by 1 row to avoid OOB access in 0th row */
4471
4472
        /* Memory for CU level Coeff data buffer */
4473
3.57k
        {
4474
3.57k
            WORD32 i4_16byte_boundary_overshoot;
4475
3.57k
            WORD32 buf_size_per_cu;
4476
3.57k
            WORD32 buf_size_per_thread_wo_alignment_req;
4477
3.57k
            WORD32 buf_size_per_thread;
4478
4479
3.57k
            buf_size_per_cu =
4480
3.57k
                ((MAX_LUMA_COEFFS_CTB +
4481
3.57k
                  (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) +
4482
3.57k
                 16) *
4483
3.57k
                sizeof(UWORD8);
4484
3.57k
            buf_size_per_thread_wo_alignment_req = buf_size_per_cu - 16 * sizeof(UWORD8);
4485
4486
3.57k
            {
4487
3.57k
                buf_size_per_thread = buf_size_per_cu * (2);
4488
4489
10.7k
                for(i = 0; i < 2; i++)
4490
7.14k
                {
4491
7.14k
                    ps_ctxt->as_cu_prms[i].pu1_cu_coeffs =
4492
7.14k
                        (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].pv_base +
4493
7.14k
                        (ctr * buf_size_per_thread) + (i * buf_size_per_cu);
4494
4495
7.14k
                    i4_16byte_boundary_overshoot =
4496
7.14k
                        ((LWORD64)ps_ctxt->as_cu_prms[i].pu1_cu_coeffs & 0xf);
4497
4498
7.14k
                    ps_ctxt->as_cu_prms[i].pu1_cu_coeffs += (16 - i4_16byte_boundary_overshoot);
4499
7.14k
                }
4500
3.57k
            }
4501
4502
3.57k
            ps_ctxt->pu1_cu_recur_coeffs =
4503
3.57k
                (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].pv_base +
4504
3.57k
                (ctr * buf_size_per_thread_wo_alignment_req);
4505
3.57k
        }
4506
4507
        /* Memory for CU dequant data buffer */
4508
3.57k
        {
4509
3.57k
            WORD32 buf_size_per_thread;
4510
3.57k
            WORD32 i4_16byte_boundary_overshoot;
4511
4512
3.57k
            WORD32 buf_size_per_cu =
4513
3.57k
                (((i4_chroma_format == IV_YUV_422SP_UV) ? (MAX_CU_SIZE * (MAX_CU_SIZE << 1))
4514
3.57k
                                                        : (MAX_CU_SIZE * (MAX_CU_SIZE >> 1) * 3)) +
4515
3.57k
                 8) *
4516
3.57k
                sizeof(WORD16);
4517
4518
3.57k
            {
4519
3.57k
                buf_size_per_thread = buf_size_per_cu * 2;
4520
4521
10.7k
                for(i = 0; i < 2; i++)
4522
7.14k
                {
4523
7.14k
                    ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs =
4524
7.14k
                        (WORD16
4525
7.14k
                             *)((UWORD8 *)ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].pv_base + (ctr * buf_size_per_thread) + (i * buf_size_per_cu));
4526
4527
7.14k
                    i4_16byte_boundary_overshoot =
4528
7.14k
                        ((LWORD64)ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs & 0xf);
4529
4530
7.14k
                    ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs =
4531
7.14k
                        (WORD16
4532
7.14k
                             *)((UWORD8 *)ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs + (16 - i4_16byte_boundary_overshoot));
4533
7.14k
                }
4534
3.57k
            }
4535
3.57k
        }
4536
4537
        /*------ Deblocking memory pointer assignments start ------*/
4538
4539
        /*Assign stride = 4x4 blocks in horizontal edge*/
4540
3.57k
        ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_strd = (MAX_CTB_SIZE / 4) * u4_ctb_in_a_row;
4541
4542
3.57k
        ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_size =
4543
3.57k
            ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_strd * u4_ctb_rows_in_a_frame;
4544
4545
        /*Assign frame level memory to store the Qp of
4546
        top 4x4 neighbours of each CTB row*/
4547
7.14k
        for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4548
3.57k
        {
4549
3.57k
            ps_ctxt->s_deblk_ctbrow_prms.api1_qp_top_4x4_ctb_row[i4_enc_frm_id] =
4550
3.57k
                (WORD8 *)ps_mem_tab[ENC_LOOP_QP_TOP_4X4].pv_base +
4551
3.57k
                (ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_size * i4_num_bitrate_inst *
4552
3.57k
                 i4_enc_frm_id);
4553
3.57k
        }
4554
4555
3.57k
        ps_ctxt->s_deblk_ctbrow_prms.pu4_ctb_row_bs_vert = (UWORD32 *)pu1_deblk_base;
4556
4557
3.57k
        ps_ctxt->s_deblk_ctbrow_prms.pu4_ctb_row_bs_horz =
4558
3.57k
            (UWORD32 *)(pu1_deblk_base + (u4_size_bs_memory >> 1));
4559
4560
3.57k
        ps_ctxt->s_deblk_ctbrow_prms.pi1_ctb_row_qp = (WORD8 *)pu1_deblk_base + u4_size_bs_memory;
4561
4562
        /*Assign stride = 4x4 blocks in horizontal edge*/
4563
3.57k
        ps_ctxt->s_deblk_ctbrow_prms.u4_qp_buffer_stride = (MAX_CTB_SIZE / 4) * u4_ctb_in_a_row;
4564
4565
3.57k
        pu1_deblk_base += (u4_size_bs_memory + u4_size_qp_memory);
4566
4567
        /*------ Deblocking memory pointer assignments end ------*/
4568
4569
        /*------SAO memory's pointer assignment starts------------*/
4570
3.57k
        if(!is_hbd_mode)
4571
3.57k
        {
4572
            /* each row is widened by 1 (luma) or 2 (chroma) to allocate the top left pixel */
4573
3.57k
            ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size =
4574
3.57k
                u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 1) * (num_vert_units + 1);
4575
3.57k
            ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size =
4576
3.57k
                u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 2) * (num_vert_units + 1);
4577
3.57k
            ps_ctxt->s_sao_ctxt_t.i4_num_ctb_units =
4578
3.57k
                num_vert_units * (u4_ctb_aligned_wd / MAX_CTB_SIZE);
4579
4580
7.14k
            for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
4581
3.57k
            {
4582
3.57k
                ps_ctxt->s_sao_ctxt_t.apu1_sao_src_frm_top_luma[i4_enc_frm_id] =
4583
3.57k
                    pu1_sao_base +
4584
3.57k
                    ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size +
4585
3.57k
                      ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) *
4586
3.57k
                     i4_num_bitrate_inst * i4_enc_frm_id) +  // move to the next frame_id
4587
3.57k
                    u4_ctb_aligned_wd +
4588
3.57k
                    2;
4589
4590
3.57k
                ps_ctxt->s_sao_ctxt_t.apu1_sao_src_frm_top_chroma[i4_enc_frm_id] =
4591
3.57k
                    pu1_sao_base +
4592
3.57k
                    ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size +
4593
3.57k
                      ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) *
4594
3.57k
                     i4_num_bitrate_inst * i4_enc_frm_id) +
4595
3.57k
                    +u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 1) * (num_vert_units + 1) +
4596
3.57k
                    u4_ctb_aligned_wd + 4;
4597
4598
3.57k
                ps_ctxt->s_sao_ctxt_t.aps_frm_top_ctb_sao[i4_enc_frm_id] = (sao_enc_t *) (pu1_sao_base +
4599
3.57k
                    ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size + ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size)
4600
3.57k
                    *i4_num_bitrate_inst*i4_num_enc_loop_frm_pllel) +
4601
3.57k
                    (ps_ctxt->s_sao_ctxt_t.i4_num_ctb_units * sizeof(sao_enc_t) *i4_num_bitrate_inst * i4_enc_frm_id));
4602
3.57k
            }
4603
3.57k
            ps_ctxt->s_sao_ctxt_t.i4_ctb_size =
4604
3.57k
                (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size);
4605
3.57k
            ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd = u4_ctb_aligned_wd;
4606
3.57k
        }
4607
4608
        /*------SAO memory's pointer assignment ends------------*/
4609
4610
        /* perform all one time initialisation here */
4611
3.57k
        ps_ctxt->i4_nbr_map_strd = MAX_PU_IN_CTB_ROW + 1 + 8;
4612
4613
3.57k
        ps_ctxt->pu1_ctb_nbr_map = ps_ctxt->au1_nbr_ctb_map[0];
4614
4615
3.57k
        ps_ctxt->i4_deblock_type = ps_init_prms->s_coding_tools_prms.i4_deblocking_type;
4616
4617
        /* move the pointer to 1,2 location */
4618
3.57k
        ps_ctxt->pu1_ctb_nbr_map += ps_ctxt->i4_nbr_map_strd;
4619
3.57k
        ps_ctxt->pu1_ctb_nbr_map++;
4620
4621
3.57k
        ps_ctxt->i4_cu_csbf_strd = MAX_TU_IN_CTB_ROW;
4622
4623
3.57k
        CREATE_SUBBLOCK2CSBFID_MAP(gai4_subBlock2csbfId_map4x4TU, 1, 4, ps_ctxt->i4_cu_csbf_strd);
4624
4625
3.57k
        CREATE_SUBBLOCK2CSBFID_MAP(gai4_subBlock2csbfId_map8x8TU, 4, 8, ps_ctxt->i4_cu_csbf_strd);
4626
4627
3.57k
        CREATE_SUBBLOCK2CSBFID_MAP(
4628
3.57k
            gai4_subBlock2csbfId_map16x16TU, 16, 16, ps_ctxt->i4_cu_csbf_strd);
4629
4630
3.57k
        CREATE_SUBBLOCK2CSBFID_MAP(
4631
3.57k
            gai4_subBlock2csbfId_map32x32TU, 64, 32, ps_ctxt->i4_cu_csbf_strd);
4632
4633
        /* For both instances, initialise the chroma dequant start idx */
4634
3.57k
        ps_ctxt->as_cu_prms[0].i4_chrm_deq_coeff_strt_idx = (MAX_CU_SIZE * MAX_CU_SIZE);
4635
3.57k
        ps_ctxt->as_cu_prms[1].i4_chrm_deq_coeff_strt_idx = (MAX_CU_SIZE * MAX_CU_SIZE);
4636
4637
        /* initialise all the function pointer tables */
4638
3.57k
        {
4639
3.57k
            ps_ctxt->pv_inter_rdopt_cu_mc_mvp =
4640
3.57k
                (pf_inter_rdopt_cu_mc_mvp)ihevce_inter_rdopt_cu_mc_mvp;
4641
4642
3.57k
            ps_ctxt->pv_inter_rdopt_cu_ntu = (pf_inter_rdopt_cu_ntu)ihevce_inter_rdopt_cu_ntu;
4643
4644
3.57k
#if ENABLE_RDO_BASED_TU_RECURSION
4645
3.57k
            if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
4646
1.72k
            {
4647
1.72k
                ps_ctxt->pv_inter_rdopt_cu_ntu =
4648
1.72k
                    (pf_inter_rdopt_cu_ntu)ihevce_inter_tu_tree_selector_and_rdopt_cost_computer;
4649
1.72k
            }
4650
3.57k
#endif
4651
3.57k
            ps_ctxt->pv_intra_chroma_pred_mode_selector =
4652
3.57k
                (pf_intra_chroma_pred_mode_selector)ihevce_intra_chroma_pred_mode_selector;
4653
3.57k
            ps_ctxt->pv_intra_rdopt_cu_ntu = (pf_intra_rdopt_cu_ntu)ihevce_intra_rdopt_cu_ntu;
4654
3.57k
            ps_ctxt->pv_final_rdopt_mode_prcs =
4655
3.57k
                (pf_final_rdopt_mode_prcs)ihevce_final_rdopt_mode_prcs;
4656
3.57k
            ps_ctxt->pv_store_cu_results = (pf_store_cu_results)ihevce_store_cu_results;
4657
3.57k
            ps_ctxt->pv_enc_loop_cu_bot_copy = (pf_enc_loop_cu_bot_copy)ihevce_enc_loop_cu_bot_copy;
4658
3.57k
            ps_ctxt->pv_enc_loop_ctb_left_copy =
4659
3.57k
                (pf_enc_loop_ctb_left_copy)ihevce_enc_loop_ctb_left_copy;
4660
4661
            /* Memory assignments for chroma intra pred buffer */
4662
3.57k
            {
4663
3.57k
                WORD32 pred_buf_size =
4664
3.57k
                    MAX_TU_SIZE * MAX_TU_SIZE * 2 * ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4665
3.57k
                WORD32 pred_buf_size_per_thread =
4666
3.57k
                    NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD * pred_buf_size;
4667
3.57k
                UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].pv_base +
4668
3.57k
                                   (ctr * pred_buf_size_per_thread);
4669
4670
10.7k
                for(i = 0; i < NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD; i++)
4671
7.14k
                {
4672
7.14k
                    ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[i].pv_pred_data = pu1_base;
4673
7.14k
                    pu1_base += pred_buf_size;
4674
7.14k
                }
4675
3.57k
            }
4676
4677
            /* Memory assignments for reference substitution output */
4678
3.57k
            {
4679
3.57k
                WORD32 pred_buf_size = ((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING
4680
3.57k
                                       + INTRAPRED_SIMD_LEFT_PADDING);
4681
3.57k
                WORD32 pred_buf_size_per_thread = pred_buf_size;
4682
3.57k
                UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_REF_SUB_OUT].pv_base +
4683
3.57k
                                   (ctr * pred_buf_size_per_thread);
4684
4685
3.57k
                ps_ctxt->pv_ref_sub_out = pu1_base + INTRAPRED_SIMD_LEFT_PADDING;
4686
3.57k
            }
4687
4688
            /* Memory assignments for reference filtering output */
4689
3.57k
            {
4690
3.57k
                WORD32 pred_buf_size = ((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING
4691
3.57k
                                       + INTRAPRED_SIMD_LEFT_PADDING);
4692
3.57k
                WORD32 pred_buf_size_per_thread = pred_buf_size;
4693
3.57k
                UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_REF_FILT_OUT].pv_base +
4694
3.57k
                                   (ctr * pred_buf_size_per_thread);
4695
4696
3.57k
                ps_ctxt->pv_ref_filt_out = pu1_base + INTRAPRED_SIMD_LEFT_PADDING;
4697
3.57k
            }
4698
4699
            /* Memory assignments for recon storage during CU Recursion */
4700
3.57k
#if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4701
3.57k
            if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
4702
1.72k
#endif
4703
1.72k
            {
4704
1.72k
                {
4705
1.72k
                    WORD32 pred_buf_size = (MAX_CU_SIZE * MAX_CU_SIZE);
4706
1.72k
                    WORD32 pred_buf_size_per_thread = pred_buf_size;
4707
1.72k
                    UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].pv_base +
4708
1.72k
                                       (ctr * pred_buf_size_per_thread);
4709
4710
1.72k
                    ps_ctxt->pv_cu_luma_recon = pu1_base;
4711
1.72k
                }
4712
4713
1.72k
                {
4714
1.72k
                    WORD32 pred_buf_size = ((MAX_CU_SIZE * MAX_CU_SIZE) >> 1) *
4715
1.72k
                                           ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4716
1.72k
                    WORD32 pred_buf_size_per_thread = pred_buf_size;
4717
1.72k
                    UWORD8 *pu1_base =
4718
1.72k
                        (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].pv_base +
4719
1.72k
                        (ctr * pred_buf_size_per_thread);
4720
4721
1.72k
                    ps_ctxt->pv_cu_chrma_recon = pu1_base;
4722
1.72k
                }
4723
1.72k
            }
4724
4725
            /* Memory assignments for pred storage during CU Recursion */
4726
3.57k
#if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
4727
3.57k
            if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
4728
1.72k
#endif
4729
1.72k
            {
4730
1.72k
                {
4731
1.72k
                    WORD32 pred_buf_size = (MAX_CU_SIZE * MAX_CU_SIZE);
4732
1.72k
                    WORD32 pred_buf_size_per_thread = pred_buf_size;
4733
1.72k
                    UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].pv_base +
4734
1.72k
                                       (ctr * pred_buf_size_per_thread);
4735
4736
1.72k
                    ps_ctxt->pv_CTB_pred_luma = pu1_base;
4737
1.72k
                }
4738
4739
1.72k
                {
4740
1.72k
                    WORD32 pred_buf_size = ((MAX_CU_SIZE * MAX_CU_SIZE) >> 1) *
4741
1.72k
                                           ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4742
1.72k
                    WORD32 pred_buf_size_per_thread = pred_buf_size;
4743
1.72k
                    UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].pv_base +
4744
1.72k
                                       (ctr * pred_buf_size_per_thread);
4745
4746
1.72k
                    ps_ctxt->pv_CTB_pred_chroma = pu1_base;
4747
1.72k
                }
4748
1.72k
            }
4749
4750
            /* Memory assignments for CTB left luma data storage */
4751
3.57k
            {
4752
3.57k
                WORD32 pred_buf_size = (MAX_CTB_SIZE + MAX_TU_SIZE);
4753
3.57k
                WORD32 pred_buf_size_per_thread = pred_buf_size;
4754
3.57k
                UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].pv_base +
4755
3.57k
                                   (ctr * pred_buf_size_per_thread);
4756
4757
3.57k
                ps_ctxt->pv_left_luma_data = pu1_base;
4758
3.57k
            }
4759
4760
            /* Memory assignments for CTB left chroma data storage */
4761
3.57k
            {
4762
3.57k
                WORD32 pred_buf_size =
4763
3.57k
                    (MAX_CTB_SIZE + MAX_TU_SIZE) * ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
4764
3.57k
                WORD32 pred_buf_size_per_thread = pred_buf_size;
4765
3.57k
                UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].pv_base +
4766
3.57k
                                   (ctr * pred_buf_size_per_thread);
4767
4768
3.57k
                ps_ctxt->pv_left_chrm_data = pu1_base;
4769
3.57k
            }
4770
3.57k
        }
4771
4772
        /* Memory for inter pred buffers */
4773
3.57k
        {
4774
3.57k
            WORD32 i4_num_bufs_per_thread;
4775
4776
3.57k
            WORD32 i4_buf_size_per_cand =
4777
3.57k
                (MAX_CTB_SIZE) * (MAX_CTB_SIZE) *
4778
3.57k
                ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
4779
4780
3.57k
            i4_num_bufs_per_thread =
4781
3.57k
                (ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_size / i4_num_proc_thrds) /
4782
3.57k
                i4_buf_size_per_cand;
4783
4784
3.57k
            ps_ctxt->i4_max_num_inter_rdopt_cands = i4_num_bufs_per_thread - 4;
4785
4786
3.57k
            ps_ctxt->s_pred_buf_data.u4_is_buf_in_use = UINT_MAX;
4787
4788
3.57k
            {
4789
3.57k
                UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_INTER_PRED].pv_base +
4790
3.57k
                                   +(ctr * i4_buf_size_per_cand * i4_num_bufs_per_thread);
4791
4792
29.4k
                for(i = 0; i < i4_num_bufs_per_thread; i++)
4793
25.8k
                {
4794
25.8k
                    ps_ctxt->s_pred_buf_data.apv_inter_pred_data[i] =
4795
25.8k
                        pu1_base + i * i4_buf_size_per_cand;
4796
25.8k
                    ps_ctxt->s_pred_buf_data.u4_is_buf_in_use ^= (1 << i);
4797
25.8k
                }
4798
3.57k
            }
4799
3.57k
        }
4800
4801
        /* Memory required to store pred for 422 chroma */
4802
3.57k
        if(i4_chroma_format == IV_YUV_422SP_UV)
4803
0
        {
4804
0
            WORD32 pred_buf_size = MAX_CTB_SIZE * MAX_CTB_SIZE * 2;
4805
0
            WORD32 pred_buf_size_per_thread =
4806
0
                pred_buf_size * ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) *
4807
0
                sizeof(UWORD8);
4808
0
            void *pv_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].pv_base +
4809
0
                            (ctr * pred_buf_size_per_thread);
4810
4811
0
            ps_ctxt->pv_422_chroma_intra_pred_buf = pv_base;
4812
0
        }
4813
3.57k
        else
4814
3.57k
        {
4815
3.57k
            ps_ctxt->pv_422_chroma_intra_pred_buf = NULL;
4816
3.57k
        }
4817
4818
        /* Memory for Recon Datastore (Used around and within the RDOPT loop) */
4819
3.57k
        {
4820
3.57k
            WORD32 i4_lumaBufSize = MAX_CU_SIZE * MAX_CU_SIZE;
4821
3.57k
            WORD32 i4_chromaBufSize =
4822
3.57k
                MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ((i4_chroma_format == IV_YUV_422SP_UV) + 1);
4823
3.57k
            WORD32 i4_memSize_perThread = ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_size /
4824
3.57k
                                          (i4_num_proc_thrds * sizeof(UWORD8) * (is_hbd_mode + 1));
4825
3.57k
            WORD32 i4_quality_preset = ps_ctxt->i4_quality_preset;
4826
3.57k
            {
4827
3.57k
                UWORD8 *pu1_mem_base =
4828
3.57k
                    (((UWORD8 *)ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].pv_base) +
4829
3.57k
                     ctr * i4_memSize_perThread);
4830
4831
3.57k
                ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_luma_recon_bufs[0] =
4832
3.57k
                    pu1_mem_base + i4_lumaBufSize * 0;
4833
3.57k
                ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_luma_recon_bufs[1] =
4834
3.57k
                    pu1_mem_base + i4_lumaBufSize * 1;
4835
3.57k
                ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_luma_recon_bufs[0] =
4836
3.57k
                    pu1_mem_base + i4_lumaBufSize * 2;
4837
3.57k
                ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_luma_recon_bufs[1] =
4838
3.57k
                    pu1_mem_base + i4_lumaBufSize * 3;
4839
4840
3.57k
                pu1_mem_base += i4_lumaBufSize * 4;
4841
4842
3.57k
                switch(i4_quality_preset)
4843
3.57k
                {
4844
1.72k
                case IHEVCE_QUALITY_P0:
4845
1.72k
                {
4846
1.72k
#if ENABLE_CHROMA_RDOPT_EVAL_IN_PQ
4847
1.72k
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4848
1.72k
                        pu1_mem_base + i4_chromaBufSize * 0;
4849
1.72k
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4850
1.72k
                        pu1_mem_base + i4_chromaBufSize * 1;
4851
#else
4852
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4853
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4854
#endif
4855
4856
1.72k
#if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ
4857
1.72k
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4858
1.72k
                        pu1_mem_base + i4_chromaBufSize * 2;
4859
1.72k
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4860
1.72k
                        pu1_mem_base + i4_chromaBufSize * 3;
4861
1.72k
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4862
1.72k
                        pu1_mem_base + i4_chromaBufSize * 2;
4863
1.72k
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4864
1.72k
                        pu1_mem_base + i4_chromaBufSize * 3;
4865
#else
4866
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4867
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4868
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4869
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4870
#endif
4871
4872
1.72k
                    break;
4873
0
                }
4874
280
                case IHEVCE_QUALITY_P2:
4875
280
                {
4876
280
#if ENABLE_CHROMA_RDOPT_EVAL_IN_HQ
4877
280
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4878
280
                        pu1_mem_base + i4_chromaBufSize * 0;
4879
280
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4880
280
                        pu1_mem_base + i4_chromaBufSize * 1;
4881
#else
4882
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4883
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4884
#endif
4885
4886
280
#if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ
4887
280
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4888
280
                        pu1_mem_base + i4_chromaBufSize * 2;
4889
280
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4890
280
                        pu1_mem_base + i4_chromaBufSize * 3;
4891
280
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4892
280
                        pu1_mem_base + i4_chromaBufSize * 2;
4893
280
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4894
280
                        pu1_mem_base + i4_chromaBufSize * 3;
4895
#else
4896
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4897
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4898
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4899
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4900
#endif
4901
4902
280
                    break;
4903
0
                }
4904
396
                case IHEVCE_QUALITY_P3:
4905
396
                {
4906
396
#if ENABLE_CHROMA_RDOPT_EVAL_IN_MS
4907
396
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4908
396
                        pu1_mem_base + i4_chromaBufSize * 0;
4909
396
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4910
396
                        pu1_mem_base + i4_chromaBufSize * 1;
4911
#else
4912
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4913
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4914
#endif
4915
4916
#if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS
4917
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4918
                        pu1_mem_base + i4_chromaBufSize * 2;
4919
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4920
                        pu1_mem_base + i4_chromaBufSize * 3;
4921
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4922
                        pu1_mem_base + i4_chromaBufSize * 2;
4923
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4924
                        pu1_mem_base + i4_chromaBufSize * 3;
4925
#else
4926
396
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4927
396
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4928
396
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4929
396
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4930
396
#endif
4931
4932
396
                    break;
4933
0
                }
4934
253
                case IHEVCE_QUALITY_P4:
4935
253
                {
4936
#if ENABLE_CHROMA_RDOPT_EVAL_IN_HS
4937
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4938
                        pu1_mem_base + i4_chromaBufSize * 0;
4939
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4940
                        pu1_mem_base + i4_chromaBufSize * 1;
4941
#else
4942
253
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4943
253
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4944
253
#endif
4945
4946
#if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS
4947
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4948
                        pu1_mem_base + i4_chromaBufSize * 2;
4949
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4950
                        pu1_mem_base + i4_chromaBufSize * 3;
4951
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4952
                        pu1_mem_base + i4_chromaBufSize * 2;
4953
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4954
                        pu1_mem_base + i4_chromaBufSize * 3;
4955
#else
4956
253
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4957
253
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4958
253
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4959
253
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4960
253
#endif
4961
4962
253
                    break;
4963
0
                }
4964
299
                case IHEVCE_QUALITY_P5:
4965
299
                {
4966
#if ENABLE_CHROMA_RDOPT_EVAL_IN_XS
4967
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
4968
                        pu1_mem_base + i4_chromaBufSize * 0;
4969
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
4970
                        pu1_mem_base + i4_chromaBufSize * 1;
4971
#else
4972
299
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4973
299
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
4974
299
#endif
4975
4976
#if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS
4977
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
4978
                        pu1_mem_base + i4_chromaBufSize * 2;
4979
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
4980
                        pu1_mem_base + i4_chromaBufSize * 3;
4981
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
4982
                        pu1_mem_base + i4_chromaBufSize * 2;
4983
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
4984
                        pu1_mem_base + i4_chromaBufSize * 3;
4985
#else
4986
299
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4987
299
                    ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4988
299
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
4989
299
                    ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
4990
299
#endif
4991
4992
299
                    break;
4993
0
                }
4994
3.57k
                }
4995
3.57k
            }
4996
4997
3.57k
            ps_ctxt->as_cu_prms[0].s_recon_datastore.i4_lumaRecon_stride = MAX_CU_SIZE;
4998
3.57k
            ps_ctxt->as_cu_prms[1].s_recon_datastore.i4_lumaRecon_stride = MAX_CU_SIZE;
4999
3.57k
            ps_ctxt->as_cu_prms[0].s_recon_datastore.i4_chromaRecon_stride = MAX_CU_SIZE;
5000
3.57k
            ps_ctxt->as_cu_prms[1].s_recon_datastore.i4_chromaRecon_stride = MAX_CU_SIZE;
5001
5002
3.57k
        } /* Recon Datastore */
5003
5004
        /****************************************************/
5005
        /****************************************************/
5006
        /* ps_pps->i1_sign_data_hiding_flag  == UNHIDDEN    */
5007
        /* when NO_SBH. else HIDDEN                         */
5008
        /****************************************************/
5009
        /****************************************************/
5010
        /* Zero cbf tool is enabled by default for all presets */
5011
0
        ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
5012
5013
3.57k
        if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3)
5014
2.00k
        {
5015
2.00k
            ps_ctxt->i4_quant_rounding_level = CU_LEVEL_QUANT_ROUNDING;
5016
2.00k
            ps_ctxt->i4_chroma_quant_rounding_level = CHROMA_QUANT_ROUNDING;
5017
2.00k
            ps_ctxt->i4_rdoq_level = ALL_CAND_RDOQ;
5018
2.00k
            ps_ctxt->i4_sbh_level = ALL_CAND_SBH;
5019
2.00k
        }
5020
1.56k
        else if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P3)
5021
396
        {
5022
396
            ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
5023
396
            ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
5024
396
            ps_ctxt->i4_rdoq_level = NO_RDOQ;
5025
396
            ps_ctxt->i4_sbh_level = NO_SBH;
5026
396
        }
5027
1.17k
        else
5028
1.17k
        {
5029
1.17k
            ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
5030
1.17k
            ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
5031
1.17k
            ps_ctxt->i4_rdoq_level = NO_RDOQ;
5032
1.17k
            ps_ctxt->i4_sbh_level = NO_SBH;
5033
1.17k
        }
5034
5035
#if DISABLE_QUANT_ROUNDING
5036
        ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
5037
        ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
5038
#endif
5039
        /*Disabling RDOQ only when spatial modulation is enabled
5040
                as RDOQ degrades visual quality*/
5041
3.57k
        if(ps_init_prms->s_config_prms.i4_cu_level_rc & 1)
5042
1.54k
        {
5043
1.54k
            ps_ctxt->i4_rdoq_level = NO_RDOQ;
5044
1.54k
        }
5045
5046
#if DISABLE_RDOQ
5047
        ps_ctxt->i4_rdoq_level = NO_RDOQ;
5048
#endif
5049
5050
#if DISABLE_SBH
5051
        ps_ctxt->i4_sbh_level = NO_SBH;
5052
#endif
5053
5054
        /*Rounding factor calc based on previous cabac states */
5055
5056
3.57k
        ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[0] = &ps_ctxt->i4_quant_round_4x4[0][0];
5057
3.57k
        ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[1] = &ps_ctxt->i4_quant_round_8x8[0][0];
5058
3.57k
        ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[2] = &ps_ctxt->i4_quant_round_16x16[0][0];
5059
3.57k
        ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[4] = &ps_ctxt->i4_quant_round_32x32[0][0];
5060
5061
3.57k
        ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[0] = &ps_ctxt->i4_quant_round_4x4[1][0];
5062
3.57k
        ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[1] = &ps_ctxt->i4_quant_round_8x8[1][0];
5063
3.57k
        ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[2] = &ps_ctxt->i4_quant_round_16x16[1][0];
5064
3.57k
        ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[4] = &ps_ctxt->i4_quant_round_32x32[1][0];
5065
5066
3.57k
        ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[0] = &ps_ctxt->i4_quant_round_cr_4x4[0][0];
5067
3.57k
        ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[1] = &ps_ctxt->i4_quant_round_cr_8x8[0][0];
5068
3.57k
        ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[2] = &ps_ctxt->i4_quant_round_cr_16x16[0][0];
5069
5070
3.57k
        ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[0] = &ps_ctxt->i4_quant_round_cr_4x4[1][0];
5071
3.57k
        ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[1] = &ps_ctxt->i4_quant_round_cr_8x8[1][0];
5072
3.57k
        ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[2] = &ps_ctxt->i4_quant_round_cr_16x16[1][0];
5073
5074
        /****************************************************************************************/
5075
        /* Setting the perform rdoq and sbh flags appropriately                                 */
5076
        /****************************************************************************************/
5077
3.57k
        {
5078
            /******************************************/
5079
            /* For best cand rdoq and/or sbh          */
5080
            /******************************************/
5081
3.57k
            ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq =
5082
3.57k
                (ps_ctxt->i4_rdoq_level == BEST_CAND_RDOQ);
5083
            /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
5084
            we would have to do RDOQ again.*/
5085
3.57k
            ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq =
5086
3.57k
                ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq ||
5087
3.57k
                ((BEST_CAND_SBH == ps_ctxt->i4_sbh_level) &&
5088
0
                 (ALL_CAND_RDOQ == ps_ctxt->i4_rdoq_level));
5089
5090
3.57k
            ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh =
5091
3.57k
                (ps_ctxt->i4_sbh_level == BEST_CAND_SBH);
5092
5093
            /* SBH should be performed if
5094
            a) i4_sbh_level is BEST_CAND_SBH.
5095
            b) For all quality presets above medium speed(i.e. high speed and extreme speed) and
5096
            if SBH has to be done because for these presets the quant, iquant and scan coeff
5097
            data are calculated in this function and not during the RDOPT stage*/
5098
5099
            /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
5100
3.57k
            ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh =
5101
3.57k
                ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh ||
5102
3.57k
                ((BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level) &&
5103
0
                 (ALL_CAND_SBH == ps_ctxt->i4_sbh_level));
5104
5105
            /******************************************/
5106
            /* For all cand rdoq and/or sbh          */
5107
            /******************************************/
5108
3.57k
            ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq =
5109
3.57k
                (ps_ctxt->i4_rdoq_level == ALL_CAND_RDOQ);
5110
3.57k
            ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh =
5111
3.57k
                (ps_ctxt->i4_sbh_level == ALL_CAND_SBH);
5112
3.57k
            ps_ctxt->s_rdoq_sbh_ctxt.i4_bit_depth =
5113
3.57k
                ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth;
5114
3.57k
        }
5115
5116
3.57k
        if(!is_hbd_mode)
5117
3.57k
        {
5118
3.57k
            if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1)
5119
1.27k
            {
5120
1.27k
                if(ps_ctxt->i4_rdoq_level == NO_RDOQ)
5121
989
                {
5122
989
                    ps_ctxt->apf_quant_iquant_ssd[0] =
5123
989
                        ps_func_selector->ihevc_quant_iquant_ssd_fptr;
5124
989
                    ps_ctxt->apf_quant_iquant_ssd[2] = ps_func_selector->ihevc_quant_iquant_fptr;
5125
989
                }
5126
286
                else
5127
286
                {
5128
286
                    ps_ctxt->apf_quant_iquant_ssd[0] =
5129
286
                        ps_func_selector->ihevc_quant_iquant_ssd_rdoq_fptr;
5130
286
                    ps_ctxt->apf_quant_iquant_ssd[2] =
5131
286
                        ps_func_selector->ihevc_quant_iquant_rdoq_fptr;
5132
286
                }
5133
5134
                /*If coef level RDOQ is enabled, quantization based on corr. error to be done */
5135
1.27k
                if(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING)
5136
805
                {
5137
805
                    ps_ctxt->apf_quant_iquant_ssd[1] =
5138
805
                        ps_func_selector->ihevc_q_iq_ssd_var_rnd_fact_fptr;
5139
805
                    ps_ctxt->apf_quant_iquant_ssd[3] =
5140
805
                        ps_func_selector->ihevc_q_iq_var_rnd_fact_fptr;
5141
805
                }
5142
470
                else
5143
470
                {
5144
470
                    ps_ctxt->apf_quant_iquant_ssd[1] =
5145
470
                        ps_func_selector->ihevc_quant_iquant_ssd_fptr;
5146
470
                    ps_ctxt->apf_quant_iquant_ssd[3] = ps_func_selector->ihevc_quant_iquant_fptr;
5147
470
                }
5148
1.27k
            }
5149
2.29k
            else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0)
5150
2.29k
            {
5151
2.29k
                if(ps_ctxt->i4_rdoq_level == NO_RDOQ)
5152
1.79k
                {
5153
1.79k
                    ps_ctxt->apf_quant_iquant_ssd[0] =
5154
1.79k
                        ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_fptr;
5155
1.79k
                    ps_ctxt->apf_quant_iquant_ssd[2] =
5156
1.79k
                        ps_func_selector->ihevc_quant_iquant_flat_scale_mat_fptr;
5157
1.79k
                }
5158
501
                else
5159
501
                {
5160
501
                    ps_ctxt->apf_quant_iquant_ssd[0] =
5161
501
                        ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_fptr;
5162
501
                    ps_ctxt->apf_quant_iquant_ssd[2] =
5163
501
                        ps_func_selector->ihevc_quant_iquant_flat_scale_mat_rdoq_fptr;
5164
501
                }
5165
5166
                /*If coef level RDOQ is enabled, quantization based on corr. error to be done */
5167
2.29k
                if(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING)
5168
1.20k
                {
5169
1.20k
                    ps_ctxt->apf_quant_iquant_ssd[1] =
5170
1.20k
                        ps_func_selector->ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_fptr;
5171
1.20k
                    ps_ctxt->apf_quant_iquant_ssd[3] =
5172
1.20k
                        ps_func_selector->ihevc_q_iq_flat_scale_mat_var_rnd_fact_fptr;
5173
1.20k
                }
5174
1.09k
                else
5175
1.09k
                {
5176
1.09k
                    ps_ctxt->apf_quant_iquant_ssd[1] =
5177
1.09k
                        ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_fptr;
5178
1.09k
                    ps_ctxt->apf_quant_iquant_ssd[3] =
5179
1.09k
                        ps_func_selector->ihevc_quant_iquant_flat_scale_mat_fptr;
5180
1.09k
                }
5181
2.29k
            }
5182
5183
3.57k
            ps_ctxt->s_sao_ctxt_t.apf_sao_luma[0] =
5184
3.57k
                ps_func_selector->ihevc_sao_edge_offset_class0_fptr;
5185
3.57k
            ps_ctxt->s_sao_ctxt_t.apf_sao_luma[1] =
5186
3.57k
                ps_func_selector->ihevc_sao_edge_offset_class1_fptr;
5187
3.57k
            ps_ctxt->s_sao_ctxt_t.apf_sao_luma[2] =
5188
3.57k
                ps_func_selector->ihevc_sao_edge_offset_class2_fptr;
5189
3.57k
            ps_ctxt->s_sao_ctxt_t.apf_sao_luma[3] =
5190
3.57k
                ps_func_selector->ihevc_sao_edge_offset_class3_fptr;
5191
5192
3.57k
            ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[0] =
5193
3.57k
                ps_func_selector->ihevc_sao_edge_offset_class0_chroma_fptr;
5194
3.57k
            ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[1] =
5195
3.57k
                ps_func_selector->ihevc_sao_edge_offset_class1_chroma_fptr;
5196
3.57k
            ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[2] =
5197
3.57k
                ps_func_selector->ihevc_sao_edge_offset_class2_chroma_fptr;
5198
3.57k
            ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[3] =
5199
3.57k
                ps_func_selector->ihevc_sao_edge_offset_class3_chroma_fptr;
5200
5201
3.57k
            ps_ctxt->apf_it_recon[0] = ps_func_selector->ihevc_itrans_recon_4x4_ttype1_fptr;
5202
3.57k
            ps_ctxt->apf_it_recon[1] = ps_func_selector->ihevc_itrans_recon_4x4_fptr;
5203
3.57k
            ps_ctxt->apf_it_recon[2] = ps_func_selector->ihevc_itrans_recon_8x8_fptr;
5204
3.57k
            ps_ctxt->apf_it_recon[3] = ps_func_selector->ihevc_itrans_recon_16x16_fptr;
5205
3.57k
            ps_ctxt->apf_it_recon[4] = ps_func_selector->ihevc_itrans_recon_32x32_fptr;
5206
5207
3.57k
            ps_ctxt->apf_chrm_it_recon[0] = ps_func_selector->ihevc_chroma_itrans_recon_4x4_fptr;
5208
3.57k
            ps_ctxt->apf_chrm_it_recon[1] = ps_func_selector->ihevc_chroma_itrans_recon_8x8_fptr;
5209
3.57k
            ps_ctxt->apf_chrm_it_recon[2] = ps_func_selector->ihevc_chroma_itrans_recon_16x16_fptr;
5210
5211
3.57k
            ps_ctxt->apf_resd_trns[0] = ps_func_selector->ihevc_resi_trans_4x4_ttype1_fptr;
5212
3.57k
            ps_ctxt->apf_resd_trns[1] = ps_func_selector->ihevc_resi_trans_4x4_fptr;
5213
3.57k
            ps_ctxt->apf_resd_trns[2] = ps_func_selector->ihevc_resi_trans_8x8_fptr;
5214
3.57k
            ps_ctxt->apf_resd_trns[3] = ps_func_selector->ihevc_resi_trans_16x16_fptr;
5215
3.57k
            ps_ctxt->apf_resd_trns[4] = ps_func_selector->ihevc_resi_trans_32x32_fptr;
5216
5217
3.57k
            ps_ctxt->apf_chrm_resd_trns[0] = ps_func_selector->ihevc_resi_trans_4x4_fptr;
5218
3.57k
            ps_ctxt->apf_chrm_resd_trns[1] = ps_func_selector->ihevc_resi_trans_8x8_fptr;
5219
3.57k
            ps_ctxt->apf_chrm_resd_trns[2] = ps_func_selector->ihevc_resi_trans_16x16_fptr;
5220
5221
3.57k
            ps_ctxt->apf_lum_ip[IP_FUNC_MODE_0] =
5222
3.57k
                ps_func_selector->ihevc_intra_pred_luma_planar_fptr;
5223
3.57k
            ps_ctxt->apf_lum_ip[IP_FUNC_MODE_1] = ps_func_selector->ihevc_intra_pred_luma_dc_fptr;
5224
3.57k
            ps_ctxt->apf_lum_ip[IP_FUNC_MODE_2] =
5225
3.57k
                ps_func_selector->ihevc_intra_pred_luma_mode2_fptr;
5226
3.57k
            ps_ctxt->apf_lum_ip[IP_FUNC_MODE_3TO9] =
5227
3.57k
                ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr;
5228
3.57k
            ps_ctxt->apf_lum_ip[IP_FUNC_MODE_10] =
5229
3.57k
                ps_func_selector->ihevc_intra_pred_luma_horz_fptr;
5230
3.57k
            ps_ctxt->apf_lum_ip[IP_FUNC_MODE_11TO17] =
5231
3.57k
                ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr;
5232
3.57k
            ps_ctxt->apf_lum_ip[IP_FUNC_MODE_18_34] =
5233
3.57k
                ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr;
5234
3.57k
            ps_ctxt->apf_lum_ip[IP_FUNC_MODE_19TO25] =
5235
3.57k
                ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr;
5236
3.57k
            ps_ctxt->apf_lum_ip[IP_FUNC_MODE_26] = ps_func_selector->ihevc_intra_pred_luma_ver_fptr;
5237
3.57k
            ps_ctxt->apf_lum_ip[IP_FUNC_MODE_27TO33] =
5238
3.57k
                ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr;
5239
5240
3.57k
            ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_0] =
5241
3.57k
                ps_func_selector->ihevc_intra_pred_chroma_planar_fptr;
5242
3.57k
            ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_1] =
5243
3.57k
                ps_func_selector->ihevc_intra_pred_chroma_dc_fptr;
5244
3.57k
            ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_2] =
5245
3.57k
                ps_func_selector->ihevc_intra_pred_chroma_mode2_fptr;
5246
3.57k
            ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_3TO9] =
5247
3.57k
                ps_func_selector->ihevc_intra_pred_chroma_mode_3_to_9_fptr;
5248
3.57k
            ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_10] =
5249
3.57k
                ps_func_selector->ihevc_intra_pred_chroma_horz_fptr;
5250
3.57k
            ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_11TO17] =
5251
3.57k
                ps_func_selector->ihevc_intra_pred_chroma_mode_11_to_17_fptr;
5252
3.57k
            ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_18_34] =
5253
3.57k
                ps_func_selector->ihevc_intra_pred_chroma_mode_18_34_fptr;
5254
3.57k
            ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_19TO25] =
5255
3.57k
                ps_func_selector->ihevc_intra_pred_chroma_mode_19_to_25_fptr;
5256
3.57k
            ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_26] =
5257
3.57k
                ps_func_selector->ihevc_intra_pred_chroma_ver_fptr;
5258
3.57k
            ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_27TO33] =
5259
3.57k
                ps_func_selector->ihevc_intra_pred_chroma_mode_27_to_33_fptr;
5260
5261
3.57k
            ps_ctxt->apf_chrm_resd_trns_had[0] =
5262
3.57k
                (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_4x4_8bit;
5263
3.57k
            ps_ctxt->apf_chrm_resd_trns_had[1] =
5264
3.57k
                (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_8x8_8bit;
5265
3.57k
            ps_ctxt->apf_chrm_resd_trns_had[2] =
5266
3.57k
                (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_16x16_8bit;
5267
3.57k
        }
5268
5269
3.57k
        if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0)
5270
2.29k
        {
5271
            /* initialise the scale & rescale matrices */
5272
2.29k
            ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5273
2.29k
            ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5274
2.29k
            ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_flat_scale_mat_8x8[0];
5275
2.29k
            ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_flat_scale_mat_16x16[0];
5276
2.29k
            ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_flat_scale_mat_32x32[0];
5277
            /*init for inter matrix*/
5278
2.29k
            ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5279
2.29k
            ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5280
2.29k
            ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_flat_scale_mat_8x8[0];
5281
2.29k
            ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_flat_scale_mat_16x16[0];
5282
2.29k
            ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_flat_scale_mat_32x32[0];
5283
5284
            /*init for rescale matrix*/
5285
2.29k
            ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5286
2.29k
            ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5287
2.29k
            ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0];
5288
2.29k
            ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0];
5289
2.29k
            ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0];
5290
            /*init for rescale inter matrix*/
5291
2.29k
            ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5292
2.29k
            ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5293
2.29k
            ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0];
5294
2.29k
            ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0];
5295
2.29k
            ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0];
5296
2.29k
        }
5297
1.27k
        else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1)
5298
1.27k
        {
5299
            /* initialise the scale & rescale matrices */
5300
1.27k
            ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5301
1.27k
            ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5302
1.27k
            ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_intra_default_scale_mat_8x8[0];
5303
1.27k
            ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_intra_default_scale_mat_16x16[0];
5304
1.27k
            ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_intra_default_scale_mat_32x32[0];
5305
            /*init for inter matrix*/
5306
1.27k
            ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5307
1.27k
            ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
5308
1.27k
            ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_inter_default_scale_mat_8x8[0];
5309
1.27k
            ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_inter_default_scale_mat_16x16[0];
5310
1.27k
            ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_inter_default_scale_mat_32x32[0];
5311
5312
            /*init for rescale matrix*/
5313
1.27k
            ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5314
1.27k
            ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5315
1.27k
            ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_intra_default_rescale_mat_8x8[0];
5316
1.27k
            ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_intra_default_rescale_mat_16x16[0];
5317
1.27k
            ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_intra_default_rescale_mat_32x32[0];
5318
            /*init for rescale inter matrix*/
5319
1.27k
            ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5320
1.27k
            ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
5321
1.27k
            ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_inter_default_rescale_mat_8x8[0];
5322
1.27k
            ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_inter_default_rescale_mat_16x16[0];
5323
1.27k
            ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_inter_default_rescale_mat_32x32[0];
5324
1.27k
        }
5325
0
        else
5326
0
        {
5327
0
            ASSERT(0);
5328
0
        }
5329
5330
        /* Not recomputing Luma pred-data and header data for any preset now */
5331
3.57k
        ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0;
5332
3.57k
        ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0;
5333
3.57k
        ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1;
5334
5335
3.57k
        switch(ps_ctxt->i4_quality_preset)
5336
3.57k
        {
5337
1.72k
        case IHEVCE_QUALITY_P0:
5338
1.72k
        {
5339
1.72k
            ps_ctxt->i4_max_merge_candidates = 5;
5340
1.72k
            ps_ctxt->i4_use_satd_for_merge_eval = 1;
5341
1.72k
            ps_ctxt->u1_use_top_at_ctb_boundary = 1;
5342
1.72k
            ps_ctxt->u1_use_early_cbf_data = 0;
5343
1.72k
            ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_PQ;
5344
1.72k
            ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5345
1.72k
                ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ;
5346
5347
1.72k
            break;
5348
0
        }
5349
280
        case IHEVCE_QUALITY_P2:
5350
280
        {
5351
280
            ps_ctxt->i4_max_merge_candidates = 5;
5352
280
            ps_ctxt->i4_use_satd_for_merge_eval = 1;
5353
280
            ps_ctxt->u1_use_top_at_ctb_boundary = 1;
5354
280
            ps_ctxt->u1_use_early_cbf_data = 0;
5355
5356
280
            ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_HQ;
5357
280
            ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5358
280
                ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ;
5359
5360
280
            break;
5361
0
        }
5362
396
        case IHEVCE_QUALITY_P3:
5363
396
        {
5364
396
            ps_ctxt->i4_max_merge_candidates = 3;
5365
396
            ps_ctxt->i4_use_satd_for_merge_eval = 1;
5366
396
            ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5367
5368
396
            ps_ctxt->u1_use_early_cbf_data = 0;
5369
396
            ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_MS;
5370
396
            ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5371
396
                ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS;
5372
5373
396
            break;
5374
0
        }
5375
253
        case IHEVCE_QUALITY_P4:
5376
253
        {
5377
253
            ps_ctxt->i4_max_merge_candidates = 2;
5378
253
            ps_ctxt->i4_use_satd_for_merge_eval = 1;
5379
253
            ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5380
253
            ps_ctxt->u1_use_early_cbf_data = 0;
5381
253
            ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_HS;
5382
253
            ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5383
253
                ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS;
5384
5385
253
            break;
5386
0
        }
5387
299
        case IHEVCE_QUALITY_P5:
5388
299
        {
5389
299
            ps_ctxt->i4_max_merge_candidates = 2;
5390
299
            ps_ctxt->i4_use_satd_for_merge_eval = 0;
5391
299
            ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5392
299
            ps_ctxt->u1_use_early_cbf_data = 0;
5393
299
            ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_XS;
5394
299
            ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
5395
299
                ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS;
5396
5397
299
            break;
5398
0
        }
5399
620
        case IHEVCE_QUALITY_P6:
5400
620
        {
5401
620
            ps_ctxt->i4_max_merge_candidates = 2;
5402
620
            ps_ctxt->i4_use_satd_for_merge_eval = 0;
5403
620
            ps_ctxt->u1_use_top_at_ctb_boundary = 0;
5404
620
            ps_ctxt->u1_use_early_cbf_data = EARLY_CBF_ON;
5405
620
            break;
5406
0
        }
5407
0
        default:
5408
0
        {
5409
0
            ASSERT(0);
5410
0
        }
5411
3.57k
        }
5412
5413
#if DISABLE_SKIP_AND_MERGE_EVAL
5414
        ps_ctxt->i4_max_merge_candidates = 0;
5415
#endif
5416
5417
3.57k
        ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data =
5418
3.57k
            !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
5419
5420
        /*initialize memory for RC related parameters required/populated by enc_loop */
5421
        /* the allocated memory is distributed as follows, assuming the encoder is running for 3 bit-rate instances
5422
        |-------|-> Thread 0, instance 0
5423
        |       |
5424
        |       |
5425
        |       |
5426
        |-------|-> thread 0, instance 1
5427
        |       |
5428
        |       |
5429
        |       |
5430
        |-------|-> thread 0, instance 2
5431
        |       |
5432
        |       |
5433
        |       |
5434
        |-------|-> thread 1, instance 0
5435
        |       |
5436
        |       |
5437
        |       |
5438
        |-------|-> thread 1, instance 1
5439
        |       |
5440
        |       |
5441
        |       |
5442
        |-------|-> thread 1, instance 2
5443
        ...         ...
5444
5445
        Each thread will collate the data corresponding to the bit-rate instance it is running at the appropriate place.
5446
        Finally, one thread will become master and collate the data from all the threads */
5447
7.14k
        for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
5448
3.57k
        {
5449
7.14k
            for(i = 0; i < i4_num_bitrate_inst; i++)
5450
3.57k
            {
5451
3.57k
                ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i] = ps_enc_loop_rc_params;
5452
3.57k
                ps_enc_loop_rc_params++;
5453
3.57k
            }
5454
3.57k
        }
5455
        /* Non-Luma modes for Chroma are evaluated only in HIGH QUALITY preset */
5456
5457
#if !ENABLE_SEPARATE_LUMA_CHROMA_INTRA_MODE
5458
        ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd = 0;
5459
#endif
5460
5461
3.57k
        ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU].i4_iq_buff_stride =
5462
3.57k
            MAX_TU_SIZE;
5463
3.57k
        ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU_DIV2].i4_iq_buff_stride =
5464
3.57k
            MAX_TU_SIZE;
5465
        /*Multiplying by two to account for interleaving of cb and cr*/
5466
3.57k
        ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU].i4_pred_stride = MAX_TU_SIZE
5467
3.57k
                                                                                       << 1;
5468
3.57k
        ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU_DIV2].i4_pred_stride =
5469
3.57k
            MAX_TU_SIZE << 1;
5470
5471
        /*     Memory for a frame level memory to store tile-id                  */
5472
        /*              corresponding to each CTB of frame                       */
5473
3.57k
        ps_ctxt->pi4_offset_for_last_cu_qp = &ps_master_ctxt->ai4_offset_for_last_cu_qp[0];
5474
5475
3.57k
        ps_ctxt->i4_qp_mod = ps_init_prms->s_config_prms.i4_cu_level_rc & 1;
5476
        /* psy rd strength is a run-time parameter controlled by bits 5-7 of the VQET field. */
5477
        /* we disable psyrd if the psy strength is zero or the BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER field is not set */
5478
3.57k
        if(ps_init_prms->s_coding_tools_prms.i4_vqet &
5479
3.57k
           (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER))
5480
0
        {
5481
0
            UWORD32 psy_strength;
5482
0
            UWORD32 psy_strength_mask =
5483
0
                224;  // only bits 5,6,7 are ones. These three bits represent the psy strength
5484
0
            psy_strength = ps_init_prms->s_coding_tools_prms.i4_vqet & psy_strength_mask;
5485
0
            ps_ctxt->u1_enable_psyRDOPT = 1;
5486
0
            ps_ctxt->u4_psy_strength = psy_strength >> BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1;
5487
0
            if(psy_strength == 0)
5488
0
            {
5489
0
                ps_ctxt->u1_enable_psyRDOPT = 0;
5490
0
                ps_ctxt->u4_psy_strength = 0;
5491
0
            }
5492
0
        }
5493
5494
3.57k
        ps_ctxt->u1_is_stasino_enabled =
5495
3.57k
            ((ps_init_prms->s_coding_tools_prms.i4_vqet &
5496
3.57k
              (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
5497
0
             (ps_init_prms->s_coding_tools_prms.i4_vqet &
5498
0
              (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)));
5499
5500
3.57k
        ps_ctxt->u1_max_inter_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_nI;
5501
3.57k
        ps_ctxt->u1_max_intra_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_I;
5502
3.57k
        ps_ctxt++;
5503
3.57k
    }
5504
    /* Store Tile params base into EncLoop Master context */
5505
3.57k
    ps_master_ctxt->pv_tile_params_base = (void *)ps_tile_params_base;
5506
5507
3.57k
    if(1 == ps_tile_params_base->i4_tiles_enabled_flag)
5508
0
    {
5509
0
        i4_num_tile_cols = ps_tile_params_base->i4_num_tile_cols;
5510
0
    }
5511
5512
    /* Updating ai4_offset_for_last_cu_qp[] array for all tile-columns of frame */
5513
    /* Loop over all tile-cols in frame */
5514
7.14k
    for(ctr = 0; ctr < i4_num_tile_cols; ctr++)
5515
3.57k
    {
5516
3.57k
        WORD32 i4_tile_col_wd_in_ctb_unit =
5517
3.57k
            (ps_tile_params_base + ctr)->i4_curr_tile_wd_in_ctb_unit;
5518
3.57k
        WORD32 offset_x;
5519
5520
3.57k
        if(ctr == (i4_num_tile_cols - 1))
5521
3.57k
        { /* Last tile-row of frame */
5522
3.57k
            WORD32 min_cu_size = 1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size;
5523
5524
3.57k
            WORD32 cu_aligned_pic_wd =
5525
3.57k
                ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
5526
3.57k
                SET_CTB_ALIGN(
5527
3.57k
                    ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width,
5528
3.57k
                    min_cu_size);
5529
5530
3.57k
            WORD32 last_hz_ctb_wd = MAX_CTB_SIZE - (u4_width - cu_aligned_pic_wd);
5531
5532
3.57k
            offset_x = (i4_tile_col_wd_in_ctb_unit - 1) * MAX_CTB_SIZE;
5533
3.57k
            offset_x += last_hz_ctb_wd;
5534
3.57k
        }
5535
0
        else
5536
0
        { /* Not the last tile-row of frame */
5537
0
            offset_x = (i4_tile_col_wd_in_ctb_unit)*MAX_CTB_SIZE;
5538
0
        }
5539
5540
3.57k
        offset_x /= 4;
5541
3.57k
        offset_x -= 1;
5542
5543
3.57k
        ps_master_ctxt->ai4_offset_for_last_cu_qp[ctr] = offset_x;
5544
3.57k
    }
5545
5546
3.57k
    n_tabs = NUM_ENC_LOOP_MEM_RECS;
5547
5548
    /*store num bit-rate instances in the master context */
5549
3.57k
    ps_master_ctxt->i4_num_bitrates = i4_num_bitrate_inst;
5550
3.57k
    ps_master_ctxt->i4_num_enc_loop_frm_pllel = i4_num_enc_loop_frm_pllel;
5551
    /*************************************************************************/
5552
    /* --- EncLoop Deblock and SAO sync Dep Mngr Mem init --                         */
5553
    /*************************************************************************/
5554
3.57k
    {
5555
3.57k
        WORD32 count;
5556
3.57k
        WORD32 num_vert_units, num_blks_in_row;
5557
3.57k
        WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
5558
3.57k
        WORD32 wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
5559
5560
3.57k
        ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
5561
3.57k
        ihevce_enc_loop_dblk_get_prms_dep_mngr(wd, &num_blks_in_row);
5562
3.57k
        ASSERT(num_vert_units > 0);
5563
3.57k
        ASSERT(num_blks_in_row > 0);
5564
5565
7.14k
        for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
5566
3.57k
        {
5567
7.14k
            for(i = 0; i < i4_num_bitrate_inst; i++)
5568
3.57k
            {
5569
3.57k
                ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[count][i] = ihevce_dmgr_init(
5570
3.57k
                    &ps_mem_tab[n_tabs],
5571
3.57k
                    pv_osal_handle,
5572
3.57k
                    DEP_MNGR_ROW_ROW_SYNC,
5573
3.57k
                    num_vert_units,
5574
3.57k
                    num_blks_in_row,
5575
3.57k
                    i4_num_tile_cols, /* Number of Col Tiles */
5576
3.57k
                    i4_num_proc_thrds,
5577
3.57k
                    0 /*Sem Disabled*/
5578
3.57k
                );
5579
5580
3.57k
                n_tabs += ihevce_dmgr_get_num_mem_recs();
5581
3.57k
            }
5582
3.57k
        }
5583
5584
7.14k
        for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
5585
3.57k
        {
5586
7.14k
            for(i = 0; i < i4_num_bitrate_inst; i++)
5587
3.57k
            {
5588
3.57k
                ps_master_ctxt->aapv_dep_mngr_enc_loop_sao[count][i] = ihevce_dmgr_init(
5589
3.57k
                    &ps_mem_tab[n_tabs],
5590
3.57k
                    pv_osal_handle,
5591
3.57k
                    DEP_MNGR_ROW_ROW_SYNC,
5592
3.57k
                    num_vert_units,
5593
3.57k
                    num_blks_in_row,
5594
3.57k
                    i4_num_tile_cols, /* Number of Col Tiles */
5595
3.57k
                    i4_num_proc_thrds,
5596
3.57k
                    0 /*Sem Disabled*/
5597
3.57k
                );
5598
5599
3.57k
                n_tabs += ihevce_dmgr_get_num_mem_recs();
5600
3.57k
            }
5601
3.57k
        }
5602
3.57k
    }
5603
    /*************************************************************************/
5604
    /* --- EncLoop Top-Right CU sync Dep Mngr Mem init --                    */
5605
    /*************************************************************************/
5606
3.57k
    {
5607
3.57k
        WORD32 count;
5608
3.57k
        WORD32 num_vert_units, num_blks_in_row;
5609
3.57k
        WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
5610
3.57k
        WORD32 wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
5611
5612
3.57k
        WORD32 i4_sem = 0;
5613
5614
3.57k
        if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset >=
5615
3.57k
           IHEVCE_QUALITY_P4)
5616
1.17k
            i4_sem = 0;
5617
2.40k
        else
5618
2.40k
            i4_sem = 1;
5619
3.57k
        ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
5620
        /* For Top-Right CU sync, adding one more CTB since value updation */
5621
        /* happens in that way for the last CTB in the row                 */
5622
3.57k
        num_blks_in_row = wd + SET_CTB_ALIGN(wd, MAX_CU_SIZE);
5623
3.57k
        num_blks_in_row += MAX_CTB_SIZE;
5624
5625
3.57k
        ASSERT(num_vert_units > 0);
5626
3.57k
        ASSERT(num_blks_in_row > 0);
5627
5628
7.14k
        for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
5629
3.57k
        {
5630
7.14k
            for(i = 0; i < i4_num_bitrate_inst; i++)
5631
3.57k
            {
5632
                /* For ES/HS, CU level updates uses spin-locks than semaphore */
5633
3.57k
                {
5634
3.57k
                    ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[count][i] =
5635
3.57k
                        ihevce_dmgr_init(
5636
3.57k
                            &ps_mem_tab[n_tabs],
5637
3.57k
                            pv_osal_handle,
5638
3.57k
                            DEP_MNGR_ROW_ROW_SYNC,
5639
3.57k
                            num_vert_units,
5640
3.57k
                            num_blks_in_row,
5641
3.57k
                            i4_num_tile_cols, /* Number of Col Tiles */
5642
3.57k
                            i4_num_proc_thrds,
5643
3.57k
                            i4_sem /*Sem Disabled*/
5644
3.57k
                        );
5645
3.57k
                }
5646
3.57k
                n_tabs += ihevce_dmgr_get_num_mem_recs();
5647
3.57k
            }
5648
3.57k
        }
5649
3.57k
    }
5650
5651
17.8k
    for(i = 1; i < 5; i++)
5652
14.2k
    {
5653
14.2k
        WORD32 i4_log2_trans_size = i + 1;
5654
14.2k
        WORD32 i4_bit_depth = ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth;
5655
5656
14.2k
        ga_trans_shift[i] = (MAX_TR_DYNAMIC_RANGE - i4_bit_depth - i4_log2_trans_size) << 1;
5657
14.2k
    }
5658
5659
3.57k
    ga_trans_shift[0] = ga_trans_shift[1];
5660
5661
    /* return the handle to caller */
5662
3.57k
    return ((void *)ps_master_ctxt);
5663
3.57k
}
5664
5665
/*!
5666
******************************************************************************
5667
* \if Function name : ihevce_enc_loop_reg_sem_hdls \endif
5668
*
5669
* \brief
5670
*    Registers semaphore handles with the ENC_LOOP dependency managers.
5671
*
5672
* \param[in] pv_enc_loop_ctxt : pointer to the ENC_LOOP master context
5673
* \param[in] ppv_sem_hdls : Array of semaphore handles
5674
* \param[in] i4_num_proc_thrds : Number of processing threads
5675
*
5676
* \return
5677
*    None
5678
*
5679
* \author
5680
*  Ittiam
5681
*
5682
*****************************************************************************
5683
*/
5684
void ihevce_enc_loop_reg_sem_hdls(
5685
    void *pv_enc_loop_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds)
5686
3.57k
{
5687
3.57k
    ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
5688
3.57k
    WORD32 i, enc_frm_id;
5689
5690
3.57k
    ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5691
5692
    /*************************************************************************/
5693
    /* --- EncLoop Deblock and SAO sync Dep Mngr reg Semaphores --                   */
5694
    /*************************************************************************/
5695
7.14k
    for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5696
3.57k
    {
5697
7.14k
        for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++)
5698
3.57k
        {
5699
3.57k
            ihevce_dmgr_reg_sem_hdls(
5700
3.57k
                ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[enc_frm_id][i],
5701
3.57k
                ppv_sem_hdls,
5702
3.57k
                i4_num_proc_thrds);
5703
3.57k
        }
5704
3.57k
    }
5705
5706
7.14k
    for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5707
3.57k
    {
5708
7.14k
        for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++)
5709
3.57k
        {
5710
3.57k
            ihevce_dmgr_reg_sem_hdls(
5711
3.57k
                ps_master_ctxt->aapv_dep_mngr_enc_loop_sao[enc_frm_id][i],
5712
3.57k
                ppv_sem_hdls,
5713
3.57k
                i4_num_proc_thrds);
5714
3.57k
        }
5715
3.57k
    }
5716
5717
    /*************************************************************************/
5718
    /* --- EncLoop Top-Right CU synnc Dep Mngr reg Semaphores --             */
5719
    /*************************************************************************/
5720
7.14k
    for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5721
3.57k
    {
5722
7.14k
        for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++)
5723
3.57k
        {
5724
3.57k
            ihevce_dmgr_reg_sem_hdls(
5725
3.57k
                ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[enc_frm_id][i],
5726
3.57k
                ppv_sem_hdls,
5727
3.57k
                i4_num_proc_thrds);
5728
3.57k
        }
5729
3.57k
    }
5730
5731
3.57k
    return;
5732
3.57k
}
5733
5734
/*!
5735
******************************************************************************
5736
* \if Function name : ihevce_enc_loop_delete \endif
5737
*
5738
* \brief
5739
*    Destroy EncLoop module
5740
* Note : Only Destroys the resources allocated in the module like
5741
*   semaphore,etc. Memory free is done Separately using memtabs
5742
*
5743
* \param[in] pv_me_ctxt : pointer to EncLoop ctxt
5744
*
5745
* \return
5746
*    None
5747
*
5748
* \author
5749
*  Ittiam
5750
*
5751
*****************************************************************************
5752
*/
5753
void ihevce_enc_loop_delete(void *pv_enc_loop_ctxt)
5754
3.57k
{
5755
3.57k
    ihevce_enc_loop_master_ctxt_t *ps_enc_loop_ctxt;
5756
3.57k
    WORD32 ctr, enc_frm_id;
5757
5758
3.57k
    ps_enc_loop_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5759
5760
7.14k
    for(enc_frm_id = 0; enc_frm_id < ps_enc_loop_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++)
5761
3.57k
    {
5762
7.14k
        for(ctr = 0; ctr < ps_enc_loop_ctxt->i4_num_bitrates; ctr++)
5763
3.57k
        {
5764
            /* --- EncLoop Deblock sync Dep Mngr Delete --*/
5765
3.57k
            ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_dblk[enc_frm_id][ctr]);
5766
            /* --- EncLoop Sao sync Dep Mngr Delete --*/
5767
3.57k
            ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_sao[enc_frm_id][ctr]);
5768
            /* --- EncLoop Top-Right CU sync Dep Mngr Delete --*/
5769
3.57k
            ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[enc_frm_id][ctr]);
5770
3.57k
        }
5771
3.57k
    }
5772
3.57k
}
5773
5774
/*!
5775
******************************************************************************
5776
* \if Function name : ihevce_enc_loop_dep_mngr_frame_reset \endif
5777
*
5778
* \brief
5779
*    Frame level Reset for the Dependency Mngrs local to EncLoop.,
5780
*    ie CU_TopRight and Dblk
5781
*
5782
* \param[in] pv_enc_loop_ctxt       : Enc_loop context pointer
5783
*
5784
* \return
5785
*    None
5786
*
5787
* \author
5788
*  Ittiam
5789
*
5790
*****************************************************************************
5791
*/
5792
void ihevce_enc_loop_dep_mngr_frame_reset(void *pv_enc_loop_ctxt, WORD32 enc_frm_id)
5793
94.8k
{
5794
94.8k
    WORD32 ctr, frame_id;
5795
94.8k
    ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
5796
5797
94.8k
    ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5798
5799
94.8k
    if(1 == ps_master_ctxt->i4_num_enc_loop_frm_pllel)
5800
94.8k
    {
5801
94.8k
        frame_id = 0;
5802
94.8k
    }
5803
0
    else
5804
0
    {
5805
0
        frame_id = enc_frm_id;
5806
0
    }
5807
5808
189k
    for(ctr = 0; ctr < ps_master_ctxt->i4_num_bitrates; ctr++)
5809
94.8k
    {
5810
        /* Dep. Mngr : Reset the num ctb Deblocked in every row  for ENC sync */
5811
94.8k
        ihevce_dmgr_rst_row_row_sync(ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[frame_id][ctr]);
5812
5813
        /* Dep. Mngr : Reset the num SAO ctb in every row  for ENC sync */
5814
94.8k
        ihevce_dmgr_rst_row_row_sync(ps_master_ctxt->aapv_dep_mngr_enc_loop_sao[frame_id][ctr]);
5815
5816
        /* Dep. Mngr : Reset the TopRight CU Processed in every row  for ENC sync */
5817
94.8k
        ihevce_dmgr_rst_row_row_sync(
5818
94.8k
            ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[frame_id][ctr]);
5819
94.8k
    }
5820
94.8k
}
5821
5822
/*!
5823
******************************************************************************
5824
* \if Function name : ihevce_enc_loop_frame_init \endif
5825
*
5826
* \brief
5827
*    Frame level init of enocde loop function .
5828
*
5829
* \param[in] pv_enc_loop_ctxt           : Enc_loop context pointer
5830
* \param[in] pi4_cu_processed           : ptr to cur frame cu process in pix.
5831
* \param[in] aps_ref_list               : ref pic list for the current frame
5832
* \param[in] ps_slice_hdr               : ptr to current slice header params
5833
* \param[in] ps_pps                     : ptr to active pps params
5834
* \param[in] ps_sps                     : ptr to active sps params
5835
* \param[in] ps_vps                     : ptr to active vps params
5836
5837
5838
* \param[in] i1_weighted_pred_flag      : weighted pred enable flag (unidir)
5839
* \param[in] i1_weighted_bipred_flag    : weighted pred enable flag (bidir)
5840
* \param[in] log2_luma_wght_denom       : down shift factor for weighted pred of luma
5841
* \param[in] log2_chroma_wght_denom       : down shift factor for weighted pred of chroma
5842
* \param[in] cur_poc                    : currennt frame poc
5843
* \param[in] i4_bitrate_instance_num    : number indicating the instance of bit-rate for multi-rate encoder
5844
*
5845
* \return
5846
*    None
5847
*
5848
* \author
5849
*  Ittiam
5850
*
5851
*****************************************************************************
5852
*/
5853
void ihevce_enc_loop_frame_init(
5854
    void *pv_enc_loop_ctxt,
5855
    WORD32 i4_frm_qp,
5856
    recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2],
5857
    recon_pic_buf_t *ps_frm_recon,
5858
    slice_header_t *ps_slice_hdr,
5859
    pps_t *ps_pps,
5860
    sps_t *ps_sps,
5861
    vps_t *ps_vps,
5862
    WORD8 i1_weighted_pred_flag,
5863
    WORD8 i1_weighted_bipred_flag,
5864
    WORD32 log2_luma_wght_denom,
5865
    WORD32 log2_chroma_wght_denom,
5866
    WORD32 cur_poc,
5867
    WORD32 i4_display_num,
5868
    enc_ctxt_t *ps_enc_ctxt,
5869
    me_enc_rdopt_ctxt_t *ps_curr_inp_prms,
5870
    WORD32 i4_bitrate_instance_num,
5871
    WORD32 i4_thrd_id,
5872
    WORD32 i4_enc_frm_id,
5873
    WORD32 i4_num_bitrates,
5874
    WORD32 i4_quality_preset,
5875
    void *pv_dep_mngr_encloop_dep_me)
5876
94.8k
{
5877
    /* local variables */
5878
94.8k
    ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
5879
94.8k
    ihevce_enc_loop_ctxt_t *ps_ctxt;
5880
94.8k
    WORD32 chroma_qp_offset, i4_div_factor;
5881
94.8k
    WORD8 i1_slice_type = ps_slice_hdr->i1_slice_type;
5882
94.8k
    WORD8 i1_strong_intra_smoothing_enable_flag = ps_sps->i1_strong_intra_smoothing_enable_flag;
5883
5884
    /* ENC_LOOP master state structure */
5885
94.8k
    ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
5886
5887
    /* Nithya: Store the current POC in the slice header */
5888
94.8k
    ps_slice_hdr->i4_abs_pic_order_cnt = cur_poc;
5889
5890
    /* Update the POC list of the current frame to the recon buffer */
5891
94.8k
    if(ps_slice_hdr->i1_num_ref_idx_l0_active != 0)
5892
94.8k
    {
5893
94.8k
        int i4_i;
5894
251k
        for(i4_i = 0; i4_i < ps_slice_hdr->i1_num_ref_idx_l0_active; i4_i++)
5895
157k
        {
5896
157k
            ps_frm_recon->ai4_col_l0_poc[i4_i] = aps_ref_list[0][i4_i]->i4_poc;
5897
157k
        }
5898
94.8k
    }
5899
94.8k
    if(ps_slice_hdr->i1_num_ref_idx_l1_active != 0)
5900
11.3k
    {
5901
11.3k
        int i4_i;
5902
24.7k
        for(i4_i = 0; i4_i < ps_slice_hdr->i1_num_ref_idx_l1_active; i4_i++)
5903
13.3k
        {
5904
13.3k
            ps_frm_recon->ai4_col_l1_poc[i4_i] = aps_ref_list[1][i4_i]->i4_poc;
5905
13.3k
        }
5906
11.3k
    }
5907
5908
    /* loop over all the threads */
5909
    // for(ctr = 0; ctr < ps_master_ctxt->i4_num_proc_thrds; ctr++)
5910
94.8k
    {
5911
        /* ENC_LOOP state structure */
5912
94.8k
        ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[i4_thrd_id];
5913
5914
        /* SAO ctxt structure initialization*/
5915
94.8k
        ps_ctxt->s_sao_ctxt_t.ps_pps = ps_pps;
5916
94.8k
        ps_ctxt->s_sao_ctxt_t.ps_sps = ps_sps;
5917
94.8k
        ps_ctxt->s_sao_ctxt_t.ps_slice_hdr = ps_slice_hdr;
5918
5919
        /*bit-rate instance number for Multi-bitrate (MBR) encode */
5920
94.8k
        ps_ctxt->i4_bitrate_instance_num = i4_bitrate_instance_num;
5921
94.8k
        ps_ctxt->i4_num_bitrates = i4_num_bitrates;
5922
94.8k
        ps_ctxt->i4_chroma_format = ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_chr_format;
5923
94.8k
        ps_ctxt->i4_is_first_query = 1;
5924
94.8k
        ps_ctxt->i4_is_ctb_qp_modified = 0;
5925
5926
        /* enc_frm_id for multiframe encode */
5927
5928
94.8k
        if(1 == ps_enc_ctxt->s_multi_thrd.i4_num_enc_loop_frm_pllel)
5929
94.8k
        {
5930
94.8k
            ps_ctxt->i4_enc_frm_id = 0;
5931
94.8k
            i4_enc_frm_id = 0;
5932
94.8k
        }
5933
0
        else
5934
0
        {
5935
0
            ps_ctxt->i4_enc_frm_id = i4_enc_frm_id;
5936
0
        }
5937
5938
        /*Initialize the sub pic rc buf appropriately */
5939
5940
        /*Set the thrd id flag */
5941
94.8k
        ps_enc_ctxt->s_multi_thrd
5942
94.8k
            .ai4_thrd_id_valid_flag[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 1;
5943
5944
94.8k
        ps_enc_ctxt->s_multi_thrd
5945
94.8k
            .ai8_nctb_ipe_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5946
94.8k
        ps_enc_ctxt->s_multi_thrd
5947
94.8k
            .ai8_nctb_me_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5948
5949
94.8k
        ps_enc_ctxt->s_multi_thrd
5950
94.8k
            .ai8_nctb_l0_ipe_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5951
94.8k
        ps_enc_ctxt->s_multi_thrd
5952
94.8k
            .ai8_nctb_act_factor[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5953
5954
94.8k
        ps_enc_ctxt->s_multi_thrd
5955
94.8k
            .ai8_nctb_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5956
94.8k
        ps_enc_ctxt->s_multi_thrd
5957
94.8k
            .ai8_acc_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5958
94.8k
        ps_enc_ctxt->s_multi_thrd
5959
94.8k
            .ai8_acc_bits_mul_qs_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5960
94.8k
        ps_enc_ctxt->s_multi_thrd
5961
94.8k
            .ai8_nctb_hdr_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5962
94.8k
        ps_enc_ctxt->s_multi_thrd
5963
94.8k
            .ai8_nctb_mpm_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
5964
94.8k
        ps_enc_ctxt->s_multi_thrd.ai4_prev_chunk_qp[i4_enc_frm_id][i4_bitrate_instance_num] =
5965
94.8k
            i4_frm_qp;
5966
5967
        /*Frame level data for Sub Pic rc is initalized here */
5968
        /*Can be sent once per frame*/
5969
94.8k
        {
5970
94.8k
            WORD32 i4_tot_frame_ctb = ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert *
5971
94.8k
                                      ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_horz;
5972
5973
            /*Accumalated bits of all cu for required CTBS estimated during RDO evaluation*/
5974
94.8k
            ps_ctxt->u4_total_cu_bits = 0;
5975
94.8k
            ps_ctxt->u4_total_cu_hdr_bits = 0;
5976
5977
94.8k
            ps_ctxt->u4_cu_tot_bits_into_qscale = 0;
5978
94.8k
            ps_ctxt->u4_cu_tot_bits = 0;
5979
94.8k
            ps_ctxt->u4_total_cu_bits_mul_qs = 0;
5980
94.8k
            ps_ctxt->i4_display_num = i4_display_num;
5981
94.8k
            ps_ctxt->i4_sub_pic_level_rc = ps_enc_ctxt->s_multi_thrd.i4_in_frame_rc_enabled;
5982
            /*The Qscale is to be generated every 10th of total frame ctb is completed */
5983
            //ps_ctxt->i4_num_ctb_for_out_scale = (10 * i4_tot_frame_ctb)/100 ;
5984
94.8k
            ps_ctxt->i4_num_ctb_for_out_scale = (UPDATE_QP_AT_CTB * i4_tot_frame_ctb) / 100;
5985
5986
94.8k
            ps_ctxt->i4_cu_qp_sub_pic_rc = (1 << QP_LEVEL_MOD_ACT_FACTOR);
5987
            /*Sub Pic RC frame level params */
5988
94.8k
            ps_ctxt->i8_frame_l1_ipe_sad =
5989
94.8k
                ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i8_raw_pre_intra_sad;
5990
94.8k
            ps_ctxt->i8_frame_l0_ipe_satd =
5991
94.8k
                ps_curr_inp_prms->ps_curr_inp->s_lap_out.i8_frame_l0_acc_satd;
5992
94.8k
            ps_ctxt->i8_frame_l1_me_sad =
5993
94.8k
                ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i8_raw_l1_coarse_me_sad;
5994
94.8k
            ps_ctxt->i8_frame_l1_activity_fact =
5995
94.8k
                ps_curr_inp_prms->ps_curr_inp->s_lap_out.i8_frame_level_activity_fact;
5996
94.8k
            if(ps_ctxt->i4_sub_pic_level_rc)
5997
0
            {
5998
0
                ASSERT(
5999
0
                    ps_curr_inp_prms->ps_curr_inp->s_lap_out
6000
0
                        .ai4_frame_bits_estimated[ps_ctxt->i4_bitrate_instance_num] != 0);
6001
6002
0
                ps_ctxt->ai4_frame_bits_estimated[ps_ctxt->i4_enc_frm_id]
6003
0
                                                 [ps_ctxt->i4_bitrate_instance_num] =
6004
0
                    ps_curr_inp_prms->ps_curr_inp->s_lap_out
6005
0
                        .ai4_frame_bits_estimated[ps_ctxt->i4_bitrate_instance_num];
6006
0
            }
6007
            //ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type = 1;
6008
6009
94.8k
            ps_ctxt->i4_is_I_scenecut =
6010
94.8k
                ((ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type == SCENE_TYPE_SCENE_CUT) &&
6011
0
                 (ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_pic_type == IV_IDR_FRAME ||
6012
0
                  ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_pic_type == IV_I_FRAME));
6013
6014
94.8k
            ps_ctxt->i4_is_non_I_scenecut =
6015
94.8k
                ((ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type == SCENE_TYPE_SCENE_CUT) &&
6016
0
                 (ps_ctxt->i4_is_I_scenecut == 0));
6017
6018
            /*ps_ctxt->i4_is_I_only_scd = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_I_only_scd;
6019
            ps_ctxt->i4_is_non_I_scd = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_non_I_scd;*/
6020
94.8k
            ps_ctxt->i4_is_model_valid =
6021
94.8k
                ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i4_is_model_valid;
6022
94.8k
        }
6023
        /* cb and cr offsets are assumed to be same */
6024
94.8k
        chroma_qp_offset = ps_slice_hdr->i1_slice_cb_qp_offset + ps_pps->i1_pic_cb_qp_offset;
6025
6026
        /* assumption of cb = cr qp */
6027
94.8k
        ASSERT(ps_slice_hdr->i1_slice_cb_qp_offset == ps_slice_hdr->i1_slice_cr_qp_offset);
6028
94.8k
        ASSERT(ps_pps->i1_pic_cb_qp_offset == ps_pps->i1_pic_cr_qp_offset);
6029
6030
94.8k
        ps_ctxt->u1_is_input_data_hbd = (ps_sps->i1_bit_depth_luma_minus8 > 0);
6031
6032
94.8k
        ps_ctxt->u1_bit_depth = ps_sps->i1_bit_depth_luma_minus8 + 8;
6033
6034
94.8k
        ps_ctxt->s_mc_ctxt.i4_bit_depth = ps_ctxt->u1_bit_depth;
6035
94.8k
        ps_ctxt->s_mc_ctxt.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
6036
6037
        /*remember chroma qp offset as qp related parameters are calculated at CU level*/
6038
94.8k
        ps_ctxt->i4_chroma_qp_offset = chroma_qp_offset;
6039
94.8k
        ps_ctxt->i1_cu_qp_delta_enable = ps_pps->i1_cu_qp_delta_enabled_flag;
6040
94.8k
        ps_ctxt->i1_entropy_coding_sync_enabled_flag = ps_pps->i1_entropy_coding_sync_enabled_flag;
6041
6042
94.8k
        ps_ctxt->i4_is_ref_pic = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_ref_pic;
6043
94.8k
        ps_ctxt->i4_temporal_layer = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_temporal_lyr_id;
6044
94.8k
        ps_ctxt->i4_use_const_lamda_modifier = USE_CONSTANT_LAMBDA_MODIFIER;
6045
94.8k
        ps_ctxt->i4_use_const_lamda_modifier =
6046
94.8k
            ps_ctxt->i4_use_const_lamda_modifier ||
6047
94.8k
            ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6048
94.8k
              (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
6049
0
             ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6050
0
               (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)) ||
6051
0
              (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6052
0
               (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1)) ||
6053
0
              (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6054
0
               (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_2)) ||
6055
0
              (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
6056
0
               (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_3))));
6057
6058
94.8k
        {
6059
94.8k
            ps_ctxt->f_i_pic_lamda_modifier =
6060
94.8k
                ps_curr_inp_prms->ps_curr_inp->s_lap_out.f_i_pic_lamda_modifier;
6061
94.8k
        }
6062
6063
94.8k
        ps_ctxt->i4_frame_qp = i4_frm_qp;
6064
94.8k
        ps_ctxt->i4_frame_mod_qp = i4_frm_qp;
6065
94.8k
        ps_ctxt->i4_cu_qp = i4_frm_qp;
6066
94.8k
        ps_ctxt->i4_prev_cu_qp = i4_frm_qp;
6067
94.8k
        ps_ctxt->i4_chrm_cu_qp =
6068
94.8k
            (ps_ctxt->u1_chroma_array_type == 2)
6069
94.8k
                ? MIN(i4_frm_qp + chroma_qp_offset, 51)
6070
94.8k
                : gai1_ihevc_chroma_qp_scale[i4_frm_qp + chroma_qp_offset + MAX_QP_BD_OFFSET];
6071
6072
94.8k
        ps_ctxt->i4_cu_qp_div6 = (i4_frm_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
6073
94.8k
        i4_div_factor = (i4_frm_qp + 3) / 6;
6074
94.8k
        i4_div_factor = CLIP3(i4_div_factor, 3, 6);
6075
94.8k
        ps_ctxt->i4_cu_qp_mod6 = (i4_frm_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
6076
6077
94.8k
        ps_ctxt->i4_chrm_cu_qp_div6 =
6078
94.8k
            (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
6079
94.8k
        ps_ctxt->i4_chrm_cu_qp_mod6 =
6080
94.8k
            (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
6081
6082
94.8k
#define INTER_RND_QP_BY_6
6083
94.8k
#ifdef INTER_RND_QP_BY_6
6084
6085
94.8k
        { /*1/6 rounding for 8 bit b frames*/
6086
94.8k
            ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = 85
6087
94.8k
                /*((1 << QUANT_ROUND_FACTOR_Q) / 6)*/;
6088
94.8k
        }
6089
#else
6090
        /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
6091
        ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3;
6092
#endif
6093
6094
94.8k
        if(ISLICE == i1_slice_type)
6095
31.0k
        {
6096
            /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
6097
31.0k
            ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] = 171
6098
31.0k
                /*((1 << QUANT_ROUND_FACTOR_Q) / 6)*/;
6099
31.0k
        }
6100
63.8k
        else
6101
63.8k
        {
6102
            /* quant factor without RDOQ is 1/6th of shift for intra in inter pic */
6103
63.8k
            ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
6104
63.8k
                ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER];
6105
            /* (1 << QUANT_ROUND_FACTOR_Q) / 6; */
6106
63.8k
        }
6107
6108
94.8k
        ps_ctxt->i1_strong_intra_smoothing_enable_flag = i1_strong_intra_smoothing_enable_flag;
6109
6110
94.8k
        ps_ctxt->i1_slice_type = i1_slice_type;
6111
6112
        /* intialize the inter pred (MC) context at frame level */
6113
94.8k
        ps_ctxt->s_mc_ctxt.ps_ref_list = aps_ref_list;
6114
94.8k
        ps_ctxt->s_mc_ctxt.i1_weighted_pred_flag = i1_weighted_pred_flag;
6115
94.8k
        ps_ctxt->s_mc_ctxt.i1_weighted_bipred_flag = i1_weighted_bipred_flag;
6116
94.8k
        ps_ctxt->s_mc_ctxt.i4_log2_luma_wght_denom = log2_luma_wght_denom;
6117
94.8k
        ps_ctxt->s_mc_ctxt.i4_log2_chroma_wght_denom = log2_chroma_wght_denom;
6118
6119
        /* intialize the MV pred context at frame level */
6120
94.8k
        ps_ctxt->s_mv_pred_ctxt.ps_ref_list = aps_ref_list;
6121
94.8k
        ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr = ps_slice_hdr;
6122
94.8k
        ps_ctxt->s_mv_pred_ctxt.ps_sps = ps_sps;
6123
94.8k
        ps_ctxt->s_mv_pred_ctxt.i4_log2_parallel_merge_level_minus2 =
6124
94.8k
            ps_pps->i1_log2_parallel_merge_level - 2;
6125
6126
94.8k
#if ADAPT_COLOCATED_FROM_L0_FLAG
6127
94.8k
        if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_temporal_mvp_enable_flag)
6128
74.5k
        {
6129
74.5k
            if((ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_num_ref_idx_l1_active > 0) &&
6130
11.3k
               (ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][0]->i4_frame_qp <
6131
11.3k
                ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][0]->i4_frame_qp))
6132
3.27k
            {
6133
3.27k
                ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_collocated_from_l0_flag = 1;
6134
3.27k
            }
6135
74.5k
        }
6136
94.8k
#endif
6137
        /* Initialization of deblocking params */
6138
94.8k
        ps_ctxt->s_deblk_prms.i4_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
6139
94.8k
        ps_ctxt->s_deblk_prms.i4_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
6140
6141
94.8k
        ps_ctxt->s_deblk_prms.i4_cb_qp_indx_offset = ps_pps->i1_pic_cb_qp_offset;
6142
6143
94.8k
        ps_ctxt->s_deblk_prms.i4_cr_qp_indx_offset = ps_pps->i1_pic_cr_qp_offset;
6144
        /*init frame level stat accumualtion parameters */
6145
94.8k
        ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6146
94.8k
            ->u4_frame_sad_acc = 0;
6147
94.8k
        ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6148
94.8k
            ->u4_frame_intra_sad_acc = 0;
6149
94.8k
        ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6150
94.8k
            ->u4_frame_open_loop_intra_sad = 0;
6151
94.8k
        ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6152
94.8k
            ->i8_frame_open_loop_ssd = 0;
6153
94.8k
        ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6154
94.8k
            ->u4_frame_inter_sad_acc = 0;
6155
6156
94.8k
        ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6157
94.8k
            ->i8_frame_cost_acc = 0;
6158
94.8k
        ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6159
94.8k
            ->i8_frame_intra_cost_acc = 0;
6160
94.8k
        ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6161
94.8k
            ->i8_frame_inter_cost_acc = 0;
6162
6163
94.8k
        ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6164
94.8k
            ->u4_frame_intra_sad = 0;
6165
94.8k
        ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6166
94.8k
            ->u4_frame_rdopt_bits = 0;
6167
94.8k
        ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6168
94.8k
            ->u4_frame_rdopt_header_bits = 0;
6169
94.8k
        ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6170
94.8k
            ->i4_qp_normalized_8x8_cu_sum[0] = 0;
6171
94.8k
        ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6172
94.8k
            ->i4_qp_normalized_8x8_cu_sum[1] = 0;
6173
94.8k
        ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6174
94.8k
            ->i4_8x8_cu_sum[0] = 0;
6175
94.8k
        ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6176
94.8k
            ->i4_8x8_cu_sum[1] = 0;
6177
94.8k
        ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6178
94.8k
            ->i8_sad_by_qscale[0] = 0;
6179
94.8k
        ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
6180
94.8k
            ->i8_sad_by_qscale[1] = 0;
6181
        /* Compute the frame_qstep */
6182
94.8k
        GET_FRAME_QSTEP_FROM_QP(ps_ctxt->i4_frame_qp, ps_ctxt->i4_frame_qstep);
6183
6184
94.8k
        ps_ctxt->u1_max_tr_depth = ps_sps->i1_max_transform_hierarchy_depth_inter;
6185
6186
94.8k
        ps_ctxt->ps_rc_quant_ctxt = &ps_enc_ctxt->s_rc_quant;
6187
        /* intialize the cabac rdopt context at frame level */
6188
94.8k
        ihevce_entropy_rdo_frame_init(
6189
94.8k
            &ps_ctxt->s_rdopt_entropy_ctxt,
6190
94.8k
            ps_slice_hdr,
6191
94.8k
            ps_pps,
6192
94.8k
            ps_sps,
6193
94.8k
            ps_vps,
6194
94.8k
            ps_master_ctxt->au1_cu_skip_top_row,
6195
94.8k
            &ps_enc_ctxt->s_rc_quant);
6196
6197
        /* register the dep mngr instance for forward ME sync */
6198
94.8k
        ps_ctxt->pv_dep_mngr_encloop_dep_me = pv_dep_mngr_encloop_dep_me;
6199
94.8k
    }
6200
94.8k
}
6201
/*
6202
******************************************************************************
6203
* \if Function name : ihevce_enc_loop_get_frame_rc_prms \endif
6204
*
6205
* \brief
6206
*    returns Nil
6207
*
6208
* \param[in] pv_enc_loop_ctxt : pointer to encode loop context
6209
* \param[out]ps_rc_prms       : ptr to frame level info structure
6210
*
6211
* \return
6212
*    None
6213
*
6214
* \author
6215
*  Ittiam
6216
*
6217
*****************************************************************************
6218
*/
6219
void ihevce_enc_loop_get_frame_rc_prms(
6220
    void *pv_enc_loop_ctxt,
6221
    rc_bits_sad_t *ps_rc_prms,
6222
    WORD32 i4_br_id,  //bitrate instance id
6223
    WORD32 i4_enc_frm_id)  // frame id
6224
94.8k
{
6225
    /*Get the master thread pointer*/
6226
94.8k
    ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
6227
94.8k
    ihevce_enc_loop_ctxt_t *ps_ctxt;
6228
94.8k
    UWORD32 total_frame_intra_sad = 0, total_frame_open_loop_intra_sad = 0;
6229
94.8k
    LWORD64 i8_total_ssd_frame = 0;
6230
94.8k
    UWORD32 total_frame_sad = 0;
6231
94.8k
    UWORD32 total_frame_rdopt_bits = 0;
6232
94.8k
    UWORD32 total_frame_rdopt_header_bits = 0;
6233
94.8k
    WORD32 i4_qp_normalized_8x8_cu_sum[2] = { 0, 0 };
6234
94.8k
    WORD32 i4_8x8_cu_sum[2] = { 0, 0 };
6235
94.8k
    LWORD64 i8_sad_by_qscale[2] = { 0, 0 };
6236
94.8k
    WORD32 i4_curr_qp_acc = 0;
6237
94.8k
    WORD32 i;
6238
6239
    /* ENC_LOOP master state structure */
6240
94.8k
    ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
6241
6242
94.8k
    if(1 == ps_master_ctxt->i4_num_enc_loop_frm_pllel)
6243
94.8k
    {
6244
94.8k
        i4_enc_frm_id = 0;
6245
94.8k
    }
6246
    /*loop through all threads and accumulate intra sad across all threads*/
6247
189k
    for(i = 0; i < ps_master_ctxt->i4_num_proc_thrds; i++)
6248
94.8k
    {
6249
        /* ENC_LOOP state structure */
6250
94.8k
        ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[i];
6251
94.8k
        total_frame_open_loop_intra_sad +=
6252
94.8k
            ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_open_loop_intra_sad;
6253
94.8k
        i8_total_ssd_frame +=
6254
94.8k
            ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_open_loop_ssd;
6255
94.8k
        total_frame_intra_sad +=
6256
94.8k
            ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_intra_sad;
6257
94.8k
        total_frame_sad +=
6258
94.8k
            ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_sad_acc;
6259
94.8k
        total_frame_rdopt_bits +=
6260
94.8k
            ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_bits;
6261
94.8k
        total_frame_rdopt_header_bits +=
6262
94.8k
            ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_header_bits;
6263
94.8k
        i4_qp_normalized_8x8_cu_sum[0] += ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
6264
94.8k
                                              ->i4_qp_normalized_8x8_cu_sum[0];
6265
94.8k
        i4_qp_normalized_8x8_cu_sum[1] += ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
6266
94.8k
                                              ->i4_qp_normalized_8x8_cu_sum[1];
6267
94.8k
        i4_8x8_cu_sum[0] +=
6268
94.8k
            ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i4_8x8_cu_sum[0];
6269
94.8k
        i4_8x8_cu_sum[1] +=
6270
94.8k
            ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i4_8x8_cu_sum[1];
6271
94.8k
        i8_sad_by_qscale[0] +=
6272
94.8k
            ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_sad_by_qscale[0];
6273
94.8k
        i8_sad_by_qscale[1] +=
6274
94.8k
            ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_sad_by_qscale[1];
6275
94.8k
    }
6276
6277
94.8k
    ps_rc_prms->u4_open_loop_intra_sad = total_frame_open_loop_intra_sad;
6278
94.8k
    ps_rc_prms->i8_total_ssd_frame = i8_total_ssd_frame;
6279
94.8k
    ps_rc_prms->u4_total_sad = total_frame_sad;
6280
94.8k
    ps_rc_prms->u4_total_texture_bits = total_frame_rdopt_bits - total_frame_rdopt_header_bits;
6281
94.8k
    ps_rc_prms->u4_total_header_bits = total_frame_rdopt_header_bits;
6282
    /*This accumulation of intra frame sad is not intact. This can only be a temp change*/
6283
94.8k
    ps_rc_prms->u4_total_intra_sad = total_frame_intra_sad;
6284
94.8k
    ps_rc_prms->i4_qp_normalized_8x8_cu_sum[0] = i4_qp_normalized_8x8_cu_sum[0];
6285
94.8k
    ps_rc_prms->i4_qp_normalized_8x8_cu_sum[1] = i4_qp_normalized_8x8_cu_sum[1];
6286
94.8k
    ps_rc_prms->i4_8x8_cu_sum[0] = i4_8x8_cu_sum[0];
6287
94.8k
    ps_rc_prms->i4_8x8_cu_sum[1] = i4_8x8_cu_sum[1];
6288
94.8k
    ps_rc_prms->i8_sad_by_qscale[0] = i8_sad_by_qscale[0];
6289
94.8k
    ps_rc_prms->i8_sad_by_qscale[1] = i8_sad_by_qscale[1];
6290
94.8k
}