/src/libhevc/encoder/ihevce_enc_loop_pass.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2018 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | |
21 | | /*! |
22 | | ****************************************************************************** |
23 | | * \file ihevce_enc_loop_pass.c |
24 | | * |
25 | | * \brief |
26 | | * This file contains Encoder normative loop pass related functions |
27 | | * |
28 | | * \date |
29 | | * 18/09/2012 |
30 | | * |
31 | | * \author |
32 | | * Ittiam |
33 | | * |
34 | | * |
35 | | * List of Functions |
36 | | * |
37 | | * |
38 | | ****************************************************************************** |
39 | | */ |
40 | | |
41 | | /*****************************************************************************/ |
42 | | /* File Includes */ |
43 | | /*****************************************************************************/ |
44 | | /* System include files */ |
45 | | #include <stdio.h> |
46 | | #include <string.h> |
47 | | #include <stdlib.h> |
48 | | #include <assert.h> |
49 | | #include <stdarg.h> |
50 | | #include <math.h> |
51 | | #include <limits.h> |
52 | | |
53 | | /* User include files */ |
54 | | #include "ihevc_typedefs.h" |
55 | | #include "itt_video_api.h" |
56 | | #include "ihevce_api.h" |
57 | | |
58 | | #include "rc_cntrl_param.h" |
59 | | #include "rc_frame_info_collector.h" |
60 | | #include "rc_look_ahead_params.h" |
61 | | |
62 | | #include "ihevc_defs.h" |
63 | | #include "ihevc_macros.h" |
64 | | #include "ihevc_debug.h" |
65 | | #include "ihevc_structs.h" |
66 | | #include "ihevc_platform_macros.h" |
67 | | #include "ihevc_deblk.h" |
68 | | #include "ihevc_itrans_recon.h" |
69 | | #include "ihevc_chroma_itrans_recon.h" |
70 | | #include "ihevc_chroma_intra_pred.h" |
71 | | #include "ihevc_intra_pred.h" |
72 | | #include "ihevc_inter_pred.h" |
73 | | #include "ihevc_mem_fns.h" |
74 | | #include "ihevc_padding.h" |
75 | | #include "ihevc_weighted_pred.h" |
76 | | #include "ihevc_sao.h" |
77 | | #include "ihevc_resi_trans.h" |
78 | | #include "ihevc_quant_iquant_ssd.h" |
79 | | #include "ihevc_cabac_tables.h" |
80 | | #include "ihevc_common_tables.h" |
81 | | #include "ihevc_quant_tables.h" |
82 | | |
83 | | #include "ihevce_defs.h" |
84 | | #include "ihevce_hle_interface.h" |
85 | | #include "ihevce_lap_enc_structs.h" |
86 | | #include "ihevce_multi_thrd_structs.h" |
87 | | #include "ihevce_multi_thrd_funcs.h" |
88 | | #include "ihevce_me_common_defs.h" |
89 | | #include "ihevce_had_satd.h" |
90 | | #include "ihevce_error_codes.h" |
91 | | #include "ihevce_bitstream.h" |
92 | | #include "ihevce_cabac.h" |
93 | | #include "ihevce_rdoq_macros.h" |
94 | | #include "ihevce_function_selector.h" |
95 | | #include "ihevce_enc_structs.h" |
96 | | #include "ihevce_entropy_structs.h" |
97 | | #include "ihevce_cmn_utils_instr_set_router.h" |
98 | | #include "ihevce_ipe_instr_set_router.h" |
99 | | #include "ihevce_decomp_pre_intra_structs.h" |
100 | | #include "ihevce_decomp_pre_intra_pass.h" |
101 | | #include "ihevce_enc_loop_structs.h" |
102 | | #include "ihevce_nbr_avail.h" |
103 | | #include "ihevce_enc_loop_utils.h" |
104 | | #include "ihevce_sub_pic_rc.h" |
105 | | #include "ihevce_global_tables.h" |
106 | | #include "ihevce_bs_compute_ctb.h" |
107 | | #include "ihevce_cabac_rdo.h" |
108 | | #include "ihevce_deblk.h" |
109 | | #include "ihevce_frame_process.h" |
110 | | #include "ihevce_rc_enc_structs.h" |
111 | | #include "hme_datatype.h" |
112 | | #include "hme_interface.h" |
113 | | #include "hme_common_defs.h" |
114 | | #include "hme_defs.h" |
115 | | #include "ihevce_me_instr_set_router.h" |
116 | | #include "ihevce_enc_subpel_gen.h" |
117 | | #include "ihevce_inter_pred.h" |
118 | | #include "ihevce_mv_pred.h" |
119 | | #include "ihevce_mv_pred_merge.h" |
120 | | #include "ihevce_enc_loop_inter_mode_sifter.h" |
121 | | #include "ihevce_enc_cu_recursion.h" |
122 | | #include "ihevce_enc_loop_pass.h" |
123 | | #include "ihevce_common_utils.h" |
124 | | #include "ihevce_dep_mngr_interface.h" |
125 | | #include "ihevce_sao.h" |
126 | | #include "ihevce_tile_interface.h" |
127 | | #include "ihevce_profile.h" |
128 | | |
129 | | #include "cast_types.h" |
130 | | #include "osal.h" |
131 | | #include "osal_defaults.h" |
132 | | |
133 | | /*****************************************************************************/ |
134 | | /* Globals */ |
135 | | /*****************************************************************************/ |
136 | | extern PART_ID_T ge_part_type_to_part_id[MAX_PART_TYPES][MAX_NUM_PARTS]; |
137 | | |
138 | | extern UWORD8 gau1_num_parts_in_part_type[MAX_PART_TYPES]; |
139 | | |
140 | | /*****************************************************************************/ |
141 | | /* Constant Macros */ |
142 | | /*****************************************************************************/ |
143 | 94.8k | #define UPDATE_QP_AT_CTB 6 |
144 | 21.4k | #define INTRAPRED_SIMD_LEFT_PADDING 16 |
145 | 14.2k | #define INTRAPRED_SIMD_RIGHT_PADDING 8 |
146 | | |
147 | | /*****************************************************************************/ |
148 | | /* Function Definitions */ |
149 | | /*****************************************************************************/ |
150 | | |
151 | | /*! |
152 | | ****************************************************************************** |
153 | | * \if Function name : ihevce_enc_loop_ctb_left_copy \endif |
154 | | * |
155 | | * \brief |
156 | | * Copies the right-most column of the current CTB (luma recon, chroma recon |
157 | | * and 4x4 nbr data) into the left-neighbour buffers held in the context |
158 | | * |
159 | | * \param[in,out] ps_ctxt : enc loop ctxt; pv_left_luma_data, pv_left_chrm_data |
160 | | * and as_left_col_nbr are overwritten |
161 | | * \param[in] ps_cu_prms : provides recon pointers, recon strides and ctb size |
162 | | * |
163 | | * \return None |
164 | | * |
165 | | * \date |
166 | | * 18/09/2012 |
167 | | * \author |
168 | | * Ittiam |
169 | | ****************************************************************************** |
170 | | */ |
171 | | void ihevce_enc_loop_ctb_left_copy(ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_prms_t *ps_cu_prms) |
172 | 131k | { |
173 | | /* ------------------------------------------------------------------ */ |
174 | | /* copy the right column data of the CTB to the context buffers */ |
175 | | /* ------------------------------------------------------------------ */ |
176 | | |
177 | 131k | nbr_4x4_t *ps_left_nbr; |
178 | 131k | nbr_4x4_t *ps_nbr; |
179 | 131k | UWORD8 *pu1_buff; |
180 | 131k | WORD32 num_pels; |
181 | 131k | UWORD8 *pu1_luma_left, *pu1_chrm_left; |
182 | | |
183 | 131k | UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2); |
184 | | |
185 | 131k | pu1_luma_left = (UWORD8 *)ps_ctxt->pv_left_luma_data; |
186 | 131k | pu1_chrm_left = (UWORD8 *)ps_ctxt->pv_left_chrm_data; |
187 | 131k | ps_left_nbr = &ps_ctxt->as_left_col_nbr[0]; |
188 | | |
189 | | /* copy right luma data: last pel of each of the ctb_size recon rows */ |
190 | 131k | pu1_buff = ps_cu_prms->pu1_luma_recon + ps_cu_prms->i4_ctb_size - 1; |
191 | | |
192 | 8.51M | for(num_pels = 0; num_pels < ps_cu_prms->i4_ctb_size; num_pels++) |
193 | 8.38M | { |
194 | 8.38M | WORD32 i4_indx = ps_cu_prms->i4_luma_recon_stride * num_pels; |
195 | | |
196 | 8.38M | pu1_luma_left[num_pels] = pu1_buff[i4_indx]; |
197 | 8.38M | } |
198 | | |
199 | | /* copy right chroma data: last 2 bytes of each row; ctb_size rows when u1_is_422 is set, ctb_size / 2 rows otherwise */ |
200 | 131k | pu1_buff = ps_cu_prms->pu1_chrm_recon + ps_cu_prms->i4_ctb_size - 2; |
201 | | |
202 | 4.32M | for(num_pels = 0; num_pels < (ps_cu_prms->i4_ctb_size >> (0 == u1_is_422)); num_pels++) |
203 | 4.19M | { |
204 | 4.19M | WORD32 i4_indx = ps_cu_prms->i4_chrm_recon_stride * num_pels; |
205 | | |
206 | 4.19M | *pu1_chrm_left++ = pu1_buff[i4_indx]; |
207 | 4.19M | *pu1_chrm_left++ = pu1_buff[i4_indx + 1]; |
208 | 4.19M | } |
209 | | |
210 | | /* store the nbr 4x4 data at ctb level */ |
211 | 131k | { |
212 | 131k | WORD32 ctr; |
213 | 131k | WORD32 nbr_strd; |
214 | | |
215 | 131k | nbr_strd = ps_cu_prms->i4_ctb_size >> 2; |
216 | | |
217 | | /* copy right nbr data: last entry of each row of as_ctb_nbr_arr */ |
218 | 131k | ps_nbr = &ps_ctxt->as_ctb_nbr_arr[0]; |
219 | 131k | ps_nbr += ((ps_cu_prms->i4_ctb_size >> 2) - 1); |
220 | | |
221 | 2.22M | for(ctr = 0; ctr < (ps_cu_prms->i4_ctb_size >> 2); ctr++) |
222 | 2.09M | { |
223 | 2.09M | WORD32 i4_indx = nbr_strd * ctr; |
224 | | |
225 | 2.09M | ps_left_nbr[ctr] = ps_nbr[i4_indx]; |
226 | 2.09M | } |
227 | 131k | } |
228 | 131k | return; |
229 | 131k | } |
230 | | |
231 | | /*! |
232 | | ****************************************************************************** |
233 | | * \if Function name : ihevce_mark_all_modes_to_evaluate \endif |
234 | | * |
235 | | * \brief |
236 | | * Mark all modes for inter/intra for evaluation (intra marks are set only |
237 | | * when u1_num_intra_rdopt_cands is non-zero). This function will be |
238 | | * called by ref instance |
239 | | * |
240 | | * \param[in] pv_ctxt : pointer to enc_loop module (unused in this function) |
241 | | * \param[in,out] ps_cu_analyse : pointer to cu analyse; eval marks are written |
242 | | * |
243 | | * \return |
244 | | * None |
245 | | * |
246 | | * \author |
247 | | * Ittiam |
248 | | ***************************************************************************** |
249 | | */ |
250 | | void ihevce_mark_all_modes_to_evaluate(void *pv_ctxt, cu_analyse_t *ps_cu_analyse) |
251 | 1.99M | { |
252 | 1.99M | UWORD8 ctr; |
253 | 1.99M | WORD32 i4_part; |
254 | | |
255 | 1.99M | (void)pv_ctxt; |
256 | | /* mark every one of the MAX_INTER_CU_CANDIDATES inter cand entries */ |
257 | 9.97M | for(ctr = 0; ctr < MAX_INTER_CU_CANDIDATES; ctr++) |
258 | 7.97M | { |
259 | 7.97M | ps_cu_analyse->as_cu_inter_cand[ctr].b1_eval_mark = 1; |
260 | 7.97M | } |
261 | | |
262 | | /* mark all intra candidate entries, only if the CU has intra rdopt cands */ |
263 | 1.99M | if(0 != ps_cu_analyse->u1_num_intra_rdopt_cands) |
264 | 1.80M | { |
265 | 9.03M | for(ctr = 0; ctr < MAX_INTRA_CU_CANDIDATES + 1; ctr++) |
266 | 7.22M | { |
267 | 7.22M | ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[ctr] = 1; |
268 | 7.22M | ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[ctr] = 1; |
269 | | |
270 | 36.1M | for(i4_part = 0; i4_part < NUM_PU_PARTS; i4_part++) |
271 | 28.9M | { |
272 | 28.9M | ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[i4_part][ctr] = 1; |
273 | 28.9M | } |
274 | 7.22M | } |
275 | 1.80M | } |
276 | 1.99M | } |
277 | | |
278 | | /*! |
279 | | ****************************************************************************** |
280 | | * \if Function name : ihevce_cu_mode_decide \endif |
281 | | * |
282 | | * \brief |
283 | | * Coding Unit mode decide function. Performs RD opt and decides the best mode |
284 | | * |
285 | | * \param[in] ps_ctxt : pointer to enc_loop module |
286 | | * \param[in] ps_cu_prms : pointer to coding unit params (position, buffer pointers) |
287 | | * \param[in] ps_cu_analyse : pointer to cu analyse |
288 | | * \param[out] ps_final_mode_state : pointer to cu final mode state |
289 | | * \param[out] pu1_ecd_data : pointer to store coeff data for ECD |
290 | | * \param[out] ps_col_pu : colocated pu buffer pointer |
291 | | * \param[out] pu1_col_pu_map : colocated pu map buffer pointer |
292 | | * \param[in] col_start_pu_idx : pu index start value |
293 | | * |
294 | | * \return |
295 | | * None |
296 | | * |
297 | | * |
298 | | * \author |
299 | | * Ittiam |
300 | | * |
301 | | ***************************************************************************** |
302 | | */ |
303 | | LWORD64 ihevce_cu_mode_decide( |
304 | | ihevce_enc_loop_ctxt_t *ps_ctxt, |
305 | | enc_loop_cu_prms_t *ps_cu_prms, |
306 | | cu_analyse_t *ps_cu_analyse, |
307 | | final_mode_state_t *ps_final_mode_state, |
308 | | UWORD8 *pu1_ecd_data, |
309 | | pu_col_mv_t *ps_col_pu, |
310 | | UWORD8 *pu1_col_pu_map, |
311 | | WORD32 col_start_pu_idx) |
312 | 1.99M | { |
313 | 1.99M | enc_loop_chrm_cu_buf_prms_t s_chrm_cu_buf_prms; |
314 | 1.99M | cu_nbr_prms_t s_cu_nbr_prms; |
315 | 1.99M | inter_cu_mode_info_t s_inter_cu_mode_info; |
316 | 1.99M | cu_inter_cand_t *ps_best_inter_cand = NULL; |
317 | 1.99M | UWORD8 *pu1_cu_top; |
318 | 1.99M | UWORD8 *pu1_cu_top_left; |
319 | 1.99M | UWORD8 *pu1_cu_left; |
320 | 1.99M | UWORD8 *pu1_final_recon = NULL; |
321 | 1.99M | UWORD8 *pu1_curr_src = NULL; |
322 | 1.99M | void *pv_curr_src = NULL; |
323 | 1.99M | void *pv_cu_left = NULL; |
324 | 1.99M | void *pv_cu_top = NULL; |
325 | 1.99M | void *pv_cu_top_left = NULL; |
326 | | |
327 | 1.99M | WORD32 cu_left_stride = 0; |
328 | 1.99M | WORD32 ctr; |
329 | 1.99M | WORD32 rd_opt_best_idx; |
330 | 1.99M | LWORD64 rd_opt_least_cost; |
331 | 1.99M | WORD32 rd_opt_curr_idx; |
332 | 1.99M | WORD32 num_4x4_in_ctb; |
333 | 1.99M | WORD32 nbr_4x4_left_strd = 0; |
334 | | |
335 | 1.99M | nbr_4x4_t *ps_topleft_nbr_4x4; |
336 | 1.99M | nbr_4x4_t *ps_left_nbr_4x4 = NULL; |
337 | 1.99M | nbr_4x4_t *ps_top_nbr_4x4 = NULL; |
338 | 1.99M | nbr_4x4_t *ps_curr_nbr_4x4; |
339 | 1.99M | WORD32 enable_intra_eval_flag; |
340 | 1.99M | WORD32 i4_best_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1; |
341 | 1.99M | WORD32 curr_cu_pos_in_row; |
342 | 1.99M | WORD32 cu_top_right_offset; |
343 | 1.99M | WORD32 cu_top_right_dep_pos; |
344 | 1.99M | WORD32 i4_ctb_x_off, i4_ctb_y_off; |
345 | | |
346 | 1.99M | UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2); |
347 | 1.99M | (void)ps_final_mode_state; |
348 | | /* default init */ |
349 | 1.99M | rd_opt_least_cost = MAX_COST_64; |
350 | 1.99M | ps_ctxt->as_cu_prms[0].i8_best_rdopt_cost = MAX_COST_64; |
351 | 1.99M | ps_ctxt->as_cu_prms[1].i8_best_rdopt_cost = MAX_COST_64; |
352 | | |
353 | | /* Zero cbf tool is enabled by default for all presets */ |
354 | 1.99M | ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE; |
355 | | |
356 | 1.99M | rd_opt_best_idx = 1; |
357 | 1.99M | rd_opt_curr_idx = 0; |
358 | 1.99M | enable_intra_eval_flag = 1; |
359 | | |
360 | | /* CU params in enc ctxt*/ |
361 | 1.99M | ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_x = ps_cu_analyse->b3_cu_pos_x; |
362 | 1.99M | ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_y = ps_cu_analyse->b3_cu_pos_y; |
363 | 1.99M | ps_ctxt->ps_enc_out_ctxt->u1_cu_size = ps_cu_analyse->u1_cu_size; |
364 | | |
365 | 1.99M | num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2); |
366 | 1.99M | ps_curr_nbr_4x4 = &ps_ctxt->as_ctb_nbr_arr[0]; |
367 | 1.99M | ps_curr_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1); |
368 | 1.99M | ps_curr_nbr_4x4 += ((ps_cu_analyse->b3_cu_pos_y << 1) * num_4x4_in_ctb); |
369 | | |
370 | | /* CB and Cr are pixel interleaved */ |
371 | 1.99M | s_chrm_cu_buf_prms.i4_chrm_recon_stride = ps_cu_prms->i4_chrm_recon_stride; |
372 | | |
373 | 1.99M | s_chrm_cu_buf_prms.i4_chrm_src_stride = ps_cu_prms->i4_chrm_src_stride; |
374 | | |
375 | 1.99M | if(!ps_ctxt->u1_is_input_data_hbd) |
376 | 1.99M | { |
377 | | /* --------------------------------------- */ |
378 | | /* ----- Luma Pointers Derivation -------- */ |
379 | | /* --------------------------------------- */ |
380 | | |
381 | | /* based on CU position derive the pointers */ |
382 | 1.99M | pu1_final_recon = ps_cu_prms->pu1_luma_recon + (ps_cu_analyse->b3_cu_pos_x << 3); |
383 | | |
384 | 1.99M | pu1_curr_src = ps_cu_prms->pu1_luma_src + (ps_cu_analyse->b3_cu_pos_x << 3); |
385 | | |
386 | 1.99M | pu1_final_recon += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_recon_stride); |
387 | | |
388 | 1.99M | pu1_curr_src += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_src_stride); |
389 | | |
390 | 1.99M | pv_curr_src = pu1_curr_src; |
391 | | |
392 | | /* CU left */ |
393 | 1.99M | if(0 == ps_cu_analyse->b3_cu_pos_x) |
394 | 594k | { |
395 | | /* CTB boundary */ |
396 | 594k | pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_luma_data; |
397 | 594k | pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << 3); |
398 | 594k | cu_left_stride = 1; |
399 | | |
400 | 594k | ps_left_nbr_4x4 = &ps_ctxt->as_left_col_nbr[0]; |
401 | 594k | ps_left_nbr_4x4 += ps_cu_analyse->b3_cu_pos_y << 1; |
402 | 594k | nbr_4x4_left_strd = 1; |
403 | 594k | } |
404 | 1.40M | else |
405 | 1.40M | { |
406 | | /* inside CTB */ |
407 | 1.40M | pu1_cu_left = pu1_final_recon - 1; |
408 | 1.40M | cu_left_stride = ps_cu_prms->i4_luma_recon_stride; |
409 | | |
410 | 1.40M | ps_left_nbr_4x4 = ps_curr_nbr_4x4 - 1; |
411 | 1.40M | nbr_4x4_left_strd = num_4x4_in_ctb; |
412 | 1.40M | } |
413 | | |
414 | 1.99M | pv_cu_left = pu1_cu_left; |
415 | | |
416 | | /* CU top */ |
417 | 1.99M | if(0 == ps_cu_analyse->b3_cu_pos_y) |
418 | 579k | { |
419 | | /* CTB boundary */ |
420 | 579k | pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_luma; |
421 | 579k | pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size; |
422 | 579k | pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3); |
423 | | |
424 | 579k | ps_top_nbr_4x4 = ps_ctxt->ps_top_row_nbr; |
425 | 579k | ps_top_nbr_4x4 += (ps_cu_prms->i4_ctb_pos * (ps_cu_prms->i4_ctb_size >> 2)); |
426 | 579k | ps_top_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1); |
427 | 579k | } |
428 | 1.41M | else |
429 | 1.41M | { |
430 | | /* inside CTB */ |
431 | 1.41M | pu1_cu_top = pu1_final_recon - ps_cu_prms->i4_luma_recon_stride; |
432 | | |
433 | 1.41M | ps_top_nbr_4x4 = ps_curr_nbr_4x4 - num_4x4_in_ctb; |
434 | 1.41M | } |
435 | | |
436 | 1.99M | pv_cu_top = pu1_cu_top; |
437 | | |
438 | | /* CU top left */ |
439 | 1.99M | if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y)) |
440 | 405k | { |
441 | | /* left ctb boundary but not first row */ |
442 | 405k | pu1_cu_top_left = pu1_cu_left - 1; /* stride is 1 */ |
443 | 405k | ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - 1; /* stride is 1 */ |
444 | 405k | } |
445 | 1.58M | else |
446 | 1.58M | { |
447 | | /* rest all cases topleft is top -1 */ |
448 | 1.58M | pu1_cu_top_left = pu1_cu_top - 1; |
449 | 1.58M | ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1; |
450 | 1.58M | } |
451 | | |
452 | 1.99M | pv_cu_top_left = pu1_cu_top_left; |
453 | | |
454 | | /* Store the CU nbr information in the ctxt for final reconstruction fun. */ |
455 | 1.99M | s_cu_nbr_prms.nbr_4x4_left_strd = nbr_4x4_left_strd; |
456 | 1.99M | s_cu_nbr_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4; |
457 | 1.99M | s_cu_nbr_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4; |
458 | 1.99M | s_cu_nbr_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4; |
459 | 1.99M | s_cu_nbr_prms.pu1_cu_left = pu1_cu_left; |
460 | 1.99M | s_cu_nbr_prms.pu1_cu_top = pu1_cu_top; |
461 | 1.99M | s_cu_nbr_prms.pu1_cu_top_left = pu1_cu_top_left; |
462 | 1.99M | s_cu_nbr_prms.cu_left_stride = cu_left_stride; |
463 | | |
464 | | /* ------------------------------------------------------------ */ |
465 | | /* -- Initialize the number of neigbour skip cu count for rdo --*/ |
466 | | /* ------------------------------------------------------------ */ |
467 | 1.99M | { |
468 | 1.99M | nbr_avail_flags_t s_nbr; |
469 | 1.99M | WORD32 i4_num_nbr_skip_cus = 0; |
470 | | |
471 | | /* get the neighbour availability flags for current cu */ |
472 | 1.99M | ihevce_get_nbr_intra( |
473 | 1.99M | &s_nbr, |
474 | 1.99M | ps_ctxt->pu1_ctb_nbr_map, |
475 | 1.99M | ps_ctxt->i4_nbr_map_strd, |
476 | 1.99M | (ps_cu_analyse->b3_cu_pos_x << 1), |
477 | 1.99M | (ps_cu_analyse->b3_cu_pos_y << 1), |
478 | 1.99M | (ps_cu_analyse->u1_cu_size >> 2)); |
479 | 1.99M | if(s_nbr.u1_top_avail) |
480 | 1.52M | { |
481 | 1.52M | i4_num_nbr_skip_cus += ps_top_nbr_4x4->b1_skip_flag; |
482 | 1.52M | } |
483 | | |
484 | 1.99M | if(s_nbr.u1_left_avail) |
485 | 1.53M | { |
486 | 1.53M | i4_num_nbr_skip_cus += ps_left_nbr_4x4->b1_skip_flag; |
487 | 1.53M | } |
488 | 1.99M | ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0].i4_num_nbr_skip_cus = |
489 | 1.99M | i4_num_nbr_skip_cus; |
490 | 1.99M | ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1].i4_num_nbr_skip_cus = |
491 | 1.99M | i4_num_nbr_skip_cus; |
492 | 1.99M | } |
493 | | |
494 | | /* --------------------------------------- */ |
495 | | /* --- Chroma Pointers Derivation -------- */ |
496 | | /* --------------------------------------- */ |
497 | | |
498 | | /* based on CU position derive the pointers */ |
499 | 1.99M | s_chrm_cu_buf_prms.pu1_final_recon = |
500 | 1.99M | ps_cu_prms->pu1_chrm_recon + (ps_cu_analyse->b3_cu_pos_x << 3); |
501 | | |
502 | 1.99M | s_chrm_cu_buf_prms.pu1_curr_src = |
503 | 1.99M | ps_cu_prms->pu1_chrm_src + (ps_cu_analyse->b3_cu_pos_x << 3); |
504 | | |
505 | 1.99M | s_chrm_cu_buf_prms.pu1_final_recon += |
506 | 1.99M | ((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_recon_stride); |
507 | | |
508 | 1.99M | s_chrm_cu_buf_prms.pu1_curr_src += |
509 | 1.99M | ((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_src_stride); |
510 | | |
511 | | /* CU left */ |
512 | 1.99M | if(0 == ps_cu_analyse->b3_cu_pos_x) |
513 | 594k | { |
514 | | /* CTB boundary */ |
515 | 594k | s_chrm_cu_buf_prms.pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_chrm_data; |
516 | 594k | s_chrm_cu_buf_prms.pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 3)); |
517 | 594k | s_chrm_cu_buf_prms.i4_cu_left_stride = 2; |
518 | 594k | } |
519 | 1.40M | else |
520 | 1.40M | { |
521 | | /* inside CTB */ |
522 | 1.40M | s_chrm_cu_buf_prms.pu1_cu_left = s_chrm_cu_buf_prms.pu1_final_recon - 2; |
523 | 1.40M | s_chrm_cu_buf_prms.i4_cu_left_stride = ps_cu_prms->i4_chrm_recon_stride; |
524 | 1.40M | } |
525 | | |
526 | | /* CU top */ |
527 | 1.99M | if(0 == ps_cu_analyse->b3_cu_pos_y) |
528 | 579k | { |
529 | | /* CTB boundary */ |
530 | 579k | s_chrm_cu_buf_prms.pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_chroma; |
531 | 579k | s_chrm_cu_buf_prms.pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size; |
532 | 579k | s_chrm_cu_buf_prms.pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3); |
533 | 579k | } |
534 | 1.41M | else |
535 | 1.41M | { |
536 | | /* inside CTB */ |
537 | 1.41M | s_chrm_cu_buf_prms.pu1_cu_top = |
538 | 1.41M | s_chrm_cu_buf_prms.pu1_final_recon - ps_cu_prms->i4_chrm_recon_stride; |
539 | 1.41M | } |
540 | | |
541 | | /* CU top left */ |
542 | 1.99M | if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y)) |
543 | 405k | { |
544 | | /* left ctb boundary but not first row */ |
545 | 405k | s_chrm_cu_buf_prms.pu1_cu_top_left = |
546 | 405k | s_chrm_cu_buf_prms.pu1_cu_left - 2; /* stride is 1 (2 pixels) */ |
547 | 405k | } |
548 | 1.58M | else |
549 | 1.58M | { |
550 | | /* rest all cases topleft is top -2 */ |
551 | 1.58M | s_chrm_cu_buf_prms.pu1_cu_top_left = s_chrm_cu_buf_prms.pu1_cu_top - 2; |
552 | 1.58M | } |
553 | 1.99M | } |
554 | | |
555 | | /* Set Variables for Dep. Checking and Setting */ |
556 | 1.99M | i4_ctb_x_off = (ps_cu_prms->i4_ctb_pos << 6); |
557 | | |
558 | 1.99M | i4_ctb_y_off = ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y; |
559 | 1.99M | ps_ctxt->i4_satd_buf_idx = rd_opt_curr_idx; |
560 | | |
561 | | /* Set the pred pointer count for ME/intra to 0 to start */ |
562 | 1.99M | ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count = 0; |
563 | | |
564 | 1.99M | ASSERT( |
565 | 1.99M | (ps_cu_analyse->u1_num_inter_cands > 0) || (ps_cu_analyse->u1_num_intra_rdopt_cands > 0)); |
566 | | |
567 | 1.99M | ASSERT(ps_cu_analyse->u1_num_inter_cands <= MAX_INTER_CU_CANDIDATES); |
568 | 1.99M | s_inter_cu_mode_info.u1_num_inter_cands = 0; |
569 | 1.99M | s_inter_cu_mode_info.u1_idx_of_worst_cost_in_cost_array = 0; |
570 | 1.99M | s_inter_cu_mode_info.u1_idx_of_worst_cost_in_pred_buf_array = 0; |
571 | | |
572 | 1.99M | ps_ctxt->s_cu_inter_merge_skip.u1_num_merge_cands = 0; |
573 | 1.99M | ps_ctxt->s_cu_inter_merge_skip.u1_num_skip_cands = 0; |
574 | 1.99M | ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type0_cands = 0; |
575 | 1.99M | ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type1_cands = 0; |
576 | 1.99M | ps_ctxt->s_pred_buf_data.i4_pred_stride = ps_cu_analyse->u1_cu_size; |
577 | 1.99M | if(0 != ps_cu_analyse->u1_num_inter_cands) |
578 | 1.07M | { |
579 | 1.07M | ihevce_inter_cand_sifter_prms_t s_prms; |
580 | | |
581 | 1.07M | UWORD8 u1_enable_top_row_sync; |
582 | | |
583 | 1.07M | if(ps_ctxt->u1_disable_intra_eval) |
584 | 20.1k | { |
585 | 20.1k | u1_enable_top_row_sync = !DISABLE_TOP_SYNC; |
586 | 20.1k | } |
587 | 1.05M | else |
588 | 1.05M | { |
589 | 1.05M | u1_enable_top_row_sync = 1; |
590 | 1.05M | } |
591 | | |
592 | 1.07M | if((!ps_ctxt->u1_use_top_at_ctb_boundary) && u1_enable_top_row_sync) |
593 | 306k | { |
594 | | /* Wait till top data is ready */ |
595 | | /* Currently checking till top right CU */ |
596 | 306k | curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3); |
597 | | |
598 | 306k | if(i4_ctb_y_off == 0) |
599 | 266k | { |
600 | | /* No wait for 1st row */ |
601 | 266k | cu_top_right_offset = -(MAX_CTB_SIZE); |
602 | 266k | { |
603 | 266k | ihevce_tile_params_t *ps_col_tile_params = |
604 | 266k | ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base + |
605 | 266k | ps_ctxt->i4_tile_col_idx); |
606 | | /* No wait for 1st row */ |
607 | 266k | cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE)); |
608 | 266k | } |
609 | 266k | cu_top_right_dep_pos = 0; |
610 | 266k | } |
611 | 39.7k | else |
612 | 39.7k | { |
613 | 39.7k | cu_top_right_offset = (ps_cu_analyse->u1_cu_size) + 4; |
614 | 39.7k | cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1; |
615 | 39.7k | } |
616 | | |
617 | 306k | if(0 == ps_cu_analyse->b3_cu_pos_y) |
618 | 117k | { |
619 | 117k | ihevce_dmgr_chk_row_row_sync( |
620 | 117k | ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right, |
621 | 117k | curr_cu_pos_in_row, |
622 | 117k | cu_top_right_offset, |
623 | 117k | cu_top_right_dep_pos, |
624 | 117k | ps_ctxt->i4_tile_col_idx, /* Col Tile No. */ |
625 | 117k | ps_ctxt->thrd_id); |
626 | 117k | } |
627 | 306k | } |
628 | | |
629 | 1.07M | if(ps_ctxt->i1_cu_qp_delta_enable) |
630 | 495k | { |
631 | 495k | ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, 4, 0); |
632 | 495k | } |
633 | | |
634 | 1.07M | s_prms.i4_ctb_nbr_map_stride = ps_ctxt->i4_nbr_map_strd; |
635 | 1.07M | s_prms.i4_max_num_inter_rdopt_cands = ps_ctxt->i4_max_num_inter_rdopt_cands; |
636 | 1.07M | s_prms.i4_nbr_4x4_left_strd = nbr_4x4_left_strd; |
637 | 1.07M | s_prms.i4_src_strd = ps_cu_prms->i4_luma_src_stride; |
638 | 1.07M | s_prms.ps_cu_inter_merge_skip = &ps_ctxt->s_cu_inter_merge_skip; |
639 | 1.07M | s_prms.aps_cu_nbr_buf[0] = &ps_ctxt->as_cu_nbr[ps_ctxt->i4_satd_buf_idx][0]; |
640 | 1.07M | s_prms.aps_cu_nbr_buf[1] = &ps_ctxt->as_cu_nbr[!ps_ctxt->i4_satd_buf_idx][0]; |
641 | 1.07M | s_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4; |
642 | 1.07M | s_prms.ps_mc_ctxt = &ps_ctxt->s_mc_ctxt; |
643 | 1.07M | s_prms.ps_me_cands = ps_cu_analyse->as_cu_inter_cand; |
644 | 1.07M | s_prms.ps_mixed_modes_datastore = &ps_ctxt->s_mixed_mode_inter_cu; |
645 | 1.07M | s_prms.ps_mv_pred_ctxt = &ps_ctxt->s_mv_pred_ctxt; |
646 | 1.07M | s_prms.ps_pred_buf_data = &ps_ctxt->s_pred_buf_data; |
647 | 1.07M | s_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4; |
648 | 1.07M | s_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4; |
649 | 1.07M | s_prms.pu1_ctb_nbr_map = ps_ctxt->pu1_ctb_nbr_map; |
650 | 1.07M | s_prms.pv_src = pv_curr_src; |
651 | 1.07M | s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; |
652 | 1.07M | s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y << 3; |
653 | 1.07M | s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size; |
654 | 1.07M | s_prms.u1_max_merge_candidates = ps_ctxt->i4_max_merge_candidates; |
655 | 1.07M | s_prms.u1_num_me_cands = ps_cu_analyse->u1_num_inter_cands; |
656 | 1.07M | s_prms.u1_use_satd_for_merge_eval = ps_ctxt->i4_use_satd_for_merge_eval; |
657 | 1.07M | s_prms.u1_quality_preset = ps_ctxt->i4_quality_preset; |
658 | 1.07M | s_prms.i1_slice_type = ps_ctxt->i1_slice_type; |
659 | 1.07M | s_prms.ps_cu_me_intra_pred_prms = &ps_ctxt->s_cu_me_intra_pred_prms; |
660 | 1.07M | s_prms.u1_is_hbd = (ps_ctxt->u1_bit_depth > 8); |
661 | 1.07M | s_prms.ps_inter_cu_mode_info = &s_inter_cu_mode_info; |
662 | 1.07M | s_prms.pai4_mv_cost = ps_cu_analyse->ai4_mv_cost; |
663 | 1.07M | s_prms.i4_lambda_qf = ps_ctxt->i4_sad_lamda; |
664 | 1.07M | s_prms.u1_use_merge_cand_from_top_row = |
665 | 1.07M | (u1_enable_top_row_sync || (s_prms.u1_cu_pos_y > 0)); |
666 | 1.07M | s_prms.u1_merge_idx_cabac_model = |
667 | 1.07M | ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[IHEVC_CAB_MERGE_IDX_EXT]; |
668 | | #if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING |
669 | | s_prms.pai4_me_err_metric = ps_cu_analyse->ai4_err_metric; |
670 | | s_prms.u1_reuse_me_sad = 1; |
671 | | #else |
672 | 1.07M | s_prms.u1_reuse_me_sad = 0; |
673 | 1.07M | #endif |
674 | | |
675 | 1.07M | if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_type != PSLICE) |
676 | 229k | { |
677 | 229k | if(ps_ctxt->i4_temporal_layer == 1) |
678 | 82.3k | { |
679 | 82.3k | s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_BREF; |
680 | 82.3k | } |
681 | 147k | else |
682 | 147k | { |
683 | 147k | s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME; |
684 | 147k | } |
685 | 229k | } |
686 | 848k | else |
687 | 848k | { |
688 | 848k | s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_P; |
689 | 848k | } |
690 | 1.07M | s_prms.u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy; |
691 | | |
692 | 1.07M | if(s_prms.u1_is_cu_noisy) |
693 | 0 | { |
694 | 0 | s_prms.i4_lambda_qf = |
695 | 0 | ((float)s_prms.i4_lambda_qf) * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f; |
696 | 0 | } |
697 | 1.07M | s_prms.pf_luma_inter_pred_pu = ihevce_luma_inter_pred_pu; |
698 | | |
699 | 1.07M | s_prms.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func; |
700 | | |
701 | 1.07M | s_prms.pf_evalsad_pt_npu_mxn_8bit = (FT_SAD_EVALUATOR *)ps_ctxt->pv_evalsad_pt_npu_mxn_8bit; |
702 | 1.07M | ihevce_inter_cand_sifter(&s_prms); |
703 | 1.07M | } |
704 | 1.99M | if(u1_is_422) |
705 | 0 | { |
706 | 0 | UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX - 1]; |
707 | 0 | UWORD8 u1_num_bufs_allocated; |
708 | |
|
709 | 0 | u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices( |
710 | 0 | au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX - 1); |
711 | |
|
712 | 0 | ASSERT(u1_num_bufs_allocated == (NUM_CU_ME_INTRA_PRED_IDX - 1)); |
713 | |
|
714 | 0 | for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated; |
715 | 0 | ctr++) |
716 | 0 | { |
717 | 0 | { |
718 | 0 | ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] = |
719 | 0 | (UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]]; |
720 | 0 | } |
721 | |
|
722 | 0 | ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size; |
723 | |
|
724 | 0 | ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++; |
725 | 0 | } |
726 | |
|
727 | 0 | { |
728 | 0 | ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] = |
729 | 0 | (UWORD8 *)ps_ctxt->pv_422_chroma_intra_pred_buf; |
730 | 0 | } |
731 | |
|
732 | 0 | ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size; |
733 | |
|
734 | 0 | ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++; |
735 | 0 | } |
736 | 1.99M | else |
737 | 1.99M | { |
738 | 1.99M | UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX]; |
739 | 1.99M | UWORD8 u1_num_bufs_allocated; |
740 | | |
741 | 1.99M | u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices( |
742 | 1.99M | au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX); |
743 | | |
744 | 1.99M | ASSERT(u1_num_bufs_allocated == NUM_CU_ME_INTRA_PRED_IDX); |
745 | | |
746 | 7.97M | for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated; |
747 | 5.98M | ctr++) |
748 | 5.98M | { |
749 | 5.98M | { |
750 | 5.98M | ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] = |
751 | 5.98M | (UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]]; |
752 | 5.98M | } |
753 | | |
754 | 5.98M | ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size; |
755 | | |
756 | 5.98M | ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++; |
757 | 5.98M | } |
758 | 1.99M | } |
759 | | |
760 | 1.99M | ihevce_mark_all_modes_to_evaluate(ps_ctxt, ps_cu_analyse); |
761 | | |
762 | 1.99M | ps_ctxt->as_cu_prms[0].s_recon_datastore.u1_is_lumaRecon_available = 0; |
763 | 1.99M | ps_ctxt->as_cu_prms[1].s_recon_datastore.u1_is_lumaRecon_available = 0; |
764 | 1.99M | ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[0] = 0; |
765 | 1.99M | ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[0] = 0; |
766 | 1.99M | ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[1] = 0; |
767 | 1.99M | ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[1] = 0; |
768 | 1.99M | ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[2] = 0; |
769 | 1.99M | ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[2] = 0; |
770 | | /* --------------------------------------- */ |
771 | | /* ------ Inter RD OPT stage ------------- */ |
772 | | /* --------------------------------------- */ |
773 | 1.99M | if(0 != s_inter_cu_mode_info.u1_num_inter_cands) |
774 | 1.07M | { |
775 | 1.07M | UWORD8 u1_ssd_bit_info_ctr = 0; |
776 | | |
777 | | /* -- run a loop over all Inter rd opt cands ------ */ |
778 | 4.32M | for(ctr = 0; ctr < s_inter_cu_mode_info.u1_num_inter_cands; ctr++) |
779 | 3.24M | { |
780 | 3.24M | cu_inter_cand_t *ps_inter_cand; |
781 | | |
782 | 3.24M | LWORD64 rd_opt_cost = 0; |
783 | | |
784 | 3.24M | ps_inter_cand = s_inter_cu_mode_info.aps_cu_data[ctr]; |
785 | | |
786 | 3.24M | if((ps_inter_cand->b1_skip_flag) || (ps_inter_cand->as_inter_pu[0].b1_merge_flag) || |
787 | 831k | (ps_inter_cand->b3_part_size && ps_inter_cand->as_inter_pu[1].b1_merge_flag)) |
788 | 2.47M | { |
789 | 2.47M | ps_inter_cand->b1_eval_mark = 1; |
790 | 2.47M | } |
791 | | |
792 | | /****************************************************************/ |
793 | | /* This check is only valid for derived instances. */ |
794 | | /* check if this mode needs to be evaluated or not. */ |
795 | | /* if it is a skip candidate, go ahead and evaluate it even if */ |
796 | | /* it has not been marked while sorting. */ |
797 | | /****************************************************************/ |
798 | 3.24M | if((0 == ps_inter_cand->b1_eval_mark) && (0 == ps_inter_cand->b1_skip_flag)) |
799 | 0 | { |
800 | 0 | continue; |
801 | 0 | } |
802 | | |
803 | | /* RDOPT related copies and settings */ |
804 | 3.24M | ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx; |
805 | | |
806 | | /* RDOPT copy States : Prev Cu best to current init */ |
807 | 3.24M | COPY_CABAC_STATES( |
808 | 3.24M | &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
809 | 3.24M | &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0], |
810 | 3.24M | IHEVC_CAB_CTXT_END * sizeof(UWORD8)); |
811 | | /* MVP ,MVD calc and Motion compensation */ |
812 | 3.24M | rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)( |
813 | 3.24M | ps_ctxt, |
814 | 3.24M | ps_inter_cand, |
815 | 3.24M | ps_cu_analyse->u1_cu_size, |
816 | 3.24M | ps_cu_analyse->b3_cu_pos_x, |
817 | 3.24M | ps_cu_analyse->b3_cu_pos_y, |
818 | 3.24M | ps_left_nbr_4x4, |
819 | 3.24M | ps_top_nbr_4x4, |
820 | 3.24M | ps_topleft_nbr_4x4, |
821 | 3.24M | nbr_4x4_left_strd, |
822 | 3.24M | rd_opt_curr_idx); |
823 | | |
824 | | #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT |
825 | | if((ps_ctxt->u1_bit_depth == 8) && (!ps_inter_cand->b1_skip_flag)) |
826 | | { |
827 | | ihevce_determine_tu_tree_distribution( |
828 | | ps_inter_cand, |
829 | | (me_func_selector_t *)ps_ctxt->pv_err_func_selector, |
830 | | ps_ctxt->ai2_scratch, |
831 | | (UWORD8 *)pv_curr_src, |
832 | | ps_cu_prms->i4_luma_src_stride, |
833 | | ps_ctxt->i4_satd_lamda, |
834 | | LAMBDA_Q_SHIFT, |
835 | | ps_cu_analyse->u1_cu_size, |
836 | | ps_ctxt->u1_max_tr_depth); |
837 | | } |
838 | | #endif |
839 | | #if DISABLE_ZERO_ZBF_IN_INTER |
840 | | ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF; |
841 | | #else |
842 | 3.24M | ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE; |
843 | 3.24M | #endif |
844 | | /* Recon loop with different TUs based on partition type*/ |
845 | 3.24M | rd_opt_cost += ((pf_inter_rdopt_cu_ntu)ps_ctxt->pv_inter_rdopt_cu_ntu)( |
846 | 3.24M | ps_ctxt, |
847 | 3.24M | ps_cu_prms, |
848 | 3.24M | pv_curr_src, |
849 | 3.24M | ps_cu_analyse->u1_cu_size, |
850 | 3.24M | ps_cu_analyse->b3_cu_pos_x, |
851 | 3.24M | ps_cu_analyse->b3_cu_pos_y, |
852 | 3.24M | rd_opt_curr_idx, |
853 | 3.24M | &s_chrm_cu_buf_prms, |
854 | 3.24M | ps_inter_cand, |
855 | 3.24M | ps_cu_analyse, |
856 | 3.24M | !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT |
857 | 3.24M | : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * |
858 | 2.71M | (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) / |
859 | 2.71M | 100.0); |
860 | | |
861 | 3.24M | #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY |
862 | 3.24M | if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy) |
863 | 0 | { |
864 | 0 | ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf; |
865 | 0 | ps_ctxt->i8_cl_ssd_lambda_chroma_qf = |
866 | 0 | ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf; |
867 | 0 | } |
868 | 3.24M | #endif |
869 | | |
870 | | /* based on the rd opt cost choose the best and current index */ |
871 | 3.24M | if(rd_opt_cost < rd_opt_least_cost) |
872 | 1.83M | { |
873 | | /* swap the best and current indx */ |
874 | 1.83M | rd_opt_best_idx = !rd_opt_best_idx; |
875 | 1.83M | rd_opt_curr_idx = !rd_opt_curr_idx; |
876 | | |
877 | 1.83M | ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost; |
878 | 1.83M | rd_opt_least_cost = rd_opt_cost; |
879 | 1.83M | i4_best_cu_qp = ps_ctxt->i4_cu_qp; |
880 | | |
881 | | /* Store the best Inter cand. for final_recon function */ |
882 | 1.83M | ps_best_inter_cand = ps_inter_cand; |
883 | 1.83M | } |
884 | | |
885 | | /* set the neighbour map to 0 */ |
886 | 3.24M | ihevce_set_nbr_map( |
887 | 3.24M | ps_ctxt->pu1_ctb_nbr_map, |
888 | 3.24M | ps_ctxt->i4_nbr_map_strd, |
889 | 3.24M | (ps_cu_analyse->b3_cu_pos_x << 1), |
890 | 3.24M | (ps_cu_analyse->b3_cu_pos_y << 1), |
891 | 3.24M | (ps_cu_analyse->u1_cu_size >> 2), |
892 | 3.24M | 0); |
893 | | |
894 | 3.24M | } /* end of loop for all the Inter RD OPT cand */ |
895 | 1.07M | } |
896 | | /* --------------------------------------- */ |
897 | | /* ---- Conditional Eval of Intra -------- */ |
898 | | /* --------------------------------------- */ |
899 | 1.99M | { |
900 | 1.99M | enc_loop_cu_final_prms_t *ps_enc_loop_bestprms; |
901 | 1.99M | ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx]; |
902 | | |
903 | | /* check if inter candidates are valid */ |
904 | 1.99M | if(0 != ps_cu_analyse->u1_num_inter_cands) |
905 | 1.07M | { |
906 | | /* if skip or no residual inter candidates has won then */ |
907 | | /* evaluation of intra candidates is disabled */ |
908 | 1.07M | if((1 == ps_enc_loop_bestprms->u1_skip_flag) || |
909 | 847k | (0 == ps_enc_loop_bestprms->u1_is_cu_coded)) |
910 | 842k | { |
911 | 842k | enable_intra_eval_flag = 0; |
912 | 842k | } |
913 | 1.07M | } |
914 | | /* Disable Intra Gating for HIGH QUALITY PRESET */ |
915 | 1.99M | #if !ENABLE_INTRA_GATING_FOR_HQ |
916 | 1.99M | if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset) |
917 | 1.28M | { |
918 | 1.28M | enable_intra_eval_flag = 1; |
919 | | |
920 | 1.28M | #if DISABLE_LARGE_INTRA_PQ |
921 | 1.28M | if((IHEVCE_QUALITY_P0 == ps_ctxt->i4_quality_preset) && (ps_cu_prms->u1_is_cu_noisy) && |
922 | 0 | (ps_ctxt->i1_slice_type != ISLICE) && (0 != s_inter_cu_mode_info.u1_num_inter_cands)) |
923 | 0 | { |
924 | 0 | if(ps_cu_analyse->u1_cu_size > 16) |
925 | 0 | { |
926 | | /* Disable 32x32 / 64x64 Intra in PQ P and B pics */ |
927 | 0 | enable_intra_eval_flag = 0; |
928 | 0 | } |
929 | 0 | else if(ps_cu_analyse->u1_cu_size == 16) |
930 | 0 | { |
931 | | /* Disable tu equal to cu mode in 16x16 Intra in PQ P and B pics */ |
932 | 0 | ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255; |
933 | 0 | } |
934 | 0 | } |
935 | 1.28M | #endif |
936 | 1.28M | } |
937 | 1.99M | #endif |
938 | 1.99M | } |
939 | | |
940 | | /* --------------------------------------- */ |
941 | | /* ------ Intra RD OPT stage ------------- */ |
942 | | /* --------------------------------------- */ |
943 | | |
944 | | /* -- run a loop over all Intra rd opt cands ------ */ |
945 | 1.99M | if((0 != ps_cu_analyse->u1_num_intra_rdopt_cands) && (1 == enable_intra_eval_flag)) |
946 | 1.71M | { |
947 | 1.71M | LWORD64 rd_opt_cost; |
948 | 1.71M | WORD32 end_flag = 0; |
949 | 1.71M | WORD32 cu_eval_done = 0; |
950 | 1.71M | WORD32 subcu_eval_done = 0; |
951 | 1.71M | WORD32 subpu_eval_done = 0; |
952 | 1.71M | WORD32 max_trans_size; |
953 | 1.71M | WORD32 sync_wait_stride; |
954 | 1.71M | max_trans_size = MIN(MAX_TU_SIZE, (ps_cu_analyse->u1_cu_size)); |
955 | 1.71M | sync_wait_stride = (ps_cu_analyse->u1_cu_size) + max_trans_size; |
956 | | |
957 | 1.71M | if(!ps_ctxt->u1_use_top_at_ctb_boundary) |
958 | 431k | { |
959 | | /* Wait till top data is ready */ |
960 | | /* Currently checking till top right CU */ |
961 | 431k | curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3); |
962 | | |
963 | 431k | if(i4_ctb_y_off == 0) |
964 | 299k | { |
965 | | /* No wait for 1st row */ |
966 | 299k | cu_top_right_offset = -(MAX_CTB_SIZE); |
967 | 299k | { |
968 | 299k | ihevce_tile_params_t *ps_col_tile_params = |
969 | 299k | ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base + |
970 | 299k | ps_ctxt->i4_tile_col_idx); |
971 | | /* No wait for 1st row */ |
972 | 299k | cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE)); |
973 | 299k | } |
974 | 299k | cu_top_right_dep_pos = 0; |
975 | 299k | } |
976 | 131k | else |
977 | 131k | { |
978 | 131k | cu_top_right_offset = sync_wait_stride; |
979 | 131k | cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1; |
980 | 131k | } |
981 | | |
982 | 431k | if(0 == ps_cu_analyse->b3_cu_pos_y) |
983 | 131k | { |
984 | 131k | ihevce_dmgr_chk_row_row_sync( |
985 | 131k | ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right, |
986 | 131k | curr_cu_pos_in_row, |
987 | 131k | cu_top_right_offset, |
988 | 131k | cu_top_right_dep_pos, |
989 | 131k | ps_ctxt->i4_tile_col_idx, /* Col Tile No. */ |
990 | 131k | ps_ctxt->thrd_id); |
991 | 131k | } |
992 | 431k | } |
993 | 1.71M | ctr = 0; |
994 | | |
995 | | /* Zero cbf tool is disabled for intra CUs */ |
996 | | #if ENABLE_ZERO_CBF_IN_INTRA |
997 | | ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE; |
998 | | #else |
999 | 1.71M | ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF; |
1000 | 1.71M | #endif |
1001 | | |
1002 | | /* Intra Mode gating based on MPM cand list and encoder quality preset */ |
1003 | 1.71M | if((ps_ctxt->i1_slice_type != ISLICE) && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3)) |
1004 | 115k | { |
1005 | 115k | ihevce_mpm_idx_based_filter_RDOPT_cand( |
1006 | 115k | ps_ctxt, |
1007 | 115k | ps_cu_analyse, |
1008 | 115k | ps_left_nbr_4x4, |
1009 | 115k | ps_top_nbr_4x4, |
1010 | 115k | &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0], |
1011 | 115k | &ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[0]); |
1012 | | |
1013 | 115k | ihevce_mpm_idx_based_filter_RDOPT_cand( |
1014 | 115k | ps_ctxt, |
1015 | 115k | ps_cu_analyse, |
1016 | 115k | ps_left_nbr_4x4, |
1017 | 115k | ps_top_nbr_4x4, |
1018 | 115k | &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0], |
1019 | 115k | &ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[0]); |
1020 | 115k | } |
1021 | | |
1022 | | /* Call Chroma SATD function for curr_func_mode in HIGH QUALITY mode */ |
1023 | 1.71M | if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd) |
1024 | 1.28M | { |
1025 | | /* For cu_size = 64, there won't be any TU_EQ_CU case */ |
1026 | 1.28M | if(64 != ps_cu_analyse->u1_cu_size) |
1027 | 1.27M | { |
1028 | | /* RDOPT copy States : Prev Cu best to current init */ |
1029 | 1.27M | COPY_CABAC_STATES( |
1030 | 1.27M | &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
1031 | 1.27M | &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0], |
1032 | 1.27M | IHEVC_CAB_CTXT_END); |
1033 | | |
1034 | | /* RDOPT related copies and settings */ |
1035 | 1.27M | ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx; |
1036 | | |
1037 | | /* Calc. best SATD mode for TU_EQ_CU case */ |
1038 | 1.27M | ((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)( |
1039 | 1.27M | ps_ctxt, |
1040 | 1.27M | &s_chrm_cu_buf_prms, |
1041 | 1.27M | ps_cu_analyse, |
1042 | 1.27M | rd_opt_curr_idx, |
1043 | 1.27M | TU_EQ_CU, |
1044 | 1.27M | !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT |
1045 | 1.27M | : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * |
1046 | 1.15M | (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) / |
1047 | 1.15M | 100.0, |
1048 | 1.27M | ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY); |
1049 | | |
1050 | 1.27M | #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY |
1051 | 1.27M | if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy) |
1052 | 0 | { |
1053 | 0 | ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf; |
1054 | 0 | ps_ctxt->i8_cl_ssd_lambda_chroma_qf = |
1055 | 0 | ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf; |
1056 | 0 | } |
1057 | 1.27M | #endif |
1058 | 1.27M | } |
1059 | | |
1060 | | /* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and |
1061 | | TU_EQ_CU_DIV2 case */ |
1062 | | |
1063 | 1.28M | if((ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] != |
1064 | 1.28M | 255) && |
1065 | 191k | (8 != ps_cu_analyse->u1_cu_size)) |
1066 | 191k | { |
1067 | | /* RDOPT copy States : Prev Cu best to current init */ |
1068 | 191k | COPY_CABAC_STATES( |
1069 | 191k | &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
1070 | 191k | &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0], |
1071 | 191k | IHEVC_CAB_CTXT_END); |
1072 | | |
1073 | | /* RDOPT related copies and settings */ |
1074 | 191k | ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx; |
1075 | | |
1076 | | /* Calc. best SATD mode for TU_EQ_CU_DIV2 case */ |
1077 | 191k | ((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)( |
1078 | 191k | ps_ctxt, |
1079 | 191k | &s_chrm_cu_buf_prms, |
1080 | 191k | ps_cu_analyse, |
1081 | 191k | rd_opt_curr_idx, |
1082 | 191k | TU_EQ_CU_DIV2, |
1083 | 191k | !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT |
1084 | 191k | : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * |
1085 | 188k | (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) / |
1086 | 188k | 100.0, |
1087 | 191k | ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY); |
1088 | | |
1089 | 191k | #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY |
1090 | 191k | if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy) |
1091 | 0 | { |
1092 | 0 | ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf; |
1093 | 0 | ps_ctxt->i8_cl_ssd_lambda_chroma_qf = |
1094 | 0 | ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf; |
1095 | 0 | } |
1096 | 191k | #endif |
1097 | 191k | } |
1098 | 1.28M | } |
1099 | | |
1100 | 8.48M | while(0 == end_flag) |
1101 | 8.48M | { |
1102 | 8.48M | UWORD8 *pu1_mode = NULL; |
1103 | 8.48M | WORD32 curr_func_mode = 0; |
1104 | 8.48M | void *pv_pred; |
1105 | | |
1106 | 8.48M | ASSERT(ctr < 36); |
1107 | | |
1108 | | /* TU equal to CU size evaluation of different modes */ |
1109 | 8.48M | if(0 == cu_eval_done) |
1110 | 6.47M | { |
1111 | | /* check if all the modes have been evaluated */ |
1112 | 6.47M | if(255 == ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr]) |
1113 | 1.66M | { |
1114 | 1.66M | cu_eval_done = 1; |
1115 | 1.66M | ctr = 0; |
1116 | 1.66M | } |
1117 | 4.80M | else if( |
1118 | 4.80M | (1 == ctr) && |
1119 | 1.69M | ((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) || |
1120 | 1.63M | (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) && |
1121 | 232k | (ps_ctxt->i1_slice_type != ISLICE)) |
1122 | 47.8k | { |
1123 | 47.8k | ctr = 0; |
1124 | 47.8k | cu_eval_done = 1; |
1125 | 47.8k | subcu_eval_done = 1; |
1126 | 47.8k | subpu_eval_done = 1; |
1127 | 47.8k | } |
1128 | 4.76M | else |
1129 | 4.76M | { |
1130 | 4.76M | if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[ctr]) |
1131 | 55.8k | { |
1132 | 55.8k | ctr++; |
1133 | 55.8k | continue; |
1134 | 55.8k | } |
1135 | | |
1136 | 4.70M | pu1_mode = |
1137 | 4.70M | &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr]; |
1138 | 4.70M | ctr++; |
1139 | 4.70M | curr_func_mode = TU_EQ_CU; |
1140 | 4.70M | } |
1141 | 6.47M | } |
1142 | | /* Sub CU (NXN) mode evaluation of different pred modes */ |
1143 | 8.42M | if((0 == subpu_eval_done) && (1 == cu_eval_done)) |
1144 | 2.12M | { |
1145 | | /*For NxN modes evaluation all candidates for all PU parts are evaluated */ |
1146 | | /*inside the ihevce_intra_rdopt_cu_ntu function, so the subpu_eval_done is set to 1 */ |
1147 | 2.12M | { |
1148 | 2.12M | pu1_mode = &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][ctr]; |
1149 | | |
1150 | 2.12M | curr_func_mode = TU_EQ_SUBCU; |
1151 | | /* check if any modes have to be evaluated */ |
1152 | 2.12M | if(255 == *pu1_mode) |
1153 | 1.20M | { |
1154 | 1.20M | subpu_eval_done = 1; |
1155 | 1.20M | ctr = 0; |
1156 | 1.20M | } |
1157 | 921k | else if(ctr != 0) /* If the modes have to be evaluated, then terminate, as all modes are already evaluated */ |
1158 | 460k | { |
1159 | 460k | subpu_eval_done = 1; |
1160 | 460k | ctr = 0; |
1161 | 460k | } |
1162 | 460k | else |
1163 | 460k | { |
1164 | 460k | ctr++; |
1165 | 460k | } |
1166 | 2.12M | } |
1167 | 2.12M | } |
1168 | | |
1169 | | /* TU size equal to CU div2 mode evaluation of different pred modes */ |
1170 | 8.42M | if((0 == subcu_eval_done) && (1 == subpu_eval_done) && (1 == cu_eval_done)) |
1171 | 3.21M | { |
1172 | | /* check if all the modes have been evaluated */ |
1173 | 3.21M | if(255 == |
1174 | 3.21M | ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr]) |
1175 | 1.66M | { |
1176 | 1.66M | subcu_eval_done = 1; |
1177 | 1.66M | } |
1178 | 1.54M | else if( |
1179 | 1.54M | (1 == ctr) && |
1180 | 551k | ((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) || |
1181 | 507k | (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) && |
1182 | 144k | (ps_ctxt->i1_slice_type != ISLICE) && (ps_cu_analyse->u1_cu_size == 64)) |
1183 | 6 | { |
1184 | 6 | subcu_eval_done = 1; |
1185 | 6 | } |
1186 | 1.54M | else |
1187 | 1.54M | { |
1188 | 1.54M | if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[ctr]) |
1189 | 55.8k | { |
1190 | 55.8k | ctr++; |
1191 | 55.8k | continue; |
1192 | 55.8k | } |
1193 | | |
1194 | 1.48M | pu1_mode = &ps_cu_analyse->s_cu_intra_cand |
1195 | 1.48M | .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr]; |
1196 | | |
1197 | 1.48M | ctr++; |
1198 | 1.48M | curr_func_mode = TU_EQ_CU_DIV2; |
1199 | 1.48M | } |
1200 | 3.21M | } |
1201 | | |
1202 | | /* check if all CU options have been evaluated */ |
1203 | 8.36M | if((1 == cu_eval_done) && (1 == subcu_eval_done) && (1 == subpu_eval_done)) |
1204 | 1.71M | { |
1205 | 1.71M | break; |
1206 | 1.71M | } |
1207 | | |
1208 | | /* RDOPT related copies and settings */ |
1209 | 6.65M | ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx; |
1210 | | |
1211 | | /* Assign ME/Intra pred buf. to the current intra cand. since we |
1212 | | are storing pred data for final_recon function */ |
1213 | 6.65M | { |
1214 | 6.65M | pv_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_curr_idx]; |
1215 | 6.65M | } |
1216 | | |
1217 | | /* RDOPT copy States : Prev Cu best to current init */ |
1218 | 6.65M | COPY_CABAC_STATES( |
1219 | 6.65M | &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
1220 | 6.65M | &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0], |
1221 | 6.65M | IHEVC_CAB_CTXT_END); |
1222 | | |
1223 | | /* call the function which performs the normative Intra encode */ |
1224 | 6.65M | rd_opt_cost = ((pf_intra_rdopt_cu_ntu)ps_ctxt->pv_intra_rdopt_cu_ntu)( |
1225 | 6.65M | ps_ctxt, |
1226 | 6.65M | ps_cu_prms, |
1227 | 6.65M | pv_pred, |
1228 | 6.65M | ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_curr_idx], |
1229 | 6.65M | &s_chrm_cu_buf_prms, |
1230 | 6.65M | pu1_mode, |
1231 | 6.65M | ps_cu_analyse, |
1232 | 6.65M | pv_curr_src, |
1233 | 6.65M | pv_cu_left, |
1234 | 6.65M | pv_cu_top, |
1235 | 6.65M | pv_cu_top_left, |
1236 | 6.65M | ps_left_nbr_4x4, |
1237 | 6.65M | ps_top_nbr_4x4, |
1238 | 6.65M | nbr_4x4_left_strd, |
1239 | 6.65M | cu_left_stride, |
1240 | 6.65M | rd_opt_curr_idx, |
1241 | 6.65M | curr_func_mode, |
1242 | 6.65M | !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT |
1243 | 6.65M | : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * |
1244 | 6.24M | (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) / |
1245 | 6.24M | 100.0); |
1246 | | |
1247 | 6.65M | #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY |
1248 | 6.65M | if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy) |
1249 | 0 | { |
1250 | 0 | ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf; |
1251 | 0 | ps_ctxt->i8_cl_ssd_lambda_chroma_qf = |
1252 | 0 | ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf; |
1253 | 0 | } |
1254 | 6.65M | #endif |
1255 | | |
1256 | | /* based on the rd opt cost choose the best and current index */ |
1257 | 6.65M | if(rd_opt_cost < rd_opt_least_cost) |
1258 | 1.70M | { |
1259 | | /* swap the best and current indx */ |
1260 | 1.70M | rd_opt_best_idx = !rd_opt_best_idx; |
1261 | 1.70M | rd_opt_curr_idx = !rd_opt_curr_idx; |
1262 | 1.70M | i4_best_cu_qp = ps_ctxt->i4_cu_qp; |
1263 | | |
1264 | 1.70M | rd_opt_least_cost = rd_opt_cost; |
1265 | 1.70M | ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost; |
1266 | 1.70M | } |
1267 | | |
1268 | 6.65M | if((TU_EQ_SUBCU == curr_func_mode) && |
1269 | 460k | (ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_intra_flag) && |
1270 | 374k | (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P2) && !FORCE_INTRA_TU_DEPTH_TO_0) |
1271 | 214k | { |
1272 | 214k | UWORD8 au1_tu_eq_cu_div2_modes[4]; |
1273 | 214k | UWORD8 au1_freq_of_mode[4]; |
1274 | | |
1275 | 214k | if(ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_part_mode == SIZE_2Nx2N) |
1276 | 79.7k | { |
1277 | 79.7k | ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = |
1278 | 79.7k | 255; //ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode[0]; |
1279 | 79.7k | ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = |
1280 | 79.7k | 255; |
1281 | 79.7k | } |
1282 | 134k | else |
1283 | 134k | { |
1284 | 134k | WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D( |
1285 | 134k | ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode, |
1286 | 134k | au1_tu_eq_cu_div2_modes, |
1287 | 134k | au1_freq_of_mode, |
1288 | 134k | 4); |
1289 | | |
1290 | 134k | if(2 == i4_num_clusters) |
1291 | 51.9k | { |
1292 | 51.9k | if(au1_freq_of_mode[0] == 3) |
1293 | 22.9k | { |
1294 | 22.9k | ps_cu_analyse->s_cu_intra_cand |
1295 | 22.9k | .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = |
1296 | 22.9k | au1_tu_eq_cu_div2_modes[0]; |
1297 | 22.9k | ps_cu_analyse->s_cu_intra_cand |
1298 | 22.9k | .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255; |
1299 | 22.9k | } |
1300 | 28.9k | else if(au1_freq_of_mode[1] == 3) |
1301 | 4.93k | { |
1302 | 4.93k | ps_cu_analyse->s_cu_intra_cand |
1303 | 4.93k | .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = |
1304 | 4.93k | au1_tu_eq_cu_div2_modes[1]; |
1305 | 4.93k | ps_cu_analyse->s_cu_intra_cand |
1306 | 4.93k | .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255; |
1307 | 4.93k | } |
1308 | 24.0k | else |
1309 | 24.0k | { |
1310 | 24.0k | ps_cu_analyse->s_cu_intra_cand |
1311 | 24.0k | .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = |
1312 | 24.0k | au1_tu_eq_cu_div2_modes[0]; |
1313 | 24.0k | ps_cu_analyse->s_cu_intra_cand |
1314 | 24.0k | .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = |
1315 | 24.0k | au1_tu_eq_cu_div2_modes[1]; |
1316 | 24.0k | ps_cu_analyse->s_cu_intra_cand |
1317 | 24.0k | .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[2] = 255; |
1318 | 24.0k | } |
1319 | 51.9k | } |
1320 | 134k | } |
1321 | 214k | } |
1322 | | |
1323 | | /* set the neighbour map to 0 */ |
1324 | 6.65M | ihevce_set_nbr_map( |
1325 | 6.65M | ps_ctxt->pu1_ctb_nbr_map, |
1326 | 6.65M | ps_ctxt->i4_nbr_map_strd, |
1327 | 6.65M | (ps_cu_analyse->b3_cu_pos_x << 1), |
1328 | 6.65M | (ps_cu_analyse->b3_cu_pos_y << 1), |
1329 | 6.65M | (ps_cu_analyse->u1_cu_size >> 2), |
1330 | 6.65M | 0); |
1331 | 6.65M | } |
1332 | | |
1333 | 1.71M | } /* end of Intra RD OPT cand evaluation */ |
1334 | | |
1335 | 1.99M | ASSERT(i4_best_cu_qp > (ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1)); |
1336 | 1.99M | ps_ctxt->i4_cu_qp = i4_best_cu_qp; |
1337 | 1.99M | ps_cu_analyse->i1_cu_qp = i4_best_cu_qp; |
1338 | | |
1339 | | /* --------------------------------------- */ |
1340 | | /* --------Final mode Recon ---------- */ |
1341 | | /* --------------------------------------- */ |
1342 | 1.99M | { |
1343 | 1.99M | enc_loop_cu_final_prms_t *ps_enc_loop_bestprms; |
1344 | 1.99M | void *pv_final_pred = NULL; |
1345 | 1.99M | WORD32 final_pred_strd = 0; |
1346 | 1.99M | void *pv_final_pred_chrm = NULL; |
1347 | 1.99M | WORD32 final_pred_strd_chrm = 0; |
1348 | 1.99M | WORD32 packed_pred_mode; |
1349 | | |
1350 | 1.99M | #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS |
1351 | 1.99M | if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2) |
1352 | 1.18M | { |
1353 | 1.18M | pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0]; |
1354 | 1.18M | } |
1355 | | #else |
1356 | | pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0]; |
1357 | | #endif |
1358 | | |
1359 | 1.99M | ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx]; |
1360 | 1.99M | packed_pred_mode = |
1361 | 1.99M | ps_enc_loop_bestprms->u1_intra_flag + (ps_enc_loop_bestprms->u1_skip_flag) * 2; |
1362 | | |
1363 | 1.99M | if(!ps_ctxt->u1_is_input_data_hbd) |
1364 | 1.99M | { |
1365 | 1.99M | if(ps_enc_loop_bestprms->u1_intra_flag) |
1366 | 995k | { |
1367 | 995k | pv_final_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_best_idx]; |
1368 | 995k | final_pred_strd = |
1369 | 995k | ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_best_idx]; |
1370 | 995k | } |
1371 | 999k | else |
1372 | 999k | { |
1373 | 999k | pv_final_pred = ps_best_inter_cand->pu1_pred_data; |
1374 | 999k | final_pred_strd = ps_best_inter_cand->i4_pred_data_stride; |
1375 | 999k | } |
1376 | | |
1377 | 1.99M | pv_final_pred_chrm = |
1378 | 1.99M | ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] + |
1379 | 1.99M | rd_opt_best_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) + |
1380 | 1.99M | (u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1))); |
1381 | 1.99M | final_pred_strd_chrm = |
1382 | 1.99M | ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX]; |
1383 | 1.99M | } |
1384 | | |
1385 | 1.99M | ihevce_set_eval_flags(ps_ctxt, ps_enc_loop_bestprms); |
1386 | | |
1387 | 1.99M | { |
1388 | 1.99M | final_mode_process_prms_t s_prms; |
1389 | | |
1390 | 1.99M | void *pv_cu_luma_recon; |
1391 | 1.99M | void *pv_cu_chroma_recon; |
1392 | 1.99M | WORD32 luma_stride, chroma_stride; |
1393 | | |
1394 | 1.99M | if(!ps_ctxt->u1_is_input_data_hbd) |
1395 | 1.99M | { |
1396 | 1.99M | #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS |
1397 | 1.99M | if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2) |
1398 | 1.18M | { |
1399 | 1.18M | pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon; |
1400 | 1.18M | pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon; |
1401 | 1.18M | luma_stride = ps_cu_analyse->u1_cu_size; |
1402 | 1.18M | chroma_stride = ps_cu_analyse->u1_cu_size; |
1403 | 1.18M | } |
1404 | 814k | else |
1405 | 814k | { |
1406 | | /* based on CU position derive the luma pointers */ |
1407 | 814k | pv_cu_luma_recon = pu1_final_recon; |
1408 | | |
1409 | | /* based on CU position derive the chroma pointers */ |
1410 | 814k | pv_cu_chroma_recon = s_chrm_cu_buf_prms.pu1_final_recon; |
1411 | | |
1412 | 814k | luma_stride = ps_cu_prms->i4_luma_recon_stride; |
1413 | | |
1414 | 814k | chroma_stride = ps_cu_prms->i4_chrm_recon_stride; |
1415 | 814k | } |
1416 | | #else |
1417 | | pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon; |
1418 | | pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon; |
1419 | | luma_stride = ps_cu_analyse->u1_cu_size; |
1420 | | chroma_stride = ps_cu_analyse->u1_cu_size; |
1421 | | #endif |
1422 | | |
1423 | 1.99M | s_prms.ps_cu_nbr_prms = &s_cu_nbr_prms; |
1424 | 1.99M | s_prms.ps_best_inter_cand = ps_best_inter_cand; |
1425 | 1.99M | s_prms.ps_chrm_cu_buf_prms = &s_chrm_cu_buf_prms; |
1426 | 1.99M | s_prms.packed_pred_mode = packed_pred_mode; |
1427 | 1.99M | s_prms.rd_opt_best_idx = rd_opt_best_idx; |
1428 | 1.99M | s_prms.pv_src = pu1_curr_src; |
1429 | 1.99M | s_prms.src_strd = ps_cu_prms->i4_luma_src_stride; |
1430 | 1.99M | s_prms.pv_pred = pv_final_pred; |
1431 | 1.99M | s_prms.pred_strd = final_pred_strd; |
1432 | 1.99M | s_prms.pv_pred_chrm = pv_final_pred_chrm; |
1433 | 1.99M | s_prms.pred_chrm_strd = final_pred_strd_chrm; |
1434 | 1.99M | s_prms.pu1_final_ecd_data = pu1_ecd_data; |
1435 | 1.99M | s_prms.pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0]; |
1436 | 1.99M | s_prms.csbf_strd = ps_ctxt->i4_cu_csbf_strd; |
1437 | 1.99M | s_prms.pv_luma_recon = pv_cu_luma_recon; |
1438 | 1.99M | s_prms.recon_luma_strd = luma_stride; |
1439 | 1.99M | s_prms.pv_chrm_recon = pv_cu_chroma_recon; |
1440 | 1.99M | s_prms.recon_chrma_strd = chroma_stride; |
1441 | 1.99M | s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x; |
1442 | 1.99M | s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y; |
1443 | 1.99M | s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size; |
1444 | 1.99M | s_prms.i1_cu_qp = ps_cu_analyse->i1_cu_qp; |
1445 | 1.99M | s_prms.u1_will_cabac_state_change = 1; |
1446 | 1.99M | s_prms.u1_recompute_sbh_and_rdoq = 0; |
1447 | 1.99M | s_prms.u1_is_first_pass = 1; |
1448 | 1.99M | } |
1449 | | |
1450 | 1.99M | #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
1451 | 1.99M | s_prms.u1_is_cu_noisy = !ps_enc_loop_bestprms->u1_intra_flag |
1452 | 1.99M | ? ps_cu_prms->u1_is_cu_noisy |
1453 | 1.99M | : ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY; |
1454 | 1.99M | #endif |
1455 | | |
1456 | 1.99M | ((pf_final_rdopt_mode_prcs)ps_ctxt->pv_final_rdopt_mode_prcs)(ps_ctxt, &s_prms); |
1457 | | |
1458 | 1.99M | #if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY |
1459 | 1.99M | if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy) |
1460 | 0 | { |
1461 | 0 | ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf; |
1462 | 0 | ps_ctxt->i8_cl_ssd_lambda_chroma_qf = |
1463 | 0 | ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf; |
1464 | 0 | } |
1465 | 1.99M | #endif |
1466 | 1.99M | } |
1467 | 1.99M | } |
1468 | | |
1469 | | /* --------------------------------------- */ |
1470 | | /* --------Populate CU out prms ---------- */ |
1471 | | /* --------------------------------------- */ |
1472 | 1.99M | { |
1473 | 1.99M | enc_loop_cu_final_prms_t *ps_enc_loop_bestprms; |
1474 | 1.99M | UWORD8 *pu1_pu_map; |
1475 | 1.99M | ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx]; |
1476 | | |
1477 | | /* Corner case : If Part is 2Nx2N and Merge has all TU with zero cbf */ |
1478 | | /* then it has to be coded as skip CU */ |
1479 | 1.99M | if((SIZE_2Nx2N == ps_enc_loop_bestprms->u1_part_mode) && |
1480 | 1.76M | (1 == ps_enc_loop_bestprms->as_pu_enc_loop[0].b1_merge_flag) && |
1481 | 841k | (0 == ps_enc_loop_bestprms->u1_skip_flag) && (0 == ps_enc_loop_bestprms->u1_is_cu_coded)) |
1482 | 535k | { |
1483 | 535k | ps_enc_loop_bestprms->u1_skip_flag = 1; |
1484 | 535k | } |
1485 | | |
1486 | | /* update number PUs in CU */ |
1487 | 1.99M | ps_cu_prms->i4_num_pus_in_cu = ps_enc_loop_bestprms->u2_num_pus_in_cu; |
1488 | | |
1489 | | /* ---- populate the colocated pu map index --- */ |
1490 | 4.04M | for(ctr = 0; ctr < ps_enc_loop_bestprms->u2_num_pus_in_cu; ctr++) |
1491 | 2.04M | { |
1492 | 2.04M | WORD32 i; |
1493 | 2.04M | WORD32 vert_ht; |
1494 | 2.04M | WORD32 horz_wd; |
1495 | | |
1496 | 2.04M | if(ps_enc_loop_bestprms->u1_intra_flag) |
1497 | 995k | { |
1498 | 995k | ps_enc_loop_bestprms->as_col_pu_enc_loop[ctr].b1_intra_flag = 1; |
1499 | 995k | vert_ht = ps_cu_analyse->u1_cu_size >> 2; |
1500 | 995k | horz_wd = ps_cu_analyse->u1_cu_size >> 2; |
1501 | 995k | } |
1502 | 1.04M | else |
1503 | 1.04M | { |
1504 | 1.04M | vert_ht = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_ht + 1) << 2) >> 2); |
1505 | 1.04M | horz_wd = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_wd + 1) << 2) >> 2); |
1506 | 1.04M | } |
1507 | | |
1508 | 2.04M | pu1_pu_map = pu1_col_pu_map + ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_x; |
1509 | 2.04M | pu1_pu_map += (ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_y * num_4x4_in_ctb); |
1510 | | |
1511 | 10.0M | for(i = 0; i < vert_ht; i++) |
1512 | 7.99M | { |
1513 | 7.99M | memset(pu1_pu_map, col_start_pu_idx, horz_wd); |
1514 | 7.99M | pu1_pu_map += num_4x4_in_ctb; |
1515 | 7.99M | } |
1516 | | /* increment the index */ |
1517 | 2.04M | col_start_pu_idx++; |
1518 | 2.04M | } |
1519 | | /* ---- copy the colocated PUs to frm pu ----- */ |
1520 | 1.99M | memcpy( |
1521 | 1.99M | ps_col_pu, |
1522 | 1.99M | &ps_enc_loop_bestprms->as_col_pu_enc_loop[0], |
1523 | 1.99M | ps_enc_loop_bestprms->u2_num_pus_in_cu * sizeof(pu_col_mv_t)); |
1524 | | |
1525 | | /*---populate qp for 4x4 nbr array based on skip and cbf zero flag---*/ |
1526 | 1.99M | { |
1527 | 1.99M | entropy_context_t *ps_entropy_ctxt; |
1528 | | |
1529 | 1.99M | WORD32 diff_cu_qp_delta_depth, log2_ctb_size; |
1530 | | |
1531 | 1.99M | WORD32 log2_min_cu_qp_delta_size; |
1532 | 1.99M | UWORD32 block_addr_align; |
1533 | 1.99M | ps_entropy_ctxt = ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt; |
1534 | | |
1535 | 1.99M | log2_ctb_size = ps_entropy_ctxt->i1_log2_ctb_size; |
1536 | 1.99M | diff_cu_qp_delta_depth = ps_entropy_ctxt->ps_pps->i1_diff_cu_qp_delta_depth; |
1537 | | |
1538 | 1.99M | log2_min_cu_qp_delta_size = log2_ctb_size - diff_cu_qp_delta_depth; |
1539 | 1.99M | block_addr_align = 15 << (log2_min_cu_qp_delta_size - 3); |
1540 | | |
1541 | 1.99M | ps_entropy_ctxt->i4_qg_pos_x = ps_cu_analyse->b3_cu_pos_x & block_addr_align; |
1542 | 1.99M | ps_entropy_ctxt->i4_qg_pos_y = ps_cu_analyse->b3_cu_pos_y & block_addr_align; |
1543 | | /*Update the Qp value used. It will not have a valid value iff |
1544 | | current CU is (skipped/no_cbf). In that case the Qp needed for |
1545 | | deblocking is calculated from top/left/previous coded CU*/ |
1546 | | |
1547 | 1.99M | ps_ctxt->ps_enc_out_ctxt->i1_cu_qp = ps_cu_analyse->i1_cu_qp; |
1548 | | |
1549 | 1.99M | if(ps_entropy_ctxt->i4_qg_pos_x == ps_cu_analyse->b3_cu_pos_x && |
1550 | 1.99M | ps_entropy_ctxt->i4_qg_pos_y == ps_cu_analyse->b3_cu_pos_y) |
1551 | 1.99M | { |
1552 | 1.99M | ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 1; |
1553 | 1.99M | } |
1554 | 0 | else |
1555 | 0 | { |
1556 | 0 | ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 0; |
1557 | 0 | } |
1558 | 1.99M | } |
1559 | | |
1560 | | /* -- at the end of CU set the neighbour map to 1 -- */ |
1561 | 1.99M | ihevce_set_nbr_map( |
1562 | 1.99M | ps_ctxt->pu1_ctb_nbr_map, |
1563 | 1.99M | ps_ctxt->i4_nbr_map_strd, |
1564 | 1.99M | (ps_cu_analyse->b3_cu_pos_x << 1), |
1565 | 1.99M | (ps_cu_analyse->b3_cu_pos_y << 1), |
1566 | 1.99M | (ps_cu_analyse->u1_cu_size >> 2), |
1567 | 1.99M | 1); |
1568 | | |
1569 | | /* -- at the end of CU update best cabac rdopt states -- */ |
1570 | | /* -- and also set the top row skip flags ------------- */ |
1571 | 1.99M | ihevce_entropy_update_best_cu_states( |
1572 | 1.99M | &ps_ctxt->s_rdopt_entropy_ctxt, |
1573 | 1.99M | ps_cu_analyse->b3_cu_pos_x, |
1574 | 1.99M | ps_cu_analyse->b3_cu_pos_y, |
1575 | 1.99M | ps_cu_analyse->u1_cu_size, |
1576 | 1.99M | 0, |
1577 | 1.99M | rd_opt_best_idx); |
1578 | 1.99M | } |
1579 | | |
1580 | | /* Store Output struct */ |
1581 | | #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS |
1582 | | { |
1583 | | { |
1584 | | memcpy( |
1585 | | &ps_ctxt->ps_enc_out_ctxt->s_cu_prms, |
1586 | | &ps_ctxt->as_cu_prms[rd_opt_best_idx], |
1587 | | sizeof(enc_loop_cu_final_prms_t)); |
1588 | | } |
1589 | | |
1590 | | memcpy( |
1591 | | &ps_ctxt->as_cu_recur_nbr[0], |
1592 | | &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0], |
1593 | | sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) * |
1594 | | (ps_cu_analyse->u1_cu_size >> 2)); |
1595 | | |
1596 | | ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms; |
1597 | | |
1598 | | ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0]; |
1599 | | } |
1600 | | #else |
1601 | 1.99M | if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P2) |
1602 | 814k | { |
1603 | 814k | ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx]; |
1604 | | |
1605 | 814k | ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0]; |
1606 | | |
1607 | 814k | if(ps_ctxt->u1_disable_intra_eval && ps_ctxt->i4_deblk_pad_hpel_cur_pic) |
1608 | 7.16k | { |
1609 | | /* Wait till top data is ready */ |
1610 | | /* Currently checking till top right CU */ |
1611 | 7.16k | curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3); |
1612 | | |
1613 | 7.16k | if(i4_ctb_y_off == 0) |
1614 | 7.05k | { |
1615 | | /* No wait for 1st row */ |
1616 | 7.05k | cu_top_right_offset = -(MAX_CTB_SIZE); |
1617 | 7.05k | { |
1618 | 7.05k | ihevce_tile_params_t *ps_col_tile_params = |
1619 | 7.05k | ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base + |
1620 | 7.05k | ps_ctxt->i4_tile_col_idx); |
1621 | | |
1622 | | /* No wait for 1st row */ |
1623 | 7.05k | cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE)); |
1624 | 7.05k | } |
1625 | 7.05k | cu_top_right_dep_pos = 0; |
1626 | 7.05k | } |
1627 | 109 | else |
1628 | 109 | { |
1629 | 109 | cu_top_right_offset = (ps_cu_analyse->u1_cu_size); |
1630 | 109 | cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1; |
1631 | 109 | } |
1632 | | |
1633 | 7.16k | if(0 == ps_cu_analyse->b3_cu_pos_y) |
1634 | 3.17k | { |
1635 | 3.17k | ihevce_dmgr_chk_row_row_sync( |
1636 | 3.17k | ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right, |
1637 | 3.17k | curr_cu_pos_in_row, |
1638 | 3.17k | cu_top_right_offset, |
1639 | 3.17k | cu_top_right_dep_pos, |
1640 | 3.17k | ps_ctxt->i4_tile_col_idx, /* Col Tile No. */ |
1641 | 3.17k | ps_ctxt->thrd_id); |
1642 | 3.17k | } |
1643 | 7.16k | } |
1644 | 814k | } |
1645 | 1.18M | else |
1646 | 1.18M | { |
1647 | 1.18M | { |
1648 | 1.18M | memcpy( |
1649 | 1.18M | &ps_ctxt->ps_enc_out_ctxt->s_cu_prms, |
1650 | 1.18M | &ps_ctxt->as_cu_prms[rd_opt_best_idx], |
1651 | 1.18M | sizeof(enc_loop_cu_final_prms_t)); |
1652 | 1.18M | } |
1653 | | |
1654 | 1.18M | memcpy( |
1655 | 1.18M | &ps_ctxt->as_cu_recur_nbr[0], |
1656 | 1.18M | &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0], |
1657 | 1.18M | sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) * |
1658 | 1.18M | (ps_cu_analyse->u1_cu_size >> 2)); |
1659 | | |
1660 | 1.18M | ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms; |
1661 | | |
1662 | 1.18M | ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0]; |
1663 | 1.18M | } |
1664 | 1.99M | #endif |
1665 | | |
1666 | 1.99M | ps_ctxt->s_pred_buf_data.u4_is_buf_in_use &= |
1667 | 1.99M | ~((1 << (ps_ctxt->i4_max_num_inter_rdopt_cands + 4)) - 1); |
1668 | | |
1669 | 1.99M | return rd_opt_least_cost; |
1670 | 1.99M | } |
1671 | | |
1672 | | /*! |
1673 | | ****************************************************************************** |
1674 | | * \if Function name : ihevce_enc_loop_process_row \endif |
1675 | | * |
1676 | | * \brief |
1677 | | * Row level enc_loop pass function |
1678 | | * |
1679 | | * \param[in] pv_ctxt : pointer to enc_loop module |
1680 | | * \param[in] ps_curr_src_bufs : pointer to input yuv buffer (row buffer) |
1681 | | * \param[out] ps_curr_recon_bufs : pointer to recon picture structure (row buffer) |
1682 | | * \param[in] ps_ctb_in : pointer to CTB structure (output of ME/IPE) (row buffer) |
1683 | | * \param[out] ps_ctb_out : pointer to CTB output structure (row buffer) |
1684 | | * \param[out] ps_cu_out : pointer to CU output structure (row buffer) |
1685 | | * \param[out] ps_tu_out : pointer to TU output structure (row buffer) |
1686 | | * \param[out] pi2_frm_coeffs : pointer to coeff output (row buffer) |
1687 | | * \param[in] i4_poc : current poc. Needed to send recon in dist-client mode |
1688 | | * |
1689 | | * \return |
1690 | | * None |
1691 | | * |
1692 | | * Note : Currently the frame level calculations done assume that the |
1693 | | * frame width of the input/recon is an exact multiple of ctbsize |
1694 | | * |
1695 | | * \author |
1696 | | * Ittiam |
1697 | | * |
1698 | | ***************************************************************************** |
1699 | | */ |
1700 | | void ihevce_enc_loop_process_row( |
1701 | | ihevce_enc_loop_ctxt_t *ps_ctxt, |
1702 | | iv_enc_yuv_buf_t *ps_curr_src_bufs, |
1703 | | iv_enc_yuv_buf_t *ps_curr_recon_bufs, |
1704 | | iv_enc_yuv_buf_src_t *ps_curr_recon_bufs_src, |
1705 | | UWORD8 **ppu1_y_subpel_planes, |
1706 | | ctb_analyse_t *ps_ctb_in, |
1707 | | ctb_enc_loop_out_t *ps_ctb_out, |
1708 | | ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse, |
1709 | | cur_ctb_cu_tree_t *ps_row_cu_tree, |
1710 | | cu_enc_loop_out_t *ps_row_cu, |
1711 | | tu_enc_loop_out_t *ps_row_tu, |
1712 | | pu_t *ps_row_pu, |
1713 | | pu_col_mv_t *ps_row_col_pu, |
1714 | | UWORD16 *pu2_num_pu_map, |
1715 | | UWORD8 *pu1_row_pu_map, |
1716 | | UWORD8 *pu1_row_ecd_data, |
1717 | | UWORD32 *pu4_pu_offsets, |
1718 | | frm_ctb_ctxt_t *ps_frm_ctb_prms, |
1719 | | WORD32 vert_ctr, |
1720 | | recon_pic_buf_t *ps_frm_recon, |
1721 | | void *pv_dep_mngr_encloop_dep_me, |
1722 | | pad_interp_recon_frm_t *ps_pad_interp_recon, |
1723 | | WORD32 i4_pass, |
1724 | | multi_thrd_ctxt_t *ps_multi_thrd_ctxt, |
1725 | | ihevce_tile_params_t *ps_tile_params) |
1726 | 104k | { |
1727 | 104k | enc_loop_cu_prms_t s_cu_prms; |
1728 | 104k | ctb_enc_loop_out_t *ps_ctb_out_dblk; |
1729 | | |
1730 | 104k | WORD32 ctb_ctr, ctb_start, ctb_end; |
1731 | 104k | WORD32 col_pu_map_idx; |
1732 | 104k | WORD32 num_ctbs_horz_pic; |
1733 | 104k | WORD32 ctb_size; |
1734 | 104k | WORD32 last_ctb_row_flag; |
1735 | 104k | WORD32 last_ctb_col_flag; |
1736 | 104k | WORD32 last_hz_ctb_wd; |
1737 | 104k | WORD32 last_vt_ctb_ht; |
1738 | 104k | void *pv_dep_mngr_enc_loop_dblk = ps_ctxt->pv_dep_mngr_enc_loop_dblk; |
1739 | 104k | void *pv_dep_mngr_enc_loop_sao = ps_ctxt->pv_dep_mngr_enc_loop_sao; |
1740 | 104k | void *pv_dep_mngr_enc_loop_cu_top_right = ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right; |
1741 | 104k | WORD32 dblk_offset, dblk_check_dep_pos; |
1742 | 104k | WORD32 sao_offset, sao_check_dep_pos; |
1743 | 104k | WORD32 aux_offset, aux_check_dep_pos; |
1744 | 104k | void *pv_dep_mngr_me_dep_encloop; |
1745 | 104k | ctb_enc_loop_out_t *ps_ctb_out_sao; |
1746 | | /*Structure to store deblocking parameters at CTB-row level*/ |
1747 | 104k | deblk_ctbrow_prms_t s_deblk_ctb_row_params; |
1748 | 104k | UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2); |
1749 | | |
1750 | 104k | pv_dep_mngr_me_dep_encloop = (void *)ps_frm_recon->pv_dep_mngr_recon; |
1751 | 104k | num_ctbs_horz_pic = ps_frm_ctb_prms->i4_num_ctbs_horz; |
1752 | 104k | ctb_size = ps_frm_ctb_prms->i4_ctb_size; |
1753 | | |
1754 | | /* Store the num_ctb_horz in sao context*/ |
1755 | 104k | ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_horz = ps_frm_ctb_prms->i4_num_ctbs_horz; |
1756 | 104k | ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_vert = ps_frm_ctb_prms->i4_num_ctbs_vert; |
1757 | | |
1758 | | /* Set Variables for Dep. Checking and Setting */ |
1759 | 104k | aux_check_dep_pos = vert_ctr; |
1760 | 104k | aux_offset = 2; /* Should be there for 0th row also */ |
1761 | 104k | if(vert_ctr > 0) |
1762 | 9.64k | { |
1763 | 9.64k | dblk_check_dep_pos = vert_ctr - 1; |
1764 | 9.64k | dblk_offset = 2; |
1765 | 9.64k | } |
1766 | 94.8k | else |
1767 | 94.8k | { |
1768 | | /* First row should run without waiting */ |
1769 | 94.8k | dblk_check_dep_pos = 0; |
1770 | 94.8k | dblk_offset = -(ps_tile_params->i4_first_sample_x + 1); |
1771 | 94.8k | } |
1772 | | |
1773 | | /* Set sao_offset and sao_check_dep_pos */ |
1774 | 104k | if(vert_ctr > 1) |
1775 | 4.95k | { |
1776 | 4.95k | sao_check_dep_pos = vert_ctr - 2; |
1777 | 4.95k | sao_offset = 2; |
1778 | 4.95k | } |
1779 | 99.5k | else |
1780 | 99.5k | { |
1781 | | /* First row should run without waiting */ |
1782 | 99.5k | sao_check_dep_pos = 0; |
1783 | 99.5k | sao_offset = -(ps_tile_params->i4_first_sample_x + 1); |
1784 | 99.5k | } |
1785 | | |
1786 | | /* check if the current row being processed is the last CTB row */ |
1787 | 104k | last_ctb_row_flag = (vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1)); |
1788 | | |
1789 | | /* Valid Width (pixels) in the last CTB in every row (padding cases) */ |
1790 | 104k | last_hz_ctb_wd = ps_frm_ctb_prms->i4_cu_aligned_pic_wd - ((num_ctbs_horz_pic - 1) * ctb_size); |
1791 | | |
1792 | | /* Valid Height (pixels) in the last CTB row (padding cases) */ |
1793 | 104k | last_vt_ctb_ht = ps_frm_ctb_prms->i4_cu_aligned_pic_ht - |
1794 | 104k | ((ps_frm_ctb_prms->i4_num_ctbs_vert - 1) * ctb_size); |
1795 | | /* reset the states copied flag */ |
1796 | 104k | ps_ctxt->u1_cabac_states_next_row_copied_flag = 0; |
1797 | 104k | ps_ctxt->u1_cabac_states_first_cu_copied_flag = 0; |
1798 | | |
1799 | | /* populate the cu prms which are common for entire ctb row */ |
1800 | 104k | s_cu_prms.i4_luma_src_stride = ps_curr_src_bufs->i4_y_strd; |
1801 | 104k | s_cu_prms.i4_chrm_src_stride = ps_curr_src_bufs->i4_uv_strd; |
1802 | 104k | s_cu_prms.i4_luma_recon_stride = ps_curr_recon_bufs->i4_y_strd; |
1803 | 104k | s_cu_prms.i4_chrm_recon_stride = ps_curr_recon_bufs->i4_uv_strd; |
1804 | 104k | s_cu_prms.i4_ctb_size = ctb_size; |
1805 | | |
1806 | 104k | ps_ctxt->i4_is_first_cu_qg_coded = 0; |
1807 | | |
1808 | | /* Initialize the number of PUs for the first CTB to 0 */ |
1809 | 104k | *pu2_num_pu_map = 0; |
1810 | | |
1811 | | /*Getting the address of BS and Qp arrays and other info*/ |
1812 | 104k | memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t)); |
1813 | 104k | { |
1814 | 104k | WORD32 num_ctbs_horz_tile; |
1815 | | /* Update the pointers which are accessed not by using ctb_ctr |
1816 | | to the tile start here! */ |
1817 | 104k | ps_ctb_in += ps_tile_params->i4_first_ctb_x; |
1818 | 104k | ps_ctb_out += ps_tile_params->i4_first_ctb_x; |
1819 | | |
1820 | 104k | ps_row_cu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_cus_in_ctb); |
1821 | 104k | ps_row_tu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_tus_in_ctb); |
1822 | 104k | ps_row_pu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb); |
1823 | 104k | pu1_row_pu_map += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb); |
1824 | 104k | pu1_row_ecd_data += |
1825 | 104k | (ps_tile_params->i4_first_ctb_x * |
1826 | 104k | ((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_num_tus_in_ctb << 1) |
1827 | 104k | : ((ps_frm_ctb_prms->i4_num_tus_in_ctb * 3) >> 1)) * |
1828 | 104k | MAX_SCAN_COEFFS_BYTES_4x4); |
1829 | | |
1830 | | /* Update the pointers to the tile start */ |
1831 | 104k | s_deblk_ctb_row_params.pu4_ctb_row_bs_vert += |
1832 | 104k | (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one vertical edge per 8x8 block |
1833 | 104k | s_deblk_ctb_row_params.pu4_ctb_row_bs_horz += |
1834 | 104k | (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one horizontal edge per 8x8 block |
1835 | 104k | s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2)); |
1836 | | |
1837 | 104k | num_ctbs_horz_tile = ps_tile_params->i4_curr_tile_wd_in_ctb_unit; |
1838 | | |
1839 | 104k | ctb_start = ps_tile_params->i4_first_ctb_x; |
1840 | 104k | ctb_end = ps_tile_params->i4_first_ctb_x + num_ctbs_horz_tile; |
1841 | 104k | } |
1842 | 104k | ps_ctb_out_dblk = ps_ctb_out; |
1843 | | |
1844 | 104k | ps_ctxt->i4_last_cu_qp_from_prev_ctb = ps_ctxt->i4_frame_qp; |
1845 | | |
1846 | | /* --------- Loop over all the CTBs in a row --------------- */ |
1847 | 235k | for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++) |
1848 | 131k | { |
1849 | 131k | cu_final_update_prms s_cu_update_prms; |
1850 | | |
1851 | 131k | cur_ctb_cu_tree_t *ps_cu_tree_analyse; |
1852 | 131k | me_ctb_data_t *ps_cu_me_data; |
1853 | 131k | ipe_l0_ctb_analyse_for_me_t *ps_ctb_ipe_analyse; |
1854 | 131k | cu_enc_loop_out_t *ps_cu_final; |
1855 | 131k | pu_col_mv_t *ps_ctb_col_pu; |
1856 | | |
1857 | 131k | WORD32 cur_ctb_ht, cur_ctb_wd; |
1858 | 131k | WORD32 last_cu_pos_in_ctb; |
1859 | 131k | WORD32 last_cu_size; |
1860 | 131k | WORD32 num_pus_in_ctb; |
1861 | 131k | UWORD8 u1_is_ctb_noisy; |
1862 | 131k | ps_ctb_col_pu = ps_row_col_pu + ctb_ctr * ps_frm_ctb_prms->i4_num_pus_in_ctb; |
1863 | | |
1864 | 131k | if(ctb_ctr) |
1865 | 26.5k | { |
1866 | 26.5k | ps_ctxt->i4_prev_QP = ps_ctxt->i4_last_cu_qp_from_prev_ctb; |
1867 | 26.5k | } |
1868 | | /*If Sub pic rc is enabled*/ |
1869 | 131k | if(ps_ctxt->i4_sub_pic_level_rc) |
1870 | 0 | { |
1871 | 0 | ihevce_sub_pic_rc_scale_query((void *)ps_multi_thrd_ctxt, (void *)ps_ctxt); |
1872 | 0 | } |
1873 | | /* check if the current CTB is in the last CTB column of the picture */ |
1874 | 131k | last_ctb_col_flag = (ctb_ctr == (num_ctbs_horz_pic - 1)); |
1875 | 131k | if(1 == last_ctb_col_flag) |
1876 | 104k | { |
1877 | 104k | cur_ctb_wd = last_hz_ctb_wd; |
1878 | 104k | } |
1879 | 26.5k | else |
1880 | 26.5k | { |
1881 | 26.5k | cur_ctb_wd = ctb_size; |
1882 | 26.5k | } |
1883 | | |
1884 | | /* If it's the last CTB, get the actual ht of CTB */ |
1885 | 131k | if(1 == last_ctb_row_flag) |
1886 | 108k | { |
1887 | 108k | cur_ctb_ht = last_vt_ctb_ht; |
1888 | 108k | } |
1889 | 23.0k | else |
1890 | 23.0k | { |
1891 | 23.0k | cur_ctb_ht = ctb_size; |
1892 | 23.0k | } |
1893 | | |
1894 | 131k | ps_ctxt->u4_cur_ctb_ht = cur_ctb_ht; |
1895 | 131k | ps_ctxt->u4_cur_ctb_wd = cur_ctb_wd; |
1896 | | |
1897 | | /* Wait till reference frame recon is available */ |
1898 | | |
1899 | | /* ------------ Wait till current data is ready from ME -------------- */ |
1900 | | |
1901 | | /*only for ref instance and Non I pics */ |
1902 | 131k | if((ps_ctxt->i4_bitrate_instance_num == 0) && |
1903 | 131k | ((ISLICE != ps_ctxt->i1_slice_type) || L0ME_IN_OPENLOOP_MODE)) |
1904 | 80.4k | { |
1905 | 80.4k | if(ctb_ctr < (num_ctbs_horz_pic)) |
1906 | 80.4k | { |
1907 | 80.4k | ihevce_dmgr_chk_row_row_sync( |
1908 | 80.4k | pv_dep_mngr_encloop_dep_me, |
1909 | 80.4k | ctb_ctr, |
1910 | 80.4k | 1, |
1911 | 80.4k | vert_ctr, |
1912 | 80.4k | ps_ctxt->i4_tile_col_idx, /* Col Tile No. */ |
1913 | 80.4k | ps_ctxt->thrd_id); |
1914 | 80.4k | } |
1915 | 80.4k | } |
1916 | | |
1917 | | /* store the cu pointer for current ctb out */ |
1918 | 131k | ps_ctb_out->ps_enc_cu = ps_row_cu; |
1919 | 131k | ps_cu_final = ps_row_cu; |
1920 | | |
1921 | | /* Get the base point of CU recursion tree */ |
1922 | 131k | if(ISLICE != ps_ctxt->i1_slice_type) |
1923 | 80.4k | { |
1924 | 80.4k | ps_cu_tree_analyse = ps_ctb_in->ps_cu_tree; |
1925 | 80.4k | ASSERT(ps_ctb_in->ps_cu_tree == (ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE))); |
1926 | 80.4k | } |
1927 | 50.6k | else |
1928 | 50.6k | { |
1929 | | /* Initialize ptr to current CTB */ |
1930 | 50.6k | ps_cu_tree_analyse = ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE); |
1931 | 50.6k | } |
1932 | | |
1933 | | /* Get the ME data pointer for 16x16 block data in ctb */ |
1934 | 131k | ps_cu_me_data = ps_ctb_in->ps_me_ctb_data; |
1935 | 131k | u1_is_ctb_noisy = ps_ctb_in->s_ctb_noise_params.i4_noise_present; |
1936 | 131k | s_cu_prms.u1_is_cu_noisy = u1_is_ctb_noisy; |
1937 | 131k | s_cu_prms.pu1_is_8x8Blk_noisy = ps_ctb_in->s_ctb_noise_params.au1_is_8x8Blk_noisy; |
1938 | | |
1939 | | /* store the ctb level prms in cu prms */ |
1940 | 131k | s_cu_prms.i4_ctb_pos = ctb_ctr; |
1941 | | |
1942 | 131k | s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size; |
1943 | 131k | s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size; |
1944 | | |
1945 | 131k | { |
1946 | 131k | s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size; |
1947 | 131k | s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size; |
1948 | 131k | } |
1949 | | |
1950 | 131k | s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size; |
1951 | | |
1952 | 131k | s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size; |
1953 | | |
1954 | 131k | s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size; |
1955 | | |
1956 | | /* Initialize ptr to current CTB */ |
1957 | 131k | ps_ctb_ipe_analyse = ps_row_ipe_analyse + ctb_ctr; // * ctb_size; |
1958 | | |
1959 | | /* reset the map idx for current ctb */ |
1960 | 131k | col_pu_map_idx = 0; |
1961 | 131k | num_pus_in_ctb = 0; |
1962 | | |
1963 | | /* reset the map buffer to 0*/ |
1964 | | |
1965 | 131k | memset( |
1966 | 131k | &ps_ctxt->au1_nbr_ctb_map[0][0], |
1967 | 131k | 0, |
1968 | 131k | (MAX_PU_IN_CTB_ROW + 1 + 8) * (MAX_PU_IN_CTB_ROW + 1 + 8)); |
1969 | | |
1970 | | /* set the CTB neighbour availability flags */ |
1971 | 131k | ihevce_set_ctb_nbr( |
1972 | 131k | &ps_ctb_out->s_ctb_nbr_avail_flags, |
1973 | 131k | ps_ctxt->pu1_ctb_nbr_map, |
1974 | 131k | ps_ctxt->i4_nbr_map_strd, |
1975 | 131k | ctb_ctr, |
1976 | 131k | vert_ctr, |
1977 | 131k | ps_frm_ctb_prms); |
1978 | | |
1979 | | /* -------- update the cur CTB offsets for inter prediction-------- */ |
1980 | 131k | ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = ctb_ctr * ctb_size; |
1981 | 131k | ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = vert_ctr * ctb_size; |
1982 | | |
1983 | | /* -------- update the cur CTB offsets for MV prediction-------- */ |
1984 | 131k | ps_ctxt->s_mv_pred_ctxt.i4_ctb_x = ctb_ctr; |
1985 | 131k | ps_ctxt->s_mv_pred_ctxt.i4_ctb_y = vert_ctr; |
1986 | | |
1987 | | /* -------------- Boundary Strength Initialization ----------- */ |
1988 | 131k | if(ps_ctxt->i4_deblk_pad_hpel_cur_pic) |
1989 | 127k | { |
1990 | 127k | ihevce_bs_init_ctb(&ps_ctxt->s_deblk_bs_prms, ps_frm_ctb_prms, ctb_ctr, vert_ctr); |
1991 | 127k | } |
1992 | | |
1993 | | /* -------- update cur CTB offsets for entropy rdopt context------- */ |
1994 | 131k | ihevce_entropy_rdo_ctb_init(&ps_ctxt->s_rdopt_entropy_ctxt, ctb_ctr, vert_ctr); |
1995 | | |
1996 | | /* --------- CU Recursion --------------- */ |
1997 | | |
1998 | 131k | { |
1999 | | #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS |
2000 | | WORD32 i4_max_tree_depth = 4; |
2001 | | #endif |
2002 | 131k | WORD32 i4_tree_depth = 0; |
2003 | | /* Init no. of CU in CTB to 0*/ |
2004 | 131k | ps_ctb_out->u1_num_cus_in_ctb = 0; |
2005 | | |
2006 | | #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS |
2007 | | if(ps_ctxt->i4_bitrate_instance_num == 0) |
2008 | | { |
2009 | | WORD32 i4_max_tree_depth = 4; |
2010 | | WORD32 i; |
2011 | | for(i = 0; i < i4_max_tree_depth; i++) |
2012 | | { |
2013 | | COPY_CABAC_STATES( |
2014 | | &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0], |
2015 | | &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0], |
2016 | | IHEVC_CAB_CTXT_END * sizeof(UWORD8)); |
2017 | | } |
2018 | | } |
2019 | | #else |
2020 | 131k | if(ps_ctxt->i4_bitrate_instance_num == 0) |
2021 | 131k | { |
2022 | 131k | if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2) |
2023 | 38.4k | { |
2024 | 38.4k | WORD32 i4_max_tree_depth = 4; |
2025 | 38.4k | WORD32 i; |
2026 | 192k | for(i = 0; i < i4_max_tree_depth; i++) |
2027 | 153k | { |
2028 | 153k | COPY_CABAC_STATES( |
2029 | 153k | &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0], |
2030 | 153k | &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0], |
2031 | 153k | IHEVC_CAB_CTXT_END * sizeof(UWORD8)); |
2032 | 153k | } |
2033 | 38.4k | } |
2034 | 131k | } |
2035 | | |
2036 | 131k | #endif |
2037 | 131k | if(ps_ctxt->i4_bitrate_instance_num == 0) |
2038 | 131k | { |
2039 | | /* FOR I- PIC populate the curr_ctb accordingly */ |
2040 | 131k | if(ISLICE == ps_ctxt->i1_slice_type) |
2041 | 50.6k | { |
2042 | 50.6k | ps_ctb_ipe_analyse->ps_cu_tree_root = ps_cu_tree_analyse; |
2043 | 50.6k | ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1; |
2044 | | |
2045 | 50.6k | ihevce_populate_cu_tree( |
2046 | 50.6k | ps_ctb_ipe_analyse, |
2047 | 50.6k | ps_cu_tree_analyse, |
2048 | 50.6k | 0, |
2049 | 50.6k | (IHEVCE_QUALITY_CONFIG_T)ps_ctxt->i4_quality_preset, |
2050 | 50.6k | POS_NA, |
2051 | 50.6k | POS_NA, |
2052 | 50.6k | POS_NA); |
2053 | 50.6k | } |
2054 | 131k | } |
2055 | 131k | ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1; |
2056 | 131k | ps_ctxt->ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0]; |
2057 | 131k | ps_ctxt->pu1_ecd_data = pu1_row_ecd_data; |
2058 | | |
2059 | 131k | s_cu_update_prms.ppu1_row_ecd_data = &pu1_row_ecd_data; |
2060 | 131k | s_cu_update_prms.pi4_last_cu_pos_in_ctb = &last_cu_pos_in_ctb; |
2061 | 131k | s_cu_update_prms.pi4_last_cu_size = &last_cu_size; |
2062 | 131k | s_cu_update_prms.pi4_num_pus_in_ctb = &num_pus_in_ctb; |
2063 | 131k | s_cu_update_prms.pps_cu_final = &ps_cu_final; |
2064 | 131k | s_cu_update_prms.pps_row_pu = &ps_row_pu; |
2065 | 131k | s_cu_update_prms.pps_row_tu = &ps_row_tu; |
2066 | 131k | s_cu_update_prms.pu1_num_cus_in_ctb_out = &ps_ctb_out->u1_num_cus_in_ctb; |
2067 | | |
2068 | | // source satd computation |
2069 | | /* compute the source 8x8 SATD for the current CTB */ |
2070 | | /* populate pui4_source_satd in some structure and pass it inside */ |
2071 | 131k | if(ps_ctxt->u1_enable_psyRDOPT) |
2072 | 0 | { |
2073 | | /* declare local variables */ |
2074 | 0 | WORD32 i; |
2075 | 0 | WORD32 ctb_size; |
2076 | 0 | WORD32 num_comp_had_blocks; |
2077 | 0 | UWORD8 *pu1_l0_block; |
2078 | 0 | WORD32 block_ht; |
2079 | 0 | WORD32 block_wd; |
2080 | 0 | WORD32 ht_offset; |
2081 | 0 | WORD32 wd_offset; |
2082 | |
|
2083 | 0 | WORD32 num_horz_blocks; |
2084 | 0 | WORD32 had_block_size; |
2085 | 0 | WORD32 total_had_block_size; |
2086 | 0 | WORD16 pi2_residue_had_zscan[64]; |
2087 | 0 | UWORD8 ai1_zeros_buffer[64]; |
2088 | |
|
2089 | 0 | WORD32 index_satd; |
2090 | 0 | WORD32 is_hbd; |
2091 | | /* initialize the variables */ |
2092 | 0 | block_ht = cur_ctb_ht; |
2093 | 0 | block_wd = cur_ctb_wd; |
2094 | |
|
2095 | 0 | is_hbd = ps_ctxt->u1_is_input_data_hbd; |
2096 | |
|
2097 | 0 | had_block_size = 8; |
2098 | 0 | total_had_block_size = had_block_size * had_block_size; |
2099 | |
|
2100 | 0 | for(i = 0; i < total_had_block_size; i++) |
2101 | 0 | { |
2102 | 0 | ai1_zeros_buffer[i] = 0; |
2103 | 0 | } |
2104 | |
|
2105 | 0 | ctb_size = block_ht * block_wd; //ctb_width * ctb_height; |
2106 | 0 | num_comp_had_blocks = ctb_size / (had_block_size * had_block_size); |
2107 | |
|
2108 | 0 | num_horz_blocks = block_wd / had_block_size; //ctb_width / had_block_size; |
2109 | 0 | ht_offset = -had_block_size; |
2110 | 0 | wd_offset = -had_block_size; |
2111 | |
|
2112 | 0 | index_satd = 0; |
2113 | | /*Loop over all 8x8 blocks in the CTB*/ |
2114 | 0 | for(i = 0; i < num_comp_had_blocks; i++) |
2115 | 0 | { |
2116 | 0 | if(i % num_horz_blocks == 0) |
2117 | 0 | { |
2118 | 0 | wd_offset = -had_block_size; |
2119 | 0 | ht_offset += had_block_size; |
2120 | 0 | } |
2121 | 0 | wd_offset += had_block_size; |
2122 | |
|
2123 | 0 | if(!is_hbd) |
2124 | 0 | { |
2125 | | /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */ |
2126 | 0 | pu1_l0_block = s_cu_prms.pu1_luma_src + |
2127 | 0 | ps_curr_src_bufs->i4_y_strd * ht_offset + wd_offset; |
2128 | |
|
2129 | 0 | ps_ctxt->ai4_source_satd_8x8[index_satd] = |
2130 | |
|
2131 | 0 | ps_ctxt->s_cmn_opt_func.pf_AC_HAD_8x8_8bit( |
2132 | 0 | pu1_l0_block, |
2133 | 0 | ps_curr_src_bufs->i4_y_strd, |
2134 | 0 | ai1_zeros_buffer, |
2135 | 0 | had_block_size, |
2136 | 0 | pi2_residue_had_zscan, |
2137 | 0 | had_block_size); |
2138 | 0 | } |
2139 | 0 | index_satd++; |
2140 | 0 | } |
2141 | 0 | } |
2142 | | |
2143 | 131k | if(ps_ctxt->u1_enable_psyRDOPT) |
2144 | 0 | { |
2145 | | /* declare local variables */ |
2146 | 0 | WORD32 i; |
2147 | 0 | WORD32 ctb_size; |
2148 | 0 | WORD32 num_comp_had_blocks; |
2149 | 0 | UWORD8 *pu1_l0_block; |
2150 | 0 | UWORD8 *pu1_l0_block_prev = NULL; |
2151 | 0 | WORD32 block_ht; |
2152 | 0 | WORD32 block_wd; |
2153 | 0 | WORD32 ht_offset; |
2154 | 0 | WORD32 wd_offset; |
2155 | |
|
2156 | 0 | WORD32 num_horz_blocks; |
2157 | 0 | WORD32 had_block_size; |
2158 | 0 | WORD16 pi2_residue_had[64]; |
2159 | 0 | UWORD8 ai1_zeros_buffer[64]; |
2160 | 0 | WORD32 index_satd = 0; |
2161 | |
|
2162 | 0 | WORD32 is_hbd; |
2163 | 0 | is_hbd = ps_ctxt->u1_is_input_data_hbd; // 8 bit |
2164 | | |
2165 | | /* initialize the variables */ |
2166 | | /* change this based on the bit depth */ |
2167 | | // ps_ctxt->u1_chroma_array_type |
2168 | 0 | if(ps_ctxt->u1_chroma_array_type == 1) |
2169 | 0 | { |
2170 | 0 | block_ht = cur_ctb_ht / 2; |
2171 | 0 | block_wd = cur_ctb_wd / 2; |
2172 | 0 | } |
2173 | 0 | else |
2174 | 0 | { |
2175 | 0 | block_ht = cur_ctb_ht; |
2176 | 0 | block_wd = cur_ctb_wd / 2; |
2177 | 0 | } |
2178 | |
|
2179 | 0 | had_block_size = 4; |
2180 | 0 | memset(ai1_zeros_buffer, 0, 64 * sizeof(UWORD8)); |
2181 | |
|
2182 | 0 | ctb_size = block_ht * block_wd; //ctb_width * ctb_height; |
2183 | 0 | num_comp_had_blocks = 2 * ctb_size / (had_block_size * had_block_size); |
2184 | |
|
2185 | 0 | num_horz_blocks = 2 * block_wd / had_block_size; //ctb_width / had_block_size; |
2186 | 0 | ht_offset = -had_block_size; |
2187 | 0 | wd_offset = -had_block_size; |
2188 | |
|
2189 | 0 | if(!is_hbd) |
2190 | 0 | { |
2191 | | /* loop over for every 4x4 blocks in the CU for Cb */ |
2192 | 0 | for(i = 0; i < num_comp_had_blocks; i++) |
2193 | 0 | { |
2194 | 0 | if(i % num_horz_blocks == 0) |
2195 | 0 | { |
2196 | 0 | wd_offset = -had_block_size; |
2197 | 0 | ht_offset += had_block_size; |
2198 | 0 | } |
2199 | 0 | wd_offset += had_block_size; |
2200 | | |
2201 | | /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */ |
2202 | 0 | if(i % 2 != 0) |
2203 | 0 | { |
2204 | 0 | if(!is_hbd) |
2205 | 0 | { |
2206 | 0 | pu1_l0_block = pu1_l0_block_prev + 1; |
2207 | 0 | } |
2208 | 0 | } |
2209 | 0 | else |
2210 | 0 | { |
2211 | 0 | if(!is_hbd) |
2212 | 0 | { |
2213 | 0 | pu1_l0_block = s_cu_prms.pu1_chrm_src + |
2214 | 0 | s_cu_prms.i4_chrm_src_stride * ht_offset + wd_offset; |
2215 | 0 | pu1_l0_block_prev = pu1_l0_block; |
2216 | 0 | } |
2217 | 0 | } |
2218 | |
|
2219 | 0 | if(had_block_size == 4) |
2220 | 0 | { |
2221 | 0 | if(!is_hbd) |
2222 | 0 | { |
2223 | 0 | ps_ctxt->ai4_source_chroma_satd[index_satd] = |
2224 | 0 | ps_ctxt->s_cmn_opt_func.pf_chroma_AC_HAD_4x4_8bit( |
2225 | 0 | pu1_l0_block, |
2226 | 0 | s_cu_prms.i4_chrm_src_stride, |
2227 | 0 | ai1_zeros_buffer, |
2228 | 0 | had_block_size, |
2229 | 0 | pi2_residue_had, |
2230 | 0 | had_block_size); |
2231 | 0 | } |
2232 | |
|
2233 | 0 | index_satd++; |
2234 | |
|
2235 | 0 | } // block size of 4x4 |
2236 | |
|
2237 | 0 | } // for all blocks |
2238 | |
|
2239 | 0 | } // is hbd check |
2240 | 0 | } |
2241 | | |
2242 | 131k | ihevce_cu_recurse_decide( |
2243 | 131k | ps_ctxt, |
2244 | 131k | &s_cu_prms, |
2245 | 131k | ps_cu_tree_analyse, |
2246 | 131k | ps_cu_tree_analyse, |
2247 | 131k | ps_ctb_ipe_analyse, |
2248 | 131k | ps_cu_me_data, |
2249 | 131k | &ps_ctb_col_pu, |
2250 | 131k | &s_cu_update_prms, |
2251 | 131k | pu1_row_pu_map, |
2252 | 131k | &col_pu_map_idx, |
2253 | 131k | i4_tree_depth, |
2254 | 131k | ctb_ctr << 6, |
2255 | 131k | vert_ctr << 6, |
2256 | 131k | cur_ctb_ht); |
2257 | | |
2258 | 131k | if(ps_ctxt->i1_slice_type != ISLICE) |
2259 | 80.4k | { |
2260 | 80.4k | ASSERT( |
2261 | 80.4k | (cur_ctb_wd * cur_ctb_ht) <= |
2262 | 80.4k | ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree_analyse)); |
2263 | 80.4k | } |
2264 | | /*If Sup pic rc is enabled*/ |
2265 | 131k | if(1 == ps_ctxt->i4_sub_pic_level_rc) |
2266 | 0 | { |
2267 | | /*In a row, after the required CTB is reached, send data and query scale from Bit Control thread */ |
2268 | 0 | ihevce_sub_pic_rc_in_data( |
2269 | 0 | (void *)ps_multi_thrd_ctxt, |
2270 | 0 | (void *)ps_ctxt, |
2271 | 0 | (void *)ps_ctb_ipe_analyse, |
2272 | 0 | (void *)ps_frm_ctb_prms); |
2273 | 0 | } |
2274 | | |
2275 | 131k | ps_ctxt->ps_enc_out_ctxt->u1_cu_size = 128; |
2276 | | |
2277 | 131k | } /* End of CU recursion block */ |
2278 | | |
2279 | | #if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS |
2280 | | { |
2281 | | ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0]; |
2282 | | enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms; |
2283 | | ps_ctxt->pu1_ecd_data = pu1_row_ecd_data; |
2284 | | |
2285 | | do |
2286 | | { |
2287 | | ihevce_update_final_cu_results( |
2288 | | ps_ctxt, |
2289 | | ps_enc_out_ctxt, |
2290 | | ps_cu_prms, |
2291 | | NULL, /* &ps_ctb_col_pu */ |
2292 | | NULL, /* &col_pu_map_idx */ |
2293 | | &s_cu_update_prms, |
2294 | | ctb_ctr, |
2295 | | vert_ctr); |
2296 | | |
2297 | | ps_enc_out_ctxt++; |
2298 | | |
2299 | | ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE); |
2300 | | |
2301 | | } while(ps_enc_out_ctxt->u1_cu_size != 128); |
2302 | | } |
2303 | | #else |
2304 | 131k | if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2) |
2305 | 38.4k | { |
2306 | 38.4k | ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0]; |
2307 | 38.4k | enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms; |
2308 | 38.4k | ps_ctxt->pu1_ecd_data = pu1_row_ecd_data; |
2309 | | |
2310 | 38.4k | do |
2311 | 444k | { |
2312 | 444k | ihevce_update_final_cu_results( |
2313 | 444k | ps_ctxt, |
2314 | 444k | ps_enc_out_ctxt, |
2315 | 444k | ps_cu_prms, |
2316 | 444k | NULL, /* &ps_ctb_col_pu */ |
2317 | 444k | NULL, /* &col_pu_map_idx */ |
2318 | 444k | &s_cu_update_prms, |
2319 | 444k | ctb_ctr, |
2320 | 444k | vert_ctr); |
2321 | | |
2322 | 444k | ps_enc_out_ctxt++; |
2323 | | |
2324 | 444k | ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE); |
2325 | | |
2326 | 444k | } while(ps_enc_out_ctxt->u1_cu_size != 128); |
2327 | 38.4k | } |
2328 | 131k | #endif |
2329 | | |
2330 | | /* --- ctb level copy of data to left buffers--*/ |
2331 | 131k | ((pf_enc_loop_ctb_left_copy)ps_ctxt->pv_enc_loop_ctb_left_copy)(ps_ctxt, &s_cu_prms); |
2332 | | |
2333 | 131k | if(ps_ctxt->i4_deblk_pad_hpel_cur_pic) |
2334 | 127k | { |
2335 | | /* For the Unaligned CTB, make the invalid edge boundary strength 0 */ |
2336 | 127k | ihevce_bs_clear_invalid( |
2337 | 127k | &ps_ctxt->s_deblk_bs_prms, |
2338 | 127k | last_ctb_row_flag, |
2339 | 127k | (ctb_ctr == (num_ctbs_horz_pic - 1)), |
2340 | 127k | last_hz_ctb_wd, |
2341 | 127k | last_vt_ctb_ht); |
2342 | | |
2343 | | /* -----------------Read boundary strengts for current CTB------------- */ |
2344 | | |
2345 | 127k | if((0 == ps_ctxt->i4_deblock_type) && (ps_ctxt->i4_deblk_pad_hpel_cur_pic)) |
2346 | 92.1k | { |
2347 | | /*Storing boundary strengths of current CTB*/ |
2348 | 92.1k | UWORD32 *pu4_bs_horz = &ps_ctxt->s_deblk_bs_prms.au4_horz_bs[0]; |
2349 | 92.1k | UWORD32 *pu4_bs_vert = &ps_ctxt->s_deblk_bs_prms.au4_vert_bs[0]; |
2350 | | |
2351 | 92.1k | memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_vert, pu4_bs_vert, (ctb_size * 4) / 8); |
2352 | 92.1k | memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_horz, pu4_bs_horz, (ctb_size * 4) / 8); |
2353 | 92.1k | } |
2354 | | //Increment for storing next CTB info |
2355 | 127k | s_deblk_ctb_row_params.pu4_ctb_row_bs_vert += |
2356 | 127k | (ctb_size >> 3); //one vertical edge per 8x8 block |
2357 | 127k | s_deblk_ctb_row_params.pu4_ctb_row_bs_horz += |
2358 | 127k | (ctb_size >> 3); //one horizontal edge per 8x8 block |
2359 | 127k | } |
2360 | | |
2361 | | /* -------------- ctb level updates ----------------- */ |
2362 | 131k | ps_row_cu += ps_ctb_out->u1_num_cus_in_ctb; |
2363 | | |
2364 | 131k | pu1_row_pu_map += (ctb_size >> 2) * (ctb_size >> 2); |
2365 | | |
2366 | | /* first ctb offset will be populated by the caller */ |
2367 | 131k | if(0 != ctb_ctr) |
2368 | 26.5k | { |
2369 | 26.5k | pu4_pu_offsets[ctb_ctr] = pu4_pu_offsets[ctb_ctr - 1] + num_pus_in_ctb; |
2370 | 26.5k | } |
2371 | 131k | pu2_num_pu_map[ctb_ctr] = num_pus_in_ctb; |
2372 | 131k | ASSERT(ps_ctb_out->u1_num_cus_in_ctb != 0); |
2373 | | |
2374 | 131k | ps_ctb_in++; |
2375 | 131k | ps_ctb_out++; |
2376 | 131k | } |
2377 | | |
2378 | | /* ---------- Encloop end of row updates ----------------- */ |
2379 | | |
2380 | | /* at the end of row processing cu pixel counter is set to */ |
2381 | | /* (num ctb * ctbzise) + ctb size */ |
2382 | | /* this is to set the dependency for right most cu of last */ |
2383 | | /* ctb's top right data dependency */ |
2384 | | /* this even takes care of entropy dependency for */ |
2385 | | /* incomplete ctb as well */ |
2386 | 104k | ihevce_dmgr_set_row_row_sync( |
2387 | 104k | pv_dep_mngr_enc_loop_cu_top_right, |
2388 | 104k | (ctb_ctr * ctb_size + ctb_size), |
2389 | 104k | vert_ctr, |
2390 | 104k | ps_ctxt->i4_tile_col_idx /* Col Tile No. */); |
2391 | | |
2392 | 104k | ps_ctxt->s_sao_ctxt_t.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func; |
2393 | | |
2394 | | /* Restore structure. |
2395 | | Getting the address of stored-BS and Qp-map and other info */ |
2396 | 104k | memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t)); |
2397 | 104k | { |
2398 | | /* Update the pointers to the tile start */ |
2399 | 104k | s_deblk_ctb_row_params.pu4_ctb_row_bs_vert += |
2400 | 104k | (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one vertical edge per 8x8 block |
2401 | 104k | s_deblk_ctb_row_params.pu4_ctb_row_bs_horz += |
2402 | 104k | (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one horizontal edge per 8x8 block |
2403 | 104k | s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2)); |
2404 | 104k | } |
2405 | | |
2406 | | #if PROFILE_ENC_REG_DATA |
2407 | | s_profile.u8_enc_reg_data[vert_ctr] = 0; |
2408 | | #endif |
2409 | | |
2410 | | /* -- Loop over all the CTBs in a row for Deblocking and Subpel gen --- */ |
2411 | 104k | if(!ps_ctxt->u1_is_input_data_hbd) |
2412 | 104k | { |
2413 | 104k | WORD32 last_col_pic, last_col_tile; |
2414 | | |
2415 | 235k | for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++) |
2416 | 131k | { |
2417 | | /* store the ctb level prms in cu prms */ |
2418 | 131k | s_cu_prms.i4_ctb_pos = ctb_ctr; |
2419 | 131k | s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size; |
2420 | 131k | s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size; |
2421 | | |
2422 | 131k | s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size; |
2423 | 131k | s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size; |
2424 | 131k | s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size; |
2425 | | |
2426 | 131k | s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size; |
2427 | | |
2428 | 131k | s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size; |
2429 | | |
2430 | | /* If last ctb in the horizontal row */ |
2431 | 131k | if(ctb_ctr == (num_ctbs_horz_pic - 1)) |
2432 | 104k | { |
2433 | 104k | last_col_pic = 1; |
2434 | 104k | } |
2435 | 26.5k | else |
2436 | 26.5k | { |
2437 | 26.5k | last_col_pic = 0; |
2438 | 26.5k | } |
2439 | | |
2440 | | /* If last ctb in the tile row */ |
2441 | 131k | if(ctb_ctr == (ctb_end - 1)) |
2442 | 104k | { |
2443 | 104k | last_col_tile = 1; |
2444 | 104k | } |
2445 | 26.5k | else |
2446 | 26.5k | { |
2447 | 26.5k | last_col_tile = 0; |
2448 | 26.5k | } |
2449 | | |
2450 | 131k | if(ps_ctxt->i4_deblk_pad_hpel_cur_pic) |
2451 | 127k | { |
2452 | | /* for last ctb of a row check top instead of top right */ |
2453 | 127k | if(((ctb_ctr + 1) == ctb_end) && (vert_ctr > 0)) |
2454 | 9.58k | { |
2455 | 9.58k | dblk_offset = 1; |
2456 | 9.58k | } |
2457 | | /* Wait till top neighbour CTB has done it's deblocking*/ |
2458 | 127k | ihevce_dmgr_chk_row_row_sync( |
2459 | 127k | pv_dep_mngr_enc_loop_dblk, |
2460 | 127k | ctb_ctr, |
2461 | 127k | dblk_offset, |
2462 | 127k | dblk_check_dep_pos, |
2463 | 127k | ps_ctxt->i4_tile_col_idx, /* Col Tile No. */ |
2464 | 127k | ps_ctxt->thrd_id); |
2465 | | |
2466 | 127k | if((0 == ps_ctxt->i4_deblock_type)) |
2467 | 92.1k | { |
2468 | | /* Populate Qp-map */ |
2469 | 92.1k | if(ctb_start == ctb_ctr) |
2470 | 73.8k | { |
2471 | 73.8k | ihevce_deblk_populate_qp_map( |
2472 | 73.8k | ps_ctxt, |
2473 | 73.8k | &s_deblk_ctb_row_params, |
2474 | 73.8k | ps_ctb_out_dblk, |
2475 | 73.8k | vert_ctr, |
2476 | 73.8k | ps_frm_ctb_prms, |
2477 | 73.8k | ps_tile_params); |
2478 | 73.8k | } |
2479 | 92.1k | ps_ctxt->s_deblk_prms.i4_ctb_size = ctb_size; |
2480 | | |
2481 | | /* recon pointers and stride */ |
2482 | 92.1k | ps_ctxt->s_deblk_prms.pu1_ctb_y = s_cu_prms.pu1_luma_recon; |
2483 | 92.1k | ps_ctxt->s_deblk_prms.pu1_ctb_uv = s_cu_prms.pu1_chrm_recon; |
2484 | 92.1k | ps_ctxt->s_deblk_prms.i4_luma_pic_stride = s_cu_prms.i4_luma_recon_stride; |
2485 | 92.1k | ps_ctxt->s_deblk_prms.i4_chroma_pic_stride = s_cu_prms.i4_chrm_recon_stride; |
2486 | | |
2487 | 92.1k | ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge = (0 == vert_ctr) ? 0 : 1; |
2488 | 92.1k | { |
2489 | 92.1k | ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge = |
2490 | 92.1k | (ps_tile_params->i4_first_ctb_y == vert_ctr) ? 0 : 1; |
2491 | 92.1k | } |
2492 | 92.1k | ps_ctxt->s_deblk_prms.i4_deblock_left_ctb_edge = (ctb_start == ctb_ctr) ? 0 : 1; |
2493 | | //or according to slice boundary. Support yet to be added !!!! |
2494 | | |
2495 | 92.1k | ihevce_deblk_ctb( |
2496 | 92.1k | &ps_ctxt->s_deblk_prms, last_col_tile, &s_deblk_ctb_row_params); |
2497 | | |
2498 | | //Increment for storing next CTB info |
2499 | 92.1k | s_deblk_ctb_row_params.pu4_ctb_row_bs_vert += |
2500 | 92.1k | (ctb_size >> 3); //one vertical edge per 8x8 block |
2501 | 92.1k | s_deblk_ctb_row_params.pu4_ctb_row_bs_horz += |
2502 | 92.1k | (ctb_size >> 3); //one horizontal edge per 8x8 block |
2503 | 92.1k | s_deblk_ctb_row_params.pi1_ctb_row_qp += |
2504 | 92.1k | (ctb_size >> 2); //one qp per 4x4 block. |
2505 | 92.1k | } |
2506 | 127k | } // end of if(ps_ctxt->i4_deblk_pad_hpel_cur_pic) |
2507 | | |
2508 | | /* update the number of ctbs deblocked for this row */ |
2509 | 131k | ihevce_dmgr_set_row_row_sync( |
2510 | 131k | pv_dep_mngr_enc_loop_dblk, |
2511 | 131k | (ctb_ctr + 1), |
2512 | 131k | vert_ctr, |
2513 | 131k | ps_ctxt->i4_tile_col_idx /* Col Tile No. */); |
2514 | | |
2515 | 131k | } //end of loop over CTBs in current CTB-row |
2516 | | |
2517 | | /* Apply SAO over the previous CTB-row */ |
2518 | 235k | for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++) |
2519 | 131k | { |
2520 | 131k | if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag || |
2521 | 60.7k | ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag) |
2522 | 70.3k | { |
2523 | 70.3k | sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t; |
2524 | | |
2525 | 70.3k | if(vert_ctr > ps_tile_params->i4_first_ctb_y) |
2526 | 16.6k | { |
2527 | | /*For last ctb check top dep only*/ |
2528 | 16.6k | if((vert_ctr > 1) && ((ctb_ctr + 1) == ctb_end)) |
2529 | 3.98k | { |
2530 | 3.98k | sao_offset = 1; |
2531 | 3.98k | } |
2532 | | |
2533 | 16.6k | ihevce_dmgr_chk_row_row_sync( |
2534 | 16.6k | pv_dep_mngr_enc_loop_sao, |
2535 | 16.6k | ctb_ctr, |
2536 | 16.6k | sao_offset, |
2537 | 16.6k | sao_check_dep_pos, |
2538 | 16.6k | ps_ctxt->i4_tile_col_idx, /* Col Tile No. */ |
2539 | 16.6k | ps_ctxt->thrd_id); |
2540 | | |
2541 | | /* Call the sao function to do sao for the current ctb*/ |
2542 | | |
2543 | | /* Register the curr ctb's x pos in sao context*/ |
2544 | 16.6k | ps_sao_ctxt->i4_ctb_x = ctb_ctr; |
2545 | | |
2546 | | /* Register the curr ctb's y pos in sao context*/ |
2547 | 16.6k | ps_sao_ctxt->i4_ctb_y = vert_ctr - 1; |
2548 | | |
2549 | 16.6k | ps_ctb_out_sao = ps_sao_ctxt->ps_ctb_out + |
2550 | 16.6k | (vert_ctr - 1) * ps_frm_ctb_prms->i4_num_ctbs_horz + ctb_ctr; |
2551 | 16.6k | ps_sao_ctxt->ps_sao = &ps_ctb_out_sao->s_sao; |
2552 | 16.6k | ps_sao_ctxt->i4_sao_blk_wd = ctb_size; |
2553 | 16.6k | ps_sao_ctxt->i4_sao_blk_ht = ctb_size; |
2554 | | |
2555 | 16.6k | ps_sao_ctxt->i4_is_last_ctb_row = 0; |
2556 | 16.6k | ps_sao_ctxt->i4_is_last_ctb_col = 0; |
2557 | | |
2558 | 16.6k | if((ctb_ctr + 1) == ctb_end) |
2559 | 6.90k | { |
2560 | 6.90k | ps_sao_ctxt->i4_is_last_ctb_col = 1; |
2561 | 6.90k | ps_sao_ctxt->i4_sao_blk_wd = |
2562 | 6.90k | ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) - |
2563 | 6.90k | ps_tile_params->i4_curr_tile_width); |
2564 | 6.90k | } |
2565 | | |
2566 | | /* Calculate the recon buf pointer and stride for teh current ctb */ |
2567 | 16.6k | ps_sao_ctxt->pu1_cur_luma_recon_buf = |
2568 | 16.6k | ps_sao_ctxt->pu1_frm_luma_recon_buf + |
2569 | 16.6k | (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) + |
2570 | 16.6k | (ps_sao_ctxt->i4_ctb_x * ctb_size); |
2571 | | |
2572 | 16.6k | ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride; |
2573 | | |
2574 | 16.6k | ps_sao_ctxt->pu1_cur_chroma_recon_buf = |
2575 | 16.6k | ps_sao_ctxt->pu1_frm_chroma_recon_buf + |
2576 | 16.6k | (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y * |
2577 | 16.6k | (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) + |
2578 | 16.6k | (ps_sao_ctxt->i4_ctb_x * ctb_size); |
2579 | | |
2580 | 16.6k | ps_sao_ctxt->i4_cur_chroma_recon_stride = |
2581 | 16.6k | ps_sao_ctxt->i4_frm_chroma_recon_stride; |
2582 | | |
2583 | 16.6k | ps_sao_ctxt->pu1_cur_luma_src_buf = |
2584 | 16.6k | ps_sao_ctxt->pu1_frm_luma_src_buf + |
2585 | 16.6k | (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) + |
2586 | 16.6k | (ps_sao_ctxt->i4_ctb_x * ctb_size); |
2587 | | |
2588 | 16.6k | ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride; |
2589 | | |
2590 | 16.6k | ps_sao_ctxt->pu1_cur_chroma_src_buf = |
2591 | 16.6k | ps_sao_ctxt->pu1_frm_chroma_src_buf + |
2592 | 16.6k | (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y * |
2593 | 16.6k | (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) + |
2594 | 16.6k | (ps_sao_ctxt->i4_ctb_x * ctb_size); |
2595 | | |
2596 | 16.6k | ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride; |
2597 | | |
2598 | | /* Calculate the pointer to buff to store the (x,y)th sao |
2599 | | * for the top merge of (x,y+1)th ctb |
2600 | | */ |
2601 | 16.6k | ps_sao_ctxt->ps_top_ctb_sao = |
2602 | 16.6k | &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id] |
2603 | 16.6k | [ps_sao_ctxt->i4_ctb_x + |
2604 | 16.6k | (ps_sao_ctxt->i4_ctb_y) * |
2605 | 16.6k | ps_frm_ctb_prms->i4_num_ctbs_horz + |
2606 | 16.6k | (ps_ctxt->i4_bitrate_instance_num * |
2607 | 16.6k | ps_sao_ctxt->i4_num_ctb_units)]; |
2608 | | |
2609 | | /* Calculate the pointer to buff to store the top pixels of curr ctb*/ |
2610 | 16.6k | ps_sao_ctxt->pu1_curr_sao_src_top_luma = |
2611 | 16.6k | ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] + |
2612 | 16.6k | (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride + |
2613 | 16.6k | ps_sao_ctxt->i4_ctb_x * ctb_size + |
2614 | 16.6k | ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size + |
2615 | 16.6k | ps_sao_ctxt->i4_top_chroma_buf_size); |
2616 | | |
2617 | | /* Calculate the pointer to buff to store the top pixels of curr ctb*/ |
2618 | 16.6k | ps_sao_ctxt->pu1_curr_sao_src_top_chroma = |
2619 | 16.6k | ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] + |
2620 | 16.6k | (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride + |
2621 | 16.6k | ps_sao_ctxt->i4_ctb_x * ctb_size + |
2622 | 16.6k | ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size + |
2623 | 16.6k | ps_sao_ctxt->i4_top_chroma_buf_size); |
2624 | | |
2625 | 16.6k | { |
2626 | 16.6k | UWORD32 u4_ctb_sao_bits; |
2627 | | |
2628 | 16.6k | ihevce_sao_analyse( |
2629 | 16.6k | &ps_ctxt->s_sao_ctxt_t, |
2630 | 16.6k | ps_ctb_out_sao, |
2631 | 16.6k | &u4_ctb_sao_bits, |
2632 | 16.6k | ps_tile_params); |
2633 | 16.6k | ps_ctxt |
2634 | 16.6k | ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id] |
2635 | 16.6k | [ps_ctxt->i4_bitrate_instance_num] |
2636 | 16.6k | ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits; |
2637 | 16.6k | ps_ctxt |
2638 | 16.6k | ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id] |
2639 | 16.6k | [ps_ctxt->i4_bitrate_instance_num] |
2640 | 16.6k | ->u4_frame_rdopt_bits += u4_ctb_sao_bits; |
2641 | 16.6k | } |
2642 | | /** Subpel generation not done for non-ref picture **/ |
2643 | 16.6k | if(ps_ctxt->i4_deblk_pad_hpel_cur_pic) |
2644 | 16.6k | { |
2645 | | /* Recon Padding */ |
2646 | 16.6k | ihevce_recon_padding( |
2647 | 16.6k | ps_pad_interp_recon, |
2648 | 16.6k | ctb_ctr, |
2649 | 16.6k | vert_ctr - 1, |
2650 | 16.6k | ps_frm_ctb_prms, |
2651 | 16.6k | ps_ctxt->ps_func_selector); |
2652 | 16.6k | } |
2653 | | /* update the number of SAO ctbs for this row */ |
2654 | 16.6k | ihevce_dmgr_set_row_row_sync( |
2655 | 16.6k | pv_dep_mngr_enc_loop_sao, |
2656 | 16.6k | ctb_ctr + 1, |
2657 | 16.6k | vert_ctr - 1, |
2658 | 16.6k | ps_ctxt->i4_tile_col_idx /* Col Tile No. */); |
2659 | 16.6k | } |
2660 | 70.3k | } |
2661 | 60.7k | else //SAO Disabled |
2662 | 60.7k | { |
2663 | 60.7k | if(ps_ctxt->i4_deblk_pad_hpel_cur_pic) |
2664 | 57.1k | { |
2665 | | /* Recon Padding */ |
2666 | 57.1k | ihevce_recon_padding( |
2667 | 57.1k | ps_pad_interp_recon, |
2668 | 57.1k | ctb_ctr, |
2669 | 57.1k | vert_ctr, |
2670 | 57.1k | ps_frm_ctb_prms, |
2671 | 57.1k | ps_ctxt->ps_func_selector); |
2672 | 57.1k | } |
2673 | 60.7k | } |
2674 | 131k | } // end of SAO for loop |
2675 | | |
2676 | | /* Call the sao function again for the last ctb row of frame */ |
2677 | 104k | if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag || |
2678 | 51.4k | ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag) |
2679 | 53.0k | { |
2680 | 53.0k | sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t; |
2681 | | |
2682 | 53.0k | if(vert_ctr == |
2683 | 53.0k | (ps_tile_params->i4_first_ctb_y + ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1)) |
2684 | 46.1k | { |
2685 | 99.9k | for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++) |
2686 | 53.7k | { |
2687 | | /* Register the curr ctb's x pos in sao context*/ |
2688 | 53.7k | ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr; |
2689 | | |
2690 | | /* Register the curr ctb's y pos in sao context*/ |
2691 | 53.7k | ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr; |
2692 | | |
2693 | 53.7k | ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out + |
2694 | 53.7k | vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz + ctb_ctr; |
2695 | | |
2696 | 53.7k | ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao; |
2697 | | |
2698 | 53.7k | ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd = ps_ctxt->s_sao_ctxt_t.i4_ctb_size; |
2699 | 53.7k | ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 0; |
2700 | | |
2701 | 53.7k | if((ctb_ctr + 1) == ctb_end) |
2702 | 46.1k | { |
2703 | 46.1k | ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 1; |
2704 | 46.1k | ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd = |
2705 | 46.1k | ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) - |
2706 | 46.1k | ps_tile_params->i4_curr_tile_width); |
2707 | 46.1k | } |
2708 | | |
2709 | 53.7k | ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht = |
2710 | 53.7k | ctb_size - ((ps_tile_params->i4_curr_tile_ht_in_ctb_unit * ctb_size) - |
2711 | 53.7k | ps_tile_params->i4_curr_tile_height); |
2712 | | |
2713 | 53.7k | ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 1; |
2714 | | |
2715 | | /* Calculate the recon buf pointer and stride for teh current ctb */ |
2716 | 53.7k | ps_sao_ctxt->pu1_cur_luma_recon_buf = |
2717 | 53.7k | ps_sao_ctxt->pu1_frm_luma_recon_buf + |
2718 | 53.7k | (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) + |
2719 | 53.7k | (ps_sao_ctxt->i4_ctb_x * ctb_size); |
2720 | | |
2721 | 53.7k | ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride; |
2722 | | |
2723 | 53.7k | ps_sao_ctxt->pu1_cur_chroma_recon_buf = |
2724 | 53.7k | ps_sao_ctxt->pu1_frm_chroma_recon_buf + |
2725 | 53.7k | (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y * |
2726 | 53.7k | (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) + |
2727 | 53.7k | (ps_sao_ctxt->i4_ctb_x * ctb_size); |
2728 | | |
2729 | 53.7k | ps_sao_ctxt->i4_cur_chroma_recon_stride = |
2730 | 53.7k | ps_sao_ctxt->i4_frm_chroma_recon_stride; |
2731 | | |
2732 | 53.7k | ps_sao_ctxt->pu1_cur_luma_src_buf = |
2733 | 53.7k | ps_sao_ctxt->pu1_frm_luma_src_buf + |
2734 | 53.7k | (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) + |
2735 | 53.7k | (ps_sao_ctxt->i4_ctb_x * ctb_size); |
2736 | | |
2737 | 53.7k | ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride; |
2738 | | |
2739 | 53.7k | ps_sao_ctxt->pu1_cur_chroma_src_buf = |
2740 | 53.7k | ps_sao_ctxt->pu1_frm_chroma_src_buf + |
2741 | 53.7k | (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y * |
2742 | 53.7k | (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) + |
2743 | 53.7k | (ps_sao_ctxt->i4_ctb_x * ctb_size); |
2744 | | |
2745 | 53.7k | ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride; |
2746 | | |
2747 | | /* Calculate the pointer to buff to store the (x,y)th sao |
2748 | | * for the top merge of (x,y+1)th ctb |
2749 | | */ |
2750 | 53.7k | ps_sao_ctxt->ps_top_ctb_sao = |
2751 | 53.7k | &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id] |
2752 | 53.7k | [ps_sao_ctxt->i4_ctb_x + |
2753 | 53.7k | (ps_sao_ctxt->i4_ctb_y) * |
2754 | 53.7k | ps_frm_ctb_prms->i4_num_ctbs_horz + |
2755 | 53.7k | (ps_ctxt->i4_bitrate_instance_num * |
2756 | 53.7k | ps_sao_ctxt->i4_num_ctb_units)]; |
2757 | | |
2758 | | /* Calculate the pointer to buff to store the top pixels of curr ctb*/ |
2759 | 53.7k | ps_sao_ctxt->pu1_curr_sao_src_top_luma = |
2760 | 53.7k | ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] + |
2761 | 53.7k | (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride + |
2762 | 53.7k | ps_sao_ctxt->i4_ctb_x * ctb_size + |
2763 | 53.7k | ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size + |
2764 | 53.7k | ps_sao_ctxt->i4_top_chroma_buf_size); |
2765 | | |
2766 | | /* Calculate the pointer to buff to store the top pixels of curr ctb*/ |
2767 | 53.7k | ps_sao_ctxt->pu1_curr_sao_src_top_chroma = |
2768 | 53.7k | ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] + |
2769 | 53.7k | (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride + |
2770 | 53.7k | ps_sao_ctxt->i4_ctb_x * ctb_size + |
2771 | 53.7k | ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size + |
2772 | 53.7k | ps_sao_ctxt->i4_top_chroma_buf_size); |
2773 | | |
2774 | 53.7k | { |
2775 | 53.7k | UWORD32 u4_ctb_sao_bits; |
2776 | 53.7k | ihevce_sao_analyse( |
2777 | 53.7k | &ps_ctxt->s_sao_ctxt_t, |
2778 | 53.7k | ps_ctb_out_sao, |
2779 | 53.7k | &u4_ctb_sao_bits, |
2780 | 53.7k | ps_tile_params); |
2781 | 53.7k | ps_ctxt |
2782 | 53.7k | ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id] |
2783 | 53.7k | [ps_ctxt->i4_bitrate_instance_num] |
2784 | 53.7k | ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits; |
2785 | 53.7k | ps_ctxt |
2786 | 53.7k | ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id] |
2787 | 53.7k | [ps_ctxt->i4_bitrate_instance_num] |
2788 | 53.7k | ->u4_frame_rdopt_bits += u4_ctb_sao_bits; |
2789 | 53.7k | } |
2790 | | /** Subpel generation not done for non-ref picture **/ |
2791 | 53.7k | if(ps_ctxt->i4_deblk_pad_hpel_cur_pic) |
2792 | 53.7k | { |
2793 | | /* Recon Padding */ |
2794 | 53.7k | ihevce_recon_padding( |
2795 | 53.7k | ps_pad_interp_recon, |
2796 | 53.7k | ctb_ctr, |
2797 | 53.7k | vert_ctr, |
2798 | 53.7k | ps_frm_ctb_prms, |
2799 | 53.7k | ps_ctxt->ps_func_selector); |
2800 | 53.7k | } |
2801 | 53.7k | } |
2802 | 46.1k | } //end of loop over CTBs in current CTB-row |
2803 | 53.0k | } |
2804 | | |
2805 | | /* Subpel Plane Generation*/ |
2806 | 235k | for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++) |
2807 | 131k | { |
2808 | 131k | if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag || |
2809 | 60.7k | ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag) |
2810 | 70.3k | { |
2811 | 70.3k | if(0 != vert_ctr) |
2812 | 16.6k | { |
2813 | | /** Subpel generation not done for non-ref picture **/ |
2814 | 16.6k | if(ps_ctxt->i4_deblk_pad_hpel_cur_pic) |
2815 | 16.6k | { |
2816 | | /* Padding and Subpel Plane Generation */ |
2817 | 16.6k | ihevce_pad_interp_recon_ctb( |
2818 | 16.6k | ps_pad_interp_recon, |
2819 | 16.6k | ctb_ctr, |
2820 | 16.6k | vert_ctr - 1, |
2821 | 16.6k | ps_ctxt->i4_quality_preset, |
2822 | 16.6k | ps_frm_ctb_prms, |
2823 | 16.6k | ps_ctxt->ai2_scratch, |
2824 | 16.6k | ps_ctxt->i4_bitrate_instance_num, |
2825 | 16.6k | ps_ctxt->ps_func_selector); |
2826 | 16.6k | } |
2827 | 16.6k | } |
2828 | 70.3k | } |
2829 | 60.7k | else |
2830 | 60.7k | { // SAO Disabled |
2831 | 60.7k | if(ps_ctxt->i4_deblk_pad_hpel_cur_pic) |
2832 | 57.1k | { |
2833 | | /* Padding and Subpel Plane Generation */ |
2834 | 57.1k | ihevce_pad_interp_recon_ctb( |
2835 | 57.1k | ps_pad_interp_recon, |
2836 | 57.1k | ctb_ctr, |
2837 | 57.1k | vert_ctr, |
2838 | 57.1k | ps_ctxt->i4_quality_preset, |
2839 | 57.1k | ps_frm_ctb_prms, |
2840 | 57.1k | ps_ctxt->ai2_scratch, |
2841 | 57.1k | ps_ctxt->i4_bitrate_instance_num, |
2842 | 57.1k | ps_ctxt->ps_func_selector); |
2843 | 57.1k | } |
2844 | 60.7k | } |
2845 | 131k | } |
2846 | | |
2847 | 104k | { |
2848 | 104k | if(!ps_ctxt->i4_bitrate_instance_num) |
2849 | 104k | { |
2850 | 104k | if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag || |
2851 | 51.4k | ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag) |
2852 | 53.0k | { |
2853 | | /* If SAO is on, then signal completion of previous CTB row */ |
2854 | 53.0k | if(0 != vert_ctr) |
2855 | 6.90k | { |
2856 | 6.90k | { |
2857 | 6.90k | WORD32 post_ctb_ctr; |
2858 | | |
2859 | 23.5k | for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++) |
2860 | 16.6k | { |
2861 | 16.6k | ihevce_dmgr_map_set_sync( |
2862 | 16.6k | pv_dep_mngr_me_dep_encloop, |
2863 | 16.6k | post_ctb_ctr, |
2864 | 16.6k | (vert_ctr - 1), |
2865 | 16.6k | MAP_CTB_COMPLETE); |
2866 | 16.6k | } |
2867 | 6.90k | } |
2868 | 6.90k | } |
2869 | 53.0k | } |
2870 | 51.4k | else |
2871 | 51.4k | { |
2872 | 51.4k | { |
2873 | 51.4k | WORD32 post_ctb_ctr; |
2874 | | |
2875 | 112k | for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++) |
2876 | 60.7k | { |
2877 | 60.7k | ihevce_dmgr_map_set_sync( |
2878 | 60.7k | pv_dep_mngr_me_dep_encloop, |
2879 | 60.7k | post_ctb_ctr, |
2880 | 60.7k | vert_ctr, |
2881 | 60.7k | MAP_CTB_COMPLETE); |
2882 | 60.7k | } |
2883 | 51.4k | } |
2884 | 51.4k | } |
2885 | 104k | } |
2886 | 104k | } |
2887 | | |
2888 | | /*process last ctb row*/ |
2889 | 104k | if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag || |
2890 | 51.4k | ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag) |
2891 | 53.0k | { |
2892 | 53.0k | sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t; |
2893 | | |
2894 | 53.0k | if(vert_ctr == |
2895 | 53.0k | (ps_tile_params->i4_first_ctb_y + ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1)) |
2896 | 46.1k | { |
2897 | 99.9k | for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++) |
2898 | 53.7k | { |
2899 | 53.7k | if(ps_ctxt->i4_deblk_pad_hpel_cur_pic) |
2900 | 53.7k | { |
2901 | | /* Padding and Subpel Plane Generation */ |
2902 | 53.7k | ihevce_pad_interp_recon_ctb( |
2903 | 53.7k | ps_pad_interp_recon, |
2904 | 53.7k | ctb_ctr, |
2905 | 53.7k | vert_ctr, |
2906 | 53.7k | ps_ctxt->i4_quality_preset, |
2907 | 53.7k | ps_frm_ctb_prms, |
2908 | 53.7k | ps_ctxt->ai2_scratch, |
2909 | 53.7k | ps_ctxt->i4_bitrate_instance_num, |
2910 | 53.7k | ps_ctxt->ps_func_selector); |
2911 | 53.7k | } |
2912 | 53.7k | } |
2913 | 46.1k | } |
2914 | | /* If SAO is on, then signal completion of the last CTB row of frame */ |
2915 | 53.0k | { |
2916 | 53.0k | if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1)) |
2917 | 46.1k | { |
2918 | 46.1k | if(!ps_ctxt->i4_bitrate_instance_num) |
2919 | 46.1k | { |
2920 | 46.1k | { |
2921 | 46.1k | WORD32 post_ctb_ctr; |
2922 | | |
2923 | 99.9k | for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++) |
2924 | 53.7k | { |
2925 | 53.7k | ihevce_dmgr_map_set_sync( |
2926 | 53.7k | pv_dep_mngr_me_dep_encloop, |
2927 | 53.7k | post_ctb_ctr, |
2928 | 53.7k | vert_ctr, |
2929 | 53.7k | MAP_CTB_COMPLETE); |
2930 | 53.7k | } |
2931 | 46.1k | } |
2932 | 46.1k | } |
2933 | 46.1k | } |
2934 | 53.0k | } |
2935 | 53.0k | } |
2936 | 104k | } |
2937 | | |
2938 | 104k | return; |
2939 | 104k | } |
2940 | | |
2941 | | /*! |
2942 | | ****************************************************************************** |
2943 | | * \if Function name : ihevce_enc_loop_process \endif |
2944 | | * |
2945 | | * \brief |
2946 | | * Frame level enc_loop pass function |
2947 | | * |
2948 | | * \param[in] pv_ctxt : pointer to enc_loop module |
2949 | | * \param[in] ps_frm_lamda : Frame level Lambda params |
2950 | | * \param[in] ps_inp : pointer to input yuv buffer (frame buffer) |
2951 | | * \param[in] ps_ctb_in : pointer CTB structure (output of ME/IPE) (frame buffer) |
2952 | | * \param[out] ps_frm_recon : pointer recon picture structure pointer (frame buffer) |
2953 | | * \param[out] ps_ctb_out : pointer CTB output structure (frame buffer) |
2954 | | * \param[out] ps_cu_out : pointer CU output structure (frame buffer) |
2955 | | * \param[out] ps_tu_out : pointer TU output structure (frame buffer) |
2956 | | * \param[out] pi2_frm_coeffs : pointer coeff output (frame buffer) |
2957 | | * |
2958 | | * \return |
2959 | | * None |
2960 | | * |
2961 | | * Note : Currently the frame level calculations assume that the |
2962 | | * frame width of the input / recon is an exact multiple of the CTB size |
2963 | | * |
2964 | | * \author |
2965 | | * Ittiam |
2966 | | * |
2967 | | ***************************************************************************** |
2968 | | */ |
2969 | | void ihevce_enc_loop_process( |
2970 | | void *pv_ctxt, |
2971 | | ihevce_lap_enc_buf_t *ps_curr_inp, |
2972 | | ctb_analyse_t *ps_ctb_in, |
2973 | | ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse, |
2974 | | recon_pic_buf_t *ps_frm_recon, |
2975 | | cur_ctb_cu_tree_t *ps_cu_tree_out, |
2976 | | ctb_enc_loop_out_t *ps_ctb_out, |
2977 | | cu_enc_loop_out_t *ps_cu_out, |
2978 | | tu_enc_loop_out_t *ps_tu_out, |
2979 | | pu_t *ps_pu_out, |
2980 | | UWORD8 *pu1_frm_ecd_data, |
2981 | | frm_ctb_ctxt_t *ps_frm_ctb_prms, |
2982 | | frm_lambda_ctxt_t *ps_frm_lamda, |
2983 | | multi_thrd_ctxt_t *ps_multi_thrd_ctxt, |
2984 | | WORD32 thrd_id, |
2985 | | WORD32 i4_enc_frm_id, |
2986 | | WORD32 i4_pass) |
2987 | 94.8k | { |
2988 | 94.8k | WORD32 vert_ctr; |
2989 | 94.8k | WORD32 tile_col_idx; |
2990 | 94.8k | iv_enc_yuv_buf_t s_curr_src_bufs; |
2991 | 94.8k | iv_enc_yuv_buf_t s_curr_recon_bufs; |
2992 | 94.8k | iv_enc_yuv_buf_src_t s_curr_recon_bufs_src; |
2993 | 94.8k | UWORD32 *pu4_pu_offsets; |
2994 | 94.8k | WORD32 end_of_frame; |
2995 | 94.8k | UWORD8 *apu1_y_sub_pel_planes[3]; |
2996 | 94.8k | pad_interp_recon_frm_t s_pad_interp_recon; |
2997 | 94.8k | ihevce_enc_loop_master_ctxt_t *ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_ctxt; |
2998 | | |
2999 | 94.8k | ihevce_enc_loop_ctxt_t *ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[thrd_id]; |
3000 | | |
3001 | 94.8k | WORD32 i4_bitrate_instance_num = ps_ctxt->i4_bitrate_instance_num; |
3002 | | |
3003 | | /* initialize the closed loop lambdas and per-call state (thread id, ref-pic flag) for the current frame */ |
3004 | 94.8k | ps_ctxt->i8_cl_ssd_lambda_qf = ps_frm_lamda->i8_cl_ssd_lambda_qf; |
3005 | 94.8k | ps_ctxt->i8_cl_ssd_lambda_chroma_qf = ps_frm_lamda->i8_cl_ssd_lambda_chroma_qf; |
3006 | 94.8k | ps_ctxt->u4_chroma_cost_weighing_factor = ps_frm_lamda->u4_chroma_cost_weighing_factor; |
3007 | 94.8k | ps_ctxt->i4_satd_lamda = ps_frm_lamda->i4_cl_satd_lambda_qf; |
3008 | 94.8k | ps_ctxt->i4_sad_lamda = ps_frm_lamda->i4_cl_sad_type2_lambda_qf; |
3009 | 94.8k | ps_ctxt->thrd_id = thrd_id; |
3010 | 94.8k | ps_ctxt->u1_is_refPic = ps_curr_inp->s_lap_out.i4_is_ref_pic; |
3011 | | |
3012 | 94.8k | #if DISABLE_SAO_WHEN_NOISY |
3013 | 94.8k | ps_ctxt->s_sao_ctxt_t.ps_ctb_data = ps_ctb_in; |
3014 | 94.8k | ps_ctxt->s_sao_ctxt_t.i4_ctb_data_stride = ps_frm_ctb_prms->i4_num_ctbs_horz; |
3015 | 94.8k | #endif |
3016 | | |
3017 | | #if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT |
3018 | | ps_ctxt->pv_err_func_selector = ps_func_selector; /* NOTE(review): ps_func_selector is not declared in this function - confirm it is in scope when this macro is enabled */ |
3019 | | #endif |
3020 | | |
3021 | 94.8k | ps_ctxt->i4_deblk_pad_hpel_cur_pic = |
3022 | 94.8k | ps_frm_recon->i4_deblk_pad_hpel_cur_pic || |
3023 | 7.51k | ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag || |
3024 | 3.07k | ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag; |
3025 | | |
3026 | | /* Share all reference pictures with nbr clients. This flag will be used only |
3027 | | in case of dist-enc mode */ |
3028 | 94.8k | ps_ctxt->i4_share_flag = (ps_frm_recon->i4_is_reference != 0); |
3029 | 94.8k | ps_ctxt->pv_frm_recon = (void *)ps_frm_recon; |
3030 | | |
3031 | | /* Register the frame level ssd lambda for both luma and chroma in the SAO context */ |
3032 | 94.8k | ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf = ps_frm_lamda->i8_cl_ssd_lambda_qf; |
3033 | 94.8k | ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf = ps_frm_lamda->i8_cl_ssd_lambda_chroma_qf; |
3034 | | |
3035 | 94.8k | ihevce_populate_cl_cu_lambda_prms( |
3036 | 94.8k | ps_ctxt, |
3037 | 94.8k | ps_frm_lamda, |
3038 | 94.8k | (WORD32)ps_ctxt->i1_slice_type, |
3039 | 94.8k | ps_curr_inp->s_lap_out.i4_temporal_lyr_id, |
3040 | 94.8k | ENC_LOOP_LAMBDA_TYPE); |
3041 | | |
3042 | 94.8k | ps_ctxt->u1_disable_intra_eval = DISABLE_INTRA_IN_BPICS && |
3043 | 94.8k | (IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) && |
3044 | 31.0k | (ps_ctxt->i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE); |
3045 | | |
3046 | 94.8k | end_of_frame = 0; |
3047 | | |
3048 | | /* ----------------------------------------------------- */ |
3049 | | /* store the stride and dimensions of source and recon */ |
3050 | | /* buffer pointers will be overwritten at every CTB row */ |
3051 | | /* ----------------------------------------------------- */ |
3052 | 94.8k | memcpy(&s_curr_src_bufs, &ps_curr_inp->s_lap_out.s_input_buf, sizeof(iv_enc_yuv_buf_t)); |
3053 | | |
3054 | 94.8k | memcpy(&s_curr_recon_bufs, &ps_frm_recon->s_yuv_buf_desc, sizeof(iv_enc_yuv_buf_t)); |
3055 | | |
3056 | 94.8k | memcpy(&s_curr_recon_bufs_src, &ps_frm_recon->s_yuv_buf_desc_src, sizeof(iv_enc_yuv_buf_src_t)); |
3057 | | |
3058 | | /* get the frame level pu offset pointer */ |
3059 | 94.8k | pu4_pu_offsets = ps_frm_recon->pu4_pu_off; |
3060 | | |
3061 | 94.8k | s_pad_interp_recon.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type; |
3062 | | |
3063 | | /* ---- Loop over CTB rows: fetch row jobs until the job queue returns NULL ---- */ |
3064 | 294k | while(0 == end_of_frame) |
3065 | 199k | { |
3066 | 199k | UWORD8 *pu1_tmp; |
3067 | 199k | UWORD8 *pu1_row_pu_map; |
3068 | 199k | UWORD8 *pu1_row_ecd_data; |
3069 | 199k | ctb_analyse_t *ps_ctb_row_in; |
3070 | 199k | ctb_enc_loop_out_t *ps_ctb_row_out; |
3071 | 199k | cu_enc_loop_out_t *ps_row_cu; |
3072 | 199k | tu_enc_loop_out_t *ps_row_tu; |
3073 | 199k | pu_t *ps_row_pu; |
3074 | 199k | pu_col_mv_t *ps_row_col_pu; |
3075 | 199k | job_queue_t *ps_job; |
3076 | 199k | UWORD32 *pu4_pu_row_offsets; |
3077 | 199k | UWORD16 *pu2_num_pu_row; |
3078 | | |
3079 | 199k | ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse; |
3080 | 199k | cur_ctb_cu_tree_t *ps_row_cu_tree; |
3081 | 199k | UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2); |
3082 | | |
3083 | | /* Get the current row from the job queue */ |
3084 | 199k | ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job( |
3085 | 199k | ps_multi_thrd_ctxt, ENC_LOOP_JOB + i4_bitrate_instance_num, 1, i4_enc_frm_id); |
3086 | | |
3087 | | /* Register the pointer to ctb out of the current frame */ |
3088 | 199k | ps_ctxt->s_sao_ctxt_t.ps_ctb_out = ps_ctb_out; |
3089 | | |
3090 | | /* If no job is returned all rows are done: set the end of frame flag to 1, */ |
3091 | | /* and the current row and tile column index to -1 */ |
3092 | 199k | if(NULL == ps_job) |
3093 | 94.8k | { |
3094 | 94.8k | vert_ctr = -1; |
3095 | 94.8k | tile_col_idx = -1; |
3096 | 94.8k | end_of_frame = 1; |
3097 | 94.8k | } |
3098 | 104k | else |
3099 | 104k | { |
3100 | 104k | ihevce_tile_params_t *ps_col_tile_params_temp; |
3101 | 104k | ihevce_tile_params_t *ps_tile_params; |
3102 | 104k | WORD32 i4_tile_id; |
3103 | | |
3104 | 104k | ASSERT((ENC_LOOP_JOB + i4_bitrate_instance_num) == ps_job->i4_task_type); |
3105 | | /* set the output dependency */ |
3106 | 104k | ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_enc_frm_id); |
3107 | | |
3108 | | /* Obtain the current row's details from the job */ |
3109 | 104k | vert_ctr = ps_job->s_job_info.s_enc_loop_job_info.i4_ctb_row_no; |
3110 | 104k | { |
3111 | | /* Obtain the current column tile index from the job */ |
3112 | 104k | tile_col_idx = ps_job->s_job_info.s_enc_loop_job_info.i4_tile_col_idx; |
3113 | | |
3114 | | /* The tile parameters for the col. idx. Use only the properties |
3115 | | that are the same for all the bottom tiles, like width, start_x, etc. |
3116 | | Don't use height, start_y, etc. */ |
3117 | 104k | ps_col_tile_params_temp = |
3118 | 104k | ((ihevce_tile_params_t *)ps_master_ctxt->pv_tile_params_base + tile_col_idx); |
3119 | | |
3120 | | /* Derive actual tile_id based on vert_ctr */ |
3121 | 104k | i4_tile_id = |
3122 | 104k | *(ps_frm_ctb_prms->pi4_tile_id_map + |
3123 | 104k | vert_ctr * ps_frm_ctb_prms->i4_tile_id_ctb_map_stride + |
3124 | 104k | ps_col_tile_params_temp->i4_first_ctb_x); |
3125 | | /* Derive pointer to current tile prms */ |
3126 | 104k | ps_tile_params = |
3127 | 104k | ((ihevce_tile_params_t *)ps_master_ctxt->pv_tile_params_base + i4_tile_id); |
3128 | 104k | } |
3129 | | |
3130 | 104k | ps_ctxt->i4_tile_col_idx = tile_col_idx; |
3131 | | /* derive the current ctb row pointers */ |
3132 | | |
3133 | | /* luma src */ |
3134 | 104k | pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf + |
3135 | 104k | (ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_y * |
3136 | 104k | ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd) + |
3137 | 104k | ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_x; |
3138 | | |
3139 | 104k | pu1_tmp += |
3140 | 104k | (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * |
3141 | 104k | ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd); |
3142 | | |
3143 | 104k | s_curr_src_bufs.pv_y_buf = pu1_tmp; |
3144 | | |
3145 | 104k | if(!ps_ctxt->u1_is_input_data_hbd) |
3146 | 104k | { |
3147 | | /* cb src */ |
3148 | 104k | pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf; |
3149 | 104k | pu1_tmp += |
3150 | 104k | (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> ((is_inp_422 == 1) ? 0 : 1)) * |
3151 | 104k | ps_curr_inp->s_lap_out.s_input_buf.i4_uv_strd); |
3152 | | |
3153 | 104k | s_curr_src_bufs.pv_u_buf = pu1_tmp; |
3154 | 104k | } |
3155 | | |
3156 | | /* luma recon */ |
3157 | 104k | pu1_tmp = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf; |
3158 | 104k | pu1_tmp += |
3159 | 104k | (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd); |
3160 | | |
3161 | 104k | s_curr_recon_bufs.pv_y_buf = pu1_tmp; |
3162 | 104k | s_pad_interp_recon.pu1_luma_recon = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf; |
3163 | 104k | s_pad_interp_recon.i4_luma_recon_stride = ps_frm_recon->s_yuv_buf_desc.i4_y_strd; |
3164 | 104k | if(!ps_ctxt->u1_is_input_data_hbd) |
3165 | 104k | { |
3166 | | /* cb recon */ |
3167 | 104k | pu1_tmp = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf; |
3168 | 104k | pu1_tmp += |
3169 | 104k | (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> ((is_inp_422 == 1) ? 0 : 1)) * |
3170 | 104k | ps_frm_recon->s_yuv_buf_desc.i4_uv_strd); |
3171 | | |
3172 | 104k | s_curr_recon_bufs.pv_u_buf = pu1_tmp; |
3173 | 104k | s_pad_interp_recon.pu1_chrm_recon = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf; |
3174 | 104k | s_pad_interp_recon.i4_chrm_recon_stride = ps_frm_recon->s_yuv_buf_desc.i4_uv_strd; |
3175 | | |
3176 | 104k | s_pad_interp_recon.i4_ctb_size = ps_frm_ctb_prms->i4_ctb_size; |
3177 | | |
3178 | | /* Register the source buffer pointers in sao context */ |
3179 | 104k | ps_ctxt->s_sao_ctxt_t.pu1_frm_luma_src_buf = |
3180 | 104k | (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf + |
3181 | 104k | (ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_y * |
3182 | 104k | ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd) + |
3183 | 104k | ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_x; |
3184 | | |
3185 | 104k | ps_ctxt->s_sao_ctxt_t.i4_frm_luma_src_stride = |
3186 | 104k | ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd; |
3187 | | |
3188 | 104k | ps_ctxt->s_sao_ctxt_t.pu1_frm_chroma_src_buf = |
3189 | 104k | (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf; |
3190 | | |
3191 | 104k | ps_ctxt->s_sao_ctxt_t.i4_frm_chroma_src_stride = |
3192 | 104k | ps_curr_inp->s_lap_out.s_input_buf.i4_uv_strd; |
3193 | 104k | } |
3194 | | |
3195 | | /* Subpel planes hxfy, fxhy, hxhy */ |
3196 | 104k | pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[0]; |
3197 | 104k | pu1_tmp += |
3198 | 104k | (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd); |
3199 | 104k | apu1_y_sub_pel_planes[0] = pu1_tmp; |
3200 | 104k | s_pad_interp_recon.pu1_sbpel_hxfy = ps_frm_recon->apu1_y_sub_pel_planes[0]; |
3201 | | |
3202 | 104k | pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[1]; |
3203 | 104k | pu1_tmp += |
3204 | 104k | (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd); |
3205 | 104k | apu1_y_sub_pel_planes[1] = pu1_tmp; |
3206 | 104k | s_pad_interp_recon.pu1_sbpel_fxhy = ps_frm_recon->apu1_y_sub_pel_planes[1]; |
3207 | | |
3208 | 104k | pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[2]; |
3209 | 104k | pu1_tmp += |
3210 | 104k | (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd); |
3211 | 104k | apu1_y_sub_pel_planes[2] = pu1_tmp; |
3212 | 104k | s_pad_interp_recon.pu1_sbpel_hxhy = ps_frm_recon->apu1_y_sub_pel_planes[2]; |
3213 | | |
3214 | | /* row level coeffs buffer */ |
3215 | 104k | pu1_row_ecd_data = |
3216 | 104k | pu1_frm_ecd_data + |
3217 | 104k | (vert_ctr * |
3218 | 104k | ((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_max_tus_in_row << 1) |
3219 | 104k | : ((ps_frm_ctb_prms->i4_max_tus_in_row * 3) >> 1)) * |
3220 | 104k | MAX_SCAN_COEFFS_BYTES_4x4); |
3221 | | |
3222 | | /* Row level CU buffer */ |
3223 | 104k | ps_row_cu = ps_cu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_cus_in_row); |
3224 | | |
3225 | | /* Row level TU buffer */ |
3226 | 104k | ps_row_tu = ps_tu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_tus_in_row); |
3227 | | |
3228 | | /* Row level PU buffer */ |
3229 | 104k | ps_row_pu = ps_pu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_pus_in_row); |
3230 | | |
3231 | | /* Row level colocated PU buffer */ |
3232 | | /* ps_frm_col_mv has (i4_num_ctbs_horz + 1) CTBs for stride */ |
3233 | 104k | ps_row_col_pu = |
3234 | 104k | ps_frm_recon->ps_frm_col_mv + (vert_ctr * (ps_frm_ctb_prms->i4_num_ctbs_horz + 1) * |
3235 | 104k | ps_frm_ctb_prms->i4_num_pus_in_ctb); |
3236 | | /* Row level col PU map buffer */ |
3237 | | /* pu1_frm_pu_map has (i4_num_ctbs_horz + 1) CTBs for stride */ |
3238 | 104k | pu1_row_pu_map = |
3239 | 104k | ps_frm_recon->pu1_frm_pu_map + (vert_ctr * (ps_frm_ctb_prms->i4_num_ctbs_horz + 1) * |
3240 | 104k | ps_frm_ctb_prms->i4_num_pus_in_ctb); |
3241 | | /* row ctb in pointer */ |
3242 | 104k | ps_ctb_row_in = ps_ctb_in + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz; |
3243 | | |
3244 | | /* row ctb out pointer */ |
3245 | 104k | ps_ctb_row_out = ps_ctb_out + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz; |
3246 | | |
3247 | | /* row number of PUs map pointer */ |
3248 | 104k | pu2_num_pu_row = |
3249 | 104k | ps_frm_recon->pu2_num_pu_map + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz; |
3250 | | |
3251 | | /* row pu offsets pointer */ |
3252 | 104k | pu4_pu_row_offsets = pu4_pu_offsets + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz; |
3253 | | /* store the PU offset of the first CTB in the row */ |
3254 | 104k | *pu4_pu_row_offsets = vert_ctr * ps_frm_ctb_prms->i4_max_pus_in_row; |
3255 | | /* Initialize ptr to current IPE row */ |
3256 | 104k | ps_row_ipe_analyse = ps_ipe_analyse + (vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz); |
3257 | | |
3258 | | /* Initialize ptr to current row */ |
3259 | 104k | ps_row_cu_tree = ps_cu_tree_out + |
3260 | 104k | (vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE); |
3261 | | |
3262 | | /* Get the EncLoop Top-Right CU Dep Mngr */ |
3263 | 104k | ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right = |
3264 | 104k | ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[ps_ctxt->i4_enc_frm_id] |
3265 | 104k | [i4_bitrate_instance_num]; |
3266 | | /* Get the EncLoop Deblock Dep Mngr */ |
3267 | 104k | ps_ctxt->pv_dep_mngr_enc_loop_dblk = |
3268 | 104k | ps_master_ctxt |
3269 | 104k | ->aapv_dep_mngr_enc_loop_dblk[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]; |
3270 | | /* Get the EncLoop Sao Dep Mngr */ |
3271 | 104k | ps_ctxt->pv_dep_mngr_enc_loop_sao = |
3272 | 104k | ps_master_ctxt |
3273 | 104k | ->aapv_dep_mngr_enc_loop_sao[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]; |
3274 | | |
3275 | 104k | ps_ctxt->pu1_curr_row_cabac_state = &ps_master_ctxt->au1_ctxt_models[vert_ctr][0]; |
3276 | | |
3277 | 104k | { |
3278 | | /* derive the pointers of top row buffers (row index vert_ctr - 1) */ |
3279 | 104k | ps_ctxt->pv_top_row_luma = |
3280 | 104k | (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[ps_ctxt->i4_enc_frm_id] + |
3281 | 104k | (ps_ctxt->i4_frm_top_row_luma_size * ps_ctxt->i4_bitrate_instance_num) + |
3282 | 104k | (vert_ctr - 1) * ps_ctxt->i4_top_row_luma_stride; |
3283 | | |
3284 | 104k | ps_ctxt->pv_top_row_chroma = |
3285 | 104k | (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[ps_ctxt->i4_enc_frm_id] + |
3286 | 104k | (ps_ctxt->i4_frm_top_row_chroma_size * ps_ctxt->i4_bitrate_instance_num) + |
3287 | 104k | (vert_ctr - 1) * ps_ctxt->i4_top_row_chroma_stride; |
3288 | | |
3289 | | /* derive the pointers of bottom row buffers to update current row data */ |
3290 | 104k | ps_ctxt->pv_bot_row_luma = |
3291 | 104k | (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[ps_ctxt->i4_enc_frm_id] + |
3292 | 104k | (ps_ctxt->i4_frm_top_row_luma_size * ps_ctxt->i4_bitrate_instance_num) + |
3293 | 104k | (vert_ctr)*ps_ctxt->i4_top_row_luma_stride; |
3294 | | |
3295 | 104k | ps_ctxt->pv_bot_row_chroma = |
3296 | 104k | (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[ps_ctxt->i4_enc_frm_id] + |
3297 | 104k | (ps_ctxt->i4_frm_top_row_chroma_size * ps_ctxt->i4_bitrate_instance_num) + |
3298 | 104k | (vert_ctr)*ps_ctxt->i4_top_row_chroma_stride; |
3299 | | |
3300 | | /* Register the recon buffer pointers in sao context */ |
3301 | 104k | ps_ctxt->s_sao_ctxt_t.pu1_frm_luma_recon_buf = |
3302 | 104k | (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf; |
3303 | 104k | ps_ctxt->s_sao_ctxt_t.i4_frm_luma_recon_stride = |
3304 | 104k | ps_frm_recon->s_yuv_buf_desc.i4_y_strd; |
3305 | | |
3306 | 104k | ps_ctxt->s_sao_ctxt_t.pu1_frm_chroma_recon_buf = |
3307 | 104k | (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf; |
3308 | 104k | ps_ctxt->s_sao_ctxt_t.i4_frm_chroma_recon_stride = |
3309 | 104k | ps_frm_recon->s_yuv_buf_desc.i4_uv_strd; |
3310 | | |
3311 | 104k | ps_ctxt->s_sao_ctxt_t.ps_rdopt_entropy_ctxt = &ps_ctxt->s_rdopt_entropy_ctxt; |
3312 | | |
3313 | 104k | ps_ctxt->s_sao_ctxt_t.i4_frm_top_luma_buf_stride = |
3314 | 104k | ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd + 1; |
3315 | | |
3316 | 104k | ps_ctxt->s_sao_ctxt_t.i4_frm_top_chroma_buf_stride = |
3317 | 104k | ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd + 2; |
3318 | 104k | } |
3319 | | |
3320 | 104k | ps_ctxt->ps_top_row_nbr = |
3321 | 104k | ps_ctxt->aps_frm_top_row_nbr[ps_ctxt->i4_enc_frm_id] + |
3322 | 104k | (ps_ctxt->i4_frm_top_row_nbr_size * ps_ctxt->i4_bitrate_instance_num) + |
3323 | 104k | (vert_ctr - 1) * ps_ctxt->i4_top_row_nbr_stride; |
3324 | | |
3325 | 104k | ps_ctxt->ps_bot_row_nbr = |
3326 | 104k | ps_ctxt->aps_frm_top_row_nbr[ps_ctxt->i4_enc_frm_id] + |
3327 | 104k | (ps_ctxt->i4_frm_top_row_nbr_size * ps_ctxt->i4_bitrate_instance_num) + |
3328 | 104k | (vert_ctr)*ps_ctxt->i4_top_row_nbr_stride; |
3329 | | |
3330 | 104k | if(vert_ctr > 0) |
3331 | 9.64k | { |
3332 | 9.64k | ps_ctxt->pu1_top_rt_cabac_state = &ps_master_ctxt->au1_ctxt_models[vert_ctr - 1][0]; |
3333 | 9.64k | } |
3334 | 94.8k | else |
3335 | 94.8k | { |
3336 | 94.8k | ps_ctxt->pu1_top_rt_cabac_state = NULL; |
3337 | 94.8k | } |
3338 | | |
3339 | 104k | ASSERT( |
3340 | 104k | ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0] |
3341 | 104k | .ps_pps->i1_sign_data_hiding_flag == |
3342 | 104k | ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1] |
3343 | 104k | .ps_pps->i1_sign_data_hiding_flag); |
3344 | | |
3345 | | /* call the row level processing function */ |
3346 | 104k | ihevce_enc_loop_process_row( |
3347 | 104k | ps_ctxt, |
3348 | 104k | &s_curr_src_bufs, |
3349 | 104k | &s_curr_recon_bufs, |
3350 | 104k | &s_curr_recon_bufs_src, |
3351 | 104k | &apu1_y_sub_pel_planes[0], |
3352 | 104k | ps_ctb_row_in, |
3353 | 104k | ps_ctb_row_out, |
3354 | 104k | ps_row_ipe_analyse, |
3355 | 104k | ps_row_cu_tree, |
3356 | 104k | ps_row_cu, |
3357 | 104k | ps_row_tu, |
3358 | 104k | ps_row_pu, |
3359 | 104k | ps_row_col_pu, |
3360 | 104k | pu2_num_pu_row, |
3361 | 104k | pu1_row_pu_map, |
3362 | 104k | pu1_row_ecd_data, |
3363 | 104k | pu4_pu_row_offsets, |
3364 | 104k | ps_frm_ctb_prms, |
3365 | 104k | vert_ctr, |
3366 | 104k | ps_frm_recon, |
3367 | 104k | ps_ctxt->pv_dep_mngr_encloop_dep_me, |
3368 | 104k | &s_pad_interp_recon, |
3369 | 104k | i4_pass, |
3370 | 104k | ps_multi_thrd_ctxt, |
3371 | 104k | ps_tile_params); |
3372 | 104k | } |
3373 | 199k | } |
3374 | 94.8k | } |
3375 | | |
3376 | | /*! |
3377 | | ****************************************************************************** |
3378 | | * \if Function name : ihevce_enc_loop_dblk_get_prms_dep_mngr \endif |
3379 | | * |
3380 | | * \brief Returns to the caller the key attribute relevant for the dependency |
3381 | | * manager, i.e. the number of vertical units (of height 64) in a layer |
3382 | | * |
3383 | | * \par Description: |
3384 | | * The count is i4_ht divided by the fixed unit height of 64, rounded up |
3385 | | * \param[in] i4_ht : height of the layer |
3386 | | * \param[out] pi4_num_vert_units_in_lyr : Pointer to store num vertical units |
3387 | | * for deblocking |
3388 | | * |
3389 | | * \return |
3390 | | * None |
3391 | | * |
3392 | | * \author |
3393 | | * Ittiam |
3394 | | * |
3395 | | ***************************************************************************** |
3396 | | */ |
3397 | | void ihevce_enc_loop_dblk_get_prms_dep_mngr(WORD32 i4_ht, WORD32 *pi4_num_vert_units_in_lyr) |
3398 | 21.4k | { |
3399 | | /* Unit height used to divide the layer vertically */ |
3400 | 21.4k | WORD32 unit_ht_c; |
3401 | 21.4k | WORD32 ctb_size = 64; |
3402 | | |
3403 | | /* the unit height is the (fixed) ctb size */ |
3404 | 21.4k | unit_ht_c = ctb_size; |
3405 | | |
3406 | | /* set the number of vertical units (ceiling division) */ |
3407 | 21.4k | *pi4_num_vert_units_in_lyr = (i4_ht + unit_ht_c - 1) / unit_ht_c; |
3408 | 21.4k | } |
3409 | | |
3410 | | /*! |
3411 | | ****************************************************************************** |
3412 | | * \if Function name : ihevce_enc_loop_get_num_mem_recs \endif |
3413 | | * |
3414 | | * \brief |
3415 | | * Returns the number of memory records required by the enc_loop module |
3416 | | * Note : Includes NUM_ENC_LOOP_MEM_RECS plus the records of the dblk, sao, |
3417 | | * cu top-right and auxiliary bitrate dependency managers |
3418 | | * \return |
3419 | | * Total number of memory records |
3420 | | * |
3421 | | * \author |
3422 | | * Ittiam |
3423 | | * |
3424 | | ***************************************************************************** |
3425 | | */ |
3426 | | WORD32 |
3427 | | ihevce_enc_loop_get_num_mem_recs(WORD32 i4_num_bitrate_inst, WORD32 i4_num_enc_loop_frm_pllel) |
3428 | 7.14k | { |
3429 | 7.14k | WORD32 enc_loop_mem_recs = NUM_ENC_LOOP_MEM_RECS; |
3430 | 7.14k | WORD32 enc_loop_dblk_dep_mngr_mem_recs = |
3431 | 7.14k | i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs(); |
3432 | 7.14k | WORD32 enc_loop_sao_dep_mngr_mem_recs = |
3433 | 7.14k | i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs(); |
3434 | 7.14k | WORD32 enc_loop_cu_top_right_dep_mngr_mem_recs = |
3435 | 7.14k | i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs(); |
3436 | 7.14k | WORD32 enc_loop_aux_br_dep_mngr_mem_recs = |
3437 | 7.14k | i4_num_enc_loop_frm_pllel * (i4_num_bitrate_inst - 1) * ihevce_dmgr_get_num_mem_recs(); |
3438 | | |
3439 | 7.14k | return ( |
3440 | 7.14k | (enc_loop_mem_recs + enc_loop_dblk_dep_mngr_mem_recs + enc_loop_sao_dep_mngr_mem_recs + |
3441 | 7.14k | enc_loop_cu_top_right_dep_mngr_mem_recs + enc_loop_aux_br_dep_mngr_mem_recs)); |
3442 | 7.14k | } |
3443 | | /*! |
3444 | | ****************************************************************************** |
3445 | | * \if Function name : ihevce_enc_loop_get_mem_recs \endif |
3446 | | * |
3447 | | * \brief |
3448 | | * Memory requirements are returned for ENC_LOOP. |
3449 | | * |
3450 | | * \param[in,out] ps_mem_tab : pointer to memory descriptors table |
3451 | | * \param[in] ps_init_prms : Create time static parameters |
3452 | | * \param[in] i4_num_proc_thrds : Number of processing threads for this module |
3453 | | * \param[in] i4_mem_space : memspace in which memory request should be done |
3454 | | * |
3455 | | * \return |
3456 | | * None |
3457 | | * |
3458 | | * \author |
3459 | | * Ittiam |
3460 | | * |
3461 | | ***************************************************************************** |
3462 | | */ |
3463 | | WORD32 ihevce_enc_loop_get_mem_recs( |
3464 | | iv_mem_rec_t *ps_mem_tab, |
3465 | | ihevce_static_cfg_params_t *ps_init_prms, |
3466 | | WORD32 i4_num_proc_thrds, |
3467 | | WORD32 i4_num_bitrate_inst, |
3468 | | WORD32 i4_num_enc_loop_frm_pllel, |
3469 | | WORD32 i4_mem_space, |
3470 | | WORD32 i4_resolution_id) |
3471 | 3.57k | { |
3472 | 3.57k | UWORD32 u4_width, u4_height, n_tabs; |
3473 | 3.57k | UWORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame; |
3474 | 3.57k | WORD32 ctr; |
3475 | 3.57k | WORD32 i4_chroma_format = ps_init_prms->s_src_prms.i4_chr_format; |
3476 | | |
3477 | | /* derive frame dimensions */ |
3478 | | /*width of the input YUV to be encoded */ |
3479 | 3.57k | u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width; |
3480 | | /*making the width a multiple of CTB size*/ |
3481 | 3.57k | u4_width += SET_CTB_ALIGN( |
3482 | 3.57k | ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE); |
3483 | | |
3484 | | /*height of the input YUV to be encoded */ |
3485 | 3.57k | u4_height = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height; |
3486 | | /*making the height a multiple of CTB size*/ |
3487 | 3.57k | u4_height += SET_CTB_ALIGN( |
3488 | 3.57k | ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, MAX_CTB_SIZE); |
3489 | 3.57k | u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE); |
3490 | 3.57k | u4_ctb_rows_in_a_frame = (u4_height / MAX_CTB_SIZE); |
3491 | | /* memories should be requested assuming worst case requirememnts */ |
3492 | | |
3493 | | /* Module context structure */ |
3494 | 3.57k | ps_mem_tab[ENC_LOOP_CTXT].i4_mem_size = sizeof(ihevce_enc_loop_master_ctxt_t); |
3495 | | |
3496 | 3.57k | ps_mem_tab[ENC_LOOP_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3497 | | |
3498 | 3.57k | ps_mem_tab[ENC_LOOP_CTXT].i4_mem_alignment = 8; |
3499 | | |
3500 | | /* Thread context structure */ |
3501 | 3.57k | ps_mem_tab[ENC_LOOP_THRDS_CTXT].i4_mem_size = |
3502 | 3.57k | i4_num_proc_thrds * sizeof(ihevce_enc_loop_ctxt_t); |
3503 | | |
3504 | 3.57k | ps_mem_tab[ENC_LOOP_THRDS_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3505 | | |
3506 | 3.57k | ps_mem_tab[ENC_LOOP_THRDS_CTXT].i4_mem_alignment = 16; |
3507 | | |
3508 | | /* Scale matrices */ |
3509 | 3.57k | ps_mem_tab[ENC_LOOP_SCALE_MAT].i4_mem_size = 2 * MAX_TU_SIZE * MAX_TU_SIZE * sizeof(WORD16); |
3510 | | |
3511 | 3.57k | ps_mem_tab[ENC_LOOP_SCALE_MAT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3512 | | |
3513 | 3.57k | ps_mem_tab[ENC_LOOP_SCALE_MAT].i4_mem_alignment = 8; |
3514 | | |
3515 | | /* Rescale matrices */ |
3516 | 3.57k | ps_mem_tab[ENC_LOOP_RESCALE_MAT].i4_mem_size = 2 * MAX_TU_SIZE * MAX_TU_SIZE * sizeof(WORD16); |
3517 | | |
3518 | 3.57k | ps_mem_tab[ENC_LOOP_RESCALE_MAT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3519 | | |
3520 | 3.57k | ps_mem_tab[ENC_LOOP_RESCALE_MAT].i4_mem_alignment = 8; |
3521 | | |
3522 | | /* top row luma one row of pixel data per CTB row */ |
3523 | 3.57k | if(ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) |
3524 | 0 | { |
3525 | 0 | ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_size = (u4_ctb_rows_in_a_frame + 1) * |
3526 | 0 | (u4_width + MAX_CU_SIZE + 1) * sizeof(UWORD16) * |
3527 | 0 | i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel; |
3528 | 0 | } |
3529 | 3.57k | else |
3530 | 3.57k | { |
3531 | 3.57k | ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_size = (u4_ctb_rows_in_a_frame + 1) * |
3532 | 3.57k | (u4_width + MAX_CU_SIZE + 1) * sizeof(UWORD8) * |
3533 | 3.57k | i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel; |
3534 | 3.57k | } |
3535 | | |
3536 | 3.57k | ps_mem_tab[ENC_LOOP_TOP_LUMA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3537 | | |
3538 | 3.57k | ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_alignment = 8; |
3539 | | |
3540 | | /* top row chroma */ |
3541 | 3.57k | if(ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) |
3542 | 0 | { |
3543 | 0 | ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_size = |
3544 | 0 | (u4_ctb_rows_in_a_frame + 1) * (u4_width + MAX_CU_SIZE + 2) * sizeof(UWORD16) * |
3545 | 0 | i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel; |
3546 | 0 | } |
3547 | 3.57k | else |
3548 | 3.57k | { |
3549 | 3.57k | ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_size = |
3550 | 3.57k | (u4_ctb_rows_in_a_frame + 1) * (u4_width + MAX_CU_SIZE + 2) * sizeof(UWORD8) * |
3551 | 3.57k | i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel; |
3552 | 3.57k | } |
3553 | | |
3554 | 3.57k | ps_mem_tab[ENC_LOOP_TOP_CHROMA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3555 | | |
3556 | 3.57k | ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_alignment = 8; |
3557 | | |
3558 | | /* top row neighbour 4x4 */ |
3559 | 3.57k | ps_mem_tab[ENC_LOOP_TOP_NBR4X4].i4_mem_size = |
3560 | 3.57k | (u4_ctb_rows_in_a_frame + 1) * (((u4_width + MAX_CU_SIZE) >> 2) + 1) * sizeof(nbr_4x4_t) * |
3561 | 3.57k | i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel; |
3562 | | |
3563 | 3.57k | ps_mem_tab[ENC_LOOP_TOP_NBR4X4].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3564 | | |
3565 | 3.57k | ps_mem_tab[ENC_LOOP_TOP_NBR4X4].i4_mem_alignment = 8; |
3566 | | |
3567 | | /* memory to dump rate control parameters by each thread for each bit-rate instance */ |
3568 | | /* RC params collated by each thread for each bit-rate instance separately */ |
3569 | 3.57k | ps_mem_tab[ENC_LOOP_RC_PARAMS].i4_mem_size = i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel * |
3570 | 3.57k | i4_num_proc_thrds * sizeof(enc_loop_rc_params_t); |
3571 | | |
3572 | 3.57k | ps_mem_tab[ENC_LOOP_RC_PARAMS].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3573 | | |
3574 | 3.57k | ps_mem_tab[ENC_LOOP_RC_PARAMS].i4_mem_alignment = 8; |
3575 | | /* Memory required for deblocking */ |
3576 | 3.57k | { |
3577 | | /* Memory to store Qp of top4x4 blocks for each CTB row. |
3578 | | This memory is allocated at frame level and shared across |
3579 | | all cores. The Qp values are needed to form Qp-map(described |
3580 | | in the ENC_LOOP_DEBLOCKING section below)*/ |
3581 | | |
3582 | 3.57k | UWORD32 u4_size_bs_memory, u4_size_qp_memory; |
3583 | 3.57k | UWORD32 u4_size_top_4x4_qp_memory; |
3584 | | |
3585 | | /*Memory required to store Qp of top4x4 blocks for a CTB row for entire frame*/ |
3586 | | /*Space required per CTB*/ |
3587 | 3.57k | u4_size_top_4x4_qp_memory = (MAX_CTB_SIZE / 4); |
3588 | | /*Space required for entire CTB row*/ |
3589 | 3.57k | u4_size_top_4x4_qp_memory *= u4_ctb_in_a_row; |
3590 | | /*Space required for entire frame*/ |
3591 | 3.57k | u4_size_top_4x4_qp_memory *= u4_ctb_rows_in_a_frame; |
3592 | | /*Space required for multiple bitrate*/ |
3593 | 3.57k | u4_size_top_4x4_qp_memory *= i4_num_bitrate_inst; |
3594 | | /*Space required for multiple frames in parallel*/ |
3595 | 3.57k | u4_size_top_4x4_qp_memory *= i4_num_enc_loop_frm_pllel; |
3596 | | |
3597 | 3.57k | ps_mem_tab[ENC_LOOP_QP_TOP_4X4].i4_mem_size = u4_size_top_4x4_qp_memory; |
3598 | 3.57k | ps_mem_tab[ENC_LOOP_QP_TOP_4X4].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3599 | 3.57k | ps_mem_tab[ENC_LOOP_QP_TOP_4X4].i4_mem_alignment = 8; |
3600 | | |
3601 | | /* Memory allocation of BS and Qp-map for deblocking at CTB-row level: |
3602 | | ## Boundary Strength(Vertical): |
3603 | | BS stored per CTB at one stretch i.e. for a 64x CTB first 8 entries belongs to first CTB |
3604 | | of the row followed by 8 entries of second CTB and so on. |
3605 | | 8 entries: Includes left edge of current CTB and excludes right edge. |
3606 | | ## Boundary Strength(Horizontal): |
3607 | | Same as Vertical. |
3608 | | 8 entries: Includes top edge of current CTB and excludes bottom edge. |
3609 | | |
3610 | | ## Qp-map storage: |
3611 | | T0 T1 T2 T3 T4 T5 ..........to the end of the CTB row |
3612 | | 00 01 02 03 04 05 ..........to the end of the CTB row |
3613 | | 10 11 12 13 14 15 ..........to the end of the CTB row |
3614 | | 20 21 22 23 24 25 ..........to the end of the CTB row |
3615 | | 30 31 32 33 34 35 ..........to the end of the CTB row |
3616 | | 40 41 42 43 44 45 ..........to the end of the CTB row |
3617 | | ............................to the end of the CTB row |
3618 | | upto height_of_CTB..........to the end of the CTB row |
3619 | | |
3620 | | Qp is stored for each "4x4 block" in a proper 2-D array format (One entry for each 4x4). |
3621 | | A 2-D array of height= (height_of_CTB +1), and width = (width_of_CTB). |
3622 | | where, |
3623 | | => height_of_CTB = number of 4x4 blocks in a CTB vertically, |
3624 | | => +1 is done to store Qp of lowest 4x4-block layer of top-CTB |
3625 | | in order to deblock top edge of current CTB. |
3626 | | => width_of_CTB = number of 4x4 blocks in a CTB horizontally, |
3627 | | */ |
3628 | | |
3629 | | /*Memory(in bytes) required for storing Boundary Strength for entire CTB row*/ |
3630 | | /*1 vertical edge per 8 pixel*/ |
3631 | 3.57k | u4_size_bs_memory = (MAX_CTB_SIZE >> 3); |
3632 | | /*Vertical edges for entire width of CTB row*/ |
3633 | 3.57k | u4_size_bs_memory *= u4_ctb_in_a_row; |
3634 | | /*Each vertical edge of CTB row is 4 bytes*/ |
3635 | 3.57k | u4_size_bs_memory = u4_size_bs_memory << 2; |
3636 | | /*Adding Memory required for storing horizontal BS by doubling*/ |
3637 | 3.57k | u4_size_bs_memory = u4_size_bs_memory << 1; |
3638 | | |
3639 | | /*Memory(in bytes) required for storing Qp at 4x4 level for entire CTB row*/ |
3640 | | /*Number of 4x4 blocks in the width of a CTB*/ |
3641 | 3.57k | u4_size_qp_memory = (MAX_CTB_SIZE >> 2); |
3642 | | /*Number of 4x4 blocks in the height of a CTB. Adding 1 to store Qp of lowest |
3643 | | 4x4-block layer of top-CTB in order to deblock top edge of current CTB*/ |
3644 | 3.57k | u4_size_qp_memory *= ((MAX_CTB_SIZE >> 2) + 1); |
3645 | | /*Storage for entire CTB row*/ |
3646 | 3.57k | u4_size_qp_memory *= u4_ctb_in_a_row; |
3647 | | |
3648 | | /*Multiplying by i4_num_proc_thrds to assign memory for each core*/ |
3649 | 3.57k | ps_mem_tab[ENC_LOOP_DEBLOCKING].i4_mem_size = |
3650 | 3.57k | i4_num_proc_thrds * (u4_size_bs_memory + u4_size_qp_memory); |
3651 | | |
3652 | 3.57k | ps_mem_tab[ENC_LOOP_DEBLOCKING].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3653 | | |
3654 | 3.57k | ps_mem_tab[ENC_LOOP_DEBLOCKING].i4_mem_alignment = 8; |
3655 | 3.57k | } |
3656 | | |
3657 | | /* Memory required to store pred for 422 chroma */ |
3658 | 3.57k | ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].i4_mem_size = |
3659 | 3.57k | i4_num_proc_thrds * MAX_CTB_SIZE * MAX_CTB_SIZE * 2 * |
3660 | 3.57k | (i4_chroma_format == IV_YUV_422SP_UV) * |
3661 | 3.57k | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8); |
3662 | | |
3663 | 3.57k | ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3664 | | |
3665 | 3.57k | ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].i4_mem_alignment = 8; |
3666 | | |
3667 | | /* Memory for inter pred buffers */ |
3668 | 3.57k | { |
3669 | 3.57k | WORD32 i4_num_bufs_per_thread = 0; |
3670 | | |
3671 | 3.57k | WORD32 i4_buf_size_per_cand = |
3672 | 3.57k | (MAX_CTB_SIZE) * (MAX_CTB_SIZE) * |
3673 | 3.57k | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8); |
3674 | 3.57k | WORD32 i4_quality_preset = |
3675 | 3.57k | ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset; |
3676 | 3.57k | switch(i4_quality_preset) |
3677 | 3.57k | { |
3678 | 1.72k | case IHEVCE_QUALITY_P0: |
3679 | 1.72k | { |
3680 | 1.72k | i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_PQ; |
3681 | 1.72k | break; |
3682 | 0 | } |
3683 | 280 | case IHEVCE_QUALITY_P2: |
3684 | 280 | { |
3685 | 280 | i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_HQ; |
3686 | 280 | break; |
3687 | 0 | } |
3688 | 396 | case IHEVCE_QUALITY_P3: |
3689 | 396 | { |
3690 | 396 | i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_MS; |
3691 | 396 | break; |
3692 | 0 | } |
3693 | 253 | case IHEVCE_QUALITY_P4: |
3694 | 253 | { |
3695 | 253 | i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_HS; |
3696 | 253 | break; |
3697 | 0 | } |
3698 | 299 | case IHEVCE_QUALITY_P5: |
3699 | 677 | case IHEVCE_QUALITY_P6: |
3700 | 919 | case IHEVCE_QUALITY_P7: |
3701 | 919 | { |
3702 | 919 | i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_ES; |
3703 | 919 | break; |
3704 | 677 | } |
3705 | 0 | default: |
3706 | 0 | { |
3707 | 0 | ASSERT(0); |
3708 | 0 | } |
3709 | 3.57k | } |
3710 | | |
3711 | 3.57k | i4_num_bufs_per_thread += 4; |
3712 | | |
3713 | 3.57k | ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_size = |
3714 | 3.57k | i4_num_bufs_per_thread * i4_num_proc_thrds * i4_buf_size_per_cand; |
3715 | | |
3716 | 3.57k | ps_mem_tab[ENC_LOOP_INTER_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3717 | | |
3718 | 3.57k | ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_alignment = 8; |
3719 | 3.57k | } |
3720 | | |
3721 | | /* Memory required to store chroma intra pred */ |
3722 | 0 | ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].i4_mem_size = |
3723 | 3.57k | i4_num_proc_thrds * (MAX_TU_SIZE) * (MAX_TU_SIZE)*2 * NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD * |
3724 | 3.57k | ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) * |
3725 | 3.57k | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8); |
3726 | | |
3727 | 3.57k | ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3728 | | |
3729 | 3.57k | ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].i4_mem_alignment = 8; |
3730 | | |
3731 | | /* Memory required to store pred for reference substitution output */ |
3732 | | /* While (MAX_TU_SIZE * 2 * 2) + 1 is the actual size needed, |
3733 | | allocate 16 bytes to the left and 7 bytes to the right to facilitate |
3734 | | SIMD access */ |
3735 | 3.57k | ps_mem_tab[ENC_LOOP_REF_SUB_OUT].i4_mem_size = |
3736 | 3.57k | i4_num_proc_thrds * (((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING) |
3737 | 3.57k | + INTRAPRED_SIMD_LEFT_PADDING)* |
3738 | 3.57k | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8); |
3739 | | |
3740 | 3.57k | ps_mem_tab[ENC_LOOP_REF_SUB_OUT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3741 | | |
3742 | 3.57k | ps_mem_tab[ENC_LOOP_REF_SUB_OUT].i4_mem_alignment = 8; |
3743 | | |
3744 | | /* Memory required to store pred for reference filtering output */ |
3745 | | /* While (MAX_TU_SIZE * 2 * 2) + 1 is the actual size needed, |
3746 | | allocate 16 bytes to the left and 7 bytes to the right to facilitate |
3747 | | SIMD access */ |
3748 | 3.57k | ps_mem_tab[ENC_LOOP_REF_FILT_OUT].i4_mem_size = |
3749 | 3.57k | i4_num_proc_thrds * (((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING) |
3750 | 3.57k | + INTRAPRED_SIMD_LEFT_PADDING)* |
3751 | 3.57k | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8); |
3752 | | |
3753 | 3.57k | ps_mem_tab[ENC_LOOP_REF_FILT_OUT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3754 | | |
3755 | 3.57k | ps_mem_tab[ENC_LOOP_REF_FILT_OUT].i4_mem_alignment = 8; |
3756 | | |
3757 | 3.57k | #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS |
3758 | 3.57k | if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset == 0) |
3759 | 1.72k | #endif |
3760 | 1.72k | { |
3761 | | /* Memory assignments for recon storage during CU Recursion */ |
3762 | 1.72k | ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_size = |
3763 | 1.72k | i4_num_proc_thrds * (MAX_CU_SIZE * MAX_CU_SIZE) * |
3764 | 1.72k | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8); |
3765 | | |
3766 | 1.72k | ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3767 | | |
3768 | 1.72k | ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_alignment = 8; |
3769 | | |
3770 | 1.72k | ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_size = |
3771 | 1.72k | i4_num_proc_thrds * (MAX_CU_SIZE * (MAX_CU_SIZE >> 1)) * |
3772 | 1.72k | ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) * |
3773 | 1.72k | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8); |
3774 | | |
3775 | 1.72k | ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3776 | | |
3777 | 1.72k | ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_alignment = 8; |
3778 | 1.72k | } |
3779 | 1.84k | #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS |
3780 | 1.84k | else |
3781 | 1.84k | { |
3782 | | /* Memory assignments for recon storage during CU Recursion */ |
3783 | 1.84k | ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_size = 0; |
3784 | | |
3785 | 1.84k | ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3786 | | |
3787 | 1.84k | ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_alignment = 8; |
3788 | | |
3789 | 1.84k | ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_size = 0; |
3790 | | |
3791 | 1.84k | ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3792 | | |
3793 | 1.84k | ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_alignment = 8; |
3794 | 1.84k | } |
3795 | 3.57k | #endif |
3796 | | |
3797 | 3.57k | #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS |
3798 | 3.57k | if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset == 0) |
3799 | 1.72k | #endif |
3800 | 1.72k | { |
3801 | | /* Memory assignments for pred storage during CU Recursion */ |
3802 | 1.72k | ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_size = |
3803 | 1.72k | i4_num_proc_thrds * (MAX_CU_SIZE * MAX_CU_SIZE) * |
3804 | 1.72k | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8); |
3805 | | |
3806 | 1.72k | ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3807 | | |
3808 | 1.72k | ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_alignment = 8; |
3809 | | |
3810 | 1.72k | ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_size = |
3811 | 1.72k | i4_num_proc_thrds * (MAX_CU_SIZE * (MAX_CU_SIZE >> 1)) * |
3812 | 1.72k | ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) * |
3813 | 1.72k | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8); |
3814 | | |
3815 | 1.72k | ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3816 | | |
3817 | 1.72k | ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_alignment = 8; |
3818 | 1.72k | } |
3819 | 1.84k | #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS |
3820 | 1.84k | else |
3821 | 1.84k | { |
3822 | | /* Memory assignments for pred storage during CU Recursion */ |
3823 | 1.84k | ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_size = 0; |
3824 | | |
3825 | 1.84k | ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3826 | | |
3827 | 1.84k | ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_alignment = 8; |
3828 | | |
3829 | 1.84k | ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_size = 0; |
3830 | | |
3831 | 1.84k | ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3832 | | |
3833 | 1.84k | ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_alignment = 8; |
3834 | 1.84k | } |
3835 | 3.57k | #endif |
3836 | | |
3837 | | /* Memory assignments for CTB left luma data storage */ |
3838 | 3.57k | ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].i4_mem_size = |
3839 | 3.57k | i4_num_proc_thrds * (MAX_CTB_SIZE + MAX_TU_SIZE) * |
3840 | 3.57k | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8); |
3841 | | |
3842 | 3.57k | ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3843 | | |
3844 | 3.57k | ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].i4_mem_alignment = 8; |
3845 | | |
3846 | | /* Memory assignments for CTB left chroma data storage */ |
3847 | 3.57k | ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_size = |
3848 | 3.57k | i4_num_proc_thrds * (MAX_CTB_SIZE + MAX_TU_SIZE) * |
3849 | 3.57k | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8); |
3850 | 3.57k | ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_size <<= |
3851 | 3.57k | ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0); |
3852 | | |
3853 | 3.57k | ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3854 | | |
3855 | 3.57k | ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_alignment = 8; |
3856 | | |
3857 | | /* Memory required for SAO */ |
3858 | 3.57k | { |
3859 | 3.57k | WORD32 num_vert_units; |
3860 | 3.57k | WORD32 num_horz_units; |
3861 | 3.57k | WORD32 ctb_aligned_ht, ctb_aligned_wd; |
3862 | 3.57k | WORD32 luma_buf, chroma_buf; |
3863 | | |
3864 | 3.57k | num_vert_units = u4_height / MAX_CTB_SIZE; |
3865 | 3.57k | num_horz_units = u4_width / MAX_CTB_SIZE; |
3866 | | |
3867 | 3.57k | ctb_aligned_ht = u4_height; |
3868 | 3.57k | ctb_aligned_wd = u4_width; |
3869 | | |
3870 | | /* Memory for top buffer. 1 extra width is required for top buf ptr for row 0 |
3871 | | * and 1 extra location is required for top left buf ptr for row 0 |
3872 | | * Also 1 extra byte is required for every row for top left pixel if |
3873 | | * the top left ptr is to be passed to leaf level unconditionally |
3874 | | */ |
3875 | 3.57k | luma_buf = (ctb_aligned_ht + (ctb_aligned_wd + 1) * (num_vert_units + 1)) * |
3876 | 3.57k | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
3877 | 3.57k | chroma_buf = (ctb_aligned_ht + (ctb_aligned_wd + 2) * (num_vert_units + 1)) * |
3878 | 3.57k | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
3879 | | |
3880 | 3.57k | ps_mem_tab[ENC_LOOP_SAO].i4_mem_size = |
3881 | 3.57k | (luma_buf + chroma_buf) * (i4_num_bitrate_inst) * (i4_num_enc_loop_frm_pllel); |
3882 | | |
3883 | | /* Add the memory required to store the sao information of top ctb for top merge |
3884 | | * This is frame level buffer. |
3885 | | */ |
3886 | 3.57k | ps_mem_tab[ENC_LOOP_SAO].i4_mem_size += |
3887 | 3.57k | ((num_horz_units * sizeof(sao_enc_t)) * num_vert_units) * (i4_num_bitrate_inst) * |
3888 | 3.57k | (i4_num_enc_loop_frm_pllel); |
3889 | | |
3890 | 3.57k | ps_mem_tab[ENC_LOOP_SAO].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3891 | | |
3892 | 3.57k | ps_mem_tab[ENC_LOOP_SAO].i4_mem_alignment = 8; |
3893 | 3.57k | } |
3894 | | |
3895 | | /* Memory for CU level Coeff data buffer */ |
3896 | 3.57k | { |
3897 | | /* 16 additional bytes are required to ensure alignment */ |
3898 | 3.57k | { |
3899 | 3.57k | ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].i4_mem_size = |
3900 | 3.57k | i4_num_proc_thrds * |
3901 | 3.57k | (((MAX_LUMA_COEFFS_CTB + |
3902 | 3.57k | (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) + |
3903 | 3.57k | 16) * |
3904 | 3.57k | (2) * sizeof(UWORD8)); |
3905 | 3.57k | } |
3906 | | |
3907 | 3.57k | ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3908 | | |
3909 | 3.57k | ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].i4_mem_alignment = 16; |
3910 | | |
3911 | 3.57k | ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].i4_mem_size = |
3912 | 3.57k | i4_num_proc_thrds * |
3913 | 3.57k | (MAX_LUMA_COEFFS_CTB + |
3914 | 3.57k | (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) * |
3915 | 3.57k | sizeof(UWORD8); |
3916 | | |
3917 | 3.57k | ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3918 | | |
3919 | 3.57k | ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].i4_mem_alignment = 16; |
3920 | 3.57k | } |
3921 | | |
3922 | | /* Memory for CU dequant data buffer */ |
3923 | 3.57k | { |
3924 | | /* 16 additional bytes are required to ensure alignment */ |
3925 | 3.57k | { |
3926 | 3.57k | ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].i4_mem_size = |
3927 | 3.57k | i4_num_proc_thrds * |
3928 | 3.57k | (((i4_chroma_format == IV_YUV_422SP_UV) ? (MAX_CU_SIZE * (MAX_CU_SIZE << 1)) |
3929 | 3.57k | : (MAX_CU_SIZE * (MAX_CU_SIZE >> 1) * 3)) + |
3930 | 3.57k | 8) * |
3931 | 3.57k | (2) * sizeof(WORD16); |
3932 | 3.57k | } |
3933 | | |
3934 | 3.57k | ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
3935 | | |
3936 | 3.57k | ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].i4_mem_alignment = 16; |
3937 | 3.57k | } |
3938 | | |
3939 | | /* Memory for Recon Datastore (Used around and within the RDOPT loop) */ |
3940 | 3.57k | { |
3941 | 3.57k | WORD32 i4_memSize_perThread; |
3942 | | |
3943 | 3.57k | WORD32 i4_chroma_memSize_perThread = 0; |
3944 | | /* 2 bufs each allocated to the two 'enc_loop_cu_final_prms_t' structs */ |
3945 | | /* used in RDOPT to store cur and best modes' data */ |
3946 | 3.57k | WORD32 i4_luma_memSize_perThread = |
3947 | 3.57k | 4 * MAX_CU_SIZE * MAX_CU_SIZE * |
3948 | 3.57k | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
3949 | | |
3950 | | /* 'Glossary' for comments in the following codeBlock */ |
3951 | | /* 1 - 2 Bufs for storing recons of the best modes determined in the */ |
3952 | | /* function 'ihevce_intra_chroma_pred_mode_selector' */ |
3953 | | /* 2 - 1 buf each allocated to the two 'enc_loop_cu_final_prms_t' structs */ |
3954 | | /* used in RDOPT to store cur and best modes' data */ |
3955 | 3.57k | if(i4_chroma_format == IV_YUV_422SP_UV) |
3956 | 0 | { |
3957 | 0 | WORD32 i4_quality_preset = |
3958 | 0 | ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset; |
3959 | 0 | switch(i4_quality_preset) |
3960 | 0 | { |
3961 | 0 | case IHEVCE_QUALITY_P0: |
3962 | 0 | { |
3963 | | /* 1 */ |
3964 | 0 | i4_chroma_memSize_perThread += |
3965 | 0 | 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_PQ * |
3966 | 0 | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
3967 | | |
3968 | | /* 2 */ |
3969 | 0 | i4_chroma_memSize_perThread += |
3970 | 0 | 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ * |
3971 | 0 | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
3972 | |
|
3973 | 0 | break; |
3974 | 0 | } |
3975 | 0 | case IHEVCE_QUALITY_P2: |
3976 | 0 | { |
3977 | | /* 1 */ |
3978 | 0 | i4_chroma_memSize_perThread += |
3979 | 0 | 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_HQ * |
3980 | 0 | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
3981 | | |
3982 | | /* 2 */ |
3983 | 0 | i4_chroma_memSize_perThread += |
3984 | 0 | 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ * |
3985 | 0 | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
3986 | |
|
3987 | 0 | break; |
3988 | 0 | } |
3989 | 0 | case IHEVCE_QUALITY_P3: |
3990 | 0 | { |
3991 | | /* 1 */ |
3992 | 0 | i4_chroma_memSize_perThread += |
3993 | 0 | 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_MS * |
3994 | 0 | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
3995 | | |
3996 | | /* 2 */ |
3997 | 0 | i4_chroma_memSize_perThread += |
3998 | 0 | 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS * |
3999 | 0 | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
4000 | |
|
4001 | 0 | break; |
4002 | 0 | } |
4003 | 0 | case IHEVCE_QUALITY_P4: |
4004 | 0 | { |
4005 | | /* 1 */ |
4006 | 0 | i4_chroma_memSize_perThread += |
4007 | 0 | 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_HS * |
4008 | 0 | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
4009 | | |
4010 | | /* 2 */ |
4011 | 0 | i4_chroma_memSize_perThread += |
4012 | 0 | 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS * |
4013 | 0 | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
4014 | |
|
4015 | 0 | break; |
4016 | 0 | } |
4017 | 0 | case IHEVCE_QUALITY_P5: |
4018 | 0 | { |
4019 | | /* 1 */ |
4020 | 0 | i4_chroma_memSize_perThread += |
4021 | 0 | 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_XS * |
4022 | 0 | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
4023 | | |
4024 | | /* 2 */ |
4025 | 0 | i4_chroma_memSize_perThread += |
4026 | 0 | 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS * |
4027 | 0 | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
4028 | |
|
4029 | 0 | break; |
4030 | 0 | } |
4031 | 0 | case IHEVCE_QUALITY_P6: |
4032 | 0 | case IHEVCE_QUALITY_P7: |
4033 | 0 | { |
4034 | | /* 1 */ |
4035 | 0 | i4_chroma_memSize_perThread += |
4036 | 0 | 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_XS6 * |
4037 | 0 | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
4038 | | |
4039 | | /* 2 */ |
4040 | 0 | i4_chroma_memSize_perThread += |
4041 | 0 | 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS6 * |
4042 | 0 | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
4043 | |
|
4044 | 0 | break; |
4045 | 0 | } |
4046 | 0 | } |
4047 | 0 | } |
4048 | 3.57k | else |
4049 | 3.57k | { |
4050 | 3.57k | WORD32 i4_quality_preset = |
4051 | 3.57k | ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset; |
4052 | 3.57k | switch(i4_quality_preset) |
4053 | 3.57k | { |
4054 | 1.72k | case IHEVCE_QUALITY_P0: |
4055 | 1.72k | { |
4056 | | /* 1 */ |
4057 | 1.72k | i4_chroma_memSize_perThread += |
4058 | 1.72k | 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_PQ * |
4059 | 1.72k | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
4060 | | |
4061 | | /* 2 */ |
4062 | 1.72k | i4_chroma_memSize_perThread += |
4063 | 1.72k | 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * |
4064 | 1.72k | ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ * |
4065 | 1.72k | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
4066 | | |
4067 | 1.72k | break; |
4068 | 0 | } |
4069 | 280 | case IHEVCE_QUALITY_P2: |
4070 | 280 | { |
4071 | | /* 1 */ |
4072 | 280 | i4_chroma_memSize_perThread += |
4073 | 280 | 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_HQ * |
4074 | 280 | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
4075 | | |
4076 | | /* 2 */ |
4077 | 280 | i4_chroma_memSize_perThread += |
4078 | 280 | 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * |
4079 | 280 | ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ * |
4080 | 280 | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
4081 | | |
4082 | 280 | break; |
4083 | 0 | } |
4084 | 396 | case IHEVCE_QUALITY_P3: |
4085 | 396 | { |
4086 | | /* 1 */ |
4087 | 396 | i4_chroma_memSize_perThread += |
4088 | 396 | 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_MS * |
4089 | 396 | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
4090 | | |
4091 | | /* 2 */ |
4092 | 396 | i4_chroma_memSize_perThread += |
4093 | 396 | 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * |
4094 | 396 | ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS * |
4095 | 396 | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
4096 | | |
4097 | 396 | break; |
4098 | 0 | } |
4099 | 253 | case IHEVCE_QUALITY_P4: |
4100 | 253 | { |
4101 | | /* 1 */ |
4102 | 253 | i4_chroma_memSize_perThread += |
4103 | 253 | 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_HS * |
4104 | 253 | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
4105 | | |
4106 | | /* 2 */ |
4107 | 253 | i4_chroma_memSize_perThread += |
4108 | 253 | 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * |
4109 | 253 | ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS * |
4110 | 253 | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
4111 | | |
4112 | 253 | break; |
4113 | 0 | } |
4114 | 299 | case IHEVCE_QUALITY_P5: |
4115 | 299 | { |
4116 | | /* 1 */ |
4117 | 299 | i4_chroma_memSize_perThread += |
4118 | 299 | 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_XS * |
4119 | 299 | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
4120 | | |
4121 | | /* 2 */ |
4122 | 299 | i4_chroma_memSize_perThread += |
4123 | 299 | 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * |
4124 | 299 | ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS * |
4125 | 299 | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
4126 | | |
4127 | 299 | break; |
4128 | 0 | } |
4129 | 378 | case IHEVCE_QUALITY_P6: |
4130 | 620 | case IHEVCE_QUALITY_P7: |
4131 | 620 | { |
4132 | | /* 1 */ |
4133 | 620 | i4_chroma_memSize_perThread += |
4134 | 620 | 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_XS6 * |
4135 | 620 | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
4136 | | |
4137 | | /* 2 */ |
4138 | 620 | i4_chroma_memSize_perThread += |
4139 | 620 | 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * |
4140 | 620 | ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS6 * |
4141 | 620 | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1); |
4142 | | |
4143 | 620 | break; |
4144 | 378 | } |
4145 | 3.57k | } |
4146 | 3.57k | } |
4147 | | |
4148 | 3.57k | i4_memSize_perThread = i4_luma_memSize_perThread + i4_chroma_memSize_perThread; |
4149 | | |
4150 | 3.57k | ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_size = |
4151 | 3.57k | i4_num_proc_thrds * i4_memSize_perThread * sizeof(UWORD8); |
4152 | | |
4153 | 3.57k | ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
4154 | | |
4155 | 3.57k | ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_alignment = 16; |
4156 | 3.57k | } |
4157 | | |
4158 | 0 | n_tabs = NUM_ENC_LOOP_MEM_RECS; |
4159 | | |
4160 | | /*************************************************************************/ |
4161 | | /* --- EncLoop Deblock and SAO sync Dep Mngr Mem requests -- */ |
4162 | | /*************************************************************************/ |
4163 | | |
4164 | | /* Fill the memtabs for EncLoop Deblock Dep Mngr */ |
4165 | 3.57k | { |
4166 | 3.57k | WORD32 count; |
4167 | 3.57k | WORD32 num_vert_units; |
4168 | 3.57k | WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height; |
4169 | | |
4170 | 3.57k | ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units); |
4171 | 3.57k | ASSERT(num_vert_units > 0); |
4172 | 7.14k | for(count = 0; count < i4_num_enc_loop_frm_pllel; count++) |
4173 | 3.57k | { |
4174 | 7.14k | for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++) |
4175 | 3.57k | { |
4176 | 3.57k | n_tabs += ihevce_dmgr_get_mem_recs( |
4177 | 3.57k | &ps_mem_tab[n_tabs], |
4178 | 3.57k | DEP_MNGR_ROW_ROW_SYNC, |
4179 | 3.57k | num_vert_units, |
4180 | 3.57k | ps_init_prms->s_app_tile_params.i4_num_tile_cols, |
4181 | 3.57k | i4_num_proc_thrds, |
4182 | 3.57k | i4_mem_space); |
4183 | 3.57k | } |
4184 | 3.57k | } |
4185 | | |
4186 | | /* Fill the memtabs for EncLoop SAO Dep Mngr */ |
4187 | 7.14k | for(count = 0; count < i4_num_enc_loop_frm_pllel; count++) |
4188 | 3.57k | { |
4189 | 7.14k | for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++) |
4190 | 3.57k | { |
4191 | 3.57k | n_tabs += ihevce_dmgr_get_mem_recs( |
4192 | 3.57k | &ps_mem_tab[n_tabs], |
4193 | 3.57k | DEP_MNGR_ROW_ROW_SYNC, |
4194 | 3.57k | num_vert_units, |
4195 | 3.57k | ps_init_prms->s_app_tile_params.i4_num_tile_cols, |
4196 | 3.57k | i4_num_proc_thrds, |
4197 | 3.57k | i4_mem_space); |
4198 | 3.57k | } |
4199 | 3.57k | } |
4200 | 3.57k | } |
4201 | | |
4202 | | /*************************************************************************/ |
4203 | | /* --- EncLoop Top-Right CU sync Dep Mngr Mem requests -- */ |
4204 | | /*************************************************************************/ |
4205 | | |
4206 | | /* Fill the memtabs for Top-Right CU sync Dep Mngr */ |
4207 | 3.57k | { |
4208 | 3.57k | WORD32 count; |
4209 | 3.57k | WORD32 num_vert_units; |
4210 | 3.57k | WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height; |
4211 | 3.57k | ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units); |
4212 | 3.57k | ASSERT(num_vert_units > 0); |
4213 | | |
4214 | 7.14k | for(count = 0; count < i4_num_enc_loop_frm_pllel; count++) |
4215 | 3.57k | { |
4216 | 7.14k | for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++) |
4217 | 3.57k | { |
4218 | 3.57k | n_tabs += ihevce_dmgr_get_mem_recs( |
4219 | 3.57k | &ps_mem_tab[n_tabs], |
4220 | 3.57k | DEP_MNGR_ROW_ROW_SYNC, |
4221 | 3.57k | num_vert_units, |
4222 | 3.57k | ps_init_prms->s_app_tile_params.i4_num_tile_cols, |
4223 | 3.57k | i4_num_proc_thrds, |
4224 | 3.57k | i4_mem_space); |
4225 | 3.57k | } |
4226 | 3.57k | } |
4227 | 3.57k | } |
4228 | | |
4229 | | /*************************************************************************/ |
4230 | | /* --- EncLoop Aux. on Ref. bitrate sync Dep Mngr Mem requests -- */ |
4231 | | /*************************************************************************/ |
4232 | | |
4233 | | /* Fill the memtabs for EncLoop Aux. on Ref. bitrate Dep Mngr */ |
4234 | 3.57k | { |
4235 | 3.57k | WORD32 count; |
4236 | 3.57k | WORD32 num_vert_units; |
4237 | 3.57k | WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height; |
4238 | | |
4239 | 3.57k | ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units); |
4240 | 3.57k | ASSERT(num_vert_units > 0); |
4241 | | |
4242 | 7.14k | for(count = 0; count < i4_num_enc_loop_frm_pllel; count++) |
4243 | 3.57k | { |
4244 | 3.57k | for(ctr = 1; ctr < i4_num_bitrate_inst; ctr++) |
4245 | 0 | { |
4246 | 0 | n_tabs += ihevce_dmgr_get_mem_recs( |
4247 | 0 | &ps_mem_tab[n_tabs], |
4248 | 0 | DEP_MNGR_ROW_ROW_SYNC, |
4249 | 0 | num_vert_units, |
4250 | 0 | ps_init_prms->s_app_tile_params.i4_num_tile_cols, |
4251 | 0 | i4_num_proc_thrds, |
4252 | 0 | i4_mem_space); |
4253 | 0 | } |
4254 | 3.57k | } |
4255 | 3.57k | } |
4256 | | |
4257 | 3.57k | return (n_tabs); |
4258 | 3.57k | } |
4259 | | |
4260 | | /*! |
4261 | | ****************************************************************************** |
4262 | | * \if Function name : ihevce_enc_loop_init \endif |
4263 | | * |
4264 | | * \brief |
4265 | | * Initialization for ENC_LOOP context state structure. |
4266 | | * |
4267 | | * \param[in] ps_mem_tab : pointer to memory descriptors table |
4268 | | * \param[in] ps_init_prms : Create time static parameters |
4269 | | * \param[in] pv_osal_handle : Osal handle |
4270 | | * |
4271 | | * \return |
4272 | | * void pointer; presumably the ENC_LOOP master context handle (confirm against the function body) |
4273 | | * |
4274 | | * \author |
4275 | | * Ittiam |
4276 | | * |
4277 | | ***************************************************************************** |
4278 | | */ |
4279 | | void *ihevce_enc_loop_init( |
4280 | | iv_mem_rec_t *ps_mem_tab, |
4281 | | ihevce_static_cfg_params_t *ps_init_prms, |
4282 | | WORD32 i4_num_proc_thrds, |
4283 | | void *pv_osal_handle, |
4284 | | func_selector_t *ps_func_selector, |
4285 | | rc_quant_t *ps_rc_quant_ctxt, |
4286 | | ihevce_tile_params_t *ps_tile_params_base, |
4287 | | WORD32 i4_resolution_id, |
4288 | | WORD32 i4_num_enc_loop_frm_pllel, |
4289 | | UWORD8 u1_is_popcnt_available) |
4290 | 3.57k | { |
4291 | 3.57k | ihevce_enc_loop_master_ctxt_t *ps_master_ctxt; |
4292 | 3.57k | ihevce_enc_loop_ctxt_t *ps_ctxt; |
4293 | 3.57k | WORD32 ctr, n_tabs; |
4294 | 3.57k | UWORD32 u4_width, u4_height; |
4295 | 3.57k | UWORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame; |
4296 | 3.57k | UWORD32 u4_size_bs_memory, u4_size_qp_memory; |
4297 | 3.57k | UWORD8 *pu1_deblk_base; /*Store the base address of deblcoking memory*/ |
4298 | 3.57k | WORD32 i; |
4299 | 3.57k | WORD32 i4_num_bitrate_inst = |
4300 | 3.57k | ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_num_bitrate_instances; |
4301 | 3.57k | enc_loop_rc_params_t *ps_enc_loop_rc_params; |
4302 | 3.57k | UWORD8 *pu1_sao_base; /* store the base address of sao*/ |
4303 | 3.57k | UWORD32 u4_ctb_aligned_wd, ctb_size, u4_ctb_aligned_ht, num_vert_units; |
4304 | 3.57k | WORD32 i4_chroma_format = ps_init_prms->s_src_prms.i4_chr_format; |
4305 | 3.57k | WORD32 is_hbd_mode = (ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8); |
4306 | 3.57k | WORD32 i4_enc_frm_id; |
4307 | 3.57k | WORD32 num_cu_in_ctb; |
4308 | 3.57k | WORD32 i4_num_tile_cols = 1; //Default value is 1 |
4309 | | |
4310 | | /* ENC_LOOP state structure */ |
4311 | 3.57k | ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)ps_mem_tab[ENC_LOOP_CTXT].pv_base; |
4312 | | |
4313 | 3.57k | ps_master_ctxt->i4_num_proc_thrds = i4_num_proc_thrds; |
4314 | | |
4315 | 3.57k | ps_ctxt = (ihevce_enc_loop_ctxt_t *)ps_mem_tab[ENC_LOOP_THRDS_CTXT].pv_base; |
4316 | 3.57k | ps_enc_loop_rc_params = (enc_loop_rc_params_t *)ps_mem_tab[ENC_LOOP_RC_PARAMS].pv_base; |
4317 | 3.57k | ps_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt; |
4318 | | /*Calculation of memory sizes for deblocking*/ |
4319 | 3.57k | { |
4320 | | /*width of the input YUV to be encoded. */ |
4321 | 3.57k | u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width; |
4322 | | /*making the width a multiple of CTB size*/ |
4323 | 3.57k | u4_width += SET_CTB_ALIGN( |
4324 | 3.57k | ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE); |
4325 | | |
4326 | 3.57k | u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE); |
4327 | | |
4328 | | /*height of the input YUV to be encoded */ |
4329 | 3.57k | u4_height = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height; |
4330 | | /*making the height a multiple of CTB size*/ |
4331 | 3.57k | u4_height += SET_CTB_ALIGN( |
4332 | 3.57k | ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, MAX_CTB_SIZE); |
4333 | | |
4334 | 3.57k | u4_ctb_rows_in_a_frame = (u4_height / MAX_CTB_SIZE); |
4335 | | |
4336 | | /*Memory(in bytes) required for storing Boundary Strength for entire CTB row*/ |
4337 | | /*1 vertical edge per 8 pixel*/ |
4338 | 3.57k | u4_size_bs_memory = (MAX_CTB_SIZE >> 3); |
4339 | | /*Vertical edges for entire width of CTB row*/ |
4340 | 3.57k | u4_size_bs_memory *= u4_ctb_in_a_row; |
4341 | | /*Each vertical edge of CTB row is 4 bytes*/ |
4342 | 3.57k | u4_size_bs_memory = u4_size_bs_memory << 2; |
4343 | | /*Adding Memory required for storing horizontal BS by doubling*/ |
4344 | 3.57k | u4_size_bs_memory = u4_size_bs_memory << 1; |
4345 | | |
4346 | | /*Memory(in bytes) required for storing Qp at 4x4 level for entire CTB row*/ |
4347 | | /*Number of 4x4 blocks in the width of a CTB*/ |
4348 | 3.57k | u4_size_qp_memory = (MAX_CTB_SIZE >> 2); |
4349 | | /*Number of 4x4 blocks in the height of a CTB. Adding 1 to store Qp of lowest |
4350 | | 4x4-block layer of top-CTB in order to deblock top edge of current CTB*/ |
4351 | 3.57k | u4_size_qp_memory *= ((MAX_CTB_SIZE >> 2) + 1); |
4352 | | /*Storage for entire CTB row*/ |
4353 | 3.57k | u4_size_qp_memory *= u4_ctb_in_a_row; |
4354 | | |
4355 | 3.57k | pu1_deblk_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_DEBLOCKING].pv_base; |
4356 | 3.57k | } |
4357 | | |
4358 | | /*Derive the base pointer of sao*/ |
4359 | 3.57k | pu1_sao_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_SAO].pv_base; |
4360 | 3.57k | ctb_size = (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size); |
4361 | 3.57k | u4_ctb_aligned_wd = u4_width; |
4362 | 3.57k | u4_ctb_aligned_ht = u4_height; |
4363 | 3.57k | num_vert_units = (u4_height) / ctb_size; |
4364 | | |
4365 | 7.14k | for(ctr = 0; ctr < ps_master_ctxt->i4_num_proc_thrds; ctr++) |
4366 | 3.57k | { |
4367 | 3.57k | ps_master_ctxt->aps_enc_loop_thrd_ctxt[ctr] = ps_ctxt; |
4368 | | /* Store Tile params base into EncLoop context */ |
4369 | 3.57k | ps_ctxt->pv_tile_params_base = (void *)ps_tile_params_base; |
4370 | 3.57k | ihevce_cmn_utils_instr_set_router( |
4371 | 3.57k | &ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type); |
4372 | 3.57k | ihevce_sifter_sad_fxn_assigner( |
4373 | 3.57k | (FT_SAD_EVALUATOR **)(&ps_ctxt->pv_evalsad_pt_npu_mxn_8bit), ps_init_prms->e_arch_type); |
4374 | 3.57k | ps_ctxt->i4_max_search_range_horizontal = |
4375 | 3.57k | ps_init_prms->s_config_prms.i4_max_search_range_horz; |
4376 | 3.57k | ps_ctxt->i4_max_search_range_vertical = |
4377 | 3.57k | ps_init_prms->s_config_prms.i4_max_search_range_vert; |
4378 | | |
4379 | 3.57k | ps_ctxt->i4_quality_preset = |
4380 | 3.57k | ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset; |
4381 | | |
4382 | 3.57k | if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P7) |
4383 | 242 | { |
4384 | 242 | ps_ctxt->i4_quality_preset = IHEVCE_QUALITY_P6; |
4385 | 242 | } |
4386 | | |
4387 | 3.57k | ps_ctxt->i4_num_proc_thrds = ps_master_ctxt->i4_num_proc_thrds; |
4388 | | |
4389 | 3.57k | ps_ctxt->i4_rc_pass = ps_init_prms->s_pass_prms.i4_pass; |
4390 | | |
4391 | 3.57k | ps_ctxt->u1_chroma_array_type = (i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1; |
4392 | | |
4393 | 3.57k | ps_ctxt->s_deblk_prms.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type; |
4394 | | |
4395 | 3.57k | ps_ctxt->pi2_scal_mat = (WORD16 *)ps_mem_tab[ENC_LOOP_SCALE_MAT].pv_base; |
4396 | | |
4397 | 3.57k | ps_ctxt->pi2_rescal_mat = (WORD16 *)ps_mem_tab[ENC_LOOP_RESCALE_MAT].pv_base; |
4398 | | |
4399 | 3.57k | if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2) |
4400 | 1.72k | { |
4401 | 1.72k | ps_ctxt->i4_use_ctb_level_lamda = 0; |
4402 | 1.72k | } |
4403 | 1.84k | else |
4404 | 1.84k | { |
4405 | 1.84k | ps_ctxt->i4_use_ctb_level_lamda = 0; |
4406 | 1.84k | } |
4407 | | |
4408 | | /** Register the function selector pointer*/ |
4409 | 3.57k | ps_ctxt->ps_func_selector = ps_func_selector; |
4410 | | |
4411 | 3.57k | ps_ctxt->s_mc_ctxt.ps_func_selector = ps_func_selector; |
4412 | | |
4413 | | /* Initialization for non-distributed mode */ |
4414 | 3.57k | ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[0] = 0; |
4415 | 3.57k | ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[1] = 0; |
4416 | 3.57k | ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[2] = 0; |
4417 | 3.57k | ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[3] = 0; |
4418 | | |
4419 | 3.57k | ps_ctxt->s_deblk_prms.ps_func_selector = ps_func_selector; |
4420 | 3.57k | ps_ctxt->i4_top_row_luma_stride = (u4_width + MAX_CU_SIZE + 1); |
4421 | | |
4422 | 3.57k | ps_ctxt->i4_frm_top_row_luma_size = |
4423 | 3.57k | ps_ctxt->i4_top_row_luma_stride * (u4_ctb_rows_in_a_frame + 1); |
4424 | | |
4425 | 3.57k | ps_ctxt->i4_top_row_chroma_stride = (u4_width + MAX_CU_SIZE + 2); |
4426 | | |
4427 | 3.57k | ps_ctxt->i4_frm_top_row_chroma_size = |
4428 | 3.57k | ps_ctxt->i4_top_row_chroma_stride * (u4_ctb_rows_in_a_frame + 1); |
4429 | | |
4430 | 3.57k | { |
4431 | 7.14k | for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++) |
4432 | 3.57k | { |
4433 | | /* +1 is to provision top left pel */ |
4434 | 3.57k | ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] = |
4435 | 3.57k | (UWORD8 *)ps_mem_tab[ENC_LOOP_TOP_LUMA].pv_base + 1 + |
4436 | 3.57k | (ps_ctxt->i4_frm_top_row_luma_size * i4_enc_frm_id * i4_num_bitrate_inst); |
4437 | | |
4438 | | /* pointer incremented by 1 row to avoid OOB access in 0th row */ |
4439 | 3.57k | ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] = |
4440 | 3.57k | (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] + |
4441 | 3.57k | ps_ctxt->i4_top_row_luma_stride; |
4442 | | |
4443 | | /* +2 is to provision top left pel */ |
4444 | 3.57k | ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] = |
4445 | 3.57k | (UWORD8 *)ps_mem_tab[ENC_LOOP_TOP_CHROMA].pv_base + 2 + |
4446 | 3.57k | (ps_ctxt->i4_frm_top_row_chroma_size * i4_enc_frm_id * i4_num_bitrate_inst); |
4447 | | |
4448 | | /* pointer incremented by 1 row to avoid OOB access in 0th row */ |
4449 | 3.57k | ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] = |
4450 | 3.57k | (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] + |
4451 | 3.57k | ps_ctxt->i4_top_row_chroma_stride; |
4452 | 3.57k | } |
4453 | 3.57k | } |
4454 | | |
4455 | | /* +1 is to provision top left nbr */ |
4456 | 3.57k | ps_ctxt->i4_top_row_nbr_stride = (((u4_width + MAX_CU_SIZE) >> 2) + 1); |
4457 | 3.57k | ps_ctxt->i4_frm_top_row_nbr_size = |
4458 | 3.57k | ps_ctxt->i4_top_row_nbr_stride * (u4_ctb_rows_in_a_frame + 1); |
4459 | 7.14k | for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++) |
4460 | 3.57k | { |
4461 | 3.57k | ps_ctxt->aps_frm_top_row_nbr[i4_enc_frm_id] = |
4462 | 3.57k | (nbr_4x4_t *)ps_mem_tab[ENC_LOOP_TOP_NBR4X4].pv_base + 1 + |
4463 | 3.57k | (ps_ctxt->i4_frm_top_row_nbr_size * i4_enc_frm_id * i4_num_bitrate_inst); |
4464 | 3.57k | ps_ctxt->aps_frm_top_row_nbr[i4_enc_frm_id] += ps_ctxt->i4_top_row_nbr_stride; |
4465 | 3.57k | } |
4466 | | |
4467 | 3.57k | num_cu_in_ctb = ctb_size / MIN_CU_SIZE; |
4468 | 3.57k | num_cu_in_ctb *= num_cu_in_ctb; |
4469 | | |
4470 | | /* pointer incremented by 1 row to avoid OOB access in 0th row */ |
4471 | | |
4472 | | /* Memory for CU level Coeff data buffer */ |
4473 | 3.57k | { |
4474 | 3.57k | WORD32 i4_16byte_boundary_overshoot; |
4475 | 3.57k | WORD32 buf_size_per_cu; |
4476 | 3.57k | WORD32 buf_size_per_thread_wo_alignment_req; |
4477 | 3.57k | WORD32 buf_size_per_thread; |
4478 | | |
4479 | 3.57k | buf_size_per_cu = |
4480 | 3.57k | ((MAX_LUMA_COEFFS_CTB + |
4481 | 3.57k | (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) + |
4482 | 3.57k | 16) * |
4483 | 3.57k | sizeof(UWORD8); |
4484 | 3.57k | buf_size_per_thread_wo_alignment_req = buf_size_per_cu - 16 * sizeof(UWORD8); |
4485 | | |
4486 | 3.57k | { |
4487 | 3.57k | buf_size_per_thread = buf_size_per_cu * (2); |
4488 | | |
4489 | 10.7k | for(i = 0; i < 2; i++) |
4490 | 7.14k | { |
4491 | 7.14k | ps_ctxt->as_cu_prms[i].pu1_cu_coeffs = |
4492 | 7.14k | (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].pv_base + |
4493 | 7.14k | (ctr * buf_size_per_thread) + (i * buf_size_per_cu); |
4494 | | |
4495 | 7.14k | i4_16byte_boundary_overshoot = |
4496 | 7.14k | ((LWORD64)ps_ctxt->as_cu_prms[i].pu1_cu_coeffs & 0xf); |
4497 | | |
4498 | 7.14k | ps_ctxt->as_cu_prms[i].pu1_cu_coeffs += (16 - i4_16byte_boundary_overshoot); |
4499 | 7.14k | } |
4500 | 3.57k | } |
4501 | | |
4502 | 3.57k | ps_ctxt->pu1_cu_recur_coeffs = |
4503 | 3.57k | (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].pv_base + |
4504 | 3.57k | (ctr * buf_size_per_thread_wo_alignment_req); |
4505 | 3.57k | } |
4506 | | |
4507 | | /* Memory for CU dequant data buffer */ |
4508 | 3.57k | { |
4509 | 3.57k | WORD32 buf_size_per_thread; |
4510 | 3.57k | WORD32 i4_16byte_boundary_overshoot; |
4511 | | |
4512 | 3.57k | WORD32 buf_size_per_cu = |
4513 | 3.57k | (((i4_chroma_format == IV_YUV_422SP_UV) ? (MAX_CU_SIZE * (MAX_CU_SIZE << 1)) |
4514 | 3.57k | : (MAX_CU_SIZE * (MAX_CU_SIZE >> 1) * 3)) + |
4515 | 3.57k | 8) * |
4516 | 3.57k | sizeof(WORD16); |
4517 | | |
4518 | 3.57k | { |
4519 | 3.57k | buf_size_per_thread = buf_size_per_cu * 2; |
4520 | | |
4521 | 10.7k | for(i = 0; i < 2; i++) |
4522 | 7.14k | { |
4523 | 7.14k | ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs = |
4524 | 7.14k | (WORD16 |
4525 | 7.14k | *)((UWORD8 *)ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].pv_base + (ctr * buf_size_per_thread) + (i * buf_size_per_cu)); |
4526 | | |
4527 | 7.14k | i4_16byte_boundary_overshoot = |
4528 | 7.14k | ((LWORD64)ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs & 0xf); |
4529 | | |
4530 | 7.14k | ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs = |
4531 | 7.14k | (WORD16 |
4532 | 7.14k | *)((UWORD8 *)ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs + (16 - i4_16byte_boundary_overshoot)); |
4533 | 7.14k | } |
4534 | 3.57k | } |
4535 | 3.57k | } |
4536 | | |
4537 | | /*------ Deblocking memory pointer assignments start ------*/ |
4538 | | |
4539 | | /*Assign stride = 4x4 blocks in horizontal edge*/ |
4540 | 3.57k | ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_strd = (MAX_CTB_SIZE / 4) * u4_ctb_in_a_row; |
4541 | | |
4542 | 3.57k | ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_size = |
4543 | 3.57k | ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_strd * u4_ctb_rows_in_a_frame; |
4544 | | |
4545 | | /*Assign frame level memory to store the Qp of |
4546 | | top 4x4 neighbours of each CTB row*/ |
4547 | 7.14k | for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++) |
4548 | 3.57k | { |
4549 | 3.57k | ps_ctxt->s_deblk_ctbrow_prms.api1_qp_top_4x4_ctb_row[i4_enc_frm_id] = |
4550 | 3.57k | (WORD8 *)ps_mem_tab[ENC_LOOP_QP_TOP_4X4].pv_base + |
4551 | 3.57k | (ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_size * i4_num_bitrate_inst * |
4552 | 3.57k | i4_enc_frm_id); |
4553 | 3.57k | } |
4554 | | |
4555 | 3.57k | ps_ctxt->s_deblk_ctbrow_prms.pu4_ctb_row_bs_vert = (UWORD32 *)pu1_deblk_base; |
4556 | | |
4557 | 3.57k | ps_ctxt->s_deblk_ctbrow_prms.pu4_ctb_row_bs_horz = |
4558 | 3.57k | (UWORD32 *)(pu1_deblk_base + (u4_size_bs_memory >> 1)); |
4559 | | |
4560 | 3.57k | ps_ctxt->s_deblk_ctbrow_prms.pi1_ctb_row_qp = (WORD8 *)pu1_deblk_base + u4_size_bs_memory; |
4561 | | |
4562 | | /*Assign stride = 4x4 blocks in horizontal edge*/ |
4563 | 3.57k | ps_ctxt->s_deblk_ctbrow_prms.u4_qp_buffer_stride = (MAX_CTB_SIZE / 4) * u4_ctb_in_a_row; |
4564 | | |
4565 | 3.57k | pu1_deblk_base += (u4_size_bs_memory + u4_size_qp_memory); |
4566 | | |
4567 | | /*------ Deblocking memory pointer assignments end ------*/ |
4568 | | |
4569 | | /*------SAO memory's pointer assignment starts------------*/ |
4570 | 3.57k | if(!is_hbd_mode) |
4571 | 3.57k | { |
4572 | | /* Each row is widened by 1 (luma) or 2 (chroma) to provision the top left pixel */ |
4573 | 3.57k | ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size = |
4574 | 3.57k | u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 1) * (num_vert_units + 1); |
4575 | 3.57k | ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size = |
4576 | 3.57k | u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 2) * (num_vert_units + 1); |
4577 | 3.57k | ps_ctxt->s_sao_ctxt_t.i4_num_ctb_units = |
4578 | 3.57k | num_vert_units * (u4_ctb_aligned_wd / MAX_CTB_SIZE); |
4579 | | |
4580 | 7.14k | for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++) |
4581 | 3.57k | { |
4582 | 3.57k | ps_ctxt->s_sao_ctxt_t.apu1_sao_src_frm_top_luma[i4_enc_frm_id] = |
4583 | 3.57k | pu1_sao_base + |
4584 | 3.57k | ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size + |
4585 | 3.57k | ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) * |
4586 | 3.57k | i4_num_bitrate_inst * i4_enc_frm_id) + // move to the next frame_id |
4587 | 3.57k | u4_ctb_aligned_wd + |
4588 | 3.57k | 2; |
4589 | | |
4590 | 3.57k | ps_ctxt->s_sao_ctxt_t.apu1_sao_src_frm_top_chroma[i4_enc_frm_id] = |
4591 | 3.57k | pu1_sao_base + |
4592 | 3.57k | ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size + |
4593 | 3.57k | ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) * |
4594 | 3.57k | i4_num_bitrate_inst * i4_enc_frm_id) + |
4595 | 3.57k | +u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 1) * (num_vert_units + 1) + |
4596 | 3.57k | u4_ctb_aligned_wd + 4; |
4597 | | |
4598 | 3.57k | ps_ctxt->s_sao_ctxt_t.aps_frm_top_ctb_sao[i4_enc_frm_id] = (sao_enc_t *) (pu1_sao_base + |
4599 | 3.57k | ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size + ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) |
4600 | 3.57k | *i4_num_bitrate_inst*i4_num_enc_loop_frm_pllel) + |
4601 | 3.57k | (ps_ctxt->s_sao_ctxt_t.i4_num_ctb_units * sizeof(sao_enc_t) *i4_num_bitrate_inst * i4_enc_frm_id)); |
4602 | 3.57k | } |
4603 | 3.57k | ps_ctxt->s_sao_ctxt_t.i4_ctb_size = |
4604 | 3.57k | (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size); |
4605 | 3.57k | ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd = u4_ctb_aligned_wd; |
4606 | 3.57k | } |
4607 | | |
4608 | | /*------SAO memory's pointer assignment ends------------*/ |
4609 | | |
4610 | | /* perform all one time initialisation here */ |
4611 | 3.57k | ps_ctxt->i4_nbr_map_strd = MAX_PU_IN_CTB_ROW + 1 + 8; |
4612 | | |
4613 | 3.57k | ps_ctxt->pu1_ctb_nbr_map = ps_ctxt->au1_nbr_ctb_map[0]; |
4614 | | |
4615 | 3.57k | ps_ctxt->i4_deblock_type = ps_init_prms->s_coding_tools_prms.i4_deblocking_type; |
4616 | | |
4617 | | /* move the pointer to 1,2 location */ |
4618 | 3.57k | ps_ctxt->pu1_ctb_nbr_map += ps_ctxt->i4_nbr_map_strd; |
4619 | 3.57k | ps_ctxt->pu1_ctb_nbr_map++; |
4620 | | |
4621 | 3.57k | ps_ctxt->i4_cu_csbf_strd = MAX_TU_IN_CTB_ROW; |
4622 | | |
4623 | 3.57k | CREATE_SUBBLOCK2CSBFID_MAP(gai4_subBlock2csbfId_map4x4TU, 1, 4, ps_ctxt->i4_cu_csbf_strd); |
4624 | | |
4625 | 3.57k | CREATE_SUBBLOCK2CSBFID_MAP(gai4_subBlock2csbfId_map8x8TU, 4, 8, ps_ctxt->i4_cu_csbf_strd); |
4626 | | |
4627 | 3.57k | CREATE_SUBBLOCK2CSBFID_MAP( |
4628 | 3.57k | gai4_subBlock2csbfId_map16x16TU, 16, 16, ps_ctxt->i4_cu_csbf_strd); |
4629 | | |
4630 | 3.57k | CREATE_SUBBLOCK2CSBFID_MAP( |
4631 | 3.57k | gai4_subBlock2csbfId_map32x32TU, 64, 32, ps_ctxt->i4_cu_csbf_strd); |
4632 | | |
4633 | | /* For both instance initialise the chroma dequant start idx */ |
4634 | 3.57k | ps_ctxt->as_cu_prms[0].i4_chrm_deq_coeff_strt_idx = (MAX_CU_SIZE * MAX_CU_SIZE); |
4635 | 3.57k | ps_ctxt->as_cu_prms[1].i4_chrm_deq_coeff_strt_idx = (MAX_CU_SIZE * MAX_CU_SIZE); |
4636 | | |
4637 | | /* initialise all the function pointer tables */ |
4638 | 3.57k | { |
4639 | 3.57k | ps_ctxt->pv_inter_rdopt_cu_mc_mvp = |
4640 | 3.57k | (pf_inter_rdopt_cu_mc_mvp)ihevce_inter_rdopt_cu_mc_mvp; |
4641 | | |
4642 | 3.57k | ps_ctxt->pv_inter_rdopt_cu_ntu = (pf_inter_rdopt_cu_ntu)ihevce_inter_rdopt_cu_ntu; |
4643 | | |
4644 | 3.57k | #if ENABLE_RDO_BASED_TU_RECURSION |
4645 | 3.57k | if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0) |
4646 | 1.72k | { |
4647 | 1.72k | ps_ctxt->pv_inter_rdopt_cu_ntu = |
4648 | 1.72k | (pf_inter_rdopt_cu_ntu)ihevce_inter_tu_tree_selector_and_rdopt_cost_computer; |
4649 | 1.72k | } |
4650 | 3.57k | #endif |
4651 | 3.57k | ps_ctxt->pv_intra_chroma_pred_mode_selector = |
4652 | 3.57k | (pf_intra_chroma_pred_mode_selector)ihevce_intra_chroma_pred_mode_selector; |
4653 | 3.57k | ps_ctxt->pv_intra_rdopt_cu_ntu = (pf_intra_rdopt_cu_ntu)ihevce_intra_rdopt_cu_ntu; |
4654 | 3.57k | ps_ctxt->pv_final_rdopt_mode_prcs = |
4655 | 3.57k | (pf_final_rdopt_mode_prcs)ihevce_final_rdopt_mode_prcs; |
4656 | 3.57k | ps_ctxt->pv_store_cu_results = (pf_store_cu_results)ihevce_store_cu_results; |
4657 | 3.57k | ps_ctxt->pv_enc_loop_cu_bot_copy = (pf_enc_loop_cu_bot_copy)ihevce_enc_loop_cu_bot_copy; |
4658 | 3.57k | ps_ctxt->pv_enc_loop_ctb_left_copy = |
4659 | 3.57k | (pf_enc_loop_ctb_left_copy)ihevce_enc_loop_ctb_left_copy; |
4660 | | |
4661 | | /* Memory assignments for chroma intra pred buffer */ |
4662 | 3.57k | { |
4663 | 3.57k | WORD32 pred_buf_size = |
4664 | 3.57k | MAX_TU_SIZE * MAX_TU_SIZE * 2 * ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1); |
4665 | 3.57k | WORD32 pred_buf_size_per_thread = |
4666 | 3.57k | NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD * pred_buf_size; |
4667 | 3.57k | UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].pv_base + |
4668 | 3.57k | (ctr * pred_buf_size_per_thread); |
4669 | | |
4670 | 10.7k | for(i = 0; i < NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD; i++) |
4671 | 7.14k | { |
4672 | 7.14k | ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[i].pv_pred_data = pu1_base; |
4673 | 7.14k | pu1_base += pred_buf_size; |
4674 | 7.14k | } |
4675 | 3.57k | } |
4676 | | |
4677 | | /* Memory assignments for reference substitution output */ |
4678 | 3.57k | { |
4679 | 3.57k | WORD32 pred_buf_size = ((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING |
4680 | 3.57k | + INTRAPRED_SIMD_LEFT_PADDING); |
4681 | 3.57k | WORD32 pred_buf_size_per_thread = pred_buf_size; |
4682 | 3.57k | UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_REF_SUB_OUT].pv_base + |
4683 | 3.57k | (ctr * pred_buf_size_per_thread); |
4684 | | |
4685 | 3.57k | ps_ctxt->pv_ref_sub_out = pu1_base + INTRAPRED_SIMD_LEFT_PADDING; |
4686 | 3.57k | } |
4687 | | |
4688 | | /* Memory assignments for reference filtering output */ |
4689 | 3.57k | { |
4690 | 3.57k | WORD32 pred_buf_size = ((MAX_TU_SIZE * 2 * 2) + INTRAPRED_SIMD_RIGHT_PADDING |
4691 | 3.57k | + INTRAPRED_SIMD_LEFT_PADDING); |
4692 | 3.57k | WORD32 pred_buf_size_per_thread = pred_buf_size; |
4693 | 3.57k | UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_REF_FILT_OUT].pv_base + |
4694 | 3.57k | (ctr * pred_buf_size_per_thread); |
4695 | | |
4696 | 3.57k | ps_ctxt->pv_ref_filt_out = pu1_base + INTRAPRED_SIMD_LEFT_PADDING; |
4697 | 3.57k | } |
4698 | | |
4699 | | /* Memory assignments for recon storage during CU Recursion */ |
4700 | 3.57k | #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS |
4701 | 3.57k | if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0) |
4702 | 1.72k | #endif |
4703 | 1.72k | { |
4704 | 1.72k | { |
4705 | 1.72k | WORD32 pred_buf_size = (MAX_CU_SIZE * MAX_CU_SIZE); |
4706 | 1.72k | WORD32 pred_buf_size_per_thread = pred_buf_size; |
4707 | 1.72k | UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].pv_base + |
4708 | 1.72k | (ctr * pred_buf_size_per_thread); |
4709 | | |
4710 | 1.72k | ps_ctxt->pv_cu_luma_recon = pu1_base; |
4711 | 1.72k | } |
4712 | | |
4713 | 1.72k | { |
4714 | 1.72k | WORD32 pred_buf_size = ((MAX_CU_SIZE * MAX_CU_SIZE) >> 1) * |
4715 | 1.72k | ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1); |
4716 | 1.72k | WORD32 pred_buf_size_per_thread = pred_buf_size; |
4717 | 1.72k | UWORD8 *pu1_base = |
4718 | 1.72k | (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].pv_base + |
4719 | 1.72k | (ctr * pred_buf_size_per_thread); |
4720 | | |
4721 | 1.72k | ps_ctxt->pv_cu_chrma_recon = pu1_base; |
4722 | 1.72k | } |
4723 | 1.72k | } |
4724 | | |
4725 | | /* Memory assignments for pred storage during CU Recursion */ |
4726 | 3.57k | #if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS |
4727 | 3.57k | if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0) |
4728 | 1.72k | #endif |
4729 | 1.72k | { |
4730 | 1.72k | { |
4731 | 1.72k | WORD32 pred_buf_size = (MAX_CU_SIZE * MAX_CU_SIZE); |
4732 | 1.72k | WORD32 pred_buf_size_per_thread = pred_buf_size; |
4733 | 1.72k | UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].pv_base + |
4734 | 1.72k | (ctr * pred_buf_size_per_thread); |
4735 | | |
4736 | 1.72k | ps_ctxt->pv_CTB_pred_luma = pu1_base; |
4737 | 1.72k | } |
4738 | | |
4739 | 1.72k | { |
4740 | 1.72k | WORD32 pred_buf_size = ((MAX_CU_SIZE * MAX_CU_SIZE) >> 1) * |
4741 | 1.72k | ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1); |
4742 | 1.72k | WORD32 pred_buf_size_per_thread = pred_buf_size; |
4743 | 1.72k | UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].pv_base + |
4744 | 1.72k | (ctr * pred_buf_size_per_thread); |
4745 | | |
4746 | 1.72k | ps_ctxt->pv_CTB_pred_chroma = pu1_base; |
4747 | 1.72k | } |
4748 | 1.72k | } |
4749 | | |
4750 | | /* Memory assignments for CTB left luma data storage */ |
4751 | 3.57k | { |
4752 | 3.57k | WORD32 pred_buf_size = (MAX_CTB_SIZE + MAX_TU_SIZE); |
4753 | 3.57k | WORD32 pred_buf_size_per_thread = pred_buf_size; |
4754 | 3.57k | UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].pv_base + |
4755 | 3.57k | (ctr * pred_buf_size_per_thread); |
4756 | | |
4757 | 3.57k | ps_ctxt->pv_left_luma_data = pu1_base; |
4758 | 3.57k | } |
4759 | | |
4760 | | /* Memory assignments for CTB left chroma data storage */ |
4761 | 3.57k | { |
4762 | 3.57k | WORD32 pred_buf_size = |
4763 | 3.57k | (MAX_CTB_SIZE + MAX_TU_SIZE) * ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1); |
4764 | 3.57k | WORD32 pred_buf_size_per_thread = pred_buf_size; |
4765 | 3.57k | UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].pv_base + |
4766 | 3.57k | (ctr * pred_buf_size_per_thread); |
4767 | | |
4768 | 3.57k | ps_ctxt->pv_left_chrm_data = pu1_base; |
4769 | 3.57k | } |
4770 | 3.57k | } |
4771 | | |
4772 | | /* Memory for inter pred buffers */ |
4773 | 3.57k | { |
4774 | 3.57k | WORD32 i4_num_bufs_per_thread; |
4775 | | |
4776 | 3.57k | WORD32 i4_buf_size_per_cand = |
4777 | 3.57k | (MAX_CTB_SIZE) * (MAX_CTB_SIZE) * |
4778 | 3.57k | ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8); |
4779 | | |
4780 | 3.57k | i4_num_bufs_per_thread = |
4781 | 3.57k | (ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_size / i4_num_proc_thrds) / |
4782 | 3.57k | i4_buf_size_per_cand; |
4783 | | |
4784 | 3.57k | ps_ctxt->i4_max_num_inter_rdopt_cands = i4_num_bufs_per_thread - 4; |
4785 | | |
4786 | 3.57k | ps_ctxt->s_pred_buf_data.u4_is_buf_in_use = UINT_MAX; |
4787 | | |
4788 | 3.57k | { |
4789 | 3.57k | UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_INTER_PRED].pv_base + |
4790 | 3.57k | +(ctr * i4_buf_size_per_cand * i4_num_bufs_per_thread); |
4791 | | |
4792 | 29.4k | for(i = 0; i < i4_num_bufs_per_thread; i++) |
4793 | 25.8k | { |
4794 | 25.8k | ps_ctxt->s_pred_buf_data.apv_inter_pred_data[i] = |
4795 | 25.8k | pu1_base + i * i4_buf_size_per_cand; |
4796 | 25.8k | ps_ctxt->s_pred_buf_data.u4_is_buf_in_use ^= (1 << i); |
4797 | 25.8k | } |
4798 | 3.57k | } |
4799 | 3.57k | } |
4800 | | |
4801 | | /* Memory required to store pred for 422 chroma */ |
4802 | 3.57k | if(i4_chroma_format == IV_YUV_422SP_UV) |
4803 | 0 | { |
4804 | 0 | WORD32 pred_buf_size = MAX_CTB_SIZE * MAX_CTB_SIZE * 2; |
4805 | 0 | WORD32 pred_buf_size_per_thread = |
4806 | 0 | pred_buf_size * ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * |
4807 | 0 | sizeof(UWORD8); |
4808 | 0 | void *pv_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].pv_base + |
4809 | 0 | (ctr * pred_buf_size_per_thread); |
4810 | |
|
4811 | 0 | ps_ctxt->pv_422_chroma_intra_pred_buf = pv_base; |
4812 | 0 | } |
4813 | 3.57k | else |
4814 | 3.57k | { |
4815 | 3.57k | ps_ctxt->pv_422_chroma_intra_pred_buf = NULL; |
4816 | 3.57k | } |
4817 | | |
4818 | | /* Memory for Recon Datastore (Used around and within the RDOPT loop) */ |
4819 | 3.57k | { |
4820 | 3.57k | WORD32 i4_lumaBufSize = MAX_CU_SIZE * MAX_CU_SIZE; |
4821 | 3.57k | WORD32 i4_chromaBufSize = |
4822 | 3.57k | MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ((i4_chroma_format == IV_YUV_422SP_UV) + 1); |
4823 | 3.57k | WORD32 i4_memSize_perThread = ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_size / |
4824 | 3.57k | (i4_num_proc_thrds * sizeof(UWORD8) * (is_hbd_mode + 1)); |
4825 | 3.57k | WORD32 i4_quality_preset = ps_ctxt->i4_quality_preset; |
4826 | 3.57k | { |
4827 | 3.57k | UWORD8 *pu1_mem_base = |
4828 | 3.57k | (((UWORD8 *)ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].pv_base) + |
4829 | 3.57k | ctr * i4_memSize_perThread); |
4830 | | |
4831 | 3.57k | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_luma_recon_bufs[0] = |
4832 | 3.57k | pu1_mem_base + i4_lumaBufSize * 0; |
4833 | 3.57k | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_luma_recon_bufs[1] = |
4834 | 3.57k | pu1_mem_base + i4_lumaBufSize * 1; |
4835 | 3.57k | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_luma_recon_bufs[0] = |
4836 | 3.57k | pu1_mem_base + i4_lumaBufSize * 2; |
4837 | 3.57k | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_luma_recon_bufs[1] = |
4838 | 3.57k | pu1_mem_base + i4_lumaBufSize * 3; |
4839 | | |
4840 | 3.57k | pu1_mem_base += i4_lumaBufSize * 4; |
4841 | | |
4842 | 3.57k | switch(i4_quality_preset) |
4843 | 3.57k | { |
4844 | 1.72k | case IHEVCE_QUALITY_P0: |
4845 | 1.72k | { |
4846 | 1.72k | #if ENABLE_CHROMA_RDOPT_EVAL_IN_PQ |
4847 | 1.72k | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = |
4848 | 1.72k | pu1_mem_base + i4_chromaBufSize * 0; |
4849 | 1.72k | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = |
4850 | 1.72k | pu1_mem_base + i4_chromaBufSize * 1; |
4851 | | #else |
4852 | | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL; |
4853 | | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL; |
4854 | | #endif |
4855 | | |
4856 | 1.72k | #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ |
4857 | 1.72k | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = |
4858 | 1.72k | pu1_mem_base + i4_chromaBufSize * 2; |
4859 | 1.72k | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = |
4860 | 1.72k | pu1_mem_base + i4_chromaBufSize * 3; |
4861 | 1.72k | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = |
4862 | 1.72k | pu1_mem_base + i4_chromaBufSize * 2; |
4863 | 1.72k | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = |
4864 | 1.72k | pu1_mem_base + i4_chromaBufSize * 3; |
4865 | | #else |
4866 | | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL; |
4867 | | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL; |
4868 | | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL; |
4869 | | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL; |
4870 | | #endif |
4871 | | |
4872 | 1.72k | break; |
4873 | 0 | } |
4874 | 280 | case IHEVCE_QUALITY_P2: |
4875 | 280 | { |
4876 | 280 | #if ENABLE_CHROMA_RDOPT_EVAL_IN_HQ |
4877 | 280 | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = |
4878 | 280 | pu1_mem_base + i4_chromaBufSize * 0; |
4879 | 280 | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = |
4880 | 280 | pu1_mem_base + i4_chromaBufSize * 1; |
4881 | | #else |
4882 | | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL; |
4883 | | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL; |
4884 | | #endif |
4885 | | |
4886 | 280 | #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ |
4887 | 280 | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = |
4888 | 280 | pu1_mem_base + i4_chromaBufSize * 2; |
4889 | 280 | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = |
4890 | 280 | pu1_mem_base + i4_chromaBufSize * 3; |
4891 | 280 | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = |
4892 | 280 | pu1_mem_base + i4_chromaBufSize * 2; |
4893 | 280 | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = |
4894 | 280 | pu1_mem_base + i4_chromaBufSize * 3; |
4895 | | #else |
4896 | | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL; |
4897 | | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL; |
4898 | | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL; |
4899 | | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL; |
4900 | | #endif |
4901 | | |
4902 | 280 | break; |
4903 | 0 | } |
4904 | 396 | case IHEVCE_QUALITY_P3: |
4905 | 396 | { |
4906 | 396 | #if ENABLE_CHROMA_RDOPT_EVAL_IN_MS |
4907 | 396 | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = |
4908 | 396 | pu1_mem_base + i4_chromaBufSize * 0; |
4909 | 396 | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = |
4910 | 396 | pu1_mem_base + i4_chromaBufSize * 1; |
4911 | | #else |
4912 | | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL; |
4913 | | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL; |
4914 | | #endif |
4915 | | |
4916 | | #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS |
4917 | | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = |
4918 | | pu1_mem_base + i4_chromaBufSize * 2; |
4919 | | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = |
4920 | | pu1_mem_base + i4_chromaBufSize * 3; |
4921 | | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = |
4922 | | pu1_mem_base + i4_chromaBufSize * 2; |
4923 | | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = |
4924 | | pu1_mem_base + i4_chromaBufSize * 3; |
4925 | | #else |
4926 | 396 | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL; |
4927 | 396 | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL; |
4928 | 396 | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL; |
4929 | 396 | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL; |
4930 | 396 | #endif |
4931 | | |
4932 | 396 | break; |
4933 | 0 | } |
4934 | 253 | case IHEVCE_QUALITY_P4: |
4935 | 253 | { |
4936 | | #if ENABLE_CHROMA_RDOPT_EVAL_IN_HS |
4937 | | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = |
4938 | | pu1_mem_base + i4_chromaBufSize * 0; |
4939 | | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = |
4940 | | pu1_mem_base + i4_chromaBufSize * 1; |
4941 | | #else |
4942 | 253 | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL; |
4943 | 253 | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL; |
4944 | 253 | #endif |
4945 | | |
4946 | | #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS |
4947 | | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = |
4948 | | pu1_mem_base + i4_chromaBufSize * 2; |
4949 | | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = |
4950 | | pu1_mem_base + i4_chromaBufSize * 3; |
4951 | | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = |
4952 | | pu1_mem_base + i4_chromaBufSize * 2; |
4953 | | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = |
4954 | | pu1_mem_base + i4_chromaBufSize * 3; |
4955 | | #else |
4956 | 253 | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL; |
4957 | 253 | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL; |
4958 | 253 | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL; |
4959 | 253 | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL; |
4960 | 253 | #endif |
4961 | | |
4962 | 253 | break; |
4963 | 0 | } |
4964 | 299 | case IHEVCE_QUALITY_P5: |
4965 | 299 | { |
4966 | | #if ENABLE_CHROMA_RDOPT_EVAL_IN_XS |
4967 | | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = |
4968 | | pu1_mem_base + i4_chromaBufSize * 0; |
4969 | | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = |
4970 | | pu1_mem_base + i4_chromaBufSize * 1; |
4971 | | #else |
4972 | 299 | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL; |
4973 | 299 | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL; |
4974 | 299 | #endif |
4975 | | |
4976 | | #if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS |
4977 | | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = |
4978 | | pu1_mem_base + i4_chromaBufSize * 2; |
4979 | | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = |
4980 | | pu1_mem_base + i4_chromaBufSize * 3; |
4981 | | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = |
4982 | | pu1_mem_base + i4_chromaBufSize * 2; |
4983 | | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = |
4984 | | pu1_mem_base + i4_chromaBufSize * 3; |
4985 | | #else |
4986 | 299 | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL; |
4987 | 299 | ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL; |
4988 | 299 | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL; |
4989 | 299 | ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL; |
4990 | 299 | #endif |
4991 | | |
4992 | 299 | break; |
4993 | 0 | } |
4994 | 3.57k | } |
4995 | 3.57k | } |
4996 | | |
4997 | 3.57k | ps_ctxt->as_cu_prms[0].s_recon_datastore.i4_lumaRecon_stride = MAX_CU_SIZE; |
4998 | 3.57k | ps_ctxt->as_cu_prms[1].s_recon_datastore.i4_lumaRecon_stride = MAX_CU_SIZE; |
4999 | 3.57k | ps_ctxt->as_cu_prms[0].s_recon_datastore.i4_chromaRecon_stride = MAX_CU_SIZE; |
5000 | 3.57k | ps_ctxt->as_cu_prms[1].s_recon_datastore.i4_chromaRecon_stride = MAX_CU_SIZE; |
5001 | | |
5002 | 3.57k | } /* Recon Datastore */ |
5003 | | |
5004 | | /****************************************************/ |
5005 | | /****************************************************/ |
5006 | | /* ps_pps->i1_sign_data_hiding_flag == UNHIDDEN */ |
5007 | | /* when NO_SBH. else HIDDEN */ |
5008 | | /****************************************************/ |
5009 | | /****************************************************/ |
5010 | | /* Zero cbf tool is enabled by default for all presets */ |
5011 | 0 | ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE; |
5012 | | |
5013 | 3.57k | if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) |
5014 | 2.00k | { |
5015 | 2.00k | ps_ctxt->i4_quant_rounding_level = CU_LEVEL_QUANT_ROUNDING; |
5016 | 2.00k | ps_ctxt->i4_chroma_quant_rounding_level = CHROMA_QUANT_ROUNDING; |
5017 | 2.00k | ps_ctxt->i4_rdoq_level = ALL_CAND_RDOQ; |
5018 | 2.00k | ps_ctxt->i4_sbh_level = ALL_CAND_SBH; |
5019 | 2.00k | } |
5020 | 1.56k | else if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P3) |
5021 | 396 | { |
5022 | 396 | ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING; |
5023 | 396 | ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING; |
5024 | 396 | ps_ctxt->i4_rdoq_level = NO_RDOQ; |
5025 | 396 | ps_ctxt->i4_sbh_level = NO_SBH; |
5026 | 396 | } |
5027 | 1.17k | else |
5028 | 1.17k | { |
5029 | 1.17k | ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING; |
5030 | 1.17k | ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING; |
5031 | 1.17k | ps_ctxt->i4_rdoq_level = NO_RDOQ; |
5032 | 1.17k | ps_ctxt->i4_sbh_level = NO_SBH; |
5033 | 1.17k | } |
5034 | | |
5035 | | #if DISABLE_QUANT_ROUNDING |
5036 | | ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING; |
5037 | | ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING; |
5038 | | #endif |
5039 | | /*Disabling RDOQ only when spatial modulation is enabled |
5040 | | as RDOQ degrades visual quality*/ |
5041 | 3.57k | if(ps_init_prms->s_config_prms.i4_cu_level_rc & 1) |
5042 | 1.54k | { |
5043 | 1.54k | ps_ctxt->i4_rdoq_level = NO_RDOQ; |
5044 | 1.54k | } |
5045 | | |
5046 | | #if DISABLE_RDOQ |
5047 | | ps_ctxt->i4_rdoq_level = NO_RDOQ; |
5048 | | #endif |
5049 | | |
5050 | | #if DISABLE_SBH |
5051 | | ps_ctxt->i4_sbh_level = NO_SBH; |
5052 | | #endif |
5053 | | |
5054 | | /*Rounding factor calc based on previous cabac states */ |
5055 | | |
5056 | 3.57k | ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[0] = &ps_ctxt->i4_quant_round_4x4[0][0]; |
5057 | 3.57k | ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[1] = &ps_ctxt->i4_quant_round_8x8[0][0]; |
5058 | 3.57k | ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[2] = &ps_ctxt->i4_quant_round_16x16[0][0]; |
5059 | 3.57k | ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[4] = &ps_ctxt->i4_quant_round_32x32[0][0]; |
5060 | | |
5061 | 3.57k | ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[0] = &ps_ctxt->i4_quant_round_4x4[1][0]; |
5062 | 3.57k | ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[1] = &ps_ctxt->i4_quant_round_8x8[1][0]; |
5063 | 3.57k | ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[2] = &ps_ctxt->i4_quant_round_16x16[1][0]; |
5064 | 3.57k | ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[4] = &ps_ctxt->i4_quant_round_32x32[1][0]; |
5065 | | |
5066 | 3.57k | ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[0] = &ps_ctxt->i4_quant_round_cr_4x4[0][0]; |
5067 | 3.57k | ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[1] = &ps_ctxt->i4_quant_round_cr_8x8[0][0]; |
5068 | 3.57k | ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[2] = &ps_ctxt->i4_quant_round_cr_16x16[0][0]; |
5069 | | |
5070 | 3.57k | ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[0] = &ps_ctxt->i4_quant_round_cr_4x4[1][0]; |
5071 | 3.57k | ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[1] = &ps_ctxt->i4_quant_round_cr_8x8[1][0]; |
5072 | 3.57k | ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[2] = &ps_ctxt->i4_quant_round_cr_16x16[1][0]; |
5073 | | |
5074 | | /****************************************************************************************/ |
5075 | | /* Setting the perform rdoq and sbh flags appropriately */ |
5076 | | /****************************************************************************************/ |
5077 | 3.57k | { |
5078 | | /******************************************/ |
5079 | | /* For best cand rdoq and/or sbh */ |
5080 | | /******************************************/ |
5081 | 3.57k | ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq = |
5082 | 3.57k | (ps_ctxt->i4_rdoq_level == BEST_CAND_RDOQ); |
5083 | | /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean |
5084 | | we would have to do RDOQ again.*/ |
5085 | 3.57k | ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq = |
5086 | 3.57k | ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq || |
5087 | 3.57k | ((BEST_CAND_SBH == ps_ctxt->i4_sbh_level) && |
5088 | 0 | (ALL_CAND_RDOQ == ps_ctxt->i4_rdoq_level)); |
5089 | | |
5090 | 3.57k | ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh = |
5091 | 3.57k | (ps_ctxt->i4_sbh_level == BEST_CAND_SBH); |
5092 | | |
5093 | | /* SBH should be performed if |
5094 | | a) i4_sbh_level is BEST_CAND_SBH. |
5095 | | b) For all quality presets above medium speed(i.e. high speed and extreme speed) and |
5096 | | if SBH has to be done because for these presets the quant, iquant and scan coeff |
5097 | | data are calculated in this function and not during the RDOPT stage*/ |
5098 | | |
5099 | | /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/ |
5100 | 3.57k | ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh = |
5101 | 3.57k | ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh || |
5102 | 3.57k | ((BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level) && |
5103 | 0 | (ALL_CAND_SBH == ps_ctxt->i4_sbh_level)); |
5104 | | |
5105 | | /******************************************/ |
5106 | | /* For all cand rdoq and/or sbh */ |
5107 | | /******************************************/ |
5108 | 3.57k | ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq = |
5109 | 3.57k | (ps_ctxt->i4_rdoq_level == ALL_CAND_RDOQ); |
5110 | 3.57k | ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh = |
5111 | 3.57k | (ps_ctxt->i4_sbh_level == ALL_CAND_SBH); |
5112 | 3.57k | ps_ctxt->s_rdoq_sbh_ctxt.i4_bit_depth = |
5113 | 3.57k | ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth; |
5114 | 3.57k | } |
5115 | | |
5116 | 3.57k | if(!is_hbd_mode) |
5117 | 3.57k | { |
5118 | 3.57k | if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1) |
5119 | 1.27k | { |
5120 | 1.27k | if(ps_ctxt->i4_rdoq_level == NO_RDOQ) |
5121 | 989 | { |
5122 | 989 | ps_ctxt->apf_quant_iquant_ssd[0] = |
5123 | 989 | ps_func_selector->ihevc_quant_iquant_ssd_fptr; |
5124 | 989 | ps_ctxt->apf_quant_iquant_ssd[2] = ps_func_selector->ihevc_quant_iquant_fptr; |
5125 | 989 | } |
5126 | 286 | else |
5127 | 286 | { |
5128 | 286 | ps_ctxt->apf_quant_iquant_ssd[0] = |
5129 | 286 | ps_func_selector->ihevc_quant_iquant_ssd_rdoq_fptr; |
5130 | 286 | ps_ctxt->apf_quant_iquant_ssd[2] = |
5131 | 286 | ps_func_selector->ihevc_quant_iquant_rdoq_fptr; |
5132 | 286 | } |
5133 | | |
5134 | | /*If coef level RDOQ is enabled, quantization based on corr. error to be done */ |
5135 | 1.27k | if(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING) |
5136 | 805 | { |
5137 | 805 | ps_ctxt->apf_quant_iquant_ssd[1] = |
5138 | 805 | ps_func_selector->ihevc_q_iq_ssd_var_rnd_fact_fptr; |
5139 | 805 | ps_ctxt->apf_quant_iquant_ssd[3] = |
5140 | 805 | ps_func_selector->ihevc_q_iq_var_rnd_fact_fptr; |
5141 | 805 | } |
5142 | 470 | else |
5143 | 470 | { |
5144 | 470 | ps_ctxt->apf_quant_iquant_ssd[1] = |
5145 | 470 | ps_func_selector->ihevc_quant_iquant_ssd_fptr; |
5146 | 470 | ps_ctxt->apf_quant_iquant_ssd[3] = ps_func_selector->ihevc_quant_iquant_fptr; |
5147 | 470 | } |
5148 | 1.27k | } |
5149 | 2.29k | else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0) |
5150 | 2.29k | { |
5151 | 2.29k | if(ps_ctxt->i4_rdoq_level == NO_RDOQ) |
5152 | 1.79k | { |
5153 | 1.79k | ps_ctxt->apf_quant_iquant_ssd[0] = |
5154 | 1.79k | ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_fptr; |
5155 | 1.79k | ps_ctxt->apf_quant_iquant_ssd[2] = |
5156 | 1.79k | ps_func_selector->ihevc_quant_iquant_flat_scale_mat_fptr; |
5157 | 1.79k | } |
5158 | 501 | else |
5159 | 501 | { |
5160 | 501 | ps_ctxt->apf_quant_iquant_ssd[0] = |
5161 | 501 | ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_fptr; |
5162 | 501 | ps_ctxt->apf_quant_iquant_ssd[2] = |
5163 | 501 | ps_func_selector->ihevc_quant_iquant_flat_scale_mat_rdoq_fptr; |
5164 | 501 | } |
5165 | | |
5166 | | /*If coef level RDOQ is enabled, quantization based on corr. error to be done */ |
5167 | 2.29k | if(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING) |
5168 | 1.20k | { |
5169 | 1.20k | ps_ctxt->apf_quant_iquant_ssd[1] = |
5170 | 1.20k | ps_func_selector->ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_fptr; |
5171 | 1.20k | ps_ctxt->apf_quant_iquant_ssd[3] = |
5172 | 1.20k | ps_func_selector->ihevc_q_iq_flat_scale_mat_var_rnd_fact_fptr; |
5173 | 1.20k | } |
5174 | 1.09k | else |
5175 | 1.09k | { |
5176 | 1.09k | ps_ctxt->apf_quant_iquant_ssd[1] = |
5177 | 1.09k | ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_fptr; |
5178 | 1.09k | ps_ctxt->apf_quant_iquant_ssd[3] = |
5179 | 1.09k | ps_func_selector->ihevc_quant_iquant_flat_scale_mat_fptr; |
5180 | 1.09k | } |
5181 | 2.29k | } |
5182 | | |
5183 | 3.57k | ps_ctxt->s_sao_ctxt_t.apf_sao_luma[0] = |
5184 | 3.57k | ps_func_selector->ihevc_sao_edge_offset_class0_fptr; |
5185 | 3.57k | ps_ctxt->s_sao_ctxt_t.apf_sao_luma[1] = |
5186 | 3.57k | ps_func_selector->ihevc_sao_edge_offset_class1_fptr; |
5187 | 3.57k | ps_ctxt->s_sao_ctxt_t.apf_sao_luma[2] = |
5188 | 3.57k | ps_func_selector->ihevc_sao_edge_offset_class2_fptr; |
5189 | 3.57k | ps_ctxt->s_sao_ctxt_t.apf_sao_luma[3] = |
5190 | 3.57k | ps_func_selector->ihevc_sao_edge_offset_class3_fptr; |
5191 | | |
5192 | 3.57k | ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[0] = |
5193 | 3.57k | ps_func_selector->ihevc_sao_edge_offset_class0_chroma_fptr; |
5194 | 3.57k | ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[1] = |
5195 | 3.57k | ps_func_selector->ihevc_sao_edge_offset_class1_chroma_fptr; |
5196 | 3.57k | ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[2] = |
5197 | 3.57k | ps_func_selector->ihevc_sao_edge_offset_class2_chroma_fptr; |
5198 | 3.57k | ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[3] = |
5199 | 3.57k | ps_func_selector->ihevc_sao_edge_offset_class3_chroma_fptr; |
5200 | | |
5201 | 3.57k | ps_ctxt->apf_it_recon[0] = ps_func_selector->ihevc_itrans_recon_4x4_ttype1_fptr; |
5202 | 3.57k | ps_ctxt->apf_it_recon[1] = ps_func_selector->ihevc_itrans_recon_4x4_fptr; |
5203 | 3.57k | ps_ctxt->apf_it_recon[2] = ps_func_selector->ihevc_itrans_recon_8x8_fptr; |
5204 | 3.57k | ps_ctxt->apf_it_recon[3] = ps_func_selector->ihevc_itrans_recon_16x16_fptr; |
5205 | 3.57k | ps_ctxt->apf_it_recon[4] = ps_func_selector->ihevc_itrans_recon_32x32_fptr; |
5206 | | |
5207 | 3.57k | ps_ctxt->apf_chrm_it_recon[0] = ps_func_selector->ihevc_chroma_itrans_recon_4x4_fptr; |
5208 | 3.57k | ps_ctxt->apf_chrm_it_recon[1] = ps_func_selector->ihevc_chroma_itrans_recon_8x8_fptr; |
5209 | 3.57k | ps_ctxt->apf_chrm_it_recon[2] = ps_func_selector->ihevc_chroma_itrans_recon_16x16_fptr; |
5210 | | |
5211 | 3.57k | ps_ctxt->apf_resd_trns[0] = ps_func_selector->ihevc_resi_trans_4x4_ttype1_fptr; |
5212 | 3.57k | ps_ctxt->apf_resd_trns[1] = ps_func_selector->ihevc_resi_trans_4x4_fptr; |
5213 | 3.57k | ps_ctxt->apf_resd_trns[2] = ps_func_selector->ihevc_resi_trans_8x8_fptr; |
5214 | 3.57k | ps_ctxt->apf_resd_trns[3] = ps_func_selector->ihevc_resi_trans_16x16_fptr; |
5215 | 3.57k | ps_ctxt->apf_resd_trns[4] = ps_func_selector->ihevc_resi_trans_32x32_fptr; |
5216 | | |
5217 | 3.57k | ps_ctxt->apf_chrm_resd_trns[0] = ps_func_selector->ihevc_resi_trans_4x4_fptr; |
5218 | 3.57k | ps_ctxt->apf_chrm_resd_trns[1] = ps_func_selector->ihevc_resi_trans_8x8_fptr; |
5219 | 3.57k | ps_ctxt->apf_chrm_resd_trns[2] = ps_func_selector->ihevc_resi_trans_16x16_fptr; |
5220 | | |
5221 | 3.57k | ps_ctxt->apf_lum_ip[IP_FUNC_MODE_0] = |
5222 | 3.57k | ps_func_selector->ihevc_intra_pred_luma_planar_fptr; |
5223 | 3.57k | ps_ctxt->apf_lum_ip[IP_FUNC_MODE_1] = ps_func_selector->ihevc_intra_pred_luma_dc_fptr; |
5224 | 3.57k | ps_ctxt->apf_lum_ip[IP_FUNC_MODE_2] = |
5225 | 3.57k | ps_func_selector->ihevc_intra_pred_luma_mode2_fptr; |
5226 | 3.57k | ps_ctxt->apf_lum_ip[IP_FUNC_MODE_3TO9] = |
5227 | 3.57k | ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr; |
5228 | 3.57k | ps_ctxt->apf_lum_ip[IP_FUNC_MODE_10] = |
5229 | 3.57k | ps_func_selector->ihevc_intra_pred_luma_horz_fptr; |
5230 | 3.57k | ps_ctxt->apf_lum_ip[IP_FUNC_MODE_11TO17] = |
5231 | 3.57k | ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr; |
5232 | 3.57k | ps_ctxt->apf_lum_ip[IP_FUNC_MODE_18_34] = |
5233 | 3.57k | ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr; |
5234 | 3.57k | ps_ctxt->apf_lum_ip[IP_FUNC_MODE_19TO25] = |
5235 | 3.57k | ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr; |
5236 | 3.57k | ps_ctxt->apf_lum_ip[IP_FUNC_MODE_26] = ps_func_selector->ihevc_intra_pred_luma_ver_fptr; |
5237 | 3.57k | ps_ctxt->apf_lum_ip[IP_FUNC_MODE_27TO33] = |
5238 | 3.57k | ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr; |
5239 | | |
5240 | 3.57k | ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_0] = |
5241 | 3.57k | ps_func_selector->ihevc_intra_pred_chroma_planar_fptr; |
5242 | 3.57k | ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_1] = |
5243 | 3.57k | ps_func_selector->ihevc_intra_pred_chroma_dc_fptr; |
5244 | 3.57k | ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_2] = |
5245 | 3.57k | ps_func_selector->ihevc_intra_pred_chroma_mode2_fptr; |
5246 | 3.57k | ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_3TO9] = |
5247 | 3.57k | ps_func_selector->ihevc_intra_pred_chroma_mode_3_to_9_fptr; |
5248 | 3.57k | ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_10] = |
5249 | 3.57k | ps_func_selector->ihevc_intra_pred_chroma_horz_fptr; |
5250 | 3.57k | ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_11TO17] = |
5251 | 3.57k | ps_func_selector->ihevc_intra_pred_chroma_mode_11_to_17_fptr; |
5252 | 3.57k | ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_18_34] = |
5253 | 3.57k | ps_func_selector->ihevc_intra_pred_chroma_mode_18_34_fptr; |
5254 | 3.57k | ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_19TO25] = |
5255 | 3.57k | ps_func_selector->ihevc_intra_pred_chroma_mode_19_to_25_fptr; |
5256 | 3.57k | ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_26] = |
5257 | 3.57k | ps_func_selector->ihevc_intra_pred_chroma_ver_fptr; |
5258 | 3.57k | ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_27TO33] = |
5259 | 3.57k | ps_func_selector->ihevc_intra_pred_chroma_mode_27_to_33_fptr; |
5260 | | |
5261 | 3.57k | ps_ctxt->apf_chrm_resd_trns_had[0] = |
5262 | 3.57k | (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_4x4_8bit; |
5263 | 3.57k | ps_ctxt->apf_chrm_resd_trns_had[1] = |
5264 | 3.57k | (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_8x8_8bit; |
5265 | 3.57k | ps_ctxt->apf_chrm_resd_trns_had[2] = |
5266 | 3.57k | (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_16x16_8bit; |
5267 | 3.57k | } |
5268 | | |
5269 | 3.57k | if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0) |
5270 | 2.29k | { |
5271 | | /* initialise the scale & rescale matrices */ |
5272 | 2.29k | ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0]; |
5273 | 2.29k | ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0]; |
5274 | 2.29k | ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_flat_scale_mat_8x8[0]; |
5275 | 2.29k | ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_flat_scale_mat_16x16[0]; |
5276 | 2.29k | ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_flat_scale_mat_32x32[0]; |
5277 | | /*init for inter matrix*/ |
5278 | 2.29k | ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0]; |
5279 | 2.29k | ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0]; |
5280 | 2.29k | ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_flat_scale_mat_8x8[0]; |
5281 | 2.29k | ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_flat_scale_mat_16x16[0]; |
5282 | 2.29k | ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_flat_scale_mat_32x32[0]; |
5283 | | |
5284 | | /*init for rescale matrix*/ |
5285 | 2.29k | ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0]; |
5286 | 2.29k | ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0]; |
5287 | 2.29k | ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0]; |
5288 | 2.29k | ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0]; |
5289 | 2.29k | ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0]; |
5290 | | /*init for rescale inter matrix*/ |
5291 | 2.29k | ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0]; |
5292 | 2.29k | ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0]; |
5293 | 2.29k | ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0]; |
5294 | 2.29k | ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0]; |
5295 | 2.29k | ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0]; |
5296 | 2.29k | } |
5297 | 1.27k | else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1) |
5298 | 1.27k | { |
5299 | | /* initialise the scale & rescale matrices */ |
5300 | 1.27k | ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0]; |
5301 | 1.27k | ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0]; |
5302 | 1.27k | ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_intra_default_scale_mat_8x8[0]; |
5303 | 1.27k | ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_intra_default_scale_mat_16x16[0]; |
5304 | 1.27k | ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_intra_default_scale_mat_32x32[0]; |
5305 | | /*init for inter matrix*/ |
5306 | 1.27k | ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0]; |
5307 | 1.27k | ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0]; |
5308 | 1.27k | ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_inter_default_scale_mat_8x8[0]; |
5309 | 1.27k | ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_inter_default_scale_mat_16x16[0]; |
5310 | 1.27k | ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_inter_default_scale_mat_32x32[0]; |
5311 | | |
5312 | | /*init for rescale matrix*/ |
5313 | 1.27k | ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0]; |
5314 | 1.27k | ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0]; |
5315 | 1.27k | ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_intra_default_rescale_mat_8x8[0]; |
5316 | 1.27k | ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_intra_default_rescale_mat_16x16[0]; |
5317 | 1.27k | ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_intra_default_rescale_mat_32x32[0]; |
5318 | | /*init for rescale inter matrix*/ |
5319 | 1.27k | ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0]; |
5320 | 1.27k | ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0]; |
5321 | 1.27k | ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_inter_default_rescale_mat_8x8[0]; |
5322 | 1.27k | ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_inter_default_rescale_mat_16x16[0]; |
5323 | 1.27k | ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_inter_default_rescale_mat_32x32[0]; |
5324 | 1.27k | } |
5325 | 0 | else |
5326 | 0 | { |
5327 | 0 | ASSERT(0); |
5328 | 0 | } |
5329 | | |
5330 | | /* Not recomputing Luma pred-data and header data for any preset now */ |
5331 | 3.57k | ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0; |
5332 | 3.57k | ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0; |
5333 | 3.57k | ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1; |
5334 | | |
5335 | 3.57k | switch(ps_ctxt->i4_quality_preset) |
5336 | 3.57k | { |
5337 | 1.72k | case IHEVCE_QUALITY_P0: |
5338 | 1.72k | { |
5339 | 1.72k | ps_ctxt->i4_max_merge_candidates = 5; |
5340 | 1.72k | ps_ctxt->i4_use_satd_for_merge_eval = 1; |
5341 | 1.72k | ps_ctxt->u1_use_top_at_ctb_boundary = 1; |
5342 | 1.72k | ps_ctxt->u1_use_early_cbf_data = 0; |
5343 | 1.72k | ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_PQ; |
5344 | 1.72k | ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd = |
5345 | 1.72k | ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ; |
5346 | | |
5347 | 1.72k | break; |
5348 | 0 | } |
5349 | 280 | case IHEVCE_QUALITY_P2: |
5350 | 280 | { |
5351 | 280 | ps_ctxt->i4_max_merge_candidates = 5; |
5352 | 280 | ps_ctxt->i4_use_satd_for_merge_eval = 1; |
5353 | 280 | ps_ctxt->u1_use_top_at_ctb_boundary = 1; |
5354 | 280 | ps_ctxt->u1_use_early_cbf_data = 0; |
5355 | | |
5356 | 280 | ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_HQ; |
5357 | 280 | ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd = |
5358 | 280 | ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ; |
5359 | | |
5360 | 280 | break; |
5361 | 0 | } |
5362 | 396 | case IHEVCE_QUALITY_P3: |
5363 | 396 | { |
5364 | 396 | ps_ctxt->i4_max_merge_candidates = 3; |
5365 | 396 | ps_ctxt->i4_use_satd_for_merge_eval = 1; |
5366 | 396 | ps_ctxt->u1_use_top_at_ctb_boundary = 0; |
5367 | | |
5368 | 396 | ps_ctxt->u1_use_early_cbf_data = 0; |
5369 | 396 | ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_MS; |
5370 | 396 | ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd = |
5371 | 396 | ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS; |
5372 | | |
5373 | 396 | break; |
5374 | 0 | } |
5375 | 253 | case IHEVCE_QUALITY_P4: |
5376 | 253 | { |
5377 | 253 | ps_ctxt->i4_max_merge_candidates = 2; |
5378 | 253 | ps_ctxt->i4_use_satd_for_merge_eval = 1; |
5379 | 253 | ps_ctxt->u1_use_top_at_ctb_boundary = 0; |
5380 | 253 | ps_ctxt->u1_use_early_cbf_data = 0; |
5381 | 253 | ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_HS; |
5382 | 253 | ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd = |
5383 | 253 | ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS; |
5384 | | |
5385 | 253 | break; |
5386 | 0 | } |
5387 | 299 | case IHEVCE_QUALITY_P5: |
5388 | 299 | { |
5389 | 299 | ps_ctxt->i4_max_merge_candidates = 2; |
5390 | 299 | ps_ctxt->i4_use_satd_for_merge_eval = 0; |
5391 | 299 | ps_ctxt->u1_use_top_at_ctb_boundary = 0; |
5392 | 299 | ps_ctxt->u1_use_early_cbf_data = 0; |
5393 | 299 | ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_XS; |
5394 | 299 | ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd = |
5395 | 299 | ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS; |
5396 | | |
5397 | 299 | break; |
5398 | 0 | } |
5399 | 620 | case IHEVCE_QUALITY_P6: |
5400 | 620 | { |
5401 | 620 | ps_ctxt->i4_max_merge_candidates = 2; |
5402 | 620 | ps_ctxt->i4_use_satd_for_merge_eval = 0; |
5403 | 620 | ps_ctxt->u1_use_top_at_ctb_boundary = 0; |
5404 | 620 | ps_ctxt->u1_use_early_cbf_data = EARLY_CBF_ON; |
5405 | 620 | break; |
5406 | 0 | } |
5407 | 0 | default: |
5408 | 0 | { |
5409 | 0 | ASSERT(0); |
5410 | 0 | } |
5411 | 3.57k | } |
5412 | | |
5413 | | #if DISABLE_SKIP_AND_MERGE_EVAL |
5414 | | ps_ctxt->i4_max_merge_candidates = 0; |
5415 | | #endif |
5416 | | |
5417 | 3.57k | ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data = |
5418 | 3.57k | !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt; |
5419 | | |
5420 | | /*initialize memory for RC related parameters required/populated by enc_loop */ |
5421 | | /* the allocated memory is distributed as follows assuming encoder is running for 3 bit-rate instances |
5422 | | |-------|-> Thread 0, instance 0 |
5423 | | | | |
5424 | | | | |
5425 | | | | |
5426 | | |-------|-> thread 0, instance 1 |
5427 | | | | |
5428 | | | | |
5429 | | | | |
5430 | | |-------|-> thread 0, instance 2 |
5431 | | | | |
5432 | | | | |
5433 | | | | |
5434 | | |-------|-> thread 1, instance 0 |
5435 | | | | |
5436 | | | | |
5437 | | | | |
5438 | | |-------|-> thread 1, instance 1 |
5439 | | | | |
5440 | | | | |
5441 | | | | |
5442 | | |-------|-> thread 1, instance 2 |
5443 | | ... ... |
5444 | | |
5445 | | Each thread will collate the data corresponding to the bit-rate instance it's running at the appropriate place. |
5446 | | Finally, one thread will become master and collate the data from all the threads */ |
5447 | 7.14k | for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++) |
5448 | 3.57k | { |
5449 | 7.14k | for(i = 0; i < i4_num_bitrate_inst; i++) |
5450 | 3.57k | { |
5451 | 3.57k | ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i] = ps_enc_loop_rc_params; |
5452 | 3.57k | ps_enc_loop_rc_params++; |
5453 | 3.57k | } |
5454 | 3.57k | } |
5455 | | /* Non-Luma modes for Chroma are evaluated only in HIGH QUALITY preset */ |
5456 | | |
5457 | | #if !ENABLE_SEPARATE_LUMA_CHROMA_INTRA_MODE |
5458 | | ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd = 0; |
5459 | | #endif |
5460 | | |
5461 | 3.57k | ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU].i4_iq_buff_stride = |
5462 | 3.57k | MAX_TU_SIZE; |
5463 | 3.57k | ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU_DIV2].i4_iq_buff_stride = |
5464 | 3.57k | MAX_TU_SIZE; |
5465 | | /*Multiplying by two to account for interleaving of cb and cr*/ |
5466 | 3.57k | ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU].i4_pred_stride = MAX_TU_SIZE |
5467 | 3.57k | << 1; |
5468 | 3.57k | ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU_DIV2].i4_pred_stride = |
5469 | 3.57k | MAX_TU_SIZE << 1; |
5470 | | |
5471 | | /* Memory for a frame level memory to store tile-id */ |
5472 | | /* corresponding to each CTB of frame */ |
5473 | 3.57k | ps_ctxt->pi4_offset_for_last_cu_qp = &ps_master_ctxt->ai4_offset_for_last_cu_qp[0]; |
5474 | | |
5475 | 3.57k | ps_ctxt->i4_qp_mod = ps_init_prms->s_config_prms.i4_cu_level_rc & 1; |
5476 | | /* psy rd strength is a run time parameter controlled by bit field 5-7 in the VQET field.*/ |
5477 | | /* we disable psyrd if the psy strength is zero or the BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER field is not set */ |
5478 | 3.57k | if(ps_init_prms->s_coding_tools_prms.i4_vqet & |
5479 | 3.57k | (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) |
5480 | 0 | { |
5481 | 0 | UWORD32 psy_strength; |
5482 | 0 | UWORD32 psy_strength_mask = |
5483 | 0 | 224; // only bits 5,6,7 are ones. These three bits represent the psy strength |
5484 | 0 | psy_strength = ps_init_prms->s_coding_tools_prms.i4_vqet & psy_strength_mask; |
5485 | 0 | ps_ctxt->u1_enable_psyRDOPT = 1; |
5486 | 0 | ps_ctxt->u4_psy_strength = psy_strength >> BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1; |
5487 | 0 | if(psy_strength == 0) |
5488 | 0 | { |
5489 | 0 | ps_ctxt->u1_enable_psyRDOPT = 0; |
5490 | 0 | ps_ctxt->u4_psy_strength = 0; |
5491 | 0 | } |
5492 | 0 | } |
5493 | | |
5494 | 3.57k | ps_ctxt->u1_is_stasino_enabled = |
5495 | 3.57k | ((ps_init_prms->s_coding_tools_prms.i4_vqet & |
5496 | 3.57k | (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) && |
5497 | 0 | (ps_init_prms->s_coding_tools_prms.i4_vqet & |
5498 | 0 | (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION))); |
5499 | | |
5500 | 3.57k | ps_ctxt->u1_max_inter_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_nI; |
5501 | 3.57k | ps_ctxt->u1_max_intra_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_I; |
5502 | 3.57k | ps_ctxt++; |
5503 | 3.57k | } |
5504 | | /* Store Tile params base into EncLoop Master context */ |
5505 | 3.57k | ps_master_ctxt->pv_tile_params_base = (void *)ps_tile_params_base; |
5506 | | |
5507 | 3.57k | if(1 == ps_tile_params_base->i4_tiles_enabled_flag) |
5508 | 0 | { |
5509 | 0 | i4_num_tile_cols = ps_tile_params_base->i4_num_tile_cols; |
5510 | 0 | } |
5511 | | |
5512 | | /* Updating ai4_offset_for_last_cu_qp[] array for all tile-columns of frame */ |
5513 | | /* Loop over all tile-cols in frame */ |
5514 | 7.14k | for(ctr = 0; ctr < i4_num_tile_cols; ctr++) |
5515 | 3.57k | { |
5516 | 3.57k | WORD32 i4_tile_col_wd_in_ctb_unit = |
5517 | 3.57k | (ps_tile_params_base + ctr)->i4_curr_tile_wd_in_ctb_unit; |
5518 | 3.57k | WORD32 offset_x; |
5519 | | |
5520 | 3.57k | if(ctr == (i4_num_tile_cols - 1)) |
5521 | 3.57k | { /* Last tile-column of frame */ |
5522 | 3.57k | WORD32 min_cu_size = 1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size; |
5523 | | |
5524 | 3.57k | WORD32 cu_aligned_pic_wd = |
5525 | 3.57k | ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width + |
5526 | 3.57k | SET_CTB_ALIGN( |
5527 | 3.57k | ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, |
5528 | 3.57k | min_cu_size); |
5529 | | |
5530 | 3.57k | WORD32 last_hz_ctb_wd = MAX_CTB_SIZE - (u4_width - cu_aligned_pic_wd); |
5531 | | |
5532 | 3.57k | offset_x = (i4_tile_col_wd_in_ctb_unit - 1) * MAX_CTB_SIZE; |
5533 | 3.57k | offset_x += last_hz_ctb_wd; |
5534 | 3.57k | } |
5535 | 0 | else |
5536 | 0 | { /* Not the last tile-column of frame */ |
5537 | 0 | offset_x = (i4_tile_col_wd_in_ctb_unit)*MAX_CTB_SIZE; |
5538 | 0 | } |
5539 | | |
5540 | 3.57k | offset_x /= 4; |
5541 | 3.57k | offset_x -= 1; |
5542 | | |
5543 | 3.57k | ps_master_ctxt->ai4_offset_for_last_cu_qp[ctr] = offset_x; |
5544 | 3.57k | } |
5545 | | |
5546 | 3.57k | n_tabs = NUM_ENC_LOOP_MEM_RECS; |
5547 | | |
5548 | | /*store num bit-rate instances in the master context */ |
5549 | 3.57k | ps_master_ctxt->i4_num_bitrates = i4_num_bitrate_inst; |
5550 | 3.57k | ps_master_ctxt->i4_num_enc_loop_frm_pllel = i4_num_enc_loop_frm_pllel; |
5551 | | /*************************************************************************/ |
5552 | | /* --- EncLoop Deblock and SAO sync Dep Mngr Mem init -- */ |
5553 | | /*************************************************************************/ |
5554 | 3.57k | { |
5555 | 3.57k | WORD32 count; |
5556 | 3.57k | WORD32 num_vert_units, num_blks_in_row; |
5557 | 3.57k | WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height; |
5558 | 3.57k | WORD32 wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width; |
5559 | | |
5560 | 3.57k | ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units); |
5561 | 3.57k | ihevce_enc_loop_dblk_get_prms_dep_mngr(wd, &num_blks_in_row); |
5562 | 3.57k | ASSERT(num_vert_units > 0); |
5563 | 3.57k | ASSERT(num_blks_in_row > 0); |
5564 | | |
5565 | 7.14k | for(count = 0; count < i4_num_enc_loop_frm_pllel; count++) |
5566 | 3.57k | { |
5567 | 7.14k | for(i = 0; i < i4_num_bitrate_inst; i++) |
5568 | 3.57k | { |
5569 | 3.57k | ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[count][i] = ihevce_dmgr_init( |
5570 | 3.57k | &ps_mem_tab[n_tabs], |
5571 | 3.57k | pv_osal_handle, |
5572 | 3.57k | DEP_MNGR_ROW_ROW_SYNC, |
5573 | 3.57k | num_vert_units, |
5574 | 3.57k | num_blks_in_row, |
5575 | 3.57k | i4_num_tile_cols, /* Number of Col Tiles */ |
5576 | 3.57k | i4_num_proc_thrds, |
5577 | 3.57k | 0 /*Sem Disabled*/ |
5578 | 3.57k | ); |
5579 | | |
5580 | 3.57k | n_tabs += ihevce_dmgr_get_num_mem_recs(); |
5581 | 3.57k | } |
5582 | 3.57k | } |
5583 | | |
5584 | 7.14k | for(count = 0; count < i4_num_enc_loop_frm_pllel; count++) |
5585 | 3.57k | { |
5586 | 7.14k | for(i = 0; i < i4_num_bitrate_inst; i++) |
5587 | 3.57k | { |
5588 | 3.57k | ps_master_ctxt->aapv_dep_mngr_enc_loop_sao[count][i] = ihevce_dmgr_init( |
5589 | 3.57k | &ps_mem_tab[n_tabs], |
5590 | 3.57k | pv_osal_handle, |
5591 | 3.57k | DEP_MNGR_ROW_ROW_SYNC, |
5592 | 3.57k | num_vert_units, |
5593 | 3.57k | num_blks_in_row, |
5594 | 3.57k | i4_num_tile_cols, /* Number of Col Tiles */ |
5595 | 3.57k | i4_num_proc_thrds, |
5596 | 3.57k | 0 /*Sem Disabled*/ |
5597 | 3.57k | ); |
5598 | | |
5599 | 3.57k | n_tabs += ihevce_dmgr_get_num_mem_recs(); |
5600 | 3.57k | } |
5601 | 3.57k | } |
5602 | 3.57k | } |
5603 | | /*************************************************************************/ |
5604 | | /* --- EncLoop Top-Right CU synnc Dep Mngr Mem init -- */ |
5605 | | /*************************************************************************/ |
5606 | 3.57k | { |
5607 | 3.57k | WORD32 count; |
5608 | 3.57k | WORD32 num_vert_units, num_blks_in_row; |
5609 | 3.57k | WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height; |
5610 | 3.57k | WORD32 wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width; |
5611 | | |
5612 | 3.57k | WORD32 i4_sem = 0; |
5613 | | |
5614 | 3.57k | if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset >= |
5615 | 3.57k | IHEVCE_QUALITY_P4) |
5616 | 1.17k | i4_sem = 0; |
5617 | 2.40k | else |
5618 | 2.40k | i4_sem = 1; |
5619 | 3.57k | ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units); |
5620 | | /* For Top-Right CU sync, adding one more CTB since value updation */ |
5621 | | /* happens in that way for the last CTB in the row */ |
5622 | 3.57k | num_blks_in_row = wd + SET_CTB_ALIGN(wd, MAX_CU_SIZE); |
5623 | 3.57k | num_blks_in_row += MAX_CTB_SIZE; |
5624 | | |
5625 | 3.57k | ASSERT(num_vert_units > 0); |
5626 | 3.57k | ASSERT(num_blks_in_row > 0); |
5627 | | |
5628 | 7.14k | for(count = 0; count < i4_num_enc_loop_frm_pllel; count++) |
5629 | 3.57k | { |
5630 | 7.14k | for(i = 0; i < i4_num_bitrate_inst; i++) |
5631 | 3.57k | { |
5632 | | /* For ES/HS, CU level updates uses spin-locks than semaphore */ |
5633 | 3.57k | { |
5634 | 3.57k | ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[count][i] = |
5635 | 3.57k | ihevce_dmgr_init( |
5636 | 3.57k | &ps_mem_tab[n_tabs], |
5637 | 3.57k | pv_osal_handle, |
5638 | 3.57k | DEP_MNGR_ROW_ROW_SYNC, |
5639 | 3.57k | num_vert_units, |
5640 | 3.57k | num_blks_in_row, |
5641 | 3.57k | i4_num_tile_cols, /* Number of Col Tiles */ |
5642 | 3.57k | i4_num_proc_thrds, |
5643 | 3.57k | i4_sem /*Sem Disabled*/ |
5644 | 3.57k | ); |
5645 | 3.57k | } |
5646 | 3.57k | n_tabs += ihevce_dmgr_get_num_mem_recs(); |
5647 | 3.57k | } |
5648 | 3.57k | } |
5649 | 3.57k | } |
5650 | | |
5651 | 17.8k | for(i = 1; i < 5; i++) |
5652 | 14.2k | { |
5653 | 14.2k | WORD32 i4_log2_trans_size = i + 1; |
5654 | 14.2k | WORD32 i4_bit_depth = ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth; |
5655 | | |
5656 | 14.2k | ga_trans_shift[i] = (MAX_TR_DYNAMIC_RANGE - i4_bit_depth - i4_log2_trans_size) << 1; |
5657 | 14.2k | } |
5658 | | |
5659 | 3.57k | ga_trans_shift[0] = ga_trans_shift[1]; |
5660 | | |
5661 | | /* return the handle to caller */ |
5662 | 3.57k | return ((void *)ps_master_ctxt); |
5663 | 3.57k | } |
5664 | | |
5665 | | /*! |
5666 | | ****************************************************************************** |
5667 | | * \if Function name : ihevce_enc_loop_reg_sem_hdls \endif |
5668 | | * |
5669 | | * \brief |
5670 | | * Registers the semaphore handles with every EncLoop dependency manager |
5671 | | * (deblock, SAO and top-right CU sync) of each parallel-frame / bit-rate instance. |
5672 | | * \param[in] pv_enc_loop_ctxt : pointer to the EncLoop master context |
5673 | | * \param[in] ppv_sem_hdls : Array of semaphore handles (forwarded unchanged) |
5674 | | * \param[in] i4_num_proc_thrds : Number of processing threads (forwarded unchanged) |
5675 | | * |
5676 | | * \return |
5677 | | * None |
5678 | | * |
5679 | | * \author |
5680 | | * Ittiam |
5681 | | * |
5682 | | ***************************************************************************** |
5683 | | */ |
5684 | | void ihevce_enc_loop_reg_sem_hdls( |
5685 | | void *pv_enc_loop_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds) |
5686 | 3.57k | { |
5687 | 3.57k | ihevce_enc_loop_master_ctxt_t *ps_master_ctxt; |
5688 | 3.57k | WORD32 i, enc_frm_id; |
5689 | | |
5690 | 3.57k | ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt; |
5691 | | |
5692 | | /*************************************************************************/ |
5693 | | /* --- EncLoop Deblock and SAO sync Dep Mngr reg Semaphores -- */ |
5694 | | /*************************************************************************/ |
5695 | 7.14k | for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++) |
5696 | 3.57k | { |
5697 | 7.14k | for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++) |
5698 | 3.57k | { |
5699 | 3.57k | ihevce_dmgr_reg_sem_hdls( |
5700 | 3.57k | ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[enc_frm_id][i], |
5701 | 3.57k | ppv_sem_hdls, |
5702 | 3.57k | i4_num_proc_thrds); |
5703 | 3.57k | } |
5704 | 3.57k | } |
5705 | | |
5706 | 7.14k | for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++) |
5707 | 3.57k | { |
5708 | 7.14k | for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++) |
5709 | 3.57k | { |
5710 | 3.57k | ihevce_dmgr_reg_sem_hdls( |
5711 | 3.57k | ps_master_ctxt->aapv_dep_mngr_enc_loop_sao[enc_frm_id][i], |
5712 | 3.57k | ppv_sem_hdls, |
5713 | 3.57k | i4_num_proc_thrds); |
5714 | 3.57k | } |
5715 | 3.57k | } |
5716 | | |
5717 | | /*************************************************************************/ |
5718 | | /* --- EncLoop Top-Right CU sync Dep Mngr reg Semaphores -- */ |
5719 | | /*************************************************************************/ |
5720 | 7.14k | for(enc_frm_id = 0; enc_frm_id < ps_master_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++) |
5721 | 3.57k | { |
5722 | 7.14k | for(i = 0; i < ps_master_ctxt->i4_num_bitrates; i++) |
5723 | 3.57k | { |
5724 | 3.57k | ihevce_dmgr_reg_sem_hdls( |
5725 | 3.57k | ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[enc_frm_id][i], |
5726 | 3.57k | ppv_sem_hdls, |
5727 | 3.57k | i4_num_proc_thrds); |
5728 | 3.57k | } |
5729 | 3.57k | } |
5730 | | |
5731 | 3.57k | return; |
5732 | 3.57k | } |
5733 | | |
5734 | | /*! |
5735 | | ****************************************************************************** |
5736 | | * \if Function name : ihevce_enc_loop_delete \endif |
5737 | | * |
5738 | | * \brief |
5739 | | * Destroy EncLoop module |
5740 | | * Note : Only destroys the resources allocated in the module like |
5741 | | * semaphore, etc. Memory free is done separately using memtabs |
5742 | | * |
5743 | | * \param[in] pv_enc_loop_ctxt : pointer to the EncLoop master context |
5744 | | * |
5745 | | * \return |
5746 | | * None |
5747 | | * |
5748 | | * \author |
5749 | | * Ittiam |
5750 | | * |
5751 | | ***************************************************************************** |
5752 | | */ |
5753 | | void ihevce_enc_loop_delete(void *pv_enc_loop_ctxt) |
5754 | 3.57k | { |
5755 | 3.57k | ihevce_enc_loop_master_ctxt_t *ps_enc_loop_ctxt; |
5756 | 3.57k | WORD32 ctr, enc_frm_id; |
5757 | | |
5758 | 3.57k | ps_enc_loop_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt; |
5759 | | |
5760 | 7.14k | for(enc_frm_id = 0; enc_frm_id < ps_enc_loop_ctxt->i4_num_enc_loop_frm_pllel; enc_frm_id++) |
5761 | 3.57k | { |
5762 | 7.14k | for(ctr = 0; ctr < ps_enc_loop_ctxt->i4_num_bitrates; ctr++) /* one set of dep mngrs per bit-rate instance */ |
5763 | 3.57k | { |
5764 | | /* --- EncLoop Deblock sync Dep Mngr Delete --*/ |
5765 | 3.57k | ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_dblk[enc_frm_id][ctr]); |
5766 | | /* --- EncLoop Sao sync Dep Mngr Delete --*/ |
5767 | 3.57k | ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_sao[enc_frm_id][ctr]); |
5768 | | /* --- EncLoop Top-Right CU sync Dep Mngr Delete --*/ |
5769 | 3.57k | ihevce_dmgr_del(ps_enc_loop_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[enc_frm_id][ctr]); |
5770 | 3.57k | } |
5771 | 3.57k | } |
5772 | 3.57k | } |
5773 | | |
5774 | | /*! |
5775 | | ****************************************************************************** |
5776 | | * \if Function name : ihevce_enc_loop_dep_mngr_frame_reset \endif |
5777 | | * |
5778 | | * \brief |
5779 | | * Frame level reset of the Dependency Mngrs local to EncLoop, |
5780 | | * i.e. Dblk, SAO and CU_TopRight, for every bit-rate instance |
5781 | | * |
5782 | | * \param[in] pv_enc_loop_ctxt : Enc_loop master context pointer |
5783 | | * \param[in] enc_frm_id : parallel-frame index; index 0 is used instead when i4_num_enc_loop_frm_pllel is 1 |
5784 | | * \return |
5785 | | * None |
5786 | | * |
5787 | | * \author |
5788 | | * Ittiam |
5789 | | * |
5790 | | ***************************************************************************** |
5791 | | */ |
5792 | | void ihevce_enc_loop_dep_mngr_frame_reset(void *pv_enc_loop_ctxt, WORD32 enc_frm_id) |
5793 | 94.8k | { |
5794 | 94.8k | WORD32 ctr, frame_id; |
5795 | 94.8k | ihevce_enc_loop_master_ctxt_t *ps_master_ctxt; |
5796 | | |
5797 | 94.8k | ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt; |
5798 | | |
5799 | 94.8k | if(1 == ps_master_ctxt->i4_num_enc_loop_frm_pllel) /* single frame in flight: caller's enc_frm_id is ignored */ |
5800 | 94.8k | { |
5801 | 94.8k | frame_id = 0; |
5802 | 94.8k | } |
5803 | 0 | else |
5804 | 0 | { |
5805 | 0 | frame_id = enc_frm_id; |
5806 | 0 | } |
5807 | | |
5808 | 189k | for(ctr = 0; ctr < ps_master_ctxt->i4_num_bitrates; ctr++) |
5809 | 94.8k | { |
5810 | | /* Dep. Mngr : Reset the num ctb Deblocked in every row for ENC sync */ |
5811 | 94.8k | ihevce_dmgr_rst_row_row_sync(ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[frame_id][ctr]); |
5812 | | |
5813 | | /* Dep. Mngr : Reset the num SAO ctb in every row for ENC sync */ |
5814 | 94.8k | ihevce_dmgr_rst_row_row_sync(ps_master_ctxt->aapv_dep_mngr_enc_loop_sao[frame_id][ctr]); |
5815 | | |
5816 | | /* Dep. Mngr : Reset the TopRight CU Processed in every row for ENC sync */ |
5817 | 94.8k | ihevce_dmgr_rst_row_row_sync( |
5818 | 94.8k | ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[frame_id][ctr]); |
5819 | 94.8k | } |
5820 | 94.8k | } |
5821 | | |
5822 | | /*! |
5823 | | ****************************************************************************** |
5824 | | * \if Function name : ihevce_enc_loop_frame_init \endif |
5825 | | * |
5826 | | * \brief |
5827 | | * Frame level init of the encode loop context of one processing thread (i4_thrd_id). |
5828 | | * |
5829 | | * \param[in] pv_enc_loop_ctxt : Enc_loop master context pointer |
5830 | | * \param[in] i4_frm_qp : frame qp; copied into the frame / CU / previous-CU qp fields |
5831 | | * \param[in] aps_ref_list : ref pic list for the current frame |
5832 | | * \param[out] ps_frm_recon : recon buffer; its L0 / L1 collocated POC arrays are filled here |
5833 | | * \param[in,out] ps_slice_hdr : ptr to current slice header params (i4_abs_pic_order_cnt is written) |
5834 | | * \param[in] ps_pps, ps_sps, ps_vps : ptrs to active pps / sps / vps params |
5835 | | * \param[in] i1_weighted_pred_flag, i1_weighted_bipred_flag : weighted pred enable flags (unidir, bidir) |
5836 | | * \param[in] log2_luma_wght_denom, log2_chroma_wght_denom : down shift factors for weighted pred (luma, chroma) |
5837 | | * \param[in] cur_poc : current frame poc |
5838 | | * \param[in] i4_display_num : value stored in the thread context's i4_display_num |
5839 | | * \param[in,out] ps_enc_ctxt : encoder context; per-thread s_multi_thrd accumulators are reset here |
5840 | | * \param[in] ps_curr_inp_prms : current input params; source of the lap-out / rc-lap-out frame stats |
5841 | | * \param[in] i4_bitrate_instance_num, i4_num_bitrates : bit-rate instance index and count (multi-rate encoder) |
5842 | | * \param[in] i4_thrd_id, i4_enc_frm_id : thread index; parallel-frame index (forced to 0 when frame parallelism is 1) |
5843 | | * \param[in] i4_quality_preset : not referenced in this function body |
5844 | | * \param[in] pv_dep_mngr_encloop_dep_me : dep mngr handle stored in the thread context for forward ME sync |
5845 | | * \return |
5846 | | * None |
5847 | | * |
5848 | | * \author |
5849 | | * Ittiam |
5850 | | * |
5851 | | ***************************************************************************** |
5852 | | */ |
5853 | | void ihevce_enc_loop_frame_init( |
5854 | | void *pv_enc_loop_ctxt, |
5855 | | WORD32 i4_frm_qp, |
5856 | | recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2], |
5857 | | recon_pic_buf_t *ps_frm_recon, |
5858 | | slice_header_t *ps_slice_hdr, |
5859 | | pps_t *ps_pps, |
5860 | | sps_t *ps_sps, |
5861 | | vps_t *ps_vps, |
5862 | | WORD8 i1_weighted_pred_flag, |
5863 | | WORD8 i1_weighted_bipred_flag, |
5864 | | WORD32 log2_luma_wght_denom, |
5865 | | WORD32 log2_chroma_wght_denom, |
5866 | | WORD32 cur_poc, |
5867 | | WORD32 i4_display_num, |
5868 | | enc_ctxt_t *ps_enc_ctxt, |
5869 | | me_enc_rdopt_ctxt_t *ps_curr_inp_prms, |
5870 | | WORD32 i4_bitrate_instance_num, |
5871 | | WORD32 i4_thrd_id, |
5872 | | WORD32 i4_enc_frm_id, |
5873 | | WORD32 i4_num_bitrates, |
5874 | | WORD32 i4_quality_preset, |
5875 | | void *pv_dep_mngr_encloop_dep_me) |
5876 | 94.8k | { |
5877 | | /* local variables */ |
5878 | 94.8k | ihevce_enc_loop_master_ctxt_t *ps_master_ctxt; |
5879 | 94.8k | ihevce_enc_loop_ctxt_t *ps_ctxt; |
5880 | 94.8k | WORD32 chroma_qp_offset, i4_div_factor; |
5881 | 94.8k | WORD8 i1_slice_type = ps_slice_hdr->i1_slice_type; |
5882 | 94.8k | WORD8 i1_strong_intra_smoothing_enable_flag = ps_sps->i1_strong_intra_smoothing_enable_flag; |
5883 | | |
5884 | | /* ENC_LOOP master state structure */ |
5885 | 94.8k | ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt; |
5886 | | |
5887 | | /* Store the current POC in the slice header */ |
5888 | 94.8k | ps_slice_hdr->i4_abs_pic_order_cnt = cur_poc; |
5889 | | |
5890 | | /* Copy the POCs of the active L0 / L1 reference pictures into the recon buffer */ |
5891 | 94.8k | if(ps_slice_hdr->i1_num_ref_idx_l0_active != 0) |
5892 | 94.8k | { |
5893 | 94.8k | int i4_i; |
5894 | 251k | for(i4_i = 0; i4_i < ps_slice_hdr->i1_num_ref_idx_l0_active; i4_i++) |
5895 | 157k | { |
5896 | 157k | ps_frm_recon->ai4_col_l0_poc[i4_i] = aps_ref_list[0][i4_i]->i4_poc; |
5897 | 157k | } |
5898 | 94.8k | } |
5899 | 94.8k | if(ps_slice_hdr->i1_num_ref_idx_l1_active != 0) |
5900 | 11.3k | { |
5901 | 11.3k | int i4_i; |
5902 | 24.7k | for(i4_i = 0; i4_i < ps_slice_hdr->i1_num_ref_idx_l1_active; i4_i++) |
5903 | 13.3k | { |
5904 | 13.3k | ps_frm_recon->ai4_col_l1_poc[i4_i] = aps_ref_list[1][i4_i]->i4_poc; |
5905 | 13.3k | } |
5906 | 11.3k | } |
5907 | | |
5908 | | /* The per-thread loop below is disabled: only thread i4_thrd_id's context is set up */ |
5909 | | // for(ctr = 0; ctr < ps_master_ctxt->i4_num_proc_thrds; ctr++) |
5910 | 94.8k | { |
5911 | | /* ENC_LOOP state structure */ |
5912 | 94.8k | ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[i4_thrd_id]; |
5913 | | |
5914 | | /* SAO ctxt structure initialization*/ |
5915 | 94.8k | ps_ctxt->s_sao_ctxt_t.ps_pps = ps_pps; |
5916 | 94.8k | ps_ctxt->s_sao_ctxt_t.ps_sps = ps_sps; |
5917 | 94.8k | ps_ctxt->s_sao_ctxt_t.ps_slice_hdr = ps_slice_hdr; |
5918 | | |
5919 | | /*bit-rate instance number for Multi-bitrate (MBR) encode */ |
5920 | 94.8k | ps_ctxt->i4_bitrate_instance_num = i4_bitrate_instance_num; |
5921 | 94.8k | ps_ctxt->i4_num_bitrates = i4_num_bitrates; |
5922 | 94.8k | ps_ctxt->i4_chroma_format = ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_chr_format; |
5923 | 94.8k | ps_ctxt->i4_is_first_query = 1; |
5924 | 94.8k | ps_ctxt->i4_is_ctb_qp_modified = 0; |
5925 | | |
5926 | | /* enc_frm_id for multiframe encode: forced to 0 when only one frame is encoded in parallel */ |
5927 | | |
5928 | 94.8k | if(1 == ps_enc_ctxt->s_multi_thrd.i4_num_enc_loop_frm_pllel) |
5929 | 94.8k | { |
5930 | 94.8k | ps_ctxt->i4_enc_frm_id = 0; |
5931 | 94.8k | i4_enc_frm_id = 0; |
5932 | 94.8k | } |
5933 | 0 | else |
5934 | 0 | { |
5935 | 0 | ps_ctxt->i4_enc_frm_id = i4_enc_frm_id; |
5936 | 0 | } |
5937 | | |
5938 | | /*Initialize the sub pic rc buf appropriately */ |
5939 | | |
5940 | | /*Set the thrd id flag */ |
5941 | 94.8k | ps_enc_ctxt->s_multi_thrd |
5942 | 94.8k | .ai4_thrd_id_valid_flag[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 1; |
5943 | | |
5944 | 94.8k | ps_enc_ctxt->s_multi_thrd |
5945 | 94.8k | .ai8_nctb_ipe_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0; |
5946 | 94.8k | ps_enc_ctxt->s_multi_thrd |
5947 | 94.8k | .ai8_nctb_me_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0; |
5948 | | |
5949 | 94.8k | ps_enc_ctxt->s_multi_thrd |
5950 | 94.8k | .ai8_nctb_l0_ipe_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0; |
5951 | 94.8k | ps_enc_ctxt->s_multi_thrd |
5952 | 94.8k | .ai8_nctb_act_factor[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0; |
5953 | | |
5954 | 94.8k | ps_enc_ctxt->s_multi_thrd |
5955 | 94.8k | .ai8_nctb_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0; |
5956 | 94.8k | ps_enc_ctxt->s_multi_thrd |
5957 | 94.8k | .ai8_acc_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0; |
5958 | 94.8k | ps_enc_ctxt->s_multi_thrd |
5959 | 94.8k | .ai8_acc_bits_mul_qs_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0; |
5960 | 94.8k | ps_enc_ctxt->s_multi_thrd |
5961 | 94.8k | .ai8_nctb_hdr_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0; |
5962 | 94.8k | ps_enc_ctxt->s_multi_thrd |
5963 | 94.8k | .ai8_nctb_mpm_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0; |
5964 | 94.8k | ps_enc_ctxt->s_multi_thrd.ai4_prev_chunk_qp[i4_enc_frm_id][i4_bitrate_instance_num] = |
5965 | 94.8k | i4_frm_qp; |
5966 | | |
5967 | | /*Frame level data for Sub Pic rc is initialized here */ |
5968 | | /*Can be sent once per frame*/ |
5969 | 94.8k | { |
5970 | 94.8k | WORD32 i4_tot_frame_ctb = ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert * |
5971 | 94.8k | ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_horz; |
5972 | | |
5973 | | /*Accumulated bits of all cu for required CTBS estimated during RDO evaluation*/ |
5974 | 94.8k | ps_ctxt->u4_total_cu_bits = 0; |
5975 | 94.8k | ps_ctxt->u4_total_cu_hdr_bits = 0; |
5976 | | |
5977 | 94.8k | ps_ctxt->u4_cu_tot_bits_into_qscale = 0; |
5978 | 94.8k | ps_ctxt->u4_cu_tot_bits = 0; |
5979 | 94.8k | ps_ctxt->u4_total_cu_bits_mul_qs = 0; |
5980 | 94.8k | ps_ctxt->i4_display_num = i4_display_num; |
5981 | 94.8k | ps_ctxt->i4_sub_pic_level_rc = ps_enc_ctxt->s_multi_thrd.i4_in_frame_rc_enabled; |
5982 | | /*CTB count per Qscale update: UPDATE_QP_AT_CTB percent of the total frame ctbs */ |
5983 | | //ps_ctxt->i4_num_ctb_for_out_scale = (10 * i4_tot_frame_ctb)/100 ; |
5984 | 94.8k | ps_ctxt->i4_num_ctb_for_out_scale = (UPDATE_QP_AT_CTB * i4_tot_frame_ctb) / 100; |
5985 | | |
5986 | 94.8k | ps_ctxt->i4_cu_qp_sub_pic_rc = (1 << QP_LEVEL_MOD_ACT_FACTOR); |
5987 | | /*Sub Pic RC frame level params */ |
5988 | 94.8k | ps_ctxt->i8_frame_l1_ipe_sad = |
5989 | 94.8k | ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i8_raw_pre_intra_sad; |
5990 | 94.8k | ps_ctxt->i8_frame_l0_ipe_satd = |
5991 | 94.8k | ps_curr_inp_prms->ps_curr_inp->s_lap_out.i8_frame_l0_acc_satd; |
5992 | 94.8k | ps_ctxt->i8_frame_l1_me_sad = |
5993 | 94.8k | ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i8_raw_l1_coarse_me_sad; |
5994 | 94.8k | ps_ctxt->i8_frame_l1_activity_fact = |
5995 | 94.8k | ps_curr_inp_prms->ps_curr_inp->s_lap_out.i8_frame_level_activity_fact; |
5996 | 94.8k | if(ps_ctxt->i4_sub_pic_level_rc) |
5997 | 0 | { |
5998 | 0 | ASSERT( |
5999 | 0 | ps_curr_inp_prms->ps_curr_inp->s_lap_out |
6000 | 0 | .ai4_frame_bits_estimated[ps_ctxt->i4_bitrate_instance_num] != 0); |
6001 | |
|
6002 | 0 | ps_ctxt->ai4_frame_bits_estimated[ps_ctxt->i4_enc_frm_id] |
6003 | 0 | [ps_ctxt->i4_bitrate_instance_num] = |
6004 | 0 | ps_curr_inp_prms->ps_curr_inp->s_lap_out |
6005 | 0 | .ai4_frame_bits_estimated[ps_ctxt->i4_bitrate_instance_num]; |
6006 | 0 | } |
6007 | | //ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type = 1; |
6008 | | |
6009 | 94.8k | ps_ctxt->i4_is_I_scenecut = |
6010 | 94.8k | ((ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type == SCENE_TYPE_SCENE_CUT) && |
6011 | 0 | (ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_pic_type == IV_IDR_FRAME || |
6012 | 0 | ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_pic_type == IV_I_FRAME)); |
6013 | | |
6014 | 94.8k | ps_ctxt->i4_is_non_I_scenecut = |
6015 | 94.8k | ((ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type == SCENE_TYPE_SCENE_CUT) && |
6016 | 0 | (ps_ctxt->i4_is_I_scenecut == 0)); |
6017 | | |
6018 | | /*ps_ctxt->i4_is_I_only_scd = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_I_only_scd; |
6019 | | ps_ctxt->i4_is_non_I_scd = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_non_I_scd;*/ |
6020 | 94.8k | ps_ctxt->i4_is_model_valid = |
6021 | 94.8k | ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i4_is_model_valid; |
6022 | 94.8k | } |
6023 | | /* cb and cr offsets are assumed to be same */ |
6024 | 94.8k | chroma_qp_offset = ps_slice_hdr->i1_slice_cb_qp_offset + ps_pps->i1_pic_cb_qp_offset; |
6025 | | |
6026 | | /* assumption of cb = cr qp */ |
6027 | 94.8k | ASSERT(ps_slice_hdr->i1_slice_cb_qp_offset == ps_slice_hdr->i1_slice_cr_qp_offset); |
6028 | 94.8k | ASSERT(ps_pps->i1_pic_cb_qp_offset == ps_pps->i1_pic_cr_qp_offset); |
6029 | | |
6030 | 94.8k | ps_ctxt->u1_is_input_data_hbd = (ps_sps->i1_bit_depth_luma_minus8 > 0); |
6031 | | |
6032 | 94.8k | ps_ctxt->u1_bit_depth = ps_sps->i1_bit_depth_luma_minus8 + 8; |
6033 | | |
6034 | 94.8k | ps_ctxt->s_mc_ctxt.i4_bit_depth = ps_ctxt->u1_bit_depth; |
6035 | 94.8k | ps_ctxt->s_mc_ctxt.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type; |
6036 | | |
6037 | | /*remember chroma qp offset as qp related parameters are calculated at CU level*/ |
6038 | 94.8k | ps_ctxt->i4_chroma_qp_offset = chroma_qp_offset; |
6039 | 94.8k | ps_ctxt->i1_cu_qp_delta_enable = ps_pps->i1_cu_qp_delta_enabled_flag; |
6040 | 94.8k | ps_ctxt->i1_entropy_coding_sync_enabled_flag = ps_pps->i1_entropy_coding_sync_enabled_flag; |
6041 | | |
6042 | 94.8k | ps_ctxt->i4_is_ref_pic = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_ref_pic; |
6043 | 94.8k | ps_ctxt->i4_temporal_layer = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_temporal_lyr_id; |
6044 | 94.8k | ps_ctxt->i4_use_const_lamda_modifier = USE_CONSTANT_LAMBDA_MODIFIER; |
6045 | 94.8k | ps_ctxt->i4_use_const_lamda_modifier = /* also forced on when the i4_vqet control-toggler bit and any noise-preservation / psy-rdopt bit are set */ |
6046 | 94.8k | ps_ctxt->i4_use_const_lamda_modifier || |
6047 | 94.8k | ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet & |
6048 | 94.8k | (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) && |
6049 | 0 | ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet & |
6050 | 0 | (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)) || |
6051 | 0 | (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet & |
6052 | 0 | (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1)) || |
6053 | 0 | (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet & |
6054 | 0 | (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_2)) || |
6055 | 0 | (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet & |
6056 | 0 | (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_3)))); |
6057 | | |
6058 | 94.8k | { |
6059 | 94.8k | ps_ctxt->f_i_pic_lamda_modifier = |
6060 | 94.8k | ps_curr_inp_prms->ps_curr_inp->s_lap_out.f_i_pic_lamda_modifier; |
6061 | 94.8k | } |
6062 | | |
6063 | 94.8k | ps_ctxt->i4_frame_qp = i4_frm_qp; |
6064 | 94.8k | ps_ctxt->i4_frame_mod_qp = i4_frm_qp; |
6065 | 94.8k | ps_ctxt->i4_cu_qp = i4_frm_qp; |
6066 | 94.8k | ps_ctxt->i4_prev_cu_qp = i4_frm_qp; |
6067 | 94.8k | ps_ctxt->i4_chrm_cu_qp = /* chroma array type 2: qp + offset capped at 51; otherwise table lookup */ |
6068 | 94.8k | (ps_ctxt->u1_chroma_array_type == 2) |
6069 | 94.8k | ? MIN(i4_frm_qp + chroma_qp_offset, 51) |
6070 | 94.8k | : gai1_ihevc_chroma_qp_scale[i4_frm_qp + chroma_qp_offset + MAX_QP_BD_OFFSET]; |
6071 | | |
6072 | 94.8k | ps_ctxt->i4_cu_qp_div6 = (i4_frm_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6; |
6073 | 94.8k | i4_div_factor = (i4_frm_qp + 3) / 6; /* NOTE(review): i4_div_factor does not appear to be read later in this function - confirm */ |
6074 | 94.8k | i4_div_factor = CLIP3(i4_div_factor, 3, 6); |
6075 | 94.8k | ps_ctxt->i4_cu_qp_mod6 = (i4_frm_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6; |
6076 | | |
6077 | 94.8k | ps_ctxt->i4_chrm_cu_qp_div6 = |
6078 | 94.8k | (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6; |
6079 | 94.8k | ps_ctxt->i4_chrm_cu_qp_mod6 = |
6080 | 94.8k | (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6; |
6081 | | |
6082 | 94.8k | #define INTER_RND_QP_BY_6 |
6083 | 94.8k | #ifdef INTER_RND_QP_BY_6 |
6084 | | |
6085 | 94.8k | { /*1/6 rounding for 8 bit b frames*/ |
6086 | 94.8k | ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = 85 |
6087 | 94.8k | /*((1 << QUANT_ROUND_FACTOR_Q) / 6)*/; |
6088 | 94.8k | } |
6089 | | #else |
6090 | | /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */ |
6091 | | ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3; |
6092 | | #endif |
6093 | | |
6094 | 94.8k | if(ISLICE == i1_slice_type) |
6095 | 31.0k | { |
6096 | | /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */ |
6097 | 31.0k | ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] = 171 |
6098 | 31.0k | /* presumably ((1 << QUANT_ROUND_FACTOR_Q) / 3): 171 is about twice the inter value 85 - TODO confirm */; |
6099 | 31.0k | } |
6100 | 63.8k | else |
6101 | 63.8k | { |
6102 | | /* quant factor without RDOQ is 1/6th of shift for intra in inter pic */ |
6103 | 63.8k | ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] = |
6104 | 63.8k | ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER]; |
6105 | | /* (1 << QUANT_ROUND_FACTOR_Q) / 6; */ |
6106 | 63.8k | } |
6107 | | |
6108 | 94.8k | ps_ctxt->i1_strong_intra_smoothing_enable_flag = i1_strong_intra_smoothing_enable_flag; |
6109 | | |
6110 | 94.8k | ps_ctxt->i1_slice_type = i1_slice_type; |
6111 | | |
6112 | | /* initialize the inter pred (MC) context at frame level */ |
6113 | 94.8k | ps_ctxt->s_mc_ctxt.ps_ref_list = aps_ref_list; |
6114 | 94.8k | ps_ctxt->s_mc_ctxt.i1_weighted_pred_flag = i1_weighted_pred_flag; |
6115 | 94.8k | ps_ctxt->s_mc_ctxt.i1_weighted_bipred_flag = i1_weighted_bipred_flag; |
6116 | 94.8k | ps_ctxt->s_mc_ctxt.i4_log2_luma_wght_denom = log2_luma_wght_denom; |
6117 | 94.8k | ps_ctxt->s_mc_ctxt.i4_log2_chroma_wght_denom = log2_chroma_wght_denom; |
6118 | | |
6119 | | /* initialize the MV pred context at frame level */ |
6120 | 94.8k | ps_ctxt->s_mv_pred_ctxt.ps_ref_list = aps_ref_list; |
6121 | 94.8k | ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr = ps_slice_hdr; |
6122 | 94.8k | ps_ctxt->s_mv_pred_ctxt.ps_sps = ps_sps; |
6123 | 94.8k | ps_ctxt->s_mv_pred_ctxt.i4_log2_parallel_merge_level_minus2 = |
6124 | 94.8k | ps_pps->i1_log2_parallel_merge_level - 2; |
6125 | | |
6126 | 94.8k | #if ADAPT_COLOCATED_FROM_L0_FLAG |
6127 | 94.8k | if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_temporal_mvp_enable_flag) /* set the flag only when L1 is active and L1[0] has a lower frame qp than L0[0] */ |
6128 | 74.5k | { |
6129 | 74.5k | if((ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_num_ref_idx_l1_active > 0) && |
6130 | 11.3k | (ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][0]->i4_frame_qp < |
6131 | 11.3k | ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][0]->i4_frame_qp)) |
6132 | 3.27k | { |
6133 | 3.27k | ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_collocated_from_l0_flag = 1; |
6134 | 3.27k | } |
6135 | 74.5k | } |
6136 | 94.8k | #endif |
6137 | | /* Initialization of deblocking params */ |
6138 | 94.8k | ps_ctxt->s_deblk_prms.i4_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2; |
6139 | 94.8k | ps_ctxt->s_deblk_prms.i4_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2; |
6140 | | |
6141 | 94.8k | ps_ctxt->s_deblk_prms.i4_cb_qp_indx_offset = ps_pps->i1_pic_cb_qp_offset; |
6142 | | |
6143 | 94.8k | ps_ctxt->s_deblk_prms.i4_cr_qp_indx_offset = ps_pps->i1_pic_cr_qp_offset; |
6144 | | /*init frame level stat accumulation parameters */ |
6145 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] |
6146 | 94.8k | ->u4_frame_sad_acc = 0; |
6147 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] |
6148 | 94.8k | ->u4_frame_intra_sad_acc = 0; |
6149 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] |
6150 | 94.8k | ->u4_frame_open_loop_intra_sad = 0; |
6151 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] |
6152 | 94.8k | ->i8_frame_open_loop_ssd = 0; |
6153 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] |
6154 | 94.8k | ->u4_frame_inter_sad_acc = 0; |
6155 | | |
6156 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] |
6157 | 94.8k | ->i8_frame_cost_acc = 0; |
6158 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] |
6159 | 94.8k | ->i8_frame_intra_cost_acc = 0; |
6160 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] |
6161 | 94.8k | ->i8_frame_inter_cost_acc = 0; |
6162 | | |
6163 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] |
6164 | 94.8k | ->u4_frame_intra_sad = 0; |
6165 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] |
6166 | 94.8k | ->u4_frame_rdopt_bits = 0; |
6167 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] |
6168 | 94.8k | ->u4_frame_rdopt_header_bits = 0; |
6169 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] |
6170 | 94.8k | ->i4_qp_normalized_8x8_cu_sum[0] = 0; |
6171 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] |
6172 | 94.8k | ->i4_qp_normalized_8x8_cu_sum[1] = 0; |
6173 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] |
6174 | 94.8k | ->i4_8x8_cu_sum[0] = 0; |
6175 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] |
6176 | 94.8k | ->i4_8x8_cu_sum[1] = 0; |
6177 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] |
6178 | 94.8k | ->i8_sad_by_qscale[0] = 0; |
6179 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num] |
6180 | 94.8k | ->i8_sad_by_qscale[1] = 0; |
6181 | | /* Compute the frame_qstep */ |
6182 | 94.8k | GET_FRAME_QSTEP_FROM_QP(ps_ctxt->i4_frame_qp, ps_ctxt->i4_frame_qstep); |
6183 | | |
6184 | 94.8k | ps_ctxt->u1_max_tr_depth = ps_sps->i1_max_transform_hierarchy_depth_inter; |
6185 | | |
6186 | 94.8k | ps_ctxt->ps_rc_quant_ctxt = &ps_enc_ctxt->s_rc_quant; |
6187 | | /* initialize the cabac rdopt context at frame level */ |
6188 | 94.8k | ihevce_entropy_rdo_frame_init( |
6189 | 94.8k | &ps_ctxt->s_rdopt_entropy_ctxt, |
6190 | 94.8k | ps_slice_hdr, |
6191 | 94.8k | ps_pps, |
6192 | 94.8k | ps_sps, |
6193 | 94.8k | ps_vps, |
6194 | 94.8k | ps_master_ctxt->au1_cu_skip_top_row, |
6195 | 94.8k | &ps_enc_ctxt->s_rc_quant); |
6196 | | |
6197 | | /* register the dep mngr instance for forward ME sync */ |
6198 | 94.8k | ps_ctxt->pv_dep_mngr_encloop_dep_me = pv_dep_mngr_encloop_dep_me; |
6199 | 94.8k | } |
6200 | 94.8k | } |
6201 | | /* |
6202 | | ****************************************************************************** |
6203 | | * \if Function name : ihevce_enc_loop_get_frame_rc_prms \endif |
6204 | | * |
6205 | | * \brief |
6206 | | * returns Nil |
6207 | | * |
6208 | | * \param[in] pv_enc_loop_ctxt : pointer to encode loop context |
6209 | | * \param[out]ps_rc_prms : ptr to frame level info structure |
6210 | | * |
6211 | | * \return |
6212 | | * None |
6213 | | * |
6214 | | * \author |
6215 | | * Ittiam |
6216 | | * |
6217 | | ***************************************************************************** |
6218 | | */ |
6219 | | void ihevce_enc_loop_get_frame_rc_prms( |
6220 | | void *pv_enc_loop_ctxt, |
6221 | | rc_bits_sad_t *ps_rc_prms, |
6222 | | WORD32 i4_br_id, //bitrate instance id |
6223 | | WORD32 i4_enc_frm_id) // frame id |
6224 | 94.8k | { |
6225 | | /*Get the master thread pointer*/ |
6226 | 94.8k | ihevce_enc_loop_master_ctxt_t *ps_master_ctxt; |
6227 | 94.8k | ihevce_enc_loop_ctxt_t *ps_ctxt; |
6228 | 94.8k | UWORD32 total_frame_intra_sad = 0, total_frame_open_loop_intra_sad = 0; |
6229 | 94.8k | LWORD64 i8_total_ssd_frame = 0; |
6230 | 94.8k | UWORD32 total_frame_sad = 0; |
6231 | 94.8k | UWORD32 total_frame_rdopt_bits = 0; |
6232 | 94.8k | UWORD32 total_frame_rdopt_header_bits = 0; |
6233 | 94.8k | WORD32 i4_qp_normalized_8x8_cu_sum[2] = { 0, 0 }; |
6234 | 94.8k | WORD32 i4_8x8_cu_sum[2] = { 0, 0 }; |
6235 | 94.8k | LWORD64 i8_sad_by_qscale[2] = { 0, 0 }; |
6236 | 94.8k | WORD32 i4_curr_qp_acc = 0; |
6237 | 94.8k | WORD32 i; |
6238 | | |
6239 | | /* ENC_LOOP master state structure */ |
6240 | 94.8k | ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt; |
6241 | | |
6242 | 94.8k | if(1 == ps_master_ctxt->i4_num_enc_loop_frm_pllel) |
6243 | 94.8k | { |
6244 | 94.8k | i4_enc_frm_id = 0; |
6245 | 94.8k | } |
6246 | | /*loop through all threads and accumulate intra sad across all threads*/ |
6247 | 189k | for(i = 0; i < ps_master_ctxt->i4_num_proc_thrds; i++) |
6248 | 94.8k | { |
6249 | | /* ENC_LOOP state structure */ |
6250 | 94.8k | ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[i]; |
6251 | 94.8k | total_frame_open_loop_intra_sad += |
6252 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_open_loop_intra_sad; |
6253 | 94.8k | i8_total_ssd_frame += |
6254 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_open_loop_ssd; |
6255 | 94.8k | total_frame_intra_sad += |
6256 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_intra_sad; |
6257 | 94.8k | total_frame_sad += |
6258 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_sad_acc; |
6259 | 94.8k | total_frame_rdopt_bits += |
6260 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_bits; |
6261 | 94.8k | total_frame_rdopt_header_bits += |
6262 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_header_bits; |
6263 | 94.8k | i4_qp_normalized_8x8_cu_sum[0] += ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id] |
6264 | 94.8k | ->i4_qp_normalized_8x8_cu_sum[0]; |
6265 | 94.8k | i4_qp_normalized_8x8_cu_sum[1] += ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id] |
6266 | 94.8k | ->i4_qp_normalized_8x8_cu_sum[1]; |
6267 | 94.8k | i4_8x8_cu_sum[0] += |
6268 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i4_8x8_cu_sum[0]; |
6269 | 94.8k | i4_8x8_cu_sum[1] += |
6270 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i4_8x8_cu_sum[1]; |
6271 | 94.8k | i8_sad_by_qscale[0] += |
6272 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_sad_by_qscale[0]; |
6273 | 94.8k | i8_sad_by_qscale[1] += |
6274 | 94.8k | ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_sad_by_qscale[1]; |
6275 | 94.8k | } |
6276 | | |
6277 | 94.8k | ps_rc_prms->u4_open_loop_intra_sad = total_frame_open_loop_intra_sad; |
6278 | 94.8k | ps_rc_prms->i8_total_ssd_frame = i8_total_ssd_frame; |
6279 | 94.8k | ps_rc_prms->u4_total_sad = total_frame_sad; |
6280 | 94.8k | ps_rc_prms->u4_total_texture_bits = total_frame_rdopt_bits - total_frame_rdopt_header_bits; |
6281 | 94.8k | ps_rc_prms->u4_total_header_bits = total_frame_rdopt_header_bits; |
6282 | | /*This accumulation of intra frame sad is not intact. This can only be a temp change*/ |
6283 | 94.8k | ps_rc_prms->u4_total_intra_sad = total_frame_intra_sad; |
6284 | 94.8k | ps_rc_prms->i4_qp_normalized_8x8_cu_sum[0] = i4_qp_normalized_8x8_cu_sum[0]; |
6285 | 94.8k | ps_rc_prms->i4_qp_normalized_8x8_cu_sum[1] = i4_qp_normalized_8x8_cu_sum[1]; |
6286 | 94.8k | ps_rc_prms->i4_8x8_cu_sum[0] = i4_8x8_cu_sum[0]; |
6287 | 94.8k | ps_rc_prms->i4_8x8_cu_sum[1] = i4_8x8_cu_sum[1]; |
6288 | 94.8k | ps_rc_prms->i8_sad_by_qscale[0] = i8_sad_by_qscale[0]; |
6289 | 94.8k | ps_rc_prms->i8_sad_by_qscale[1] = i8_sad_by_qscale[1]; |
6290 | 94.8k | } |