/src/libhevc/encoder/ihevce_ipe_pass.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2018 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | |
21 | | /*! |
22 | | ****************************************************************************** |
23 | | * \file ihevce_ipe_pass.c |
24 | | * |
25 | | * \brief |
26 | | * This file contains interface functions of Intra Prediction Estimation |
27 | | * module |
28 | | * \date |
29 | | * 18/09/2012 |
30 | | * |
31 | | * \author |
32 | | * Ittiam |
33 | | * |
34 | | * |
35 | | * List of Functions |
36 | | * |
37 | | * |
38 | | ****************************************************************************** |
39 | | */ |
40 | | |
41 | | /*****************************************************************************/ |
42 | | /* File Includes */ |
43 | | /*****************************************************************************/ |
44 | | /* System include files */ |
45 | | #include <stdio.h> |
46 | | #include <string.h> |
47 | | #include <stdlib.h> |
48 | | #include <assert.h> |
49 | | #include <stdarg.h> |
50 | | #include <math.h> |
51 | | |
52 | | /* User include files */ |
53 | | #include "ihevc_typedefs.h" |
54 | | #include "itt_video_api.h" |
55 | | #include "ihevce_api.h" |
56 | | |
57 | | #include "rc_cntrl_param.h" |
58 | | #include "rc_frame_info_collector.h" |
59 | | #include "rc_look_ahead_params.h" |
60 | | |
61 | | #include "ihevc_debug.h" |
62 | | #include "ihevc_defs.h" |
63 | | #include "ihevc_structs.h" |
64 | | #include "ihevc_platform_macros.h" |
65 | | #include "ihevc_deblk.h" |
66 | | #include "ihevc_itrans_recon.h" |
67 | | #include "ihevc_chroma_itrans_recon.h" |
68 | | #include "ihevc_chroma_intra_pred.h" |
69 | | #include "ihevc_intra_pred.h" |
70 | | #include "ihevc_inter_pred.h" |
71 | | #include "ihevc_mem_fns.h" |
72 | | #include "ihevc_padding.h" |
73 | | #include "ihevc_weighted_pred.h" |
74 | | #include "ihevc_sao.h" |
75 | | #include "ihevc_resi_trans.h" |
76 | | #include "ihevc_quant_iquant_ssd.h" |
77 | | #include "ihevc_cabac_tables.h" |
78 | | #include "ihevc_quant_tables.h" |
79 | | |
80 | | #include "ihevce_defs.h" |
81 | | #include "ihevce_hle_interface.h" |
82 | | #include "ihevce_lap_enc_structs.h" |
83 | | #include "ihevce_multi_thrd_structs.h" |
84 | | #include "ihevce_multi_thrd_funcs.h" |
85 | | #include "ihevce_me_common_defs.h" |
86 | | #include "ihevce_had_satd.h" |
87 | | #include "ihevce_error_codes.h" |
88 | | #include "ihevce_bitstream.h" |
89 | | #include "ihevce_cabac.h" |
90 | | #include "ihevce_rdoq_macros.h" |
91 | | #include "ihevce_function_selector.h" |
92 | | #include "ihevce_enc_structs.h" |
93 | | #include "ihevce_entropy_structs.h" |
94 | | #include "ihevce_cmn_utils_instr_set_router.h" |
95 | | #include "ihevce_enc_loop_structs.h" |
96 | | #include "ihevce_inter_pred.h" |
97 | | #include "ihevc_weighted_pred.h" |
98 | | #include "ihevce_ipe_instr_set_router.h" |
99 | | #include "ihevce_ipe_structs.h" |
100 | | #include "ihevce_ipe_pass.h" |
101 | | #include "ihevce_decomp_pre_intra_structs.h" |
102 | | #include "ihevce_decomp_pre_intra_pass.h" |
103 | | #include "ihevce_recur_bracketing.h" |
104 | | #include "ihevce_nbr_avail.h" |
105 | | #include "ihevce_global_tables.h" |
106 | | #include "ihevc_resi_trans.h" |
107 | | |
108 | | #include "cast_types.h" |
109 | | #include "osal.h" |
110 | | #include "osal_defaults.h" |
111 | | |
112 | | /*****************************************************************************/ |
113 | | /* Global Tables */ |
114 | | /*****************************************************************************/ |
115 | | |
116 | | /** |
117 | | ****************************************************************************** |
118 | | * @brief Look up table, indexed by intra prediction mode, for choosing the |
119 | | * appropriate function (IPE_FUNC_* id) for Intra prediction |
120 | | * |
121 | | * @remarks Same look up table enums are used for luma & chroma but each |
122 | | * has separate functions implemented |
123 | | ****************************************************************************** |
124 | | */ |
125 | | WORD32 g_i4_ipe_funcs[MAX_NUM_IP_MODES] = { |
126 | | IPE_FUNC_MODE_0, /* Mode 0 */ |
127 | | IPE_FUNC_MODE_1, /* Mode 1 */ |
128 | | IPE_FUNC_MODE_2, /* Mode 2 */ |
129 | | IPE_FUNC_MODE_3TO9, /* Mode 3 */ |
130 | | IPE_FUNC_MODE_3TO9, /* Mode 4 */ |
131 | | IPE_FUNC_MODE_3TO9, /* Mode 5 */ |
132 | | IPE_FUNC_MODE_3TO9, /* Mode 6 */ |
133 | | IPE_FUNC_MODE_3TO9, /* Mode 7 */ |
134 | | IPE_FUNC_MODE_3TO9, /* Mode 8 */ |
135 | | IPE_FUNC_MODE_3TO9, /* Mode 9 */ |
136 | | IPE_FUNC_MODE_10, /* Mode 10 */ |
137 | | IPE_FUNC_MODE_11TO17, /* Mode 11 */ |
138 | | IPE_FUNC_MODE_11TO17, /* Mode 12 */ |
139 | | IPE_FUNC_MODE_11TO17, /* Mode 13 */ |
140 | | IPE_FUNC_MODE_11TO17, /* Mode 14 */ |
141 | | IPE_FUNC_MODE_11TO17, /* Mode 15 */ |
142 | | IPE_FUNC_MODE_11TO17, /* Mode 16 */ |
143 | | IPE_FUNC_MODE_11TO17, /* Mode 17 */ |
144 | | IPE_FUNC_MODE_18_34, /* Mode 18 */ |
145 | | IPE_FUNC_MODE_19TO25, /* Mode 19 */ |
146 | | IPE_FUNC_MODE_19TO25, /* Mode 20 */ |
147 | | IPE_FUNC_MODE_19TO25, /* Mode 21 */ |
148 | | IPE_FUNC_MODE_19TO25, /* Mode 22 */ |
149 | | IPE_FUNC_MODE_19TO25, /* Mode 23 */ |
150 | | IPE_FUNC_MODE_19TO25, /* Mode 24 */ |
151 | | IPE_FUNC_MODE_19TO25, /* Mode 25 */ |
152 | | IPE_FUNC_MODE_26, /* Mode 26 */ |
153 | | IPE_FUNC_MODE_27TO33, /* Mode 27 */ |
154 | | IPE_FUNC_MODE_27TO33, /* Mode 28 */ |
155 | | IPE_FUNC_MODE_27TO33, /* Mode 29 */ |
156 | | IPE_FUNC_MODE_27TO33, /* Mode 30 */ |
157 | | IPE_FUNC_MODE_27TO33, /* Mode 31 */ |
158 | | IPE_FUNC_MODE_27TO33, /* Mode 32 */ |
159 | | IPE_FUNC_MODE_27TO33, /* Mode 33 */ |
160 | | IPE_FUNC_MODE_18_34, /* Mode 34 */ |
161 | | }; |
162 | | |
163 | | /** |
164 | | ****************************************************************************** |
165 | | * @brief Look up table for deciding whether to use original samples or |
166 | | * filtered reference samples for Intra prediction |
167 | | * |
168 | | * @remarks One row per transform size (8, 16 and 32), selected with log2nT - 3; |
169 | | * the column index is the intra prediction mode |
170 | | ****************************************************************************** |
171 | | */ |
172 | | UWORD8 gau1_ipe_filter_flag[3][MAX_NUM_IP_MODES] = { |
173 | | { 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
174 | | 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }, |
175 | | { 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, |
176 | | 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 }, |
177 | | { 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, |
178 | | 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1 } |
179 | | }; |
180 | | |
181 | | /*****************************************************************************/ |
182 | | /* Function Definitions */ |
183 | | /*****************************************************************************/ |
184 | | |
185 | | /*! |
186 | | ****************************************************************************** |
187 | | * \if Function name : ihevce_ipe_recompute_lambda_from_min_8x8_act_in_ctb \endif |
188 | | * |
189 | | * \brief |
190 | | * Recomputes the IPE lambda parameters from a CU level QP (0 unless QP modulation sets it) |
191 | | * |
192 | | * \param[in,out] ps_ctxt : IPE thread context, passed on to ihevce_get_ipe_ol_cu_lambda_prms() (presumably updated there) |
193 | | * \param[in] ps_ed_ctb_l1 : its i4_32x32_satd[0][3] entry is read as the current SATD |
194 | | * NOTE(review): the LAMDA_BASED_ON_QUANT branch assigns i8_avg_satd, which is not declared here (only ld_avg_satd is) - TODO confirm before enabling |
195 | | * \return Nothing |
196 | | * \author |
197 | | * Ittiam |
198 | | ****************************************************************************** |
199 | | */ |
200 | | void ihevce_ipe_recompute_lambda_from_min_8x8_act_in_ctb( |
201 | | ihevce_ipe_ctxt_t *ps_ctxt, ihevce_ed_ctb_l1_t *ps_ed_ctb_l1) |
202 | 74.9k | { |
203 | 74.9k | WORD32 i4_cu_qp = 0; /* stays 0 unless the QP modulation call below overwrites it */ |
204 | 74.9k | #if MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON |
205 | 74.9k | WORD32 i4_activity; |
206 | 74.9k | #endif |
207 | 74.9k | WORD32 i4_qscale; |
208 | 74.9k | WORD32 i4_curr_satd; |
209 | 74.9k | long double ld_avg_satd; |
210 | | |
211 | | #if LAMDA_BASED_ON_QUANT |
212 | | i4_curr_satd = ps_ed_ctb_l1->i4_32x32_satd[0][2]; |
213 | | i8_avg_satd = ps_ctxt->i8_curr_frame_32x32_avg_act[2]; |
214 | | #else |
215 | 74.9k | i4_curr_satd = ps_ed_ctb_l1->i4_32x32_satd[0][3]; |
216 | 74.9k | ld_avg_satd = 2.0 + ps_ctxt->ld_curr_frame_16x16_log_avg[0]; |
217 | 74.9k | #endif |
218 | | |
219 | 74.9k | if(ps_ctxt->i4_l0ipe_qp_mod) |
220 | 74.9k | { |
221 | 74.9k | #if MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON |
222 | 74.9k | i4_cu_qp = ihevce_cu_level_qp_mod( |
223 | 74.9k | ps_ctxt->i4_qscale, |
224 | 74.9k | i4_curr_satd, |
225 | 74.9k | ld_avg_satd, |
226 | 74.9k | ps_ctxt->f_strength, |
227 | 74.9k | &i4_activity, |
228 | 74.9k | &i4_qscale, |
229 | 74.9k | ps_ctxt->ps_rc_quant_ctxt); |
230 | 74.9k | #endif |
231 | 74.9k | } |
232 | 74.9k | ihevce_get_ipe_ol_cu_lambda_prms(ps_ctxt, i4_cu_qp); |
233 | 74.9k | } |
234 | | /*! |
235 | | ****************************************************************************** |
236 | | * \if Function name : ihevce_ipe_pass_satd \endif |
237 | | * |
238 | | * \brief |
239 | | * Sums the absolute values of a trans_size x trans_size coefficient block and scales the sum down by the transform shift |
240 | | * |
241 | | * \param[in] pi2_coeff : pointer to the first coefficient of the block |
242 | | * \param[in] coeff_stride : distance, in coefficients, between the starts of consecutive rows |
243 | | * \param[in] trans_size : number of rows and of columns that are accumulated |
244 | | * \return |
245 | | * Accumulated absolute sum, right shifted by (MAX_TR_DYNAMIC_RANGE - BIT_DEPTH - log2_trans_size) |
246 | | * |
247 | | * \date |
248 | | * 18/09/2012 |
249 | | * \author |
250 | | * Ittiam |
251 | | ****************************************************************************** |
252 | | */ |
253 | | UWORD32 ihevce_ipe_pass_satd(WORD16 *pi2_coeff, WORD32 coeff_stride, WORD32 trans_size) |
254 | 8.37M | { |
255 | 8.37M | WORD32 i, j, satd; |
256 | | |
257 | 8.37M | satd = 0; |
258 | | |
259 | | /* accumulate the absolute value of every coefficient in the block */ |
260 | 41.8M | for(i = 0; i < trans_size; i++) |
261 | 33.4M | { |
262 | 167M | for(j = 0; j < trans_size; j++) |
263 | 133M | { |
264 | 133M | satd += abs(*pi2_coeff++); |
265 | 133M | } |
266 | | /* step over the rest of the stride to reach the start of the next row */ |
267 | 33.4M | pi2_coeff += coeff_stride - trans_size; |
268 | 33.4M | } |
269 | | |
270 | 8.37M | { |
271 | 8.37M | WORD32 transform_shift; |
272 | 8.37M | WORD32 log2_trans_size; |
273 | | |
274 | 8.37M | GETRANGE(log2_trans_size, trans_size); /* presumably yields the bit length of trans_size - TODO confirm */ |
275 | 8.37M | log2_trans_size -= 1; |
276 | 8.37M | transform_shift = MAX_TR_DYNAMIC_RANGE - BIT_DEPTH - log2_trans_size; |
277 | 8.37M | satd >>= transform_shift; |
278 | 8.37M | } |
279 | | |
280 | 8.37M | return (satd); |
281 | 8.37M | } |
282 | | |
283 | | /*! |
284 | | ****************************************************************************** |
285 | | * \if Function name : ihevce_ipe_get_num_mem_recs \endif |
286 | | * |
287 | | * \brief |
288 | | * Returns the number of memory records used by the IPE module |
289 | | * |
290 | | * |
291 | | * \return |
292 | | * NUM_IPE_MEM_RECS |
293 | | * |
294 | | * \author |
295 | | * Ittiam |
296 | | * |
297 | | ***************************************************************************** |
298 | | */ |
299 | | WORD32 ihevce_ipe_get_num_mem_recs(void) |
300 | 15.1k | { |
301 | 15.1k | return (NUM_IPE_MEM_RECS); |
302 | 15.1k | } |
303 | | |
304 | | /*! |
305 | | ****************************************************************************** |
306 | | * \if Function name : ihevce_ipe_get_mem_recs \endif |
307 | | * |
308 | | * \brief |
309 | | * Fills in the size, memory type and alignment of the IPE memory records. |
310 | | * |
311 | | * \param[in,out] ps_mem_tab : pointer to memory descriptors table; entries IPE_CTXT and IPE_THRDS_CTXT are written |
312 | | * \param[in] i4_num_proc_thrds : Number of processing threads for this module |
313 | | * \param[in] i4_mem_space : memspace in which memory request should be done (cast to IV_MEM_TYPE_T) |
314 | | * |
315 | | * \return |
316 | | * NUM_IPE_MEM_RECS |
317 | | * |
318 | | * \author |
319 | | * Ittiam |
320 | | * |
321 | | ***************************************************************************** |
322 | | */ |
323 | | WORD32 |
324 | | ihevce_ipe_get_mem_recs(iv_mem_rec_t *ps_mem_tab, WORD32 i4_num_proc_thrds, WORD32 i4_mem_space) |
325 | 7.59k | { |
326 | | /* memories should be requested assuming worst case requirements */ |
327 | | |
328 | | /* Module (master) context structure */ |
329 | 7.59k | ps_mem_tab[IPE_CTXT].i4_mem_size = sizeof(ihevce_ipe_master_ctxt_t); |
330 | | |
331 | 7.59k | ps_mem_tab[IPE_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
332 | | |
333 | 7.59k | ps_mem_tab[IPE_CTXT].i4_mem_alignment = 8; |
334 | | |
335 | | /* Thread contexts: one ihevce_ipe_ctxt_t per processing thread */ |
336 | 7.59k | ps_mem_tab[IPE_THRDS_CTXT].i4_mem_size = i4_num_proc_thrds * sizeof(ihevce_ipe_ctxt_t); |
337 | | |
338 | 7.59k | ps_mem_tab[IPE_THRDS_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
339 | | |
340 | 7.59k | ps_mem_tab[IPE_THRDS_CTXT].i4_mem_alignment = 32; |
341 | | |
342 | 7.59k | return (NUM_IPE_MEM_RECS); |
343 | 7.59k | } |
345 | | |
346 | | /*! |
347 | | ****************************************************************************** |
348 | | * \if Function name : ihevce_ipe_init \endif |
349 | | * |
350 | | * \brief |
351 | | * Initialises the IPE master context and each per-thread IPE context. |
352 | | * \param[in] ps_mem_tab : pointer to memory descriptors table (the IPE_CTXT and IPE_THRDS_CTXT bases are used) |
353 | | * \param[in] ps_init_prms : Create time static parameters |
354 | | * \param[in] i4_num_proc_thrds : number of thread contexts to initialise |
355 | | * \param[in] i4_ref_id : index into ai4_frame_qp[] used for the initial QP |
356 | | * \param[in] ps_func_selector : source of the luma intra prediction function pointers |
357 | | * \param[in] ps_rc_quant_ctxt : stored in the first thread context (see the note in the body) |
358 | | * \param[in] i4_resolution_id : index into as_tgt_params[] |
359 | | * \param[in] u1_is_popcnt_available : forwarded to ihevce_cmn_utils_instr_set_router() |
360 | | * \return Handle (void *) to the IPE master context |
361 | | * \author Ittiam |
362 | | ***************************************************************************** |
363 | | */ |
364 | | void *ihevce_ipe_init( |
365 | | iv_mem_rec_t *ps_mem_tab, |
366 | | ihevce_static_cfg_params_t *ps_init_prms, |
367 | | WORD32 i4_num_proc_thrds, |
368 | | WORD32 i4_ref_id, |
369 | | func_selector_t *ps_func_selector, |
370 | | rc_quant_t *ps_rc_quant_ctxt, |
371 | | WORD32 i4_resolution_id, |
372 | | UWORD8 u1_is_popcnt_available) |
373 | 7.59k | { |
374 | 7.59k | WORD32 i4_thrds; |
375 | 7.59k | UWORD32 u4_width, u4_ctb_in_a_row; |
376 | | // WORD32 i4_ctr; |
377 | 7.59k | ihevce_ipe_master_ctxt_t *ps_master_ctxt; |
378 | 7.59k | ihevce_ipe_ctxt_t *ps_ctxt; |
379 | | |
380 | | /* IPE master state structure */ |
381 | 7.59k | ps_master_ctxt = (ihevce_ipe_master_ctxt_t *)ps_mem_tab[IPE_CTXT].pv_base; |
382 | | |
383 | 7.59k | ps_master_ctxt->i4_num_proc_thrds = i4_num_proc_thrds; |
384 | | |
385 | 7.59k | ps_ctxt = (ihevce_ipe_ctxt_t *)ps_mem_tab[IPE_THRDS_CTXT].pv_base; |
386 | | |
387 | 7.59k | ps_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt; /* NOTE(review): set on the first thread context only; the loop below does not repeat it - TODO confirm this is intended */ |
388 | | |
389 | | /* width of the input YUV to be encoded */ |
390 | 7.59k | u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width; |
391 | | /* making the width a multiple of CTB size */ |
392 | 7.59k | u4_width += SET_CTB_ALIGN( |
393 | 7.59k | ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE); |
394 | | |
395 | 7.59k | u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE); /* NOTE(review): not read again in this function */ |
396 | | |
397 | | /* one-time initialisation of every thread context */ |
398 | 15.1k | for(i4_thrds = 0; i4_thrds < ps_master_ctxt->i4_num_proc_thrds; i4_thrds++) |
399 | 7.59k | { |
400 | 7.59k | ps_master_ctxt->aps_ipe_thrd_ctxt[i4_thrds] = ps_ctxt; |
401 | | |
402 | | /* initialise the CU and TU sizes */ |
403 | 7.59k | ps_ctxt->u1_ctb_size = (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size); |
404 | 7.59k | ps_ctxt->u1_min_cu_size = (1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size); |
405 | 7.59k | ps_ctxt->u1_min_tu_size = (1 << ps_init_prms->s_config_prms.i4_min_log2_tu_size); |
406 | | |
407 | | /** Register the function selector pointer*/ |
408 | 7.59k | ps_ctxt->ps_func_selector = ps_func_selector; |
409 | | |
410 | | /* Initialize the encoder quality preset */ |
411 | | /* IPE algorithm is controlled based on this preset; P7 is treated as P6 below */ |
412 | 7.59k | ps_ctxt->i4_quality_preset = |
413 | 7.59k | ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset; |
414 | | |
415 | 7.59k | if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P7) |
416 | 417 | { |
417 | 417 | ps_ctxt->i4_quality_preset = IHEVCE_QUALITY_P6; |
418 | 417 | } |
419 | | |
420 | | /* initialise all the pointer to start of arrays */ |
421 | 7.59k | ps_ctxt->ps_ipe_cu_tree = &ps_ctxt->as_ipe_cu_tree[0]; |
422 | | |
423 | | /* initialize QP */ |
424 | 7.59k | ps_ctxt->i1_QP = |
425 | 7.59k | ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].ai4_frame_qp[i4_ref_id]; |
426 | 7.59k | ps_ctxt->u1_num_b_frames = |
427 | 7.59k | (1 << ps_init_prms->s_coding_tools_prms.i4_max_temporal_layers) - 1; |
428 | | |
429 | 7.59k | ps_ctxt->b_sad_type = IPE_SAD_TYPE; |
430 | 7.59k | ps_ctxt->u1_ipe_step_size = IPE_STEP_SIZE; |
431 | | |
432 | 7.59k | ps_ctxt->apf_ipe_lum_ip[IPE_FUNC_MODE_0] = |
433 | 7.59k | ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_planar_fptr; |
434 | 7.59k | ps_ctxt->apf_ipe_lum_ip[IPE_FUNC_MODE_1] = |
435 | 7.59k | ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_dc_fptr; |
436 | 7.59k | ps_ctxt->apf_ipe_lum_ip[IPE_FUNC_MODE_2] = |
437 | 7.59k | ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode2_fptr; |
438 | 7.59k | ps_ctxt->apf_ipe_lum_ip[IPE_FUNC_MODE_3TO9] = |
439 | 7.59k | ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr; |
440 | 7.59k | ps_ctxt->apf_ipe_lum_ip[IPE_FUNC_MODE_10] = |
441 | 7.59k | ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_horz_fptr; |
442 | 7.59k | ps_ctxt->apf_ipe_lum_ip[IPE_FUNC_MODE_11TO17] = |
443 | 7.59k | ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr; |
444 | 7.59k | ps_ctxt->apf_ipe_lum_ip[IPE_FUNC_MODE_18_34] = |
445 | 7.59k | ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr; |
446 | 7.59k | ps_ctxt->apf_ipe_lum_ip[IPE_FUNC_MODE_19TO25] = |
447 | 7.59k | ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr; |
448 | 7.59k | ps_ctxt->apf_ipe_lum_ip[IPE_FUNC_MODE_26] = |
449 | 7.59k | ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ver_fptr; |
450 | 7.59k | ps_ctxt->apf_ipe_lum_ip[IPE_FUNC_MODE_27TO33] = |
451 | 7.59k | ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr; |
452 | | |
453 | | /* nbr parameters initialization */ |
454 | | /* neighbour map stride and base pointer */ |
455 | | |
456 | 7.59k | ps_ctxt->i4_nbr_map_strd = MAX_PU_IN_CTB_ROW + 1 + 8; |
457 | | |
458 | 7.59k | ps_ctxt->pu1_ctb_nbr_map = ps_ctxt->au1_nbr_ctb_map[0]; |
459 | | |
460 | | /* advance the pointer by one stride plus one element from the start of the map */ |
461 | 7.59k | ps_ctxt->pu1_ctb_nbr_map += ps_ctxt->i4_nbr_map_strd; |
462 | 7.59k | ps_ctxt->pu1_ctb_nbr_map++; |
463 | 7.59k | ps_ctxt->i4_l0ipe_qp_mod = ps_init_prms->s_config_prms.i4_cu_level_rc & 1; |
464 | 7.59k | ps_ctxt->i4_pass = ps_init_prms->s_pass_prms.i4_pass; |
465 | 7.59k | if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0) |
466 | 4.70k | { |
467 | | /* initialise the scale & rescale matrices with the flat tables */ |
468 | 4.70k | ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0]; |
469 | 4.70k | ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0]; |
470 | 4.70k | ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_flat_scale_mat_8x8[0]; |
471 | 4.70k | ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_flat_scale_mat_16x16[0]; |
472 | 4.70k | ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_flat_scale_mat_32x32[0]; |
473 | | /*init for inter matrix*/ |
474 | 4.70k | ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0]; |
475 | 4.70k | ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0]; |
476 | 4.70k | ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_flat_scale_mat_8x8[0]; |
477 | 4.70k | ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_flat_scale_mat_16x16[0]; |
478 | 4.70k | ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_flat_scale_mat_32x32[0]; |
479 | | |
480 | | /*init for rescale matrix*/ |
481 | 4.70k | ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0]; |
482 | 4.70k | ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0]; |
483 | 4.70k | ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0]; |
484 | 4.70k | ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0]; |
485 | 4.70k | ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0]; |
486 | | /*init for rescale inter matrix*/ |
487 | 4.70k | ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0]; |
488 | 4.70k | ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0]; |
489 | 4.70k | ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0]; |
490 | 4.70k | ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0]; |
491 | 4.70k | ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0]; |
492 | 4.70k | } |
493 | 2.88k | else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1) |
494 | 2.88k | { |
495 | | /* initialise the scale & rescale matrices: flat for 4x4, default tables for 8x8 and above */ |
496 | 2.88k | ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0]; |
497 | 2.88k | ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0]; |
498 | 2.88k | ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_intra_default_scale_mat_8x8[0]; |
499 | 2.88k | ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_intra_default_scale_mat_16x16[0]; |
500 | 2.88k | ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_intra_default_scale_mat_32x32[0]; |
501 | | /*init for inter matrix*/ |
502 | 2.88k | ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0]; |
503 | 2.88k | ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0]; |
504 | 2.88k | ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_inter_default_scale_mat_8x8[0]; |
505 | 2.88k | ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_inter_default_scale_mat_16x16[0]; |
506 | 2.88k | ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_inter_default_scale_mat_32x32[0]; |
507 | | |
508 | | /*init for rescale matrix*/ |
509 | 2.88k | ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0]; |
510 | 2.88k | ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0]; |
511 | 2.88k | ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_intra_default_rescale_mat_8x8[0]; |
512 | 2.88k | ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_intra_default_rescale_mat_16x16[0]; |
513 | 2.88k | ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_intra_default_rescale_mat_32x32[0]; |
514 | | /*init for rescale inter matrix*/ |
515 | 2.88k | ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0]; |
516 | 2.88k | ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0]; |
517 | 2.88k | ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_inter_default_rescale_mat_8x8[0]; |
518 | 2.88k | ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_inter_default_rescale_mat_16x16[0]; |
519 | 2.88k | ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_inter_default_rescale_mat_32x32[0]; |
520 | 2.88k | } |
521 | 0 | else |
522 | 0 | { |
523 | 0 | ASSERT(0); |
524 | 0 | } |
525 | | |
526 | 7.59k | ps_ctxt->u1_bit_depth = ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth; |
527 | | |
528 | | /** |
529 | | * Initialize the intra prediction modes map for the CTB to INTRA_DC |
530 | | **/ |
531 | 7.59k | { |
532 | 7.59k | WORD32 row, col; |
533 | 136k | for(row = 0; row < (MAX_TU_ROW_IN_CTB + 1); row++) |
534 | 2.32M | for(col = 0; col < (MAX_TU_COL_IN_CTB + 1); col++) |
535 | 2.19M | ps_ctxt->au1_ctb_mode_map[row][col] = INTRA_DC; |
536 | 7.59k | } |
537 | | |
538 | 7.59k | ihevce_cmn_utils_instr_set_router( |
539 | 7.59k | &ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type); |
540 | | |
541 | 7.59k | ihevce_ipe_instr_set_router( |
542 | 7.59k | &ps_ctxt->s_ipe_optimised_function_list, ps_init_prms->e_arch_type); |
543 | | |
544 | | /* increment the thread ctxt pointer */ |
545 | 7.59k | ps_ctxt++; |
546 | 7.59k | } |
547 | | |
548 | | /* return the handle to caller */ |
549 | 7.59k | return ((void *)ps_master_ctxt); |
550 | 7.59k | } |
551 | | /*! |
552 | | ****************************************************************************** |
553 | | * \if Function name : ihevce_ipe_get_frame_intra_satd_cost \endif |
554 | | * |
555 | | * \brief |
556 | | * Sums the frame-level accumulators of all IPE thread contexts and exports them. |
557 | | * \param[in] pv_ctxt : pointer to IPE module (master context) |
558 | | * \param[out] pi8_frame_satd_by_qpmod : sum of (i8_frame_acc_satd_by_modqp_q10 >> SATD_BY_ACT_Q_FAC) |
559 | | * \param[out] pi8_frame_acc_mode_bits_cost : sum of i8_frame_acc_mode_bits_cost |
560 | | * \param[out] pi8_frame_acc_activity_factor : sum of i8_frame_acc_act_factor |
561 | | * \param[out] pi8_frame_l0_acc_satd : sum of i8_frame_acc_satd |
562 | | * \return Sum of i8_frame_acc_satd_cost over all thread contexts |
563 | | * \author |
564 | | * Ittiam |
565 | | * |
566 | | ***************************************************************************** |
567 | | */ |
568 | | LWORD64 ihevce_ipe_get_frame_intra_satd_cost( |
569 | | void *pv_ctxt, |
570 | | LWORD64 *pi8_frame_satd_by_qpmod, |
571 | | LWORD64 *pi8_frame_acc_mode_bits_cost, |
572 | | LWORD64 *pi8_frame_acc_activity_factor, |
573 | | LWORD64 *pi8_frame_l0_acc_satd) |
574 | 128k | { |
575 | 128k | WORD32 i4_thrds; |
576 | | |
577 | 128k | ihevce_ipe_master_ctxt_t *ps_master_ctxt; |
578 | 128k | ihevce_ipe_ctxt_t *ps_ctxt; |
579 | 128k | LWORD64 i8_frame_acc_satd_cost = 0; |
580 | 128k | LWORD64 i8_frame_acc_satd = 0; |
581 | 128k | LWORD64 i8_frame_satd_by_qpmod = 0; |
582 | 128k | LWORD64 i8_frame_acc_mode_bits_cost = 0; |
583 | 128k | LWORD64 i8_frame_acc_activity_factor = 0; |
584 | | /* IPE master state structure */ |
585 | 128k | ps_master_ctxt = (ihevce_ipe_master_ctxt_t *)pv_ctxt; |
586 | | |
587 | | /* accumulate the frame statistics of every thread context */ |
588 | 256k | for(i4_thrds = 0; i4_thrds < ps_master_ctxt->i4_num_proc_thrds; i4_thrds++) |
589 | 128k | { |
590 | 128k | ps_ctxt = ps_master_ctxt->aps_ipe_thrd_ctxt[i4_thrds]; |
591 | | |
592 | 128k | i8_frame_acc_satd_cost += ps_ctxt->i8_frame_acc_satd_cost; |
593 | 128k | i8_frame_satd_by_qpmod += (ps_ctxt->i8_frame_acc_satd_by_modqp_q10 >> SATD_BY_ACT_Q_FAC); |
594 | 128k | i8_frame_acc_mode_bits_cost += ps_ctxt->i8_frame_acc_mode_bits_cost; |
595 | | |
596 | 128k | i8_frame_acc_activity_factor += ps_ctxt->i8_frame_acc_act_factor; |
597 | | |
598 | 128k | i8_frame_acc_satd += ps_ctxt->i8_frame_acc_satd; |
599 | 128k | } |
600 | 128k | *pi8_frame_satd_by_qpmod = i8_frame_satd_by_qpmod; |
601 | | |
602 | 128k | *pi8_frame_acc_mode_bits_cost = i8_frame_acc_mode_bits_cost; |
603 | | |
604 | 128k | *pi8_frame_acc_activity_factor = i8_frame_acc_activity_factor; |
605 | | |
606 | 128k | *pi8_frame_l0_acc_satd = i8_frame_acc_satd; |
607 | | |
608 | 128k | return (i8_frame_acc_satd_cost); |
609 | 128k | } |
610 | | |
611 | | /** |
612 | | ******************************************************************************* |
613 | | * \if Function name : ihevce_intra_pred_ref_filtering \endif |
614 | | * |
615 | | * \brief |
616 | | * Intra prediction reference sample smoothing filter for the Encoder |
617 | | * |
618 | | * \par Description: |
619 | | * Filters the 4 * nt + 1 reference samples with a rounded [1 2 1] / 4 kernel; the first |
620 | | * and last samples are copied unchanged. Refer to section 8.4.4.2.3 in the standard |
621 | | * NOTE(review): pu1_src and pu1_dst presumably must not alias (in-place use would read already filtered samples) - TODO confirm |
622 | | * \param[in] pu1_src pointer to the source |
623 | | * \param[in] nt integer Transform Block size |
624 | | * \param[out] pu1_dst pointer to the destination |
625 | | * |
626 | | * \returns |
627 | | * none |
628 | | * |
629 | | * \author |
630 | | * Ittiam |
631 | | * |
632 | | ******************************************************************************* |
633 | | */ |
634 | | |
635 | | #if IHEVCE_INTRA_REF_FILTERING == C |
636 | | void ihevce_intra_pred_ref_filtering(UWORD8 *pu1_src, WORD32 nt, UWORD8 *pu1_dst) |
637 | 15.7M | { |
638 | 15.7M | WORD32 i; /* Generic indexing variable */ |
639 | 15.7M | WORD32 four_nt = 4 * nt; |
640 | | |
641 | | /* Extremities (index 0 and index 4 * nt) are copied untouched */ |
642 | 15.7M | pu1_dst[0] = pu1_src[0]; |
643 | 15.7M | pu1_dst[4 * nt] = pu1_src[4 * nt]; |
644 | | /* 3-tap [1 2 1] smoothing, with rounding, of the samples in between */ |
645 | 667M | for(i = 0; i < (four_nt - 1); i++) |
646 | 651M | { |
647 | 651M | pu1_dst[i + 1] = (pu1_src[i] + 2 * pu1_src[i + 1] + pu1_src[i + 2] + 2) >> 2; |
648 | 651M | } |
649 | 15.7M | } |
650 | | #endif |
651 | | |
652 | | /*! |
653 | | ****************************************************************************** |
654 | | * \if Function name : ihevce_ipe_process_ctb \endif |
655 | | * |
656 | | * \brief |
657 | | * CTB level IPE function: clears the neighbour map, sets the CTB neighbour flags and runs the bracketing analysis |
658 | | * |
659 | | * \param[in,out] ps_ctxt : IPE thread context |
660 | | * \param[in] ps_frm_ctb_prms : CTB characteristics parameters |
661 | | * \param[in] ps_curr_src : pointer to input yuv buffer (row buffer) |
662 | | * \param[in,out] ps_curr_ctb_node : CU tree node, forwarded to ihevce_bracketing_analysis() |
663 | | * \param[out] ps_l0_ipe_out_ctb : forwarded to ihevce_bracketing_analysis() (presumably the L0 IPE output for ME - TODO confirm) |
664 | | * \param[out] ps_ctb_out : pointer to CTB analyse output structure (row buffer) |
665 | | * \param[in] ps_ed_l1_ctb : forwarded to ihevce_bracketing_analysis() |
666 | | * \param[in] ps_ed_l2_ctb : forwarded to ihevce_bracketing_analysis() |
667 | | * \param[in] ps_ed_ctb_l1 : forwarded to ihevce_bracketing_analysis() |
668 | | * \return None |
669 | | * Note : This function will receive CTB pointers which may point to blocks of CTB size |
670 | | * or smaller (at the right and bottom edges of the picture). This function recursively |
671 | | * creates smaller square partitions and passes them on for intra processing estimation |
672 | | * |
673 | | * \author |
674 | | * Ittiam |
675 | | * |
676 | | ***************************************************************************** |
677 | | */ |
678 | | void ihevce_ipe_process_ctb( |
679 | | ihevce_ipe_ctxt_t *ps_ctxt, |
680 | | frm_ctb_ctxt_t *ps_frm_ctb_prms, |
681 | | iv_enc_yuv_buf_t *ps_curr_src, |
682 | | ihevce_ipe_cu_tree_t *ps_curr_ctb_node, |
683 | | ipe_l0_ctb_analyse_for_me_t *ps_l0_ipe_out_ctb, |
684 | | ctb_analyse_t *ps_ctb_out, |
685 | | //cu_analyse_t *ps_row_cu, |
686 | | ihevce_ed_blk_t *ps_ed_l1_ctb, |
687 | | ihevce_ed_blk_t *ps_ed_l2_ctb, |
688 | | ihevce_ed_ctb_l1_t *ps_ed_ctb_l1) |
689 | 235k | { |
690 | | /* reset the whole neighbour map buffer to 0 */ |
691 | 235k | memset( |
692 | 235k | &ps_ctxt->au1_nbr_ctb_map[0][0], |
693 | 235k | 0, |
694 | 235k | (MAX_PU_IN_CTB_ROW + 1 + 8) * (MAX_PU_IN_CTB_ROW + 1 + 8)); |
695 | | |
696 | | /* set the CTB neighbour availability flags */ |
697 | 235k | ihevce_set_ctb_nbr( |
698 | 235k | &ps_ctxt->s_ctb_nbr_avail_flags, |
699 | 235k | ps_ctxt->pu1_ctb_nbr_map, |
700 | 235k | ps_ctxt->i4_nbr_map_strd, |
701 | 235k | ps_ctxt->u2_ctb_num_in_row, |
702 | 235k | ps_ctxt->u2_ctb_row_num, |
703 | 235k | ps_frm_ctb_prms); |
704 | | |
705 | | /* IPE cu and mode decision */ |
706 | 235k | ihevce_bracketing_analysis( |
707 | 235k | ps_ctxt, |
708 | 235k | ps_curr_ctb_node, |
709 | 235k | ps_curr_src, |
710 | 235k | ps_ctb_out, |
711 | | //ps_row_cu, |
712 | 235k | ps_ed_l1_ctb, |
713 | 235k | ps_ed_l2_ctb, |
714 | 235k | ps_ed_ctb_l1, |
715 | 235k | ps_l0_ipe_out_ctb); |
716 | | |
717 | 235k | return; |
718 | 235k | } |
719 | | |
720 | | /*! |
721 | | ****************************************************************************** |
722 | | * \if Function name : ihevce_ipe_process_row \endif |
723 | | * |
724 | | * \brief |
725 | | * Row level IPE function: for every CTB of one CTB row it sets up the root CU tree node and neighbour info, then calls ihevce_ipe_process_ctb |
726 | | * |
727 | | * \param[in,out] ps_ctxt : IPE thread context; u2_ctb_row_num is read, u2_ctb_num_in_row, the root node of ps_ipe_cu_tree and au1_ctb_mode_map are written |
728 | | * \param[in] ps_frm_ctb_prms : CTB characteristics parameters (CTB size, CTB counts, CU aligned picture size) |
729 | | * \param[in] ps_curr_src : input yuv buffer; its luma / chroma pointers are the base from which the per CTB pointers are derived |
730 | | * \param ps_ipe_ctb_out_row : first ipe_l0_ctb_analyse_for_me_t entry of the row; advanced by one per CTB and forwarded to ihevce_ipe_process_ctb |
731 | | * \param[out] ps_ctb_out : first ctb_analyse_t entry of the row; CU count and split flags are reset here, advanced by one per CTB |
732 | | * \param ps_ed_l1_row : first ihevce_ed_blk_t entry of the row (layer 1); CTB n uses offset n * blk_inc_ctb_l1 |
733 | | * \param ps_ed_l2_row : first ihevce_ed_blk_t entry of the row (layer 2); CTB n uses offset n * blk_inc_ctb_l2 |
734 | | * \param ps_ed_ctb_l1_row : first ihevce_ed_ctb_l1_t entry of the row; one entry per CTB |
735 | | * \param[in] blk_inc_ctb_l1 : number of ps_ed_l1_row entries per CTB |
736 | | * \param[in] blk_inc_ctb_l2 : number of ps_ed_l2_row entries per CTB |
737 | | * |
738 | | * \return None |
739 | | * |
740 | | * Note : when PIC_ALIGN_CTB_SIZE is 0 the width of the last CTB in the row and the |
741 | | * height of the CTBs in the last CTB row are derived from the CU aligned picture size |
742 | | * |
743 | | * \author Ittiam |
744 | | ***************************************************************************** |
745 | | */ |
746 | | void ihevce_ipe_process_row( |
747 | | ihevce_ipe_ctxt_t *ps_ctxt, |
748 | | frm_ctb_ctxt_t *ps_frm_ctb_prms, |
749 | | iv_enc_yuv_buf_t *ps_curr_src, |
750 | | ipe_l0_ctb_analyse_for_me_t *ps_ipe_ctb_out_row, |
751 | | ctb_analyse_t *ps_ctb_out, |
752 | | //cu_analyse_t *ps_row_cu, |
753 | | ihevce_ed_blk_t *ps_ed_l1_row, |
754 | | ihevce_ed_blk_t *ps_ed_l2_row, |
755 | | ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row, |
756 | | WORD32 blk_inc_ctb_l1, |
757 | | WORD32 blk_inc_ctb_l2) |
758 | 142k | { |
759 | | /* local variables */ |
760 | 142k | UWORD16 ctb_ctr; |
761 | 142k | iv_enc_yuv_buf_t s_curr_src_bufs; |
762 | 142k | ipe_l0_ctb_analyse_for_me_t *ps_l0_ipe_out_ctb; |
763 | 142k | UWORD16 u2_pic_wdt; |
764 | 142k | UWORD16 u2_pic_hgt; |
765 | 142k | ihevce_ed_blk_t *ps_ed_l1_ctb; |
766 | 142k | ihevce_ed_blk_t *ps_ed_l2_ctb; |
767 | 142k | ihevce_ed_ctb_l1_t *ps_ed_ctb_l1; |
768 | | 
769 | 142k | UWORD8 u1_ctb_size; |
770 | | 
771 | 142k | u2_pic_wdt = ps_frm_ctb_prms->i4_cu_aligned_pic_wd; |
772 | 142k | u2_pic_hgt = ps_frm_ctb_prms->i4_cu_aligned_pic_ht; |
773 | | 
774 | 142k | u1_ctb_size = ps_ctxt->u1_ctb_size; |
775 | | 
776 | | /* ----------------------------------------------------- */ |
777 | | /* take a local copy of the source buffer descriptor; */ |
778 | | /* its y / u buffer pointers are overwritten for every CTB */ |
779 | | /* ----------------------------------------------------- */ |
780 | 142k | memcpy(&s_curr_src_bufs, ps_curr_src, sizeof(iv_enc_yuv_buf_t)); |
781 | 142k | ps_l0_ipe_out_ctb = ps_ipe_ctb_out_row; |
782 | | 
783 | | /* --------- Loop over all the CTBs in a row --------------- */ |
784 | 378k | for(ctb_ctr = 0; ctb_ctr < ps_frm_ctb_prms->i4_num_ctbs_horz; ctb_ctr++) |
785 | 235k | { |
786 | | //UWORD8 num_cus_in_ctb; |
787 | | 
788 | 235k | UWORD8 *pu1_tmp; |
789 | | 
790 | | /* Pointer to the root node of the CU tree used for this CTB */ |
791 | 235k | ihevce_ipe_cu_tree_t *ps_ctb_node; |
792 | | 
793 | 235k | WORD32 nbr_flags; |
794 | | 
795 | 235k | WORD32 row; |
796 | | /* luma src : row start advanced by ctb_ctr CTB widths */ |
797 | 235k | pu1_tmp = (UWORD8 *)ps_curr_src->pv_y_buf; |
798 | 235k | pu1_tmp += (ctb_ctr * ps_frm_ctb_prms->i4_ctb_size); |
799 | | 
800 | 235k | s_curr_src_bufs.pv_y_buf = pu1_tmp; |
801 | | 
802 | | /* Cb & Cr pixel interleaved src; NOTE(review): the column step below is i4_ctb_size >> 1 bytes although Cb/Cr are interleaved - confirm this is intended */ |
803 | 235k | pu1_tmp = (UWORD8 *)ps_curr_src->pv_u_buf; |
804 | 235k | pu1_tmp += (ctb_ctr * (ps_frm_ctb_prms->i4_ctb_size >> 1)); |
805 | | 
806 | 235k | s_curr_src_bufs.pv_u_buf = pu1_tmp; |
807 | | 
808 | | /* Store the number of current ctb within row in the context */ |
809 | 235k | ps_ctxt->u2_ctb_num_in_row = ctb_ctr; |
810 | | 
811 | | /* Initialize number of coding units in ctb to 0 */ |
812 | 235k | ps_ctb_out->u1_num_cus_in_ctb = 0; |
813 | | /* Initialize split flag to 0 - No partition */ |
814 | 235k | ps_ctb_out->u4_cu_split_flags = 0; |
815 | | /* store the cu pointer for current ctb out */ |
816 | | //ps_ctb_out->ps_coding_units_in_ctb = ps_row_cu; |
817 | | 
818 | | /* Initialize the CTB parameters at the root node level */ |
819 | 235k | ps_ctb_node = ps_ctxt->ps_ipe_cu_tree; |
820 | 235k | ps_ctb_node->ps_parent = NULL; |
821 | 235k | ps_ctb_node->u1_depth = 0; |
822 | 235k | ps_ctb_node->u1_cu_size = u1_ctb_size; |
823 | 235k | ps_ctb_node->u2_x0 = 0; |
824 | 235k | ps_ctb_node->u2_y0 = 0; |
825 | | 
826 | 235k | ps_ctb_node->u2_orig_x = ctb_ctr * ps_ctb_node->u1_cu_size; |
827 | 235k | ps_ctb_node->u2_orig_y = ps_ctxt->u2_ctb_row_num * ps_ctb_node->u1_cu_size; |
828 | | 
829 | 235k | ps_ctb_node->u1_width = u1_ctb_size; |
830 | 235k | ps_ctb_node->u1_height = u1_ctb_size; |
831 | 235k | #if !(PIC_ALIGN_CTB_SIZE) |
832 | 235k | if(ps_ctxt->u2_ctb_num_in_row == (ps_frm_ctb_prms->i4_num_ctbs_horz - 1)) |
833 | 142k | { |
834 | 142k | ps_ctb_node->u1_width = u2_pic_wdt - (ps_ctxt->u2_ctb_num_in_row) * (u1_ctb_size); |
835 | 142k | } |
836 | 235k | if(ps_ctxt->u2_ctb_row_num == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1)) |
837 | 155k | { |
838 | 155k | ps_ctb_node->u1_height = u2_pic_hgt - (ps_ctxt->u2_ctb_row_num) * (u1_ctb_size); |
839 | 155k | } |
840 | 235k | #endif |
841 | | 
842 | 235k | switch(ps_ctb_node->u1_cu_size) /* no default case: for any other CTB size u1_log2_nt and u1_part_flag_pos are not written here */ |
843 | 235k | { |
844 | 235k | case 64: |
845 | 235k | ps_ctb_node->u1_log2_nt = 6; |
846 | 235k | ps_ctb_node->u1_part_flag_pos = 0; |
847 | 235k | break; |
848 | 0 | case 32: |
849 | 0 | ps_ctb_node->u1_log2_nt = 5; |
850 | 0 | ps_ctb_node->u1_part_flag_pos = 4; |
851 | 0 | break; |
852 | 0 | case 16: |
853 | 0 | ps_ctb_node->u1_log2_nt = 4; |
854 | 0 | ps_ctb_node->u1_part_flag_pos = 8; |
855 | 0 | break; |
856 | 235k | } |
857 | | 
858 | | /* Set neighbor flags and available neighbour sample counts for the CTB */ |
859 | 235k | nbr_flags = 0; |
860 | | 
861 | 235k | if(ps_ctxt->u2_ctb_num_in_row != 0) |
862 | 92.3k | { |
863 | 92.3k | nbr_flags |= LEFT_FLAG; /* Set Left Flag if not in first column */ |
864 | 92.3k | ps_ctb_node->u1_num_left_avail = ((u2_pic_hgt - ps_ctb_node->u2_orig_y) >= u1_ctb_size) |
865 | 92.3k | ? u1_ctb_size |
866 | 92.3k | : u2_pic_hgt - ps_ctb_node->u2_orig_y; |
867 | 92.3k | } |
868 | 142k | else |
869 | 142k | { |
870 | 142k | ps_ctb_node->u1_num_left_avail = 0; |
871 | 142k | } |
872 | | 
873 | 235k | if((ps_ctxt->u2_ctb_num_in_row != 0) && (ps_ctxt->u2_ctb_row_num != 0)) |
874 | 60.0k | nbr_flags |= TOP_LEFT_FLAG; /* Set Top-Left Flag only if in neither the first row nor the first column */ |
875 | | 
876 | 235k | if(ps_ctxt->u2_ctb_row_num != 0) |
877 | 79.3k | { |
878 | 79.3k | nbr_flags |= TOP_FLAG; /* Set Top Flag if not in first row */ |
879 | 79.3k | ps_ctb_node->u1_num_top_avail = ((u2_pic_wdt - ps_ctb_node->u2_orig_x) >= u1_ctb_size) |
880 | 79.3k | ? u1_ctb_size |
881 | 79.3k | : u2_pic_wdt - ps_ctb_node->u2_orig_x; |
882 | 79.3k | } |
883 | 155k | else |
884 | 155k | { |
885 | 155k | ps_ctb_node->u1_num_top_avail = 0; |
886 | 155k | } |
887 | | 
888 | 235k | if(ps_ctxt->u2_ctb_row_num != 0) |
889 | 79.3k | { |
890 | 79.3k | if(ps_ctxt->u2_ctb_num_in_row == (ps_frm_ctb_prms->i4_num_ctbs_horz - 1)) |
891 | 19.3k | ps_ctb_node->u1_num_top_right_avail = 0; |
892 | 60.0k | else |
893 | 60.0k | { |
894 | 60.0k | ps_ctb_node->u1_num_top_right_avail = |
895 | 60.0k | ((u2_pic_wdt - ps_ctb_node->u2_orig_x - u1_ctb_size) >= u1_ctb_size) |
896 | 60.0k | ? u1_ctb_size |
897 | 60.0k | : u2_pic_wdt - ps_ctb_node->u2_orig_x - u1_ctb_size; |
898 | 60.0k | nbr_flags |= |
899 | 60.0k | TOP_RIGHT_FLAG; /* Set Top-Right Flag only if in neither the first row nor the last column */ |
900 | 60.0k | } |
901 | 79.3k | } |
902 | 155k | else |
903 | 155k | { |
904 | 155k | ps_ctb_node->u1_num_top_right_avail = 0; |
905 | 155k | } |
906 | | 
907 | 235k | ps_ctb_node->u1_num_bottom_left_avail = 0; |
908 | | 
909 | 235k | ps_ctb_node->i4_nbr_flag = nbr_flags; |
910 | | 
911 | | /** |
912 | | * Update CTB Mode Map |
913 | | * In case this is first CTB in a row, set left most column to INTRA_DC (NA) |
914 | | * else copy the column at index MAX_TU_COL_IN_CTB to the first column |
915 | | **/ |
916 | 235k | if(ctb_ctr == 0) |
917 | 142k | { |
918 | 2.57M | for(row = 0; row < (MAX_TU_ROW_IN_CTB + 1); row++) |
919 | 2.42M | { |
920 | 2.42M | ps_ctxt->au1_ctb_mode_map[row][0] = INTRA_DC; |
921 | 2.42M | } |
922 | 142k | } |
923 | 92.3k | else |
924 | 92.3k | { |
925 | 1.66M | for(row = 0; row < (MAX_TU_ROW_IN_CTB + 1); row++) |
926 | 1.56M | { |
927 | 1.56M | ps_ctxt->au1_ctb_mode_map[row][0] = |
928 | 1.56M | ps_ctxt->au1_ctb_mode_map[row][MAX_TU_COL_IN_CTB]; |
929 | 1.56M | } |
930 | 92.3k | } |
931 | | 
932 | | /* --------- IPE call at CTB level ------------------ */ |
933 | | 
934 | | /* IPE CTB function is expected to Decide on the CUs sizes */ |
935 | | /* and populate the best intra prediction modes and TX flags*/ |
936 | | /* Interface of this CTB level function is kept open */ |
937 | | 
938 | 235k | ps_ed_l1_ctb = ps_ed_l1_row + ctb_ctr * blk_inc_ctb_l1; |
939 | 235k | ps_ed_l2_ctb = ps_ed_l2_row + ctb_ctr * blk_inc_ctb_l2; |
940 | 235k | ps_ed_ctb_l1 = ps_ed_ctb_l1_row + ctb_ctr; |
941 | | 
942 | 235k | if(ps_ctxt->u1_use_lambda_derived_from_min_8x8_act_in_ctb) |
943 | 74.9k | { |
944 | 74.9k | ihevce_ipe_recompute_lambda_from_min_8x8_act_in_ctb(ps_ctxt, ps_ed_ctb_l1); |
945 | 74.9k | } |
946 | | 
947 | 235k | ihevce_ipe_process_ctb( |
948 | 235k | ps_ctxt, |
949 | 235k | ps_frm_ctb_prms, |
950 | 235k | &s_curr_src_bufs, |
951 | 235k | ps_ctb_node, |
952 | 235k | ps_l0_ipe_out_ctb, |
953 | 235k | ps_ctb_out, |
954 | | //ps_row_cu, |
955 | 235k | ps_ed_l1_ctb, |
956 | 235k | ps_ed_l2_ctb, |
957 | 235k | ps_ed_ctb_l1); |
958 | | 
959 | | /* -------------- ctb level updates : step both output pointers to the next CTB ----------------- */ |
960 | | 
961 | 235k | ps_l0_ipe_out_ctb++; |
962 | | //num_cus_in_ctb = ps_ctb_out->u1_num_cus_in_ctb; |
963 | | 
964 | | //ps_row_cu += num_cus_in_ctb; |
965 | | 
966 | 235k | ps_ctb_out++; |
967 | 235k | } |
968 | 142k | return; |
969 | 142k | } |
970 | | |
971 | | /*! |
972 | | ****************************************************************************** |
973 | | * \if Function name : ihevce_ipe_process \endif |
974 | | * |
975 | | * \brief |
976 | | * Frame level IPE function: sets up the thread context for the frame, then fetches CTB row jobs until none is left and runs ihevce_ipe_process_row on each |
977 | | * |
978 | | * \param[in] pv_ctxt : IPE master context (ihevce_ipe_master_ctxt_t); the thread context is taken from aps_ipe_thrd_ctxt[thrd_id] |
979 | | * \param[in] ps_frm_ctb_prms : CTB characteristics parameters (i4_ctb_size and i4_num_ctbs_horz are read here) |
980 | | * \param[in] ps_frm_lamda : frame lambda params; copied into the thread context and passed to ihevce_populate_ipe_ol_cu_lambda_prms |
981 | | * \param[in] ps_curr_inp : current input; s_lap_out.s_input_buf is the source yuv, s_lap_out.i4_temporal_lyr_id is also read |
982 | | * \param ps_L0_IPE_curr_out_pre_enc : not referenced in this function |
983 | | * \param ps_ctb_out : frame level ctb_analyse_t array; row r starts at r * i4_num_ctbs_horz |
984 | | * \param ps_ipe_ctb_out : frame level ipe_l0_ctb_analyse_for_me_t array; row r starts at r * i4_num_ctbs_horz |
985 | | * \param[in] pv_multi_thrd_ctxt : passed to the pre enc job queue calls (get next job / set out dependency) |
986 | | * \param[in] slice_type : slice type of the picture; stored in the thread context |
987 | | * \param[in,out] ps_ed_pic_l1,ps_ed_pic_l2 : picture level ihevce_ed_blk_t arrays; the entries of a row are zeroed after that row is processed |
988 | | * \param ps_ed_ctb_l1_pic : picture level ihevce_ed_ctb_l1_t array, one entry per CTB |
989 | | * \param[in] thrd_id : index of the IPE thread context to use |
990 | | * \param[in] i4_ping_pong : forwarded unchanged to the pre enc job queue calls |
991 | | * \return None |
992 | | * \author Ittiam |
993 | | ***************************************************************************** |
994 | | */ |
995 | | void ihevce_ipe_process( |
996 | | void *pv_ctxt, |
997 | | frm_ctb_ctxt_t *ps_frm_ctb_prms, |
998 | | frm_lambda_ctxt_t *ps_frm_lamda, |
999 | | ihevce_lap_enc_buf_t *ps_curr_inp, |
1000 | | pre_enc_L0_ipe_encloop_ctxt_t *ps_L0_IPE_curr_out_pre_enc, |
1001 | | ctb_analyse_t *ps_ctb_out, |
1002 | | //cu_analyse_t *ps_cu_out, |
1003 | | ipe_l0_ctb_analyse_for_me_t *ps_ipe_ctb_out, |
1004 | | void *pv_multi_thrd_ctxt, |
1005 | | WORD32 slice_type, |
1006 | | ihevce_ed_blk_t *ps_ed_pic_l1, |
1007 | | ihevce_ed_blk_t *ps_ed_pic_l2, |
1008 | | ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_pic, |
1009 | | WORD32 thrd_id, |
1010 | | WORD32 i4_ping_pong) |
1011 | 123k | { |
1012 | | /* local variables */ |
1013 | 123k | ihevce_ipe_master_ctxt_t *ps_master_ctxt; |
1014 | 123k | iv_enc_yuv_buf_t *ps_inp = &ps_curr_inp->s_lap_out.s_input_buf; |
1015 | 123k | ihevce_ipe_ctxt_t *ps_ctxt; |
1016 | 123k | iv_enc_yuv_buf_t s_curr_src_bufs; |
1017 | 123k | WORD32 end_of_frame; |
1018 | | 
1019 | 123k | ihevce_ed_blk_t *ps_ed_l1_row; |
1020 | 123k | ihevce_ed_blk_t *ps_ed_l2_row; |
1021 | 123k | ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row; |
1022 | 123k | WORD32 blk_inc_ctb_l1 = 0; |
1023 | 123k | WORD32 blk_inc_ctb_l2 = 0; |
1024 | | 
1025 | | /* Layer 1 pre intra analysis related initialization. |
1026 | | * Compute no of 8x8 blks in the ctb, which is the |
1027 | | * same as no of 4x4 blks in the ctb in layer 1 : (ctb_size / 8) squared */ |
1028 | 123k | blk_inc_ctb_l1 = ps_frm_ctb_prms->i4_ctb_size >> 3; |
1029 | 123k | blk_inc_ctb_l1 = blk_inc_ctb_l1 * blk_inc_ctb_l1; |
1030 | | 
1031 | | /* Layer 2 pre intra analysis related initialization. |
1032 | | * Compute no of 16x16 blks in the ctb, which is the |
1033 | | * same as no of 8x8 blks in the ctb in layer 2 : (ctb_size / 16) squared */ |
1034 | 123k | blk_inc_ctb_l2 = ps_frm_ctb_prms->i4_ctb_size >> 4; |
1035 | 123k | blk_inc_ctb_l2 = blk_inc_ctb_l2 * blk_inc_ctb_l2; |
1036 | | 
1037 | | /* ----------------------------------------------------- */ |
1038 | | /* store the stride and dimensions of source */ |
1039 | | /* buffer pointers will be overwritten at every CTB row */ |
1040 | | /* ----------------------------------------------------- */ |
1041 | 123k | memcpy(&s_curr_src_bufs, ps_inp, sizeof(iv_enc_yuv_buf_t)); |
1042 | | 
1043 | 123k | ps_master_ctxt = (ihevce_ipe_master_ctxt_t *)pv_ctxt; |
1044 | 123k | ps_ctxt = ps_master_ctxt->aps_ipe_thrd_ctxt[thrd_id]; |
1045 | 123k | end_of_frame = 0; |
1046 | | 
1047 | 123k | if(ISLICE == slice_type) |
1048 | 42.6k | { |
1049 | 42.6k | ps_ctxt->b_sad_type = IPE_SAD_TYPE; |
1050 | 42.6k | ps_ctxt->i4_ol_satd_lambda = ps_frm_lamda->i4_ol_satd_lambda_qf; |
1051 | 42.6k | ps_ctxt->i4_ol_sad_lambda = ps_frm_lamda->i4_ol_sad_lambda_qf; |
1052 | 42.6k | } |
1053 | 80.9k | else |
1054 | 80.9k | { |
1055 | 80.9k | ps_ctxt->b_sad_type = IPE_SAD_TYPE; /* SAD; NOTE: this else branch makes the same three assignments as the I slice branch above */ |
1056 | 80.9k | ps_ctxt->i4_ol_satd_lambda = ps_frm_lamda->i4_ol_satd_lambda_qf; |
1057 | 80.9k | ps_ctxt->i4_ol_sad_lambda = ps_frm_lamda->i4_ol_sad_lambda_qf; |
1058 | 80.9k | } |
1059 | | 
1060 | 123k | ihevce_populate_ipe_ol_cu_lambda_prms( |
1061 | 123k | (void *)ps_ctxt, |
1062 | 123k | ps_frm_lamda, |
1063 | 123k | slice_type, |
1064 | 123k | ps_curr_inp->s_lap_out.i4_temporal_lyr_id, |
1065 | 123k | IPE_LAMBDA_TYPE); |
1066 | | 
1067 | | /* register the slice type in the ctxt */ |
1068 | 123k | ps_ctxt->i4_slice_type = slice_type; |
1069 | | 
1070 | | /** Frame-level SATD cost accumulator init to 0 */ |
1071 | 123k | ps_ctxt->i8_frame_acc_satd_cost = 0; |
1072 | | 
1073 | | /** Frame-level SATD accumulator init to 0 */ |
1074 | 123k | ps_ctxt->i8_frame_acc_satd = 0; |
1075 | | 
1076 | | /** Frame-level Activity factor accumulator init to 1 */ |
1077 | 123k | ps_ctxt->i8_frame_acc_act_factor = 1; |
1078 | | 
1079 | | /** Frame-level Mode Bits cost accumulator init to 0 */ |
1080 | 123k | ps_ctxt->i8_frame_acc_mode_bits_cost = 0; |
1081 | | 
1082 | | /** Frame-level SATD/qp acc init to 0 */ |
1083 | 123k | ps_ctxt->i8_frame_acc_satd_by_modqp_q10 = 0; |
1084 | | 
1085 | | /* ------------ Loop over all the CTB rows --------------- */ |
1086 | 390k | while(0 == end_of_frame) |
1087 | 266k | { |
1088 | 266k | UWORD8 *pu1_tmp; |
1089 | 266k | WORD32 vert_ctr; |
1090 | | //cu_analyse_t *ps_row_cu; |
1091 | 266k | ctb_analyse_t *ps_ctb_out_row; |
1092 | 266k | job_queue_t *ps_job; |
1093 | 266k | ipe_l0_ctb_analyse_for_me_t *ps_ipe_ctb_out_row; |
1094 | | 
1095 | | /* Get the current row from the job queue */ |
1096 | 266k | ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job( |
1097 | 266k | pv_multi_thrd_ctxt, IPE_JOB_LYR0, 1, i4_ping_pong); |
1098 | | 
1099 | | /* If no job is returned (all rows are done), set the end of process flag to 1, */ |
1100 | | /* and the current row to -1 */ |
1101 | 266k | if(NULL == ps_job) |
1102 | 123k | { |
1103 | 123k | vert_ctr = -1; |
1104 | 123k | end_of_frame = 1; |
1105 | 123k | } |
1106 | 142k | else |
1107 | 142k | { |
1108 | 142k | ASSERT(IPE_JOB_LYR0 == ps_job->i4_pre_enc_task_type); |
1109 | | 
1110 | | /* Obtain the current row's details from the job */ |
1111 | 142k | vert_ctr = ps_job->s_job_info.s_ipe_job_info.i4_ctb_row_no; |
1112 | | //DBG_PRINTF("IPE PASS : Thread id %d, Vert Ctr %d\n",thrd_id,vert_ctr); |
1113 | | 
1114 | | /* Update the ipe context with current row number */ |
1115 | 142k | ps_ctxt->u2_ctb_row_num = vert_ctr; |
1116 | | 
1117 | | /* derive the current ctb row pointers */ |
1118 | | 
1119 | | /* luma src : frame start advanced by vert_ctr CTB heights of luma lines */ |
1120 | 142k | pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf; |
1121 | 142k | pu1_tmp += (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_inp->i4_y_strd); |
1122 | | 
1123 | 142k | s_curr_src_bufs.pv_y_buf = pu1_tmp; |
1124 | | 
1125 | | /* Cb & Cr pixel interleaved src : advanced by vert_ctr * (ctb_size / 2) chroma lines */ |
1126 | 142k | pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf; |
1127 | 142k | pu1_tmp += (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> 1) * ps_inp->i4_uv_strd); |
1128 | | 
1129 | 142k | s_curr_src_bufs.pv_u_buf = pu1_tmp; |
1130 | | 
1131 | | /* row intra analyse cost buffer */ |
1132 | 142k | ps_ipe_ctb_out_row = ps_ipe_ctb_out + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz; |
1133 | | 
1134 | | /* row ctb out structure */ |
1135 | 142k | ps_ctb_out_row = ps_ctb_out + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz; |
1136 | | 
1137 | | /* derive the row's L1 / L2 ED pointers, call the row level processing function, then zero the row's L1 / L2 ED blocks */ |
1138 | 142k | ps_ed_l1_row = |
1139 | 142k | ps_ed_pic_l1 + ps_frm_ctb_prms->i4_num_ctbs_horz * blk_inc_ctb_l1 * vert_ctr; |
1140 | 142k | ps_ed_l2_row = |
1141 | 142k | ps_ed_pic_l2 + ps_frm_ctb_prms->i4_num_ctbs_horz * blk_inc_ctb_l2 * vert_ctr; |
1142 | 142k | ps_ed_ctb_l1_row = ps_ed_ctb_l1_pic + ps_frm_ctb_prms->i4_num_ctbs_horz * vert_ctr; |
1143 | 142k | ihevce_ipe_process_row( |
1144 | 142k | ps_ctxt, |
1145 | 142k | ps_frm_ctb_prms, |
1146 | 142k | &s_curr_src_bufs, |
1147 | 142k | ps_ipe_ctb_out_row, |
1148 | 142k | ps_ctb_out_row, |
1149 | | //ps_row_cu, |
1150 | 142k | ps_ed_l1_row, |
1151 | 142k | ps_ed_l2_row, |
1152 | 142k | ps_ed_ctb_l1_row, |
1153 | 142k | blk_inc_ctb_l1, |
1154 | 142k | blk_inc_ctb_l2); |
1155 | | 
1156 | 142k | memset( |
1157 | 142k | ps_ed_l1_row, |
1158 | 142k | 0, |
1159 | 142k | ps_frm_ctb_prms->i4_num_ctbs_horz * blk_inc_ctb_l1 * sizeof(ihevce_ed_blk_t)); |
1160 | 142k | memset( |
1161 | 142k | ps_ed_l2_row, |
1162 | 142k | 0, |
1163 | 142k | ps_frm_ctb_prms->i4_num_ctbs_horz * blk_inc_ctb_l2 * sizeof(ihevce_ed_blk_t)); |
1164 | | 
1165 | | /* set the output dependency */ |
1166 | 142k | ihevce_pre_enc_grp_job_set_out_dep(pv_multi_thrd_ctxt, ps_job, i4_ping_pong); |
1167 | 142k | } |
1168 | 266k | } |
1169 | | 
1170 | | /* EIID: Print stat regarding how many 16x16 blocks are skipped in the frame, valid for single thread only */ |
1171 | | //DBG_PRINTF("num_16x16_analyze_skipped: %d\n",ps_ctxt->u4_num_16x16_skips_at_L0_IPE); |
1172 | | 
1173 | 123k | return; |
1174 | 123k | } |
1175 | | |
1176 | | /*! |
1177 | | ****************************************************************************** |
1178 | | * \if Function name : ihevce_get_ipe_ol_cu_lambda_prms \endif |
1179 | | * |
1180 | | * \brief |
1181 | | * Selects the SATD and SAD lambdas for a given QP from the per QP lambda arrays of the IPE context |
1182 | | * |
1183 | | * \param[in,out] pv_ctxt : IPE context (ihevce_ipe_ctxt_t); i4_ol_satd_lambda and i4_ol_sad_lambda are overwritten |
1184 | | * \param[in] i4_cur_cu_qp : QP used as index into i4_ol_satd_lambda_qf_array / i4_ol_sad_lambda_qf_array |
1185 | | * |
1186 | | * Note : i4_cur_cu_qp is used directly as the array index; no range check is done here |
1187 | | * |
1188 | | * |
1189 | | * \return |
1190 | | * None |
1191 | | * |
1192 | | * \author |
1193 | | * Ittiam |
1194 | | * |
1195 | | ***************************************************************************** |
1196 | | */ |
1197 | | void ihevce_get_ipe_ol_cu_lambda_prms(void *pv_ctxt, WORD32 i4_cur_cu_qp) |
1198 | 74.9k | { |
1199 | 74.9k | ihevce_ipe_ctxt_t *ps_ctxt = (ihevce_ipe_ctxt_t *)pv_ctxt; |
1200 | | //WORD32 chroma_qp = gau1_ihevc_chroma_qp_scale[i4_cur_cu_qp]; |
1201 | | 
1202 | | /* Make the table entries for this QP the current lambdas of the IPE pass */ |
1203 | 74.9k | ps_ctxt->i4_ol_satd_lambda = ps_ctxt->i4_ol_satd_lambda_qf_array[i4_cur_cu_qp]; |
1204 | 74.9k | ps_ctxt->i4_ol_sad_lambda = ps_ctxt->i4_ol_sad_lambda_qf_array[i4_cur_cu_qp]; |
1205 | 74.9k | } |
1206 | | |
1207 | | /*! |
1208 | | ****************************************************************************** |
1209 | | * \if Function name : ihevce_populate_ipe_ol_cu_lambda_prms \endif |
1210 | | * |
1211 | | * \brief |
1212 | | * Fills the per QP SAD / SATD lambda arrays (and their type2 variants) of the IPE context for every QP from i2_min_qp + i1_qp_offset up to i2_max_qp |
1213 | | * |
1214 | | * \param[in,out] pv_ctxt : IPE context (ihevce_ipe_ctxt_t); its i4_ol_*_lambda_qf_array tables are written, the array index is the loop QP |
1215 | | * \param[in] ps_frm_lamda : supplies lambda_modifier and lambda_uv_modifier |
1216 | | * \param[in] i4_slice_type : BSLICE (with non zero temporal layer) scales the modifiers by CLIP3((qp - 12) / 6, 2, 4); ISLICE picks f_i_pic_lamda_modifier when the constant modifier is in use |
1217 | | * \param[in] i4_temporal_lyr_id : temporal layer id; only tested for non zero together with BSLICE |
1218 | | * \param[in] i4_lambda_type : 0, 1 or 2; types 1 and 2 start with ASSERT(0), any other value only hits ASSERT(0) |
1219 | | * |
1220 | | * \return |
1221 | | * None |
1222 | | * |
1223 | | * \author |
1224 | | * Ittiam |
1225 | | * |
1226 | | ***************************************************************************** |
1227 | | */ |
1228 | | void ihevce_populate_ipe_ol_cu_lambda_prms( |
1229 | | void *pv_ctxt, |
1230 | | frm_lambda_ctxt_t *ps_frm_lamda, |
1231 | | WORD32 i4_slice_type, |
1232 | | WORD32 i4_temporal_lyr_id, |
1233 | | WORD32 i4_lambda_type) |
1234 | 123k | { |
1235 | 123k | WORD32 i4_curr_cu_qp; |
1236 | 123k | double lambda_modifier; |
1237 | 123k | double lambda_uv_modifier; |
1238 | 123k | double lambda; |
1239 | 123k | double lambda_uv; |
1240 | | 
1241 | 123k | ihevce_ipe_ctxt_t *ps_ctxt = (ihevce_ipe_ctxt_t *)pv_ctxt; |
1242 | | 
1243 | 123k | WORD32 i4_qp_bd_offset = 6 * (ps_ctxt->u1_bit_depth - 8); |
1244 | | 
1245 | 123k | for(i4_curr_cu_qp = |
1246 | 123k | ps_ctxt->ps_rc_quant_ctxt->i2_min_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset; |
1247 | 6.42M | i4_curr_cu_qp <= ps_ctxt->ps_rc_quant_ctxt->i2_max_qp; |
1248 | 6.30M | i4_curr_cu_qp++) |
1249 | 6.30M | { |
1250 | 6.30M | WORD32 chroma_qp = i4_curr_cu_qp; |
1251 | | 
1252 | 6.30M | if((BSLICE == i4_slice_type) && (i4_temporal_lyr_id)) |
1253 | 720k | { |
1254 | 720k | lambda_modifier = ps_frm_lamda->lambda_modifier * |
1255 | 720k | CLIP3((((double)(i4_curr_cu_qp - 12)) / 6.0), 2.00, 4.00); |
1256 | 720k | lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier * |
1257 | 720k | CLIP3((((double)(chroma_qp - 12)) / 6.0), 2.00, 4.00); |
1258 | 720k | } |
1259 | 5.58M | else |
1260 | 5.58M | { |
1261 | 5.58M | lambda_modifier = ps_frm_lamda->lambda_modifier; |
1262 | 5.58M | lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier; |
1263 | 5.58M | } |
1264 | 6.30M | if(ps_ctxt->i4_use_const_lamda_modifier) /* constant modifier overrides the values chosen above */ |
1265 | 0 | { |
1266 | 0 | if(ISLICE == i4_slice_type) |
1267 | 0 | { |
1268 | 0 | lambda_modifier = ps_ctxt->f_i_pic_lamda_modifier; |
1269 | 0 | lambda_uv_modifier = ps_ctxt->f_i_pic_lamda_modifier; |
1270 | 0 | } |
1271 | 0 | else |
1272 | 0 | { |
1273 | 0 | lambda_modifier = CONST_LAMDA_MOD_VAL; |
1274 | 0 | lambda_uv_modifier = CONST_LAMDA_MOD_VAL; |
1275 | 0 | } |
1276 | 0 | } |
1277 | | 
1278 | 6.30M | switch(i4_lambda_type) |
1279 | 6.30M | { |
1280 | 6.30M | case 0: |
1281 | 6.30M | { |
1282 | 6.30M | i4_qp_bd_offset = 0; /* bit depth offset is dropped for lambda type 0; the variable stays 0 for the remaining iterations */ |
1283 | | 
1284 | 6.30M | lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bd_offset - 12)) / 3.0)); |
1285 | | 
1286 | 6.30M | lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bd_offset - 12)) / 3.0)); |
1287 | | 
1288 | 6.30M | lambda *= lambda_modifier; |
1289 | 6.30M | lambda_uv *= lambda_uv_modifier; /* lambda_uv is computed but not stored anywhere in this function */ |
1290 | 6.30M | if(ps_ctxt->i4_use_const_lamda_modifier) |
1291 | 0 | { |
1292 | 0 | ps_ctxt->i4_ol_sad_lambda_qf_array[i4_curr_cu_qp] = |
1293 | 0 | (WORD32)((sqrt(lambda)) * (1 << LAMBDA_Q_SHIFT)); |
1294 | 

1295 | 0 | ps_ctxt->i4_ol_satd_lambda_qf_array[i4_curr_cu_qp] = |
1296 | 0 | (WORD32)((sqrt(lambda)) * (1 << LAMBDA_Q_SHIFT)); |
1297 | 0 | } |
1298 | 6.30M | else |
1299 | 6.30M | { |
1300 | 6.30M | ps_ctxt->i4_ol_sad_lambda_qf_array[i4_curr_cu_qp] = |
1301 | 6.30M | (WORD32)((sqrt(lambda) / 2) * (1 << LAMBDA_Q_SHIFT)); |
1302 | | 
1303 | 6.30M | ps_ctxt->i4_ol_satd_lambda_qf_array[i4_curr_cu_qp] = |
1304 | 6.30M | (WORD32)((sqrt(lambda * 1.9) / 2) * (1 << LAMBDA_Q_SHIFT)); |
1305 | 6.30M | } |
1306 | | 
1307 | 6.30M | ps_ctxt->i4_ol_sad_type2_lambda_qf_array[i4_curr_cu_qp] = |
1308 | 6.30M | ps_ctxt->i4_ol_sad_lambda_qf_array[i4_curr_cu_qp]; |
1309 | | 
1310 | 6.30M | ps_ctxt->i4_ol_satd_type2_lambda_qf_array[i4_curr_cu_qp] = |
1311 | 6.30M | ps_ctxt->i4_ol_satd_lambda_qf_array[i4_curr_cu_qp]; |
1312 | | 
1313 | 6.30M | break; |
1314 | 0 | } |
1315 | 0 | case 1: |
1316 | 0 | { |
1317 | 0 | ASSERT(0); /* should not enter the path for IPE; unlike case 0 the bit depth offset is kept here */ |
1318 | 0 | lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bd_offset - 12)) / 3.0)); |
1319 | 

1320 | 0 | lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bd_offset - 12)) / 3.0)); |
1321 | 

1322 | 0 | lambda *= lambda_modifier; |
1323 | 0 | lambda_uv *= lambda_uv_modifier; |
1324 | 0 | if(ps_ctxt->i4_use_const_lamda_modifier) |
1325 | 0 | { |
1326 | 0 | ps_ctxt->i4_ol_sad_lambda_qf_array[i4_curr_cu_qp] = |
1327 | 0 | (WORD32)((sqrt(lambda)) * (1 << LAMBDA_Q_SHIFT)); |
1328 | 

1329 | 0 | ps_ctxt->i4_ol_satd_lambda_qf_array[i4_curr_cu_qp] = |
1330 | 0 | (WORD32)((sqrt(lambda)) * (1 << LAMBDA_Q_SHIFT)); |
1331 | 0 | } |
1332 | 0 | else |
1333 | 0 | { |
1334 | 0 | ps_ctxt->i4_ol_sad_lambda_qf_array[i4_curr_cu_qp] = |
1335 | 0 | (WORD32)((sqrt(lambda) / 2) * (1 << LAMBDA_Q_SHIFT)); |
1336 | 

1337 | 0 | ps_ctxt->i4_ol_satd_lambda_qf_array[i4_curr_cu_qp] = |
1338 | 0 | (WORD32)((sqrt(lambda * 1.9) / 2) * (1 << LAMBDA_Q_SHIFT)); |
1339 | 0 | } |
1340 | 

1341 | 0 | ps_ctxt->i4_ol_sad_type2_lambda_qf_array[i4_curr_cu_qp] = |
1342 | 0 | ps_ctxt->i4_ol_sad_lambda_qf_array[i4_curr_cu_qp]; |
1343 | 

1344 | 0 | ps_ctxt->i4_ol_satd_type2_lambda_qf_array[i4_curr_cu_qp] = |
1345 | 0 | ps_ctxt->i4_ol_satd_lambda_qf_array[i4_curr_cu_qp]; |
1346 | 

1347 | 0 | break; |
1348 | 0 | } |
1349 | 0 | case 2: |
1350 | 0 | { |
1351 | 0 | ASSERT(0); /* should not enter the path for IPE; base arrays use the bit depth offset, type2 arrays are recomputed below with offset 0 */ |
1352 | 0 | lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bd_offset - 12)) / 3.0)); |
1353 | 

1354 | 0 | lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bd_offset - 12)) / 3.0)); |
1355 | 

1356 | 0 | lambda *= lambda_modifier; |
1357 | 0 | lambda_uv *= lambda_uv_modifier; |
1358 | 0 | if(ps_ctxt->i4_use_const_lamda_modifier) |
1359 | 0 | { |
1360 | 0 | ps_ctxt->i4_ol_sad_lambda_qf_array[i4_curr_cu_qp] = |
1361 | 0 | (WORD32)((sqrt(lambda)) * (1 << LAMBDA_Q_SHIFT)); |
1362 | 

1363 | 0 | ps_ctxt->i4_ol_satd_lambda_qf_array[i4_curr_cu_qp] = |
1364 | 0 | (WORD32)((sqrt(lambda)) * (1 << LAMBDA_Q_SHIFT)); |
1365 | 0 | } |
1366 | 0 | else |
1367 | 0 | { |
1368 | 0 | ps_ctxt->i4_ol_sad_lambda_qf_array[i4_curr_cu_qp] = |
1369 | 0 | (WORD32)((sqrt(lambda) / 2) * (1 << LAMBDA_Q_SHIFT)); |
1370 | 

1371 | 0 | ps_ctxt->i4_ol_satd_lambda_qf_array[i4_curr_cu_qp] = |
1372 | 0 | (WORD32)((sqrt(lambda * 1.9) / 2) * (1 << LAMBDA_Q_SHIFT)); |
1373 | 0 | } |
1374 | 0 | i4_qp_bd_offset = 0; |
1375 | 

1376 | 0 | lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bd_offset - 12)) / 3.0)); |
1377 | 

1378 | 0 | lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bd_offset - 12)) / 3.0)); |
1379 | 

1380 | 0 | lambda *= lambda_modifier; |
1381 | 0 | lambda_uv *= lambda_uv_modifier; |
1382 | 0 | if(ps_ctxt->i4_use_const_lamda_modifier) |
1383 | 0 | { |
1384 | 0 | ps_ctxt->i4_ol_sad_type2_lambda_qf_array[i4_curr_cu_qp] = |
1385 | 0 | (WORD32)((sqrt(lambda)) * (1 << LAMBDA_Q_SHIFT)); |
1386 | 

1387 | 0 | ps_ctxt->i4_ol_satd_type2_lambda_qf_array[i4_curr_cu_qp] = |
1388 | 0 | (WORD32)((sqrt(lambda)) * (1 << LAMBDA_Q_SHIFT)); |
1389 | 0 | } |
1390 | 0 | else |
1391 | 0 | { |
1392 | 0 | ps_ctxt->i4_ol_sad_type2_lambda_qf_array[i4_curr_cu_qp] = |
1393 | 0 | (WORD32)((sqrt(lambda) / 2) * (1 << LAMBDA_Q_SHIFT)); |
1394 | 

1395 | 0 | ps_ctxt->i4_ol_satd_type2_lambda_qf_array[i4_curr_cu_qp] = |
1396 | 0 | (WORD32)((sqrt(lambda * 1.9) / 2) * (1 << LAMBDA_Q_SHIFT)); |
1397 | 0 | } |
1398 | 0 | break; |
1399 | 0 | } |
1400 | 0 | default: |
1401 | 0 | { |
1402 | | /* Unsupported lambda type: nothing is written, only the assert fires */ |
1403 | 0 | ASSERT(0); |
1404 | 0 | } |
1405 | 6.30M | } |
1406 | 6.30M | } |
1407 | 123k | } |
1408 | | |
1409 | | #define ME_COST_THRSHOLD 7 |
1410 | | /*! |
1411 | | ****************************************************************************** |
1412 | | * \if Function name : ihevce_get_frame_lambda_prms \endif |
1413 | | * |
1414 | | * \brief |
1415 | | * Function whihc calculates the Lambda params for current picture |
1416 | | * |
1417 | | * \param[in] ps_enc_ctxt : encoder ctxt pointer |
1418 | | * \param[in] ps_cur_pic_ctxt : current pic ctxt |
1419 | | * \param[in] i4_cur_frame_qp : current pic QP |
1420 | | * \param[in] first_field : is first field flag |
1421 | | * \param[in] i4_temporal_lyr_id : Current picture layer id |
1422 | | * |
1423 | | * \return |
1424 | | * None |
1425 | | * |
1426 | | * \author |
1427 | | * Ittiam |
1428 | | * |
1429 | | ***************************************************************************** |
1430 | | */ |
1431 | | void ihevce_populate_ipe_frame_init( |
1432 | | void *pv_ctxt, |
1433 | | ihevce_static_cfg_params_t *ps_stat_prms, |
1434 | | WORD32 i4_curr_frm_qp, |
1435 | | WORD32 i4_slice_type, |
1436 | | WORD32 i4_thrd_id, |
1437 | | pre_enc_me_ctxt_t *ps_curr_out, |
1438 | | WORD8 i1_cu_qp_delta_enabled_flag, |
1439 | | rc_quant_t *ps_rc_quant_ctxt, |
1440 | | WORD32 i4_quality_preset, |
1441 | | WORD32 i4_temporal_lyr_id, |
1442 | | ihevce_lap_output_params_t *ps_lap_out) |
1443 | 123k | { |
1444 | 123k | ihevce_ipe_master_ctxt_t *ps_master_ctxt = (ihevce_ipe_master_ctxt_t *)pv_ctxt; |
1445 | 123k | WORD32 i4_i; |
1446 | 123k | WORD32 ai4_mod_factor_num[2]; |
1447 | | |
1448 | 123k | ihevce_ipe_ctxt_t *ps_ctxt = ps_master_ctxt->aps_ipe_thrd_ctxt[i4_thrd_id]; |
1449 | 123k | ps_ctxt->i4_hevc_qp = i4_curr_frm_qp; |
1450 | 123k | ps_ctxt->i4_quality_preset = i4_quality_preset; |
1451 | 123k | ps_ctxt->i4_temporal_lyr_id = i4_temporal_lyr_id; |
1452 | 123k | ps_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt; |
1453 | 123k | ps_ctxt->i4_qscale = |
1454 | 123k | ps_ctxt->ps_rc_quant_ctxt |
1455 | 123k | ->pi4_qp_to_qscale[i4_curr_frm_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]; |
1456 | | |
1457 | 123k | ps_ctxt->i4_frm_qp = i4_curr_frm_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset; |
1458 | 123k | ps_ctxt->i4_slice_type = i4_slice_type; //EIID |
1459 | 123k | ps_ctxt->i4_temporal_layer = ps_lap_out->i4_temporal_lyr_id; |
1460 | 123k | ps_ctxt->i4_is_ref_pic = ps_lap_out->i4_is_ref_pic; |
1461 | 123k | ps_ctxt->u4_num_16x16_skips_at_L0_IPE = 0; |
1462 | 123k | ps_ctxt->i4_use_const_lamda_modifier = USE_CONSTANT_LAMBDA_MODIFIER; |
1463 | 123k | ps_ctxt->i4_use_const_lamda_modifier = |
1464 | 123k | ps_ctxt->i4_use_const_lamda_modifier || |
1465 | 123k | ((ps_stat_prms->s_coding_tools_prms.i4_vqet & |
1466 | 123k | (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) && |
1467 | 123k | ((ps_stat_prms->s_coding_tools_prms.i4_vqet & |
1468 | 0 | (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)) || |
1469 | 0 | (ps_stat_prms->s_coding_tools_prms.i4_vqet & |
1470 | 0 | (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1)) || |
1471 | 0 | (ps_stat_prms->s_coding_tools_prms.i4_vqet & |
1472 | 0 | (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_2)) || |
1473 | 0 | (ps_stat_prms->s_coding_tools_prms.i4_vqet & |
1474 | 0 | (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_3)))); |
1475 | 123k | { |
1476 | 123k | ps_ctxt->f_i_pic_lamda_modifier = ps_lap_out->f_i_pic_lamda_modifier; |
1477 | 123k | } |
1478 | 123k | #if POW_OPT |
1479 | 370k | for(i4_i = 0; i4_i < 2; i4_i++) |
1480 | 247k | { |
1481 | 247k | ps_ctxt->ld_curr_frame_8x8_log_avg[i4_i] = ps_curr_out->ld_curr_frame_8x8_log_avg[i4_i]; |
1482 | 247k | ps_ctxt->ld_curr_frame_16x16_log_avg[i4_i] = ps_curr_out->ld_curr_frame_16x16_log_avg[i4_i]; |
1483 | 247k | ps_ctxt->ld_curr_frame_32x32_log_avg[i4_i] = ps_curr_out->ld_curr_frame_32x32_log_avg[i4_i]; |
1484 | 247k | } |
1485 | | |
1486 | 123k | ps_ctxt->ld_curr_frame_16x16_log_avg[2] = ps_curr_out->ld_curr_frame_16x16_log_avg[2]; |
1487 | 123k | ps_ctxt->ld_curr_frame_32x32_log_avg[2] = ps_curr_out->ld_curr_frame_32x32_log_avg[2]; |
1488 | 123k | ps_ctxt->i8_curr_frame_avg_mean_act = ps_curr_out->i8_curr_frame_avg_mean_act; |
1489 | | #else |
1490 | | for(i4_i = 0; i4_i < 2; i4_i++) |
1491 | | { |
1492 | | ps_ctxt->i8_curr_frame_8x8_avg_act[i4_i] = ps_curr_out->i8_curr_frame_8x8_avg_act[i4_i]; |
1493 | | ps_ctxt->i8_curr_frame_16x16_avg_act[i4_i] = ps_curr_out->i8_curr_frame_16x16_avg_act[i4_i]; |
1494 | | ps_ctxt->i8_curr_frame_32x32_avg_act[i4_i] = ps_curr_out->i8_curr_frame_32x32_avg_act[i4_i]; |
1495 | | } |
1496 | | |
1497 | | ps_ctxt->i8_curr_frame_16x16_avg_act[2] = ps_curr_out->i8_curr_frame_16x16_avg_act[2]; |
1498 | | ps_ctxt->i8_curr_frame_32x32_avg_act[2] = ps_curr_out->i8_curr_frame_32x32_avg_act[2]; |
1499 | | #endif |
1500 | | |
1501 | 123k | ps_ctxt->pi2_trans_out = |
1502 | 123k | (WORD16 *)&ps_ctxt->au1_pred_samples[0]; //overlaying trans coeff memory with pred_samples |
1503 | 123k | ps_ctxt->pi2_trans_tmp = (WORD16 *)&ps_ctxt->au1_pred_samples[2048]; |
1504 | | |
1505 | | /*Mod factor NUM */ |
1506 | 123k | ps_ctxt->ai4_mod_factor_derived_by_variance[0] = |
1507 | 123k | ps_curr_out->ai4_mod_factor_derived_by_variance[0]; |
1508 | 123k | ps_ctxt->ai4_mod_factor_derived_by_variance[1] = |
1509 | 123k | ps_curr_out->ai4_mod_factor_derived_by_variance[1]; |
1510 | | |
1511 | 123k | ps_ctxt->f_strength = ps_curr_out->f_strength; |
1512 | | |
1513 | 123k | if(ps_stat_prms->s_coding_tools_prms.i4_vqet & (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) |
1514 | 0 | { |
1515 | 0 | if(ps_stat_prms->s_coding_tools_prms.i4_vqet & |
1516 | 0 | (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)) |
1517 | 0 | { |
1518 | 0 | ps_ctxt->i4_enable_noise_detection = 1; |
1519 | 0 | } |
1520 | 0 | else |
1521 | 0 | { |
1522 | 0 | ps_ctxt->i4_enable_noise_detection = 0; |
1523 | 0 | } |
1524 | 0 | } |
1525 | 123k | else |
1526 | 123k | { |
1527 | 123k | ps_ctxt->i4_enable_noise_detection = 0; |
1528 | 123k | } |
1529 | | |
1530 | 123k | { |
1531 | 123k | if(ISLICE == ps_ctxt->i4_slice_type) |
1532 | 42.6k | { |
1533 | 42.6k | ai4_mod_factor_num[0] = INTRA_QP_MOD_FACTOR_NUM; //16; |
1534 | 42.6k | ai4_mod_factor_num[1] = INTRA_QP_MOD_FACTOR_NUM; //16; |
1535 | 42.6k | } |
1536 | 80.9k | else |
1537 | 80.9k | { |
1538 | 80.9k | ai4_mod_factor_num[0] = INTER_QP_MOD_FACTOR_NUM; //4; |
1539 | 80.9k | ai4_mod_factor_num[1] = INTER_QP_MOD_FACTOR_NUM; //4; |
1540 | 80.9k | } |
1541 | | |
1542 | | #if ENABLE_QP_MOD_BASED_ON_SPATIAL_VARIANCE |
1543 | | for(i4_i = 0; i4_i < 2; i4_i++) |
1544 | | { |
1545 | | WORD32 mod_factor_num_val = |
1546 | | ps_ctxt->ai4_mod_factor_derived_by_variance[i4_i] * QP_MOD_FACTOR_DEN; |
1547 | | |
1548 | | ai4_mod_factor_num[i4_i] = CLIP3(mod_factor_num_val, 1, ai4_mod_factor_num[i4_i]); |
1549 | | ps_ctxt->ai4_mod_factor_derived_by_variance[i4_i] = ai4_mod_factor_num[i4_i]; |
1550 | | } |
1551 | | #else |
1552 | 370k | for(i4_i = 0; i4_i < 2; i4_i++) |
1553 | 247k | { |
1554 | 247k | ps_ctxt->ai4_mod_factor_derived_by_variance[i4_i] = ai4_mod_factor_num[i4_i]; |
1555 | 247k | } |
1556 | 123k | #endif |
1557 | 123k | } |
1558 | | |
1559 | 123k | ps_ctxt->u1_use_lambda_derived_from_min_8x8_act_in_ctb = MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON && |
1560 | 123k | i1_cu_qp_delta_enabled_flag; |
1561 | | |
1562 | 123k | ps_ctxt->u1_use_satd = 1; |
1563 | 123k | ps_ctxt->u1_level_1_refine_on = 1; |
1564 | 123k | ps_ctxt->u1_disable_child_cu_decide = 0; |
1565 | | |
1566 | 123k | #if !OLD_XTREME_SPEED |
1567 | 123k | if(((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) || |
1568 | 123k | (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) && |
1569 | 123k | (ps_ctxt->i4_slice_type != ISLICE)) |
1570 | 24.4k | { |
1571 | 24.4k | ps_ctxt->u1_use_satd = 0; |
1572 | 24.4k | ps_ctxt->u1_level_1_refine_on = 1; |
1573 | 24.4k | ps_ctxt->u1_disable_child_cu_decide = 0; |
1574 | 24.4k | } |
1575 | | |
1576 | 123k | #endif |
1577 | | |
1578 | 123k | if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P4) && (ps_ctxt->i4_slice_type != ISLICE)) |
1579 | 6.92k | ps_ctxt->u1_use_satd = 0; |
1580 | 123k | if(ps_ctxt->i4_quality_preset > IHEVCE_QUALITY_P3) |
1581 | 64.4k | ps_ctxt->u1_use_satd = 0; |
1582 | 123k | } |