/src/libhevc/encoder/ihevce_enc_loop_utils.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2018 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | |
21 | | /*! |
22 | | ****************************************************************************** |
23 | | * \file ihevce_enc_loop_utils.c |
24 | | * |
25 | | * \brief |
26 | | * This file contains utility functions of Encode loop |
27 | | * |
28 | | * \date |
29 | | * 18/09/2012 |
30 | | * |
31 | | * \author |
32 | | * Ittiam |
33 | | * |
34 | | * |
35 | | * List of Functions |
36 | | * |
37 | | * |
38 | | ****************************************************************************** |
39 | | */ |
40 | | |
41 | | /*****************************************************************************/ |
42 | | /* File Includes */ |
43 | | /*****************************************************************************/ |
44 | | /* System include files */ |
45 | | #include <stdio.h> |
46 | | #include <string.h> |
47 | | #include <stdlib.h> |
48 | | #include <assert.h> |
49 | | #include <stdarg.h> |
50 | | #include <math.h> |
51 | | #include <limits.h> |
52 | | |
53 | | /* User include files */ |
54 | | #include "ihevc_typedefs.h" |
55 | | #include "itt_video_api.h" |
56 | | #include "ihevce_api.h" |
57 | | |
58 | | #include "rc_cntrl_param.h" |
59 | | #include "rc_frame_info_collector.h" |
60 | | #include "rc_look_ahead_params.h" |
61 | | |
62 | | #include "ihevc_defs.h" |
63 | | #include "ihevc_macros.h" |
64 | | #include "ihevc_debug.h" |
65 | | #include "ihevc_structs.h" |
66 | | #include "ihevc_platform_macros.h" |
67 | | #include "ihevc_deblk.h" |
68 | | #include "ihevc_itrans_recon.h" |
69 | | #include "ihevc_chroma_itrans_recon.h" |
70 | | #include "ihevc_chroma_intra_pred.h" |
71 | | #include "ihevc_intra_pred.h" |
72 | | #include "ihevc_inter_pred.h" |
73 | | #include "ihevc_mem_fns.h" |
74 | | #include "ihevc_padding.h" |
75 | | #include "ihevc_weighted_pred.h" |
76 | | #include "ihevc_sao.h" |
77 | | #include "ihevc_resi_trans.h" |
78 | | #include "ihevc_quant_iquant_ssd.h" |
79 | | #include "ihevc_cabac_tables.h" |
80 | | #include "ihevc_common_tables.h" |
81 | | |
82 | | #include "ihevce_defs.h" |
83 | | #include "ihevce_hle_interface.h" |
84 | | #include "ihevce_lap_enc_structs.h" |
85 | | #include "ihevce_multi_thrd_structs.h" |
86 | | #include "ihevce_multi_thrd_funcs.h" |
87 | | #include "ihevce_me_common_defs.h" |
88 | | #include "ihevce_had_satd.h" |
89 | | #include "ihevce_error_codes.h" |
90 | | #include "ihevce_bitstream.h" |
91 | | #include "ihevce_cabac.h" |
92 | | #include "ihevce_rdoq_macros.h" |
93 | | #include "ihevce_function_selector.h" |
94 | | #include "ihevce_enc_structs.h" |
95 | | #include "ihevce_entropy_structs.h" |
96 | | #include "ihevce_cmn_utils_instr_set_router.h" |
97 | | #include "ihevce_ipe_instr_set_router.h" |
98 | | #include "ihevce_decomp_pre_intra_structs.h" |
99 | | #include "ihevce_decomp_pre_intra_pass.h" |
100 | | #include "ihevce_enc_loop_structs.h" |
101 | | #include "ihevce_nbr_avail.h" |
102 | | #include "ihevce_enc_loop_utils.h" |
103 | | #include "ihevce_sub_pic_rc.h" |
104 | | #include "ihevce_global_tables.h" |
105 | | #include "ihevce_bs_compute_ctb.h" |
106 | | #include "ihevce_cabac_rdo.h" |
107 | | #include "ihevce_deblk.h" |
108 | | #include "ihevce_frame_process.h" |
109 | | #include "ihevce_rc_enc_structs.h" |
110 | | #include "hme_datatype.h" |
111 | | #include "hme_interface.h" |
112 | | #include "hme_common_defs.h" |
113 | | #include "hme_defs.h" |
114 | | #include "hme_common_utils.h" |
115 | | #include "ihevce_me_instr_set_router.h" |
116 | | #include "ihevce_enc_subpel_gen.h" |
117 | | #include "ihevce_inter_pred.h" |
118 | | #include "ihevce_mv_pred.h" |
119 | | #include "ihevce_mv_pred_merge.h" |
120 | | #include "ihevce_enc_loop_inter_mode_sifter.h" |
121 | | #include "ihevce_enc_cu_recursion.h" |
122 | | #include "ihevce_enc_loop_pass.h" |
123 | | #include "ihevce_common_utils.h" |
124 | | #include "ihevce_dep_mngr_interface.h" |
125 | | #include "ihevce_sao.h" |
126 | | #include "ihevce_tile_interface.h" |
127 | | #include "ihevce_profile.h" |
128 | | #include "ihevce_stasino_helpers.h" |
129 | | #include "ihevce_tu_tree_selector.h" |
130 | | |
131 | | /*****************************************************************************/ |
132 | | /* Globals */ |
133 | | /*****************************************************************************/ |
134 | | |
135 | | extern UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2]; |
136 | | extern const UWORD8 gu1_hevce_scan4x4[3][16]; |
137 | | extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc[4][16]; |
138 | | extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_tr4[16]; |
139 | | extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_00[16]; |
140 | | |
141 | | /*****************************************************************************/ |
142 | | /* Constant Macros */ |
143 | | /*****************************************************************************/ |
144 | | #define ENABLE_ZERO_CBF 1 |
145 | | #define DISABLE_RDOQ_INTRA 0 |
146 | | |
147 | | /*****************************************************************************/ |
148 | | /* Function Definitions */ |
149 | | /*****************************************************************************/ |
150 | | void *ihevce_tu_tree_update( |
151 | | tu_prms_t *ps_tu_prms, |
152 | | WORD32 *pnum_tu_in_cu, |
153 | | WORD32 depth, |
154 | | WORD32 tu_split_flag, |
155 | | WORD32 tu_early_cbf, |
156 | | WORD32 i4_x_off, |
157 | | WORD32 i4_y_off) |
158 | 0 | { |
159 | | //WORD32 tu_split_flag = p_tu_split_flag[0]; |
160 | 0 | WORD32 p_tu_split_flag[4]; |
161 | 0 | WORD32 p_tu_early_cbf[4]; |
162 | |
|
163 | 0 | WORD32 tu_size = ps_tu_prms->u1_tu_size; |
164 | |
|
165 | 0 | if(((tu_size >> depth) >= 16) && (tu_split_flag & 0x1)) |
166 | 0 | { |
167 | 0 | if((tu_size >> depth) == 32) |
168 | 0 | { |
169 | | /* Get the individual TU split flags */ |
170 | 0 | p_tu_split_flag[0] = (tu_split_flag >> 16) & 0x1F; |
171 | 0 | p_tu_split_flag[1] = (tu_split_flag >> 11) & 0x1F; |
172 | 0 | p_tu_split_flag[2] = (tu_split_flag >> 6) & 0x1F; |
173 | 0 | p_tu_split_flag[3] = (tu_split_flag >> 1) & 0x1F; |
174 | | |
175 | | /* Get the early CBF flags */ |
176 | 0 | p_tu_early_cbf[0] = (tu_early_cbf >> 16) & 0x1F; |
177 | 0 | p_tu_early_cbf[1] = (tu_early_cbf >> 11) & 0x1F; |
178 | 0 | p_tu_early_cbf[2] = (tu_early_cbf >> 6) & 0x1F; |
179 | 0 | p_tu_early_cbf[3] = (tu_early_cbf >> 1) & 0x1F; |
180 | 0 | } |
181 | 0 | else |
182 | 0 | { |
183 | | /* Get the individual TU split flags */ |
184 | 0 | p_tu_split_flag[0] = ((tu_split_flag >> 4) & 0x1); |
185 | 0 | p_tu_split_flag[1] = ((tu_split_flag >> 3) & 0x1); |
186 | 0 | p_tu_split_flag[2] = ((tu_split_flag >> 2) & 0x1); |
187 | 0 | p_tu_split_flag[3] = ((tu_split_flag >> 1) & 0x1); |
188 | | |
189 | | /* Get the early CBF flags */ |
190 | 0 | p_tu_early_cbf[0] = ((tu_early_cbf >> 4) & 0x1); |
191 | 0 | p_tu_early_cbf[1] = ((tu_early_cbf >> 3) & 0x1); |
192 | 0 | p_tu_early_cbf[2] = ((tu_early_cbf >> 2) & 0x1); |
193 | 0 | p_tu_early_cbf[3] = ((tu_early_cbf >> 1) & 0x1); |
194 | 0 | } |
195 | |
|
196 | 0 | ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update( |
197 | 0 | ps_tu_prms, |
198 | 0 | pnum_tu_in_cu, |
199 | 0 | depth + 1, |
200 | 0 | p_tu_split_flag[0], |
201 | 0 | p_tu_early_cbf[0], |
202 | 0 | i4_x_off, |
203 | 0 | i4_y_off); |
204 | |
|
205 | 0 | ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update( |
206 | 0 | ps_tu_prms, |
207 | 0 | pnum_tu_in_cu, |
208 | 0 | depth + 1, |
209 | 0 | p_tu_split_flag[1], |
210 | 0 | p_tu_early_cbf[1], |
211 | 0 | (i4_x_off + (tu_size >> (depth + 1))), |
212 | 0 | i4_y_off); |
213 | |
|
214 | 0 | ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update( |
215 | 0 | ps_tu_prms, |
216 | 0 | pnum_tu_in_cu, |
217 | 0 | depth + 1, |
218 | 0 | p_tu_split_flag[2], |
219 | 0 | p_tu_early_cbf[2], |
220 | 0 | i4_x_off, |
221 | 0 | (i4_y_off + (tu_size >> (depth + 1)))); |
222 | |
|
223 | 0 | ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update( |
224 | 0 | ps_tu_prms, |
225 | 0 | pnum_tu_in_cu, |
226 | 0 | depth + 1, |
227 | 0 | p_tu_split_flag[3], |
228 | 0 | p_tu_early_cbf[3], |
229 | 0 | (i4_x_off + (tu_size >> (depth + 1))), |
230 | 0 | (i4_y_off + (tu_size >> (depth + 1)))); |
231 | 0 | } |
232 | 0 | else |
233 | 0 | { |
234 | 0 | if(tu_split_flag & 0x1) |
235 | 0 | { |
236 | | /* This piece of code will be entered for the 8x8, if it is split |
237 | | Update the 4 child TU's accordingly. */ |
238 | |
|
239 | 0 | (*pnum_tu_in_cu) += 4; |
240 | | |
241 | | /* TL TU update */ |
242 | 0 | ps_tu_prms->u1_tu_size = tu_size >> (depth + 1); |
243 | |
|
244 | 0 | ps_tu_prms->u1_x_off = i4_x_off; |
245 | |
|
246 | 0 | ps_tu_prms->u1_y_off = i4_y_off; |
247 | | |
248 | | /* Early CBF is not done for 4x4 transforms */ |
249 | 0 | ps_tu_prms->i4_early_cbf = 1; |
250 | |
|
251 | 0 | ps_tu_prms++; |
252 | | |
253 | | /* TR TU update */ |
254 | 0 | ps_tu_prms->u1_tu_size = tu_size >> (depth + 1); |
255 | |
|
256 | 0 | ps_tu_prms->u1_x_off = i4_x_off + (tu_size >> (depth + 1)); |
257 | |
|
258 | 0 | ps_tu_prms->u1_y_off = i4_y_off; |
259 | | |
260 | | /* Early CBF is not done for 4x4 transforms */ |
261 | 0 | ps_tu_prms->i4_early_cbf = 1; |
262 | |
|
263 | 0 | ps_tu_prms++; |
264 | | |
265 | | /* BL TU update */ |
266 | 0 | ps_tu_prms->u1_tu_size = tu_size >> (depth + 1); |
267 | |
|
268 | 0 | ps_tu_prms->u1_x_off = i4_x_off; |
269 | |
|
270 | 0 | ps_tu_prms->u1_y_off = i4_y_off + (tu_size >> (depth + 1)); |
271 | | |
272 | | /* Early CBF is not done for 4x4 transforms */ |
273 | 0 | ps_tu_prms->i4_early_cbf = 1; |
274 | |
|
275 | 0 | ps_tu_prms++; |
276 | | |
277 | | /* BR TU update */ |
278 | 0 | ps_tu_prms->u1_tu_size = tu_size >> (depth + 1); |
279 | |
|
280 | 0 | ps_tu_prms->u1_x_off = i4_x_off + (tu_size >> (depth + 1)); |
281 | |
|
282 | 0 | ps_tu_prms->u1_y_off = i4_y_off + (tu_size >> (depth + 1)); |
283 | | |
284 | | /* Early CBF is not done for 4x4 transforms */ |
285 | 0 | ps_tu_prms->i4_early_cbf = 1; |
286 | 0 | } |
287 | 0 | else |
288 | 0 | { |
289 | | /* Update the TU params */ |
290 | 0 | ps_tu_prms->u1_tu_size = tu_size >> depth; |
291 | |
|
292 | 0 | ps_tu_prms->u1_x_off = i4_x_off; |
293 | |
|
294 | 0 | ps_tu_prms->u1_y_off = i4_y_off; |
295 | |
|
296 | 0 | (*pnum_tu_in_cu)++; |
297 | | |
298 | | /* Early CBF update for current TU */ |
299 | 0 | ps_tu_prms->i4_early_cbf = tu_early_cbf & 0x1; |
300 | 0 | } |
301 | 0 | if((*pnum_tu_in_cu) < MAX_TU_IN_CTB) |
302 | 0 | { |
303 | 0 | ps_tu_prms++; |
304 | |
|
305 | 0 | ps_tu_prms->u1_tu_size = tu_size; |
306 | 0 | } |
307 | 0 | } |
308 | |
|
309 | 0 | return ps_tu_prms; |
310 | 0 | } |
311 | | |
312 | | /*! |
313 | | ****************************************************************************** |
314 | | * \if Function name : ihevce_compute_quant_rel_param \endif |
315 | | * |
316 | | * \brief |
317 | | * This function updates quantization related parameters like qp_mod_6 etc in |
318 | | * context according to new qp |
319 | | * |
320 | | * \date |
321 | | * 08/01/2013 |
322 | | * |
323 | | * \author |
324 | | * Ittiam |
325 | | * |
326 | | * \return |
327 | | * |
328 | | * List of Functions |
329 | | * |
330 | | * |
331 | | ****************************************************************************** |
332 | | */ |
333 | | void ihevce_compute_quant_rel_param(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD8 i1_cu_qp) |
334 | 0 | { |
335 | 0 | WORD32 i4_div_factor; |
336 | |
|
337 | 0 | ps_ctxt->i4_chrm_cu_qp = |
338 | 0 | (ps_ctxt->u1_chroma_array_type == 2) |
339 | 0 | ? MIN(i1_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51) |
340 | 0 | : gai1_ihevc_chroma_qp_scale[i1_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET]; |
341 | 0 | ps_ctxt->i4_cu_qp_div6 = (i1_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6; |
342 | 0 | i4_div_factor = (i1_cu_qp + 3) / 6; |
343 | 0 | i4_div_factor = CLIP3(i4_div_factor, 3, 6); |
344 | 0 | ps_ctxt->i4_cu_qp_mod6 = (i1_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6; |
345 | 0 | ps_ctxt->i4_chrm_cu_qp_div6 = (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6; |
346 | 0 | ps_ctxt->i4_chrm_cu_qp_mod6 = (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6; |
347 | |
|
348 | 0 | #define INTER_RND_QP_BY_6 |
349 | 0 | #ifdef INTER_RND_QP_BY_6 |
350 | | /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */ |
351 | 0 | { |
352 | 0 | ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = |
353 | 0 | (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)6) + 0.5f); |
354 | 0 | } |
355 | | #else |
356 | | /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */ |
357 | | ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3; |
358 | | #endif |
359 | |
|
360 | 0 | if(ISLICE == ps_ctxt->i1_slice_type) |
361 | 0 | { |
362 | | /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */ |
363 | 0 | ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] = |
364 | 0 | (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)3) + 0.5f); |
365 | 0 | } |
366 | 0 | else |
367 | 0 | { |
368 | 0 | if(0) /*TRAQO_EXT_ENABLE_ONE_THIRD_RND*/ |
369 | 0 | { |
370 | | /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */ |
371 | 0 | ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] = |
372 | 0 | (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)3) + 0.5f); |
373 | 0 | } |
374 | 0 | else |
375 | 0 | { |
376 | | /* quant factor without RDOQ is 1/6th of shift for intra in inter pic */ |
377 | 0 | ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] = |
378 | 0 | ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER]; |
379 | | /* (1 << QUANT_ROUND_FACTOR_Q) / 6; */ |
380 | 0 | } |
381 | 0 | } |
382 | 0 | } |
383 | | |
384 | | /*! |
385 | | ****************************************************************************** |
386 | | * \if Function name : ihevce_populate_cl_cu_lambda_prms \endif |
387 | | * |
388 | | * \brief |
389 | | * Function whihc calculates the Lambda params for current picture |
390 | | * |
391 | | * \param[in] ps_enc_ctxt : encoder ctxt pointer |
392 | | * \param[in] ps_cur_pic_ctxt : current pic ctxt |
393 | | * \param[in] i4_cur_frame_qp : current pic QP |
394 | | * \param[in] first_field : is first field flag |
395 | | * \param[in] i4_temporal_lyr_id : Current picture layer id |
396 | | * |
397 | | * \return |
398 | | * None |
399 | | * |
400 | | * \author |
401 | | * Ittiam |
402 | | * |
403 | | ***************************************************************************** |
404 | | */ |
405 | | void ihevce_populate_cl_cu_lambda_prms( |
406 | | ihevce_enc_loop_ctxt_t *ps_ctxt, |
407 | | frm_lambda_ctxt_t *ps_frm_lamda, |
408 | | WORD32 i4_slice_type, |
409 | | WORD32 i4_temporal_lyr_id, |
410 | | WORD32 i4_lambda_type) |
411 | 0 | { |
412 | 0 | WORD32 i4_curr_cu_qp, i4_curr_cu_qp_offset; |
413 | 0 | double lambda_modifier; |
414 | 0 | double lambda_uv_modifier; |
415 | 0 | double lambda; |
416 | 0 | double lambda_uv; |
417 | |
|
418 | 0 | WORD32 i4_qp_bdoffset = 6 * (ps_ctxt->u1_bit_depth - 8); |
419 | | |
420 | | /*Populate lamda modifier */ |
421 | 0 | ps_ctxt->i4_lamda_modifier = ps_frm_lamda->lambda_modifier; |
422 | 0 | ps_ctxt->i4_uv_lamda_modifier = ps_frm_lamda->lambda_uv_modifier; |
423 | 0 | ps_ctxt->i4_temporal_layer_id = i4_temporal_lyr_id; |
424 | |
|
425 | 0 | for(i4_curr_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp; |
426 | 0 | i4_curr_cu_qp <= ps_ctxt->ps_rc_quant_ctxt->i2_max_qp; |
427 | 0 | i4_curr_cu_qp++) |
428 | 0 | { |
429 | 0 | WORD32 chroma_qp = (ps_ctxt->i4_chroma_format == IV_YUV_422SP_UV) |
430 | 0 | ? MIN(i4_curr_cu_qp, 51) |
431 | 0 | : gai1_ihevc_chroma_qp_scale[i4_curr_cu_qp + MAX_QP_BD_OFFSET]; |
432 | |
|
433 | 0 | i4_curr_cu_qp_offset = i4_curr_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset; |
434 | |
|
435 | 0 | lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0)); |
436 | 0 | lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0)); |
437 | |
|
438 | 0 | if((BSLICE == i4_slice_type) && (i4_temporal_lyr_id)) |
439 | 0 | { |
440 | 0 | lambda_modifier = ps_frm_lamda->lambda_modifier * |
441 | 0 | CLIP3((((double)(i4_curr_cu_qp - 12)) / 6.0), 2.00, 4.00); |
442 | 0 | lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier * |
443 | 0 | CLIP3((((double)(chroma_qp - 12)) / 6.0), 2.00, 4.00); |
444 | 0 | } |
445 | 0 | else |
446 | 0 | { |
447 | 0 | lambda_modifier = ps_frm_lamda->lambda_modifier; |
448 | 0 | lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier; |
449 | 0 | } |
450 | 0 | if(ps_ctxt->i4_use_const_lamda_modifier) |
451 | 0 | { |
452 | 0 | if(ISLICE == ps_ctxt->i1_slice_type) |
453 | 0 | { |
454 | 0 | lambda_modifier = ps_ctxt->f_i_pic_lamda_modifier; |
455 | 0 | lambda_uv_modifier = ps_ctxt->f_i_pic_lamda_modifier; |
456 | 0 | } |
457 | 0 | else |
458 | 0 | { |
459 | 0 | lambda_modifier = CONST_LAMDA_MOD_VAL; |
460 | 0 | lambda_uv_modifier = CONST_LAMDA_MOD_VAL; |
461 | 0 | } |
462 | 0 | } |
463 | 0 | switch(i4_lambda_type) |
464 | 0 | { |
465 | 0 | case 0: |
466 | 0 | { |
467 | 0 | i4_qp_bdoffset = 0; |
468 | |
|
469 | 0 | lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0)); |
470 | 0 | lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0)); |
471 | |
|
472 | 0 | lambda *= lambda_modifier; |
473 | 0 | lambda_uv *= lambda_uv_modifier; |
474 | |
|
475 | 0 | ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] = |
476 | 0 | (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT)); |
477 | |
|
478 | 0 | ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] = |
479 | 0 | (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT)); |
480 | |
|
481 | 0 | ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] = |
482 | 0 | (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT)); |
483 | 0 | if(ps_ctxt->i4_use_const_lamda_modifier) |
484 | 0 | { |
485 | 0 | ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] = |
486 | 0 | (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)); |
487 | 0 | } |
488 | 0 | else |
489 | 0 | { |
490 | 0 | ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] = |
491 | 0 | (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT)); |
492 | 0 | } |
493 | |
|
494 | 0 | ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] = |
495 | 0 | (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)); |
496 | |
|
497 | 0 | ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] = |
498 | 0 | ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset]; |
499 | |
|
500 | 0 | ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] = |
501 | 0 | ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset]; |
502 | |
|
503 | 0 | ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] = |
504 | 0 | ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset]; |
505 | |
|
506 | 0 | ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] = |
507 | 0 | ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset]; |
508 | |
|
509 | 0 | break; |
510 | 0 | } |
511 | 0 | case 1: |
512 | 0 | { |
513 | 0 | lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0)); |
514 | 0 | lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0)); |
515 | |
|
516 | 0 | lambda *= lambda_modifier; |
517 | 0 | lambda_uv *= lambda_uv_modifier; |
518 | |
|
519 | 0 | ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] = |
520 | 0 | (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT)); |
521 | |
|
522 | 0 | ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] = |
523 | 0 | (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT)); |
524 | |
|
525 | 0 | ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] = |
526 | 0 | (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT)); |
527 | 0 | if(ps_ctxt->i4_use_const_lamda_modifier) |
528 | 0 | { |
529 | 0 | ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] = |
530 | 0 | (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)); |
531 | 0 | } |
532 | 0 | else |
533 | 0 | { |
534 | 0 | ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] = |
535 | 0 | (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT)); |
536 | 0 | } |
537 | 0 | ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] = |
538 | 0 | (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)); |
539 | |
|
540 | 0 | ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] = |
541 | 0 | ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset]; |
542 | |
|
543 | 0 | ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] = |
544 | 0 | ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset]; |
545 | |
|
546 | 0 | ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] = |
547 | 0 | ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset]; |
548 | |
|
549 | 0 | ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] = |
550 | 0 | ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset]; |
551 | |
|
552 | 0 | break; |
553 | 0 | } |
554 | 0 | case 2: |
555 | 0 | { |
556 | 0 | lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0)); |
557 | 0 | lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0)); |
558 | |
|
559 | 0 | lambda *= lambda_modifier; |
560 | 0 | lambda_uv *= lambda_uv_modifier; |
561 | |
|
562 | 0 | ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] = |
563 | 0 | (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT)); |
564 | |
|
565 | 0 | ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] = |
566 | 0 | (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT)); |
567 | |
|
568 | 0 | ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] = |
569 | 0 | (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT)); |
570 | |
|
571 | 0 | if(ps_ctxt->i4_use_const_lamda_modifier) |
572 | 0 | { |
573 | 0 | ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] = |
574 | 0 | (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)); |
575 | 0 | } |
576 | 0 | else |
577 | 0 | { |
578 | 0 | ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] = |
579 | 0 | (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT)); |
580 | 0 | } |
581 | 0 | ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] = |
582 | 0 | (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)); |
583 | | |
584 | | /* lambda corresponding to 8- bit, for metrics based on 8- bit ( Example 8bit SAD in encloop)*/ |
585 | 0 | lambda = pow(2.0, (((double)(i4_curr_cu_qp - 12)) / 3.0)); |
586 | 0 | lambda_uv = pow(2.0, (((double)(chroma_qp - 12)) / 3.0)); |
587 | |
|
588 | 0 | lambda *= lambda_modifier; |
589 | 0 | lambda_uv *= lambda_uv_modifier; |
590 | |
|
591 | 0 | ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] = |
592 | 0 | (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT)); |
593 | |
|
594 | 0 | ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] = |
595 | 0 | (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT)); |
596 | |
|
597 | 0 | ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] = |
598 | 0 | (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT)); |
599 | 0 | if(ps_ctxt->i4_use_const_lamda_modifier) |
600 | 0 | { |
601 | 0 | ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] = |
602 | 0 | (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)); |
603 | 0 | } |
604 | 0 | else |
605 | 0 | { |
606 | 0 | ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] = |
607 | 0 | (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT)); |
608 | 0 | } |
609 | |
|
610 | 0 | ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] = |
611 | 0 | (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)); |
612 | |
|
613 | 0 | break; |
614 | 0 | } |
615 | 0 | default: |
616 | 0 | { |
617 | | /* Intended to be a barren wasteland! */ |
618 | 0 | ASSERT(0); |
619 | 0 | } |
620 | 0 | } |
621 | 0 | } |
622 | 0 | } |
623 | | |
624 | | /*! |
625 | | ****************************************************************************** |
626 | | * \if Function name : ihevce_get_cl_cu_lambda_prms \endif |
627 | | * |
628 | | * \brief |
629 | | * Function whihc calculates the Lambda params for current picture |
630 | | * |
631 | | * \param[in] ps_enc_ctxt : encoder ctxt pointer |
632 | | * \param[in] ps_cur_pic_ctxt : current pic ctxt |
633 | | * \param[in] i4_cur_frame_qp : current pic QP |
634 | | * \param[in] first_field : is first field flag |
635 | | * \param[in] i4_temporal_lyr_id : Current picture layer id |
636 | | * |
637 | | * \return |
638 | | * None |
639 | | * |
640 | | * \author |
641 | | * Ittiam |
642 | | * |
643 | | ***************************************************************************** |
644 | | */ |
645 | | void ihevce_get_cl_cu_lambda_prms(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 i4_cur_cu_qp) |
646 | 0 | { |
647 | 0 | WORD32 chroma_qp = (ps_ctxt->u1_chroma_array_type == 2) |
648 | 0 | ? MIN(i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51) |
649 | 0 | : gai1_ihevc_chroma_qp_scale |
650 | 0 | [i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET]; |
651 | | |
652 | | /* closed loop ssd lambda is same as final lambda */ |
653 | 0 | ps_ctxt->i8_cl_ssd_lambda_qf = |
654 | 0 | ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]; |
655 | 0 | ps_ctxt->i8_cl_ssd_lambda_chroma_qf = |
656 | 0 | ps_ctxt |
657 | 0 | ->i8_cl_ssd_lambda_chroma_qf_array[chroma_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]; |
658 | 0 | ps_ctxt->u4_chroma_cost_weighing_factor = |
659 | 0 | ps_ctxt->au4_chroma_cost_weighing_factor_array |
660 | 0 | [chroma_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]; |
661 | | /* --- Initialized the lambda for SATD computations --- */ |
662 | | /* --- 0.95 is the multiplication factor as per HM --- */ |
663 | | /* --- 1.9 is the multiplication factor for Hadamard Transform --- */ |
664 | 0 | ps_ctxt->i4_satd_lamda = |
665 | 0 | ps_ctxt->i4_satd_lamda_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]; |
666 | 0 | ps_ctxt->i4_sad_lamda = |
667 | 0 | ps_ctxt->i4_sad_type2_lamda_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]; |
668 | 0 | } |
669 | | |
670 | | /*! |
671 | | ****************************************************************************** |
672 | | * \if Function name : ihevce_update_pred_qp \endif |
673 | | * |
674 | | * \brief |
675 | | * Computes pred qp for the given CU |
676 | | * |
677 | | * \param[in] |
678 | | * |
679 | | * \return |
680 | | * |
681 | | * |
682 | | * \author |
683 | | * Ittiam |
684 | | * |
685 | | ***************************************************************************** |
686 | | */ |
687 | | void ihevce_update_pred_qp(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 cu_pos_x, WORD32 cu_pos_y) |
688 | 0 | { |
689 | 0 | WORD32 i4_pred_qp = 0x7FFFFFFF; |
690 | 0 | WORD32 i4_top, i4_left; |
691 | 0 | if(cu_pos_x == 0 && cu_pos_y == 0) /*CTB start*/ |
692 | 0 | { |
693 | 0 | i4_pred_qp = ps_ctxt->i4_prev_QP; |
694 | 0 | } |
695 | 0 | else |
696 | 0 | { |
697 | 0 | if(cu_pos_y == 0) /*CTB boundary*/ |
698 | 0 | { |
699 | 0 | i4_top = ps_ctxt->i4_prev_QP; |
700 | 0 | } |
701 | 0 | else /*within CTB*/ |
702 | 0 | { |
703 | 0 | i4_top = ps_ctxt->ai4_qp_qg[(cu_pos_y - 1) * 8 + (cu_pos_x)]; |
704 | 0 | } |
705 | 0 | if(cu_pos_x == 0) /*CTB boundary*/ |
706 | 0 | { |
707 | 0 | i4_left = ps_ctxt->i4_prev_QP; |
708 | 0 | } |
709 | 0 | else /*within CTB*/ |
710 | 0 | { |
711 | 0 | i4_left = ps_ctxt->ai4_qp_qg[(cu_pos_y)*8 + (cu_pos_x - 1)]; |
712 | 0 | } |
713 | 0 | i4_pred_qp = (i4_left + i4_top + 1) >> 1; |
714 | 0 | } |
715 | 0 | ps_ctxt->i4_pred_qp = i4_pred_qp; |
716 | 0 | return; |
717 | 0 | } |
718 | | /*! |
719 | | ****************************************************************************** |
720 | | * \if Function name : ihevce_compute_cu_level_QP \endif |
721 | | * |
722 | | * \brief |
723 | | * Computes cu level QP with Traqo,Spatial Mod and In-frame RC |
724 | | * |
725 | | * \param[in] |
726 | | * |
727 | | * \return |
728 | | * |
729 | | * |
730 | | * \author |
731 | | * Ittiam |
732 | | * |
733 | | ***************************************************************************** |
734 | | */ |
735 | | void ihevce_compute_cu_level_QP( |
736 | | ihevce_enc_loop_ctxt_t *ps_ctxt, |
737 | | WORD32 i4_activity_for_qp, |
738 | | WORD32 i4_activity_for_lamda, |
739 | | WORD32 i4_reduce_qp) |
740 | 0 | { |
741 | | /*modify quant related param in ctxt based on current cu qp*/ |
742 | 0 | WORD32 i4_input_QP = ps_ctxt->i4_frame_mod_qp; |
743 | 0 | WORD32 cu_qp = i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset; |
744 | |
|
745 | 0 | WORD32 i4_max_qp_allowed; |
746 | 0 | WORD32 i4_min_qp_allowed; |
747 | 0 | WORD32 i4_pred_qp; |
748 | |
|
749 | 0 | i4_pred_qp = ps_ctxt->i4_pred_qp; |
750 | |
|
751 | 0 | if(ps_ctxt->i4_sub_pic_level_rc) |
752 | 0 | { |
753 | 0 | i4_max_qp_allowed = (i4_pred_qp + (25 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 2))); |
754 | 0 | i4_min_qp_allowed = (i4_pred_qp - (26 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 2))); |
755 | 0 | } |
756 | 0 | else |
757 | 0 | { |
758 | 0 | i4_max_qp_allowed = (i4_input_QP + (7 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 4))); |
759 | 0 | i4_min_qp_allowed = (i4_input_QP - (18 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 4))); |
760 | 0 | } |
761 | 0 | if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) |
762 | 0 | return; |
763 | | |
764 | | #if LAMDA_BASED_ON_QUANT |
765 | | i4_activity_for_lamda = i4_activity_for_qp; |
766 | | #endif |
767 | | |
768 | 0 | if(i4_activity_for_qp != -1) |
769 | 0 | { |
770 | 0 | cu_qp = (ps_ctxt->ps_rc_quant_ctxt |
771 | 0 | ->pi4_qp_to_qscale[i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]); |
772 | 0 | if(ps_ctxt->i4_qp_mod) |
773 | 0 | { |
774 | | /*Recompute the Qp as per enc thread's frame level Qp*/ |
775 | 0 | ASSERT(i4_activity_for_qp > 0); |
776 | 0 | cu_qp = ((cu_qp * i4_activity_for_qp) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >> |
777 | 0 | QP_LEVEL_MOD_ACT_FACTOR; |
778 | 0 | } |
779 | | |
780 | | // To avoid access of uninitialised Qscale to qp conversion table |
781 | 0 | if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale) |
782 | 0 | cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale; |
783 | 0 | else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale) |
784 | 0 | cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale; |
785 | |
|
786 | 0 | cu_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[cu_qp]; |
787 | |
|
788 | 0 | if((1 == i4_reduce_qp) && (cu_qp > 1)) |
789 | 0 | cu_qp--; |
790 | | |
791 | | /*CLIP the delta to obey standard allowed QP variation of (-26 + offset/2) to (25 + offset/2)*/ |
792 | 0 | if(cu_qp > i4_max_qp_allowed) |
793 | 0 | cu_qp = i4_max_qp_allowed; |
794 | 0 | else if(cu_qp < i4_min_qp_allowed) |
795 | 0 | cu_qp = i4_min_qp_allowed; |
796 | | |
797 | | /* CLIP to maintain Qp between user configured and min and max Qp values*/ |
798 | 0 | if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp) |
799 | 0 | cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp; |
800 | 0 | else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp) |
801 | 0 | cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp; |
802 | | |
803 | | /*cu qp must be populated in cu_analyse_t struct*/ |
804 | 0 | ps_ctxt->i4_cu_qp = cu_qp; |
805 | | /*recompute quant related param at every cu level*/ |
806 | 0 | ihevce_compute_quant_rel_param(ps_ctxt, cu_qp); |
807 | 0 | } |
808 | | |
809 | | /*Decoupling qp and lamda calculation */ |
810 | 0 | if(i4_activity_for_lamda != -1) |
811 | 0 | { |
812 | 0 | cu_qp = (ps_ctxt->ps_rc_quant_ctxt |
813 | 0 | ->pi4_qp_to_qscale[i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]); |
814 | |
|
815 | 0 | if(ps_ctxt->i4_qp_mod) |
816 | 0 | { |
817 | 0 | #if MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON |
818 | | /*Recompute the Qp as per enc thread's frame level Qp*/ |
819 | 0 | ASSERT(i4_activity_for_lamda > 0); |
820 | 0 | cu_qp = ((cu_qp * i4_activity_for_lamda) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >> |
821 | 0 | QP_LEVEL_MOD_ACT_FACTOR; |
822 | 0 | #endif |
823 | 0 | } |
824 | 0 | if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale) |
825 | 0 | cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale; |
826 | 0 | else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale) |
827 | 0 | cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale; |
828 | |
|
829 | 0 | cu_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[cu_qp]; |
830 | | |
831 | | /*CLIP the delta to obey standard allowed QP variation of (-26 + offset/2) to (25 + offset/2)*/ |
832 | 0 | if(cu_qp > i4_max_qp_allowed) |
833 | 0 | cu_qp = i4_max_qp_allowed; |
834 | 0 | else if(cu_qp < i4_min_qp_allowed) |
835 | 0 | cu_qp = i4_min_qp_allowed; |
836 | | |
837 | | /* CLIP to maintain Qp between user configured and min and max Qp values*/ |
838 | 0 | if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp) |
839 | 0 | cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp; |
840 | 0 | else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp) |
841 | 0 | cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp; |
842 | | /* get frame level lambda params */ |
843 | 0 | ihevce_get_cl_cu_lambda_prms( |
844 | 0 | ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? cu_qp : ps_ctxt->i4_frame_qp); |
845 | 0 | } |
846 | 0 | } |
847 | | |
848 | | void ihevce_update_cu_level_qp_lamda( |
849 | | ihevce_enc_loop_ctxt_t *ps_ctxt, cu_analyse_t *ps_cu_analyse, WORD32 trans_size, WORD32 is_intra) |
850 | 0 | { |
851 | 0 | WORD32 i4_act_counter = 0, i4_act_counter_lamda = 0; |
852 | |
|
853 | 0 | if(ps_cu_analyse->u1_cu_size == 64) |
854 | 0 | { |
855 | 0 | ASSERT((trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4)); |
856 | 0 | i4_act_counter = (trans_size == 16) + 2 * ((trans_size == 8) || (trans_size == 4)); |
857 | 0 | i4_act_counter_lamda = 3; |
858 | 0 | } |
859 | 0 | else if(ps_cu_analyse->u1_cu_size == 32) |
860 | 0 | { |
861 | 0 | ASSERT((trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4)); |
862 | 0 | i4_act_counter = (trans_size == 16) + 2 * ((trans_size == 8) || (trans_size == 4)); |
863 | 0 | i4_act_counter_lamda = 0; |
864 | 0 | } |
865 | 0 | else if(ps_cu_analyse->u1_cu_size == 16) |
866 | 0 | { |
867 | 0 | ASSERT((trans_size == 16) || (trans_size == 8) || (trans_size == 4)); |
868 | 0 | i4_act_counter = (trans_size == 8) || (trans_size == 4); |
869 | 0 | i4_act_counter_lamda = 0; |
870 | 0 | } |
871 | 0 | else if(ps_cu_analyse->u1_cu_size == 8) |
872 | 0 | { |
873 | 0 | ASSERT((trans_size == 8) || (trans_size == 4)); |
874 | 0 | i4_act_counter = 1; |
875 | 0 | i4_act_counter_lamda = 0; |
876 | 0 | } |
877 | 0 | else |
878 | 0 | { |
879 | 0 | ASSERT(0); |
880 | 0 | } |
881 | | |
882 | 0 | if(ps_ctxt->i4_use_ctb_level_lamda) |
883 | 0 | { |
884 | 0 | ihevce_compute_cu_level_QP( |
885 | 0 | ps_ctxt, ps_cu_analyse->i4_act_factor[i4_act_counter][is_intra], -1, 0); |
886 | 0 | } |
887 | 0 | else |
888 | 0 | { |
889 | 0 | ihevce_compute_cu_level_QP( |
890 | 0 | ps_ctxt, |
891 | 0 | ps_cu_analyse->i4_act_factor[i4_act_counter][is_intra], |
892 | 0 | ps_cu_analyse->i4_act_factor[i4_act_counter_lamda][is_intra], |
893 | 0 | 0); |
894 | 0 | } |
895 | |
|
896 | 0 | ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp; |
897 | 0 | } |
898 | | |
/**
*******************************************************************************
* \if Function name : ihevce_scan_coeffs \endif
*
* @brief * Computes the coeff buffer for a coded TU for entropy coding
*
* @par   Description
* Walks the 4x4 coded sub-blocks (csb) of the TU from the last one in scan
* order down to csb 0 and serialises them into pu1_out_data.
*
* Layout written when at least one csb flag is set:
*   - 4 header bytes : last_x, last_y, scan_idx, index (in scan order) of
*                      the last coded csb
*   - for the last coded csb, as 16 bit words:
*       csbf0flags (0xBAD0 | 1), sig_coeff_abs_gt0 flags, sig_coeff_abs_gt1
*       flags, sign flags, then one word (abs(coeff) - 1) for every
*       coefficient whose gt1 flag is set
*   - for every remaining csb, in decreasing scan index:
*       csbf0flags word (bit 0 : csb coded, forced to 1 for csb 0;
*       bit 1 : right neighbour csb coded; bit 2 : bottom neighbour csb
*       coded) and, only when bit 0 is set, the same three flag words and
*       remaining-level words as above
*
* Bit i of each flag word refers to position i of the 4x4 scan table.
*
* \param[in] pi2_quant_coeffs  Quantized coefficients, read at
*                              x_pos + (y_pos * trans_size)
*
* \param[in] pi4_subBlock2csbfId_map  Maps the raster index of a csb inside
*                              the TU to its index in pu1_csbf_buf
*
* \param[in] scan_idx          Scan index specifying the scan order
*
* \param[in] trans_size        Transform unit size (32, 16, 8 or 4)
*
* \param[inout] pu1_out_data   output coeff buffer for a coded TU for entropy coding
*
* \param[in] pu1_csbf_buf      csb flag buffer (assumed to hold only 0 or 1)
*
* \param[in] i4_csbf_stride    unused
*
* @returns  num_bytes
*  Number of bytes written to pu1_out_data; 0 when no csb flag is set,
*  -1 when trans_size is not one of the supported sizes
*
* @remarks
*  NOTE(review): the 16 bit words are stored through a UWORD16 pointer at
*  pu1_out_data + 4, so the caller presumably provides a buffer that is at
*  least 2 byte aligned - confirm
*
* \author
*  Ittiam
*
*******************************************************************************
*/

WORD32 ihevce_scan_coeffs(
    WORD16 *pi2_quant_coeffs,
    WORD32 *pi4_subBlock2csbfId_map,
    WORD32 scan_idx,
    WORD32 trans_size,
    UWORD8 *pu1_out_data,
    UWORD8 *pu1_csbf_buf,
    WORD32 i4_csbf_stride)
{
    WORD32 i, trans_unit_idx, num_gt1_flag;
    UWORD16 u2_csbf0flags;
    WORD32 num_bytes = 0;
    UWORD8 *pu1_trans_table;
    UWORD8 *pu1_csb_table;
    WORD32 shift_value, mask_value;
    UWORD16 u2_sig_coeff_abs_gt0_flags = 0, u2_sig_coeff_abs_gt1_flags = 0;
    UWORD16 u2_sign_flags;
    UWORD16 u2_abs_coeff_remaining[16];
    WORD32 blk_row, blk_col;

    UWORD8 *pu1_out_data_header;
    UWORD16 *pu2_out_data_coeff;

    WORD32 x_pos, y_pos;
    WORD32 quant_coeff;

    WORD32 num_gt0_flag;
    (void)i4_csbf_stride;
    pu1_out_data_header = pu1_out_data;
    /* Need only last 3 bits, rest are reserved for debugging and making */
    /* WORD alignment */
    u2_csbf0flags = 0xBAD0;

    /* Select proper order for your transform unit and csb based on scan_idx*/
    /* and the trans_size */

    /* scan order inside a csb */
    pu1_csb_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
    /* NOTE(review): GETRANGE presumably yields log2(trans_size) + 1 (macro  */
    /* is defined elsewhere - confirm), so after subtracting 3 shift_value   */
    /* is log2 of the number of csbs per row                                 */
    GETRANGE(shift_value, trans_size);
    shift_value = shift_value - 3; /* for finding the row no. from scan index */
    mask_value = (trans_size / 4) - 1; /* for finding the col. no. from scan index */
    switch(trans_size)
    {
    case 32:
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_8x8[scan_idx][0]);
        break;
    case 16:
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
        break;
    case 8:
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_2x2[scan_idx][0]);
        break;
    case 4:
        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_1x1[0]);
        break;
    default:
        DBG_PRINTF("Invalid Trans Size\n");
        return -1;
        break;
    }

    /* go through each csb, starting from the end of the scan order, until */
    /* the first one with a non-zero csb flag is found                     */
    for(trans_unit_idx = (trans_size * trans_size / 16) - 1; trans_unit_idx >= 0; trans_unit_idx--)
    {
        /* check for the first csb flag in our scan order */
        if(pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]])
        {
            UWORD8 u1_last_x, u1_last_y;
            /* row of csb */
            blk_row = pu1_trans_table[trans_unit_idx] >> shift_value;
            /* col of csb */
            blk_col = pu1_trans_table[trans_unit_idx] & mask_value;

            /* check for the 1st non-0 value inside the csb, again starting */
            /* from the end of the scan order                               */
            for(i = 15; i >= 0; i--)
            {
                x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
                y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;

                quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];

                if(quant_coeff != 0)
                    break;
            }

            /* a csb flagged as coded is expected to hold a non-zero coeff */
            ASSERT(i >= 0);

            u1_last_x = x_pos;
            u1_last_y = y_pos;

            /* storing last_x and last_y */
            *pu1_out_data_header = u1_last_x;
            pu1_out_data_header++;
            num_bytes++;
            *pu1_out_data_header = u1_last_y;
            pu1_out_data_header++;
            num_bytes++;

            /* storing the scan order */
            *pu1_out_data_header = scan_idx;
            pu1_out_data_header++;
            num_bytes++;
            /* storing last_sub_block pos. in scan order count */
            *pu1_out_data_header = trans_unit_idx;
            pu1_out_data_header++;
            num_bytes++;

            /* the first 4 bytes are stored; everything after this is written */
            /* as 16 bit words, hence the switch to a UWORD16 pointer         */
            pu2_out_data_coeff = (UWORD16 *)pu1_out_data_header;

            /* u2_csbf0flags word */
            u2_csbf0flags = 0xBAD0 | 1; /*since right&bottom csbf is 0*/
            /* storing u2_csbf0flags word */
            *pu2_out_data_coeff = u2_csbf0flags;
            pu2_out_data_coeff++;
            num_bytes += 2;

            /* the last significant coeff found above is accounted for here; */
            /* the gt0 / gt1 flag words still hold their initial value of 0  */
            num_gt0_flag = 1;
            num_gt1_flag = 0;
            u2_sign_flags = 0;

            /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
            u2_sig_coeff_abs_gt0_flags = u2_sig_coeff_abs_gt0_flags | (1 << i);
            if(abs(quant_coeff) > 1)
            {
                /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
                u2_sig_coeff_abs_gt1_flags = u2_sig_coeff_abs_gt1_flags | (1 << i);
                /* update u2_abs_coeff_remaining */
                u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;

                num_gt1_flag++;
            }

            if(quant_coeff < 0)
            {
                /* set the i th bit of u2_sign_flags */
                u2_sign_flags = u2_sign_flags | (1 << i);
            }

            /* Test remaining elements in our scan order */
            /* Can optimize further by CLZ macro */
            for(i = i - 1; i >= 0; i--)
            {
                x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
                y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;

                quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];

                if(quant_coeff != 0)
                {
                    /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
                    u2_sig_coeff_abs_gt0_flags |= (1 << i);

                    /* once MAX_GT_ONE significant coeffs have been seen in   */
                    /* this csb, every further one is flagged gt1 and its     */
                    /* (abs - 1) level is stored, even when abs is 1          */
                    if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE))
                    {
                        /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
                        u2_sig_coeff_abs_gt1_flags |= (1 << i);

                        /* update u2_abs_coeff_remaining */
                        u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;

                        num_gt1_flag++; /* no. of ones in sig_coeff_abs_gt1_flag */
                    }

                    if(quant_coeff < 0)
                    {
                        /* set the i th bit of u2_sign_flags */
                        u2_sign_flags |= (1 << i);
                    }

                    num_gt0_flag++;
                }
            }

            /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags;
            pu2_out_data_coeff++;
            num_bytes += 2;
            /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags;
            pu2_out_data_coeff++;
            num_bytes += 2;
            /* storing u2_sign_flags 2 bytes */
            *pu2_out_data_coeff = u2_sign_flags;
            pu2_out_data_coeff++;
            num_bytes += 2;

            /* Store the u2_abs_coeff_remaining[] */
            for(i = 0; i < num_gt1_flag; i++)
            {
                /* storing u2_abs_coeff_remaining[i] 2 bytes */
                *pu2_out_data_coeff = u2_abs_coeff_remaining[i];
                pu2_out_data_coeff++;
                num_bytes += 2;
            }

            break; /*We just need this loop for finding 1st non-zero csb only*/
        }
    }

    /* go through remaining csb in the scan order; when no coded csb was  */
    /* found above trans_unit_idx is -1 here and this loop does not run   */
    for(trans_unit_idx = trans_unit_idx - 1; trans_unit_idx >= 0; trans_unit_idx--)
    {
        blk_row = pu1_trans_table[trans_unit_idx] >> shift_value; /*row of csb*/
        blk_col = pu1_trans_table[trans_unit_idx] & mask_value; /*col of csb*/

        /* u2_csbf0flags word */
        u2_csbf0flags = 0xBAD0 | /* assuming csbf_buf has only 0 or 1 values */
                        (pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]]);

        /********************************************************************/
        /* Minor hack: As per HEVC spec csbf is not signalled in stream for */
        /* block0, instead sig coeff map is directly signalled. This is     */
        /* taken care by forcing csbf for block0 to be 1 even if it is 0    */
        /********************************************************************/
        if(0 == trans_unit_idx)
        {
            u2_csbf0flags |= 1;
        }

        if((blk_col + 1 < trans_size / 4)) /* checking right boundary */
        {
            if(pu1_csbf_buf[pi4_subBlock2csbfId_map[blk_row * trans_size / 4 + blk_col + 1]])
            {
                /* set the 2nd bit of u2_csbf0flags for right csbf */
                u2_csbf0flags = u2_csbf0flags | (1 << 1);
            }
        }
        if((blk_row + 1 < trans_size / 4)) /* checking bottom boundary */
        {
            if(pu1_csbf_buf[pi4_subBlock2csbfId_map[(blk_row + 1) * trans_size / 4 + blk_col]])
            {
                /* set the 3rd bit of u2_csbf0flags for bottom csbf */
                u2_csbf0flags = u2_csbf0flags | (1 << 2);
            }
        }

        /* storing u2_csbf0flags word */
        *pu2_out_data_coeff = u2_csbf0flags;
        pu2_out_data_coeff++;
        num_bytes += 2;

        /* check for the csb flag in our scan order */
        if(u2_csbf0flags & 0x1)
        {
            u2_sig_coeff_abs_gt0_flags = 0;
            u2_sig_coeff_abs_gt1_flags = 0;
            u2_sign_flags = 0;

            num_gt0_flag = 0;
            num_gt1_flag = 0;
            /* check for the non-0 values inside the csb in our scan order */
            /* Can optimize further by CLZ macro */
            for(i = 15; i >= 0; i--)
            {
                x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
                y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;

                quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];

                if(quant_coeff != 0)
                {
                    /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
                    u2_sig_coeff_abs_gt0_flags |= (1 << i);

                    /* same gt1 rule as for the last coded csb above */
                    if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE))
                    {
                        /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
                        u2_sig_coeff_abs_gt1_flags |= (1 << i);

                        /* update u2_abs_coeff_remaining */
                        u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;

                        num_gt1_flag++;
                    }

                    if(quant_coeff < 0)
                    {
                        /* set the i th bit of u2_sign_flags */
                        u2_sign_flags = u2_sign_flags | (1 << i);
                    }

                    num_gt0_flag++;
                }
            }

            /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags;
            pu2_out_data_coeff++;
            num_bytes += 2;

            /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */
            *pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags;
            pu2_out_data_coeff++;
            num_bytes += 2;

            /* storing u2_sign_flags 2 bytes */
            *pu2_out_data_coeff = u2_sign_flags;
            pu2_out_data_coeff++;
            num_bytes += 2;

            /* Store the u2_abs_coeff_remaining[] */
            for(i = 0; i < num_gt1_flag; i++)
            {
                /* storing u2_abs_coeff_remaining[i] 2 bytes */
                *pu2_out_data_coeff = u2_abs_coeff_remaining[i];
                pu2_out_data_coeff++;
                num_bytes += 2;
            }
        }
    }

    return num_bytes; /* Return the number of bytes written to out_data */
}
1244 | | |
1245 | | /** |
1246 | | ******************************************************************************* |
1247 | | * \if Function name : ihevce_populate_intra_pred_mode \endif |
1248 | | * |
1249 | | * \brief * populates intra pred modes,b2_mpm_idx,b1_prev_intra_luma_pred_flag & |
1250 | | * b5_rem_intra_pred_mode for a CU based on nieghbouring CUs, |
1251 | | * |
1252 | | * \par Description |
1253 | | * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode |
1254 | | * for a CU |
1255 | | * |
1256 | | * \param[in] top_intra_mode Top intra mode |
1257 | | * \param[in] left_intra_mode Left intra mode |
1258 | | * \param[in] available_top Top availability flag |
1259 | | * \param[in] available_left Left availability flag |
1260 | | * \param[in] cu_pos_y CU 'y' position |
1261 | | * \param[in] ps_cand_mode_list pointer to populate candidate list |
1262 | | * |
1263 | | * \returns none |
1264 | | * |
1265 | | * \author |
1266 | | * Ittiam |
1267 | | * |
1268 | | ******************************************************************************* |
1269 | | */ |
1270 | | |
1271 | | void ihevce_populate_intra_pred_mode( |
1272 | | WORD32 top_intra_mode, |
1273 | | WORD32 left_intra_mode, |
1274 | | WORD32 available_top, |
1275 | | WORD32 available_left, |
1276 | | WORD32 cu_pos_y, |
1277 | | WORD32 *ps_cand_mode_list) |
1278 | 0 | { |
1279 | | /* local variables */ |
1280 | 0 | WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top; |
1281 | | |
1282 | | /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */ |
1283 | | /* N = top */ |
1284 | 0 | if(0 == available_top) |
1285 | 0 | { |
1286 | 0 | cand_intra_pred_mode_top = INTRA_DC; |
1287 | 0 | } |
1288 | | /* for neighbour != INTRA, setting DC is done outside */ |
1289 | 0 | else if(0 == cu_pos_y) /* It's on the CTB boundary */ |
1290 | 0 | { |
1291 | 0 | cand_intra_pred_mode_top = INTRA_DC; |
1292 | 0 | } |
1293 | 0 | else |
1294 | 0 | { |
1295 | 0 | cand_intra_pred_mode_top = top_intra_mode; |
1296 | 0 | } |
1297 | | |
1298 | | /* N = left */ |
1299 | 0 | if(0 == available_left) |
1300 | 0 | { |
1301 | 0 | cand_intra_pred_mode_left = INTRA_DC; |
1302 | 0 | } |
1303 | | /* for neighbour != INTRA, setting DC is done outside */ |
1304 | 0 | else |
1305 | 0 | { |
1306 | 0 | cand_intra_pred_mode_left = left_intra_mode; |
1307 | 0 | } |
1308 | | |
1309 | | /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */ |
1310 | 0 | if(cand_intra_pred_mode_left == cand_intra_pred_mode_top) |
1311 | 0 | { |
1312 | 0 | if(cand_intra_pred_mode_left < 2) |
1313 | 0 | { |
1314 | 0 | ps_cand_mode_list[0] = INTRA_PLANAR; |
1315 | 0 | ps_cand_mode_list[1] = INTRA_DC; |
1316 | 0 | ps_cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */ |
1317 | 0 | } |
1318 | 0 | else |
1319 | 0 | { |
1320 | 0 | ps_cand_mode_list[0] = cand_intra_pred_mode_left; |
1321 | 0 | ps_cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32); |
1322 | 0 | ps_cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32); |
1323 | 0 | } |
1324 | 0 | } |
1325 | 0 | else |
1326 | 0 | { |
1327 | 0 | ps_cand_mode_list[0] = cand_intra_pred_mode_left; |
1328 | 0 | ps_cand_mode_list[1] = cand_intra_pred_mode_top; |
1329 | |
|
1330 | 0 | if((cand_intra_pred_mode_left != INTRA_PLANAR) && |
1331 | 0 | (cand_intra_pred_mode_top != INTRA_PLANAR)) |
1332 | 0 | { |
1333 | 0 | ps_cand_mode_list[2] = INTRA_PLANAR; |
1334 | 0 | } |
1335 | 0 | else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC)) |
1336 | 0 | { |
1337 | 0 | ps_cand_mode_list[2] = INTRA_DC; |
1338 | 0 | } |
1339 | 0 | else |
1340 | 0 | { |
1341 | 0 | ps_cand_mode_list[2] = INTRA_ANGULAR(26); |
1342 | 0 | } |
1343 | 0 | } |
1344 | 0 | } |
1345 | | /** |
1346 | | ******************************************************************************* |
1347 | | * \if Function name : ihevce_intra_pred_mode_signaling \endif |
1348 | | * |
1349 | | * \brief * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & |
1350 | | * b5_rem_intra_pred_mode for a CU |
1351 | | * |
1352 | | * \par Description |
1353 | | * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode |
1354 | | * for a CU |
1355 | | * |
1356 | | * \param[in] ps_nbr_top Top neighbour context |
1357 | | * \param[in] ps_nbr_left Left neighbour context |
1358 | | * \param[in] available_top Top availability flag |
1359 | | * \param[in] available_left Left availability flag |
1360 | | * \param[in] cu_pos_y CU 'y' position |
1361 | | * \param[in] luma_intra_pred_mode_current the intra_pred_mode of current block |
1362 | | * \param[inout] ps_intra_pred_mode_current |
1363 | | * Pointer to structure having b1_prev_intra_luma_pred_flag, b2_mpm_idx and |
1364 | | * b5_rem_intra_pred_mode |
1365 | | * |
1366 | | * \returns none |
1367 | | * |
1368 | | * \author |
1369 | | * Ittiam |
1370 | | * |
1371 | | ******************************************************************************* |
1372 | | */ |
1373 | | |
1374 | | void ihevce_intra_pred_mode_signaling( |
1375 | | WORD32 top_intra_mode, |
1376 | | WORD32 left_intra_mode, |
1377 | | WORD32 available_top, |
1378 | | WORD32 available_left, |
1379 | | WORD32 cu_pos_y, |
1380 | | WORD32 luma_intra_pred_mode_current, |
1381 | | intra_prev_rem_flags_t *ps_intra_pred_mode_current) |
1382 | 0 | { |
1383 | | /* local variables */ |
1384 | 0 | WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top; |
1385 | 0 | WORD32 cand_mode_list[3]; |
1386 | |
|
1387 | 0 | ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0; |
1388 | 0 | ps_intra_pred_mode_current->b2_mpm_idx = 0; // for safety purpose |
1389 | 0 | ps_intra_pred_mode_current->b5_rem_intra_pred_mode = 0; |
1390 | | |
1391 | | /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */ |
1392 | | /* N = top */ |
1393 | 0 | if(0 == available_top) |
1394 | 0 | { |
1395 | 0 | cand_intra_pred_mode_top = INTRA_DC; |
1396 | 0 | } |
1397 | | /* for neighbour != INTRA, setting DC is done outside */ |
1398 | 0 | else if(0 == cu_pos_y) /* It's on the CTB boundary */ |
1399 | 0 | { |
1400 | 0 | cand_intra_pred_mode_top = INTRA_DC; |
1401 | 0 | } |
1402 | 0 | else |
1403 | 0 | { |
1404 | 0 | cand_intra_pred_mode_top = top_intra_mode; |
1405 | 0 | } |
1406 | | |
1407 | | /* N = left */ |
1408 | 0 | if(0 == available_left) |
1409 | 0 | { |
1410 | 0 | cand_intra_pred_mode_left = INTRA_DC; |
1411 | 0 | } |
1412 | | /* for neighbour != INTRA, setting DC is done outside */ |
1413 | 0 | else |
1414 | 0 | { |
1415 | 0 | cand_intra_pred_mode_left = left_intra_mode; |
1416 | 0 | } |
1417 | | |
1418 | | /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */ |
1419 | 0 | if(cand_intra_pred_mode_left == cand_intra_pred_mode_top) |
1420 | 0 | { |
1421 | 0 | if(cand_intra_pred_mode_left < 2) |
1422 | 0 | { |
1423 | 0 | cand_mode_list[0] = INTRA_PLANAR; |
1424 | 0 | cand_mode_list[1] = INTRA_DC; |
1425 | 0 | cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */ |
1426 | 0 | } |
1427 | 0 | else |
1428 | 0 | { |
1429 | 0 | cand_mode_list[0] = cand_intra_pred_mode_left; |
1430 | 0 | cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32); |
1431 | 0 | cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32); |
1432 | 0 | } |
1433 | 0 | } |
1434 | 0 | else |
1435 | 0 | { |
1436 | 0 | cand_mode_list[0] = cand_intra_pred_mode_left; |
1437 | 0 | cand_mode_list[1] = cand_intra_pred_mode_top; |
1438 | |
|
1439 | 0 | if((cand_intra_pred_mode_left != INTRA_PLANAR) && |
1440 | 0 | (cand_intra_pred_mode_top != INTRA_PLANAR)) |
1441 | 0 | { |
1442 | 0 | cand_mode_list[2] = INTRA_PLANAR; |
1443 | 0 | } |
1444 | 0 | else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC)) |
1445 | 0 | { |
1446 | 0 | cand_mode_list[2] = INTRA_DC; |
1447 | 0 | } |
1448 | 0 | else |
1449 | 0 | { |
1450 | 0 | cand_mode_list[2] = INTRA_ANGULAR(26); |
1451 | 0 | } |
1452 | 0 | } |
1453 | | |
1454 | | /* Signal Generation */ |
1455 | | |
1456 | | /* Flag & mpm_index generation */ |
1457 | 0 | if(cand_mode_list[0] == luma_intra_pred_mode_current) |
1458 | 0 | { |
1459 | 0 | ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1; |
1460 | 0 | ps_intra_pred_mode_current->b2_mpm_idx = 0; |
1461 | 0 | } |
1462 | 0 | else if(cand_mode_list[1] == luma_intra_pred_mode_current) |
1463 | 0 | { |
1464 | 0 | ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1; |
1465 | 0 | ps_intra_pred_mode_current->b2_mpm_idx = 1; |
1466 | 0 | } |
1467 | 0 | else if(cand_mode_list[2] == luma_intra_pred_mode_current) |
1468 | 0 | { |
1469 | 0 | ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1; |
1470 | 0 | ps_intra_pred_mode_current->b2_mpm_idx = 2; |
1471 | 0 | } |
1472 | | /* Flag & b5_rem_intra_pred_mode generation */ |
1473 | 0 | else |
1474 | 0 | { |
1475 | 0 | WORD32 rem_mode; |
1476 | |
|
1477 | 0 | ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0; |
1478 | | |
1479 | | /* sorting cand_mode_list */ |
1480 | 0 | if(cand_mode_list[0] > cand_mode_list[1]) |
1481 | 0 | { |
1482 | 0 | SWAP(cand_mode_list[0], cand_mode_list[1]); |
1483 | 0 | } |
1484 | 0 | if(cand_mode_list[0] > cand_mode_list[2]) |
1485 | 0 | { |
1486 | 0 | SWAP(cand_mode_list[0], cand_mode_list[2]); |
1487 | 0 | } |
1488 | 0 | if(cand_mode_list[1] > cand_mode_list[2]) |
1489 | 0 | { |
1490 | 0 | SWAP(cand_mode_list[1], cand_mode_list[2]); |
1491 | 0 | } |
1492 | |
|
1493 | 0 | rem_mode = luma_intra_pred_mode_current; |
1494 | |
|
1495 | 0 | if((rem_mode) >= cand_mode_list[2]) |
1496 | 0 | { |
1497 | 0 | (rem_mode)--; |
1498 | 0 | } |
1499 | 0 | if((rem_mode) >= cand_mode_list[1]) |
1500 | 0 | { |
1501 | 0 | (rem_mode)--; |
1502 | 0 | } |
1503 | 0 | if((rem_mode) >= cand_mode_list[0]) |
1504 | 0 | { |
1505 | 0 | (rem_mode)--; |
1506 | 0 | } |
1507 | 0 | ps_intra_pred_mode_current->b5_rem_intra_pred_mode = rem_mode; |
1508 | 0 | } |
1509 | 0 | } |
1510 | | |
1511 | | void ihevce_quant_rounding_factor_gen( |
1512 | | WORD32 i4_trans_size, |
1513 | | WORD32 is_luma, |
1514 | | rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt, |
1515 | | WORD32 *pi4_quant_round_0_1, |
1516 | | WORD32 *pi4_quant_round_1_2, |
1517 | | double i4_lamda_modifier, |
1518 | | UWORD8 i4_is_tu_level_quant_rounding) |
1519 | 0 | { |
1520 | | //WORD32 i4_scan_idx = ps_ctxt->i4_scan_idx; |
1521 | 0 | UWORD8 *pu1_ctxt_model; |
1522 | 0 | WORD32 scan_pos; |
1523 | 0 | WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag */ |
1524 | 0 | WORD32 abs_gt1_base_ctxt; |
1525 | 0 | WORD32 log2_tr_size, i; |
1526 | 0 | UWORD16 u4_bits_estimated_r0, u4_bits_estimated_r1, u4_bits_estimated_r2; |
1527 | 0 | UWORD16 u4_bits_estimated_r1_temp; |
1528 | 0 | WORD32 j = 0; |
1529 | 0 | WORD32 k = 0; |
1530 | 0 | WORD32 temp2; |
1531 | |
|
1532 | 0 | double i4_lamda_mod = i4_lamda_modifier * pow(2.0, (-8.0 / 3.0)); |
1533 | 0 | LWORD64 lamda_mod = (LWORD64)(i4_lamda_mod * (1 << LAMDA_Q_SHIFT_FACT)); |
1534 | | /* transform size to log2transform size */ |
1535 | 0 | GETRANGE(log2_tr_size, i4_trans_size); |
1536 | 0 | log2_tr_size -= 1; |
1537 | |
|
1538 | 0 | if(1 == i4_is_tu_level_quant_rounding) |
1539 | 0 | { |
1540 | 0 | entropy_context_t *ps_cur_tu_entropy; |
1541 | 0 | cab_ctxt_t *ps_cabac; |
1542 | 0 | WORD32 curr_buf_idx = ps_rdopt_entropy_ctxt->i4_curr_buf_idx; |
1543 | 0 | ps_cur_tu_entropy = &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[curr_buf_idx]; |
1544 | |
|
1545 | 0 | ps_cabac = &ps_cur_tu_entropy->s_cabac_ctxt; |
1546 | |
|
1547 | 0 | pu1_ctxt_model = &ps_cabac->au1_ctxt_models[0]; |
1548 | 0 | } |
1549 | 0 | else |
1550 | 0 | { |
1551 | 0 | pu1_ctxt_model = &ps_rdopt_entropy_ctxt->au1_init_cabac_ctxt_states[0]; |
1552 | 0 | } |
1553 | | /*If transform size is 4x4, then only one sub-block*/ |
1554 | 0 | if(is_luma) |
1555 | 0 | { |
1556 | 0 | sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG; |
1557 | 0 | abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG; |
1558 | |
|
1559 | 0 | if(3 == log2_tr_size) |
1560 | 0 | { |
1561 | | /* 8x8 transform size */ |
1562 | | /* Assuming diagnol scan idx for now */ |
1563 | 0 | sig_coeff_base_ctxt += 9; |
1564 | 0 | } |
1565 | 0 | else if(3 < log2_tr_size) |
1566 | 0 | { |
1567 | | /* larger transform sizes */ |
1568 | 0 | sig_coeff_base_ctxt += 21; |
1569 | 0 | } |
1570 | 0 | } |
1571 | 0 | else |
1572 | 0 | { |
1573 | | /* chroma context initializations */ |
1574 | 0 | sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27; |
1575 | 0 | abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16; |
1576 | |
|
1577 | 0 | if(3 == log2_tr_size) |
1578 | 0 | { |
1579 | | /* 8x8 transform size */ |
1580 | 0 | sig_coeff_base_ctxt += 9; |
1581 | 0 | } |
1582 | 0 | else if(3 < log2_tr_size) |
1583 | 0 | { |
1584 | | /* larger transform sizes */ |
1585 | 0 | sig_coeff_base_ctxt += 12; |
1586 | 0 | } |
1587 | 0 | } |
1588 | | |
1589 | | /*Transform size of 4x4 will have only a single CSB */ |
1590 | | /* derive the context inc as per section 9.3.3.1.4 */ |
1591 | |
|
1592 | 0 | if(2 == log2_tr_size) |
1593 | 0 | { |
1594 | 0 | UWORD8 sig_ctxinc; |
1595 | 0 | WORD32 state_mps; |
1596 | 0 | WORD32 gt1_ctxt = 0; |
1597 | 0 | WORD32 ctxt_set = 0; |
1598 | 0 | WORD32 ctxt_idx = 0; |
1599 | | |
1600 | | /* context set based on luma subblock pos */ |
1601 | | |
1602 | | /* Encodet the abs level gt1 bins */ |
1603 | | /* Currently calculating trade off between mps(2) and mps(1)*/ |
1604 | | /* The estimation has to be further done for mps(11) and mps(111)*/ |
1605 | | /*ctxt_set = 0 as transform 4x4 has only one csb with DC */ |
1606 | | /* gt1_ctxt = 0 for the co-ef value to be 2 */ |
1607 | |
|
1608 | 0 | ctxt_set = gt1_ctxt = 0; |
1609 | 0 | ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt; |
1610 | |
|
1611 | 0 | state_mps = pu1_ctxt_model[ctxt_idx]; |
1612 | |
|
1613 | 0 | u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1]; |
1614 | |
|
1615 | 0 | u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0]; |
1616 | |
|
1617 | 0 | QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1_temp, lamda_mod); |
1618 | 0 | for(scan_pos = 0; scan_pos < 16; scan_pos++) |
1619 | 0 | { |
1620 | 0 | *(pi4_quant_round_1_2 + scan_pos) = temp2; |
1621 | 0 | } |
1622 | |
|
1623 | 0 | for(scan_pos = 0; scan_pos < 16; scan_pos++) |
1624 | 0 | { |
1625 | | //UWORD8 nbr_csbf = 1; |
1626 | | /* derive the x,y pos */ |
1627 | 0 | UWORD8 y_pos_x_pos = scan_pos; //gu1_hevce_scan4x4[i4_scan_idx][scan_pos]; |
1628 | | |
1629 | | /* 4x4 transform size increment uses lookup */ |
1630 | 0 | sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos]; |
1631 | | |
1632 | | /*Get the mps state based on ctxt modes */ |
1633 | 0 | state_mps = pu1_ctxt_model[sig_ctxinc + sig_coeff_base_ctxt]; |
1634 | | |
1635 | | /* Bits taken to encode sig co-ef flag as 0 */ |
1636 | 0 | u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0]; |
1637 | | |
1638 | | /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */ |
1639 | | // |
1640 | 0 | u4_bits_estimated_r1 = |
1641 | 0 | (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000)); |
1642 | | |
1643 | | /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */ |
1644 | 0 | u4_bits_estimated_r1 += u4_bits_estimated_r1_temp; |
1645 | |
|
1646 | 0 | QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod); |
1647 | 0 | *(pi4_quant_round_0_1 + scan_pos) = temp2; |
1648 | 0 | } |
1649 | 0 | } |
1650 | 0 | else |
1651 | 0 | { |
1652 | 0 | UWORD8 *pu1_hevce_sigcoeff_ctxtinc; |
1653 | 0 | WORD32 is_nbr_csb_state_mps; |
1654 | |
|
1655 | 0 | WORD32 state_mps; |
1656 | 0 | WORD32 gt1_ctxt = 0; |
1657 | 0 | WORD32 ctxt_set = 0; |
1658 | 0 | WORD32 ctxt_idx; |
1659 | | /*1to2 rounding factor is same for all sub blocks except for sub-block = 0*/ |
1660 | | /*Hence will write all the sub-block with i >=1 coeff, and then overwrite for i = 0*/ |
1661 | | |
1662 | | /*ctxt_set = 0 DC subblock, the previous state did not have 2 |
1663 | | ctxt_set = 1 DC subblock, the previous state did have >= 2 |
1664 | | ctxt_set = 2 AC subblock, the previous state did not have 2 |
1665 | | ctxt_set = 3 AC subblock, the previous state did have >= 2*/ |
1666 | 0 | i = 1; |
1667 | 0 | ctxt_set = (i && is_luma) ? 2 : 0; |
1668 | |
|
1669 | 0 | ctxt_set++; |
1670 | | |
1671 | | /*0th position indicates the probability of 2 */ |
1672 | | /*1th position indicates the probability of 1 */ |
1673 | | /*2th position indicates the probability of 11 */ |
1674 | | /*3th position indicates the probability of 111 */ |
1675 | |
|
1676 | 0 | gt1_ctxt = 0; |
1677 | 0 | ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt; |
1678 | |
|
1679 | 0 | state_mps = pu1_ctxt_model[ctxt_idx]; |
1680 | |
|
1681 | 0 | u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1]; |
1682 | |
|
1683 | 0 | u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0]; |
1684 | 0 | QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod); |
1685 | |
|
1686 | 0 | for(scan_pos = 0; scan_pos < (16 * (i4_trans_size * i4_trans_size >> 4)); scan_pos++) |
1687 | 0 | { |
1688 | 0 | *(pi4_quant_round_1_2 + scan_pos) = temp2; |
1689 | 0 | } |
1690 | |
|
1691 | 0 | i = 0; |
1692 | 0 | ctxt_set = (i && is_luma) ? 2 : 0; |
1693 | 0 | ctxt_set++; |
1694 | | |
1695 | | /*0th position indicates the probability of 2 */ |
1696 | | /*1th position indicates the probability of 1 */ |
1697 | | /*2th position indicates the probability of 11 */ |
1698 | | /*3th position indicates the probability of 111 */ |
1699 | |
|
1700 | 0 | gt1_ctxt = 0; |
1701 | 0 | ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt; |
1702 | |
|
1703 | 0 | state_mps = pu1_ctxt_model[ctxt_idx]; |
1704 | |
|
1705 | 0 | u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1]; |
1706 | |
|
1707 | 0 | u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0]; |
1708 | 0 | QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod); |
1709 | |
|
1710 | 0 | for(scan_pos = 0; scan_pos < 16; scan_pos++) |
1711 | 0 | { |
1712 | 0 | *(pi4_quant_round_1_2 + ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2; |
1713 | 0 | } |
1714 | |
|
1715 | 0 | { |
1716 | 0 | WORD32 ctxt_idx; |
1717 | |
|
1718 | 0 | WORD32 nbr_csbf_0, nbr_csbf_1; |
1719 | 0 | WORD32 state_mps_0, state_mps_1; |
1720 | 0 | ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX; |
1721 | 0 | ctxt_idx += is_luma ? 0 : 2; |
1722 | | |
1723 | | /* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */ |
1724 | | /* if neighbour not available, ctxt idx = 0*/ |
1725 | 0 | nbr_csbf_0 = 0; |
1726 | 0 | ctxt_idx += nbr_csbf_0 ? 1 : 0; |
1727 | 0 | state_mps_0 = pu1_ctxt_model[ctxt_idx]; |
1728 | |
|
1729 | 0 | nbr_csbf_1 = 1; |
1730 | 0 | ctxt_idx += nbr_csbf_1 ? 1 : 0; |
1731 | 0 | state_mps_1 = pu1_ctxt_model[ctxt_idx]; |
1732 | |
|
1733 | 0 | is_nbr_csb_state_mps = ((state_mps_0 % 2) == 1) && ((state_mps_1 % 2) == 1); |
1734 | 0 | } |
1735 | |
|
1736 | 0 | if(1 == is_nbr_csb_state_mps) |
1737 | 0 | { |
1738 | 0 | for(i = 0; i < (i4_trans_size * i4_trans_size >> 4); i++) |
1739 | 0 | { |
1740 | 0 | UWORD8 sig_ctxinc; |
1741 | 0 | WORD32 state_mps; |
1742 | 0 | WORD32 gt1_ctxt = 0; |
1743 | 0 | WORD32 ctxt_set = 0; |
1744 | |
|
1745 | 0 | WORD32 ctxt_idx; |
1746 | | |
1747 | | /*Check if the cabac states had previous nbr available */ |
1748 | |
|
1749 | 0 | if(i == 0) |
1750 | 0 | pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[3][0]; |
1751 | 0 | else if(i < (i4_trans_size >> 2)) |
1752 | 0 | pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[1][0]; |
1753 | 0 | else if((i % (i4_trans_size >> 2)) == 0) |
1754 | 0 | pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[2][0]; |
1755 | 0 | else |
1756 | 0 | pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0]; |
1757 | |
|
1758 | 0 | if(((i % (i4_trans_size >> 2)) == 0) && (i != 0)) |
1759 | 0 | k++; |
1760 | |
|
1761 | 0 | j = ((i4_trans_size * 4) * k) + ((i % (i4_trans_size >> 2)) * 4); |
1762 | | /*ctxt_set = 0 DC subblock, the previous state did not have 2 |
1763 | | ctxt_set = 1 DC subblock, the previous state did have >= 2 |
1764 | | ctxt_set = 2 AC subblock, the previous state did not have 2 |
1765 | | ctxt_set = 3 AC subblock, the previous state did have >= 2*/ |
1766 | |
|
1767 | 0 | ctxt_set = (i && is_luma) ? 2 : 0; |
1768 | | |
1769 | | /* gt1_ctxt = 1 for the co-ef value to be 1 */ |
1770 | 0 | gt1_ctxt = 0; |
1771 | 0 | ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt; |
1772 | |
|
1773 | 0 | state_mps = pu1_ctxt_model[ctxt_idx]; |
1774 | | |
1775 | | /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */ |
1776 | 0 | u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0]; |
1777 | |
|
1778 | 0 | for(scan_pos = 0; scan_pos < 16; scan_pos++) |
1779 | 0 | { |
1780 | 0 | UWORD8 y_pos_x_pos; |
1781 | |
|
1782 | 0 | if(scan_pos || i) |
1783 | 0 | { |
1784 | 0 | y_pos_x_pos = scan_pos; // gu1_hevce_scan4x4[i4_scan_idx][scan_pos]; |
1785 | | /* ctxt for AC coeff depends on curpos and neigbour csbf */ |
1786 | 0 | sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos]; |
1787 | | |
1788 | | /* based on luma subblock pos */ |
1789 | 0 | sig_ctxinc += (i && is_luma) ? 3 : 0; |
1790 | |
|
1791 | 0 | sig_ctxinc += sig_coeff_base_ctxt; |
1792 | 0 | } |
1793 | 0 | else |
1794 | 0 | { |
1795 | | /*MAM : both scan pos and i 0 implies the DC coef of 1st block only */ |
1796 | | /* DC coeff has fixed context for luma and chroma */ |
1797 | 0 | sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27; |
1798 | 0 | } |
1799 | | |
1800 | | /*Get the mps state based on ctxt modes */ |
1801 | 0 | state_mps = pu1_ctxt_model[sig_ctxinc]; |
1802 | | |
1803 | | /* Bits taken to encode sig co-ef flag as 0 */ |
1804 | 0 | u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0]; |
1805 | |
|
1806 | 0 | u4_bits_estimated_r1 = |
1807 | 0 | (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000)); |
1808 | | |
1809 | | /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */ |
1810 | 0 | u4_bits_estimated_r1 += u4_bits_estimated_r1_temp; |
1811 | 0 | { |
1812 | 0 | QUANT_ROUND_FACTOR( |
1813 | 0 | temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod); |
1814 | 0 | *(pi4_quant_round_0_1 + |
1815 | 0 | ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size)) + j) = temp2; |
1816 | 0 | } |
1817 | 0 | } |
1818 | 0 | } |
1819 | 0 | } |
1820 | 0 | else |
1821 | 0 | { |
1822 | | /*If Both nbr csbfs are 0, then all the coef in sub-blocks will have same value except for 1st subblock, |
1823 | | Hence will write the same value to all sub block, and overwrite for the 1st one */ |
1824 | 0 | i = 1; |
1825 | 0 | { |
1826 | 0 | UWORD8 sig_ctxinc; |
1827 | 0 | UWORD8 y_pos_x_pos; |
1828 | 0 | WORD32 quant_rounding_0_1; |
1829 | |
|
1830 | 0 | pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc_00[0]; |
1831 | |
|
1832 | 0 | scan_pos = 0; |
1833 | 0 | y_pos_x_pos = scan_pos; // gu1_hevce_scan4x4[i4_scan_idx][scan_pos]; |
1834 | | /* ctxt for AC coeff depends on curpos and neigbour csbf */ |
1835 | 0 | sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos]; |
1836 | | |
1837 | | /* based on luma subblock pos */ |
1838 | 0 | sig_ctxinc += (is_luma) ? 3 : 0; |
1839 | |
|
1840 | 0 | sig_ctxinc += sig_coeff_base_ctxt; |
1841 | | |
1842 | | /*Get the mps state based on ctxt modes */ |
1843 | 0 | state_mps = pu1_ctxt_model[sig_ctxinc]; |
1844 | | |
1845 | | /* Bits taken to encode sig co-ef flag as 0 */ |
1846 | 0 | u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0]; |
1847 | |
|
1848 | 0 | u4_bits_estimated_r1 = |
1849 | 0 | (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000)); |
1850 | | |
1851 | | /*ctxt_set = 0 DC subblock, the previous state did not have 2 |
1852 | | ctxt_set = 1 DC subblock, the previous state did have >= 2 |
1853 | | ctxt_set = 2 AC subblock, the previous state did not have 2 |
1854 | | ctxt_set = 3 AC subblock, the previous state did have >= 2*/ |
1855 | |
|
1856 | 0 | ctxt_set = (i && is_luma) ? 2 : 0; |
1857 | | |
1858 | | /* gt1_ctxt = 1 for the co-ef value to be 1 */ |
1859 | 0 | gt1_ctxt = 0; |
1860 | 0 | ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt; |
1861 | |
|
1862 | 0 | state_mps = pu1_ctxt_model[ctxt_idx]; |
1863 | | |
1864 | | /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */ |
1865 | 0 | u4_bits_estimated_r1 += gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0]; |
1866 | |
|
1867 | 0 | QUANT_ROUND_FACTOR( |
1868 | 0 | quant_rounding_0_1, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod); |
1869 | |
|
1870 | 0 | for(scan_pos = 0; scan_pos < (16 * (i4_trans_size * i4_trans_size >> 4)); |
1871 | 0 | scan_pos++) |
1872 | 0 | { |
1873 | 0 | *(pi4_quant_round_0_1 + scan_pos) = quant_rounding_0_1; |
1874 | 0 | } |
1875 | 0 | } |
1876 | | |
1877 | | /*First Subblock*/ |
1878 | 0 | i = 0; |
1879 | |
|
1880 | 0 | { |
1881 | 0 | UWORD8 sig_ctxinc; |
1882 | 0 | WORD32 state_mps; |
1883 | 0 | WORD32 gt1_ctxt = 0; |
1884 | 0 | WORD32 ctxt_set = 0; |
1885 | |
|
1886 | 0 | WORD32 ctxt_idx; |
1887 | | |
1888 | | /*Check if the cabac states had previous nbr available */ |
1889 | |
|
1890 | 0 | { |
1891 | 0 | pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0]; |
1892 | | |
1893 | | /*ctxt_set = 0 DC subblock, the previous state did not have 2 |
1894 | | ctxt_set = 1 DC subblock, the previous state did have >= 2 |
1895 | | ctxt_set = 2 AC subblock, the previous state did not have 2 |
1896 | | ctxt_set = 3 AC subblock, the previous state did have >= 2*/ |
1897 | 0 | ctxt_set = (i && is_luma) ? 2 : 0; |
1898 | | |
1899 | | /* gt1_ctxt = 1 for the co-ef value to be 1 */ |
1900 | 0 | gt1_ctxt = 0; |
1901 | 0 | ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt; |
1902 | |
|
1903 | 0 | state_mps = pu1_ctxt_model[ctxt_idx]; |
1904 | | |
1905 | | /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */ |
1906 | 0 | u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0]; |
1907 | |
|
1908 | 0 | for(scan_pos = 0; scan_pos < 16; scan_pos++) |
1909 | 0 | { |
1910 | 0 | UWORD8 y_pos_x_pos; |
1911 | |
|
1912 | 0 | if(scan_pos) |
1913 | 0 | { |
1914 | 0 | y_pos_x_pos = scan_pos; // gu1_hevce_scan4x4[i4_scan_idx][scan_pos]; |
1915 | | /* ctxt for AC coeff depends on curpos and neigbour csbf */ |
1916 | 0 | sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos]; |
1917 | | |
1918 | | /* based on luma subblock pos */ |
1919 | 0 | sig_ctxinc += (i && is_luma) ? 3 : 0; |
1920 | |
|
1921 | 0 | sig_ctxinc += sig_coeff_base_ctxt; |
1922 | 0 | } |
1923 | 0 | else |
1924 | 0 | { |
1925 | | /*MAM : both scan pos and i 0 implies the DC coef of 1st block only */ |
1926 | | /* DC coeff has fixed context for luma and chroma */ |
1927 | 0 | sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27; |
1928 | 0 | } |
1929 | | |
1930 | | /*Get the mps state based on ctxt modes */ |
1931 | 0 | state_mps = pu1_ctxt_model[sig_ctxinc]; |
1932 | | |
1933 | | /* Bits taken to encode sig co-ef flag as 0 */ |
1934 | 0 | u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0]; |
1935 | |
|
1936 | 0 | u4_bits_estimated_r1 = |
1937 | 0 | (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000)); |
1938 | | |
1939 | | /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */ |
1940 | 0 | u4_bits_estimated_r1 += u4_bits_estimated_r1_temp; |
1941 | 0 | { |
1942 | 0 | QUANT_ROUND_FACTOR( |
1943 | 0 | temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod); |
1944 | 0 | *(pi4_quant_round_0_1 + |
1945 | 0 | ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2; |
1946 | 0 | } |
1947 | 0 | } |
1948 | 0 | } |
1949 | 0 | } |
1950 | 0 | } |
1951 | 0 | } |
1952 | 0 | return; |
1953 | 0 | } |
1954 | | |
1955 | | /*! |
1956 | | ****************************************************************************** |
1957 | | * \if Function name : ihevce_t_q_iq_ssd_scan_fxn \endif |
1958 | | * |
1959 | | * \brief |
1960 | | * Transform unit level (Luma) enc_loop function |
1961 | | * |
1962 | | * \param[in] ps_ctxt enc_loop module ctxt pointer |
1963 | | * \param[in] pu1_pred pointer to predicted data buffer |
1964 | | * \param[in] pred_strd predicted buffer stride |
1965 | | * \param[in] pu1_src pointer to source data buffer |
1966 | | * \param[in] src_strd source buffer stride |
1967 | | * \param[in] pi2_deq_data pointer to store iq data |
1968 | | * \param[in] deq_data_strd iq data buffer stride |
1969 | | * \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod) |
1970 | | * \param[out] pu1_csbf_buf pointer to store the csbf for all 4x4 in a current |
1971 | | * block |
1972 | | * \param[out] csbf_strd csbf buffer stride |
1973 | | * \param[in] trans_size transform size (4, 8, 16,32) |
1974 | | * \param[in] packed_pred_mode 0:Inter 1:Intra 2:Skip |
1975 | | * \param[out] pi8_cost pointer to store the cost |
1976 | | * \param[out] pi4_coeff_off pointer to store the number of bytes produced in |
1977 | | * coeff buffer |
1978 | | * \param[out] pi4_tu_bits pointer to store the best TU bits required to encode |
1979 | | the current TU in RDopt Mode |
1980 | | * \param[out] pu4_blk_sad pointer to store the block sad for RC |
1981 | | * \param[out] pi4_zero_col pointer to store the zero_col info for the TU |
1982 | | * \param[out] pi4_zero_row pointer to store the zero_row info for the TU |
1983 | | * \param[in] i4_perform_rdoq Indicates if RDOQ should be performed or not |
1984 | | * \param[in] i4_perform_sbh Indicates if SBH should be performed or not |
1985 | | * |
1986 | | * \return |
1987 | | * CBF of the current block |
1988 | | * |
1989 | | * \author |
1990 | | * Ittiam |
1991 | | * |
1992 | | ***************************************************************************** |
1993 | | */ |
1994 | | |
1995 | | WORD32 ihevce_t_q_iq_ssd_scan_fxn( |
1996 | | ihevce_enc_loop_ctxt_t *ps_ctxt, |
1997 | | UWORD8 *pu1_pred, |
1998 | | WORD32 pred_strd, |
1999 | | UWORD8 *pu1_src, |
2000 | | WORD32 src_strd, |
2001 | | WORD16 *pi2_deq_data, |
2002 | | WORD32 deq_data_strd, |
2003 | | UWORD8 *pu1_recon, |
2004 | | WORD32 i4_recon_stride, |
2005 | | UWORD8 *pu1_ecd_data, |
2006 | | UWORD8 *pu1_csbf_buf, |
2007 | | WORD32 csbf_strd, |
2008 | | WORD32 trans_size, |
2009 | | WORD32 packed_pred_mode, |
2010 | | LWORD64 *pi8_cost, |
2011 | | WORD32 *pi4_coeff_off, |
2012 | | WORD32 *pi4_tu_bits, |
2013 | | UWORD32 *pu4_blk_sad, |
2014 | | WORD32 *pi4_zero_col, |
2015 | | WORD32 *pi4_zero_row, |
2016 | | UWORD8 *pu1_is_recon_available, |
2017 | | WORD32 i4_perform_rdoq, |
2018 | | WORD32 i4_perform_sbh, |
2019 | | #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
2020 | | WORD32 i4_alpha_stim_multiplier, |
2021 | | UWORD8 u1_is_cu_noisy, |
2022 | | #endif |
2023 | | SSD_TYPE_T e_ssd_type, |
2024 | | WORD32 early_cbf) |
2025 | 0 | { |
2026 | 0 | WORD32 cbf = 0; |
2027 | 0 | WORD32 trans_idx; |
2028 | 0 | WORD32 quant_scale_mat_offset; |
2029 | 0 | WORD32 *pi4_trans_scratch; |
2030 | 0 | WORD16 *pi2_trans_values; |
2031 | 0 | WORD16 *pi2_quant_coeffs; |
2032 | 0 | WORD32 *pi4_subBlock2csbfId_map = NULL; |
2033 | |
|
2034 | | #if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3 |
2035 | | WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i; |
2036 | | #endif |
2037 | |
|
2038 | 0 | rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt; |
2039 | |
|
2040 | 0 | WORD32 i4_perform_zcbf = (ENABLE_INTER_ZCU_COST && (PRED_MODE_INTRA != packed_pred_mode)) || |
2041 | 0 | (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE); |
2042 | 0 | WORD32 i4_perform_coeff_level_rdoq = (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING); |
2043 | 0 | WORD8 intra_flag = 0; |
2044 | 0 | ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW); |
2045 | | |
2046 | 0 | *pi4_tu_bits = 0; |
2047 | 0 | *pi4_coeff_off = 0; |
2048 | 0 | pu1_is_recon_available[0] = 0; |
2049 | |
|
2050 | 0 | if((PRED_MODE_SKIP == packed_pred_mode) || (0 == early_cbf)) |
2051 | 0 | { |
2052 | 0 | if(e_ssd_type != NULL_TYPE) |
2053 | 0 | { |
2054 | | /* SSD cost is stored to the pointer */ |
2055 | 0 | pi8_cost[0] = |
2056 | |
|
2057 | 0 | ps_ctxt->s_cmn_opt_func.pf_ssd_and_sad_calculator( |
2058 | 0 | pu1_pred, pred_strd, pu1_src, src_strd, trans_size, pu4_blk_sad); |
2059 | |
|
2060 | 0 | #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
2061 | 0 | if(u1_is_cu_noisy && i4_alpha_stim_multiplier) |
2062 | 0 | { |
2063 | 0 | pi8_cost[0] = ihevce_inject_stim_into_distortion( |
2064 | 0 | pu1_src, |
2065 | 0 | src_strd, |
2066 | 0 | pu1_pred, |
2067 | 0 | pred_strd, |
2068 | 0 | pi8_cost[0], |
2069 | 0 | !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS |
2070 | 0 | : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * |
2071 | 0 | (double)ALPHA_FOR_ZERO_CODING_DECISIONS) / |
2072 | 0 | 100.0, |
2073 | 0 | trans_size, |
2074 | 0 | 0, |
2075 | 0 | ps_ctxt->u1_enable_psyRDOPT, |
2076 | 0 | NULL_PLANE); |
2077 | 0 | } |
2078 | 0 | #endif |
2079 | | |
2080 | | /* copy pred to recon for skip mode */ |
2081 | 0 | if(SPATIAL_DOMAIN_SSD == e_ssd_type) |
2082 | 0 | { |
2083 | 0 | ps_ctxt->s_cmn_opt_func.pf_copy_2d( |
2084 | 0 | pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size); |
2085 | 0 | pu1_is_recon_available[0] = 1; |
2086 | 0 | } |
2087 | 0 | else |
2088 | 0 | { |
2089 | 0 | pu1_is_recon_available[0] = 0; |
2090 | 0 | } |
2091 | |
|
2092 | 0 | #if ENABLE_INTER_ZCU_COST |
2093 | 0 | ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0]; |
2094 | 0 | #endif |
2095 | 0 | } |
2096 | 0 | else |
2097 | 0 | { |
2098 | 0 | pi8_cost[0] = UINT_MAX; |
2099 | 0 | } |
2100 | | |
2101 | | /* cbf is returned as 0 */ |
2102 | 0 | return (0); |
2103 | 0 | } |
2104 | | |
2105 | | /* derive context variables */ |
2106 | 0 | pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0]; |
2107 | 0 | pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0]; |
2108 | 0 | pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2); |
2109 | | |
2110 | | /* translate the transform size to index for 4x4 and 8x8 */ |
2111 | 0 | trans_idx = trans_size >> 2; |
2112 | |
|
2113 | 0 | if(PRED_MODE_INTRA == packed_pred_mode) |
2114 | 0 | { |
2115 | 0 | quant_scale_mat_offset = 0; |
2116 | 0 | intra_flag = 1; |
2117 | | #if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3 |
2118 | | ai4_quant_rounding_factors[0][0] = |
2119 | | MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3); |
2120 | | |
2121 | | for(i = 0; i < trans_size * trans_size; i++) |
2122 | | { |
2123 | | ai4_quant_rounding_factors[1][i] = |
2124 | | MAX(ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3][i], |
2125 | | (1 << QUANT_ROUND_FACTOR_Q) / 3); |
2126 | | ai4_quant_rounding_factors[2][i] = |
2127 | | MAX(ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3][i], |
2128 | | (1 << QUANT_ROUND_FACTOR_Q) / 3); |
2129 | | } |
2130 | | #endif |
2131 | 0 | } |
2132 | 0 | else |
2133 | 0 | { |
2134 | 0 | quant_scale_mat_offset = NUM_TRANS_TYPES; |
2135 | 0 | } |
2136 | | /* for intra 4x4 DST transform should be used */ |
2137 | 0 | if((1 == trans_idx) && (1 == intra_flag)) |
2138 | 0 | { |
2139 | 0 | trans_idx = 0; |
2140 | 0 | } |
2141 | | /* for 16x16 cases */ |
2142 | 0 | else if(16 == trans_size) |
2143 | 0 | { |
2144 | 0 | trans_idx = 3; |
2145 | 0 | } |
2146 | | /* for 32x32 cases */ |
2147 | 0 | else if(32 == trans_size) |
2148 | 0 | { |
2149 | 0 | trans_idx = 4; |
2150 | 0 | } |
2151 | |
|
2152 | 0 | switch(trans_size) |
2153 | 0 | { |
2154 | 0 | case 4: |
2155 | 0 | { |
2156 | 0 | pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU; |
2157 | |
|
2158 | 0 | break; |
2159 | 0 | } |
2160 | 0 | case 8: |
2161 | 0 | { |
2162 | 0 | pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU; |
2163 | |
|
2164 | 0 | break; |
2165 | 0 | } |
2166 | 0 | case 16: |
2167 | 0 | { |
2168 | 0 | pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU; |
2169 | |
|
2170 | 0 | break; |
2171 | 0 | } |
2172 | 0 | case 32: |
2173 | 0 | { |
2174 | 0 | pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU; |
2175 | |
|
2176 | 0 | break; |
2177 | 0 | } |
2178 | 0 | } |
2179 | | |
2180 | | /* Do not call the FT and Quant functions if early_cbf is 0 */ |
2181 | 0 | if(1 == early_cbf) |
2182 | 0 | { |
2183 | | /* ---------- call residue and transform block ------- */ |
2184 | 0 | *pu4_blk_sad = ps_ctxt->apf_resd_trns[trans_idx]( |
2185 | 0 | pu1_src, |
2186 | 0 | pu1_pred, |
2187 | 0 | pi4_trans_scratch, |
2188 | 0 | pi2_trans_values, |
2189 | 0 | src_strd, |
2190 | 0 | pred_strd, |
2191 | 0 | trans_size, |
2192 | 0 | NULL_PLANE); |
2193 | |
|
2194 | 0 | cbf = ps_ctxt->apf_quant_iquant_ssd |
2195 | 0 | [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2]( |
2196 | 0 | pi2_trans_values, |
2197 | 0 | ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset], |
2198 | 0 | pi2_quant_coeffs, |
2199 | 0 | pi2_deq_data, |
2200 | 0 | trans_size, |
2201 | 0 | ps_ctxt->i4_cu_qp_div6, |
2202 | 0 | ps_ctxt->i4_cu_qp_mod6, |
2203 | 0 | #if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3 |
2204 | 0 | ps_ctxt->i4_quant_rnd_factor[intra_flag], |
2205 | 0 | ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3], |
2206 | 0 | ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3], |
2207 | | #else |
2208 | | intra_flag ? ai4_quant_rounding_factors[0][0] |
2209 | | : ps_ctxt->i4_quant_rnd_factor[intra_flag], |
2210 | | intra_flag ? ai4_quant_rounding_factors[1] |
2211 | | : ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3], |
2212 | | intra_flag ? ai4_quant_rounding_factors[2] |
2213 | | : ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3], |
2214 | | #endif |
2215 | 0 | trans_size, |
2216 | 0 | trans_size, |
2217 | 0 | deq_data_strd, |
2218 | 0 | pu1_csbf_buf, |
2219 | 0 | csbf_strd, |
2220 | 0 | pi4_zero_col, |
2221 | 0 | pi4_zero_row, |
2222 | 0 | ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset], |
2223 | 0 | pi8_cost); |
2224 | |
|
2225 | 0 | if(e_ssd_type != FREQUENCY_DOMAIN_SSD) |
2226 | 0 | { |
2227 | 0 | pi8_cost[0] = UINT_MAX; |
2228 | 0 | } |
2229 | 0 | } |
2230 | |
|
2231 | 0 | if(0 != cbf) |
2232 | 0 | { |
2233 | 0 | if(i4_perform_sbh || i4_perform_rdoq) |
2234 | 0 | { |
2235 | 0 | ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd; |
2236 | 0 | ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size; |
2237 | 0 | ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map; |
2238 | |
|
2239 | 0 | ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_cu_qp_div6; |
2240 | 0 | ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_cu_qp_mod6; |
2241 | 0 | ps_rdoq_sbh_ctxt->i4_scan_idx = ps_ctxt->i4_scan_idx; |
2242 | 0 | ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost; |
2243 | 0 | ps_rdoq_sbh_ctxt->i4_trans_size = trans_size; |
2244 | |
|
2245 | 0 | ps_rdoq_sbh_ctxt->pi2_dequant_coeff = |
2246 | 0 | ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset]; |
2247 | 0 | ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data; |
2248 | 0 | ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs; |
2249 | 0 | ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values; |
2250 | 0 | ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf; |
2251 | | |
2252 | | /* ------- call coeffs scan function ------- */ |
2253 | 0 | if((!i4_perform_rdoq)) |
2254 | 0 | { |
2255 | 0 | ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt); |
2256 | |
|
2257 | 0 | pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost; |
2258 | 0 | } |
2259 | 0 | } |
2260 | |
|
2261 | 0 | *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs( |
2262 | 0 | pi2_quant_coeffs, |
2263 | 0 | pi4_subBlock2csbfId_map, |
2264 | 0 | ps_ctxt->i4_scan_idx, |
2265 | 0 | trans_size, |
2266 | 0 | pu1_ecd_data, |
2267 | 0 | pu1_csbf_buf, |
2268 | 0 | csbf_strd); |
2269 | 0 | } |
2270 | 0 | *pi8_cost >>= ga_trans_shift[trans_idx]; |
2271 | |
|
2272 | 0 | #if RDOPT_ZERO_CBF_ENABLE |
2273 | | /* compare null cbf cost with encode tu rd-cost */ |
2274 | 0 | if(cbf != 0) |
2275 | 0 | { |
2276 | 0 | WORD32 tu_bits; |
2277 | 0 | LWORD64 tu_rd_cost; |
2278 | |
|
2279 | 0 | LWORD64 zero_cbf_cost = 0; |
2280 | | |
2281 | | /*Populating the fields of rdoq_ctxt structure*/ |
2282 | 0 | if(i4_perform_rdoq) |
2283 | 0 | { |
2284 | | /* transform size to log2transform size */ |
2285 | 0 | GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size); |
2286 | 0 | ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1; |
2287 | 0 | ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_qf; |
2288 | 0 | ps_rdoq_sbh_ctxt->i4_is_luma = 1; |
2289 | 0 | ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx]; |
2290 | 0 | ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td = |
2291 | 0 | (1 << ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td) / 2; |
2292 | 0 | ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0; |
2293 | 0 | ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col; |
2294 | 0 | ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row; |
2295 | 0 | } |
2296 | 0 | else if(i4_perform_zcbf) |
2297 | 0 | { |
2298 | 0 | zero_cbf_cost = |
2299 | |
|
2300 | 0 | ps_ctxt->s_cmn_opt_func.pf_ssd_calculator( |
2301 | 0 | pu1_src, pu1_pred, src_strd, pred_strd, trans_size, trans_size, NULL_PLANE); |
2302 | 0 | } |
2303 | | |
2304 | | /************************************************************************/ |
2305 | | /* call the entropy rdo encode to get the bit estimate for current tu */ |
2306 | | /* note that tu includes only residual coding bits and does not include */ |
2307 | | /* tu split, cbf and qp delta encoding bits for a TU */ |
2308 | | /************************************************************************/ |
2309 | 0 | if(i4_perform_rdoq) |
2310 | 0 | { |
2311 | 0 | tu_bits = ihevce_entropy_rdo_encode_tu_rdoq( |
2312 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt, |
2313 | 0 | (pu1_ecd_data), |
2314 | 0 | trans_size, |
2315 | 0 | 1, |
2316 | 0 | ps_rdoq_sbh_ctxt, |
2317 | 0 | pi8_cost, |
2318 | 0 | &zero_cbf_cost, |
2319 | 0 | 0); |
2320 | |
|
2321 | 0 | if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0) |
2322 | 0 | { |
2323 | 0 | cbf = 0; |
2324 | 0 | *pi4_coeff_off = 0; |
2325 | 0 | } |
2326 | |
|
2327 | 0 | if((i4_perform_sbh) && (0 != cbf)) |
2328 | 0 | { |
2329 | 0 | ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost; |
2330 | 0 | ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt); |
2331 | 0 | *pi8_cost = ps_rdoq_sbh_ctxt->i8_ssd_cost; |
2332 | 0 | } |
2333 | | |
2334 | | /*Add round value before normalizing*/ |
2335 | 0 | *pi8_cost += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td; |
2336 | 0 | *pi8_cost >>= ga_trans_shift[trans_idx]; |
2337 | |
|
2338 | 0 | if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1) |
2339 | 0 | { |
2340 | 0 | pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0]; |
2341 | 0 | *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs( |
2342 | 0 | pi2_quant_coeffs, |
2343 | 0 | pi4_subBlock2csbfId_map, |
2344 | 0 | ps_ctxt->i4_scan_idx, |
2345 | 0 | trans_size, |
2346 | 0 | pu1_ecd_data, |
2347 | 0 | pu1_csbf_buf, |
2348 | 0 | csbf_strd); |
2349 | 0 | } |
2350 | 0 | } |
2351 | 0 | else |
2352 | 0 | { |
2353 | 0 | tu_bits = ihevce_entropy_rdo_encode_tu( |
2354 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 1, i4_perform_sbh); |
2355 | 0 | } |
2356 | |
|
2357 | 0 | *pi4_tu_bits = tu_bits; |
2358 | |
|
2359 | 0 | if(e_ssd_type == SPATIAL_DOMAIN_SSD) |
2360 | 0 | { |
2361 | 0 | *pi8_cost = ihevce_it_recon_ssd( |
2362 | 0 | ps_ctxt, |
2363 | 0 | pu1_src, |
2364 | 0 | src_strd, |
2365 | 0 | pu1_pred, |
2366 | 0 | pred_strd, |
2367 | 0 | pi2_deq_data, |
2368 | 0 | deq_data_strd, |
2369 | 0 | pu1_recon, |
2370 | 0 | i4_recon_stride, |
2371 | 0 | pu1_ecd_data, |
2372 | 0 | trans_size, |
2373 | 0 | packed_pred_mode, |
2374 | 0 | cbf, |
2375 | 0 | *pi4_zero_col, |
2376 | 0 | *pi4_zero_row, |
2377 | 0 | NULL_PLANE); |
2378 | |
|
2379 | 0 | pu1_is_recon_available[0] = 1; |
2380 | 0 | } |
2381 | |
|
2382 | 0 | #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
2383 | 0 | if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier) |
2384 | 0 | { |
2385 | 0 | pi8_cost[0] = ihevce_inject_stim_into_distortion( |
2386 | 0 | pu1_src, |
2387 | 0 | src_strd, |
2388 | 0 | pu1_recon, |
2389 | 0 | i4_recon_stride, |
2390 | 0 | pi8_cost[0], |
2391 | 0 | i4_alpha_stim_multiplier, |
2392 | 0 | trans_size, |
2393 | 0 | 0, |
2394 | 0 | ps_ctxt->u1_enable_psyRDOPT, |
2395 | 0 | NULL_PLANE); |
2396 | 0 | } |
2397 | 0 | else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier) |
2398 | 0 | { |
2399 | 0 | pi8_cost[0] = ihevce_inject_stim_into_distortion( |
2400 | 0 | pu1_src, |
2401 | 0 | src_strd, |
2402 | 0 | pu1_pred, |
2403 | 0 | pred_strd, |
2404 | 0 | pi8_cost[0], |
2405 | 0 | i4_alpha_stim_multiplier, |
2406 | 0 | trans_size, |
2407 | 0 | 0, |
2408 | 0 | ps_ctxt->u1_enable_psyRDOPT, |
2409 | 0 | NULL_PLANE); |
2410 | 0 | } |
2411 | 0 | #endif |
2412 | | |
2413 | | /* add the SSD cost to bits estimate given by ECD */ |
2414 | 0 | tu_rd_cost = *pi8_cost + COMPUTE_RATE_COST_CLIP30( |
2415 | 0 | tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); |
2416 | |
|
2417 | 0 | if(i4_perform_zcbf) |
2418 | 0 | { |
2419 | 0 | #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
2420 | 0 | if(u1_is_cu_noisy && i4_alpha_stim_multiplier) |
2421 | 0 | { |
2422 | 0 | zero_cbf_cost = ihevce_inject_stim_into_distortion( |
2423 | 0 | pu1_src, |
2424 | 0 | src_strd, |
2425 | 0 | pu1_pred, |
2426 | 0 | pred_strd, |
2427 | 0 | zero_cbf_cost, |
2428 | 0 | !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS |
2429 | 0 | : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * |
2430 | 0 | (double)ALPHA_FOR_ZERO_CODING_DECISIONS) / |
2431 | 0 | 100.0, |
2432 | 0 | trans_size, |
2433 | 0 | 0, |
2434 | 0 | ps_ctxt->u1_enable_psyRDOPT, |
2435 | 0 | NULL_PLANE); |
2436 | 0 | } |
2437 | 0 | #endif |
2438 | | |
2439 | | /* force the tu as zero cbf if zero_cbf_cost is lower */ |
2440 | 0 | if(zero_cbf_cost < tu_rd_cost) |
2441 | 0 | { |
2442 | | /* num bytes is set to 0 */ |
2443 | 0 | *pi4_coeff_off = 0; |
2444 | | |
2445 | | /* cbf is returned as 0 */ |
2446 | 0 | cbf = 0; |
2447 | | |
2448 | | /* cost is returned as 0 cbf cost */ |
2449 | 0 | *pi8_cost = zero_cbf_cost; |
2450 | | |
2451 | | /* TU bits is set to 0 */ |
2452 | 0 | *pi4_tu_bits = 0; |
2453 | 0 | pu1_is_recon_available[0] = 0; |
2454 | |
|
2455 | 0 | if(SPATIAL_DOMAIN_SSD == e_ssd_type) |
2456 | 0 | { |
2457 | | /* copy pred to recon for zcbf mode */ |
2458 | |
|
2459 | 0 | ps_ctxt->s_cmn_opt_func.pf_copy_2d( |
2460 | 0 | pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size); |
2461 | |
|
2462 | 0 | pu1_is_recon_available[0] = 1; |
2463 | 0 | } |
2464 | 0 | } |
2465 | | /* accumulate cu not coded cost with zcbf cost */ |
2466 | 0 | #if ENABLE_INTER_ZCU_COST |
2467 | 0 | ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost; |
2468 | 0 | #endif |
2469 | 0 | } |
2470 | 0 | } |
2471 | 0 | else |
2472 | 0 | { |
2473 | | /* cbf = 0, accumulate cu not coded cost */ |
2474 | 0 | if(e_ssd_type == SPATIAL_DOMAIN_SSD) |
2475 | 0 | { |
2476 | 0 | *pi8_cost = ihevce_it_recon_ssd( |
2477 | 0 | ps_ctxt, |
2478 | 0 | pu1_src, |
2479 | 0 | src_strd, |
2480 | 0 | pu1_pred, |
2481 | 0 | pred_strd, |
2482 | 0 | pi2_deq_data, |
2483 | 0 | deq_data_strd, |
2484 | 0 | pu1_recon, |
2485 | 0 | i4_recon_stride, |
2486 | 0 | pu1_ecd_data, |
2487 | 0 | trans_size, |
2488 | 0 | packed_pred_mode, |
2489 | 0 | cbf, |
2490 | 0 | *pi4_zero_col, |
2491 | 0 | *pi4_zero_row, |
2492 | 0 | NULL_PLANE); |
2493 | |
|
2494 | 0 | pu1_is_recon_available[0] = 1; |
2495 | 0 | } |
2496 | |
|
2497 | 0 | #if ENABLE_INTER_ZCU_COST |
2498 | 0 | { |
2499 | 0 | #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
2500 | 0 | if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier) |
2501 | 0 | { |
2502 | 0 | pi8_cost[0] = ihevce_inject_stim_into_distortion( |
2503 | 0 | pu1_src, |
2504 | 0 | src_strd, |
2505 | 0 | pu1_recon, |
2506 | 0 | i4_recon_stride, |
2507 | 0 | pi8_cost[0], |
2508 | 0 | !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS |
2509 | 0 | : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * |
2510 | 0 | (double)ALPHA_FOR_ZERO_CODING_DECISIONS) / |
2511 | 0 | 100.0, |
2512 | 0 | trans_size, |
2513 | 0 | 0, |
2514 | 0 | ps_ctxt->u1_enable_psyRDOPT, |
2515 | 0 | NULL_PLANE); |
2516 | 0 | } |
2517 | 0 | else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier) |
2518 | 0 | { |
2519 | 0 | pi8_cost[0] = ihevce_inject_stim_into_distortion( |
2520 | 0 | pu1_src, |
2521 | 0 | src_strd, |
2522 | 0 | pu1_pred, |
2523 | 0 | pred_strd, |
2524 | 0 | pi8_cost[0], |
2525 | 0 | !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS |
2526 | 0 | : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * |
2527 | 0 | (double)ALPHA_FOR_ZERO_CODING_DECISIONS) / |
2528 | 0 | 100.0, |
2529 | 0 | trans_size, |
2530 | 0 | 0, |
2531 | 0 | ps_ctxt->u1_enable_psyRDOPT, |
2532 | 0 | NULL_PLANE); |
2533 | 0 | } |
2534 | 0 | #endif |
2535 | |
|
2536 | 0 | ps_ctxt->i8_cu_not_coded_cost += *pi8_cost; |
2537 | 0 | } |
2538 | 0 | #endif /* ENABLE_INTER_ZCU_COST */ |
2539 | 0 | } |
2540 | 0 | #endif |
2541 | |
|
2542 | 0 | return (cbf); |
2543 | 0 | } |
2544 | | |
2545 | | /*! |
2546 | | ****************************************************************************** |
2547 | | * \if Function name : ihevce_it_recon_fxn \endif |
2548 | | * |
2549 | | * \brief |
2550 | | * Transform unit level (Luma) IT Recon function |
2551 | | * |
2552 | | * \param[in] ps_ctxt enc_loop module ctxt pointer |
2553 | | * \param[in] pi2_deq_data pointer to iq data |
2554 | | * \param[in] deq_data_strd iq data buffer stride |
2555 | | * \param[in] pu1_pred pointer to predicted data buffer |
2556 | | * \param[in] pred_strd predicted buffer stride |
2557 | | * \param[in] pu1_recon pointer to recon buffer |
2558 | | * \param[in] recon_strd recon buffer stride |
2559 | | * \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod) |
2560 | | * \param[in] trans_size transform size (4, 8, 16,32) |
2561 | | * \param[in] packed_pred_mode 0:Inter 1:Intra 2:Skip |
2562 | | * \param[in] cbf CBF of the current block |
2563 | | * \param[in] zero_cols zero_cols of the current block |
2564 | | * \param[in] zero_rows zero_rows of the current block |
2565 | | * |
2566 | | * \return |
2567 | | * |
2568 | | * \author |
2569 | | * Ittiam |
2570 | | * |
2571 | | ***************************************************************************** |
2572 | | */ |
2573 | | |
2574 | | void ihevce_it_recon_fxn( |
2575 | | ihevce_enc_loop_ctxt_t *ps_ctxt, |
2576 | | WORD16 *pi2_deq_data, |
2577 | | WORD32 deq_dat_strd, |
2578 | | UWORD8 *pu1_pred, |
2579 | | WORD32 pred_strd, |
2580 | | UWORD8 *pu1_recon, |
2581 | | WORD32 recon_strd, |
2582 | | UWORD8 *pu1_ecd_data, |
2583 | | WORD32 trans_size, |
2584 | | WORD32 packed_pred_mode, |
2585 | | WORD32 cbf, |
2586 | | WORD32 zero_cols, |
2587 | | WORD32 zero_rows) |
2588 | 0 | { |
2589 | 0 | WORD32 dc_add_flag = 0; |
2590 | 0 | WORD32 trans_idx; |
2591 | | |
2592 | | /* translate the transform size to index for 4x4 and 8x8 */ |
2593 | 0 | trans_idx = trans_size >> 2; |
2594 | | |
2595 | | /* if SKIP mode needs to be evaluated the pred is copied to recon */ |
2596 | 0 | if(PRED_MODE_SKIP == packed_pred_mode) |
2597 | 0 | { |
2598 | 0 | UWORD8 *pu1_curr_recon, *pu1_curr_pred; |
2599 | |
|
2600 | 0 | pu1_curr_pred = pu1_pred; |
2601 | 0 | pu1_curr_recon = pu1_recon; |
2602 | | |
2603 | | /* 2D copy of data */ |
2604 | |
|
2605 | 0 | ps_ctxt->s_cmn_opt_func.pf_2d_square_copy( |
2606 | 0 | pu1_curr_recon, recon_strd, pu1_curr_pred, pred_strd, trans_size, sizeof(UWORD8)); |
2607 | |
|
2608 | 0 | return; |
2609 | 0 | } |
2610 | | |
2611 | | /* for intra 4x4 DST transform should be used */ |
2612 | 0 | if((1 == trans_idx) && (PRED_MODE_INTRA == packed_pred_mode)) |
2613 | 0 | { |
2614 | 0 | trans_idx = 0; |
2615 | 0 | } |
2616 | | /* for 16x16 cases */ |
2617 | 0 | else if(16 == trans_size) |
2618 | 0 | { |
2619 | 0 | trans_idx = 3; |
2620 | 0 | } |
2621 | | /* for 32x32 cases */ |
2622 | 0 | else if(32 == trans_size) |
2623 | 0 | { |
2624 | 0 | trans_idx = 4; |
2625 | 0 | } |
2626 | | |
2627 | | /*if (lastx == 0 && lasty == 0) , ie only 1 coefficient */ |
2628 | 0 | if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1])) |
2629 | 0 | { |
2630 | 0 | dc_add_flag = 1; |
2631 | 0 | } |
2632 | |
|
2633 | 0 | if(0 == cbf) |
2634 | 0 | { |
2635 | | /* buffer copy */ |
2636 | 0 | ps_ctxt->s_cmn_opt_func.pf_2d_square_copy( |
2637 | 0 | pu1_recon, recon_strd, pu1_pred, pred_strd, trans_size, 1); |
2638 | 0 | } |
2639 | 0 | else if((1 == dc_add_flag) && (0 != trans_idx)) |
2640 | 0 | { |
2641 | | /* dc add */ |
2642 | 0 | ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc( |
2643 | 0 | pu1_pred, |
2644 | 0 | pred_strd, |
2645 | 0 | pu1_recon, |
2646 | 0 | recon_strd, |
2647 | 0 | trans_size, |
2648 | 0 | pi2_deq_data[0], |
2649 | 0 | NULL_PLANE /* luma */ |
2650 | 0 | ); |
2651 | 0 | } |
2652 | 0 | else |
2653 | 0 | { |
2654 | 0 | ps_ctxt->apf_it_recon[trans_idx]( |
2655 | 0 | pi2_deq_data, |
2656 | 0 | &ps_ctxt->ai2_scratch[0], |
2657 | 0 | pu1_pred, |
2658 | 0 | pu1_recon, |
2659 | 0 | deq_dat_strd, |
2660 | 0 | pred_strd, |
2661 | 0 | recon_strd, |
2662 | 0 | zero_cols, |
2663 | 0 | zero_rows); |
2664 | 0 | } |
2665 | 0 | } |
2666 | | |
2667 | | /*! |
2668 | | ****************************************************************************** |
2669 | | * \if Function name : ihevce_chroma_it_recon_fxn \endif |
2670 | | * |
2671 | | * \brief |
2672 | | * Transform unit level (Chroma) IT Recon function |
2673 | | * |
2674 | | * \param[in] ps_ctxt enc_loop module ctxt pointer |
2675 | | * \param[in] pi2_deq_data pointer to iq data |
2676 | | * \param[in] deq_data_strd iq data buffer stride |
2677 | | * \param[in] pu1_pred pointer to predicted data buffer |
2678 | | * \param[in] pred_strd predicted buffer stride |
2679 | | * \param[in] pu1_recon pointer to recon buffer |
2680 | | * \param[in] recon_strd recon buffer stride |
2681 | | * \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod) |
2682 | | * \param[in] trans_size transform size (4, 8, 16) |
2683 | | * \param[in] cbf CBF of the current block |
2684 | | * \param[in] zero_cols zero_cols of the current block |
2685 | | * \param[in] zero_rows zero_rows of the current block |
2686 | | * |
2687 | | * \return |
2688 | | * |
2689 | | * \author |
2690 | | * Ittiam |
2691 | | * |
2692 | | ***************************************************************************** |
2693 | | */ |
2694 | | |
2695 | | void ihevce_chroma_it_recon_fxn( |
2696 | | ihevce_enc_loop_ctxt_t *ps_ctxt, |
2697 | | WORD16 *pi2_deq_data, |
2698 | | WORD32 deq_dat_strd, |
2699 | | UWORD8 *pu1_pred, |
2700 | | WORD32 pred_strd, |
2701 | | UWORD8 *pu1_recon, |
2702 | | WORD32 recon_strd, |
2703 | | UWORD8 *pu1_ecd_data, |
2704 | | WORD32 trans_size, |
2705 | | WORD32 cbf, |
2706 | | WORD32 zero_cols, |
2707 | | WORD32 zero_rows, |
2708 | | CHROMA_PLANE_ID_T e_chroma_plane) |
2709 | 0 | { |
2710 | 0 | WORD32 trans_idx; |
2711 | |
|
2712 | 0 | ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE)); |
2713 | | |
2714 | | /* since 2x2 transform is not allowed for chroma*/ |
2715 | 0 | if(2 == trans_size) |
2716 | 0 | { |
2717 | 0 | trans_size = 4; |
2718 | 0 | } |
2719 | | |
2720 | | /* translate the transform size to index */ |
2721 | 0 | trans_idx = trans_size >> 2; |
2722 | | |
2723 | | /* for 16x16 cases */ |
2724 | 0 | if(16 == trans_size) |
2725 | 0 | { |
2726 | 0 | trans_idx = 3; |
2727 | 0 | } |
2728 | |
|
2729 | 0 | if(0 == cbf) |
2730 | 0 | { |
2731 | | /* buffer copy */ |
2732 | 0 | ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( |
2733 | 0 | pu1_pred, pred_strd, pu1_recon, recon_strd, trans_size, trans_size, e_chroma_plane); |
2734 | 0 | } |
2735 | 0 | else if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1])) |
2736 | 0 | { |
2737 | | /* dc add */ |
2738 | 0 | ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc( |
2739 | 0 | pu1_pred, |
2740 | 0 | pred_strd, |
2741 | 0 | pu1_recon, |
2742 | 0 | recon_strd, |
2743 | 0 | trans_size, |
2744 | 0 | pi2_deq_data[0], |
2745 | 0 | e_chroma_plane /* chroma plane */ |
2746 | 0 | ); |
2747 | 0 | } |
2748 | 0 | else |
2749 | 0 | { |
2750 | 0 | ps_ctxt->apf_chrm_it_recon[trans_idx - 1]( |
2751 | 0 | pi2_deq_data, |
2752 | 0 | &ps_ctxt->ai2_scratch[0], |
2753 | 0 | pu1_pred + (WORD32)e_chroma_plane, |
2754 | 0 | pu1_recon + (WORD32)e_chroma_plane, |
2755 | 0 | deq_dat_strd, |
2756 | 0 | pred_strd, |
2757 | 0 | recon_strd, |
2758 | 0 | zero_cols, |
2759 | 0 | zero_rows); |
2760 | 0 | } |
2761 | 0 | } |
2762 | | |
2763 | | /** |
2764 | | ******************************************************************************* |
2765 | | * \if Function name : ihevce_mpm_idx_based_filter_RDOPT_cand \endif |
2766 | | * |
2767 | | * \brief * Filters the RDOPT candidates based on mpm_idx |
2768 | | * |
2769 | | * \par Description |
2770 | | * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx & b5_rem_intra_pred_mode |
2771 | | * for a CU |
2772 | | * |
2773 | | * \param[in] ps_ctxt : ptr to enc loop context |
2774 | | * \param[in] ps_cu_analyse : ptr to CU analyse structure |
2775 | | * \param[in] ps_top_nbr_4x4 top 4x4 neighbour pointer |
2776 | | * \param[in] ps_left_nbr_4x4 left 4x4 neighbour pointer |
2777 | | * \param[in] pu1_luma_mode luma mode |
2778 | | * |
2779 | | * \returns none |
2780 | | * |
2781 | | * \author |
2782 | | * Ittiam |
2783 | | * |
2784 | | ******************************************************************************* |
2785 | | */ |
2786 | | |
2787 | | void ihevce_mpm_idx_based_filter_RDOPT_cand( |
2788 | | ihevce_enc_loop_ctxt_t *ps_ctxt, |
2789 | | cu_analyse_t *ps_cu_analyse, |
2790 | | nbr_4x4_t *ps_left_nbr_4x4, |
2791 | | nbr_4x4_t *ps_top_nbr_4x4, |
2792 | | UWORD8 *pu1_luma_mode, |
2793 | | UWORD8 *pu1_eval_mark) |
2794 | 0 | { |
2795 | 0 | WORD32 cu_pos_x; |
2796 | 0 | WORD32 cu_pos_y; |
2797 | 0 | nbr_avail_flags_t s_nbr; |
2798 | 0 | WORD32 trans_size; |
2799 | 0 | WORD32 au4_cand_mode_list[3]; |
2800 | 0 | WORD32 nbr_flags; |
2801 | 0 | UWORD8 *pu1_intra_luma_modes; |
2802 | 0 | WORD32 rdopt_cand_ctr = 0; |
2803 | 0 | UWORD8 *pu1_luma_eval_mark; |
2804 | |
|
2805 | 0 | cu_pos_x = ps_cu_analyse->b3_cu_pos_x << 1; |
2806 | 0 | cu_pos_y = ps_cu_analyse->b3_cu_pos_y << 1; |
2807 | 0 | trans_size = ps_cu_analyse->u1_cu_size; |
2808 | | |
2809 | | /* get the neighbour availability flags */ |
2810 | 0 | nbr_flags = ihevce_get_nbr_intra( |
2811 | 0 | &s_nbr, |
2812 | 0 | ps_ctxt->pu1_ctb_nbr_map, |
2813 | 0 | ps_ctxt->i4_nbr_map_strd, |
2814 | 0 | cu_pos_x, |
2815 | 0 | cu_pos_y, |
2816 | 0 | trans_size >> 2); |
2817 | 0 | (void)nbr_flags; |
2818 | | /*Call the fun to populate luma intra pred mode fro TU=CU and use the same list fro |
2819 | | *TU=CU/2 also since the modes are same in both the cases. |
2820 | | */ |
2821 | 0 | ihevce_populate_intra_pred_mode( |
2822 | 0 | ps_top_nbr_4x4->b6_luma_intra_mode, |
2823 | 0 | ps_left_nbr_4x4->b6_luma_intra_mode, |
2824 | 0 | s_nbr.u1_top_avail, |
2825 | 0 | s_nbr.u1_left_avail, |
2826 | 0 | cu_pos_y, |
2827 | 0 | &au4_cand_mode_list[0]); |
2828 | | |
2829 | | /*Loop through all the RDOPT candidates of TU=CU and TU=CU/2 and check if the current RDOPT |
2830 | | *cand is present in a4_cand_mode_list, If yes set eval flag to 1 else set it to zero |
2831 | | */ |
2832 | |
|
2833 | 0 | pu1_intra_luma_modes = pu1_luma_mode; |
2834 | 0 | pu1_luma_eval_mark = pu1_eval_mark; |
2835 | |
|
2836 | 0 | while(pu1_intra_luma_modes[rdopt_cand_ctr] != 255) |
2837 | 0 | { |
2838 | 0 | WORD32 i; |
2839 | 0 | WORD32 found_flag = 0; |
2840 | | |
2841 | | /*1st candidate of TU=CU list and TU=CU/2 list must go through RDOPT stage |
2842 | | *irrespective of whether the cand is present in the mpm idx list or not |
2843 | | */ |
2844 | 0 | if(rdopt_cand_ctr == 0) |
2845 | 0 | { |
2846 | 0 | rdopt_cand_ctr++; |
2847 | 0 | continue; |
2848 | 0 | } |
2849 | | |
2850 | 0 | for(i = 0; i < 3; i++) |
2851 | 0 | { |
2852 | 0 | if(pu1_intra_luma_modes[rdopt_cand_ctr] == au4_cand_mode_list[i]) |
2853 | 0 | { |
2854 | 0 | found_flag = 1; |
2855 | 0 | break; |
2856 | 0 | } |
2857 | 0 | } |
2858 | |
|
2859 | 0 | if(found_flag == 0) |
2860 | 0 | { |
2861 | 0 | pu1_luma_eval_mark[rdopt_cand_ctr] = 0; |
2862 | 0 | } |
2863 | |
|
2864 | 0 | rdopt_cand_ctr++; |
2865 | 0 | } |
2866 | 0 | } |
2867 | | |
2868 | | /*! |
2869 | | ****************************************************************************** |
2870 | | * \if Function name : ihevce_intra_rdopt_cu_ntu \endif |
2871 | | * |
2872 | | * \brief |
2873 | | * Intra Coding unit function for RD opt mode |
2874 | | * |
2875 | | * \param[in] ps_ctxt enc_loop module ctxt pointer |
2876 | | * \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure |
2877 | | * \param[in] pu1_luma_mode : pointer to luma mode |
2878 | | * \param[in] ps_cu_analyse pointer to cu analyse pointer |
2879 | | * \param[in] pu1_src pointer to source data buffer |
2880 | | * \param[in] src_strd source buffer stride |
2881 | | * \param[in] pu1_cu_left pointer to left recon data buffer |
2882 | | * \param[in] pu1_cu_top pointer to top recon data buffer |
2883 | | * \param[in] pu1_cu_top_left pointer to top left recon data buffer |
2884 | | * \param[in] ps_left_nbr_4x4 : left 4x4 neighbour pointer |
2885 | | * \param[in] ps_top_nbr_4x4 : top 4x4 neighbour pointer |
2886 | | * \param[in] nbr_4x4_left_strd left nbr4x4 stride |
2887 | | * \param[in] cu_left_stride left recon buffer stride |
2888 | | * \param[in] curr_buf_idx RD opt buffer index for current usage |
2889 | | * \param[in] func_proc_mode : function processing mode @sa TU_SIZE_WRT_CU_T |
2890 | | * |
2891 | | * \return |
2892 | | * RDopt cost |
2893 | | * |
2894 | | * \author |
2895 | | * Ittiam |
2896 | | * |
2897 | | ***************************************************************************** |
2898 | | */ |
2899 | | LWORD64 ihevce_intra_rdopt_cu_ntu( |
2900 | | ihevce_enc_loop_ctxt_t *ps_ctxt, |
2901 | | enc_loop_cu_prms_t *ps_cu_prms, |
2902 | | void *pv_pred_org, |
2903 | | WORD32 pred_strd_org, |
2904 | | enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms, |
2905 | | UWORD8 *pu1_luma_mode, |
2906 | | cu_analyse_t *ps_cu_analyse, |
2907 | | void *pv_curr_src, |
2908 | | void *pv_cu_left, |
2909 | | void *pv_cu_top, |
2910 | | void *pv_cu_top_left, |
2911 | | nbr_4x4_t *ps_left_nbr_4x4, |
2912 | | nbr_4x4_t *ps_top_nbr_4x4, |
2913 | | WORD32 nbr_4x4_left_strd, |
2914 | | WORD32 cu_left_stride, |
2915 | | WORD32 curr_buf_idx, |
2916 | | WORD32 func_proc_mode, |
2917 | | WORD32 i4_alpha_stim_multiplier) |
2918 | 0 | { |
2919 | 0 | enc_loop_cu_final_prms_t *ps_final_prms; |
2920 | 0 | nbr_avail_flags_t s_nbr; |
2921 | 0 | nbr_4x4_t *ps_nbr_4x4; |
2922 | 0 | nbr_4x4_t *ps_tmp_lt_4x4; |
2923 | 0 | recon_datastore_t *ps_recon_datastore; |
2924 | |
|
2925 | 0 | ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr; |
2926 | |
|
2927 | 0 | UWORD32 *pu4_nbr_flags; |
2928 | 0 | UWORD8 *pu1_intra_pred_mode; |
2929 | 0 | WORD32 cu_pos_x; |
2930 | 0 | WORD32 cu_pos_y; |
2931 | 0 | WORD32 trans_size = 0; |
2932 | 0 | UWORD8 *pu1_left; |
2933 | 0 | UWORD8 *pu1_top; |
2934 | 0 | UWORD8 *pu1_top_left; |
2935 | 0 | UWORD8 *pu1_recon; |
2936 | 0 | UWORD8 *pu1_csbf_buf; |
2937 | 0 | UWORD8 *pu1_ecd_data; |
2938 | 0 | WORD16 *pi2_deq_data; |
2939 | 0 | WORD32 deq_data_strd; |
2940 | 0 | LWORD64 total_rdopt_cost; |
2941 | 0 | WORD32 ctr; |
2942 | 0 | WORD32 left_strd; |
2943 | 0 | WORD32 i4_recon_stride; |
2944 | 0 | WORD32 csbf_strd; |
2945 | 0 | WORD32 ecd_data_bytes_cons; |
2946 | 0 | WORD32 num_4x4_in_tu; |
2947 | 0 | WORD32 num_4x4_in_cu; |
2948 | 0 | WORD32 chrm_present_flag; |
2949 | 0 | WORD32 tx_size; |
2950 | 0 | WORD32 cu_bits; |
2951 | 0 | WORD32 num_cu_parts = 0; |
2952 | 0 | WORD32 num_cands = 0; |
2953 | 0 | WORD32 cu_pos_x_8pelunits; |
2954 | 0 | WORD32 cu_pos_y_8pelunits; |
2955 | 0 | WORD32 i4_perform_rdoq; |
2956 | 0 | WORD32 i4_perform_sbh; |
2957 | 0 | UWORD8 u1_compute_spatial_ssd; |
2958 | 0 | UWORD8 u1_compute_recon; |
2959 | 0 | UWORD8 au1_intra_nxn_rdopt_ctxt_models[2][IHEVC_CAB_CTXT_END]; |
2960 | |
|
2961 | 0 | UWORD16 u2_num_tus_in_cu = 0; |
2962 | 0 | WORD32 is_sub_pu_in_hq = 0; |
2963 | | /* Get the RDOPT cost of the best CU mode for early_exit */ |
2964 | 0 | LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost; |
2965 | | /* cabac context of prev intra luma pred flag */ |
2966 | 0 | UWORD8 u1_prev_flag_cabac_ctxt = |
2967 | 0 | ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_INTRA_LUMA_PRED_FLAG]; |
2968 | 0 | WORD32 src_strd = ps_cu_prms->i4_luma_src_stride; |
2969 | |
|
2970 | 0 | UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY; |
2971 | |
|
2972 | 0 | total_rdopt_cost = 0; |
2973 | 0 | ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx]; |
2974 | 0 | ps_recon_datastore = &ps_final_prms->s_recon_datastore; |
2975 | 0 | i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride; |
2976 | 0 | csbf_strd = ps_ctxt->i4_cu_csbf_strd; |
2977 | 0 | pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0]; |
2978 | 0 | pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0]; |
2979 | 0 | pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0]; |
2980 | 0 | deq_data_strd = ps_cu_analyse->u1_cu_size; /* deq_data stride is cu size */ |
2981 | 0 | ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0]; |
2982 | 0 | ps_tmp_lt_4x4 = ps_left_nbr_4x4; |
2983 | 0 | pu4_nbr_flags = &ps_final_prms->au4_nbr_flags[0]; |
2984 | 0 | pu1_intra_pred_mode = &ps_final_prms->au1_intra_pred_mode[0]; |
2985 | 0 | cu_pos_x = ps_cu_analyse->b3_cu_pos_x; |
2986 | 0 | cu_pos_y = ps_cu_analyse->b3_cu_pos_y; |
2987 | 0 | cu_pos_x_8pelunits = cu_pos_x; |
2988 | 0 | cu_pos_y_8pelunits = cu_pos_y; |
2989 | | |
2990 | | /* reset cu not coded cost */ |
2991 | 0 | ps_ctxt->i8_cu_not_coded_cost = 0; |
2992 | | |
2993 | | /* based on the Processng mode */ |
2994 | 0 | if(TU_EQ_CU == func_proc_mode) |
2995 | 0 | { |
2996 | 0 | ps_final_prms->u1_part_mode = SIZE_2Nx2N; |
2997 | 0 | trans_size = ps_cu_analyse->u1_cu_size; |
2998 | 0 | num_cu_parts = 1; |
2999 | 0 | num_cands = 1; |
3000 | 0 | u2_num_tus_in_cu = 1; |
3001 | 0 | } |
3002 | 0 | else if(TU_EQ_CU_DIV2 == func_proc_mode) |
3003 | 0 | { |
3004 | 0 | ps_final_prms->u1_part_mode = SIZE_2Nx2N; |
3005 | 0 | trans_size = ps_cu_analyse->u1_cu_size >> 1; |
3006 | 0 | num_cu_parts = 4; |
3007 | 0 | num_cands = 1; |
3008 | 0 | u2_num_tus_in_cu = 4; |
3009 | 0 | } |
3010 | 0 | else if(TU_EQ_SUBCU == func_proc_mode) |
3011 | 0 | { |
3012 | 0 | ps_final_prms->u1_part_mode = SIZE_NxN; |
3013 | 0 | trans_size = ps_cu_analyse->u1_cu_size >> 1; |
3014 | 0 | num_cu_parts = 4; |
3015 | | /*In HQ for TU = SUBPU, all 35 modes used for RDOPT instead of 3 modes */ |
3016 | 0 | if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset) |
3017 | 0 | { |
3018 | 0 | if(ps_ctxt->i1_slice_type != BSLICE) |
3019 | 0 | { |
3020 | 0 | num_cands = (4 * MAX_INTRA_CU_CANDIDATES) + 2; |
3021 | 0 | } |
3022 | 0 | else |
3023 | 0 | { |
3024 | 0 | num_cands = (2 * MAX_INTRA_CU_CANDIDATES); |
3025 | 0 | } |
3026 | 0 | } |
3027 | 0 | else |
3028 | 0 | { |
3029 | 0 | num_cands = MAX_INTRA_CU_CANDIDATES; |
3030 | 0 | } |
3031 | 0 | u2_num_tus_in_cu = 4; |
3032 | 0 | } |
3033 | 0 | else |
3034 | 0 | { |
3035 | | /* should not enter here */ |
3036 | 0 | ASSERT(0); |
3037 | 0 | } |
3038 | | |
3039 | 0 | if(ps_ctxt->i1_cu_qp_delta_enable) |
3040 | 0 | { |
3041 | 0 | ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, trans_size, 1); |
3042 | 0 | } |
3043 | |
|
3044 | 0 | if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT) |
3045 | 0 | { |
3046 | 0 | ps_ctxt->i8_cl_ssd_lambda_qf = |
3047 | 0 | ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / |
3048 | 0 | 100.0f); |
3049 | 0 | ps_ctxt->i8_cl_ssd_lambda_chroma_qf = |
3050 | 0 | ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf * |
3051 | 0 | (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f); |
3052 | 0 | } |
3053 | |
|
3054 | 0 | u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) && |
3055 | 0 | (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) && |
3056 | 0 | CONVERT_SSDS_TO_SPATIAL_DOMAIN; |
3057 | |
|
3058 | 0 | if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT) |
3059 | 0 | { |
3060 | 0 | u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) && |
3061 | 0 | CONVERT_SSDS_TO_SPATIAL_DOMAIN; |
3062 | 0 | } |
3063 | | |
3064 | | /* populate the neigbours */ |
3065 | 0 | pu1_left = (UWORD8 *)pv_cu_left; |
3066 | 0 | pu1_top = (UWORD8 *)pv_cu_top; |
3067 | 0 | pu1_top_left = (UWORD8 *)pv_cu_top_left; |
3068 | 0 | left_strd = cu_left_stride; |
3069 | 0 | num_4x4_in_tu = (trans_size >> 2); |
3070 | 0 | num_4x4_in_cu = (ps_cu_analyse->u1_cu_size >> 2); |
3071 | 0 | chrm_present_flag = 1; |
3072 | 0 | ecd_data_bytes_cons = 0; |
3073 | 0 | cu_bits = 0; |
3074 | | |
3075 | | /* get the 4x4 level postion of current cu */ |
3076 | 0 | cu_pos_x = cu_pos_x << 1; |
3077 | 0 | cu_pos_y = cu_pos_y << 1; |
3078 | | |
3079 | | /* pouplate cu level params knowing that current is intra */ |
3080 | 0 | ps_final_prms->u1_skip_flag = 0; |
3081 | 0 | ps_final_prms->u1_intra_flag = PRED_MODE_INTRA; |
3082 | 0 | ps_final_prms->u2_num_pus_in_cu = 1; |
3083 | | /*init the is_cu_coded flag*/ |
3084 | 0 | ps_final_prms->u1_is_cu_coded = 0; |
3085 | 0 | ps_final_prms->u4_cu_sad = 0; |
3086 | |
|
3087 | 0 | ps_final_prms->as_pu_enc_loop[0].b1_intra_flag = PRED_MODE_INTRA; |
3088 | 0 | ps_final_prms->as_pu_enc_loop[0].b4_wd = (trans_size >> 1) - 1; |
3089 | 0 | ps_final_prms->as_pu_enc_loop[0].b4_ht = (trans_size >> 1) - 1; |
3090 | 0 | ps_final_prms->as_pu_enc_loop[0].b4_pos_x = cu_pos_x; |
3091 | 0 | ps_final_prms->as_pu_enc_loop[0].b4_pos_y = cu_pos_y; |
3092 | 0 | ps_final_prms->as_pu_enc_loop[0].b1_merge_flag = 0; |
3093 | |
|
3094 | 0 | ps_final_prms->as_col_pu_enc_loop[0].b1_intra_flag = 1; |
3095 | | |
3096 | | /*copy qp directly as intra cant be skip*/ |
3097 | 0 | ps_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp; |
3098 | 0 | ps_nbr_4x4->mv.s_l0_mv.i2_mvx = 0; |
3099 | 0 | ps_nbr_4x4->mv.s_l0_mv.i2_mvy = 0; |
3100 | 0 | ps_nbr_4x4->mv.s_l1_mv.i2_mvx = 0; |
3101 | 0 | ps_nbr_4x4->mv.s_l1_mv.i2_mvy = 0; |
3102 | 0 | ps_nbr_4x4->mv.i1_l0_ref_pic_buf_id = -1; |
3103 | 0 | ps_nbr_4x4->mv.i1_l1_ref_pic_buf_id = -1; |
3104 | 0 | ps_nbr_4x4->mv.i1_l0_ref_idx = -1; |
3105 | 0 | ps_nbr_4x4->mv.i1_l1_ref_idx = -1; |
3106 | | |
3107 | | /* RDOPT copy States : TU init (best until prev TU) to current */ |
3108 | 0 | memcpy( |
3109 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] |
3110 | 0 | .s_cabac_ctxt.au1_ctxt_models[0], |
3111 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
3112 | 0 | IHEVC_CAB_COEFFX_PREFIX); |
3113 | | |
3114 | | /* RDOPT copy States :update to init state if 0 cbf */ |
3115 | 0 | memcpy( |
3116 | 0 | &au1_intra_nxn_rdopt_ctxt_models[0][0], |
3117 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
3118 | 0 | IHEVC_CAB_COEFFX_PREFIX); |
3119 | 0 | memcpy( |
3120 | 0 | &au1_intra_nxn_rdopt_ctxt_models[1][0], |
3121 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
3122 | 0 | IHEVC_CAB_COEFFX_PREFIX); |
3123 | | |
3124 | | /* loop for all partitions in CU blocks */ |
3125 | 0 | for(ctr = 0; ctr < num_cu_parts; ctr++) |
3126 | 0 | { |
3127 | 0 | UWORD8 *pu1_curr_mode; |
3128 | 0 | WORD32 cand_ctr; |
3129 | 0 | WORD32 nbr_flags; |
3130 | | |
3131 | | /* for NxN case to track the best mode */ |
3132 | | /* for other cases zeroth index will be used */ |
3133 | 0 | intra_prev_rem_flags_t as_intra_prev_rem[2]; |
3134 | 0 | LWORD64 ai8_cand_rdopt_cost[2]; |
3135 | 0 | UWORD32 au4_tu_sad[2]; |
3136 | 0 | WORD32 ai4_tu_bits[2]; |
3137 | 0 | WORD32 ai4_cbf[2]; |
3138 | 0 | WORD32 ai4_curr_bytes[2]; |
3139 | 0 | WORD32 ai4_zero_col[2]; |
3140 | 0 | WORD32 ai4_zero_row[2]; |
3141 | | /* To store the pred, coeff and dequant for TU_EQ_SUBCU case (since mul. |
3142 | | cand. are there) ping-pong buffer to store the best and current */ |
3143 | 0 | UWORD8 au1_cur_pred_data[2][MIN_TU_SIZE * MIN_TU_SIZE]; |
3144 | 0 | UWORD8 au1_intra_coeffs[2][MAX_SCAN_COEFFS_BYTES_4x4]; |
3145 | 0 | WORD16 ai2_intra_deq_coeffs[2][MIN_TU_SIZE * MIN_TU_SIZE]; |
3146 | | /* Context models stored for RDopt store and restore purpose */ |
3147 | |
|
3148 | 0 | UWORD8 au1_recon_availability[2]; |
3149 | |
|
3150 | 0 | WORD32 best_cand_idx = 0; |
3151 | 0 | LWORD64 best_cand_cost = MAX_COST_64; |
3152 | | /* counters to toggle b/w best and current */ |
3153 | 0 | WORD32 best_intra_buf_idx = 1; |
3154 | 0 | WORD32 curr_intra_buf_idx = 0; |
3155 | | |
3156 | | /* copy the mode pointer to be used in inner loop */ |
3157 | 0 | pu1_curr_mode = pu1_luma_mode; |
3158 | | |
3159 | | /* get the neighbour availability flags */ |
3160 | 0 | nbr_flags = ihevce_get_nbr_intra( |
3161 | 0 | &s_nbr, |
3162 | 0 | ps_ctxt->pu1_ctb_nbr_map, |
3163 | 0 | ps_ctxt->i4_nbr_map_strd, |
3164 | 0 | cu_pos_x, |
3165 | 0 | cu_pos_y, |
3166 | 0 | num_4x4_in_tu); |
3167 | | |
3168 | | /* copy the nbr flags for chroma reuse */ |
3169 | 0 | if(4 != trans_size) |
3170 | 0 | { |
3171 | 0 | *pu4_nbr_flags = nbr_flags; |
3172 | 0 | } |
3173 | 0 | else if(1 == chrm_present_flag) |
3174 | 0 | { |
3175 | | /* compute the avail flags assuming luma trans is 8x8 */ |
3176 | | /* get the neighbour availability flags */ |
3177 | 0 | *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu( |
3178 | 0 | ps_ctxt->pu1_ctb_nbr_map, |
3179 | 0 | ps_ctxt->i4_nbr_map_strd, |
3180 | 0 | cu_pos_x, |
3181 | 0 | cu_pos_y, |
3182 | 0 | (num_4x4_in_tu << 1), |
3183 | 0 | (num_4x4_in_tu << 1)); |
3184 | 0 | } |
3185 | |
|
3186 | 0 | u1_compute_recon = !u1_compute_spatial_ssd && ((num_cu_parts > 1) && (ctr < 3)); |
3187 | |
|
3188 | 0 | if(!ctr && (u1_compute_spatial_ssd || u1_compute_recon)) |
3189 | 0 | { |
3190 | 0 | ps_recon_datastore->u1_is_lumaRecon_available = 1; |
3191 | 0 | } |
3192 | 0 | else if(!ctr) |
3193 | 0 | { |
3194 | 0 | ps_recon_datastore->u1_is_lumaRecon_available = 0; |
3195 | 0 | } |
3196 | |
|
3197 | 0 | ihevc_intra_pred_luma_ref_substitution_fptr = |
3198 | 0 | ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr; |
3199 | | |
3200 | | /* call reference array substitution */ |
3201 | 0 | ihevc_intra_pred_luma_ref_substitution_fptr( |
3202 | 0 | pu1_top_left, |
3203 | 0 | pu1_top, |
3204 | 0 | pu1_left, |
3205 | 0 | left_strd, |
3206 | 0 | trans_size, |
3207 | 0 | nbr_flags, |
3208 | 0 | (UWORD8 *)ps_ctxt->pv_ref_sub_out, |
3209 | 0 | 1); |
3210 | | |
3211 | | /* Intra Mode gating based on MPM cand list and encoder quality preset */ |
3212 | 0 | if((ps_ctxt->i1_slice_type != ISLICE) && (TU_EQ_SUBCU == func_proc_mode) && |
3213 | 0 | (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3)) |
3214 | 0 | { |
3215 | 0 | ihevce_mpm_idx_based_filter_RDOPT_cand( |
3216 | 0 | ps_ctxt, |
3217 | 0 | ps_cu_analyse, |
3218 | 0 | ps_left_nbr_4x4, |
3219 | 0 | ps_top_nbr_4x4, |
3220 | 0 | pu1_luma_mode, |
3221 | 0 | &ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][0]); |
3222 | 0 | } |
3223 | |
|
3224 | 0 | if((TU_EQ_SUBCU == func_proc_mode) && (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) && |
3225 | 0 | (ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr] >= MAX_INTRA_CU_CANDIDATES)) |
3226 | 0 | { |
3227 | 0 | WORD32 ai4_mpm_mode_list[3]; |
3228 | 0 | WORD32 i; |
3229 | |
|
3230 | 0 | WORD32 i4_curr_index = ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr]; |
3231 | |
|
3232 | 0 | ihevce_populate_intra_pred_mode( |
3233 | 0 | ps_top_nbr_4x4->b6_luma_intra_mode, |
3234 | 0 | ps_tmp_lt_4x4->b6_luma_intra_mode, |
3235 | 0 | s_nbr.u1_top_avail, |
3236 | 0 | s_nbr.u1_left_avail, |
3237 | 0 | cu_pos_y, |
3238 | 0 | &ai4_mpm_mode_list[0]); |
3239 | |
|
3240 | 0 | for(i = 0; i < 3; i++) |
3241 | 0 | { |
3242 | 0 | if(ps_cu_analyse->s_cu_intra_cand |
3243 | 0 | .au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] == 0) |
3244 | 0 | { |
3245 | 0 | ASSERT(ai4_mpm_mode_list[i] < 35); |
3246 | | |
3247 | 0 | ps_cu_analyse->s_cu_intra_cand |
3248 | 0 | .au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] = 1; |
3249 | 0 | pu1_luma_mode[i4_curr_index] = ai4_mpm_mode_list[i]; |
3250 | 0 | ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr]++; |
3251 | 0 | i4_curr_index++; |
3252 | 0 | } |
3253 | 0 | } |
3254 | | |
3255 | 0 | pu1_luma_mode[i4_curr_index] = 255; |
3256 | 0 | } |
3257 | | |
3258 | | /* loop over candidates for each partition */ |
3259 | 0 | for(cand_ctr = 0; cand_ctr < num_cands; cand_ctr++) |
3260 | 0 | { |
3261 | 0 | WORD32 curr_pred_mode; |
3262 | 0 | WORD32 bits = 0; |
3263 | 0 | LWORD64 curr_cost; |
3264 | 0 | WORD32 luma_pred_func_idx; |
3265 | 0 | UWORD8 *pu1_curr_ecd_data; |
3266 | 0 | WORD16 *pi2_curr_deq_data; |
3267 | 0 | WORD32 curr_deq_data_strd; |
3268 | 0 | WORD32 pred_strd; |
3269 | 0 | UWORD8 *pu1_pred; |
3270 | | |
3271 | | /* if NXN case the recon and ecd data is stored in temp buffers */ |
3272 | 0 | if(TU_EQ_SUBCU == func_proc_mode) |
3273 | 0 | { |
3274 | 0 | pu1_pred = &au1_cur_pred_data[curr_intra_buf_idx][0]; |
3275 | 0 | pred_strd = trans_size; |
3276 | 0 | pu1_curr_ecd_data = &au1_intra_coeffs[curr_intra_buf_idx][0]; |
3277 | 0 | pi2_curr_deq_data = &ai2_intra_deq_coeffs[curr_intra_buf_idx][0]; |
3278 | 0 | curr_deq_data_strd = trans_size; |
3279 | |
|
3280 | 0 | ASSERT(trans_size == MIN_TU_SIZE); |
3281 | 0 | } |
3282 | 0 | else |
3283 | 0 | { |
3284 | 0 | pu1_pred = (UWORD8 *)pv_pred_org; |
3285 | 0 | pred_strd = pred_strd_org; |
3286 | 0 | pu1_curr_ecd_data = pu1_ecd_data; |
3287 | 0 | pi2_curr_deq_data = pi2_deq_data; |
3288 | 0 | curr_deq_data_strd = deq_data_strd; |
3289 | 0 | } |
3290 | | |
3291 | 0 | pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[curr_intra_buf_idx]) + |
3292 | 0 | (ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride; |
3293 | |
|
3294 | 0 | if(is_sub_pu_in_hq == 1) |
3295 | 0 | { |
3296 | 0 | curr_pred_mode = cand_ctr; |
3297 | 0 | } |
3298 | 0 | else |
3299 | 0 | { |
3300 | 0 | curr_pred_mode = pu1_curr_mode[cand_ctr]; |
3301 | 0 | } |
3302 | | |
3303 | | /* If the candidate mode is 255, then break */ |
3304 | 0 | if(255 == curr_pred_mode) |
3305 | 0 | { |
3306 | 0 | break; |
3307 | 0 | } |
3308 | 0 | else if(250 == curr_pred_mode) |
3309 | 0 | { |
3310 | 0 | continue; |
3311 | 0 | } |
3312 | | |
3313 | | /* check if this mode needs to be evaluated or not. For 2nx2n cases, this */ |
3314 | | /* function will be called once per candidate, so this check has been done */ |
3315 | | /* outside this function call. For NxN case, this function will be called */ |
3316 | | /* only once, and all the candidates will be evaluated here. */ |
3317 | 0 | if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3) |
3318 | 0 | { |
3319 | 0 | if((TU_EQ_SUBCU == func_proc_mode) && |
3320 | 0 | (0 == ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][cand_ctr])) |
3321 | 0 | { |
3322 | 0 | continue; |
3323 | 0 | } |
3324 | 0 | } |
3325 | | |
3326 | | /* call reference filtering */ |
3327 | 0 | ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr( |
3328 | 0 | (UWORD8 *)ps_ctxt->pv_ref_sub_out, |
3329 | 0 | trans_size, |
3330 | 0 | (UWORD8 *)ps_ctxt->pv_ref_filt_out, |
3331 | 0 | curr_pred_mode, |
3332 | 0 | ps_ctxt->i1_strong_intra_smoothing_enable_flag); |
3333 | | |
3334 | | /* use the look up to get the function idx */ |
3335 | 0 | luma_pred_func_idx = g_i4_ip_funcs[curr_pred_mode]; |
3336 | | |
3337 | | /* call the intra prediction function */ |
3338 | 0 | ps_ctxt->apf_lum_ip[luma_pred_func_idx]( |
3339 | 0 | (UWORD8 *)ps_ctxt->pv_ref_filt_out, |
3340 | 0 | 1, |
3341 | 0 | pu1_pred, |
3342 | 0 | pred_strd, |
3343 | 0 | trans_size, |
3344 | 0 | curr_pred_mode); |
3345 | | |
3346 | | /* populate the coeffs scan idx */ |
3347 | 0 | ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT; |
3348 | | |
3349 | | /* for luma 4x4 and 8x8 transforms based on intra pred mode scan is chosen */ |
3350 | 0 | if(trans_size < 16) |
3351 | 0 | { |
3352 | | /* for modes from 22 upto 30 horizontal scan is used */ |
3353 | 0 | if((curr_pred_mode > 21) && (curr_pred_mode < 31)) |
3354 | 0 | { |
3355 | 0 | ps_ctxt->i4_scan_idx = SCAN_HORZ; |
3356 | 0 | } |
3357 | | /* for modes from 6 upto 14 vertical scan is used */ |
3358 | 0 | else if((curr_pred_mode > 5) && (curr_pred_mode < 15)) |
3359 | 0 | { |
3360 | 0 | ps_ctxt->i4_scan_idx = SCAN_VERT; |
3361 | 0 | } |
3362 | 0 | } |
3363 | | |
3364 | | /* RDOPT copy States : TU init (best until prev TU) to current */ |
3365 | 0 | COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
3366 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] |
3367 | 0 | .s_cabac_ctxt.au1_ctxt_models[0] + |
3368 | 0 | IHEVC_CAB_COEFFX_PREFIX, |
3369 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
3370 | 0 | IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
3371 | |
|
3372 | 0 | i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq; |
3373 | 0 | i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh; |
3374 | |
|
3375 | | #if DISABLE_RDOQ_INTRA |
3376 | | i4_perform_rdoq = 0; |
3377 | | #endif |
3378 | | |
3379 | | /*2 Multi-dimensional array based on trans size of rounding factor to be added here */ |
3380 | | /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */ |
3381 | | /* Currently the complete array will contain only single value*/ |
3382 | | /*The rounding factor is calculated with the formula |
3383 | | Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2 |
3384 | | rounding factor = (1 - DeadZone Val) |
3385 | | |
3386 | | Assumption: Cabac states of All the sub-blocks in the TU are considered independent |
3387 | | */ |
3388 | 0 | if((ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING)) |
3389 | 0 | { |
3390 | 0 | if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0)) |
3391 | 0 | { |
3392 | 0 | double i4_lamda_modifier; |
3393 | |
|
3394 | 0 | if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id)) |
3395 | 0 | { |
3396 | 0 | i4_lamda_modifier = |
3397 | 0 | ps_ctxt->i4_lamda_modifier * |
3398 | 0 | CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00); |
3399 | 0 | } |
3400 | 0 | else |
3401 | 0 | { |
3402 | 0 | i4_lamda_modifier = ps_ctxt->i4_lamda_modifier; |
3403 | 0 | } |
3404 | 0 | if(ps_ctxt->i4_use_const_lamda_modifier) |
3405 | 0 | { |
3406 | 0 | if(ISLICE == ps_ctxt->i1_slice_type) |
3407 | 0 | { |
3408 | 0 | i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier; |
3409 | 0 | } |
3410 | 0 | else |
3411 | 0 | { |
3412 | 0 | i4_lamda_modifier = CONST_LAMDA_MOD_VAL; |
3413 | 0 | } |
3414 | 0 | } |
3415 | |
|
3416 | 0 | ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] = |
3417 | 0 | &ps_ctxt->i4_quant_round_tu[0][0]; |
3418 | 0 | ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] = |
3419 | 0 | &ps_ctxt->i4_quant_round_tu[1][0]; |
3420 | |
|
3421 | 0 | memset( |
3422 | 0 | ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3], |
3423 | 0 | 0, |
3424 | 0 | trans_size * trans_size * sizeof(WORD32)); |
3425 | 0 | memset( |
3426 | 0 | ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3], |
3427 | 0 | 0, |
3428 | 0 | trans_size * trans_size * sizeof(WORD32)); |
3429 | |
|
3430 | 0 | ihevce_quant_rounding_factor_gen( |
3431 | 0 | trans_size, |
3432 | 0 | 1, |
3433 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt, |
3434 | 0 | ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3], |
3435 | 0 | ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3], |
3436 | 0 | i4_lamda_modifier, |
3437 | 0 | 1); |
3438 | 0 | } |
3439 | 0 | else |
3440 | 0 | { |
3441 | 0 | ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] = |
3442 | 0 | ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3]; |
3443 | 0 | ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] = |
3444 | 0 | ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3]; |
3445 | 0 | } |
3446 | 0 | } |
3447 | | |
3448 | | /* call T Q IT IQ and recon function */ |
3449 | 0 | ai4_cbf[curr_intra_buf_idx] = ihevce_t_q_iq_ssd_scan_fxn( |
3450 | 0 | ps_ctxt, |
3451 | 0 | pu1_pred, |
3452 | 0 | pred_strd, |
3453 | 0 | (UWORD8 *)pv_curr_src, |
3454 | 0 | src_strd, |
3455 | 0 | pi2_curr_deq_data, |
3456 | 0 | curr_deq_data_strd, |
3457 | 0 | pu1_recon, |
3458 | 0 | i4_recon_stride, |
3459 | 0 | pu1_curr_ecd_data, |
3460 | 0 | pu1_csbf_buf, |
3461 | 0 | csbf_strd, |
3462 | 0 | trans_size, |
3463 | 0 | PRED_MODE_INTRA, |
3464 | 0 | &ai8_cand_rdopt_cost[curr_intra_buf_idx], |
3465 | 0 | &ai4_curr_bytes[curr_intra_buf_idx], |
3466 | 0 | &ai4_tu_bits[curr_intra_buf_idx], |
3467 | 0 | &au4_tu_sad[curr_intra_buf_idx], |
3468 | 0 | &ai4_zero_col[curr_intra_buf_idx], |
3469 | 0 | &ai4_zero_row[curr_intra_buf_idx], |
3470 | 0 | &au1_recon_availability[curr_intra_buf_idx], |
3471 | 0 | i4_perform_rdoq, |
3472 | 0 | i4_perform_sbh, |
3473 | 0 | #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
3474 | 0 | i4_alpha_stim_multiplier, |
3475 | 0 | u1_is_cu_noisy, |
3476 | 0 | #endif |
3477 | 0 | u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD, |
3478 | 0 | 1 /*early_cbf */ |
3479 | 0 | ); |
3480 | |
|
3481 | | #if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
3482 | | if(u1_is_cu_noisy && i4_alpha_stim_multiplier) |
3483 | | { |
3484 | | #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT |
3485 | | ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion( |
3486 | | pv_curr_src, |
3487 | | src_strd, |
3488 | | pu1_pred, |
3489 | | pred_strd, |
3490 | | ai8_cand_rdopt_cost[curr_intra_buf_idx], |
3491 | | i4_alpha_stim_multiplier, |
3492 | | trans_size, |
3493 | | 0, |
3494 | | ps_ctxt->u1_enable_psyRDOPT, |
3495 | | NULL_PLANE); |
3496 | | #else |
3497 | | if(u1_compute_spatial_ssd && au1_recon_availability[curr_intra_buf_idx]) |
3498 | | { |
3499 | | ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion( |
3500 | | pv_curr_src, |
3501 | | src_strd, |
3502 | | pu1_recon, |
3503 | | i4_recon_stride, |
3504 | | ai8_cand_rdopt_cost[curr_intra_buf_idx], |
3505 | | i4_alpha_stim_multiplier, |
3506 | | trans_size, |
3507 | | 0, |
3508 | | ps_ctxt->u1_enable_psyRDOPT, |
3509 | | NULL_PLANE); |
3510 | | } |
3511 | | else |
3512 | | { |
3513 | | ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion( |
3514 | | pv_curr_src, |
3515 | | src_strd, |
3516 | | pu1_pred, |
3517 | | pred_strd, |
3518 | | ai8_cand_rdopt_cost[curr_intra_buf_idx], |
3519 | | i4_alpha_stim_multiplier, |
3520 | | trans_size, |
3521 | | 0, |
3522 | | ps_ctxt->u1_enable_psyRDOPT, |
3523 | | NULL_PLANE); |
3524 | | } |
3525 | | #endif |
3526 | | } |
3527 | | #endif |
3528 | |
|
3529 | 0 | if(TU_EQ_SUBCU == func_proc_mode) |
3530 | 0 | { |
3531 | 0 | ASSERT(ai4_curr_bytes[curr_intra_buf_idx] < MAX_SCAN_COEFFS_BYTES_4x4); |
3532 | 0 | } |
3533 | | |
3534 | | /* based on CBF/No CBF copy the corresponding state */ |
3535 | 0 | if(0 == ai4_cbf[curr_intra_buf_idx]) |
3536 | 0 | { |
3537 | | /* RDOPT copy States :update to init state if 0 cbf */ |
3538 | 0 | COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
3539 | 0 | &au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] + |
3540 | 0 | IHEVC_CAB_COEFFX_PREFIX, |
3541 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
3542 | 0 | IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
3543 | 0 | } |
3544 | 0 | else |
3545 | 0 | { |
3546 | | /* RDOPT copy States :update to new state only if CBF is non zero */ |
3547 | 0 | COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
3548 | 0 | &au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] + |
3549 | 0 | IHEVC_CAB_COEFFX_PREFIX, |
3550 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] |
3551 | 0 | .s_cabac_ctxt.au1_ctxt_models[0] + |
3552 | 0 | IHEVC_CAB_COEFFX_PREFIX, |
3553 | 0 | IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
3554 | 0 | } |
3555 | | |
3556 | | /* call the function which perform intra mode prediction */ |
3557 | 0 | ihevce_intra_pred_mode_signaling( |
3558 | 0 | ps_top_nbr_4x4->b6_luma_intra_mode, |
3559 | 0 | ps_tmp_lt_4x4->b6_luma_intra_mode, |
3560 | 0 | s_nbr.u1_top_avail, |
3561 | 0 | s_nbr.u1_left_avail, |
3562 | 0 | cu_pos_y, |
3563 | 0 | curr_pred_mode, |
3564 | 0 | &as_intra_prev_rem[curr_intra_buf_idx]); |
3565 | | /******************************************************************/ |
3566 | | /* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN |
3567 | | The bits for these are evaluated for every RDO mode of current subcu |
3568 | | as they can significantly contribute to RDO cost. Note that these |
3569 | | bits are not accounted for here (ai8_cand_rdopt_cost) as they |
3570 | | are accounted for in encode_cu call later */ |
3571 | | |
3572 | | /******************************************************************/ |
3573 | | /* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN |
3574 | | The bits for these are evaluated for every RDO mode of current subcu |
3575 | | as they can significantly contribute to RDO cost. Note that these |
3576 | | bits are not accounted for here (ai8_cand_rdopt_cost) as they |
3577 | | are accounted for in encode_cu call later */ |
3578 | | |
3579 | | /* Estimate bits to encode prev rem flag for NXN mode */ |
3580 | 0 | { |
3581 | 0 | WORD32 bits_frac = gau2_ihevce_cabac_bin_to_bits |
3582 | 0 | [u1_prev_flag_cabac_ctxt ^ |
3583 | 0 | as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag]; |
3584 | | |
3585 | | /* rounding the fractional bits to nearest integer */ |
3586 | 0 | bits = ((bits_frac + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q); |
3587 | 0 | } |
3588 | | |
3589 | | /* based on prev flag all the mpmidx bits and rem bits */ |
3590 | 0 | if(1 == as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag) |
3591 | 0 | { |
3592 | | /* mpm_idx */ |
3593 | 0 | bits += as_intra_prev_rem[curr_intra_buf_idx].b2_mpm_idx ? 2 : 1; |
3594 | 0 | } |
3595 | 0 | else |
3596 | 0 | { |
3597 | | /* rem intra mode */ |
3598 | 0 | bits += 5; |
3599 | 0 | } |
3600 | |
|
3601 | 0 | bits += ai4_tu_bits[curr_intra_buf_idx]; |
3602 | | |
3603 | | /* compute the total cost for current candidate */ |
3604 | 0 | curr_cost = ai8_cand_rdopt_cost[curr_intra_buf_idx]; |
3605 | | |
3606 | | /* get the final ssd cost */ |
3607 | 0 | curr_cost += |
3608 | 0 | COMPUTE_RATE_COST_CLIP30(bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); |
3609 | | |
3610 | | /* check of the best candidate cost */ |
3611 | 0 | if(curr_cost < best_cand_cost) |
3612 | 0 | { |
3613 | 0 | best_cand_cost = curr_cost; |
3614 | 0 | best_cand_idx = cand_ctr; |
3615 | 0 | best_intra_buf_idx = curr_intra_buf_idx; |
3616 | 0 | curr_intra_buf_idx = !curr_intra_buf_idx; |
3617 | 0 | } |
3618 | 0 | } |
3619 | | |
3620 | | /*************** For TU_EQ_SUBCU case *****************/ |
3621 | | /* Copy the pred for best cand. to the final pred array */ |
3622 | | /* Copy the iq-coeff for best cand. to the final array */ |
3623 | | /* copy the best coeffs data to final buffer */ |
3624 | 0 | if(TU_EQ_SUBCU == func_proc_mode) |
3625 | 0 | { |
3626 | | /* Copy the pred for best cand. to the final pred array */ |
3627 | |
|
3628 | 0 | ps_ctxt->s_cmn_opt_func.pf_copy_2d( |
3629 | 0 | (UWORD8 *)pv_pred_org, |
3630 | 0 | pred_strd_org, |
3631 | 0 | &au1_cur_pred_data[best_intra_buf_idx][0], |
3632 | 0 | trans_size, |
3633 | 0 | trans_size, |
3634 | 0 | trans_size); |
3635 | | |
3636 | | /* Copy the deq-coeff for best cand. to the final array */ |
3637 | |
|
3638 | 0 | ps_ctxt->s_cmn_opt_func.pf_copy_2d( |
3639 | 0 | (UWORD8 *)pi2_deq_data, |
3640 | 0 | deq_data_strd << 1, |
3641 | 0 | (UWORD8 *)&ai2_intra_deq_coeffs[best_intra_buf_idx][0], |
3642 | 0 | trans_size << 1, |
3643 | 0 | trans_size << 1, |
3644 | 0 | trans_size); |
3645 | | /* copy the coeffs to final cu ecd bytes buffer */ |
3646 | 0 | memcpy( |
3647 | 0 | pu1_ecd_data, |
3648 | 0 | &au1_intra_coeffs[best_intra_buf_idx][0], |
3649 | 0 | ai4_curr_bytes[best_intra_buf_idx]); |
3650 | |
|
3651 | 0 | pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[best_intra_buf_idx]) + |
3652 | 0 | (ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride; |
3653 | 0 | } |
3654 | | |
3655 | | /*---------- Calculate Recon for the best INTRA mode ---------*/ |
3656 | | /* TU_EQ_CU case : No need for recon, otherwise recon is required */ |
3657 | | /* Compute recon only for the best mode for TU_EQ_SUBCU case */ |
3658 | 0 | if(u1_compute_recon) |
3659 | 0 | { |
3660 | 0 | ihevce_it_recon_fxn( |
3661 | 0 | ps_ctxt, |
3662 | 0 | pi2_deq_data, |
3663 | 0 | deq_data_strd, |
3664 | 0 | (UWORD8 *)pv_pred_org, |
3665 | 0 | pred_strd_org, |
3666 | 0 | pu1_recon, |
3667 | 0 | i4_recon_stride, |
3668 | 0 | pu1_ecd_data, |
3669 | 0 | trans_size, |
3670 | 0 | PRED_MODE_INTRA, |
3671 | 0 | ai4_cbf[best_intra_buf_idx], |
3672 | 0 | ai4_zero_col[best_intra_buf_idx], |
3673 | 0 | ai4_zero_row[best_intra_buf_idx]); |
3674 | |
|
3675 | 0 | ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx; |
3676 | 0 | } |
3677 | 0 | else if(u1_compute_spatial_ssd && au1_recon_availability[best_intra_buf_idx]) |
3678 | 0 | { |
3679 | 0 | ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx; |
3680 | 0 | } |
3681 | 0 | else |
3682 | 0 | { |
3683 | 0 | ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX; |
3684 | 0 | } |
3685 | | |
3686 | | /* RDOPT copy States :update to best modes state */ |
3687 | 0 | COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
3688 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
3689 | 0 | &au1_intra_nxn_rdopt_ctxt_models[best_intra_buf_idx][0] + IHEVC_CAB_COEFFX_PREFIX, |
3690 | 0 | IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
3691 | | |
3692 | | /* copy the prev,mpm_idx and rem modes from best cand */ |
3693 | 0 | ps_final_prms->as_intra_prev_rem[ctr] = as_intra_prev_rem[best_intra_buf_idx]; |
3694 | | |
3695 | | /* update the cabac context of prev intra pred mode flag */ |
3696 | 0 | u1_prev_flag_cabac_ctxt = gau1_ihevc_next_state |
3697 | 0 | [(u1_prev_flag_cabac_ctxt << 1) | |
3698 | 0 | as_intra_prev_rem[best_intra_buf_idx].b1_prev_intra_luma_pred_flag]; |
3699 | | |
3700 | | /* accumulate the TU bits into cu bits */ |
3701 | 0 | cu_bits += ai4_tu_bits[best_intra_buf_idx]; |
3702 | | |
3703 | | /* copy the intra pred mode for chroma reuse */ |
3704 | 0 | if(is_sub_pu_in_hq == 0) |
3705 | 0 | { |
3706 | 0 | *pu1_intra_pred_mode = pu1_curr_mode[best_cand_idx]; |
3707 | 0 | } |
3708 | 0 | else |
3709 | 0 | { |
3710 | 0 | *pu1_intra_pred_mode = best_cand_idx; |
3711 | 0 | } |
3712 | | |
3713 | | /* Store luma mode as chroma mode. If chroma prcs happens, and |
3714 | | if a diff. mode wins, it should update this!! */ |
3715 | 0 | if(1 == chrm_present_flag) |
3716 | 0 | { |
3717 | 0 | if(is_sub_pu_in_hq == 0) |
3718 | 0 | { |
3719 | 0 | ps_final_prms->u1_chroma_intra_pred_actual_mode = |
3720 | 0 | ((ps_ctxt->u1_chroma_array_type == 2) |
3721 | 0 | ? gau1_chroma422_intra_angle_mapping[pu1_curr_mode[best_cand_idx]] |
3722 | 0 | : pu1_curr_mode[best_cand_idx]); |
3723 | 0 | } |
3724 | 0 | else |
3725 | 0 | { |
3726 | 0 | ps_final_prms->u1_chroma_intra_pred_actual_mode = |
3727 | 0 | ((ps_ctxt->u1_chroma_array_type == 2) |
3728 | 0 | ? gau1_chroma422_intra_angle_mapping[best_cand_idx] |
3729 | 0 | : best_cand_idx); |
3730 | 0 | } |
3731 | |
|
3732 | 0 | ps_final_prms->u1_chroma_intra_pred_mode = 4; |
3733 | 0 | } |
3734 | | |
3735 | | /*remember the cbf flag to replicate qp for 4x4 neighbour*/ |
3736 | 0 | ps_final_prms->u1_is_cu_coded |= ai4_cbf[best_intra_buf_idx]; |
3737 | | |
3738 | | /*accumulate ssd over all TU of intra CU*/ |
3739 | 0 | ps_final_prms->u4_cu_sad += au4_tu_sad[best_intra_buf_idx]; |
3740 | | |
3741 | | /* update the bytes */ |
3742 | 0 | ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons; |
3743 | 0 | ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = |
3744 | 0 | ai4_curr_bytes[best_intra_buf_idx]; |
3745 | | /* update the zero_row and col info for the final mode */ |
3746 | 0 | ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col = |
3747 | 0 | ai4_zero_col[best_intra_buf_idx]; |
3748 | 0 | ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row = |
3749 | 0 | ai4_zero_row[best_intra_buf_idx]; |
3750 | |
|
3751 | 0 | ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons; |
3752 | | |
3753 | | /* update the total bytes cons */ |
3754 | 0 | ecd_data_bytes_cons += ai4_curr_bytes[best_intra_buf_idx]; |
3755 | 0 | pu1_ecd_data += ai4_curr_bytes[best_intra_buf_idx]; |
3756 | |
|
3757 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = ai4_cbf[best_intra_buf_idx]; |
3758 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0; |
3759 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0; |
3760 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0; |
3761 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0; |
3762 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag; |
3763 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp; |
3764 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0; |
3765 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0; |
3766 | 0 | GETRANGE(tx_size, trans_size); |
3767 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3; |
3768 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x; |
3769 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y; |
3770 | | |
3771 | | /* replicate the nbr 4x4 structure for all 4x4 blocks of current TU */ |
3772 | 0 | ps_nbr_4x4->b1_skip_flag = 0; |
3773 | 0 | ps_nbr_4x4->b1_intra_flag = 1; |
3774 | 0 | ps_nbr_4x4->b1_pred_l0_flag = 0; |
3775 | 0 | ps_nbr_4x4->b1_pred_l1_flag = 0; |
3776 | |
|
3777 | 0 | if(is_sub_pu_in_hq == 0) |
3778 | 0 | { |
3779 | 0 | ps_nbr_4x4->b6_luma_intra_mode = pu1_curr_mode[best_cand_idx]; |
3780 | 0 | } |
3781 | 0 | else |
3782 | 0 | { |
3783 | 0 | ps_nbr_4x4->b6_luma_intra_mode = best_cand_idx; |
3784 | 0 | } |
3785 | |
|
3786 | 0 | ps_nbr_4x4->b1_y_cbf = ai4_cbf[best_intra_buf_idx]; |
3787 | | |
3788 | | /* since tu size can be less than cusize, replication is done with strd */ |
3789 | 0 | { |
3790 | 0 | WORD32 i, j; |
3791 | 0 | nbr_4x4_t *ps_tmp_4x4; |
3792 | |
|
3793 | 0 | ps_tmp_4x4 = ps_nbr_4x4; |
3794 | |
|
3795 | 0 | for(i = 0; i < num_4x4_in_tu; i++) |
3796 | 0 | { |
3797 | 0 | for(j = 0; j < num_4x4_in_tu; j++) |
3798 | 0 | { |
3799 | 0 | ps_tmp_4x4[j] = *ps_nbr_4x4; |
3800 | 0 | } |
3801 | | /* row level update*/ |
3802 | 0 | ps_tmp_4x4 += num_4x4_in_cu; |
3803 | 0 | } |
3804 | 0 | } |
3805 | |
|
3806 | 0 | if(TU_EQ_SUBCU == func_proc_mode) |
3807 | 0 | { |
3808 | 0 | pu1_luma_mode += ((MAX_INTRA_CU_CANDIDATES * 4) + 2 + 1); |
3809 | 0 | } |
3810 | |
|
3811 | 0 | if((num_cu_parts > 1) && (ctr < 3)) |
3812 | 0 | { |
3813 | | /* set the neighbour map to 1 */ |
3814 | 0 | ihevce_set_nbr_map( |
3815 | 0 | ps_ctxt->pu1_ctb_nbr_map, |
3816 | 0 | ps_ctxt->i4_nbr_map_strd, |
3817 | 0 | cu_pos_x, |
3818 | 0 | cu_pos_y, |
3819 | 0 | trans_size >> 2, |
3820 | 0 | 1); |
3821 | | |
3822 | | /* block level updates block number (1 & 3 )*/ |
3823 | 0 | pv_curr_src = (UWORD8 *)pv_curr_src + trans_size; |
3824 | 0 | pv_pred_org = (UWORD8 *)pv_pred_org + trans_size; |
3825 | 0 | pi2_deq_data += trans_size; |
3826 | |
|
3827 | 0 | switch(ctr) |
3828 | 0 | { |
3829 | 0 | case 0: |
3830 | 0 | { |
3831 | 0 | pu1_left = pu1_recon + trans_size - 1; |
3832 | 0 | pu1_top += trans_size; |
3833 | 0 | pu1_top_left = pu1_top - 1; |
3834 | 0 | left_strd = i4_recon_stride; |
3835 | |
|
3836 | 0 | break; |
3837 | 0 | } |
3838 | 0 | case 1: |
3839 | 0 | { |
3840 | 0 | ASSERT( |
3841 | 0 | (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 0) || |
3842 | 0 | (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 1)); |
3843 | | |
3844 | | /* Since the 'lumaRefSubstitution' function expects both Top and */ |
3845 | | /* TopRight recon pixels to be present in the same buffer */ |
3846 | 0 | if(ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] != |
3847 | 0 | ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]) |
3848 | 0 | { |
3849 | 0 | UWORD8 *pu1_src = |
3850 | 0 | ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs |
3851 | 0 | [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) + |
3852 | 0 | trans_size; |
3853 | 0 | UWORD8 *pu1_dst = |
3854 | 0 | ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs |
3855 | 0 | [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) + |
3856 | 0 | trans_size; |
3857 | |
|
3858 | 0 | ps_ctxt->s_cmn_opt_func.pf_copy_2d( |
3859 | 0 | pu1_dst, i4_recon_stride, pu1_src, i4_recon_stride, trans_size, trans_size); |
3860 | |
|
3861 | 0 | ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] = |
3862 | 0 | ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]; |
3863 | 0 | } |
3864 | |
|
3865 | 0 | pu1_left = (UWORD8 *)pv_cu_left + trans_size * cu_left_stride; |
3866 | 0 | pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs |
3867 | 0 | [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) + |
3868 | 0 | (trans_size - 1) * i4_recon_stride; |
3869 | 0 | pu1_top_left = pu1_left - cu_left_stride; |
3870 | 0 | left_strd = cu_left_stride; |
3871 | |
|
3872 | 0 | break; |
3873 | 0 | } |
3874 | 0 | case 2: |
3875 | 0 | { |
3876 | 0 | ASSERT( |
3877 | 0 | (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 0) || |
3878 | 0 | (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 1)); |
3879 | | |
3880 | 0 | pu1_left = pu1_recon + trans_size - 1; |
3881 | 0 | pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs |
3882 | 0 | [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) + |
3883 | 0 | (trans_size - 1) * i4_recon_stride + trans_size; |
3884 | 0 | pu1_top_left = pu1_top - 1; |
3885 | 0 | left_strd = i4_recon_stride; |
3886 | |
|
3887 | 0 | break; |
3888 | 0 | } |
3889 | 0 | } |
3890 | | |
3891 | 0 | pu1_csbf_buf += num_4x4_in_tu; |
3892 | 0 | cu_pos_x += num_4x4_in_tu; |
3893 | 0 | ps_nbr_4x4 += num_4x4_in_tu; |
3894 | 0 | ps_top_nbr_4x4 += num_4x4_in_tu; |
3895 | 0 | ps_tmp_lt_4x4 = ps_nbr_4x4 - 1; |
3896 | |
|
3897 | 0 | pu1_intra_pred_mode++; |
3898 | | |
3899 | | /* after 2 blocks increment the pointers to bottom blocks */ |
3900 | 0 | if(1 == ctr) |
3901 | 0 | { |
3902 | 0 | pv_curr_src = (UWORD8 *)pv_curr_src - (trans_size << 1); |
3903 | 0 | pv_curr_src = (UWORD8 *)pv_curr_src + (trans_size * src_strd); |
3904 | |
|
3905 | 0 | pv_pred_org = (UWORD8 *)pv_pred_org - (trans_size << 1); |
3906 | 0 | pv_pred_org = (UWORD8 *)pv_pred_org + (trans_size * pred_strd_org); |
3907 | 0 | pi2_deq_data -= (trans_size << 1); |
3908 | 0 | pi2_deq_data += (trans_size * deq_data_strd); |
3909 | |
|
3910 | 0 | pu1_csbf_buf -= (num_4x4_in_tu << 1); |
3911 | 0 | pu1_csbf_buf += (num_4x4_in_tu * csbf_strd); |
3912 | |
|
3913 | 0 | ps_nbr_4x4 -= (num_4x4_in_tu << 1); |
3914 | 0 | ps_nbr_4x4 += (num_4x4_in_tu * num_4x4_in_cu); |
3915 | 0 | ps_top_nbr_4x4 = ps_nbr_4x4 - num_4x4_in_cu; |
3916 | 0 | ps_tmp_lt_4x4 = ps_left_nbr_4x4 + (num_4x4_in_tu * nbr_4x4_left_strd); |
3917 | | |
3918 | | /* decrement pos x to start */ |
3919 | 0 | cu_pos_x -= (num_4x4_in_tu << 1); |
3920 | 0 | cu_pos_y += num_4x4_in_tu; |
3921 | 0 | } |
3922 | 0 | } |
3923 | | |
3924 | 0 | #if RDOPT_ENABLE |
3925 | | /* compute the RDOPT cost for the current TU */ |
3926 | 0 | ai8_cand_rdopt_cost[best_intra_buf_idx] += COMPUTE_RATE_COST_CLIP30( |
3927 | 0 | ai4_tu_bits[best_intra_buf_idx], ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); |
3928 | 0 | #endif |
3929 | | |
3930 | | /* accumulate the costs */ |
3931 | 0 | total_rdopt_cost += ai8_cand_rdopt_cost[best_intra_buf_idx]; |
3932 | |
|
3933 | 0 | if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1) |
3934 | 0 | { |
3935 | | /* Early exit : If the current running cost exceeds |
3936 | | the prev. best mode cost, break */ |
3937 | 0 | if(total_rdopt_cost > prev_best_rdopt_cost) |
3938 | 0 | { |
3939 | 0 | return (total_rdopt_cost); |
3940 | 0 | } |
3941 | 0 | } |
3942 | | |
3943 | | /* if transform size is 4x4 then only first luma 4x4 will have chroma*/ |
3944 | 0 | chrm_present_flag = (4 != trans_size) ? 1 : INTRA_PRED_CHROMA_IDX_NONE; |
3945 | |
|
3946 | 0 | pu4_nbr_flags++; |
3947 | 0 | } |
3948 | | /* Modify the cost function for this CU. */ |
3949 | | /* loop in for 8x8 blocks */ |
3950 | 0 | if(ps_ctxt->u1_enable_psyRDOPT) |
3951 | 0 | { |
3952 | 0 | UWORD8 *pu1_recon_cu; |
3953 | 0 | WORD32 recon_stride; |
3954 | 0 | WORD32 curr_pos_x; |
3955 | 0 | WORD32 curr_pos_y; |
3956 | 0 | WORD32 start_index; |
3957 | 0 | WORD32 num_horz_cu_in_ctb; |
3958 | 0 | WORD32 cu_size; |
3959 | 0 | WORD32 had_block_size; |
3960 | | |
3961 | | /* tODO: sreenivasa ctb size has to be used appropriately */ |
3962 | 0 | had_block_size = 8; |
3963 | 0 | cu_size = ps_cu_analyse->u1_cu_size; /* todo */ |
3964 | 0 | num_horz_cu_in_ctb = 64 / had_block_size; |
3965 | |
|
3966 | 0 | curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */ |
3967 | 0 | curr_pos_y = ps_cu_analyse->b3_cu_pos_y << 3; /* pel units */ |
3968 | 0 | recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride; |
3969 | 0 | pu1_recon_cu = |
3970 | 0 | ((UWORD8 *)ps_final_prms->s_recon_datastore |
3971 | 0 | .apv_luma_recon_bufs[ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]); |
3972 | | /* + \ curr_pos_x + curr_pos_y * recon_stride; */ |
3973 | | |
3974 | | /* start index to index the source satd of curr cu in the current ctb*/ |
3975 | 0 | start_index = |
3976 | 0 | (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb; |
3977 | |
|
3978 | 0 | { |
3979 | 0 | total_rdopt_cost += ihevce_psy_rd_cost( |
3980 | 0 | ps_ctxt->ai4_source_satd_8x8, |
3981 | 0 | pu1_recon_cu, |
3982 | 0 | recon_stride, |
3983 | 0 | 1, // |
3984 | 0 | cu_size, |
3985 | 0 | 0, // pic type |
3986 | 0 | 0, //layer id |
3987 | 0 | ps_ctxt->i4_satd_lamda, // lambda |
3988 | 0 | start_index, |
3989 | 0 | ps_ctxt->u1_is_input_data_hbd, |
3990 | 0 | ps_ctxt->u4_psy_strength, |
3991 | 0 | &ps_ctxt->s_cmn_opt_func |
3992 | |
|
3993 | 0 | ); // 8 bit |
3994 | 0 | } |
3995 | 0 | } |
3996 | |
|
3997 | | #if !FORCE_INTRA_TU_DEPTH_TO_0 //RATIONALISE_NUM_RDO_MODES_IN_PQ_AND_HQ |
3998 | 0 | if(TU_EQ_SUBCU == func_proc_mode) |
3999 | 0 | { |
4000 | 0 | UWORD8 au1_tu_eq_cu_div2_modes[4]; |
4001 | 0 | UWORD8 au1_freq_of_mode[4]; |
4002 | |
|
4003 | 0 | WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D( |
4004 | 0 | ps_final_prms->au1_intra_pred_mode, au1_tu_eq_cu_div2_modes, au1_freq_of_mode, 4); |
4005 | |
|
4006 | 0 | if(1 == i4_num_clusters) |
4007 | 0 | { |
4008 | 0 | ps_final_prms->u2_num_pus_in_cu = 1; |
4009 | 0 | ps_final_prms->u1_part_mode = SIZE_2Nx2N; |
4010 | 0 | } |
4011 | 0 | } |
4012 | 0 | #endif |
4013 | | |
4014 | | /* store the num TUs*/ |
4015 | 0 | ps_final_prms->u2_num_tus_in_cu = u2_num_tus_in_cu; |
4016 | | |
4017 | | /* update the bytes consumed */ |
4018 | 0 | ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons; |
4019 | | |
4020 | | /* store the current cu size to final prms */ |
4021 | 0 | ps_final_prms->u1_cu_size = ps_cu_analyse->u1_cu_size; |
4022 | | |
4023 | | /* cu bits will be having luma residual bits till this point */ |
4024 | | /* if zero_cbf eval is disabled then cu bits will be zero */ |
4025 | 0 | ps_final_prms->u4_cu_luma_res_bits = cu_bits; |
4026 | | |
4027 | | /* ------------- Chroma processing -------------- */ |
4028 | | /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEED preset*/ |
4029 | 0 | if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt) |
4030 | 0 | { |
4031 | 0 | LWORD64 chrm_rdopt_cost; |
4032 | 0 | WORD32 chrm_rdopt_tu_bits; |
4033 | | |
4034 | | /* Store the current RDOPT cost to enable early exit in chrom_prcs */ |
4035 | 0 | ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost; |
4036 | |
|
4037 | 0 | chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt( |
4038 | 0 | ps_ctxt, |
4039 | 0 | curr_buf_idx, |
4040 | 0 | func_proc_mode, |
4041 | 0 | ps_chrm_cu_buf_prms->pu1_curr_src, |
4042 | 0 | ps_chrm_cu_buf_prms->i4_chrm_src_stride, |
4043 | 0 | ps_chrm_cu_buf_prms->pu1_cu_left, |
4044 | 0 | ps_chrm_cu_buf_prms->pu1_cu_top, |
4045 | 0 | ps_chrm_cu_buf_prms->pu1_cu_top_left, |
4046 | 0 | ps_chrm_cu_buf_prms->i4_cu_left_stride, |
4047 | 0 | cu_pos_x_8pelunits, |
4048 | 0 | cu_pos_y_8pelunits, |
4049 | 0 | &chrm_rdopt_tu_bits, |
4050 | 0 | i4_alpha_stim_multiplier, |
4051 | 0 | u1_is_cu_noisy); |
4052 | |
|
4053 | 0 | #if WEIGH_CHROMA_COST |
4054 | 0 | chrm_rdopt_cost = (LWORD64)( |
4055 | 0 | (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor + |
4056 | 0 | (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> |
4057 | 0 | CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT); |
4058 | 0 | #endif |
4059 | |
|
4060 | 0 | #if CHROMA_RDOPT_ENABLE |
4061 | 0 | total_rdopt_cost += chrm_rdopt_cost; |
4062 | 0 | #endif |
4063 | 0 | cu_bits += chrm_rdopt_tu_bits; |
4064 | | |
4065 | | /* cu bits for chroma residual if chroma rdopt is on */ |
4066 | | /* if zero_cbf eval is disabled then cu bits will be zero */ |
4067 | 0 | ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits; |
4068 | |
|
4069 | 0 | if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1) |
4070 | 0 | { |
4071 | | /* Early exit : If the current running cost exceeds |
4072 | | the prev. best mode cost, break */ |
4073 | 0 | if(total_rdopt_cost > prev_best_rdopt_cost) |
4074 | 0 | { |
4075 | 0 | return (total_rdopt_cost); |
4076 | 0 | } |
4077 | 0 | } |
4078 | 0 | } |
4079 | 0 | else |
4080 | 0 | {} |
4081 | | |
4082 | | /* RDOPT copy States : Best after all luma TUs to current */ |
4083 | 0 | COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
4084 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] |
4085 | 0 | .s_cabac_ctxt.au1_ctxt_models[0] + |
4086 | 0 | IHEVC_CAB_COEFFX_PREFIX, |
4087 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
4088 | 0 | IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
4089 | | |
4090 | | /* get the neighbour availability flags for current cu */ |
4091 | 0 | ihevce_get_only_nbr_flag( |
4092 | 0 | &s_nbr, |
4093 | 0 | ps_ctxt->pu1_ctb_nbr_map, |
4094 | 0 | ps_ctxt->i4_nbr_map_strd, |
4095 | 0 | (cu_pos_x_8pelunits << 1), |
4096 | 0 | (cu_pos_y_8pelunits << 1), |
4097 | 0 | (trans_size << 1), |
4098 | 0 | (trans_size << 1)); |
4099 | | |
4100 | | /* call the entropy rdo encode to get the bit estimate for current cu */ |
4101 | | /*if ZERO_CBF eval is enabled then this function will return only CU header bits */ |
4102 | 0 | { |
4103 | | /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/ |
4104 | 0 | WORD32 cbf_bits, header_bits; |
4105 | |
|
4106 | 0 | header_bits = ihevce_entropy_rdo_encode_cu( |
4107 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt, |
4108 | 0 | ps_final_prms, |
4109 | 0 | cu_pos_x_8pelunits, |
4110 | 0 | cu_pos_y_8pelunits, |
4111 | 0 | ps_cu_analyse->u1_cu_size, |
4112 | 0 | s_nbr.u1_top_avail, |
4113 | 0 | s_nbr.u1_left_avail, |
4114 | 0 | &ps_final_prms->pu1_cu_coeffs[0], |
4115 | 0 | &cbf_bits); |
4116 | |
|
4117 | 0 | cu_bits += header_bits; |
4118 | | |
4119 | | /* cbf bits are excluded from header bits, instead considered as texture bits */ |
4120 | | /* incase if zero cbf eval is disabled then texture bits gets added here */ |
4121 | 0 | ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits); |
4122 | 0 | ps_final_prms->u4_cu_cbf_bits = cbf_bits; |
4123 | |
|
4124 | 0 | #if RDOPT_ENABLE |
4125 | | /* add the cost of coding the cu bits */ |
4126 | 0 | total_rdopt_cost += |
4127 | 0 | COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); |
4128 | 0 | #endif |
4129 | 0 | } |
4130 | 0 | return (total_rdopt_cost); |
4131 | 0 | } |
4132 | | /*! |
4133 | | ****************************************************************************** |
4134 | | * \if Function name : ihevce_inter_rdopt_cu_ntu \endif |
4135 | | * |
4136 | | * \brief |
4137 | | * Inter Coding unit function which performs the TQ IT IQ recon for luma |
4138 | | * |
4139 | | * \param[in] ps_ctxt enc_loop module ctxt pointer |
4140 | | * \param[in] ps_inter_cand pointer to inter candidate structure |
4141 | | * \param[in] pv_src pointer to source data buffer |
4142 | | * \param[in] cu_size Current CU size |
4143 | | * \param[in] cu_pos_x cu position x w.r.t to ctb |
4144 | | * \param[in] cu_pos_y cu position y w.r.t to ctb |
4145 | | * \param[in] ps_cu_prms pointer to CU params structure (supplies the luma source stride) |
4146 | | * \param[in] curr_buf_idx buffer index for current output storage |
4147 | | * \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure |
4148 | | * |
4149 | | * \return |
4150 | | * Rdopt cost |
4151 | | * |
4152 | | * \author |
4153 | | * Ittiam |
4154 | | * |
4155 | | ***************************************************************************** |
4156 | | */ |
4157 | | LWORD64 ihevce_inter_rdopt_cu_ntu( |
4158 | | ihevce_enc_loop_ctxt_t *ps_ctxt, |
4159 | | enc_loop_cu_prms_t *ps_cu_prms, |
4160 | | void *pv_src, |
4161 | | WORD32 cu_size, |
4162 | | WORD32 cu_pos_x, |
4163 | | WORD32 cu_pos_y, |
4164 | | WORD32 curr_buf_idx, |
4165 | | enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms, |
4166 | | cu_inter_cand_t *ps_inter_cand, |
4167 | | cu_analyse_t *ps_cu_analyse, |
4168 | | WORD32 i4_alpha_stim_multiplier) |
4169 | 0 | { |
4170 | 0 | enc_loop_cu_final_prms_t *ps_final_prms; |
4171 | 0 | nbr_4x4_t *ps_nbr_4x4; |
4172 | 0 | tu_prms_t s_tu_prms[64 * 4]; |
4173 | 0 | tu_prms_t *ps_tu_prms; |
4174 | |
|
4175 | 0 | WORD32 i4_perform_rdoq; |
4176 | 0 | WORD32 i4_perform_sbh; |
4177 | 0 | WORD32 ai4_tu_split_flags[4]; |
4178 | 0 | WORD32 ai4_tu_early_cbf[4]; |
4179 | 0 | WORD32 num_split_flags = 1; |
4180 | 0 | WORD32 i; |
4181 | 0 | UWORD8 u1_tu_size; |
4182 | 0 | UWORD8 *pu1_pred; |
4183 | 0 | UWORD8 *pu1_ecd_data; |
4184 | 0 | WORD16 *pi2_deq_data; |
4185 | 0 | UWORD8 *pu1_csbf_buf; |
4186 | 0 | UWORD8 *pu1_tu_sz_sft; |
4187 | 0 | UWORD8 *pu1_tu_posx; |
4188 | 0 | UWORD8 *pu1_tu_posy; |
4189 | 0 | LWORD64 total_rdopt_cost; |
4190 | 0 | WORD32 ctr; |
4191 | 0 | WORD32 chrm_ctr; |
4192 | 0 | WORD32 num_tu_in_cu = 0; |
4193 | 0 | WORD32 pred_stride; |
4194 | 0 | WORD32 recon_stride; |
4195 | 0 | WORD32 trans_size = ps_cu_analyse->u1_cu_size; |
4196 | 0 | WORD32 csbf_strd; |
4197 | 0 | WORD32 chrm_present_flag; |
4198 | 0 | WORD32 ecd_data_bytes_cons; |
4199 | 0 | WORD32 num_4x4_in_cu; |
4200 | 0 | WORD32 num_4x4_in_tu; |
4201 | 0 | WORD32 recon_func_mode; |
4202 | 0 | WORD32 cu_bits; |
4203 | 0 | UWORD8 u1_compute_spatial_ssd; |
4204 | | |
4205 | | /* min_trans_size is initialized to some huge number than usual TU sizes */ |
4206 | 0 | WORD32 i4_min_trans_size = 256; |
4207 | | /* Get the RDOPT cost of the best CU mode for early_exit */ |
4208 | 0 | LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost; |
4209 | 0 | WORD32 src_strd = ps_cu_prms->i4_luma_src_stride; |
4210 | | |
4211 | | /* model for no residue syntax qt root cbf flag */ |
4212 | 0 | UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX]; |
4213 | | |
4214 | | /* backup copy of cabac states for restoration if zero cu reside rdo wins later */ |
4215 | 0 | UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END]; |
4216 | | |
4217 | | /* for skip cases tables are not required */ |
4218 | 0 | UWORD8 u1_skip_tu_sz_sft = 0; |
4219 | 0 | UWORD8 u1_skip_tu_posx = 0; |
4220 | 0 | UWORD8 u1_skip_tu_posy = 0; |
4221 | 0 | UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy; |
4222 | | |
4223 | | /* get the pointers based on curbuf idx */ |
4224 | 0 | ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx]; |
4225 | 0 | ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0]; |
4226 | 0 | pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0]; |
4227 | 0 | pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0]; |
4228 | 0 | csbf_strd = ps_ctxt->i4_cu_csbf_strd; |
4229 | 0 | pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0]; |
4230 | |
|
4231 | 0 | pred_stride = ps_inter_cand->i4_pred_data_stride; |
4232 | 0 | recon_stride = cu_size; |
4233 | 0 | pu1_pred = ps_inter_cand->pu1_pred_data; |
4234 | 0 | chrm_ctr = 0; |
4235 | 0 | ecd_data_bytes_cons = 0; |
4236 | 0 | total_rdopt_cost = 0; |
4237 | 0 | num_4x4_in_cu = cu_size >> 2; |
4238 | 0 | recon_func_mode = PRED_MODE_INTER; |
4239 | 0 | cu_bits = 0; |
4240 | | |
4241 | | /* get the 4x4 level position of current cu */ |
4242 | 0 | cu_pos_x = cu_pos_x << 1; |
4243 | 0 | cu_pos_y = cu_pos_y << 1; |
4244 | | |
4245 | | /* default value for cu coded flag */ |
4246 | 0 | ps_final_prms->u1_is_cu_coded = 0; |
4247 | | |
4248 | | /*init of ssd of CU accumulated over all TU*/ |
4249 | 0 | ps_final_prms->u4_cu_sad = 0; |
4250 | | |
4251 | | /* populate the coeffs scan idx */ |
4252 | 0 | ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT; |
4253 | |
|
4254 | 0 | #if ENABLE_INTER_ZCU_COST |
4255 | | /* reset cu not coded cost */ |
4256 | 0 | ps_ctxt->i8_cu_not_coded_cost = 0; |
4257 | | |
4258 | | /* backup copy of cabac states for restoration if zero cu reside rdo wins later */ |
4259 | 0 | memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END); |
4260 | 0 | #endif |
4261 | |
|
4262 | 0 | if(ps_cu_analyse->u1_cu_size == 64) |
4263 | 0 | { |
4264 | 0 | num_split_flags = 4; |
4265 | 0 | u1_tu_size = 32; |
4266 | 0 | } |
4267 | 0 | else |
4268 | 0 | { |
4269 | 0 | num_split_flags = 1; |
4270 | 0 | u1_tu_size = ps_cu_analyse->u1_cu_size; |
4271 | 0 | } |
4272 | | |
4273 | | /* check for skip mode */ |
4274 | 0 | if(1 == ps_final_prms->u1_skip_flag) |
4275 | 0 | { |
4276 | 0 | if(64 == cu_size) |
4277 | 0 | { |
4278 | | /* TU = CU/2 is set but no transform is evaluated */ |
4279 | 0 | num_tu_in_cu = 4; |
4280 | 0 | pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0]; |
4281 | 0 | pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0]; |
4282 | 0 | pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0]; |
4283 | 0 | } |
4284 | 0 | else |
4285 | 0 | { |
4286 | | /* TU = CU is set but no transform is evaluated */ |
4287 | 0 | num_tu_in_cu = 1; |
4288 | 0 | pu1_tu_sz_sft = &u1_skip_tu_sz_sft; |
4289 | 0 | pu1_tu_posx = &u1_skip_tu_posx; |
4290 | 0 | pu1_tu_posy = &u1_skip_tu_posy; |
4291 | 0 | } |
4292 | |
|
4293 | 0 | recon_func_mode = PRED_MODE_SKIP; |
4294 | 0 | } |
4295 | | /* check for PU part mode being AMP or No AMP */ |
4296 | 0 | else if(ps_final_prms->u1_part_mode < SIZE_2NxnU) |
4297 | 0 | { |
4298 | 0 | if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64)) |
4299 | 0 | { |
4300 | | /* TU= CU is evaluated 2Nx2N inter case */ |
4301 | 0 | num_tu_in_cu = 1; |
4302 | 0 | pu1_tu_sz_sft = &u1_skip_tu_sz_sft; |
4303 | 0 | pu1_tu_posx = &u1_skip_tu_posx; |
4304 | 0 | pu1_tu_posy = &u1_skip_tu_posy; |
4305 | 0 | } |
4306 | 0 | else |
4307 | 0 | { |
4308 | | /* currently TU= CU/2 is evaluated for all inter case */ |
4309 | 0 | num_tu_in_cu = 4; |
4310 | 0 | pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0]; |
4311 | 0 | pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0]; |
4312 | 0 | pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0]; |
4313 | 0 | } |
4314 | 0 | } |
4315 | 0 | else |
4316 | 0 | { |
4317 | | /* for AMP cases one level of TU recursion is done */ |
4318 | | /* based on orientation of the partitions */ |
4319 | 0 | num_tu_in_cu = 10; |
4320 | 0 | pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0]; |
4321 | 0 | pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0]; |
4322 | 0 | pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0]; |
4323 | 0 | } |
4324 | |
|
4325 | 0 | ps_tu_prms = &s_tu_prms[0]; |
4326 | 0 | num_tu_in_cu = 0; |
4327 | |
|
4328 | 0 | for(i = 0; i < num_split_flags; i++) |
4329 | 0 | { |
4330 | 0 | WORD32 i4_x_off = 0, i4_y_off = 0; |
4331 | |
|
4332 | 0 | if(i == 1 || i == 3) |
4333 | 0 | { |
4334 | 0 | i4_x_off = 32; |
4335 | 0 | } |
4336 | |
|
4337 | 0 | if(i == 2 || i == 3) |
4338 | 0 | { |
4339 | 0 | i4_y_off = 32; |
4340 | 0 | } |
4341 | |
|
4342 | 0 | if(1 == ps_final_prms->u1_skip_flag) |
4343 | 0 | { |
4344 | 0 | ai4_tu_split_flags[0] = 0; |
4345 | 0 | ps_inter_cand->ai4_tu_split_flag[i] = 0; |
4346 | |
|
4347 | 0 | ai4_tu_early_cbf[0] = 0; |
4348 | 0 | } |
4349 | 0 | else |
4350 | 0 | { |
4351 | 0 | ai4_tu_split_flags[0] = ps_inter_cand->ai4_tu_split_flag[i]; |
4352 | 0 | ai4_tu_early_cbf[0] = ps_inter_cand->ai4_tu_early_cbf[i]; |
4353 | 0 | } |
4354 | |
|
4355 | 0 | ps_tu_prms->u1_tu_size = u1_tu_size; |
4356 | |
|
4357 | 0 | ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update( |
4358 | 0 | ps_tu_prms, |
4359 | 0 | &num_tu_in_cu, |
4360 | 0 | 0, |
4361 | 0 | ai4_tu_split_flags[0], |
4362 | 0 | ai4_tu_early_cbf[0], |
4363 | 0 | i4_x_off, |
4364 | 0 | i4_y_off); |
4365 | 0 | } |
4366 | | |
4367 | | /* loop for all tu blocks in current cu */ |
4368 | 0 | ps_tu_prms = &s_tu_prms[0]; |
4369 | 0 | for(ctr = 0; ctr < num_tu_in_cu; ctr++) |
4370 | 0 | { |
4371 | 0 | trans_size = ps_tu_prms->u1_tu_size; |
4372 | |
|
4373 | 0 | if(i4_min_trans_size > trans_size) |
4374 | 0 | { |
4375 | 0 | i4_min_trans_size = trans_size; |
4376 | 0 | } |
4377 | 0 | ps_tu_prms++; |
4378 | 0 | } |
4379 | |
|
4380 | 0 | if(ps_ctxt->i1_cu_qp_delta_enable) |
4381 | 0 | { |
4382 | 0 | ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, i4_min_trans_size, 0); |
4383 | 0 | } |
4384 | |
|
4385 | 0 | if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT) |
4386 | 0 | { |
4387 | 0 | ps_ctxt->i8_cl_ssd_lambda_qf = |
4388 | 0 | ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / |
4389 | 0 | 100.0f); |
4390 | 0 | ps_ctxt->i8_cl_ssd_lambda_chroma_qf = |
4391 | 0 | ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf * |
4392 | 0 | (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f); |
4393 | 0 | } |
4394 | |
|
4395 | 0 | u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) && |
4396 | 0 | (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) && |
4397 | 0 | CONVERT_SSDS_TO_SPATIAL_DOMAIN; |
4398 | |
|
4399 | 0 | if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT) |
4400 | 0 | { |
4401 | 0 | u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) && |
4402 | 0 | CONVERT_SSDS_TO_SPATIAL_DOMAIN; |
4403 | 0 | } |
4404 | |
|
4405 | 0 | if(!u1_compute_spatial_ssd) |
4406 | 0 | { |
4407 | 0 | ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0; |
4408 | 0 | ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0; |
4409 | 0 | } |
4410 | 0 | else |
4411 | 0 | { |
4412 | 0 | ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1; |
4413 | 0 | } |
4414 | |
|
4415 | 0 | ps_tu_prms = &s_tu_prms[0]; |
4416 | |
|
4417 | 0 | ASSERT(num_tu_in_cu <= 256); |
4418 | | |
4419 | | /* RDOPT copy States : TU init (best until prev TU) to current */ |
4420 | 0 | memcpy( |
4421 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] |
4422 | 0 | .s_cabac_ctxt.au1_ctxt_models[0], |
4423 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
4424 | 0 | IHEVC_CAB_COEFFX_PREFIX); |
4425 | |
|
4426 | 0 | for(ctr = 0; ctr < num_tu_in_cu; ctr++) |
4427 | 0 | { |
4428 | 0 | WORD32 curr_bytes; |
4429 | 0 | WORD32 tx_size; |
4430 | 0 | WORD32 cbf, zero_col, zero_row; |
4431 | 0 | LWORD64 rdopt_cost; |
4432 | 0 | UWORD8 u1_is_recon_available; |
4433 | |
|
4434 | 0 | WORD32 curr_pos_x; |
4435 | 0 | WORD32 curr_pos_y; |
4436 | 0 | nbr_4x4_t *ps_cur_nbr_4x4; |
4437 | 0 | UWORD8 *pu1_cur_pred; |
4438 | 0 | UWORD8 *pu1_cur_src; |
4439 | 0 | UWORD8 *pu1_cur_recon; |
4440 | 0 | WORD16 *pi2_cur_deq_data; |
4441 | 0 | UWORD32 u4_tu_sad; |
4442 | 0 | WORD32 tu_bits; |
4443 | |
|
4444 | 0 | WORD32 i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride; |
4445 | |
|
4446 | 0 | trans_size = ps_tu_prms->u1_tu_size; |
4447 | | /* get the current pos x and pos y in pixels */ |
4448 | 0 | curr_pos_x = ps_tu_prms->u1_x_off; //((cu_size >> 2) * pu1_tu_posx[ctr]); |
4449 | 0 | curr_pos_y = ps_tu_prms->u1_y_off; //((cu_size >> 2) * pu1_tu_posy[ctr]); |
4450 | |
|
4451 | 0 | num_4x4_in_tu = trans_size >> 2; |
4452 | |
|
4453 | | #if FORCE_8x8_TFR |
4454 | | if(cu_size == 64) |
4455 | | { |
4456 | | curr_pos_x = ((cu_size >> 3) * pu1_tu_posx[ctr]); |
4457 | | curr_pos_y = ((cu_size >> 3) * pu1_tu_posy[ctr]); |
4458 | | } |
4459 | | #endif |
4460 | | |
4461 | | /* increment the pointers to start of current TU */ |
4462 | 0 | pu1_cur_src = ((UWORD8 *)pv_src + curr_pos_x); |
4463 | 0 | pu1_cur_src += (curr_pos_y * src_strd); |
4464 | 0 | pu1_cur_pred = (pu1_pred + curr_pos_x); |
4465 | 0 | pu1_cur_pred += (curr_pos_y * pred_stride); |
4466 | 0 | pi2_cur_deq_data = pi2_deq_data + curr_pos_x; |
4467 | 0 | pi2_cur_deq_data += (curr_pos_y * cu_size); |
4468 | 0 | pu1_cur_recon = ((UWORD8 *)ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0]) + |
4469 | 0 | curr_pos_x + curr_pos_y * i4_recon_stride; |
4470 | |
|
4471 | 0 | ps_cur_nbr_4x4 = (ps_nbr_4x4 + (curr_pos_x >> 2)); |
4472 | 0 | ps_cur_nbr_4x4 += ((curr_pos_y >> 2) * num_4x4_in_cu); |
4473 | | |
4474 | | /* RDOPT copy States : TU init (best until prev TU) to current */ |
4475 | 0 | COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
4476 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] |
4477 | 0 | .s_cabac_ctxt.au1_ctxt_models[0] + |
4478 | 0 | IHEVC_CAB_COEFFX_PREFIX, |
4479 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
4480 | 0 | IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
4481 | |
|
4482 | 0 | i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq; |
4483 | 0 | i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh; |
4484 | | |
4485 | | /*2 Multi-dimensional array based on trans size of rounding factor to be added here */ |
4486 | | /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */ |
4487 | | /* Currently the complete array will contain only single value*/ |
4488 | | /*The rounding factor is calculated with the formula |
4489 | | Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2 |
4490 | | rounding factor = (1 - DeadZone Val) |
4491 | | |
4492 | | Assumption: Cabac states of All the sub-blocks in the TU are considered independent |
4493 | | */ |
4494 | 0 | if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0)) |
4495 | 0 | { |
4496 | 0 | double i4_lamda_modifier; |
4497 | |
|
4498 | 0 | if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id)) |
4499 | 0 | { |
4500 | 0 | i4_lamda_modifier = ps_ctxt->i4_lamda_modifier * |
4501 | 0 | CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00); |
4502 | 0 | } |
4503 | 0 | else |
4504 | 0 | { |
4505 | 0 | i4_lamda_modifier = ps_ctxt->i4_lamda_modifier; |
4506 | 0 | } |
4507 | 0 | if(ps_ctxt->i4_use_const_lamda_modifier) |
4508 | 0 | { |
4509 | 0 | if(ISLICE == ps_ctxt->i1_slice_type) |
4510 | 0 | { |
4511 | 0 | i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier; |
4512 | 0 | } |
4513 | 0 | else |
4514 | 0 | { |
4515 | 0 | i4_lamda_modifier = CONST_LAMDA_MOD_VAL; |
4516 | 0 | } |
4517 | 0 | } |
4518 | 0 | ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] = |
4519 | 0 | &ps_ctxt->i4_quant_round_tu[0][0]; |
4520 | 0 | ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] = |
4521 | 0 | &ps_ctxt->i4_quant_round_tu[1][0]; |
4522 | |
|
4523 | 0 | memset( |
4524 | 0 | ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3], |
4525 | 0 | 0, |
4526 | 0 | trans_size * trans_size * sizeof(WORD32)); |
4527 | 0 | memset( |
4528 | 0 | ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3], |
4529 | 0 | 0, |
4530 | 0 | trans_size * trans_size * sizeof(WORD32)); |
4531 | |
|
4532 | 0 | ihevce_quant_rounding_factor_gen( |
4533 | 0 | trans_size, |
4534 | 0 | 1, |
4535 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt, |
4536 | 0 | ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3], |
4537 | 0 | ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3], |
4538 | 0 | i4_lamda_modifier, |
4539 | 0 | 1); |
4540 | 0 | } |
4541 | 0 | else |
4542 | 0 | { |
4543 | 0 | ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] = |
4544 | 0 | ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3]; |
4545 | 0 | ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] = |
4546 | 0 | ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3]; |
4547 | 0 | } |
4548 | | |
4549 | | /* call T Q IT IQ and recon function */ |
4550 | 0 | cbf = ihevce_t_q_iq_ssd_scan_fxn( |
4551 | 0 | ps_ctxt, |
4552 | 0 | pu1_cur_pred, |
4553 | 0 | pred_stride, |
4554 | 0 | pu1_cur_src, |
4555 | 0 | src_strd, |
4556 | 0 | pi2_cur_deq_data, |
4557 | 0 | cu_size, |
4558 | 0 | pu1_cur_recon, |
4559 | 0 | i4_recon_stride, |
4560 | 0 | pu1_ecd_data, |
4561 | 0 | pu1_csbf_buf, |
4562 | 0 | csbf_strd, |
4563 | 0 | trans_size, |
4564 | 0 | recon_func_mode, |
4565 | 0 | &rdopt_cost, |
4566 | 0 | &curr_bytes, |
4567 | 0 | &tu_bits, |
4568 | 0 | &u4_tu_sad, |
4569 | 0 | &zero_col, |
4570 | 0 | &zero_row, |
4571 | 0 | &u1_is_recon_available, |
4572 | 0 | i4_perform_rdoq, |
4573 | 0 | i4_perform_sbh, |
4574 | 0 | #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
4575 | 0 | i4_alpha_stim_multiplier, |
4576 | 0 | u1_is_cu_noisy, |
4577 | 0 | #endif |
4578 | 0 | u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD, |
4579 | 0 | ps_ctxt->u1_use_early_cbf_data ? ps_tu_prms->i4_early_cbf : 1); |
4580 | |
|
4581 | | #if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
4582 | | if(u1_is_cu_noisy && i4_alpha_stim_multiplier) |
4583 | | { |
4584 | | #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT |
4585 | | rdopt_cost = ihevce_inject_stim_into_distortion( |
4586 | | pu1_cur_src, |
4587 | | src_strd, |
4588 | | pu1_cur_pred, |
4589 | | pred_stride, |
4590 | | rdopt_cost, |
4591 | | i4_alpha_stim_multiplier, |
4592 | | trans_size, |
4593 | | 0, |
4594 | | ps_ctxt->u1_enable_psyRDOPT, |
4595 | | NULL_PLANE); |
4596 | | #else |
4597 | | if(u1_compute_spatial_ssd && u1_is_recon_available) |
4598 | | { |
4599 | | rdopt_cost = ihevce_inject_stim_into_distortion( |
4600 | | pu1_cur_src, |
4601 | | src_strd, |
4602 | | pu1_cur_recon, |
4603 | | i4_recon_stride, |
4604 | | rdopt_cost, |
4605 | | i4_alpha_stim_multiplier, |
4606 | | trans_size, |
4607 | | 0, |
4608 | | NULL_PLANE); |
4609 | | } |
4610 | | else |
4611 | | { |
4612 | | rdopt_cost = ihevce_inject_stim_into_distortion( |
4613 | | pu1_cur_src, |
4614 | | src_strd, |
4615 | | pu1_cur_pred, |
4616 | | pred_stride, |
4617 | | rdopt_cost, |
4618 | | i4_alpha_stim_multiplier, |
4619 | | trans_size, |
4620 | | 0, |
4621 | | ps_ctxt->u1_enable_psyRDOPT, |
4622 | | NULL_PLANE); |
4623 | | } |
4624 | | #endif |
4625 | | } |
4626 | | #endif |
4627 | |
|
4628 | 0 | if(u1_compute_spatial_ssd && u1_is_recon_available) |
4629 | 0 | { |
4630 | 0 | ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = 0; |
4631 | 0 | } |
4632 | 0 | else |
4633 | 0 | { |
4634 | 0 | ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX; |
4635 | 0 | } |
4636 | | |
4637 | | /* accumulate the TU sad into cu sad */ |
4638 | 0 | ps_final_prms->u4_cu_sad += u4_tu_sad; |
4639 | | |
4640 | | /* accumulate the TU bits into cu bits */ |
4641 | 0 | cu_bits += tu_bits; |
4642 | | |
4643 | | /* inter cu is coded if any of the tu is coded in it */ |
4644 | 0 | ps_final_prms->u1_is_cu_coded |= cbf; |
4645 | | |
4646 | | /* call the entropy function to get the bits */ |
4647 | | /* add that to rd opt cost(SSD) */ |
4648 | | |
4649 | | /* update the bytes */ |
4650 | 0 | ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons; |
4651 | 0 | ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = curr_bytes; |
4652 | | /* update the zero_row and col info for the final mode */ |
4653 | 0 | ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col = zero_col; |
4654 | 0 | ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row = zero_row; |
4655 | | |
4656 | | /* update the bytes */ |
4657 | 0 | ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons; |
4658 | | |
4659 | | /* update the total bytes cons */ |
4660 | 0 | ecd_data_bytes_cons += curr_bytes; |
4661 | 0 | pu1_ecd_data += curr_bytes; |
4662 | | |
4663 | | /* RDOPT copy States : New updated after curr TU to TU init */ |
4664 | 0 | if(0 != cbf) |
4665 | 0 | { |
4666 | | /* update to new state only if CBF is non zero */ |
4667 | 0 | COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
4668 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
4669 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] |
4670 | 0 | .s_cabac_ctxt.au1_ctxt_models[0] + |
4671 | 0 | IHEVC_CAB_COEFFX_PREFIX, |
4672 | 0 | IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
4673 | 0 | } |
4674 | | |
4675 | | /* by default chroma present is set to 1*/ |
4676 | 0 | chrm_present_flag = 1; |
4677 | 0 | if(4 == trans_size) |
4678 | 0 | { |
4679 | | /* if tusize is 4x4 then only first luma 4x4 will have chroma*/ |
4680 | 0 | if(0 != chrm_ctr) |
4681 | 0 | { |
4682 | 0 | chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE; |
4683 | 0 | } |
4684 | | |
4685 | | /* increment the chrm ctr unconditionally */ |
4686 | 0 | chrm_ctr++; |
4687 | | |
4688 | | /* after ctr reached 4 reset it */ |
4689 | 0 | if(4 == chrm_ctr) |
4690 | 0 | { |
4691 | 0 | chrm_ctr = 0; |
4692 | 0 | } |
4693 | 0 | } |
4694 | |
|
4695 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = cbf; |
4696 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0; |
4697 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0; |
4698 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0; |
4699 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0; |
4700 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag; |
4701 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp; |
4702 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0; |
4703 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0; |
4704 | 0 | GETRANGE(tx_size, trans_size); |
4705 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3; |
4706 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + (curr_pos_x >> 2); |
4707 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + (curr_pos_y >> 2); |
4708 | | |
4709 | | /* replicate the nbr 4x4 structure for all 4x4 blocks of current TU */ |
4710 | 0 | ps_cur_nbr_4x4->b1_y_cbf = cbf; |
4711 | | /*copy the cu qp. This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/ |
4712 | 0 | ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp; |
4713 | | |
4714 | | /* Qp and cbf are stored for the all 4x4 in TU */ |
4715 | 0 | { |
4716 | 0 | WORD32 i, j; |
4717 | 0 | nbr_4x4_t *ps_tmp_4x4; |
4718 | 0 | ps_tmp_4x4 = ps_cur_nbr_4x4; |
4719 | |
|
4720 | 0 | for(i = 0; i < num_4x4_in_tu; i++) |
4721 | 0 | { |
4722 | 0 | for(j = 0; j < num_4x4_in_tu; j++) |
4723 | 0 | { |
4724 | 0 | ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp; |
4725 | 0 | ps_tmp_4x4[j].b1_y_cbf = cbf; |
4726 | 0 | } |
4727 | | /* row level update*/ |
4728 | 0 | ps_tmp_4x4 += num_4x4_in_cu; |
4729 | 0 | } |
4730 | 0 | } |
4731 | |
|
4732 | 0 | #if RDOPT_ENABLE |
4733 | | /* compute the rdopt cost */ |
4734 | 0 | rdopt_cost += |
4735 | 0 | COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); |
4736 | 0 | #endif |
4737 | | /* accumulate the costs */ |
4738 | 0 | total_rdopt_cost += rdopt_cost; |
4739 | |
|
4740 | 0 | ps_tu_prms++; |
4741 | |
|
4742 | 0 | if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1) |
4743 | 0 | { |
4744 | | /* Early exit : If the current running cost exceeds |
4745 | | the prev. best mode cost, break */ |
4746 | 0 | if(total_rdopt_cost > prev_best_rdopt_cost) |
4747 | 0 | { |
4748 | 0 | return (total_rdopt_cost); |
4749 | 0 | } |
4750 | 0 | } |
4751 | 0 | } |
4752 | | |
4753 | | /* Modify the cost function for this CU. */ |
4754 | | /* loop in for 8x8 blocks */ |
4755 | 0 | if(ps_ctxt->u1_enable_psyRDOPT) |
4756 | 0 | { |
4757 | 0 | UWORD8 *pu1_recon_cu; |
4758 | 0 | WORD32 recon_stride; |
4759 | 0 | WORD32 curr_pos_x; |
4760 | 0 | WORD32 curr_pos_y; |
4761 | 0 | WORD32 start_index; |
4762 | 0 | WORD32 num_horz_cu_in_ctb; |
4763 | 0 | WORD32 had_block_size; |
4764 | | |
4765 | | /* tODO: sreenivasa ctb size has to be used appropriately */ |
4766 | 0 | had_block_size = 8; |
4767 | 0 | num_horz_cu_in_ctb = 64 / had_block_size; |
4768 | |
|
4769 | 0 | curr_pos_x = cu_pos_x << 2; /* pel units */ |
4770 | 0 | curr_pos_y = cu_pos_y << 2; /* pel units */ |
4771 | 0 | recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride; |
4772 | 0 | pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore |
4773 | 0 | .apv_luma_recon_bufs[0]); // already pointing to the current CU recon |
4774 | | //+ \curr_pos_x + curr_pos_y * recon_stride; |
4775 | | |
4776 | | /* start index to index the source satd of curr cu in the current ctb*/ |
4777 | 0 | start_index = |
4778 | 0 | (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb; |
4779 | |
|
4780 | 0 | { |
4781 | 0 | total_rdopt_cost += ihevce_psy_rd_cost( |
4782 | 0 | ps_ctxt->ai4_source_satd_8x8, |
4783 | 0 | pu1_recon_cu, |
4784 | 0 | recon_stride, |
4785 | 0 | 1, //howz stride |
4786 | 0 | cu_size, |
4787 | 0 | 0, // pic type |
4788 | 0 | 0, //layer id |
4789 | 0 | ps_ctxt->i4_satd_lamda, // lambda |
4790 | 0 | start_index, |
4791 | 0 | ps_ctxt->u1_is_input_data_hbd, |
4792 | 0 | ps_ctxt->u4_psy_strength, |
4793 | 0 | &ps_ctxt->s_cmn_opt_func); // 8 bit |
4794 | 0 | } |
4795 | 0 | } |
4796 | | |
4797 | | /* store the num TUs*/ |
4798 | 0 | ps_final_prms->u2_num_tus_in_cu = num_tu_in_cu; |
4799 | | |
4800 | | /* update the bytes consumed */ |
4801 | 0 | ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons; |
4802 | | |
4803 | | /* store the current cu size to final prms */ |
4804 | 0 | ps_final_prms->u1_cu_size = cu_size; |
4805 | | |
4806 | | /* cu bits will be having luma residual bits till this point */ |
4807 | | /* if zero_cbf eval is disabled then cu bits will be zero */ |
4808 | 0 | ps_final_prms->u4_cu_luma_res_bits = cu_bits; |
4809 | | |
4810 | | /* ------------- Chroma processing -------------- */ |
4811 | | /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEED preset*/ |
4812 | 0 | if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt) |
4813 | 0 | { |
4814 | 0 | LWORD64 chrm_rdopt_cost; |
4815 | 0 | WORD32 chrm_rdopt_tu_bits; |
4816 | | |
4817 | | /* Store the current RDOPT cost to enable early exit in chrom_prcs */ |
4818 | 0 | ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost; |
4819 | |
|
4820 | 0 | chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt( |
4821 | 0 | ps_ctxt, |
4822 | 0 | curr_buf_idx, |
4823 | 0 | 0, /* TU mode : Don't care in Inter patrh */ |
4824 | 0 | ps_chrm_cu_buf_prms->pu1_curr_src, |
4825 | 0 | ps_chrm_cu_buf_prms->i4_chrm_src_stride, |
4826 | 0 | ps_chrm_cu_buf_prms->pu1_cu_left, |
4827 | 0 | ps_chrm_cu_buf_prms->pu1_cu_top, |
4828 | 0 | ps_chrm_cu_buf_prms->pu1_cu_top_left, |
4829 | 0 | ps_chrm_cu_buf_prms->i4_cu_left_stride, |
4830 | 0 | (cu_pos_x >> 1), |
4831 | 0 | (cu_pos_y >> 1), |
4832 | 0 | &chrm_rdopt_tu_bits, |
4833 | 0 | i4_alpha_stim_multiplier, |
4834 | 0 | u1_is_cu_noisy); |
4835 | |
|
4836 | 0 | #if WEIGH_CHROMA_COST |
4837 | 0 | chrm_rdopt_cost = (LWORD64)( |
4838 | 0 | (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor + |
4839 | 0 | (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> |
4840 | 0 | CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT); |
4841 | 0 | #endif |
4842 | |
|
4843 | 0 | #if CHROMA_RDOPT_ENABLE |
4844 | 0 | total_rdopt_cost += chrm_rdopt_cost; |
4845 | 0 | #endif |
4846 | 0 | cu_bits += chrm_rdopt_tu_bits; |
4847 | | |
4848 | | /* during chroma evaluation if skip decision was over written */ |
4849 | | /* then the current skip candidate is set to a non skip candidate */ |
4850 | 0 | ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag; |
4851 | | |
4852 | | /* cu bits for chroma residual if chroma rdopt is on */ |
4853 | | /* if zero_cbf eval is disabled then cu bits will be zero */ |
4854 | 0 | ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits; |
4855 | |
|
4856 | 0 | if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1) |
4857 | 0 | { |
4858 | | /* Early exit : If the current running cost exceeds |
4859 | | the prev. best mode cost, break */ |
4860 | 0 | if(total_rdopt_cost > prev_best_rdopt_cost) |
4861 | 0 | { |
4862 | 0 | return (total_rdopt_cost); |
4863 | 0 | } |
4864 | 0 | } |
4865 | 0 | } |
4866 | 0 | else |
4867 | 0 | {} |
4868 | | |
4869 | 0 | #if SHRINK_INTER_TUTREE |
4870 | | /* ------------- Quadtree TU split optimization ------------ */ |
4871 | 0 | if(ps_final_prms->u1_is_cu_coded) |
4872 | 0 | { |
4873 | 0 | ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree( |
4874 | 0 | &ps_final_prms->as_tu_enc_loop[0], |
4875 | 0 | &ps_final_prms->as_tu_enc_loop_temp_prms[0], |
4876 | 0 | &ps_final_prms->s_recon_datastore, |
4877 | 0 | num_tu_in_cu, |
4878 | 0 | (ps_ctxt->u1_chroma_array_type == 2)); |
4879 | 0 | } |
4880 | 0 | #endif |
4881 | | |
4882 | | /* RDOPT copy States : Best after all luma TUs (and chroma,if enabled)to current */ |
4883 | 0 | COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
4884 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] |
4885 | 0 | .s_cabac_ctxt.au1_ctxt_models[0] + |
4886 | 0 | IHEVC_CAB_COEFFX_PREFIX, |
4887 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
4888 | 0 | IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
4889 | | |
4890 | | /* -------- Bit estimate for RD opt -------------- */ |
4891 | 0 | { |
4892 | 0 | nbr_avail_flags_t s_nbr; |
4893 | | /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/ |
4894 | 0 | WORD32 cbf_bits, header_bits; |
4895 | | |
4896 | | /* get the neighbour availability flags for current cu */ |
4897 | 0 | ihevce_get_only_nbr_flag( |
4898 | 0 | &s_nbr, |
4899 | 0 | ps_ctxt->pu1_ctb_nbr_map, |
4900 | 0 | ps_ctxt->i4_nbr_map_strd, |
4901 | 0 | cu_pos_x, |
4902 | 0 | cu_pos_y, |
4903 | 0 | (cu_size >> 2), |
4904 | 0 | (cu_size >> 2)); |
4905 | | |
4906 | | /* call the entropy rdo encode to get the bit estimate for current cu */ |
4907 | 0 | header_bits = ihevce_entropy_rdo_encode_cu( |
4908 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt, |
4909 | 0 | ps_final_prms, |
4910 | 0 | (cu_pos_x >> 1), /* back to 8x8 pel units */ |
4911 | 0 | (cu_pos_y >> 1), /* back to 8x8 pel units */ |
4912 | 0 | cu_size, |
4913 | 0 | ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail |
4914 | 0 | : s_nbr.u1_top_avail, |
4915 | 0 | s_nbr.u1_left_avail, |
4916 | 0 | &ps_final_prms->pu1_cu_coeffs[0], |
4917 | 0 | &cbf_bits); |
4918 | |
|
4919 | 0 | cu_bits += header_bits; |
4920 | | |
4921 | | /* cbf bits are excluded from header bits, instead considered as texture bits */ |
4922 | | /* incase if zero cbf eval is disabled then texture bits gets added here */ |
4923 | 0 | ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits); |
4924 | 0 | ps_final_prms->u4_cu_cbf_bits = cbf_bits; |
4925 | |
|
4926 | 0 | #if RDOPT_ENABLE |
4927 | | /* add the cost of coding the header bits */ |
4928 | 0 | total_rdopt_cost += |
4929 | 0 | COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); |
4930 | |
|
4931 | 0 | #if ENABLE_INTER_ZCU_COST |
4932 | | /* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */ |
4933 | 0 | if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level)) |
4934 | 0 | { |
4935 | 0 | LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost; |
4936 | |
|
4937 | 0 | WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) && |
4938 | 0 | (1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag); |
4939 | |
|
4940 | 0 | cab_ctxt_t *ps_cab_ctxt = |
4941 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt; |
4942 | | |
4943 | | /* Read header bits generated after ihevce_entropy_rdo_encode_cu() call */ |
4944 | 0 | UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12; |
4945 | | |
4946 | | /* account for coding qt_root_cbf = 0 */ |
4947 | | /* First subtract cost for coding as 1 (part of header bits) and then add cost for coding as 0 */ |
4948 | 0 | u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0]; |
4949 | 0 | if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1]) |
4950 | 0 | u4_cu_hdr_bits_q12 = 0; |
4951 | 0 | else |
4952 | 0 | u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1]; |
4953 | | |
4954 | | /* add the cost of coding the header bits */ |
4955 | 0 | i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30( |
4956 | 0 | u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */, |
4957 | 0 | ps_ctxt->i8_cl_ssd_lambda_qf, |
4958 | 0 | (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q)); |
4959 | |
|
4960 | 0 | if(ps_ctxt->u1_enable_psyRDOPT) |
4961 | 0 | { |
4962 | 0 | i8_cu_not_coded_cost = total_rdopt_cost + 1; |
4963 | 0 | } |
4964 | | |
4965 | | /* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */ |
4966 | 0 | if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu)) |
4967 | 0 | { |
4968 | 0 | WORD32 tx_size; |
4969 | | |
4970 | | /* force cu as not coded and update the cost */ |
4971 | 0 | ps_final_prms->u1_is_cu_coded = 0; |
4972 | 0 | ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0; |
4973 | 0 | ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0; |
4974 | |
|
4975 | 0 | total_rdopt_cost = i8_cu_not_coded_cost; |
4976 | | |
4977 | | /* reset num TUs to 1 unless cu size is 64 */ |
4978 | 0 | ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1; |
4979 | 0 | trans_size = (64 == cu_size) ? 32 : cu_size; |
4980 | 0 | GETRANGE(tx_size, trans_size); |
4981 | | |
4982 | | /* reset the bytes consumed */ |
4983 | 0 | ps_final_prms->i4_num_bytes_ecd_data = 0; |
4984 | | |
4985 | | /* reset texture related bits and roll back header bits*/ |
4986 | 0 | ps_final_prms->u4_cu_cbf_bits = 0; |
4987 | 0 | ps_final_prms->u4_cu_luma_res_bits = 0; |
4988 | 0 | ps_final_prms->u4_cu_chroma_res_bits = 0; |
4989 | 0 | ps_final_prms->u4_cu_hdr_bits = |
4990 | 0 | (u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q; |
4991 | | |
4992 | | /* update cabac model with qtroot cbf = 0 decision */ |
4993 | 0 | ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] = |
4994 | 0 | gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1]; |
4995 | | |
4996 | | /* restore untouched cabac models for, tusplit, cbfs, texture etc */ |
4997 | 0 | memcpy( |
4998 | 0 | &ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM], |
4999 | 0 | &au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM], |
5000 | 0 | (IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM)); |
5001 | | |
5002 | | /* mark all tus as not coded for final eval */ |
5003 | 0 | for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++) |
5004 | 0 | { |
5005 | 0 | WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0; |
5006 | 0 | WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0; |
5007 | |
|
5008 | 0 | nbr_4x4_t *ps_cur_nbr_4x4 = |
5009 | 0 | ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu); |
5010 | |
|
5011 | 0 | num_4x4_in_tu = trans_size >> 2; |
5012 | |
|
5013 | 0 | ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0; |
5014 | 0 | ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0; |
5015 | 0 | ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0; |
5016 | |
|
5017 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0; |
5018 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0; |
5019 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0; |
5020 | |
|
5021 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0; |
5022 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0; |
5023 | |
|
5024 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3; |
5025 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x; |
5026 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y; |
5027 | | |
5028 | | /* reset cbf for the all 4x4 in TU */ |
5029 | 0 | { |
5030 | 0 | WORD32 i, j; |
5031 | 0 | nbr_4x4_t *ps_tmp_4x4; |
5032 | 0 | ps_tmp_4x4 = ps_cur_nbr_4x4; |
5033 | |
|
5034 | 0 | for(i = 0; i < num_4x4_in_tu; i++) |
5035 | 0 | { |
5036 | 0 | for(j = 0; j < num_4x4_in_tu; j++) |
5037 | 0 | { |
5038 | 0 | ps_tmp_4x4[j].b1_y_cbf = 0; |
5039 | 0 | } |
5040 | | /* row level update*/ |
5041 | 0 | ps_tmp_4x4 += num_4x4_in_cu; |
5042 | 0 | } |
5043 | 0 | } |
5044 | 0 | } |
5045 | 0 | } |
5046 | 0 | } |
5047 | 0 | #endif /* ENABLE_INTER_ZCU_COST */ |
5048 | |
|
5049 | 0 | #endif /* RDOPT_ENABLE */ |
5050 | 0 | } |
5051 | |
|
5052 | 0 | return (total_rdopt_cost); |
5053 | 0 | } |
5054 | | |
5055 | | #if ENABLE_RDO_BASED_TU_RECURSION |
5056 | | LWORD64 ihevce_inter_tu_tree_selector_and_rdopt_cost_computer( |
5057 | | ihevce_enc_loop_ctxt_t *ps_ctxt, |
5058 | | enc_loop_cu_prms_t *ps_cu_prms, |
5059 | | void *pv_src, |
5060 | | WORD32 cu_size, |
5061 | | WORD32 cu_pos_x, |
5062 | | WORD32 cu_pos_y, |
5063 | | WORD32 curr_buf_idx, |
5064 | | enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms, |
5065 | | cu_inter_cand_t *ps_inter_cand, |
5066 | | cu_analyse_t *ps_cu_analyse, |
5067 | | WORD32 i4_alpha_stim_multiplier) |
5068 | 0 | { |
5069 | 0 | tu_tree_node_t as_tu_nodes[256 + 64 + 16 + 4 + 1]; |
5070 | 0 | buffer_data_for_tu_t s_buffer_data_for_tu; |
5071 | 0 | enc_loop_cu_final_prms_t *ps_final_prms; |
5072 | 0 | nbr_4x4_t *ps_nbr_4x4; |
5073 | |
|
5074 | 0 | WORD32 num_split_flags = 1; |
5075 | 0 | UWORD8 u1_tu_size; |
5076 | 0 | UWORD8 *pu1_pred; |
5077 | 0 | UWORD8 *pu1_ecd_data; |
5078 | 0 | WORD16 *pi2_deq_data; |
5079 | 0 | UWORD8 *pu1_csbf_buf; |
5080 | 0 | UWORD8 *pu1_tu_sz_sft; |
5081 | 0 | UWORD8 *pu1_tu_posx; |
5082 | 0 | UWORD8 *pu1_tu_posy; |
5083 | 0 | LWORD64 total_rdopt_cost; |
5084 | 0 | WORD32 ctr; |
5085 | 0 | WORD32 chrm_ctr; |
5086 | 0 | WORD32 pred_stride; |
5087 | 0 | WORD32 recon_stride; |
5088 | 0 | WORD32 trans_size = ps_cu_analyse->u1_cu_size; |
5089 | 0 | WORD32 csbf_strd; |
5090 | 0 | WORD32 ecd_data_bytes_cons; |
5091 | 0 | WORD32 num_4x4_in_cu; |
5092 | 0 | WORD32 num_4x4_in_tu; |
5093 | 0 | WORD32 recon_func_mode; |
5094 | 0 | WORD32 cu_bits; |
5095 | 0 | UWORD8 u1_compute_spatial_ssd; |
5096 | | /* backup copy of cabac states for restoration if zero cu residue rdo wins later */ |
5097 | 0 | UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END]; |
5098 | |
|
5099 | 0 | WORD32 i4_min_trans_size = 256; |
5100 | 0 | LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost; |
5101 | 0 | WORD32 src_strd = ps_cu_prms->i4_luma_src_stride; |
5102 | | /* model for no residue syntax qt root cbf flag */ |
5103 | 0 | UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX]; |
5104 | 0 | UWORD8 u1_skip_tu_sz_sft = 0; |
5105 | 0 | UWORD8 u1_skip_tu_posx = 0; |
5106 | 0 | UWORD8 u1_skip_tu_posy = 0; |
5107 | 0 | UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy; |
5108 | |
|
5109 | 0 | ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx]; |
5110 | 0 | ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0]; |
5111 | 0 | pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0]; |
5112 | 0 | pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0]; |
5113 | 0 | csbf_strd = ps_ctxt->i4_cu_csbf_strd; |
5114 | 0 | pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0]; |
5115 | 0 | pred_stride = ps_inter_cand->i4_pred_data_stride; |
5116 | 0 | recon_stride = cu_size; |
5117 | 0 | pu1_pred = ps_inter_cand->pu1_pred_data; |
5118 | 0 | chrm_ctr = 0; |
5119 | 0 | ecd_data_bytes_cons = 0; |
5120 | 0 | total_rdopt_cost = 0; |
5121 | 0 | num_4x4_in_cu = cu_size >> 2; |
5122 | 0 | recon_func_mode = PRED_MODE_INTER; |
5123 | 0 | cu_bits = 0; |
5124 | | |
5125 | | /* get the 4x4 level position of current cu */ |
5126 | 0 | cu_pos_x = cu_pos_x << 1; |
5127 | 0 | cu_pos_y = cu_pos_y << 1; |
5128 | |
|
5129 | 0 | ps_final_prms->u1_is_cu_coded = 0; |
5130 | 0 | ps_final_prms->u4_cu_sad = 0; |
5131 | | |
5132 | | /* populate the coeffs scan idx */ |
5133 | 0 | ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT; |
5134 | |
|
5135 | 0 | #if ENABLE_INTER_ZCU_COST |
5136 | | /* reset cu not coded cost */ |
5137 | 0 | ps_ctxt->i8_cu_not_coded_cost = 0; |
5138 | | |
5139 | | /* backup copy of cabac states for restoration if zero cu residue rdo wins later */ |
5140 | 0 | memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END); |
5141 | 0 | #endif |
5142 | |
|
5143 | 0 | if(ps_cu_analyse->u1_cu_size == 64) |
5144 | 0 | { |
5145 | 0 | num_split_flags = 4; |
5146 | 0 | u1_tu_size = 32; |
5147 | 0 | } |
5148 | 0 | else |
5149 | 0 | { |
5150 | 0 | num_split_flags = 1; |
5151 | 0 | u1_tu_size = ps_cu_analyse->u1_cu_size; |
5152 | 0 | } |
5153 | |
|
5154 | 0 | if(1 == ps_final_prms->u1_skip_flag) |
5155 | 0 | { |
5156 | 0 | if(64 == cu_size) |
5157 | 0 | { |
5158 | | /* TU = CU/2 is set but no transform is evaluated */ |
5159 | 0 | pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0]; |
5160 | 0 | pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0]; |
5161 | 0 | pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0]; |
5162 | 0 | } |
5163 | 0 | else |
5164 | 0 | { |
5165 | | /* TU = CU is set but no transform is evaluated */ |
5166 | 0 | pu1_tu_sz_sft = &u1_skip_tu_sz_sft; |
5167 | 0 | pu1_tu_posx = &u1_skip_tu_posx; |
5168 | 0 | pu1_tu_posy = &u1_skip_tu_posy; |
5169 | 0 | } |
5170 | |
|
5171 | 0 | recon_func_mode = PRED_MODE_SKIP; |
5172 | 0 | } |
5173 | | /* check for PU part mode being AMP or No AMP */ |
5174 | 0 | else if(ps_final_prms->u1_part_mode < SIZE_2NxnU) |
5175 | 0 | { |
5176 | 0 | if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64)) |
5177 | 0 | { |
5178 | | /* TU= CU is evaluated 2Nx2N inter case */ |
5179 | 0 | pu1_tu_sz_sft = &u1_skip_tu_sz_sft; |
5180 | 0 | pu1_tu_posx = &u1_skip_tu_posx; |
5181 | 0 | pu1_tu_posy = &u1_skip_tu_posy; |
5182 | 0 | } |
5183 | 0 | else |
5184 | 0 | { |
5185 | | /* currently TU= CU/2 is evaluated for all inter case */ |
5186 | 0 | pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0]; |
5187 | 0 | pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0]; |
5188 | 0 | pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0]; |
5189 | 0 | } |
5190 | 0 | } |
5191 | 0 | else |
5192 | 0 | { |
5193 | | /* for AMP cases one level of TU recursion is done */ |
5194 | | /* based on orientation of the partitions */ |
5195 | 0 | pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0]; |
5196 | 0 | pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0]; |
5197 | 0 | pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0]; |
5198 | 0 | } |
5199 | |
|
5200 | 0 | i4_min_trans_size = 4; |
5201 | |
|
5202 | 0 | if(ps_ctxt->i1_cu_qp_delta_enable) |
5203 | 0 | { |
5204 | 0 | ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, i4_min_trans_size, 0); |
5205 | 0 | } |
5206 | |
|
5207 | 0 | if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT) |
5208 | 0 | { |
5209 | 0 | ps_ctxt->i8_cl_ssd_lambda_qf = |
5210 | 0 | ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / |
5211 | 0 | 100.0f); |
5212 | 0 | ps_ctxt->i8_cl_ssd_lambda_chroma_qf = |
5213 | 0 | ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf * |
5214 | 0 | (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f); |
5215 | 0 | } |
5216 | |
|
5217 | 0 | u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) && |
5218 | 0 | (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) && |
5219 | 0 | CONVERT_SSDS_TO_SPATIAL_DOMAIN; |
5220 | |
|
5221 | 0 | if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT) |
5222 | 0 | { |
5223 | 0 | u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) && |
5224 | 0 | CONVERT_SSDS_TO_SPATIAL_DOMAIN; |
5225 | 0 | } |
5226 | |
|
5227 | 0 | if(!u1_compute_spatial_ssd) |
5228 | 0 | { |
5229 | 0 | ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0; |
5230 | 0 | ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0; |
5231 | 0 | } |
5232 | 0 | else |
5233 | 0 | { |
5234 | 0 | ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1; |
5235 | |
|
5236 | 0 | if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)) |
5237 | 0 | { |
5238 | 0 | ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 1; |
5239 | 0 | } |
5240 | 0 | } |
5241 | | |
5242 | | /* RDOPT copy States : TU init (best until prev TU) to current */ |
5243 | 0 | memcpy( |
5244 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] |
5245 | 0 | .s_cabac_ctxt.au1_ctxt_models[0], |
5246 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
5247 | 0 | IHEVC_CAB_COEFFX_PREFIX); |
5248 | |
|
5249 | 0 | ihevce_tu_tree_init( |
5250 | 0 | as_tu_nodes, |
5251 | 0 | cu_size, |
5252 | 0 | (cu_size == 64) ? !ps_inter_cand->b1_skip_flag : 0, |
5253 | 0 | ps_inter_cand->b1_skip_flag ? 0 : ps_ctxt->u1_max_inter_tr_depth, |
5254 | 0 | INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0), |
5255 | 0 | ps_ctxt->u1_chroma_array_type == 2); |
5256 | |
|
5257 | 0 | if(!ps_inter_cand->b1_skip_flag && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3)) |
5258 | 0 | { |
5259 | 0 | ihevce_tuSplitArray_to_tuTree_mapper( |
5260 | 0 | as_tu_nodes, |
5261 | 0 | ps_inter_cand->ai4_tu_split_flag, |
5262 | 0 | cu_size, |
5263 | 0 | cu_size, |
5264 | 0 | MAX(MIN_TU_SIZE, (cu_size >> ps_ctxt->u1_max_inter_tr_depth)), |
5265 | 0 | MIN(MAX_TU_SIZE, cu_size), |
5266 | 0 | ps_inter_cand->b1_skip_flag); |
5267 | 0 | } |
5268 | |
|
5269 | 0 | ASSERT(ihevce_tu_tree_coverage_in_cu(as_tu_nodes) == cu_size * cu_size); |
5270 | | |
5271 | 0 | #if ENABLE_INTER_ZCU_COST |
5272 | 0 | ps_ctxt->i8_cu_not_coded_cost = 0; |
5273 | 0 | #endif |
5274 | |
|
5275 | 0 | s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_src = pv_src; |
5276 | 0 | s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_pred = pu1_pred; |
5277 | 0 | s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_recon = |
5278 | 0 | ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0]; |
5279 | 0 | s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_src_stride = src_strd; |
5280 | 0 | s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_pred_stride = pred_stride; |
5281 | 0 | s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_recon_stride = |
5282 | 0 | ps_final_prms->s_recon_datastore.i4_lumaRecon_stride; |
5283 | 0 | s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_src = ps_chrm_cu_buf_prms->pu1_curr_src; |
5284 | 0 | s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred = |
5285 | 0 | ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] + |
5286 | 0 | curr_buf_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) + ((ps_ctxt->u1_chroma_array_type == 2) * |
5287 | 0 | (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1))); |
5288 | 0 | s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_recon = |
5289 | 0 | ps_final_prms->s_recon_datastore.apv_chroma_recon_bufs[0]; |
5290 | 0 | s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_src_stride = |
5291 | 0 | ps_chrm_cu_buf_prms->i4_chrm_src_stride; |
5292 | 0 | s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride = |
5293 | 0 | ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX]; |
5294 | 0 | s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_recon_stride = |
5295 | 0 | ps_final_prms->s_recon_datastore.i4_chromaRecon_stride; |
5296 | 0 | s_buffer_data_for_tu.ps_nbr_data_buf = ps_nbr_4x4; |
5297 | 0 | s_buffer_data_for_tu.pi2_deq_data = pi2_deq_data; |
5298 | 0 | s_buffer_data_for_tu.pi2_deq_data_chroma = |
5299 | 0 | pi2_deq_data + ps_final_prms->i4_chrm_deq_coeff_strt_idx; |
5300 | 0 | s_buffer_data_for_tu.i4_nbr_data_buf_stride = num_4x4_in_cu; |
5301 | 0 | s_buffer_data_for_tu.i4_deq_data_stride = cu_size; |
5302 | 0 | s_buffer_data_for_tu.i4_deq_data_stride_chroma = cu_size; |
5303 | 0 | s_buffer_data_for_tu.ppu1_ecd = &pu1_ecd_data; |
5304 | |
|
5305 | 0 | if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)) |
5306 | 0 | { |
5307 | 0 | UWORD8 i; |
5308 | |
|
5309 | 0 | UWORD8 *pu1_pred = (UWORD8 *)s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred; |
5310 | |
|
5311 | 0 | for(i = 0; i < (!!ps_inter_cand->b3_part_size) + 1; i++) |
5312 | 0 | { |
5313 | 0 | pu_t *ps_pu; |
5314 | |
|
5315 | 0 | WORD32 inter_pu_wd; |
5316 | 0 | WORD32 inter_pu_ht; |
5317 | |
|
5318 | 0 | ps_pu = ps_inter_cand->as_inter_pu + i; |
5319 | |
|
5320 | 0 | inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */ |
5321 | 0 | inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1; |
5322 | 0 | inter_pu_ht <<= (ps_ctxt->u1_chroma_array_type == 2); |
5323 | 0 | ihevce_chroma_inter_pred_pu( |
5324 | 0 | &ps_ctxt->s_mc_ctxt, |
5325 | 0 | ps_pu, |
5326 | 0 | pu1_pred, |
5327 | 0 | s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride); |
5328 | 0 | if(!!ps_inter_cand->b3_part_size) |
5329 | 0 | { |
5330 | | /* 2Nx__ partition case */ |
5331 | 0 | if(inter_pu_wd == cu_size) |
5332 | 0 | { |
5333 | 0 | pu1_pred += |
5334 | 0 | (inter_pu_ht * |
5335 | 0 | s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride); |
5336 | 0 | } |
5337 | | |
5338 | | /* __x2N partition case */ |
5339 | 0 | if(inter_pu_ht == (cu_size >> !(ps_ctxt->u1_chroma_array_type == 2))) |
5340 | 0 | { |
5341 | 0 | pu1_pred += inter_pu_wd; |
5342 | 0 | } |
5343 | 0 | } |
5344 | 0 | } |
5345 | 0 | } |
5346 | |
|
5347 | | #if !ENABLE_TOP_DOWN_TU_RECURSION |
5348 | | total_rdopt_cost = ihevce_tu_tree_selector( |
5349 | | ps_ctxt, |
5350 | | as_tu_nodes, |
5351 | | &s_buffer_data_for_tu, |
5352 | | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] |
5353 | | .s_cabac_ctxt.au1_ctxt_models[0], |
5354 | | recon_func_mode, |
5355 | | #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
5356 | | i4_alpha_stim_multiplier, |
5357 | | u1_is_cu_noisy, |
5358 | | #endif |
5359 | | 0, |
5360 | | ps_ctxt->u1_max_inter_tr_depth, |
5361 | | ps_inter_cand->b3_part_size, |
5362 | | u1_compute_spatial_ssd); |
5363 | | #else |
5364 | 0 | total_rdopt_cost = ihevce_topDown_tu_tree_selector( |
5365 | 0 | ps_ctxt, |
5366 | 0 | as_tu_nodes, |
5367 | 0 | &s_buffer_data_for_tu, |
5368 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] |
5369 | 0 | .s_cabac_ctxt.au1_ctxt_models[0], |
5370 | 0 | recon_func_mode, |
5371 | 0 | #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
5372 | 0 | i4_alpha_stim_multiplier, |
5373 | 0 | u1_is_cu_noisy, |
5374 | 0 | #endif |
5375 | 0 | 0, |
5376 | 0 | ps_ctxt->u1_max_inter_tr_depth, |
5377 | 0 | ps_inter_cand->b3_part_size, |
5378 | 0 | INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0), |
5379 | 0 | u1_compute_spatial_ssd); |
5380 | 0 | #endif |
5381 | |
|
5382 | 0 | ps_final_prms->u2_num_tus_in_cu = 0; |
5383 | 0 | ps_final_prms->u4_cu_luma_res_bits = 0; |
5384 | 0 | ps_final_prms->u4_cu_sad = 0; |
5385 | 0 | total_rdopt_cost = 0; |
5386 | 0 | ecd_data_bytes_cons = 0; |
5387 | 0 | cu_bits = 0; |
5388 | 0 | #if ENABLE_INTER_ZCU_COST |
5389 | 0 | ps_ctxt->i8_cu_not_coded_cost = 0; |
5390 | 0 | #endif |
5391 | 0 | ps_final_prms->u1_is_cu_coded = 0; |
5392 | 0 | ps_final_prms->u1_cu_size = cu_size; |
5393 | |
|
5394 | 0 | ihevce_tu_selector_debriefer( |
5395 | 0 | as_tu_nodes, |
5396 | 0 | ps_final_prms, |
5397 | 0 | &total_rdopt_cost, |
5398 | 0 | #if ENABLE_INTER_ZCU_COST |
5399 | 0 | &ps_ctxt->i8_cu_not_coded_cost, |
5400 | 0 | #endif |
5401 | 0 | &ecd_data_bytes_cons, |
5402 | 0 | &cu_bits, |
5403 | 0 | &ps_final_prms->u2_num_tus_in_cu, |
5404 | 0 | ps_ctxt->i4_cu_qp, |
5405 | 0 | cu_pos_x * 4, |
5406 | 0 | cu_pos_y * 4, |
5407 | 0 | INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0), |
5408 | 0 | (ps_ctxt->u1_chroma_array_type == 2), |
5409 | 0 | POS_TL); |
5410 | |
|
5411 | 0 | if(!(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))) |
5412 | 0 | { |
5413 | 0 | ps_final_prms->i4_chrm_cu_coeff_strt_idx = ecd_data_bytes_cons; |
5414 | 0 | } |
5415 | | |
5416 | | /* Modify the cost function for this CU. */ |
5417 | | /* loop in for 8x8 blocks */ |
5418 | 0 | if(ps_ctxt->u1_enable_psyRDOPT) |
5419 | 0 | { |
5420 | 0 | UWORD8 *pu1_recon_cu; |
5421 | 0 | WORD32 recon_stride; |
5422 | 0 | WORD32 curr_pos_x; |
5423 | 0 | WORD32 curr_pos_y; |
5424 | 0 | WORD32 start_index; |
5425 | 0 | WORD32 num_horz_cu_in_ctb; |
5426 | 0 | WORD32 had_block_size; |
5427 | | |
5428 | | /* tODO: sreenivasa ctb size has to be used appropriately */ |
5429 | 0 | had_block_size = 8; |
5430 | 0 | num_horz_cu_in_ctb = 64 / had_block_size; |
5431 | |
|
5432 | 0 | curr_pos_x = cu_pos_x << 2; /* pel units */ |
5433 | 0 | curr_pos_y = cu_pos_y << 2; /* pel units */ |
5434 | 0 | recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride; |
5435 | 0 | pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore |
5436 | 0 | .apv_luma_recon_bufs[0]); // already pointing to the current CU recon |
5437 | | //+ \curr_pos_x + curr_pos_y * recon_stride; |
5438 | | |
5439 | | /* start index to index the source satd of curr cu in the current ctb */ |
5440 | 0 | start_index = |
5441 | 0 | (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb; |
5442 | |
|
5443 | 0 | { |
5444 | 0 | total_rdopt_cost += ihevce_psy_rd_cost( |
5445 | 0 | ps_ctxt->ai4_source_satd_8x8, |
5446 | 0 | pu1_recon_cu, |
5447 | 0 | recon_stride, |
5448 | 0 | 1, //howz stride |
5449 | 0 | cu_size, |
5450 | 0 | 0, // pic type |
5451 | 0 | 0, //layer id |
5452 | 0 | ps_ctxt->i4_satd_lamda, // lambda |
5453 | 0 | start_index, |
5454 | 0 | ps_ctxt->u1_is_input_data_hbd, |
5455 | 0 | ps_ctxt->u4_psy_strength, |
5456 | 0 | &ps_ctxt->s_cmn_opt_func); // 8 bit |
5457 | 0 | } |
5458 | 0 | } |
5459 | |
|
5460 | 0 | ps_final_prms->u1_chroma_intra_pred_mode = 4; |
5461 | | |
5462 | | /* update the bytes consumed */ |
5463 | 0 | ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons; |
5464 | | |
5465 | | /* store the current cu size to final prms */ |
5466 | 0 | ps_final_prms->u1_cu_size = cu_size; |
5467 | | /* ------------- Chroma processing -------------- */ |
5468 | | /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEED preset */ |
5469 | 0 | if(ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt && |
5470 | 0 | !(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))) |
5471 | 0 | { |
5472 | 0 | LWORD64 chrm_rdopt_cost; |
5473 | 0 | WORD32 chrm_rdopt_tu_bits; |
5474 | | |
5475 | | /* Store the current RDOPT cost to enable early exit in chrom_prcs */ |
5476 | 0 | ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost; |
5477 | |
|
5478 | 0 | chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt( |
5479 | 0 | ps_ctxt, |
5480 | 0 | curr_buf_idx, |
5481 | 0 | 0, /* TU mode : Don't care in Inter patrh */ |
5482 | 0 | ps_chrm_cu_buf_prms->pu1_curr_src, |
5483 | 0 | ps_chrm_cu_buf_prms->i4_chrm_src_stride, |
5484 | 0 | ps_chrm_cu_buf_prms->pu1_cu_left, |
5485 | 0 | ps_chrm_cu_buf_prms->pu1_cu_top, |
5486 | 0 | ps_chrm_cu_buf_prms->pu1_cu_top_left, |
5487 | 0 | ps_chrm_cu_buf_prms->i4_cu_left_stride, |
5488 | 0 | (cu_pos_x >> 1), |
5489 | 0 | (cu_pos_y >> 1), |
5490 | 0 | &chrm_rdopt_tu_bits, |
5491 | 0 | i4_alpha_stim_multiplier, |
5492 | 0 | u1_is_cu_noisy); |
5493 | |
|
5494 | 0 | #if WEIGH_CHROMA_COST |
5495 | 0 | chrm_rdopt_cost = (LWORD64)( |
5496 | 0 | (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor + |
5497 | 0 | (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> |
5498 | 0 | CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT); |
5499 | 0 | #endif |
5500 | |
|
5501 | 0 | #if CHROMA_RDOPT_ENABLE |
5502 | 0 | total_rdopt_cost += chrm_rdopt_cost; |
5503 | 0 | #endif |
5504 | 0 | cu_bits += chrm_rdopt_tu_bits; |
5505 | | |
5506 | | /* during chroma evaluation if skip decision was over written */ |
5507 | | /* then the current skip candidate is set to a non skip candidate */ |
5508 | 0 | ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag; |
5509 | | |
5510 | | /* cu bits for chroma residual if chroma rdopt is on */ |
5511 | | /* if zero_cbf eval is disabled then cu bits will be zero */ |
5512 | 0 | ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits; |
5513 | |
|
5514 | 0 | if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1) |
5515 | 0 | { |
5516 | | /* Early exit : If the current running cost exceeds |
5517 | | the prev. best mode cost, break */ |
5518 | 0 | if(total_rdopt_cost > prev_best_rdopt_cost) |
5519 | 0 | { |
5520 | 0 | return (total_rdopt_cost); |
5521 | 0 | } |
5522 | 0 | } |
5523 | 0 | } |
5524 | 0 | else |
5525 | 0 | {} |
5526 | | |
5527 | 0 | #if SHRINK_INTER_TUTREE |
5528 | | /* ------------- Quadtree TU split optimization ------------ */ |
5529 | 0 | if(ps_final_prms->u1_is_cu_coded) |
5530 | 0 | { |
5531 | 0 | ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree( |
5532 | 0 | &ps_final_prms->as_tu_enc_loop[0], |
5533 | 0 | &ps_final_prms->as_tu_enc_loop_temp_prms[0], |
5534 | 0 | &ps_final_prms->s_recon_datastore, |
5535 | 0 | ps_final_prms->u2_num_tus_in_cu, |
5536 | 0 | (ps_ctxt->u1_chroma_array_type == 2)); |
5537 | 0 | } |
5538 | 0 | #endif |
5539 | | |
5540 | | /* RDOPT copy States : Best after all luma TUs (and chroma,if enabled)to current */ |
5541 | 0 | COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
5542 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx] |
5543 | 0 | .s_cabac_ctxt.au1_ctxt_models[0] + |
5544 | 0 | IHEVC_CAB_COEFFX_PREFIX, |
5545 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
5546 | 0 | IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
5547 | | |
5548 | | /* -------- Bit estimate for RD opt -------------- */ |
5549 | 0 | { |
5550 | 0 | nbr_avail_flags_t s_nbr; |
5551 | | /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/ |
5552 | 0 | WORD32 cbf_bits, header_bits; |
5553 | | |
5554 | | /* get the neighbour availability flags for current cu */ |
5555 | 0 | ihevce_get_only_nbr_flag( |
5556 | 0 | &s_nbr, |
5557 | 0 | ps_ctxt->pu1_ctb_nbr_map, |
5558 | 0 | ps_ctxt->i4_nbr_map_strd, |
5559 | 0 | cu_pos_x, |
5560 | 0 | cu_pos_y, |
5561 | 0 | (cu_size >> 2), |
5562 | 0 | (cu_size >> 2)); |
5563 | | |
5564 | | /* call the entropy rdo encode to get the bit estimate for current cu */ |
5565 | 0 | header_bits = ihevce_entropy_rdo_encode_cu( |
5566 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt, |
5567 | 0 | ps_final_prms, |
5568 | 0 | (cu_pos_x >> 1), /* back to 8x8 pel units */ |
5569 | 0 | (cu_pos_y >> 1), /* back to 8x8 pel units */ |
5570 | 0 | cu_size, |
5571 | 0 | ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail |
5572 | 0 | : s_nbr.u1_top_avail, |
5573 | 0 | s_nbr.u1_left_avail, |
5574 | 0 | &ps_final_prms->pu1_cu_coeffs[0], |
5575 | 0 | &cbf_bits); |
5576 | |
|
5577 | 0 | cu_bits += header_bits; |
5578 | | |
5579 | | /* cbf bits are excluded from header bits, instead considered as texture bits */ |
5580 | | /* incase if zero cbf eval is disabled then texture bits gets added here */ |
5581 | 0 | ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits); |
5582 | 0 | ps_final_prms->u4_cu_cbf_bits = cbf_bits; |
5583 | |
|
5584 | 0 | #if RDOPT_ENABLE |
5585 | | /* add the cost of coding the header bits */ |
5586 | 0 | total_rdopt_cost += |
5587 | 0 | COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); |
5588 | |
|
5589 | 0 | #if ENABLE_INTER_ZCU_COST |
5590 | | /* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */ |
5591 | 0 | if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level)) |
5592 | 0 | { |
5593 | 0 | LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost; |
5594 | |
|
5595 | 0 | WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) && |
5596 | 0 | (1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag); |
5597 | |
|
5598 | 0 | cab_ctxt_t *ps_cab_ctxt = |
5599 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt; |
5600 | | |
5601 | | /* Read header bits generated after ihevce_entropy_rdo_encode_cu() call */ |
5602 | 0 | UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12; |
5603 | | |
5604 | | /* account for coding qt_root_cbf = 0 */ |
5605 | | /* First subtract cost for coding as 1 (part of header bits) and then add cost for coding as 0 */ |
5606 | 0 | u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0]; |
5607 | 0 | if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1]) |
5608 | 0 | u4_cu_hdr_bits_q12 = 0; |
5609 | 0 | else |
5610 | 0 | u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1]; |
5611 | | |
5612 | | /* add the cost of coding the header bits */ |
5613 | 0 | i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30( |
5614 | 0 | u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */, |
5615 | 0 | ps_ctxt->i8_cl_ssd_lambda_qf, |
5616 | 0 | (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q)); |
5617 | |
|
5618 | 0 | if(ps_ctxt->u1_enable_psyRDOPT) |
5619 | 0 | { |
5620 | 0 | i8_cu_not_coded_cost = total_rdopt_cost + 1; |
5621 | 0 | } |
5622 | | |
5623 | | /* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */ |
5624 | 0 | if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu)) |
5625 | 0 | { |
5626 | 0 | WORD32 tx_size; |
5627 | | |
5628 | | /* force cu as not coded and update the cost */ |
5629 | 0 | ps_final_prms->u1_is_cu_coded = 0; |
5630 | 0 | ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0; |
5631 | 0 | ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0; |
5632 | |
|
5633 | 0 | total_rdopt_cost = i8_cu_not_coded_cost; |
5634 | | |
5635 | | /* reset num TUs to 1 unless cu size id 64 */ |
5636 | 0 | ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1; |
5637 | 0 | trans_size = (64 == cu_size) ? 32 : cu_size; |
5638 | 0 | GETRANGE(tx_size, trans_size); |
5639 | | |
5640 | | /* reset the bytes consumed */ |
5641 | 0 | ps_final_prms->i4_num_bytes_ecd_data = 0; |
5642 | | |
5643 | | /* reset texture related bits and roll back header bits*/ |
5644 | 0 | ps_final_prms->u4_cu_cbf_bits = 0; |
5645 | 0 | ps_final_prms->u4_cu_luma_res_bits = 0; |
5646 | 0 | ps_final_prms->u4_cu_chroma_res_bits = 0; |
5647 | 0 | ps_final_prms->u4_cu_hdr_bits = |
5648 | 0 | (u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q; |
5649 | | |
5650 | | /* update cabac model with qtroot cbf = 0 decision */ |
5651 | 0 | ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] = |
5652 | 0 | gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1]; |
5653 | | |
5654 | | /* restore untouched cabac models for, tusplit, cbfs, texture etc */ |
5655 | 0 | memcpy( |
5656 | 0 | &ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM], |
5657 | 0 | &au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM], |
5658 | 0 | (IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM)); |
5659 | | |
5660 | | /* mark all tus as not coded for final eval */ |
5661 | 0 | for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++) |
5662 | 0 | { |
5663 | 0 | WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0; |
5664 | 0 | WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0; |
5665 | |
|
5666 | 0 | nbr_4x4_t *ps_cur_nbr_4x4 = |
5667 | 0 | ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu); |
5668 | |
|
5669 | 0 | num_4x4_in_tu = trans_size >> 2; |
5670 | |
|
5671 | 0 | ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0; |
5672 | 0 | ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0; |
5673 | 0 | ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0; |
5674 | |
|
5675 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0; |
5676 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0; |
5677 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0; |
5678 | |
|
5679 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0; |
5680 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0; |
5681 | |
|
5682 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3; |
5683 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x; |
5684 | 0 | ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y; |
5685 | | |
5686 | | /* reset cbf for the all 4x4 in TU */ |
5687 | 0 | { |
5688 | 0 | WORD32 i, j; |
5689 | 0 | nbr_4x4_t *ps_tmp_4x4; |
5690 | 0 | ps_tmp_4x4 = ps_cur_nbr_4x4; |
5691 | |
|
5692 | 0 | for(i = 0; i < num_4x4_in_tu; i++) |
5693 | 0 | { |
5694 | 0 | for(j = 0; j < num_4x4_in_tu; j++) |
5695 | 0 | { |
5696 | 0 | ps_tmp_4x4[j].b1_y_cbf = 0; |
5697 | 0 | } |
5698 | | /* row level update*/ |
5699 | 0 | ps_tmp_4x4 += num_4x4_in_cu; |
5700 | 0 | } |
5701 | 0 | } |
5702 | 0 | } |
5703 | 0 | } |
5704 | 0 | } |
5705 | 0 | #endif /* ENABLE_INTER_ZCU_COST */ |
5706 | |
|
5707 | 0 | #endif /* RDOPT_ENABLE */ |
5708 | 0 | } |
5709 | |
|
5710 | 0 | return (total_rdopt_cost); |
5711 | 0 | } |
5712 | | #endif |
5713 | | |
5714 | | /*! |
5715 | | ****************************************************************************** |
5716 | | * \if Function name : ihevce_inter_rdopt_cu_mc_mvp \endif |
5717 | | * |
5718 | | * \brief |
5719 | | * Inter coding unit function which performs the MVP calc (and luma MC when ENABLE_MIXED_INTER_MODE_EVAL is 0) for RD opt mode |
5720 | | * |
5721 | | * \param[in] ps_ctxt enc_loop module ctxt pointer |
5722 | | * \param[in] ps_inter_cand pointer to inter candidate structure (ref pic buf ids of its PUs are filled in here) |
5723 | | * \param[in] cu_size Current CU size |
5724 | | * \param[in] cu_pos_x cu position x w.r.t to ctb (shifted left by 1 internally to get the 4x4 level position) |
5725 | | * \param[in] cu_pos_y cu position y w.r.t to ctb (shifted left by 1 internally to get the 4x4 level position) |
5726 | | * \param[in] ps_left_nbr_4x4 Left neighbour 4x4 structure pointer |
5727 | | * \param[in] ps_top_nbr_4x4 top neighbour 4x4 structure pointer |
5728 | | * \param[in] ps_topleft_nbr_4x4 top left neighbour 4x4 structure pointer |
5729 | | * \param[in] nbr_4x4_left_strd left neighbour 4x4 buffer stride |
5730 | | * \param[in] curr_buf_idx Current Buffer index (selects as_cu_nbr[] and as_cu_prms[] entries) |
5731 | | * |
5732 | | * \return |
5733 | | * Always 0: rdopt_cost is only ever assigned 0 here, the RD opt cost itself is computed in the cu_ntu function |
5734 | | * |
5735 | | * \author |
5736 | | * Ittiam |
5737 | | * |
5738 | | ***************************************************************************** |
5739 | | */ |
5740 | | LWORD64 ihevce_inter_rdopt_cu_mc_mvp( |
5741 | | ihevce_enc_loop_ctxt_t *ps_ctxt, |
5742 | | cu_inter_cand_t *ps_inter_cand, |
5743 | | WORD32 cu_size, |
5744 | | WORD32 cu_pos_x, |
5745 | | WORD32 cu_pos_y, |
5746 | | nbr_4x4_t *ps_left_nbr_4x4, |
5747 | | nbr_4x4_t *ps_top_nbr_4x4, |
5748 | | nbr_4x4_t *ps_topleft_nbr_4x4, |
5749 | | WORD32 nbr_4x4_left_strd, |
5750 | | WORD32 curr_buf_idx) |
5751 | 0 | { |
5752 | | /* local variables */ |
5753 | 0 | enc_loop_cu_final_prms_t *ps_final_prms; |
5754 | 0 | nbr_avail_flags_t s_nbr; |
5755 | 0 | nbr_4x4_t *ps_nbr_4x4; |
5756 | |

5757 | 0 | UWORD8 au1_is_top_used[2][MAX_MVP_LIST_CAND]; |
5758 | 0 | UWORD8 *pu1_pred; |
5759 | 0 | WORD32 rdopt_cost; /* only ever set to 0, inside the partition loop */ |
5760 | 0 | WORD32 ctr; |
5761 | 0 | WORD32 num_cu_part; |
5762 | 0 | WORD32 inter_pu_wd; |
5763 | 0 | WORD32 inter_pu_ht; |
5764 | 0 | WORD32 pred_stride; |
5765 | | |
5766 | | /* get the pointers based on curbuf idx */ |
5767 | 0 | ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0]; |
5768 | 0 | ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx]; |
5769 | 0 | pu1_pred = ps_inter_cand->pu1_pred_data; |
5770 | |

5771 | 0 | pred_stride = ps_inter_cand->i4_pred_data_stride; |
5772 | | |
5773 | | /* store the partition mode in final prms */ |
5774 | 0 | ps_final_prms->u1_part_mode = ps_inter_cand->b3_part_size; |
5775 | | |
5776 | | /* since encoder does not support NXN part type */ |
5777 | | /* num parts can be either 1 or 2 only */ |
5778 | 0 | ASSERT(SIZE_NxN != ps_inter_cand->b3_part_size); |
5779 | | |
5780 | 0 | num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1; |
5781 | | |
5782 | | /* get the 4x4 level position of current cu */ |
5783 | 0 | cu_pos_x = cu_pos_x << 1; |
5784 | 0 | cu_pos_y = cu_pos_y << 1; |
5785 | | |
5786 | | /* populate cu level params */ |
5787 | 0 | ps_final_prms->u1_intra_flag = PRED_MODE_INTER; |
5788 | 0 | ps_final_prms->u2_num_pus_in_cu = num_cu_part; |
5789 | | |
5790 | | /* run a loop over all the partitions in cu */ |
5791 | 0 | for(ctr = 0; ctr < num_cu_part; ctr++) |
5792 | 0 | { |
5793 | 0 | pu_mv_t as_pred_mv[MAX_MVP_LIST_CAND]; |
5794 | 0 | pu_t *ps_pu; |
5795 | 0 | WORD32 skip_or_merge_flag; |
5796 | 0 | UWORD8 u1_use_mvp_from_top_row; /* assigned and read only when skip_or_merge_flag is 0 */ |
5797 | |

5798 | 0 | ps_pu = &ps_inter_cand->as_inter_pu[ctr]; |
5799 | | |
5800 | | /* With AMP each partition can have a different wd/ht; pixel size is (b4_wd + 1) * 4 and (b4_ht + 1) * 4 */ |
5801 | 0 | inter_pu_wd = (ps_pu->b4_wd + 1) << 2; |
5802 | 0 | inter_pu_ht = (ps_pu->b4_ht + 1) << 2; |
5803 | | |
5804 | | /* populate reference pic buf id for bs compute */ |
5805 | | |
5806 | | /* L0 */ |
5807 | 0 | if(-1 != ps_pu->mv.i1_l0_ref_idx) |
5808 | 0 | { |
5809 | 0 | ps_pu->mv.i1_l0_ref_pic_buf_id = |
5810 | 0 | ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx]->i4_buf_id; |
5811 | 0 | } |
5812 | | |
5813 | | /* L1 */ |
5814 | 0 | if(-1 != ps_pu->mv.i1_l1_ref_idx) |
5815 | 0 | { |
5816 | 0 | ps_pu->mv.i1_l1_ref_pic_buf_id = |
5817 | 0 | ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx]->i4_buf_id; |
5818 | 0 | } |
5819 | | |
5820 | | /* SKIP or merge check for every part */ |
5821 | 0 | skip_or_merge_flag = ps_inter_cand->b1_skip_flag | ps_pu->b1_merge_flag; |
5822 | | |
5823 | | /* ----------- MV Prediction ----------------- */ |
5824 | 0 | if(0 == skip_or_merge_flag) |
5825 | 0 | { |
5826 | | /* get the neighbour availability flags */ |
5827 | 0 | ihevce_get_only_nbr_flag( |
5828 | 0 | &s_nbr, |
5829 | 0 | ps_ctxt->pu1_ctb_nbr_map, |
5830 | 0 | ps_ctxt->i4_nbr_map_strd, |
5831 | 0 | cu_pos_x, |
5832 | 0 | cu_pos_y, |
5833 | 0 | inter_pu_wd >> 2, |
5834 | 0 | inter_pu_ht >> 2); |
5835 | |

5836 | 0 | if(ps_ctxt->u1_disable_intra_eval && DISABLE_TOP_SYNC && (ps_pu->b4_pos_y == 0)) |
5837 | 0 | { |
5838 | 0 | u1_use_mvp_from_top_row = 0; |
5839 | 0 | } |
5840 | 0 | else |
5841 | 0 | { |
5842 | 0 | u1_use_mvp_from_top_row = 1; |
5843 | 0 | } |
5844 | |

5845 | 0 | if(!u1_use_mvp_from_top_row) |
5846 | 0 | { |
5847 | 0 | if(s_nbr.u1_top_avail || s_nbr.u1_top_lt_avail || s_nbr.u1_top_rt_avail) |
5848 | 0 | { |
5849 | 0 | if(!s_nbr.u1_left_avail && !s_nbr.u1_bot_lt_avail) |
5850 | 0 | { |
5851 | 0 | WORD32 curr_cu_pos_in_row, cu_top_right_offset, cu_top_right_dep_pos; |
5852 | | |
5853 | | /* Ensure Top Right Sync */ |
5854 | 0 | if(!ps_ctxt->u1_use_top_at_ctb_boundary) |
5855 | 0 | { |
5856 | 0 | curr_cu_pos_in_row = |
5857 | 0 | ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x + (cu_pos_x << 2); |
5858 | |

5859 | 0 | if(ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y == 0) |
5860 | 0 | { |
5861 | | /* No wait for 1st row (this value is overwritten by the tile based offset just below) */ |
5862 | 0 | cu_top_right_offset = -(MAX_CTB_SIZE); |
5863 | 0 | { |
5864 | 0 | ihevce_tile_params_t *ps_col_tile_params = |
5865 | 0 | ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base + |
5866 | 0 | ps_ctxt->i4_tile_col_idx); |
5867 | | |
5868 | | /* No wait for 1st row: offset also accounts for the first sample x of the current column tile */ |
5869 | 0 | cu_top_right_offset = |
5870 | 0 | -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE)); |
5871 | 0 | } |
5872 | 0 | cu_top_right_dep_pos = 0; |
5873 | 0 | } |
5874 | 0 | else |
5875 | 0 | { |
5876 | 0 | cu_top_right_offset = (cu_size) + 4; |
5877 | 0 | cu_top_right_dep_pos = |
5878 | 0 | (ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y >> 6) - 1; |
5879 | 0 | } |
5880 | |

5881 | 0 | ihevce_dmgr_chk_row_row_sync( |
5882 | 0 | ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right, |
5883 | 0 | curr_cu_pos_in_row, |
5884 | 0 | cu_top_right_offset, |
5885 | 0 | cu_top_right_dep_pos, |
5886 | 0 | ps_ctxt->i4_tile_col_idx, /* Col Tile No. */ |
5887 | 0 | ps_ctxt->thrd_id); |
5888 | 0 | } |
5889 | |

5890 | 0 | u1_use_mvp_from_top_row = 1; |
5891 | 0 | } |
5892 | 0 | else |
5893 | 0 | { |
5894 | 0 | s_nbr.u1_top_avail = 0; |
5895 | 0 | s_nbr.u1_top_lt_avail = 0; |
5896 | 0 | s_nbr.u1_top_rt_avail = 0; |
5897 | 0 | } |
5898 | 0 | } |
5899 | 0 | else |
5900 | 0 | { |
5901 | 0 | u1_use_mvp_from_top_row = 1; |
5902 | 0 | } |
5903 | 0 | } |
5904 | | /* Call the MV prediction module to get MVP */ |
5905 | 0 | ihevce_mv_pred( |
5906 | 0 | &ps_ctxt->s_mv_pred_ctxt, |
5907 | 0 | ps_top_nbr_4x4, |
5908 | 0 | ps_left_nbr_4x4, |
5909 | 0 | ps_topleft_nbr_4x4, |
5910 | 0 | nbr_4x4_left_strd, |
5911 | 0 | &s_nbr, |
5912 | 0 | NULL, /* colocated MV */ |
5913 | 0 | ps_pu, |
5914 | 0 | &as_pred_mv[0], |
5915 | 0 | au1_is_top_used); |
5916 | 0 | } |
5917 | | |
5918 | | /* store the nbr 4x4 structure */ |
5919 | 0 | ps_nbr_4x4->b1_skip_flag = ps_inter_cand->b1_skip_flag; |
5920 | 0 | ps_nbr_4x4->b1_intra_flag = 0; |
5921 | 0 | ps_nbr_4x4->b1_pred_l0_flag = 0; |
5922 | 0 | ps_nbr_4x4->b1_pred_l1_flag = 0; |
5923 | | |
5924 | | /* DC is default mode for inter cu, required for intra mode signalling */ |
5925 | 0 | ps_nbr_4x4->b6_luma_intra_mode = 1; |
5926 | | |
5927 | | /* copy the motion vectors to neighbour structure */ |
5928 | 0 | ps_nbr_4x4->mv = ps_pu->mv; |
5929 | | |
5930 | | /* copy the PU to final out pu */ |
5931 | 0 | ps_final_prms->as_pu_enc_loop[ctr] = *ps_pu; |
5932 | | |
5933 | | /* copy the PU to chroma */ |
5934 | 0 | ps_final_prms->as_pu_chrm_proc[ctr] = *ps_pu; |
5935 | | |
5936 | | /* store the skip flag to final prms */ |
5937 | 0 | ps_final_prms->u1_skip_flag = ps_inter_cand->b1_skip_flag; |
5938 | | |
5939 | | /* MVP index & MVD calc is gated on skip/merge flag */ |
5940 | 0 | if(0 == skip_or_merge_flag) |
5941 | 0 | { |
5942 | | /* calculate the MVDs and populate the MVP idx for L0 */ |
5943 | 0 | if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode)) |
5944 | 0 | { |
5945 | 0 | WORD32 idx0_cost, idx1_cost; |
5946 | | |
5947 | | /* calculate the ABS mvd for cand 0 */ |
5948 | 0 | idx0_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[0].s_l0_mv.i2_mvx); |
5949 | 0 | idx0_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[0].s_l0_mv.i2_mvy); |
5950 | | |
5951 | | /* calculate the ABS mvd for cand 1 */ |
5952 | 0 | if(u1_use_mvp_from_top_row) |
5953 | 0 | { |
5954 | 0 | idx1_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[1].s_l0_mv.i2_mvx); |
5955 | 0 | idx1_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[1].s_l0_mv.i2_mvy); |
5956 | 0 | } |
5957 | 0 | else |
5958 | 0 | { |
5959 | 0 | idx1_cost = INT_MAX; /* top row not used for MVP: cand 0 always wins the <= comparison below */ |
5960 | 0 | } |
5961 | | |
5962 | | /* based on the least cost choose the mvp idx */ |
5963 | 0 | if(idx0_cost <= idx1_cost) |
5964 | 0 | { |
5965 | 0 | ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -= |
5966 | 0 | as_pred_mv[0].s_l0_mv.i2_mvx; |
5967 | 0 | ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -= |
5968 | 0 | as_pred_mv[0].s_l0_mv.i2_mvy; |
5969 | |

5970 | 0 | ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 0; |
5971 | 0 | } |
5972 | 0 | else |
5973 | 0 | { |
5974 | 0 | ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -= |
5975 | 0 | as_pred_mv[1].s_l0_mv.i2_mvx; |
5976 | 0 | ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -= |
5977 | 0 | as_pred_mv[1].s_l0_mv.i2_mvy; |
5978 | |

5979 | 0 | ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 1; |
5980 | 0 | } |
5981 | | |
5982 | | /* set the pred l0 flag for neighbour storage */ |
5983 | 0 | ps_nbr_4x4->b1_pred_l0_flag = 1; |
5984 | 0 | } |
5985 | | /* calculate the MVDs and populate the MVP idx for L1 */ |
5986 | 0 | if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode)) |
5987 | 0 | { |
5988 | 0 | WORD32 idx0_cost, idx1_cost; |
5989 | | |
5990 | | /* calculate the ABS mvd for cand 0 */ |
5991 | 0 | idx0_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[0].s_l1_mv.i2_mvx); |
5992 | 0 | idx0_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[0].s_l1_mv.i2_mvy); |
5993 | | |
5994 | | /* calculate the ABS mvd for cand 1 */ |
5995 | 0 | if(u1_use_mvp_from_top_row) |
5996 | 0 | { |
5997 | 0 | idx1_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[1].s_l1_mv.i2_mvx); |
5998 | 0 | idx1_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[1].s_l1_mv.i2_mvy); |
5999 | 0 | } |
6000 | 0 | else |
6001 | 0 | { |
6002 | 0 | idx1_cost = INT_MAX; /* top row not used for MVP: cand 0 always wins the <= comparison below */ |
6003 | 0 | } |
6004 | | |
6005 | | /* based on the least cost choose the mvp idx */ |
6006 | 0 | if(idx0_cost <= idx1_cost) |
6007 | 0 | { |
6008 | 0 | ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -= |
6009 | 0 | as_pred_mv[0].s_l1_mv.i2_mvx; |
6010 | 0 | ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -= |
6011 | 0 | as_pred_mv[0].s_l1_mv.i2_mvy; |
6012 | |

6013 | 0 | ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 0; |
6014 | 0 | } |
6015 | 0 | else |
6016 | 0 | { |
6017 | 0 | ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -= |
6018 | 0 | as_pred_mv[1].s_l1_mv.i2_mvx; |
6019 | 0 | ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -= |
6020 | 0 | as_pred_mv[1].s_l1_mv.i2_mvy; |
6021 | |

6022 | 0 | ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 1; |
6023 | 0 | } |
6024 | | |
6025 | | /* set the pred l1 flag for neighbour storage */ |
6026 | 0 | ps_nbr_4x4->b1_pred_l1_flag = 1; |
6027 | 0 | } |
6028 | | |
6029 | | /* set the merge flag to 0 */ |
6030 | 0 | ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = 0; |
6031 | 0 | ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = 0; |
6032 | 0 | } |
6033 | 0 | else |
6034 | 0 | { |
6035 | | /* copy the merge index from candidate */ |
6036 | 0 | ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = ps_pu->b1_merge_flag; |
6037 | |

6038 | 0 | ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = ps_pu->b3_merge_idx; |
6039 | |

6040 | 0 | if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode)) |
6041 | 0 | { |
6042 | | /* set the pred l0 flag for neighbour storage */ |
6043 | 0 | ps_nbr_4x4->b1_pred_l0_flag = 1; |
6044 | 0 | } |
6045 | | |
6046 | | /* skip/merge case: no MVD or MVP idx is computed, only the L1 flag is recorded */ |
6047 | 0 | if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode)) |
6048 | 0 | { |
6049 | | /* set the pred l1 flag for neighbour storage */ |
6050 | 0 | ps_nbr_4x4->b1_pred_l1_flag = 1; |
6051 | 0 | } |
6052 | 0 | } |
6053 | | |
6054 | | /* RD opt cost computation is part of cu_ntu func hence here it is set to 0 */ |
6055 | 0 | rdopt_cost = 0; |
6056 | | |
6057 | | /* copy the MV to colocated Mv structure */ |
6058 | 0 | ps_final_prms->as_col_pu_enc_loop[ctr].s_l0_mv = ps_pu->mv.s_l0_mv; |
6059 | 0 | ps_final_prms->as_col_pu_enc_loop[ctr].s_l1_mv = ps_pu->mv.s_l1_mv; |
6060 | 0 | ps_final_prms->as_col_pu_enc_loop[ctr].i1_l0_ref_idx = ps_pu->mv.i1_l0_ref_idx; |
6061 | 0 | ps_final_prms->as_col_pu_enc_loop[ctr].i1_l1_ref_idx = ps_pu->mv.i1_l1_ref_idx; |
6062 | 0 | ps_final_prms->as_col_pu_enc_loop[ctr].b2_pred_mode = ps_pu->b2_pred_mode; |
6063 | 0 | ps_final_prms->as_col_pu_enc_loop[ctr].b1_intra_flag = 0; |
6064 | | |
6065 | | /* replicate neighbour 4x4 structure for entire partition */ |
6066 | 0 | { |
6067 | 0 | WORD32 i, j; |
6068 | 0 | nbr_4x4_t *ps_tmp_4x4; |
6069 | |

6070 | 0 | ps_tmp_4x4 = ps_nbr_4x4; |
6071 | |

6072 | 0 | for(i = 0; i < (inter_pu_ht >> 2); i++) |
6073 | 0 | { |
6074 | 0 | for(j = 0; j < (inter_pu_wd >> 2); j++) |
6075 | 0 | { |
6076 | 0 | ps_tmp_4x4[j] = *ps_nbr_4x4; |
6077 | 0 | } |
6078 | | /* row level update*/ |
6079 | 0 | ps_tmp_4x4 += (cu_size >> 2); |
6080 | 0 | } |
6081 | 0 | } |
6082 | | /* set the neighbour map to 1 */ |
6083 | 0 | ihevce_set_inter_nbr_map( |
6084 | 0 | ps_ctxt->pu1_ctb_nbr_map, |
6085 | 0 | ps_ctxt->i4_nbr_map_strd, |
6086 | 0 | cu_pos_x, |
6087 | 0 | cu_pos_y, |
6088 | 0 | (inter_pu_wd >> 2), |
6089 | 0 | (inter_pu_ht >> 2), |
6090 | 0 | 1); |
6091 | | /* ----------- Motion Compensation for Luma ----------- */ |
6092 | | #if !ENABLE_MIXED_INTER_MODE_EVAL |
6093 | | { |
6094 | | IV_API_CALL_STATUS_T valid_mv_cand; |
6095 | | |
6096 | | /*If the inter candidate is neither merge cand nor skip cand, |
6097 | | or u1_high_speed_cu_dec_on is set, then calculate the mc.*/ |
6098 | | if(0 == skip_or_merge_flag || (ps_ctxt->u1_high_speed_cu_dec_on)) |
6099 | | { |
6100 | | valid_mv_cand = |
6101 | | ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 0); |
6102 | | |
6103 | | /* assert if the MC is given a valid mv candidate */ |
6104 | | ASSERT(valid_mv_cand == IV_SUCCESS); |
6105 | | } |
6106 | | } |
6107 | | #endif |
6108 | 0 | if((2 == num_cu_part) && (0 == ctr)) /* after the first of two partitions: move position and pointers to the second */ |
6109 | 0 | { |
6110 | | /* 2Nx__ partition case */ |
6111 | 0 | if(inter_pu_wd == cu_size) |
6112 | 0 | { |
6113 | 0 | cu_pos_y += (inter_pu_ht >> 2); |
6114 | 0 | pu1_pred += (inter_pu_ht * pred_stride); |
6115 | 0 | ps_nbr_4x4 += (inter_pu_ht >> 2) * (cu_size >> 2); |
6116 | 0 | ps_left_nbr_4x4 += (inter_pu_ht >> 2) * nbr_4x4_left_strd; |
6117 | 0 | ps_top_nbr_4x4 = ps_nbr_4x4 - (cu_size >> 2); |
6118 | 0 | ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - nbr_4x4_left_strd; |
6119 | 0 | } |
6120 | | |
6121 | | /* __x2N partition case */ |
6122 | 0 | if(inter_pu_ht == cu_size) |
6123 | 0 | { |
6124 | 0 | cu_pos_x += (inter_pu_wd >> 2); |
6125 | 0 | pu1_pred += inter_pu_wd; |
6126 | 0 | ps_nbr_4x4 += (inter_pu_wd >> 2); |
6127 | 0 | ps_left_nbr_4x4 = ps_nbr_4x4 - 1; |
6128 | 0 | ps_top_nbr_4x4 += (inter_pu_wd >> 2); |
6129 | 0 | ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1; |
6130 | 0 | nbr_4x4_left_strd = (cu_size >> 2); |
6131 | 0 | } |
6132 | 0 | } |
6133 | 0 | } |
6134 | |

6135 | 0 | return (rdopt_cost); |
6136 | 0 | } |
6137 | | |
6138 | | /*! |
6139 | | ****************************************************************************** |
6140 | | * \if Function name : ihevce_intra_chroma_pred_mode_selector \endif |
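6141 | | * NOTE(review): this header names and describes ihevce_intra_chroma_pred_mode_selector, which is defined further below; the function immediately following is ihevce_distortion_based_intra_chroma_mode_selector |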
6142 | | * \brief |
6143 | | * Coding unit processing function for chroma special modes (Non-Luma modes) |
6144 | | * |
6145 | | * \param[in] ps_ctxt enc_loop module ctxt pointer |
6146 | | * \param[in] ps_chrm_cu_buf_prms ctxt having chroma related prms |
6147 | | * \param[in] ps_cu_analyse pointer to cu analyse |
6148 | | * \param[in] rd_opt_curr_idx index in the array of RDopt params |
6149 | | * \param[in] tu_mode TU_EQ_CU or other case |
6150 | | * |
6151 | | * \return |
6152 | | * Stores the best SATD mode, its RDOPT cost, CABAC state, TU bits |
6153 | | * |
6154 | | * \author |
6155 | | * Ittiam |
6156 | | * |
6157 | | ***************************************************************************** |
6158 | | */ |
6159 | | UWORD8 ihevce_distortion_based_intra_chroma_mode_selector( |
6160 | | cu_analyse_t *ps_cu_analyse, |
6161 | | ihevc_intra_pred_chroma_ref_substitution_ft *pf_ref_substitution, |
6162 | | pf_intra_pred *ppf_chroma_ip, |
6163 | | pf_res_trans_luma_had_chroma *ppf_resd_trns_had, |
6164 | | UWORD8 *pu1_src, |
6165 | | WORD32 i4_src_stride, |
6166 | | UWORD8 *pu1_pred, |
6167 | | WORD32 i4_pred_stride, |
6168 | | UWORD8 *pu1_ctb_nbr_map, |
6169 | | WORD32 i4_nbr_map_strd, |
6170 | | UWORD8 *pu1_ref_sub_out, |
6171 | | WORD32 i4_alpha_stim_multiplier, |
6172 | | UWORD8 u1_is_cu_noisy, |
6173 | | UWORD8 u1_trans_size, |
6174 | | UWORD8 u1_trans_idx, |
6175 | | UWORD8 u1_num_tus_in_cu, |
6176 | | UWORD8 u1_num_4x4_luma_blks_in_tu, |
6177 | | UWORD8 u1_enable_psyRDOPT, |
6178 | | UWORD8 u1_is_422) |
6179 | 0 | { /* Predicts chroma with each of modes 0, 1, 10 and 26 (remapped for 4:2:2), accumulates Cb + Cr Hadamard SATD per mode and returns the mode with the least total */ |
6180 | 0 | UWORD8 u1_chrm_mode; |
6181 | 0 | UWORD8 ctr; |
6182 | 0 | WORD32 i4_subtu_idx; |
6183 | |

6184 | 0 | WORD32 i = 0; |
6185 | 0 | UWORD8 u1_chrm_modes[4] = { 0, 1, 10, 26 }; /* candidate modes; presumably planar, DC, horizontal, vertical in HEVC numbering */ |
6186 | 0 | WORD32 i4_satd_had[4] = { 0 }; |
6187 | 0 | WORD32 i4_best_satd_had = INT_MAX; |
6188 | 0 | UWORD8 u1_cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1); |
6189 | 0 | UWORD8 u1_cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1); |
6190 | 0 | WORD32 i4_num_sub_tus = u1_is_422 + 1; |
6191 | 0 | UWORD8 u1_best_chrm_mode = 0; |
6192 | | |
6193 | | /* Get the best satd among the four candidate modes */ |
6194 | 0 | for(i = 0; i < 4; i++) |
6195 | 0 | { |
6196 | 0 | WORD32 left_strd = i4_src_stride; |
6197 | |

6198 | 0 | u1_chrm_mode = (u1_is_422 == 1) ? gau1_chroma422_intra_angle_mapping[u1_chrm_modes[i]] |
6199 | 0 | : u1_chrm_modes[i]; |
6200 | | |
6201 | | /* loop based on num tus in a cu */ |
6202 | 0 | for(ctr = 0; ctr < u1_num_tus_in_cu; ctr++) |
6203 | 0 | { |
6204 | 0 | WORD32 luma_nbr_flags; |
6205 | 0 | WORD32 chrm_pred_func_idx; |
6206 | |

6207 | 0 | WORD32 i4_trans_size_m2 = u1_trans_size << 1; |
6208 | 0 | UWORD8 *pu1_tu_src = pu1_src + ((ctr & 1) * i4_trans_size_m2) + |
6209 | 0 | (((ctr > 1) * u1_trans_size * i4_src_stride) << u1_is_422); |
6210 | 0 | UWORD8 *pu1_tu_pred = pu1_pred + ((ctr & 1) * i4_trans_size_m2) + |
6211 | 0 | (((ctr > 1) * u1_trans_size * i4_pred_stride) << u1_is_422); |
6212 | 0 | WORD32 i4_curr_tu_pos_x = u1_cu_pos_x + ((ctr & 1) * u1_num_4x4_luma_blks_in_tu); |
6213 | 0 | WORD32 i4_curr_tu_pos_y = u1_cu_pos_y + ((ctr > 1) * u1_num_4x4_luma_blks_in_tu); |
6214 | |

6215 | 0 | luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu( |
6216 | 0 | pu1_ctb_nbr_map, |
6217 | 0 | i4_nbr_map_strd, |
6218 | 0 | i4_curr_tu_pos_x, |
6219 | 0 | i4_curr_tu_pos_y, |
6220 | 0 | u1_num_4x4_luma_blks_in_tu, |
6221 | 0 | u1_num_4x4_luma_blks_in_tu); |
6222 | |

6223 | 0 | for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++) |
6224 | 0 | { |
6225 | 0 | WORD32 nbr_flags; |
6226 | |

6227 | 0 | UWORD8 *pu1_cur_src = |
6228 | 0 | pu1_tu_src + ((i4_subtu_idx == 1) * u1_trans_size * i4_src_stride); |
6229 | 0 | UWORD8 *pu1_cur_pred = |
6230 | 0 | pu1_tu_pred + ((i4_subtu_idx == 1) * u1_trans_size * i4_pred_stride); |
6231 | 0 | UWORD8 *pu1_left = pu1_cur_src - 2; |
6232 | 0 | UWORD8 *pu1_top = pu1_cur_src - i4_src_stride; |
6233 | 0 | UWORD8 *pu1_top_left = pu1_top - 2; |
6234 | |

6235 | 0 | nbr_flags = ihevce_get_intra_chroma_tu_nbr( |
6236 | 0 | luma_nbr_flags, i4_subtu_idx, u1_trans_size, u1_is_422); |
6237 | | |
6238 | | /* call the chroma reference array substitution (reference samples are taken from the source buffer around the TU) */ |
6239 | 0 | pf_ref_substitution( |
6240 | 0 | pu1_top_left, |
6241 | 0 | pu1_top, |
6242 | 0 | pu1_left, |
6243 | 0 | left_strd, |
6244 | 0 | u1_trans_size, |
6245 | 0 | nbr_flags, |
6246 | 0 | pu1_ref_sub_out, |
6247 | 0 | 1); |
6248 | | |
6249 | | /* use the look up to get the function idx */ |
6250 | 0 | chrm_pred_func_idx = g_i4_ip_funcs[u1_chrm_mode]; |
6251 | | |
6252 | | /* call the intra prediction function */ |
6253 | 0 | ppf_chroma_ip[chrm_pred_func_idx]( |
6254 | 0 | pu1_ref_sub_out, 1, pu1_cur_pred, i4_pred_stride, u1_trans_size, u1_chrm_mode); |
6255 | |

6256 | 0 | if(!u1_is_cu_noisy || !i4_alpha_stim_multiplier) |
6257 | 0 | { |
6258 | | /* compute Hadamard-transform satd : Cb */ |
6259 | 0 | i4_satd_had[i] += ppf_resd_trns_had[u1_trans_idx - 1]( |
6260 | 0 | pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0); |
6261 | | |
6262 | | /* compute Hadamard-transform satd : Cr */ |
6263 | 0 | i4_satd_had[i] += ppf_resd_trns_had[u1_trans_idx - 1]( |
6264 | 0 | pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0); |
6265 | 0 | } |
6266 | 0 | else |
6267 | 0 | { |
6268 | 0 | WORD32 i4_satd; |
6269 | | |
6270 | | /* compute Hadamard-transform satd : Cb */ |
6271 | 0 | i4_satd = ppf_resd_trns_had[u1_trans_idx - 1]( |
6272 | 0 | pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0); |
6273 | |

6274 | 0 | i4_satd = ihevce_inject_stim_into_distortion( |
6275 | 0 | pu1_cur_src, |
6276 | 0 | i4_src_stride, |
6277 | 0 | pu1_cur_pred, |
6278 | 0 | i4_pred_stride, |
6279 | 0 | i4_satd, |
6280 | 0 | i4_alpha_stim_multiplier, |
6281 | 0 | u1_trans_size, |
6282 | 0 | 0, |
6283 | 0 | u1_enable_psyRDOPT, |
6284 | 0 | U_PLANE); |
6285 | |

6286 | 0 | i4_satd_had[i] += i4_satd; |
6287 | | |
6288 | | /* compute Hadamard-transform satd : Cr. NOTE(review): the stim call below passes the un-offset src/pred pointers with V_PLANE; presumably the callee applies the plane offset - confirm */ |
6289 | 0 | i4_satd = ppf_resd_trns_had[u1_trans_idx - 1]( |
6290 | 0 | pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0); |
6291 | |

6292 | 0 | i4_satd = ihevce_inject_stim_into_distortion( |
6293 | 0 | pu1_cur_src, |
6294 | 0 | i4_src_stride, |
6295 | 0 | pu1_cur_pred, |
6296 | 0 | i4_pred_stride, |
6297 | 0 | i4_satd, |
6298 | 0 | i4_alpha_stim_multiplier, |
6299 | 0 | u1_trans_size, |
6300 | 0 | 0, |
6301 | 0 | u1_enable_psyRDOPT, |
6302 | 0 | V_PLANE); |
6303 | |

6304 | 0 | i4_satd_had[i] += i4_satd; |
6305 | 0 | } |
6306 | 0 | } |
6307 | | |
6308 | | /* set the neighbour map to 1 for the TU just evaluated */ |
6309 | 0 | ihevce_set_nbr_map( |
6310 | 0 | pu1_ctb_nbr_map, |
6311 | 0 | i4_nbr_map_strd, |
6312 | 0 | i4_curr_tu_pos_x, |
6313 | 0 | i4_curr_tu_pos_y, |
6314 | 0 | u1_num_4x4_luma_blks_in_tu, |
6315 | 0 | 1); |
6316 | 0 | } |
6317 | | |
6318 | | /* set the neighbour map back to 0 for the whole CU before the next mode is tried */ |
6319 | 0 | ihevce_set_nbr_map( |
6320 | 0 | pu1_ctb_nbr_map, |
6321 | 0 | i4_nbr_map_strd, |
6322 | 0 | (ps_cu_analyse->b3_cu_pos_x << 1), |
6323 | 0 | (ps_cu_analyse->b3_cu_pos_y << 1), |
6324 | 0 | (ps_cu_analyse->u1_cu_size >> 2), |
6325 | 0 | 0); |
6326 | | |
6327 | | /* Get the least SATD and corresponding mode (strict compare: the earliest mode wins a tie) */ |
6328 | 0 | if(i4_best_satd_had > i4_satd_had[i]) |
6329 | 0 | { |
6330 | 0 | i4_best_satd_had = i4_satd_had[i]; |
6331 | 0 | u1_best_chrm_mode = u1_chrm_mode; |
6332 | 0 | } |
6333 | 0 | } |
6334 | |

6335 | 0 | return u1_best_chrm_mode; |
6336 | 0 | } |
6337 | | |
6338 | | void ihevce_intra_chroma_pred_mode_selector( |
6339 | | ihevce_enc_loop_ctxt_t *ps_ctxt, |
6340 | | enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms, |
6341 | | cu_analyse_t *ps_cu_analyse, |
6342 | | WORD32 rd_opt_curr_idx, |
6343 | | WORD32 tu_mode, |
6344 | | WORD32 i4_alpha_stim_multiplier, |
6345 | | UWORD8 u1_is_cu_noisy) |
6346 | 0 | { |
6347 | 0 | chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt; |
6348 | |
|
6349 | 0 | ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr; |
6350 | |
|
6351 | 0 | UWORD8 *pu1_pred; |
6352 | 0 | WORD32 trans_size; |
6353 | 0 | WORD32 num_tus_in_cu; |
6354 | 0 | WORD32 pred_strd; |
6355 | 0 | WORD32 ctr; |
6356 | 0 | WORD32 i4_subtu_idx; |
6357 | 0 | WORD32 i4_num_sub_tus; |
6358 | 0 | WORD32 trans_idx; |
6359 | 0 | WORD32 scan_idx; |
6360 | 0 | WORD32 num_4x4_luma_in_tu; |
6361 | 0 | WORD32 cu_pos_x; |
6362 | 0 | WORD32 cu_pos_y; |
6363 | |
|
6364 | 0 | recon_datastore_t *aps_recon_datastore[2] = { &ps_ctxt->as_cu_prms[0].s_recon_datastore, |
6365 | 0 | &ps_ctxt->as_cu_prms[1].s_recon_datastore }; |
6366 | |
|
6367 | 0 | LWORD64 chrm_cod_cost = 0; |
6368 | 0 | WORD32 chrm_tu_bits = 0; |
6369 | 0 | WORD32 best_chrm_mode = DM_CHROMA_IDX; |
6370 | 0 | UWORD8 *pu1_chrm_src = ps_chrm_cu_buf_prms->pu1_curr_src; |
6371 | 0 | WORD32 chrm_src_stride = ps_chrm_cu_buf_prms->i4_chrm_src_stride; |
6372 | 0 | UWORD8 *pu1_cu_left = ps_chrm_cu_buf_prms->pu1_cu_left; |
6373 | 0 | UWORD8 *pu1_cu_top = ps_chrm_cu_buf_prms->pu1_cu_top; |
6374 | 0 | UWORD8 *pu1_cu_top_left = ps_chrm_cu_buf_prms->pu1_cu_top_left; |
6375 | 0 | WORD32 cu_left_stride = ps_chrm_cu_buf_prms->i4_cu_left_stride; |
6376 | 0 | WORD32 cu_size = ps_cu_analyse->u1_cu_size; |
6377 | 0 | WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq; |
6378 | 0 | WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh; |
6379 | 0 | UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2); |
6380 | |
|
6381 | 0 | ihevc_intra_pred_chroma_ref_substitution_fptr = |
6382 | 0 | ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr; |
6383 | 0 | i4_num_sub_tus = (u1_is_422 == 1) + 1; |
6384 | |
|
6385 | | #if DISABLE_RDOQ_INTRA |
6386 | | i4_perform_rdoq = 0; |
6387 | | #endif |
6388 | |
|
6389 | 0 | if(TU_EQ_CU == tu_mode) |
6390 | 0 | { |
6391 | 0 | num_tus_in_cu = 1; |
6392 | 0 | trans_size = cu_size >> 1; |
6393 | 0 | num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/ |
6394 | 0 | ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode]; |
6395 | 0 | } |
6396 | 0 | else |
6397 | 0 | { |
6398 | 0 | num_tus_in_cu = 4; |
6399 | 0 | trans_size = cu_size >> 2; |
6400 | 0 | num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/ |
6401 | | |
6402 | | /* For 8x8 CU only one TU */ |
6403 | 0 | if(MIN_TU_SIZE > trans_size) |
6404 | 0 | { |
6405 | 0 | trans_size = MIN_TU_SIZE; |
6406 | 0 | num_tus_in_cu = 1; |
6407 | | /* chroma nbr avail. is derived based on luma. |
6408 | | for 4x4 chrm use 8x8 luma's size */ |
6409 | 0 | num_4x4_luma_in_tu = num_4x4_luma_in_tu << 1; |
6410 | 0 | } |
6411 | |
|
6412 | 0 | ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode]; |
6413 | 0 | } |
6414 | | |
6415 | | /* Can't be TU_EQ_SUBCU case */ |
6416 | 0 | ASSERT(TU_EQ_SUBCU != tu_mode); |
6417 | | |
6418 | | /* translate the transform size to index */ |
6419 | 0 | trans_idx = trans_size >> 2; |
6420 | |
|
6421 | 0 | pu1_pred = (UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data; |
6422 | |
|
6423 | 0 | pred_strd = ps_chr_intra_satd_ctxt->i4_pred_stride; |
6424 | | |
6425 | | /* for 16x16 cases */ |
6426 | 0 | if(16 == trans_size) |
6427 | 0 | { |
6428 | 0 | trans_idx = 3; |
6429 | 0 | } |
6430 | |
|
6431 | 0 | best_chrm_mode = ihevce_distortion_based_intra_chroma_mode_selector( |
6432 | 0 | ps_cu_analyse, |
6433 | 0 | ihevc_intra_pred_chroma_ref_substitution_fptr, |
6434 | 0 | ps_ctxt->apf_chrm_ip, |
6435 | 0 | ps_ctxt->apf_chrm_resd_trns_had, |
6436 | 0 | pu1_chrm_src, |
6437 | 0 | chrm_src_stride, |
6438 | 0 | pu1_pred, |
6439 | 0 | pred_strd, |
6440 | 0 | ps_ctxt->pu1_ctb_nbr_map, |
6441 | 0 | ps_ctxt->i4_nbr_map_strd, |
6442 | 0 | (UWORD8 *)ps_ctxt->pv_ref_sub_out, |
6443 | 0 | i4_alpha_stim_multiplier, |
6444 | 0 | u1_is_cu_noisy, |
6445 | 0 | trans_size, |
6446 | 0 | trans_idx, |
6447 | 0 | num_tus_in_cu, |
6448 | 0 | num_4x4_luma_in_tu, |
6449 | 0 | ps_ctxt->u1_enable_psyRDOPT, |
6450 | 0 | u1_is_422); |
6451 | | |
6452 | | /* Store the best chroma mode */ |
6453 | 0 | ps_chr_intra_satd_ctxt->u1_best_cr_mode = best_chrm_mode; |
6454 | | |
6455 | | /* evaluate RDOPT cost for the Best mode */ |
6456 | 0 | { |
6457 | 0 | WORD32 i4_subtu_pos_x; |
6458 | 0 | WORD32 i4_subtu_pos_y; |
6459 | 0 | UWORD8 u1_compute_spatial_ssd; |
6460 | |
|
6461 | 0 | WORD32 ai4_total_bytes_offset_cb[2] = { 0, 0 }; |
6462 | 0 | WORD32 ai4_total_bytes_offset_cr[2] = { 0, 0 }; |
6463 | | /* State for prefix bin of chroma intra pred mode before CU encode */ |
6464 | 0 | UWORD8 u1_chroma_intra_mode_prefix_state = |
6465 | 0 | ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_CHROMA_PRED_MODE]; |
6466 | 0 | WORD32 luma_trans_size = trans_size << 1; |
6467 | 0 | WORD32 calc_recon = 0; |
6468 | 0 | UWORD8 *pu1_left = pu1_cu_left; |
6469 | 0 | UWORD8 *pu1_top = pu1_cu_top; |
6470 | 0 | UWORD8 *pu1_top_left = pu1_cu_top_left; |
6471 | 0 | WORD32 left_strd = cu_left_stride; |
6472 | |
|
6473 | 0 | if(ps_ctxt->i1_cu_qp_delta_enable) |
6474 | 0 | { |
6475 | 0 | ihevce_update_cu_level_qp_lamda(ps_ctxt, ps_cu_analyse, luma_trans_size, 1); |
6476 | 0 | } |
6477 | |
|
6478 | 0 | u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) && |
6479 | 0 | (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) && |
6480 | 0 | CONVERT_SSDS_TO_SPATIAL_DOMAIN; |
6481 | |
|
6482 | 0 | if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT) |
6483 | 0 | { |
6484 | 0 | u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) && |
6485 | 0 | CONVERT_SSDS_TO_SPATIAL_DOMAIN; |
6486 | 0 | } |
6487 | | |
6488 | | /* get the 4x4 level postion of current cu */ |
6489 | 0 | cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1); |
6490 | 0 | cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1); |
6491 | |
|
6492 | 0 | calc_recon = !u1_compute_spatial_ssd && ((4 == num_tus_in_cu) || (u1_is_422 == 1)); |
6493 | |
|
6494 | 0 | if(calc_recon || u1_compute_spatial_ssd) |
6495 | 0 | { |
6496 | 0 | aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1; |
6497 | 0 | aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1; |
6498 | 0 | } |
6499 | 0 | else |
6500 | 0 | { |
6501 | 0 | aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0; |
6502 | 0 | aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0; |
6503 | 0 | } |
6504 | | |
6505 | | /* loop based on num tus in a cu */ |
6506 | 0 | for(ctr = 0; ctr < num_tus_in_cu; ctr++) |
6507 | 0 | { |
6508 | 0 | WORD16 *pi2_cur_deq_data_cb; |
6509 | 0 | WORD16 *pi2_cur_deq_data_cr; |
6510 | |
|
6511 | 0 | WORD32 deq_data_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride; |
6512 | 0 | WORD32 luma_nbr_flags = 0; |
6513 | |
|
6514 | 0 | luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu( |
6515 | 0 | ps_ctxt->pu1_ctb_nbr_map, |
6516 | 0 | ps_ctxt->i4_nbr_map_strd, |
6517 | 0 | (ctr & 1) * (luma_trans_size >> 2) + cu_pos_x, |
6518 | 0 | (ctr > 1) * (luma_trans_size >> 2) + cu_pos_y, |
6519 | 0 | (luma_trans_size >> 2), |
6520 | 0 | (luma_trans_size >> 2)); |
6521 | |
|
6522 | 0 | for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++) |
6523 | 0 | { |
6524 | 0 | WORD32 cbf, num_bytes; |
6525 | 0 | LWORD64 trans_ssd_u, trans_ssd_v; |
6526 | 0 | UWORD8 u1_is_recon_available; |
6527 | |
|
6528 | 0 | WORD32 trans_size_m2 = trans_size << 1; |
6529 | 0 | UWORD8 *pu1_cur_src = pu1_chrm_src + ((ctr & 1) * trans_size_m2) + |
6530 | 0 | (((ctr > 1) * trans_size * chrm_src_stride) << u1_is_422) + |
6531 | 0 | (i4_subtu_idx * trans_size * chrm_src_stride); |
6532 | 0 | UWORD8 *pu1_cur_pred = pu1_pred + ((ctr & 1) * trans_size_m2) + |
6533 | 0 | (((ctr > 1) * trans_size * pred_strd) << u1_is_422) + |
6534 | 0 | (i4_subtu_idx * trans_size * pred_strd); |
6535 | 0 | WORD32 i4_recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride; |
6536 | 0 | UWORD8 *pu1_cur_recon = ((UWORD8 *)aps_recon_datastore[0] |
6537 | 0 | ->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)]) + |
6538 | 0 | ((ctr & 1) * trans_size_m2) + |
6539 | 0 | (((ctr > 1) * trans_size * i4_recon_stride) << u1_is_422) + |
6540 | 0 | (i4_subtu_idx * trans_size * i4_recon_stride); |
6541 | | |
6542 | | /* Use Chroma coeff/iq buf of the cur. intra cand. Not rememb. |
6543 | | chroma coeff/iq for high quality intra SATD special modes. Will |
6544 | | be over written by coeff of luma mode in chroma_rdopt call */ |
6545 | 0 | UWORD8 *pu1_ecd_data_cb = |
6546 | 0 | &ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0]; |
6547 | 0 | UWORD8 *pu1_ecd_data_cr = |
6548 | 0 | &ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0]; |
6549 | |
|
6550 | 0 | WORD32 chrm_pred_func_idx = 0; |
6551 | 0 | LWORD64 curr_cb_cod_cost = 0; |
6552 | 0 | LWORD64 curr_cr_cod_cost = 0; |
6553 | 0 | WORD32 nbr_flags = 0; |
6554 | |
|
6555 | 0 | i4_subtu_pos_x = (((ctr & 1) * trans_size_m2) >> 2); |
6556 | 0 | i4_subtu_pos_y = (((ctr > 1) * trans_size) >> (!u1_is_422 + 1)) + |
6557 | 0 | ((i4_subtu_idx * trans_size) >> 2); |
6558 | 0 | pi2_cur_deq_data_cb = &ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] + |
6559 | 0 | ((ctr & 1) * trans_size) + |
6560 | 0 | (((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) + |
6561 | 0 | (i4_subtu_idx * trans_size * deq_data_strd); |
6562 | 0 | pi2_cur_deq_data_cr = &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] + |
6563 | 0 | ((ctr & 1) * trans_size) + |
6564 | 0 | (((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) + |
6565 | 0 | (i4_subtu_idx * trans_size * deq_data_strd); |
6566 | | |
6567 | | /* left cu boundary */ |
6568 | 0 | if(0 == i4_subtu_pos_x) |
6569 | 0 | { |
6570 | 0 | left_strd = cu_left_stride; |
6571 | 0 | pu1_left = pu1_cu_left + (i4_subtu_pos_y << 2) * left_strd; |
6572 | 0 | } |
6573 | 0 | else |
6574 | 0 | { |
6575 | 0 | pu1_left = pu1_cur_recon - 2; |
6576 | 0 | left_strd = i4_recon_stride; |
6577 | 0 | } |
6578 | | |
6579 | | /* top cu boundary */ |
6580 | 0 | if(0 == i4_subtu_pos_y) |
6581 | 0 | { |
6582 | 0 | pu1_top = pu1_cu_top + (i4_subtu_pos_x << 2); |
6583 | 0 | } |
6584 | 0 | else |
6585 | 0 | { |
6586 | 0 | pu1_top = pu1_cur_recon - i4_recon_stride; |
6587 | 0 | } |
6588 | | |
6589 | | /* by default top left is set to cu top left */ |
6590 | 0 | pu1_top_left = pu1_cu_top_left; |
6591 | | |
6592 | | /* top left based on position */ |
6593 | 0 | if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x)) |
6594 | 0 | { |
6595 | 0 | pu1_top_left = pu1_left - left_strd; |
6596 | 0 | } |
6597 | 0 | else if(0 != i4_subtu_pos_x) |
6598 | 0 | { |
6599 | 0 | pu1_top_left = pu1_top - 2; |
6600 | 0 | } |
6601 | | |
6602 | | /* populate the coeffs scan idx */ |
6603 | 0 | scan_idx = SCAN_DIAG_UPRIGHT; |
6604 | | |
6605 | | /* RDOPT copy States : TU init (best until prev TU) to current */ |
6606 | 0 | COPY_CABAC_STATES( |
6607 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx] |
6608 | 0 | .s_cabac_ctxt.au1_ctxt_models[0], |
6609 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
6610 | 0 | IHEVC_CAB_CTXT_END); |
6611 | | |
6612 | | /* for 4x4 transforms based on intra pred mode scan is choosen*/ |
6613 | 0 | if(4 == trans_size) |
6614 | 0 | { |
6615 | | /* for modes from 22 upto 30 horizontal scan is used */ |
6616 | 0 | if((best_chrm_mode > 21) && (best_chrm_mode < 31)) |
6617 | 0 | { |
6618 | 0 | scan_idx = SCAN_HORZ; |
6619 | 0 | } |
6620 | | /* for modes from 6 upto 14 vertical scan is used */ |
6621 | 0 | else if((best_chrm_mode > 5) && (best_chrm_mode < 15)) |
6622 | 0 | { |
6623 | 0 | scan_idx = SCAN_VERT; |
6624 | 0 | } |
6625 | 0 | } |
6626 | |
|
6627 | 0 | nbr_flags = ihevce_get_intra_chroma_tu_nbr( |
6628 | 0 | luma_nbr_flags, i4_subtu_idx, trans_size, u1_is_422); |
6629 | | |
6630 | | /* call the chroma reference array substitution */ |
6631 | 0 | ihevc_intra_pred_chroma_ref_substitution_fptr( |
6632 | 0 | pu1_top_left, |
6633 | 0 | pu1_top, |
6634 | 0 | pu1_left, |
6635 | 0 | left_strd, |
6636 | 0 | trans_size, |
6637 | 0 | nbr_flags, |
6638 | 0 | (UWORD8 *)ps_ctxt->pv_ref_sub_out, |
6639 | 0 | 1); |
6640 | | |
6641 | | /* use the look up to get the function idx */ |
6642 | 0 | chrm_pred_func_idx = g_i4_ip_funcs[best_chrm_mode]; |
6643 | | |
6644 | | /* call the intra prediction function */ |
6645 | 0 | ps_ctxt->apf_chrm_ip[chrm_pred_func_idx]( |
6646 | 0 | (UWORD8 *)ps_ctxt->pv_ref_sub_out, |
6647 | 0 | 1, |
6648 | 0 | pu1_cur_pred, |
6649 | 0 | pred_strd, |
6650 | 0 | trans_size, |
6651 | 0 | best_chrm_mode); |
6652 | | |
6653 | | /* UPLANE RDOPT Loop */ |
6654 | 0 | { |
6655 | 0 | WORD32 tu_bits; |
6656 | |
|
6657 | 0 | cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn( |
6658 | 0 | ps_ctxt, |
6659 | 0 | pu1_cur_pred, |
6660 | 0 | pred_strd, |
6661 | 0 | pu1_cur_src, |
6662 | 0 | chrm_src_stride, |
6663 | 0 | pi2_cur_deq_data_cb, |
6664 | 0 | deq_data_strd, |
6665 | 0 | pu1_cur_recon, |
6666 | 0 | i4_recon_stride, |
6667 | 0 | pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx], |
6668 | 0 | ps_ctxt->au1_cu_csbf, |
6669 | 0 | ps_ctxt->i4_cu_csbf_strd, |
6670 | 0 | trans_size, |
6671 | 0 | scan_idx, |
6672 | 0 | 1, |
6673 | 0 | &num_bytes, |
6674 | 0 | &tu_bits, |
6675 | 0 | &ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr], |
6676 | 0 | &ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr], |
6677 | 0 | &u1_is_recon_available, |
6678 | 0 | i4_perform_sbh, |
6679 | 0 | i4_perform_rdoq, |
6680 | 0 | &trans_ssd_u, |
6681 | 0 | #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
6682 | 0 | i4_alpha_stim_multiplier, |
6683 | 0 | u1_is_cu_noisy, |
6684 | 0 | #endif |
6685 | 0 | 0, |
6686 | 0 | u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD, |
6687 | 0 | U_PLANE); |
6688 | |
|
6689 | | #if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL |
6690 | | if(u1_is_cu_noisy && i4_alpha_stim_multiplier) |
6691 | | { |
6692 | | #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT |
6693 | | trans_ssd_u = ihevce_inject_stim_into_distortion( |
6694 | | pu1_cur_src, |
6695 | | chrm_src_stride, |
6696 | | pu1_cur_pred, |
6697 | | pred_strd, |
6698 | | trans_ssd_u, |
6699 | | i4_alpha_stim_multiplier, |
6700 | | trans_size, |
6701 | | 0, |
6702 | | ps_ctxt->u1_enable_psyRDOPT, |
6703 | | U_PLANE); |
6704 | | #else |
6705 | | if(u1_compute_spatial_ssd && u1_is_recon_available) |
6706 | | { |
6707 | | trans_ssd_u = ihevce_inject_stim_into_distortion( |
6708 | | pu1_cur_src, |
6709 | | chrm_src_stride, |
6710 | | pu1_cur_recon, |
6711 | | i4_recon_stride, |
6712 | | trans_ssd_u, |
6713 | | i4_alpha_stim_multiplier, |
6714 | | trans_size, |
6715 | | 0, |
6716 | | ps_ctxt->u1_enable_psyRDOPT, |
6717 | | U_PLANE); |
6718 | | } |
6719 | | else |
6720 | | { |
6721 | | trans_ssd_u = ihevce_inject_stim_into_distortion( |
6722 | | pu1_cur_src, |
6723 | | chrm_src_stride, |
6724 | | pu1_cur_pred, |
6725 | | pred_strd, |
6726 | | trans_ssd_u, |
6727 | | i4_alpha_stim_multiplier, |
6728 | | trans_size, |
6729 | | 0, |
6730 | | ps_ctxt->u1_enable_psyRDOPT, |
6731 | | U_PLANE); |
6732 | | } |
6733 | | #endif |
6734 | | } |
6735 | | #endif |
6736 | | |
6737 | | /* RDOPT copy States : New updated after curr TU to TU init */ |
6738 | 0 | if(0 != cbf) |
6739 | 0 | { |
6740 | 0 | memcpy( |
6741 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
6742 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx] |
6743 | 0 | .s_cabac_ctxt.au1_ctxt_models[0], |
6744 | 0 | IHEVC_CAB_CTXT_END); |
6745 | 0 | } |
6746 | | /* RDOPT copy States : Restoring back the Cb init state to Cr */ |
6747 | 0 | else |
6748 | 0 | { |
6749 | 0 | memcpy( |
6750 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx] |
6751 | 0 | .s_cabac_ctxt.au1_ctxt_models[0], |
6752 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
6753 | 0 | IHEVC_CAB_CTXT_END); |
6754 | 0 | } |
6755 | |
|
6756 | 0 | if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd)) |
6757 | 0 | { |
6758 | 0 | ihevce_chroma_it_recon_fxn( |
6759 | 0 | ps_ctxt, |
6760 | 0 | pi2_cur_deq_data_cb, |
6761 | 0 | deq_data_strd, |
6762 | 0 | pu1_cur_pred, |
6763 | 0 | pred_strd, |
6764 | 0 | pu1_cur_recon, |
6765 | 0 | i4_recon_stride, |
6766 | 0 | (pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx]), |
6767 | 0 | trans_size, |
6768 | 0 | cbf, |
6769 | 0 | ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr], |
6770 | 0 | ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr], |
6771 | 0 | U_PLANE); |
6772 | 0 | } |
6773 | |
|
6774 | 0 | ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr] = cbf; |
6775 | 0 | curr_cb_cod_cost = |
6776 | 0 | trans_ssd_u + |
6777 | 0 | COMPUTE_RATE_COST_CLIP30( |
6778 | 0 | tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT); |
6779 | 0 | chrm_tu_bits += tu_bits; |
6780 | 0 | ai4_total_bytes_offset_cb[i4_subtu_idx] += num_bytes; |
6781 | 0 | ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr] = |
6782 | 0 | num_bytes; |
6783 | 0 | } |
6784 | | |
6785 | | /* VPLANE RDOPT Loop */ |
6786 | 0 | { |
6787 | 0 | WORD32 tu_bits; |
6788 | |
|
6789 | 0 | cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn( |
6790 | 0 | ps_ctxt, |
6791 | 0 | pu1_cur_pred, |
6792 | 0 | pred_strd, |
6793 | 0 | pu1_cur_src, |
6794 | 0 | chrm_src_stride, |
6795 | 0 | pi2_cur_deq_data_cr, |
6796 | 0 | deq_data_strd, |
6797 | 0 | pu1_cur_recon, |
6798 | 0 | i4_recon_stride, |
6799 | 0 | pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx], |
6800 | 0 | ps_ctxt->au1_cu_csbf, |
6801 | 0 | ps_ctxt->i4_cu_csbf_strd, |
6802 | 0 | trans_size, |
6803 | 0 | scan_idx, |
6804 | 0 | 1, |
6805 | 0 | &num_bytes, |
6806 | 0 | &tu_bits, |
6807 | 0 | &ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr], |
6808 | 0 | &ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr], |
6809 | 0 | &u1_is_recon_available, |
6810 | 0 | i4_perform_sbh, |
6811 | 0 | i4_perform_rdoq, |
6812 | 0 | &trans_ssd_v, |
6813 | 0 | #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
6814 | 0 | i4_alpha_stim_multiplier, |
6815 | 0 | u1_is_cu_noisy, |
6816 | 0 | #endif |
6817 | 0 | 0, |
6818 | 0 | u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD, |
6819 | 0 | V_PLANE); |
6820 | |
|
6821 | | #if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL |
6822 | | if(u1_is_cu_noisy && i4_alpha_stim_multiplier) |
6823 | | { |
6824 | | #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT |
6825 | | trans_ssd_v = ihevce_inject_stim_into_distortion( |
6826 | | pu1_cur_src, |
6827 | | chrm_src_stride, |
6828 | | pu1_cur_pred, |
6829 | | pred_strd, |
6830 | | trans_ssd_v, |
6831 | | i4_alpha_stim_multiplier, |
6832 | | trans_size, |
6833 | | 0, |
6834 | | ps_ctxt->u1_enable_psyRDOPT, |
6835 | | V_PLANE); |
6836 | | #else |
6837 | | if(u1_compute_spatial_ssd && u1_is_recon_available) |
6838 | | { |
6839 | | trans_ssd_v = ihevce_inject_stim_into_distortion( |
6840 | | pu1_cur_src, |
6841 | | chrm_src_stride, |
6842 | | pu1_cur_recon, |
6843 | | i4_recon_stride, |
6844 | | trans_ssd_v, |
6845 | | i4_alpha_stim_multiplier, |
6846 | | trans_size, |
6847 | | 0, |
6848 | | ps_ctxt->u1_enable_psyRDOPT, |
6849 | | V_PLANE); |
6850 | | } |
6851 | | else |
6852 | | { |
6853 | | trans_ssd_v = ihevce_inject_stim_into_distortion( |
6854 | | pu1_cur_src, |
6855 | | chrm_src_stride, |
6856 | | pu1_cur_pred, |
6857 | | pred_strd, |
6858 | | trans_ssd_v, |
6859 | | i4_alpha_stim_multiplier, |
6860 | | trans_size, |
6861 | | 0, |
6862 | | ps_ctxt->u1_enable_psyRDOPT, |
6863 | | V_PLANE); |
6864 | | } |
6865 | | #endif |
6866 | | } |
6867 | | #endif |
6868 | | |
6869 | | /* RDOPT copy States : New updated after curr TU to TU init */ |
6870 | 0 | if(0 != cbf) |
6871 | 0 | { |
6872 | 0 | COPY_CABAC_STATES( |
6873 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
6874 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx] |
6875 | 0 | .s_cabac_ctxt.au1_ctxt_models[0], |
6876 | 0 | IHEVC_CAB_CTXT_END); |
6877 | 0 | } |
6878 | | /* RDOPT copy States : Restoring back the Cb init state to Cr */ |
6879 | 0 | else |
6880 | 0 | { |
6881 | 0 | COPY_CABAC_STATES( |
6882 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx] |
6883 | 0 | .s_cabac_ctxt.au1_ctxt_models[0], |
6884 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
6885 | 0 | IHEVC_CAB_CTXT_END); |
6886 | 0 | } |
6887 | |
|
6888 | 0 | if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd)) |
6889 | 0 | { |
6890 | 0 | ihevce_chroma_it_recon_fxn( |
6891 | 0 | ps_ctxt, |
6892 | 0 | pi2_cur_deq_data_cr, |
6893 | 0 | deq_data_strd, |
6894 | 0 | pu1_cur_pred, |
6895 | 0 | pred_strd, |
6896 | 0 | pu1_cur_recon, |
6897 | 0 | i4_recon_stride, |
6898 | 0 | (pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx]), |
6899 | 0 | trans_size, |
6900 | 0 | cbf, |
6901 | 0 | ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr], |
6902 | 0 | ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr], |
6903 | 0 | V_PLANE); |
6904 | 0 | } |
6905 | |
|
6906 | 0 | ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr] = cbf; |
6907 | 0 | curr_cr_cod_cost = |
6908 | 0 | trans_ssd_v + |
6909 | 0 | COMPUTE_RATE_COST_CLIP30( |
6910 | 0 | tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT); |
6911 | 0 | chrm_tu_bits += tu_bits; |
6912 | 0 | ai4_total_bytes_offset_cr[i4_subtu_idx] += num_bytes; |
6913 | 0 | ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr] = |
6914 | 0 | num_bytes; |
6915 | 0 | } |
6916 | |
|
6917 | 0 | chrm_cod_cost += curr_cb_cod_cost; |
6918 | 0 | chrm_cod_cost += curr_cr_cod_cost; |
6919 | 0 | } |
6920 | | |
6921 | | /* set the neighbour map to 1 */ |
6922 | 0 | ihevce_set_nbr_map( |
6923 | 0 | ps_ctxt->pu1_ctb_nbr_map, |
6924 | 0 | ps_ctxt->i4_nbr_map_strd, |
6925 | 0 | (ctr & 1) * (luma_trans_size >> 2) + cu_pos_x, |
6926 | 0 | (ctr > 1) * (luma_trans_size >> 2) + cu_pos_y, |
6927 | 0 | (luma_trans_size >> 2), |
6928 | 0 | 1); |
6929 | 0 | } |
6930 | | |
6931 | | /* set the neighbour map to 0 */ |
6932 | 0 | ihevce_set_nbr_map( |
6933 | 0 | ps_ctxt->pu1_ctb_nbr_map, |
6934 | 0 | ps_ctxt->i4_nbr_map_strd, |
6935 | 0 | (ps_cu_analyse->b3_cu_pos_x << 1), |
6936 | 0 | (ps_cu_analyse->b3_cu_pos_y << 1), |
6937 | 0 | (ps_cu_analyse->u1_cu_size >> 2), |
6938 | 0 | 0); |
6939 | | |
6940 | | /* Account for coding b3_chroma_intra_pred_mode prefix and suffix bins */ |
6941 | | /* This is done by adding the bits for signalling chroma mode (0-3) */ |
6942 | | /* and subtracting the bits for chroma mode same as luma mode (4) */ |
6943 | 0 | #if CHROMA_RDOPT_ENABLE |
6944 | 0 | { |
6945 | | /* Estimate bits to encode prefix bin as 1 for b3_chroma_intra_pred_mode */ |
6946 | 0 | WORD32 bits_frac_1 = |
6947 | 0 | gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 1]; |
6948 | |
|
6949 | 0 | WORD32 bits_for_mode_0to3 = (2 << CABAC_FRAC_BITS_Q) + bits_frac_1; |
6950 | | |
6951 | | /* Estimate bits to encode prefix bin as 0 for b3_chroma_intra_pred_mode */ |
6952 | 0 | WORD32 bits_for_mode4 = |
6953 | 0 | gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 0]; |
6954 | | |
6955 | | /* accumulate into final rd cost for chroma */ |
6956 | 0 | ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode = COMPUTE_RATE_COST_CLIP30( |
6957 | 0 | (bits_for_mode_0to3 - bits_for_mode4), |
6958 | 0 | ps_ctxt->i8_cl_ssd_lambda_chroma_qf, |
6959 | 0 | (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q)); |
6960 | |
|
6961 | 0 | chrm_cod_cost += ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode; |
6962 | 0 | } |
6963 | 0 | #endif |
6964 | |
|
6965 | 0 | if(ps_ctxt->u1_enable_psyRDOPT) |
6966 | 0 | { |
6967 | 0 | UWORD8 *pu1_recon_cu; |
6968 | 0 | WORD32 recon_stride; |
6969 | 0 | WORD32 curr_pos_x; |
6970 | 0 | WORD32 curr_pos_y; |
6971 | 0 | WORD32 start_index; |
6972 | 0 | WORD32 num_horz_cu_in_ctb; |
6973 | 0 | WORD32 had_block_size; |
6974 | | |
6975 | | /* tODO: sreenivasa ctb size has to be used appropriately */ |
6976 | 0 | had_block_size = 8; |
6977 | 0 | num_horz_cu_in_ctb = 2 * 64 / had_block_size; |
6978 | 0 | curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */ |
6979 | 0 | curr_pos_y = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */ |
6980 | 0 | recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride; |
6981 | 0 | pu1_recon_cu = |
6982 | 0 | aps_recon_datastore[0]->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)]; // |
6983 | | |
6984 | | /* start index into the source satd of the curr cu in the current ctb */ |
6985 | 0 | start_index = 2 * (curr_pos_x / had_block_size) + |
6986 | 0 | (curr_pos_y / had_block_size) * num_horz_cu_in_ctb; |
6987 | |
|
6988 | 0 | { |
6989 | 0 | chrm_cod_cost += ihevce_psy_rd_cost_croma( |
6990 | 0 | ps_ctxt->ai4_source_chroma_satd, |
6991 | 0 | pu1_recon_cu, |
6992 | 0 | recon_stride, |
6993 | 0 | 1, // |
6994 | 0 | cu_size, |
6995 | 0 | 0, // pic type |
6996 | 0 | 0, //layer id |
6997 | 0 | ps_ctxt->i4_satd_lamda, // lambda |
6998 | 0 | start_index, |
6999 | 0 | ps_ctxt->u1_is_input_data_hbd, // 8 bit |
7000 | 0 | ps_ctxt->u1_chroma_array_type, |
7001 | 0 | &ps_ctxt->s_cmn_opt_func |
7002 | |
|
7003 | 0 | ); // chroma subsampling 420 |
7004 | 0 | } |
7005 | 0 | } |
7006 | |
|
7007 | 0 | ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt = chrm_cod_cost; |
7008 | 0 | ps_chr_intra_satd_ctxt->i4_chrm_tu_bits = chrm_tu_bits; |
7009 | |
|
7010 | 0 | memcpy( |
7011 | 0 | &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0], |
7012 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
7013 | 0 | IHEVC_CAB_CTXT_END); |
7014 | 0 | } |
7015 | 0 | } |
7016 | | |
7017 | | /*! |
7018 | | ****************************************************************************** |
7019 | | * \if Function name : ihevce_chroma_cu_prcs_rdopt \endif |
7020 | | * |
7021 | | * \brief |
7022 | | * Coding unit processing function for chroma |
7023 | | * |
7024 | | * \param[in] ps_ctxt enc_loop module ctxt pointer |
7025 | | * \param[in] rd_opt_curr_idx index in the array of RDopt params |
7026 | | * \param[in] func_proc_mode TU_EQ_CU or other case |
7027 | | * \param[in] pu1_chrm_src pointer to source data buffer |
7028 | | * \param[in] chrm_src_stride source buffer stride |
7029 | | * \param[in] pu1_cu_left pointer to left recon data buffer |
7030 | | * \param[in] pu1_cu_top pointer to top recon data buffer |
7031 | | * \param[in] pu1_cu_top_left pointer to top left recon data buffer |
7032 | | * \param[in] cu_left_stride left recon buffer stride |
7033 | | * \param[in] cu_pos_x position x of current CU in CTB |
7034 | | * \param[in] cu_pos_y position y of current CU in CTB |
7035 | | * \param[out] pi4_chrm_tu_bits pointer to store the total chroma bits |
7036 | | * |
7037 | | * \return |
7038 | | * Chroma coding cost (Cb and Cr included) |
7039 | | * |
7040 | | * \author |
7041 | | * Ittiam |
7042 | | * |
7043 | | ***************************************************************************** |
7044 | | */ |
7045 | | LWORD64 ihevce_chroma_cu_prcs_rdopt( |
7046 | | ihevce_enc_loop_ctxt_t *ps_ctxt, |
7047 | | WORD32 rd_opt_curr_idx, |
7048 | | WORD32 func_proc_mode, |
7049 | | UWORD8 *pu1_chrm_src, |
7050 | | WORD32 chrm_src_stride, |
7051 | | UWORD8 *pu1_cu_left, |
7052 | | UWORD8 *pu1_cu_top, |
7053 | | UWORD8 *pu1_cu_top_left, |
7054 | | WORD32 cu_left_stride, |
7055 | | WORD32 cu_pos_x, |
7056 | | WORD32 cu_pos_y, |
7057 | | WORD32 *pi4_chrm_tu_bits, |
7058 | | WORD32 i4_alpha_stim_multiplier, |
7059 | | UWORD8 u1_is_cu_noisy) |
7060 | 0 | { |
7061 | 0 | tu_enc_loop_out_t *ps_tu; |
7062 | 0 | tu_enc_loop_temp_prms_t *ps_tu_temp_prms; |
7063 | |
|
7064 | 0 | ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr; |
7065 | |
|
7066 | 0 | UWORD8 *pu1_pred; |
7067 | 0 | UWORD8 *pu1_recon; |
7068 | 0 | WORD32 i4_recon_stride; |
7069 | 0 | WORD32 cu_size, trans_size = 0; |
7070 | 0 | WORD32 pred_strd; |
7071 | 0 | WORD32 ctr, i4_subtu_idx; |
7072 | 0 | WORD32 scan_idx; |
7073 | 0 | WORD32 u1_is_cu_coded_old; |
7074 | 0 | WORD32 init_bytes_offset; |
7075 | |
|
7076 | 0 | enc_loop_cu_final_prms_t *ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_curr_idx]; |
7077 | 0 | recon_datastore_t *ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore; |
7078 | |
|
7079 | 0 | WORD32 total_bytes_offset = 0; |
7080 | 0 | LWORD64 chrm_cod_cost = 0; |
7081 | 0 | WORD32 chrm_tu_bits = 0; |
7082 | 0 | WORD32 chrm_pred_mode = DM_CHROMA_IDX, luma_pred_mode = 35; |
7083 | 0 | LWORD64 i8_ssd_cb = 0; |
7084 | 0 | WORD32 i4_bits_cb = 0; |
7085 | 0 | LWORD64 i8_ssd_cr = 0; |
7086 | 0 | WORD32 i4_bits_cr = 0; |
7087 | 0 | UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2); |
7088 | 0 | UWORD8 u1_num_tus = |
7089 | | /* NumChromaTU's = 1, if TUSize = 4 and CUSize = 8 */ |
7090 | 0 | (!ps_best_cu_prms->as_tu_enc_loop[0].s_tu.b3_size && ps_best_cu_prms->u1_intra_flag) |
7091 | 0 | ? 1 |
7092 | 0 | : ps_best_cu_prms->u2_num_tus_in_cu; |
7093 | 0 | UWORD8 u1_num_subtus_in_tu = u1_is_422 + 1; |
7094 | 0 | UWORD8 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) && |
7095 | 0 | (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) && |
7096 | 0 | CONVERT_SSDS_TO_SPATIAL_DOMAIN; |
7097 | | /* Get the RDOPT cost of the best CU mode for early_exit */ |
7098 | 0 | LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!rd_opt_curr_idx].i8_best_rdopt_cost; |
7099 | | /* Get the current running RDOPT (Luma RDOPT) for early_exit */ |
7100 | 0 | LWORD64 curr_rdopt_cost = ps_ctxt->as_cu_prms[rd_opt_curr_idx].i8_curr_rdopt_cost; |
7101 | 0 | WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq; |
7102 | 0 | WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh; |
7103 | |
|
7104 | 0 | ihevc_intra_pred_chroma_ref_substitution_fptr = |
7105 | 0 | ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr; |
7106 | |
|
7107 | 0 | if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT) |
7108 | 0 | { |
7109 | 0 | u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) && |
7110 | 0 | CONVERT_SSDS_TO_SPATIAL_DOMAIN; |
7111 | 0 | } |
7112 | | |
7113 | | /* Store the init bytes offset from luma */ |
7114 | 0 | init_bytes_offset = ps_best_cu_prms->i4_num_bytes_ecd_data; |
7115 | | |
7116 | | /* Unused pred buffer in merge_skip_pred_data_t structure is used as |
7117 | | Chroma pred storage buf. for final_recon function. |
7118 | | The buffer is split into two and used as a ping-pong buffer */ |
7119 | 0 | pu1_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] + |
7120 | 0 | rd_opt_curr_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) + |
7121 | 0 | (u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1))); |
7122 | |
|
7123 | 0 | pred_strd = ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX]; |
7124 | |
|
7125 | 0 | pu1_recon = (UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs[0]; |
7126 | 0 | i4_recon_stride = ps_recon_datastore->i4_chromaRecon_stride; |
7127 | 0 | cu_size = ps_best_cu_prms->u1_cu_size; |
7128 | 0 | chrm_tu_bits = 0; |
7129 | | |
7130 | | /* get the first TU pointer */ |
7131 | 0 | ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0]; |
7132 | | /* get the first TU enc_loop temp prms pointer */ |
7133 | 0 | ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0]; |
7134 | |
|
7135 | 0 | if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag) |
7136 | 0 | { |
7137 | | /* Mode signalled by intra prediction for luma */ |
7138 | 0 | luma_pred_mode = ps_best_cu_prms->au1_intra_pred_mode[0]; |
7139 | |
|
7140 | | #if DISABLE_RDOQ_INTRA |
7141 | | i4_perform_rdoq = 0; |
7142 | | #endif |
7143 | 0 | } |
7144 | | |
7145 | 0 | else |
7146 | 0 | { |
7147 | 0 | UWORD8 *pu1_pred_org = pu1_pred; |
7148 | | |
7149 | | /* ------ Motion Compensation for Chroma -------- */ |
7150 | 0 | for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++) |
7151 | 0 | { |
7152 | 0 | pu_t *ps_pu; |
7153 | 0 | WORD32 inter_pu_wd; |
7154 | 0 | WORD32 inter_pu_ht; |
7155 | |
|
7156 | 0 | ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr]; |
7157 | |
|
7158 | 0 | inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */ |
7159 | 0 | inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1; |
7160 | 0 | inter_pu_ht <<= u1_is_422; |
7161 | |
|
7162 | 0 | ihevce_chroma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_strd); |
7163 | |
|
7164 | 0 | if(2 == ps_best_cu_prms->u2_num_pus_in_cu) |
7165 | 0 | { |
7166 | | /* 2Nx__ partion case */ |
7167 | 0 | if(inter_pu_wd == cu_size) |
7168 | 0 | { |
7169 | 0 | pu1_pred += (inter_pu_ht * pred_strd); |
7170 | 0 | } |
7171 | | |
7172 | | /* __x2N partion case */ |
7173 | 0 | if(inter_pu_ht == (cu_size >> (u1_is_422 == 0))) |
7174 | 0 | { |
7175 | 0 | pu1_pred += inter_pu_wd; |
7176 | 0 | } |
7177 | 0 | } |
7178 | 0 | } |
7179 | | |
7180 | | /* restore the pred pointer to start for transform loop */ |
7181 | 0 | pu1_pred = pu1_pred_org; |
7182 | 0 | } |
7183 | | |
7184 | | /* Used to store back only the luma based info. if SATD based chorma |
7185 | | mode also comes */ |
7186 | 0 | u1_is_cu_coded_old = ps_best_cu_prms->u1_is_cu_coded; |
7187 | | |
7188 | | /* evaluate chroma candidates (same as luma) and |
7189 | | if INTRA & HIGH_QUALITY compare with best SATD mode */ |
7190 | 0 | { |
7191 | 0 | WORD32 calc_recon = 0, deq_data_strd; |
7192 | 0 | WORD16 *pi2_deq_data; |
7193 | 0 | UWORD8 *pu1_ecd_data; |
7194 | 0 | UWORD8 u1_is_mode_eq_chroma_satd_mode = 0; |
7195 | |
|
7196 | 0 | pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0]; |
7197 | 0 | pi2_deq_data += ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx; |
7198 | 0 | deq_data_strd = cu_size; |
7199 | | /* update ecd buffer for storing coeff. */ |
7200 | 0 | pu1_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0]; |
7201 | 0 | pu1_ecd_data += init_bytes_offset; |
7202 | | /* store chroma starting index */ |
7203 | 0 | ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx = init_bytes_offset; |
7204 | | |
7205 | | /* get the first TU pointer */ |
7206 | 0 | ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0]; |
7207 | 0 | ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0]; |
7208 | | |
7209 | | /* Reset total_bytes_offset for each candidate */ |
7210 | 0 | chrm_pred_mode = (u1_is_422 == 1) ? gau1_chroma422_intra_angle_mapping[luma_pred_mode] |
7211 | 0 | : luma_pred_mode; |
7212 | |
|
7213 | 0 | total_bytes_offset = 0; |
7214 | |
|
7215 | 0 | if(TU_EQ_SUBCU == func_proc_mode) |
7216 | 0 | { |
7217 | 0 | func_proc_mode = TU_EQ_CU_DIV2; |
7218 | 0 | } |
7219 | | |
7220 | | /* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and |
7221 | | TU_EQ_CU_DIV2 and TU_EQ_SUBCU case */ |
7222 | 0 | if(8 == cu_size) |
7223 | 0 | { |
7224 | 0 | func_proc_mode = TU_EQ_CU; |
7225 | 0 | } |
7226 | | |
7227 | | /* loop based on num tus in a cu */ |
7228 | 0 | if(!ps_best_cu_prms->u1_intra_flag || !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd || |
7229 | 0 | (ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd && |
7230 | 0 | (chrm_pred_mode != |
7231 | 0 | ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode].u1_best_cr_mode))) |
7232 | 0 | { |
7233 | | /* loop based on num tus in a cu */ |
7234 | 0 | for(ctr = 0; ctr < u1_num_tus; ctr++) |
7235 | 0 | { |
7236 | 0 | WORD32 num_bytes = 0; |
7237 | 0 | LWORD64 curr_cb_cod_cost = 0; |
7238 | 0 | LWORD64 curr_cr_cod_cost = 0; |
7239 | 0 | WORD32 chrm_pred_func_idx = 0; |
7240 | 0 | UWORD8 u1_is_early_exit_condition_satisfied = 0; |
7241 | | |
7242 | | /* Default cb and cr offset initializatio for b3_chroma_intra_mode_idx=7 */ |
7243 | | /* FIX for TU tree shrinkage caused by ecd data copies in final mode recon */ |
7244 | 0 | ps_tu->s_tu.b1_cb_cbf = ps_tu->s_tu.b1_cr_cbf = 0; |
7245 | 0 | ps_tu->s_tu.b1_cb_cbf_subtu1 = ps_tu->s_tu.b1_cr_cbf_subtu1 = 0; |
7246 | 0 | ps_tu->ai4_cb_coeff_offset[0] = total_bytes_offset + init_bytes_offset; |
7247 | 0 | ps_tu->ai4_cr_coeff_offset[0] = total_bytes_offset + init_bytes_offset; |
7248 | 0 | ps_tu->ai4_cb_coeff_offset[1] = total_bytes_offset + init_bytes_offset; |
7249 | 0 | ps_tu->ai4_cr_coeff_offset[1] = total_bytes_offset + init_bytes_offset; |
7250 | 0 | ps_tu_temp_prms->ai2_cb_bytes_consumed[0] = 0; |
7251 | 0 | ps_tu_temp_prms->ai2_cr_bytes_consumed[0] = 0; |
7252 | 0 | ps_tu_temp_prms->ai2_cb_bytes_consumed[1] = 0; |
7253 | 0 | ps_tu_temp_prms->ai2_cr_bytes_consumed[1] = 0; |
7254 | | |
7255 | | /* TU level inits */ |
7256 | | /* check if chroma present flag is set */ |
7257 | 0 | if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx) |
7258 | 0 | { |
7259 | | /* RDOPT copy States : TU init (best until prev TU) to current */ |
7260 | 0 | COPY_CABAC_STATES( |
7261 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx] |
7262 | 0 | .s_cabac_ctxt.au1_ctxt_models[0], |
7263 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
7264 | 0 | IHEVC_CAB_CTXT_END); |
7265 | | |
7266 | | /* get the current transform size */ |
7267 | 0 | trans_size = ps_tu->s_tu.b3_size; |
7268 | 0 | trans_size = (1 << (trans_size + 1)); /* in chroma units */ |
7269 | | |
7270 | | /* since 2x2 transform is not allowed for chroma*/ |
7271 | 0 | if(2 == trans_size) |
7272 | 0 | { |
7273 | 0 | trans_size = 4; |
7274 | 0 | } |
7275 | 0 | } |
7276 | |
|
7277 | 0 | for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++) |
7278 | 0 | { |
7279 | 0 | WORD32 cbf; |
7280 | 0 | UWORD8 u1_is_recon_available; |
7281 | |
|
7282 | 0 | WORD32 nbr_flags = 0; |
7283 | 0 | WORD32 zero_cols = 0; |
7284 | 0 | WORD32 zero_rows = 0; |
7285 | | |
7286 | | /* check if chroma present flag is set */ |
7287 | 0 | if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx) |
7288 | 0 | { |
7289 | 0 | UWORD8 *pu1_cur_pred; |
7290 | 0 | UWORD8 *pu1_cur_recon; |
7291 | 0 | UWORD8 *pu1_cur_src; |
7292 | 0 | WORD16 *pi2_cur_deq_data; |
7293 | 0 | WORD32 curr_pos_x, curr_pos_y; |
7294 | 0 | LWORD64 trans_ssd_u, trans_ssd_v; |
7295 | | |
7296 | | /* get the current sub-tu posx and posy w.r.t to cu */ |
7297 | 0 | curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3); |
7298 | 0 | curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) + |
7299 | 0 | (i4_subtu_idx * trans_size); |
7300 | | |
7301 | | /* 420sp case only vertical height will be half */ |
7302 | 0 | if(u1_is_422 == 0) |
7303 | 0 | { |
7304 | 0 | curr_pos_y >>= 1; |
7305 | 0 | } |
7306 | | |
7307 | | /* increment the pointers to start of current Sub-TU */ |
7308 | 0 | pu1_cur_recon = (pu1_recon + curr_pos_x); |
7309 | 0 | pu1_cur_recon += (curr_pos_y * i4_recon_stride); |
7310 | 0 | pu1_cur_src = (pu1_chrm_src + curr_pos_x); |
7311 | 0 | pu1_cur_src += (curr_pos_y * chrm_src_stride); |
7312 | 0 | pu1_cur_pred = (pu1_pred + curr_pos_x); |
7313 | 0 | pu1_cur_pred += (curr_pos_y * pred_strd); |
7314 | 0 | pi2_cur_deq_data = pi2_deq_data + curr_pos_x; |
7315 | 0 | pi2_cur_deq_data += (curr_pos_y * deq_data_strd); |
7316 | | |
7317 | | /* populate the coeffs scan idx */ |
7318 | 0 | scan_idx = SCAN_DIAG_UPRIGHT; |
7319 | | |
7320 | | /* perform intra prediction only for Intra case */ |
7321 | 0 | if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag) |
7322 | 0 | { |
7323 | 0 | UWORD8 *pu1_top_left; |
7324 | 0 | UWORD8 *pu1_top; |
7325 | 0 | UWORD8 *pu1_left; |
7326 | 0 | WORD32 left_strd; |
7327 | |
|
7328 | 0 | calc_recon = !u1_compute_spatial_ssd && |
7329 | 0 | ((4 == u1_num_tus) || (u1_is_422 == 1)) && |
7330 | 0 | (((u1_num_tus == 1) && (0 == i4_subtu_idx)) || |
7331 | 0 | ((ctr == 3) && (0 == i4_subtu_idx) && (u1_is_422 == 1)) || |
7332 | 0 | ((u1_num_tus == 4) && (ctr < 3))); |
7333 | | |
7334 | | /* left cu boundary */ |
7335 | 0 | if(0 == curr_pos_x) |
7336 | 0 | { |
7337 | 0 | pu1_left = pu1_cu_left + curr_pos_y * cu_left_stride; |
7338 | 0 | left_strd = cu_left_stride; |
7339 | 0 | } |
7340 | 0 | else |
7341 | 0 | { |
7342 | 0 | pu1_left = pu1_cur_recon - 2; |
7343 | 0 | left_strd = i4_recon_stride; |
7344 | 0 | } |
7345 | | |
7346 | | /* top cu boundary */ |
7347 | 0 | if(0 == curr_pos_y) |
7348 | 0 | { |
7349 | 0 | pu1_top = pu1_cu_top + curr_pos_x; |
7350 | 0 | } |
7351 | 0 | else |
7352 | 0 | { |
7353 | 0 | pu1_top = pu1_cur_recon - i4_recon_stride; |
7354 | 0 | } |
7355 | | |
7356 | | /* by default top left is set to cu top left */ |
7357 | 0 | pu1_top_left = pu1_cu_top_left; |
7358 | | |
7359 | | /* top left based on position */ |
7360 | 0 | if((0 != curr_pos_y) && (0 == curr_pos_x)) |
7361 | 0 | { |
7362 | 0 | pu1_top_left = pu1_left - cu_left_stride; |
7363 | 0 | } |
7364 | 0 | else if(0 != curr_pos_x) |
7365 | 0 | { |
7366 | 0 | pu1_top_left = pu1_top - 2; |
7367 | 0 | } |
7368 | | |
7369 | | /* for 4x4 transforms the scan is chosen based on the intra pred mode */ |
7370 | 0 | if(4 == trans_size) |
7371 | 0 | { |
7372 | | /* for modes from 22 upto 30 horizontal scan is used */ |
7373 | 0 | if((chrm_pred_mode > 21) && (chrm_pred_mode < 31)) |
7374 | 0 | { |
7375 | 0 | scan_idx = SCAN_HORZ; |
7376 | 0 | } |
7377 | | /* for modes from 6 upto 14 vertical scan is used */ |
7378 | 0 | else if((chrm_pred_mode > 5) && (chrm_pred_mode < 15)) |
7379 | 0 | { |
7380 | 0 | scan_idx = SCAN_VERT; |
7381 | 0 | } |
7382 | 0 | } |
7383 | |
|
7384 | 0 | nbr_flags = ihevce_get_intra_chroma_tu_nbr( |
7385 | 0 | ps_best_cu_prms->au4_nbr_flags[ctr], |
7386 | 0 | i4_subtu_idx, |
7387 | 0 | trans_size, |
7388 | 0 | u1_is_422); |
7389 | | |
7390 | | /* call the chroma reference array substitution */ |
7391 | 0 | ihevc_intra_pred_chroma_ref_substitution_fptr( |
7392 | 0 | pu1_top_left, |
7393 | 0 | pu1_top, |
7394 | 0 | pu1_left, |
7395 | 0 | left_strd, |
7396 | 0 | trans_size, |
7397 | 0 | nbr_flags, |
7398 | 0 | (UWORD8 *)ps_ctxt->pv_ref_sub_out, |
7399 | 0 | 1); |
7400 | | |
7401 | | /* use the look up to get the function idx */ |
7402 | 0 | chrm_pred_func_idx = g_i4_ip_funcs[chrm_pred_mode]; |
7403 | | |
7404 | | /* call the intra prediction function */ |
7405 | 0 | ps_ctxt->apf_chrm_ip[chrm_pred_func_idx]( |
7406 | 0 | (UWORD8 *)ps_ctxt->pv_ref_sub_out, |
7407 | 0 | 1, |
7408 | 0 | pu1_cur_pred, |
7409 | 0 | pred_strd, |
7410 | 0 | trans_size, |
7411 | 0 | chrm_pred_mode); |
7412 | 0 | } |
7413 | |
|
7414 | 0 | if(!ctr && !i4_subtu_idx && (u1_compute_spatial_ssd || calc_recon)) |
7415 | 0 | { |
7416 | 0 | ps_recon_datastore->au1_is_chromaRecon_available[0] = |
7417 | 0 | !ps_best_cu_prms->u1_skip_flag; |
7418 | 0 | } |
7419 | 0 | else if(!ctr && !i4_subtu_idx) |
7420 | 0 | { |
7421 | 0 | ps_recon_datastore->au1_is_chromaRecon_available[0] = 0; |
7422 | 0 | } |
7423 | | /************************************************************/ |
7424 | | /* recon loop is done for all cases including skip cu */ |
7425 | | /* This is because skipping chroma residual based on luma */ |
7426 | | /* skip decision can lead to chroma artifacts */ |
7427 | | /************************************************************/ |
7428 | | /************************************************************/ |
7429 | | /*In the high quality and medium speed modes, wherein chroma*/ |
7430 | | /*and luma costs are included in the total cost calculation */ |
7431 | | /*the cost is just a ssd cost, and not that obtained through*/ |
7432 | | /*iq_it path */ |
7433 | | /************************************************************/ |
7434 | 0 | if(ps_best_cu_prms->u1_skip_flag == 0) |
7435 | 0 | { |
7436 | 0 | WORD32 tu_bits; |
7437 | |
|
7438 | 0 | cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn( |
7439 | 0 | ps_ctxt, |
7440 | 0 | pu1_cur_pred, |
7441 | 0 | pred_strd, |
7442 | 0 | pu1_cur_src, |
7443 | 0 | chrm_src_stride, |
7444 | 0 | pi2_cur_deq_data, |
7445 | 0 | deq_data_strd, |
7446 | 0 | pu1_cur_recon, |
7447 | 0 | i4_recon_stride, |
7448 | 0 | pu1_ecd_data + total_bytes_offset, |
7449 | 0 | ps_ctxt->au1_cu_csbf, |
7450 | 0 | ps_ctxt->i4_cu_csbf_strd, |
7451 | 0 | trans_size, |
7452 | 0 | scan_idx, |
7453 | 0 | PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag, |
7454 | 0 | &num_bytes, |
7455 | 0 | &tu_bits, |
7456 | 0 | &zero_cols, |
7457 | 0 | &zero_rows, |
7458 | 0 | &u1_is_recon_available, |
7459 | 0 | i4_perform_sbh, |
7460 | 0 | i4_perform_rdoq, |
7461 | 0 | &trans_ssd_u, |
7462 | 0 | #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
7463 | 0 | i4_alpha_stim_multiplier, |
7464 | 0 | u1_is_cu_noisy, |
7465 | 0 | #endif |
7466 | 0 | ps_best_cu_prms->u1_skip_flag, |
7467 | 0 | u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD, |
7468 | 0 | U_PLANE); |
7469 | |
|
7470 | 0 | if(u1_compute_spatial_ssd && u1_is_recon_available) |
7471 | 0 | { |
7472 | 0 | ps_recon_datastore |
7473 | 0 | ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr] |
7474 | 0 | [i4_subtu_idx] = 0; |
7475 | 0 | } |
7476 | 0 | else |
7477 | 0 | { |
7478 | 0 | ps_recon_datastore |
7479 | 0 | ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr] |
7480 | 0 | [i4_subtu_idx] = UCHAR_MAX; |
7481 | 0 | } |
7482 | |
|
7483 | | #if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
7484 | | if(u1_is_cu_noisy && i4_alpha_stim_multiplier) |
7485 | | { |
7486 | | #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT |
7487 | | trans_ssd_u = ihevce_inject_stim_into_distortion( |
7488 | | pu1_cur_src, |
7489 | | chrm_src_stride, |
7490 | | pu1_cur_pred, |
7491 | | pred_strd, |
7492 | | trans_ssd_u, |
7493 | | i4_alpha_stim_multiplier, |
7494 | | trans_size, |
7495 | | 0, |
7496 | | ps_ctxt->u1_enable_psyRDOPT, |
7497 | | U_PLANE); |
7498 | | #else |
7499 | | if(u1_compute_spatial_ssd && u1_is_recon_available) |
7500 | | { |
7501 | | trans_ssd_u = ihevce_inject_stim_into_distortion( |
7502 | | pu1_cur_src, |
7503 | | chrm_src_stride, |
7504 | | pu1_cur_recon, |
7505 | | i4_recon_stride, |
7506 | | trans_ssd_u, |
7507 | | i4_alpha_stim_multiplier, |
7508 | | trans_size, |
7509 | | 0, |
7510 | | ps_ctxt->u1_enable_psyRDOPT, |
7511 | | U_PLANE); |
7512 | | } |
7513 | | else |
7514 | | { |
7515 | | trans_ssd_u = ihevce_inject_stim_into_distortion( |
7516 | | pu1_cur_src, |
7517 | | chrm_src_stride, |
7518 | | pu1_cur_pred, |
7519 | | pred_strd, |
7520 | | trans_ssd_u, |
7521 | | i4_alpha_stim_multiplier, |
7522 | | trans_size, |
7523 | | 0, |
7524 | | ps_ctxt->u1_enable_psyRDOPT, |
7525 | | U_PLANE); |
7526 | | } |
7527 | | #endif |
7528 | | } |
7529 | | #endif |
7530 | |
|
7531 | 0 | curr_cb_cod_cost = |
7532 | 0 | trans_ssd_u + |
7533 | 0 | COMPUTE_RATE_COST_CLIP30( |
7534 | 0 | tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT); |
7535 | |
|
7536 | 0 | chrm_tu_bits += tu_bits; |
7537 | 0 | i4_bits_cb += tu_bits; |
7538 | | |
7539 | | /* RDOPT copy States : New updated after curr TU to TU init */ |
7540 | 0 | if(0 != cbf) |
7541 | 0 | { |
7542 | 0 | COPY_CABAC_STATES( |
7543 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
7544 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt |
7545 | 0 | .as_cu_entropy_ctxt[rd_opt_curr_idx] |
7546 | 0 | .s_cabac_ctxt.au1_ctxt_models[0], |
7547 | 0 | IHEVC_CAB_CTXT_END); |
7548 | 0 | } |
7549 | | /* RDOPT copy States : Restoring back the Cb init state to Cr */ |
7550 | 0 | else |
7551 | 0 | { |
7552 | 0 | COPY_CABAC_STATES( |
7553 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt |
7554 | 0 | .as_cu_entropy_ctxt[rd_opt_curr_idx] |
7555 | 0 | .s_cabac_ctxt.au1_ctxt_models[0], |
7556 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
7557 | 0 | IHEVC_CAB_CTXT_END); |
7558 | 0 | } |
7559 | | |
7560 | | /* If Intra and TU=CU/2, need recon for next TUs */ |
7561 | 0 | if(calc_recon) |
7562 | 0 | { |
7563 | 0 | ihevce_chroma_it_recon_fxn( |
7564 | 0 | ps_ctxt, |
7565 | 0 | pi2_cur_deq_data, |
7566 | 0 | deq_data_strd, |
7567 | 0 | pu1_cur_pred, |
7568 | 0 | pred_strd, |
7569 | 0 | pu1_cur_recon, |
7570 | 0 | i4_recon_stride, |
7571 | 0 | (pu1_ecd_data + total_bytes_offset), |
7572 | 0 | trans_size, |
7573 | 0 | cbf, |
7574 | 0 | zero_cols, |
7575 | 0 | zero_rows, |
7576 | 0 | U_PLANE); |
7577 | |
|
7578 | 0 | ps_recon_datastore |
7579 | 0 | ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr] |
7580 | 0 | [i4_subtu_idx] = 0; |
7581 | 0 | } |
7582 | 0 | else |
7583 | 0 | { |
7584 | 0 | ps_recon_datastore |
7585 | 0 | ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr] |
7586 | 0 | [i4_subtu_idx] = UCHAR_MAX; |
7587 | 0 | } |
7588 | 0 | } |
7589 | 0 | else |
7590 | 0 | { |
7591 | | /* num bytes is set to 0 */ |
7592 | 0 | num_bytes = 0; |
7593 | | |
7594 | | /* cbf is returned as 0 */ |
7595 | 0 | cbf = 0; |
7596 | |
|
7597 | 0 | curr_cb_cod_cost = trans_ssd_u = |
7598 | |
|
7599 | 0 | ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator( |
7600 | 0 | pu1_cur_pred, |
7601 | 0 | pu1_cur_src, |
7602 | 0 | pred_strd, |
7603 | 0 | chrm_src_stride, |
7604 | 0 | trans_size, |
7605 | 0 | trans_size, |
7606 | 0 | U_PLANE); |
7607 | |
|
7608 | 0 | if(u1_compute_spatial_ssd) |
7609 | 0 | { |
7610 | | /* buffer copy from pred to recon */ |
7611 | |
|
7612 | 0 | ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( |
7613 | 0 | pu1_cur_pred, |
7614 | 0 | pred_strd, |
7615 | 0 | pu1_cur_recon, |
7616 | 0 | i4_recon_stride, |
7617 | 0 | trans_size, |
7618 | 0 | trans_size, |
7619 | 0 | U_PLANE); |
7620 | |
|
7621 | 0 | ps_recon_datastore |
7622 | 0 | ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr] |
7623 | 0 | [i4_subtu_idx] = 0; |
7624 | 0 | } |
7625 | |
|
7626 | 0 | if(u1_is_cu_noisy && i4_alpha_stim_multiplier) |
7627 | 0 | { |
7628 | 0 | trans_ssd_u = ihevce_inject_stim_into_distortion( |
7629 | 0 | pu1_cur_src, |
7630 | 0 | chrm_src_stride, |
7631 | 0 | pu1_cur_pred, |
7632 | 0 | pred_strd, |
7633 | 0 | trans_ssd_u, |
7634 | 0 | i4_alpha_stim_multiplier, |
7635 | 0 | trans_size, |
7636 | 0 | 0, |
7637 | 0 | ps_ctxt->u1_enable_psyRDOPT, |
7638 | 0 | U_PLANE); |
7639 | 0 | } |
7640 | |
|
7641 | 0 | #if ENABLE_INTER_ZCU_COST |
7642 | | #if !WEIGH_CHROMA_COST |
7643 | | /* cbf = 0, accumulate cu not coded cost */ |
7644 | | ps_ctxt->i8_cu_not_coded_cost += curr_cb_cod_cost; |
7645 | | #else |
7646 | | /* cbf = 0, accumulate cu not coded cost */ |
7647 | |
|
7648 | 0 | ps_ctxt->i8_cu_not_coded_cost += (LWORD64)( |
7649 | 0 | (curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor + |
7650 | 0 | (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> |
7651 | 0 | CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT); |
7652 | 0 | #endif |
7653 | 0 | #endif |
7654 | 0 | } |
7655 | |
|
7656 | | #if !WEIGH_CHROMA_COST |
7657 | | curr_rdopt_cost += curr_cb_cod_cost; |
7658 | | #else |
7659 | 0 | curr_rdopt_cost += |
7660 | 0 | ((curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor + |
7661 | 0 | (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> |
7662 | 0 | CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT); |
7663 | 0 | #endif |
7664 | 0 | chrm_cod_cost += curr_cb_cod_cost; |
7665 | 0 | i8_ssd_cb += trans_ssd_u; |
7666 | |
|
7667 | 0 | if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1) |
7668 | 0 | { |
7669 | | /* Early exit : If the current running cost exceeds |
7670 | | the prev. best mode cost, break */ |
7671 | 0 | if(curr_rdopt_cost > prev_best_rdopt_cost) |
7672 | 0 | { |
7673 | 0 | u1_is_early_exit_condition_satisfied = 1; |
7674 | 0 | break; |
7675 | 0 | } |
7676 | 0 | } |
7677 | | |
7678 | | /* inter cu is coded if any of the tu is coded in it */ |
7679 | 0 | ps_best_cu_prms->u1_is_cu_coded |= cbf; |
7680 | | |
7681 | | /* update CB related params */ |
7682 | 0 | ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] = |
7683 | 0 | total_bytes_offset + init_bytes_offset; |
7684 | |
|
7685 | 0 | if(0 == i4_subtu_idx) |
7686 | 0 | { |
7687 | 0 | ps_tu->s_tu.b1_cb_cbf = cbf; |
7688 | 0 | } |
7689 | 0 | else |
7690 | 0 | { |
7691 | 0 | ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf; |
7692 | 0 | } |
7693 | |
|
7694 | 0 | total_bytes_offset += num_bytes; |
7695 | |
|
7696 | 0 | ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] = zero_cols; |
7697 | 0 | ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] = zero_rows; |
7698 | 0 | ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes; |
7699 | | |
7700 | | /* recon loop is done for non skip cases */ |
7701 | 0 | if(ps_best_cu_prms->u1_skip_flag == 0) |
7702 | 0 | { |
7703 | 0 | WORD32 tu_bits; |
7704 | |
|
7705 | 0 | cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn( |
7706 | 0 | ps_ctxt, |
7707 | 0 | pu1_cur_pred, |
7708 | 0 | pred_strd, |
7709 | 0 | pu1_cur_src, |
7710 | 0 | chrm_src_stride, |
7711 | 0 | pi2_cur_deq_data + trans_size, |
7712 | 0 | deq_data_strd, |
7713 | 0 | pu1_cur_recon, |
7714 | 0 | i4_recon_stride, |
7715 | 0 | pu1_ecd_data + total_bytes_offset, |
7716 | 0 | ps_ctxt->au1_cu_csbf, |
7717 | 0 | ps_ctxt->i4_cu_csbf_strd, |
7718 | 0 | trans_size, |
7719 | 0 | scan_idx, |
7720 | 0 | PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag, |
7721 | 0 | &num_bytes, |
7722 | 0 | &tu_bits, |
7723 | 0 | &zero_cols, |
7724 | 0 | &zero_rows, |
7725 | 0 | &u1_is_recon_available, |
7726 | 0 | i4_perform_sbh, |
7727 | 0 | i4_perform_rdoq, |
7728 | 0 | &trans_ssd_v, |
7729 | 0 | #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
7730 | 0 | i4_alpha_stim_multiplier, |
7731 | 0 | u1_is_cu_noisy, |
7732 | 0 | #endif |
7733 | 0 | ps_best_cu_prms->u1_skip_flag, |
7734 | 0 | u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD, |
7735 | 0 | V_PLANE); |
7736 | |
|
7737 | 0 | if(u1_compute_spatial_ssd && u1_is_recon_available) |
7738 | 0 | { |
7739 | 0 | ps_recon_datastore |
7740 | 0 | ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr] |
7741 | 0 | [i4_subtu_idx] = 0; |
7742 | 0 | } |
7743 | 0 | else |
7744 | 0 | { |
7745 | 0 | ps_recon_datastore |
7746 | 0 | ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr] |
7747 | 0 | [i4_subtu_idx] = UCHAR_MAX; |
7748 | 0 | } |
7749 | |
|
7750 | | #if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
7751 | | if(u1_is_cu_noisy && i4_alpha_stim_multiplier) |
7752 | | { |
7753 | | #if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT |
7754 | | trans_ssd_v = ihevce_inject_stim_into_distortion( |
7755 | | pu1_cur_src, |
7756 | | chrm_src_stride, |
7757 | | pu1_cur_pred, |
7758 | | pred_strd, |
7759 | | trans_ssd_v, |
7760 | | i4_alpha_stim_multiplier, |
7761 | | trans_size, |
7762 | | 0, |
7763 | | ps_ctxt->u1_enable_psyRDOPT, |
7764 | | V_PLANE); |
7765 | | #else |
7766 | | if(u1_compute_spatial_ssd && u1_is_recon_available) |
7767 | | { |
7768 | | trans_ssd_v = ihevce_inject_stim_into_distortion( |
7769 | | pu1_cur_src, |
7770 | | chrm_src_stride, |
7771 | | pu1_cur_recon, |
7772 | | i4_recon_stride, |
7773 | | trans_ssd_v, |
7774 | | i4_alpha_stim_multiplier, |
7775 | | trans_size, |
7776 | | 0, |
7777 | | ps_ctxt->u1_enable_psyRDOPT, |
7778 | | V_PLANE); |
7779 | | } |
7780 | | else |
7781 | | { |
7782 | | trans_ssd_v = ihevce_inject_stim_into_distortion( |
7783 | | pu1_cur_src, |
7784 | | chrm_src_stride, |
7785 | | pu1_cur_pred, |
7786 | | pred_strd, |
7787 | | trans_ssd_v, |
7788 | | i4_alpha_stim_multiplier, |
7789 | | trans_size, |
7790 | | 0, |
7791 | | ps_ctxt->u1_enable_psyRDOPT, |
7792 | | V_PLANE); |
7793 | | } |
7794 | | #endif |
7795 | | } |
7796 | | #endif |
7797 | |
|
7798 | 0 | curr_cr_cod_cost = |
7799 | 0 | trans_ssd_v + |
7800 | 0 | COMPUTE_RATE_COST_CLIP30( |
7801 | 0 | tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT); |
7802 | 0 | chrm_tu_bits += tu_bits; |
7803 | 0 | i4_bits_cr += tu_bits; |
7804 | | |
7805 | | /* RDOPT copy States : New updated after curr TU to TU init */ |
7806 | 0 | if(0 != cbf) |
7807 | 0 | { |
7808 | 0 | COPY_CABAC_STATES( |
7809 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
7810 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt |
7811 | 0 | .as_cu_entropy_ctxt[rd_opt_curr_idx] |
7812 | 0 | .s_cabac_ctxt.au1_ctxt_models[0], |
7813 | 0 | IHEVC_CAB_CTXT_END); |
7814 | 0 | } |
7815 | | /* RDOPT copy States : Restoring back the Cb init state to Cr */ |
7816 | 0 | else |
7817 | 0 | { |
7818 | 0 | COPY_CABAC_STATES( |
7819 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt |
7820 | 0 | .as_cu_entropy_ctxt[rd_opt_curr_idx] |
7821 | 0 | .s_cabac_ctxt.au1_ctxt_models[0], |
7822 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
7823 | 0 | IHEVC_CAB_CTXT_END); |
7824 | 0 | } |
7825 | | |
7826 | | /* If Intra and TU=CU/2, need recon for next TUs */ |
7827 | 0 | if(calc_recon) |
7828 | 0 | { |
7829 | 0 | ihevce_chroma_it_recon_fxn( |
7830 | 0 | ps_ctxt, |
7831 | 0 | (pi2_cur_deq_data + trans_size), |
7832 | 0 | deq_data_strd, |
7833 | 0 | pu1_cur_pred, |
7834 | 0 | pred_strd, |
7835 | 0 | pu1_cur_recon, |
7836 | 0 | i4_recon_stride, |
7837 | 0 | (pu1_ecd_data + total_bytes_offset), |
7838 | 0 | trans_size, |
7839 | 0 | cbf, |
7840 | 0 | zero_cols, |
7841 | 0 | zero_rows, |
7842 | 0 | V_PLANE); |
7843 | |
|
7844 | 0 | ps_recon_datastore |
7845 | 0 | ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr] |
7846 | 0 | [i4_subtu_idx] = 0; |
7847 | 0 | } |
7848 | 0 | else |
7849 | 0 | { |
7850 | 0 | ps_recon_datastore |
7851 | 0 | ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr] |
7852 | 0 | [i4_subtu_idx] = UCHAR_MAX; |
7853 | 0 | } |
7854 | 0 | } |
7855 | 0 | else |
7856 | 0 | { |
7857 | | /* num bytes is set to 0 */ |
7858 | 0 | num_bytes = 0; |
7859 | | |
7860 | | /* cbf is returned as 0 */ |
7861 | 0 | cbf = 0; |
7862 | |
|
7863 | 0 | curr_cr_cod_cost = trans_ssd_v = |
7864 | |
|
7865 | 0 | ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator( |
7866 | 0 | pu1_cur_pred, |
7867 | 0 | pu1_cur_src, |
7868 | 0 | pred_strd, |
7869 | 0 | chrm_src_stride, |
7870 | 0 | trans_size, |
7871 | 0 | trans_size, |
7872 | 0 | V_PLANE); |
7873 | |
|
7874 | 0 | if(u1_compute_spatial_ssd) |
7875 | 0 | { |
7876 | | /* buffer copy from pred to recon */ |
7877 | 0 | ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( |
7878 | 0 | pu1_cur_pred, |
7879 | 0 | pred_strd, |
7880 | 0 | pu1_cur_recon, |
7881 | 0 | i4_recon_stride, |
7882 | 0 | trans_size, |
7883 | 0 | trans_size, |
7884 | 0 | V_PLANE); |
7885 | |
|
7886 | 0 | ps_recon_datastore |
7887 | 0 | ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr] |
7888 | 0 | [i4_subtu_idx] = 0; |
7889 | 0 | } |
7890 | |
|
7891 | 0 | if(u1_is_cu_noisy && i4_alpha_stim_multiplier) |
7892 | 0 | { |
7893 | 0 | trans_ssd_v = ihevce_inject_stim_into_distortion( |
7894 | 0 | pu1_cur_src, |
7895 | 0 | chrm_src_stride, |
7896 | 0 | pu1_cur_pred, |
7897 | 0 | pred_strd, |
7898 | 0 | trans_ssd_v, |
7899 | 0 | i4_alpha_stim_multiplier, |
7900 | 0 | trans_size, |
7901 | 0 | 0, |
7902 | 0 | ps_ctxt->u1_enable_psyRDOPT, |
7903 | 0 | V_PLANE); |
7904 | 0 | } |
7905 | |
|
7906 | 0 | #if ENABLE_INTER_ZCU_COST |
7907 | | #if !WEIGH_CHROMA_COST |
7908 | | /* cbf = 0, accumulate cu not coded cost */ |
7909 | | ps_ctxt->i8_cu_not_coded_cost += curr_cr_cod_cost; |
7910 | | #else |
7911 | | /* cbf = 0, accumulate cu not coded cost */ |
7912 | |
|
7913 | 0 | ps_ctxt->i8_cu_not_coded_cost += (LWORD64)( |
7914 | 0 | (curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor + |
7915 | 0 | (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> |
7916 | 0 | CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT); |
7917 | 0 | #endif |
7918 | 0 | #endif |
7919 | 0 | } |
7920 | |
|
7921 | | #if !WEIGH_CHROMA_COST |
7922 | | curr_rdopt_cost += curr_cr_cod_cost; |
7923 | | #else |
7924 | 0 | curr_rdopt_cost += |
7925 | 0 | ((curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor + |
7926 | 0 | (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> |
7927 | 0 | CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT); |
7928 | 0 | #endif |
7929 | |
|
7930 | 0 | chrm_cod_cost += curr_cr_cod_cost; |
7931 | 0 | i8_ssd_cr += trans_ssd_v; |
7932 | |
|
7933 | 0 | if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1) |
7934 | 0 | { |
7935 | | /* Early exit : If the current running cost exceeds |
7936 | | the prev. best mode cost, break */ |
7937 | 0 | if(curr_rdopt_cost > prev_best_rdopt_cost) |
7938 | 0 | { |
7939 | 0 | u1_is_early_exit_condition_satisfied = 1; |
7940 | 0 | break; |
7941 | 0 | } |
7942 | 0 | } |
7943 | | |
7944 | | /* inter cu is coded if any of the tu is coded in it */ |
7945 | 0 | ps_best_cu_prms->u1_is_cu_coded |= cbf; |
7946 | | |
7947 | | /* update CR related params */ |
7948 | 0 | ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] = |
7949 | 0 | total_bytes_offset + init_bytes_offset; |
7950 | |
|
7951 | 0 | if(0 == i4_subtu_idx) |
7952 | 0 | { |
7953 | 0 | ps_tu->s_tu.b1_cr_cbf = cbf; |
7954 | 0 | } |
7955 | 0 | else |
7956 | 0 | { |
7957 | 0 | ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf; |
7958 | 0 | } |
7959 | |
|
7960 | 0 | total_bytes_offset += num_bytes; |
7961 | |
|
7962 | 0 | ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] = zero_cols; |
7963 | 0 | ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] = zero_rows; |
7964 | 0 | ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes; |
7965 | 0 | } |
7966 | 0 | else |
7967 | 0 | { |
7968 | 0 | ps_recon_datastore |
7969 | 0 | ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx] = |
7970 | 0 | UCHAR_MAX; |
7971 | 0 | ps_recon_datastore |
7972 | 0 | ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx] = |
7973 | 0 | UCHAR_MAX; |
7974 | 0 | } |
7975 | 0 | } |
7976 | |
|
7977 | 0 | if(u1_is_early_exit_condition_satisfied) |
7978 | 0 | { |
7979 | 0 | break; |
7980 | 0 | } |
7981 | | |
7982 | | /* loop increments */ |
7983 | 0 | ps_tu++; |
7984 | 0 | ps_tu_temp_prms++; |
7985 | 0 | } |
7986 | | |
7987 | | /* Signal as luma mode. HIGH_QUALITY may update it */ |
7988 | 0 | ps_best_cu_prms->u1_chroma_intra_pred_mode = 4; |
7989 | | |
7990 | | /* modify the cost chrm_cod_cost */ |
7991 | 0 | if(ps_ctxt->u1_enable_psyRDOPT) |
7992 | 0 | { |
7993 | 0 | UWORD8 *pu1_recon_cu; |
7994 | 0 | WORD32 recon_stride; |
7995 | 0 | WORD32 curr_pos_x; |
7996 | 0 | WORD32 curr_pos_y; |
7997 | 0 | WORD32 start_index; |
7998 | 0 | WORD32 num_horz_cu_in_ctb; |
7999 | 0 | WORD32 had_block_size; |
8000 | | /* TODO: sreenivasa ctb size has to be used appropriately */ |
8001 | 0 | had_block_size = 8; |
8002 | 0 | num_horz_cu_in_ctb = 2 * 64 / had_block_size; |
8003 | |
|
8004 | 0 | curr_pos_x = cu_pos_x << 3; /* pel units */ |
8005 | 0 | curr_pos_y = cu_pos_y << 3; /* pel units */ |
8006 | 0 | recon_stride = i4_recon_stride; |
8007 | 0 | pu1_recon_cu = pu1_recon; |
8008 | | |
8009 | | /* start index to index the source satd of curr cu in the current ctb */ |
8010 | 0 | start_index = 2 * (curr_pos_x / had_block_size) + |
8011 | 0 | (curr_pos_y / had_block_size) * num_horz_cu_in_ctb; |
8012 | |
|
8013 | 0 | { |
8014 | 0 | chrm_cod_cost += ihevce_psy_rd_cost_croma( |
8015 | 0 | ps_ctxt->ai4_source_chroma_satd, |
8016 | 0 | pu1_recon, |
8017 | 0 | recon_stride, |
8018 | 0 | 1, // |
8019 | 0 | cu_size, |
8020 | 0 | 0, // pic type |
8021 | 0 | 0, //layer id |
8022 | 0 | ps_ctxt->i4_satd_lamda, // lambda |
8023 | 0 | start_index, |
8024 | 0 | ps_ctxt->u1_is_input_data_hbd, // 8 bit |
8025 | 0 | ps_ctxt->u1_chroma_array_type, |
8026 | 0 | &ps_ctxt->s_cmn_opt_func |
8027 | |
|
8028 | 0 | ); // chroma subsampling 420 |
8029 | 0 | } |
8030 | 0 | } |
8031 | 0 | } |
8032 | 0 | else |
8033 | 0 | { |
8034 | 0 | u1_is_mode_eq_chroma_satd_mode = 1; |
8035 | 0 | chrm_cod_cost = MAX_COST_64; |
8036 | 0 | } |
8037 | | |
8038 | | /* If Intra Block and preset is HIGH QUALITY, then compare with best SATD mode */ |
8039 | 0 | if((PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag) && |
8040 | 0 | (1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd)) |
8041 | 0 | { |
8042 | 0 | if(64 == cu_size) |
8043 | 0 | { |
8044 | 0 | ASSERT(TU_EQ_CU != func_proc_mode); |
8045 | 0 | } |
8046 | | |
8047 | 0 | if(ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode] |
8048 | 0 | .i8_chroma_best_rdopt < chrm_cod_cost) |
8049 | 0 | { |
8050 | 0 | UWORD8 *pu1_src; |
8051 | 0 | UWORD8 *pu1_ecd_data_src_cb; |
8052 | 0 | UWORD8 *pu1_ecd_data_src_cr; |
8053 | |
|
8054 | 0 | chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt = |
8055 | 0 | &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode]; |
8056 | |
|
8057 | 0 | UWORD8 *pu1_dst = &ps_ctxt->au1_rdopt_init_ctxt_models[0]; |
8058 | 0 | WORD32 ai4_ecd_data_cb_offset[2] = { 0, 0 }; |
8059 | 0 | WORD32 ai4_ecd_data_cr_offset[2] = { 0, 0 }; |
8060 | |
|
8061 | 0 | pu1_src = &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0]; |
8062 | 0 | chrm_cod_cost = ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt; |
8063 | 0 | chrm_pred_mode = ps_chr_intra_satd_ctxt->u1_best_cr_mode; |
8064 | 0 | chrm_tu_bits = ps_chr_intra_satd_ctxt->i4_chrm_tu_bits; |
8065 | |
|
8066 | 0 | if(u1_is_mode_eq_chroma_satd_mode) |
8067 | 0 | { |
8068 | 0 | chrm_cod_cost -= ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode; |
8069 | 0 | } |
8070 | | |
8071 | | /*Resetting total_num_bytes_to 0*/ |
8072 | 0 | total_bytes_offset = 0; |
8073 | | |
8074 | | /* Update the CABAC state corresponding to chroma only */ |
8075 | | /* Chroma Cbf */ |
8076 | 0 | memcpy(pu1_dst + IHEVC_CAB_CBCR_IDX, pu1_src + IHEVC_CAB_CBCR_IDX, 2); |
8077 | | /* Chroma transform skip */ |
8078 | 0 | memcpy(pu1_dst + IHEVC_CAB_TFM_SKIP12, pu1_src + IHEVC_CAB_TFM_SKIP12, 1); |
8079 | | /* Chroma last coeff x prefix */ |
8080 | 0 | memcpy( |
8081 | 0 | pu1_dst + IHEVC_CAB_COEFFX_PREFIX + 15, |
8082 | 0 | pu1_src + IHEVC_CAB_COEFFX_PREFIX + 15, |
8083 | 0 | 3); |
8084 | | /* Chroma last coeff y prefix */ |
8085 | 0 | memcpy( |
8086 | 0 | pu1_dst + IHEVC_CAB_COEFFY_PREFIX + 15, |
8087 | 0 | pu1_src + IHEVC_CAB_COEFFY_PREFIX + 15, |
8088 | 0 | 3); |
8089 | | /* Chroma csbf */ |
8090 | 0 | memcpy( |
8091 | 0 | pu1_dst + IHEVC_CAB_CODED_SUBLK_IDX + 2, |
8092 | 0 | pu1_src + IHEVC_CAB_CODED_SUBLK_IDX + 2, |
8093 | 0 | 2); |
8094 | | /* Chroma sig coeff flags */ |
8095 | 0 | memcpy( |
8096 | 0 | pu1_dst + IHEVC_CAB_COEFF_FLAG + 27, pu1_src + IHEVC_CAB_COEFF_FLAG + 27, 15); |
8097 | | /* Chroma absgt1 flags */ |
8098 | 0 | memcpy( |
8099 | 0 | pu1_dst + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16, |
8100 | 0 | pu1_src + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16, |
8101 | 0 | 8); |
8102 | | /* Chroma absgt2 flags */ |
8103 | 0 | memcpy( |
8104 | 0 | pu1_dst + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4, |
8105 | 0 | pu1_src + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4, |
8106 | 0 | 2); |
8107 | |
|
8108 | 0 | ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0]; |
8109 | 0 | ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0]; |
8110 | | |
8111 | | /* update to luma decision as we update chroma in final mode */ |
8112 | 0 | ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded_old; |
8113 | |
|
8114 | 0 | for(ctr = 0; ctr < u1_num_tus; ctr++) |
8115 | 0 | { |
8116 | 0 | for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++) |
8117 | 0 | { |
8118 | 0 | WORD32 cbf; |
8119 | 0 | WORD32 num_bytes; |
8120 | |
|
8121 | 0 | pu1_ecd_data_src_cb = |
8122 | 0 | &ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0]; |
8123 | 0 | pu1_ecd_data_src_cr = |
8124 | 0 | &ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0]; |
8125 | | |
8126 | | /* check if chroma present flag is set */ |
8127 | 0 | if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx) |
8128 | 0 | { |
8129 | 0 | UWORD8 *pu1_cur_pred_dest; |
8130 | 0 | UWORD8 *pu1_cur_pred_src; |
8131 | 0 | WORD32 pred_src_strd; |
8132 | 0 | WORD16 *pi2_cur_deq_data_dest; |
8133 | 0 | WORD16 *pi2_cur_deq_data_src_cb; |
8134 | 0 | WORD16 *pi2_cur_deq_data_src_cr; |
8135 | 0 | WORD32 deq_src_strd; |
8136 | |
|
8137 | 0 | WORD32 curr_pos_x, curr_pos_y; |
8138 | |
|
8139 | 0 | trans_size = ps_tu->s_tu.b3_size; |
8140 | 0 | trans_size = (1 << (trans_size + 1)); /* in chroma units */ |
8141 | | |
8142 | | /*Deriving stride values*/ |
8143 | 0 | pred_src_strd = ps_chr_intra_satd_ctxt->i4_pred_stride; |
8144 | 0 | deq_src_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride; |
8145 | | |
8146 | | /* since 2x2 transform is not allowed for chroma*/ |
8147 | 0 | if(2 == trans_size) |
8148 | 0 | { |
8149 | 0 | trans_size = 4; |
8150 | 0 | } |
8151 | | |
8152 | | /* get the current tu posx and posy w.r.t to cu */ |
8153 | 0 | curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3); |
8154 | 0 | curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) + |
8155 | 0 | (i4_subtu_idx * trans_size); |
8156 | | |
8157 | | /* 420sp case only vertical height will be half */ |
8158 | 0 | if(0 == u1_is_422) |
8159 | 0 | { |
8160 | 0 | curr_pos_y >>= 1; |
8161 | 0 | } |
8162 | | |
8163 | | /* increment the pointers to start of current TU */ |
8164 | 0 | pu1_cur_pred_src = |
8165 | 0 | ((UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data + curr_pos_x); |
8166 | 0 | pu1_cur_pred_src += (curr_pos_y * pred_src_strd); |
8167 | 0 | pu1_cur_pred_dest = (pu1_pred + curr_pos_x); |
8168 | 0 | pu1_cur_pred_dest += (curr_pos_y * pred_strd); |
8169 | |
|
8170 | 0 | pi2_cur_deq_data_src_cb = |
8171 | 0 | &ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] + (curr_pos_x >> 1); |
8172 | 0 | pi2_cur_deq_data_src_cr = |
8173 | 0 | &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] + (curr_pos_x >> 1); |
8174 | 0 | pi2_cur_deq_data_src_cb += (curr_pos_y * deq_src_strd); |
8175 | 0 | pi2_cur_deq_data_src_cr += (curr_pos_y * deq_src_strd); |
8176 | 0 | pi2_cur_deq_data_dest = pi2_deq_data + curr_pos_x; |
8177 | 0 | pi2_cur_deq_data_dest += (curr_pos_y * deq_data_strd); |
8178 | | |
8179 | | /*Overwriting deq data with that belonging to the winning special mode |
8180 | | (luma mode != chroma mode) |
8181 | | ihevce_copy_2d takes source and dest arguments as UWORD8 *. We have to |
8182 | | correspondingly manipulate to copy WORD16 data*/ |
8183 | |
|
8184 | 0 | ps_ctxt->s_cmn_opt_func.pf_copy_2d( |
8185 | 0 | (UWORD8 *)pi2_cur_deq_data_dest, |
8186 | 0 | (deq_data_strd << 1), |
8187 | 0 | (UWORD8 *)pi2_cur_deq_data_src_cb, |
8188 | 0 | (deq_src_strd << 1), |
8189 | 0 | (trans_size << 1), |
8190 | 0 | trans_size); |
8191 | |
|
8192 | 0 | ps_ctxt->s_cmn_opt_func.pf_copy_2d( |
8193 | 0 | (UWORD8 *)(pi2_cur_deq_data_dest + trans_size), |
8194 | 0 | (deq_data_strd << 1), |
8195 | 0 | (UWORD8 *)pi2_cur_deq_data_src_cr, |
8196 | 0 | (deq_src_strd << 1), |
8197 | 0 | (trans_size << 1), |
8198 | 0 | trans_size); |
8199 | | |
8200 | | /*Overwriting pred data with that belonging to the winning special mode |
8201 | | (luma mode != chroma mode)*/ |
8202 | |
|
8203 | 0 | ps_ctxt->s_cmn_opt_func.pf_copy_2d( |
8204 | 0 | pu1_cur_pred_dest, |
8205 | 0 | pred_strd, |
8206 | 0 | pu1_cur_pred_src, |
8207 | 0 | pred_src_strd, |
8208 | 0 | (trans_size << 1), |
8209 | 0 | trans_size); |
8210 | |
|
8211 | 0 | num_bytes = ps_chr_intra_satd_ctxt |
8212 | 0 | ->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr]; |
8213 | 0 | cbf = ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr]; |
8214 | | /* inter cu is coded if any of the tu is coded in it */ |
8215 | 0 | ps_best_cu_prms->u1_is_cu_coded |= cbf; |
8216 | | |
8217 | | /* update CB related params */ |
8218 | 0 | ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] = |
8219 | 0 | total_bytes_offset + init_bytes_offset; |
8220 | |
|
8221 | 0 | if(0 == i4_subtu_idx) |
8222 | 0 | { |
8223 | 0 | ps_tu->s_tu.b1_cb_cbf = cbf; |
8224 | 0 | } |
8225 | 0 | else |
8226 | 0 | { |
8227 | 0 | ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf; |
8228 | 0 | } |
8229 | | |
8230 | | /*Overwriting the cb ecd data corresponding to the special mode*/ |
8231 | 0 | if(0 != num_bytes) |
8232 | 0 | { |
8233 | 0 | memcpy( |
8234 | 0 | (pu1_ecd_data + total_bytes_offset), |
8235 | 0 | pu1_ecd_data_src_cb + ai4_ecd_data_cb_offset[i4_subtu_idx], |
8236 | 0 | num_bytes); |
8237 | 0 | } |
8238 | |
|
8239 | 0 | total_bytes_offset += num_bytes; |
8240 | 0 | ai4_ecd_data_cb_offset[i4_subtu_idx] += num_bytes; |
8241 | 0 | ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes; |
8242 | |
|
8243 | 0 | num_bytes = ps_chr_intra_satd_ctxt |
8244 | 0 | ->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr]; |
8245 | 0 | cbf = ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr]; |
8246 | | /* inter cu is coded if any of the tu is coded in it */ |
8247 | 0 | ps_best_cu_prms->u1_is_cu_coded |= cbf; |
8248 | | |
8249 | | /*Overwriting the cr ecd data corresponding to the special mode*/ |
8250 | 0 | if(0 != num_bytes) |
8251 | 0 | { |
8252 | 0 | memcpy( |
8253 | 0 | (pu1_ecd_data + total_bytes_offset), |
8254 | 0 | pu1_ecd_data_src_cr + ai4_ecd_data_cr_offset[i4_subtu_idx], |
8255 | 0 | num_bytes); |
8256 | 0 | } |
8257 | | |
8258 | | /* update CR related params */ |
8259 | 0 | ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] = |
8260 | 0 | total_bytes_offset + init_bytes_offset; |
8261 | |
|
8262 | 0 | if(0 == i4_subtu_idx) |
8263 | 0 | { |
8264 | 0 | ps_tu->s_tu.b1_cr_cbf = cbf; |
8265 | 0 | } |
8266 | 0 | else |
8267 | 0 | { |
8268 | 0 | ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf; |
8269 | 0 | } |
8270 | |
|
8271 | 0 | total_bytes_offset += num_bytes; |
8272 | 0 | ai4_ecd_data_cr_offset[i4_subtu_idx] += num_bytes; |
8273 | | |
8274 | | /*Updating zero rows and zero cols*/ |
8275 | 0 | ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] = |
8276 | 0 | ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr]; |
8277 | 0 | ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] = |
8278 | 0 | ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr]; |
8279 | 0 | ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] = |
8280 | 0 | ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr]; |
8281 | 0 | ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] = |
8282 | 0 | ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr]; |
8283 | |
|
8284 | 0 | ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes; |
8285 | |
|
8286 | 0 | if((u1_num_tus > 1) && |
8287 | 0 | ps_recon_datastore->au1_is_chromaRecon_available[2]) |
8288 | 0 | { |
8289 | 0 | ps_recon_datastore |
8290 | 0 | ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr] |
8291 | 0 | [i4_subtu_idx] = 2; |
8292 | 0 | ps_recon_datastore |
8293 | 0 | ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr] |
8294 | 0 | [i4_subtu_idx] = 2; |
8295 | 0 | } |
8296 | 0 | else if( |
8297 | 0 | (1 == u1_num_tus) && |
8298 | 0 | ps_recon_datastore->au1_is_chromaRecon_available[1]) |
8299 | 0 | { |
8300 | 0 | ps_recon_datastore |
8301 | 0 | ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr] |
8302 | 0 | [i4_subtu_idx] = 1; |
8303 | 0 | ps_recon_datastore |
8304 | 0 | ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr] |
8305 | 0 | [i4_subtu_idx] = 1; |
8306 | 0 | } |
8307 | 0 | else |
8308 | 0 | { |
8309 | 0 | ps_recon_datastore |
8310 | 0 | ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr] |
8311 | 0 | [i4_subtu_idx] = UCHAR_MAX; |
8312 | 0 | ps_recon_datastore |
8313 | 0 | ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr] |
8314 | 0 | [i4_subtu_idx] = UCHAR_MAX; |
8315 | 0 | } |
8316 | 0 | } |
8317 | 0 | } |
8318 | | |
8319 | | /* loop increments */ |
8320 | 0 | ps_tu++; |
8321 | 0 | ps_tu_temp_prms++; |
8322 | 0 | } |
8323 | 0 | } |
8324 | |
|
8325 | 0 | if(!u1_is_422) |
8326 | 0 | { |
8327 | 0 | if(chrm_pred_mode == luma_pred_mode) |
8328 | 0 | { |
8329 | 0 | ps_best_cu_prms->u1_chroma_intra_pred_mode = 4; |
8330 | 0 | } |
8331 | 0 | else if(chrm_pred_mode == 0) |
8332 | 0 | { |
8333 | 0 | ps_best_cu_prms->u1_chroma_intra_pred_mode = 0; |
8334 | 0 | } |
8335 | 0 | else if(chrm_pred_mode == 1) |
8336 | 0 | { |
8337 | 0 | ps_best_cu_prms->u1_chroma_intra_pred_mode = 3; |
8338 | 0 | } |
8339 | 0 | else if(chrm_pred_mode == 10) |
8340 | 0 | { |
8341 | 0 | ps_best_cu_prms->u1_chroma_intra_pred_mode = 2; |
8342 | 0 | } |
8343 | 0 | else if(chrm_pred_mode == 26) |
8344 | 0 | { |
8345 | 0 | ps_best_cu_prms->u1_chroma_intra_pred_mode = 1; |
8346 | 0 | } |
8347 | 0 | else |
8348 | 0 | { |
8349 | 0 | ASSERT(0); /*Should not come here*/ |
8350 | 0 | } |
8351 | 0 | } |
8352 | 0 | else |
8353 | 0 | { |
8354 | 0 | if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[luma_pred_mode]) |
8355 | 0 | { |
8356 | 0 | ps_best_cu_prms->u1_chroma_intra_pred_mode = 4; |
8357 | 0 | } |
8358 | 0 | else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[0]) |
8359 | 0 | { |
8360 | 0 | ps_best_cu_prms->u1_chroma_intra_pred_mode = 0; |
8361 | 0 | } |
8362 | 0 | else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[1]) |
8363 | 0 | { |
8364 | 0 | ps_best_cu_prms->u1_chroma_intra_pred_mode = 3; |
8365 | 0 | } |
8366 | 0 | else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[10]) |
8367 | 0 | { |
8368 | 0 | ps_best_cu_prms->u1_chroma_intra_pred_mode = 2; |
8369 | 0 | } |
8370 | 0 | else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[26]) |
8371 | 0 | { |
8372 | 0 | ps_best_cu_prms->u1_chroma_intra_pred_mode = 1; |
8373 | 0 | } |
8374 | 0 | else |
8375 | 0 | { |
8376 | 0 | ASSERT(0); /*Should not come here*/ |
8377 | 0 | } |
8378 | 0 | } |
8379 | 0 | } |
8380 | | |
8381 | | /* Store the actual chroma mode */ |
8382 | 0 | ps_best_cu_prms->u1_chroma_intra_pred_actual_mode = chrm_pred_mode; |
8383 | 0 | } |
8384 | | |
8385 | | /* update the total bytes produced */ |
8386 | 0 | ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes_offset + init_bytes_offset; |
8387 | | |
8388 | | /* store the final chrm bits accumulated */ |
8389 | 0 | *pi4_chrm_tu_bits = chrm_tu_bits; |
8390 | |
|
8391 | 0 | return (chrm_cod_cost); |
8392 | 0 | } |
8393 | | |
8394 | | /*! |
8395 | | ****************************************************************************** |
8396 | | * \if Function name : ihevce_final_rdopt_mode_prcs \endif |
8397 | | * |
8398 | | * \brief |
8399 | | * Final RDOPT mode process function. Performs Recon computation for the |
8400 | | * final mode. Re-use or Compute pred, iq-data, coeff based on the flags. |
8401 | | * |
8402 | | * \param[in] ps_ctxt : pointer to enc_loop module |
8403 | | * \param[in] ps_prms : pointer to struct containing requisite parameters |
8404 | | * |
8405 | | * \return |
8406 | | * None |
8407 | | * |
8408 | | * \author |
8409 | | * Ittiam |
8410 | | * |
8411 | | ***************************************************************************** |
8412 | | */ |
8413 | | void ihevce_final_rdopt_mode_prcs( |
8414 | | ihevce_enc_loop_ctxt_t *ps_ctxt, final_mode_process_prms_t *ps_prms) |
8415 | 0 | { |
8416 | 0 | enc_loop_cu_final_prms_t *ps_best_cu_prms; |
8417 | 0 | tu_enc_loop_out_t *ps_tu_enc_loop; |
8418 | 0 | tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms; |
8419 | 0 | nbr_avail_flags_t s_nbr; |
8420 | 0 | recon_datastore_t *ps_recon_datastore; |
8421 | |
|
8422 | 0 | ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr; |
8423 | 0 | ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr; |
8424 | 0 | ihevc_intra_pred_ref_filtering_ft *ihevc_intra_pred_ref_filtering_fptr; |
8425 | |
|
8426 | 0 | WORD32 num_tu_in_cu; |
8427 | 0 | LWORD64 rd_opt_cost; |
8428 | 0 | WORD32 ctr; |
8429 | 0 | WORD32 i4_subtu_idx; |
8430 | 0 | WORD32 cu_size; |
8431 | 0 | WORD32 cu_pos_x, cu_pos_y; |
8432 | 0 | WORD32 chrm_present_flag = 1; |
8433 | 0 | WORD32 num_bytes, total_bytes = 0; |
8434 | 0 | WORD32 chrm_ctr = 0; |
8435 | 0 | WORD32 u1_is_cu_coded; |
8436 | 0 | UWORD8 *pu1_old_ecd_data; |
8437 | 0 | UWORD8 *pu1_chrm_old_ecd_data; |
8438 | 0 | UWORD8 *pu1_cur_pred; |
8439 | 0 | WORD16 *pi2_deq_data; |
8440 | 0 | WORD16 *pi2_chrm_deq_data; |
8441 | 0 | WORD16 *pi2_cur_deq_data; |
8442 | 0 | WORD16 *pi2_cur_deq_data_chrm; |
8443 | 0 | UWORD8 *pu1_cur_luma_recon; |
8444 | 0 | UWORD8 *pu1_cur_chroma_recon; |
8445 | 0 | UWORD8 *pu1_cur_src; |
8446 | 0 | UWORD8 *pu1_cur_src_chrm; |
8447 | 0 | UWORD8 *pu1_cur_pred_chrm; |
8448 | 0 | UWORD8 *pu1_intra_pred_mode; |
8449 | 0 | UWORD32 *pu4_nbr_flags; |
8450 | 0 | LWORD64 i8_ssd; |
8451 | |
|
8452 | 0 | cu_nbr_prms_t *ps_cu_nbr_prms = ps_prms->ps_cu_nbr_prms; |
8453 | 0 | cu_inter_cand_t *ps_best_inter_cand = ps_prms->ps_best_inter_cand; |
8454 | 0 | enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms = ps_prms->ps_chrm_cu_buf_prms; |
8455 | |
|
8456 | 0 | WORD32 packed_pred_mode = ps_prms->packed_pred_mode; |
8457 | 0 | WORD32 rd_opt_best_idx = ps_prms->rd_opt_best_idx; |
8458 | 0 | UWORD8 *pu1_src = (UWORD8 *)ps_prms->pv_src; |
8459 | 0 | WORD32 src_strd = ps_prms->src_strd; |
8460 | 0 | UWORD8 *pu1_pred = (UWORD8 *)ps_prms->pv_pred; |
8461 | 0 | WORD32 pred_strd = ps_prms->pred_strd; |
8462 | 0 | UWORD8 *pu1_pred_chrm = (UWORD8 *)ps_prms->pv_pred_chrm; |
8463 | 0 | WORD32 pred_chrm_strd = ps_prms->pred_chrm_strd; |
8464 | 0 | UWORD8 *pu1_final_ecd_data = ps_prms->pu1_final_ecd_data; |
8465 | 0 | UWORD8 *pu1_csbf_buf = ps_prms->pu1_csbf_buf; |
8466 | 0 | WORD32 csbf_strd = ps_prms->csbf_strd; |
8467 | 0 | UWORD8 *pu1_luma_recon = (UWORD8 *)ps_prms->pv_luma_recon; |
8468 | 0 | WORD32 recon_luma_strd = ps_prms->recon_luma_strd; |
8469 | 0 | UWORD8 *pu1_chrm_recon = (UWORD8 *)ps_prms->pv_chrm_recon; |
8470 | 0 | WORD32 recon_chrma_strd = ps_prms->recon_chrma_strd; |
8471 | 0 | UWORD8 u1_cu_pos_x = ps_prms->u1_cu_pos_x; |
8472 | 0 | UWORD8 u1_cu_pos_y = ps_prms->u1_cu_pos_y; |
8473 | 0 | UWORD8 u1_cu_size = ps_prms->u1_cu_size; |
8474 | 0 | WORD8 i1_cu_qp = ps_prms->i1_cu_qp; |
8475 | 0 | UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2); |
8476 | 0 | UWORD8 u1_num_subtus = (u1_is_422 == 1) + 1; |
8477 | | /* Get the Chroma pointer and parameters */ |
8478 | 0 | UWORD8 *pu1_src_chrm = ps_chrm_cu_buf_prms->pu1_curr_src; |
8479 | 0 | WORD32 src_chrm_strd = ps_chrm_cu_buf_prms->i4_chrm_src_stride; |
8480 | 0 | UWORD8 u1_compute_spatial_ssd_luma = 0; |
8481 | 0 | UWORD8 u1_compute_spatial_ssd_chroma = 0; |
8482 | | /* Get the pointer for function selector */ |
8483 | 0 | ihevc_intra_pred_luma_ref_substitution_fptr = |
8484 | 0 | ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr; |
8485 | |
|
8486 | 0 | ihevc_intra_pred_ref_filtering_fptr = |
8487 | 0 | ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr; |
8488 | |
|
8489 | 0 | ihevc_intra_pred_chroma_ref_substitution_fptr = |
8490 | 0 | ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr; |
8491 | | |
8492 | | /* Get the best CU parameters */ |
8493 | 0 | ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx]; |
8494 | 0 | num_tu_in_cu = ps_best_cu_prms->u2_num_tus_in_cu; |
8495 | 0 | cu_size = ps_best_cu_prms->u1_cu_size; |
8496 | 0 | cu_pos_x = u1_cu_pos_x; |
8497 | 0 | cu_pos_y = u1_cu_pos_y; |
8498 | 0 | pu1_intra_pred_mode = &ps_best_cu_prms->au1_intra_pred_mode[0]; |
8499 | 0 | pu4_nbr_flags = &ps_best_cu_prms->au4_nbr_flags[0]; |
8500 | 0 | ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore; |
8501 | | |
8502 | | /* get the first TU pointer */ |
8503 | 0 | ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0]; |
8504 | | /* get the first TU only enc_loop prms pointer */ |
8505 | 0 | ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0]; |
8506 | | /*modify quant related param in ctxt based on current cu qp*/ |
8507 | 0 | if((ps_ctxt->i1_cu_qp_delta_enable)) |
8508 | 0 | { |
8509 | | /*recompute quant related param at every cu level*/ |
8510 | 0 | ihevce_compute_quant_rel_param(ps_ctxt, i1_cu_qp); |
8511 | | |
8512 | | /* get frame level lambda params */ |
8513 | 0 | ihevce_get_cl_cu_lambda_prms( |
8514 | 0 | ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? i1_cu_qp : ps_ctxt->i4_frame_qp); |
8515 | 0 | } |
8516 | |
|
8517 | 0 | ps_best_cu_prms->i8_cu_ssd = 0; |
8518 | 0 | ps_best_cu_prms->u4_cu_open_intra_sad = 0; |
8519 | | |
8520 | | /* For skip case : Set TU_size = CU_size and make cbf = 0 |
8521 | | so that same TU loop can be used for all modes */ |
8522 | 0 | if(PRED_MODE_SKIP == packed_pred_mode) |
8523 | 0 | { |
8524 | 0 | for(ctr = 0; ctr < num_tu_in_cu; ctr++) |
8525 | 0 | { |
8526 | 0 | ps_tu_enc_loop->s_tu.b1_y_cbf = 0; |
8527 | |
|
8528 | 0 | ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = 0; |
8529 | |
|
8530 | 0 | ps_tu_enc_loop++; |
8531 | 0 | ps_tu_enc_loop_temp_prms++; |
8532 | 0 | } |
8533 | | |
8534 | | /* go back to the first TU pointer */ |
8535 | 0 | ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0]; |
8536 | 0 | ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0]; |
8537 | 0 | } |
8538 | | /** For inter case, pred calculation is outside the loop **/ |
8539 | 0 | if(PRED_MODE_INTRA != packed_pred_mode) |
8540 | 0 | { |
8541 | | /**------------- Compute pred data if required --------------**/ |
8542 | 0 | if((1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data)) |
8543 | 0 | { |
8544 | 0 | nbr_4x4_t *ps_topleft_nbr_4x4; |
8545 | 0 | nbr_4x4_t *ps_left_nbr_4x4; |
8546 | 0 | nbr_4x4_t *ps_top_nbr_4x4; |
8547 | 0 | WORD32 nbr_4x4_left_strd; |
8548 | |
|
8549 | 0 | ps_best_inter_cand->pu1_pred_data = pu1_pred; |
8550 | 0 | ps_best_inter_cand->i4_pred_data_stride = pred_strd; |
8551 | | |
8552 | | /* Get the CU nbr information */ |
8553 | 0 | ps_topleft_nbr_4x4 = ps_cu_nbr_prms->ps_topleft_nbr_4x4; |
8554 | 0 | ps_left_nbr_4x4 = ps_cu_nbr_prms->ps_left_nbr_4x4; |
8555 | 0 | ps_top_nbr_4x4 = ps_cu_nbr_prms->ps_top_nbr_4x4; |
8556 | 0 | nbr_4x4_left_strd = ps_cu_nbr_prms->nbr_4x4_left_strd; |
8557 | | |
8558 | | /* MVP ,MVD calc and Motion compensation */ |
8559 | 0 | rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)( |
8560 | 0 | ps_ctxt, |
8561 | 0 | ps_best_inter_cand, |
8562 | 0 | u1_cu_size, |
8563 | 0 | cu_pos_x, |
8564 | 0 | cu_pos_y, |
8565 | 0 | ps_left_nbr_4x4, |
8566 | 0 | ps_top_nbr_4x4, |
8567 | 0 | ps_topleft_nbr_4x4, |
8568 | 0 | nbr_4x4_left_strd, |
8569 | 0 | rd_opt_best_idx); |
8570 | 0 | } |
8571 | | |
8572 | | /** ------ Motion Compensation for Chroma -------- **/ |
8573 | 0 | if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data) |
8574 | 0 | { |
8575 | 0 | UWORD8 *pu1_cur_pred; |
8576 | 0 | pu1_cur_pred = pu1_pred_chrm; |
8577 | | |
8578 | | /* run a loop over all the partitions in cu */ |
8579 | 0 | for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++) |
8580 | 0 | { |
8581 | 0 | pu_t *ps_pu; |
8582 | 0 | WORD32 inter_pu_wd, inter_pu_ht; |
8583 | |
|
8584 | 0 | ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr]; |
8585 | | |
8586 | | /* IF AMP then each partitions can have diff wd ht */ |
8587 | 0 | inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */ |
8588 | 0 | inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1; |
8589 | 0 | inter_pu_ht <<= u1_is_422; |
8590 | | /* chroma mc func */ |
8591 | 0 | ihevce_chroma_inter_pred_pu( |
8592 | 0 | &ps_ctxt->s_mc_ctxt, ps_pu, pu1_cur_pred, pred_chrm_strd); |
8593 | 0 | if(2 == ps_best_cu_prms->u2_num_pus_in_cu) |
8594 | 0 | { |
8595 | | /* 2Nx__ partition case */ |
8596 | 0 | if(inter_pu_wd == ps_best_cu_prms->u1_cu_size) |
8597 | 0 | { |
8598 | 0 | pu1_cur_pred += (inter_pu_ht * pred_chrm_strd); |
8599 | 0 | } |
8600 | | /* __x2N partition case */ |
8601 | 0 | if(inter_pu_ht == (ps_best_cu_prms->u1_cu_size >> (u1_is_422 == 0))) |
8602 | 0 | { |
8603 | 0 | pu1_cur_pred += inter_pu_wd; |
8604 | 0 | } |
8605 | 0 | } |
8606 | 0 | } |
8607 | 0 | } |
8608 | 0 | } |
8609 | 0 | pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0]; |
8610 | 0 | pi2_chrm_deq_data = |
8611 | 0 | &ps_best_cu_prms->pi2_cu_deq_coeffs[0] + ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx; |
8612 | 0 | pu1_old_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0]; |
8613 | 0 | pu1_chrm_old_ecd_data = |
8614 | 0 | &ps_best_cu_prms->pu1_cu_coeffs[0] + ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx; |
8615 | | |
8616 | | /* default value for cu coded flag */ |
8617 | 0 | u1_is_cu_coded = 0; |
8618 | | |
8619 | | /* If we are re-computing coeff, set sad to 0 and start accumulating */ |
8620 | | /* else use the best cand. sad from RDOPT stage */ |
8621 | 0 | if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data) |
8622 | 0 | { |
8623 | | /*init of ssd of CU accumulated over all TU*/ |
8624 | 0 | ps_best_cu_prms->u4_cu_sad = 0; |
8625 | | |
8626 | | /* reset the luma residual bits */ |
8627 | 0 | ps_best_cu_prms->u4_cu_luma_res_bits = 0; |
8628 | 0 | } |
8629 | |
|
8630 | 0 | if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data) |
8631 | 0 | { |
8632 | | /* reset the chroma residual bits */ |
8633 | 0 | ps_best_cu_prms->u4_cu_chroma_res_bits = 0; |
8634 | 0 | } |
8635 | |
|
8636 | 0 | if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data) || |
8637 | 0 | (1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data)) |
8638 | 0 | { |
8639 | | /*Header bits have to be reevaluated if luma and chroma reevaluation is done, as |
8640 | | the quantized coefficients might be changed. |
8641 | | We are copying only those states which correspond to the header from the cabac state |
8642 | | of the previous CU, because the header is going to be recomputed for this condition*/ |
8643 | 0 | ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1; |
8644 | 0 | memcpy( |
8645 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
8646 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0], |
8647 | 0 | IHEVC_CAB_COEFFX_PREFIX); |
8648 | |
|
8649 | 0 | if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data)) |
8650 | 0 | { |
8651 | 0 | COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
8652 | 0 | (&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX), |
8653 | 0 | (&ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0] + |
8654 | 0 | IHEVC_CAB_COEFFX_PREFIX), |
8655 | 0 | (IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX)); |
8656 | 0 | } |
8657 | 0 | else |
8658 | 0 | { |
8659 | 0 | COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
8660 | 0 | (&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX), |
8661 | 0 | (&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx] |
8662 | 0 | .s_cabac_ctxt.au1_ctxt_models[0] + |
8663 | 0 | IHEVC_CAB_COEFFX_PREFIX), |
8664 | 0 | (IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX)); |
8665 | 0 | } |
8666 | 0 | ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_best_idx; |
8667 | 0 | } |
8668 | 0 | else |
8669 | 0 | { |
8670 | 0 | ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0; |
8671 | 0 | } |
8672 | | |
8673 | | /* Zero cbf tool is disabled for intra CUs */ |
8674 | 0 | if(PRED_MODE_INTRA == packed_pred_mode) |
8675 | 0 | { |
8676 | | #if ENABLE_ZERO_CBF_IN_INTRA |
8677 | | ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE; |
8678 | | #else |
8679 | 0 | ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF; |
8680 | 0 | #endif |
8681 | 0 | } |
8682 | 0 | else |
8683 | 0 | { |
8684 | | #if DISABLE_ZERO_ZBF_IN_INTER |
8685 | | ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF; |
8686 | | #else |
8687 | 0 | ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE; |
8688 | 0 | #endif |
8689 | 0 | } |
8690 | | |
8691 | | /** Loop for all tu blocks in current cu and do reconstruction **/ |
8692 | 0 | for(ctr = 0; ctr < num_tu_in_cu; ctr++) |
8693 | 0 | { |
8694 | 0 | tu_t *ps_tu; |
8695 | 0 | WORD32 trans_size, num_4x4_in_tu; |
8696 | 0 | WORD32 cbf, zero_rows, zero_cols; |
8697 | 0 | WORD32 cu_pos_x_in_4x4, cu_pos_y_in_4x4; |
8698 | 0 | WORD32 cu_pos_x_in_pix, cu_pos_y_in_pix; |
8699 | 0 | WORD32 luma_pred_mode, chroma_pred_mode = 0; |
8700 | 0 | UWORD8 au1_is_recon_available[2]; |
8701 | |
|
8702 | 0 | ps_tu = &(ps_tu_enc_loop->s_tu); /* Points to the TU property ctxt */ |
8703 | |
|
8704 | 0 | u1_compute_spatial_ssd_luma = 0; |
8705 | 0 | u1_compute_spatial_ssd_chroma = 0; |
8706 | |
|
8707 | 0 | trans_size = 1 << (ps_tu->b3_size + 2); |
8708 | 0 | num_4x4_in_tu = (trans_size >> 2); |
8709 | 0 | cu_pos_x_in_4x4 = ps_tu->b4_pos_x; |
8710 | 0 | cu_pos_y_in_4x4 = ps_tu->b4_pos_y; |
8711 | | |
8712 | | /* populate the coeffs scan idx */ |
8713 | 0 | ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT; |
8714 | | |
8715 | | /* get the current pos x and pos y in pixels */ |
8716 | 0 | cu_pos_x_in_pix = (cu_pos_x_in_4x4 << 2) - (cu_pos_x << 3); |
8717 | 0 | cu_pos_y_in_pix = (cu_pos_y_in_4x4 << 2) - (cu_pos_y << 3); |
8718 | | |
8719 | | /* Update pointers based on the location */ |
8720 | 0 | pu1_cur_src = pu1_src + cu_pos_x_in_pix; |
8721 | 0 | pu1_cur_src += (cu_pos_y_in_pix * src_strd); |
8722 | 0 | pu1_cur_pred = pu1_pred + cu_pos_x_in_pix; |
8723 | 0 | pu1_cur_pred += (cu_pos_y_in_pix * pred_strd); |
8724 | |
|
8725 | 0 | pu1_cur_luma_recon = pu1_luma_recon + cu_pos_x_in_pix; |
8726 | 0 | pu1_cur_luma_recon += (cu_pos_y_in_pix * recon_luma_strd); |
8727 | |
|
8728 | 0 | pi2_cur_deq_data = pi2_deq_data + cu_pos_x_in_pix; |
8729 | 0 | pi2_cur_deq_data += cu_pos_y_in_pix * cu_size; |
8730 | |
|
8731 | 0 | pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix; |
8732 | 0 | pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) + |
8733 | 0 | (u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd)); |
8734 | |
|
8735 | 0 | pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix; |
8736 | 0 | pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) + |
8737 | 0 | (u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd)); |
8738 | |
|
8739 | 0 | pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix; |
8740 | 0 | pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) + |
8741 | 0 | (u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd)); |
8742 | |
|
8743 | 0 | pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix; |
8744 | 0 | pi2_cur_deq_data_chrm += |
8745 | 0 | ((cu_pos_y_in_pix >> 1) * cu_size) + (u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size)); |
8746 | | |
8747 | | /* if transform size is 4x4 then only first luma 4x4 will have chroma*/ |
8748 | 0 | chrm_present_flag = 1; /* by default chroma present is set to 1*/ |
8749 | |
|
8750 | 0 | if(4 == trans_size) |
8751 | 0 | { |
8752 | | /* if tusize is 4x4 then only first luma 4x4 will have chroma*/ |
8753 | 0 | if(0 != chrm_ctr) |
8754 | 0 | { |
8755 | 0 | chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE; |
8756 | 0 | } |
8757 | | |
8758 | | /* increment the chrm ctr unconditionally */ |
8759 | 0 | chrm_ctr++; |
8760 | | /* after ctr reached 4 reset it */ |
8761 | 0 | if(4 == chrm_ctr) |
8762 | 0 | { |
8763 | 0 | chrm_ctr = 0; |
8764 | 0 | } |
8765 | 0 | } |
8766 | | |
8767 | | /**------------- Compute pred data if required --------------**/ |
8768 | 0 | if(PRED_MODE_INTRA == packed_pred_mode) /* Inter pred calc. is done outside loop */ |
8769 | 0 | { |
8770 | | /* Get the pred mode for scan idx calculation, even if pred is not required */ |
8771 | 0 | luma_pred_mode = *pu1_intra_pred_mode; |
8772 | |
|
8773 | 0 | if((ps_ctxt->i4_rc_pass == 1) || |
8774 | 0 | (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data)) |
8775 | 0 | { |
8776 | 0 | WORD32 nbr_flags; |
8777 | 0 | WORD32 luma_pred_func_idx; |
8778 | 0 | UWORD8 *pu1_left; |
8779 | 0 | UWORD8 *pu1_top; |
8780 | 0 | UWORD8 *pu1_top_left; |
8781 | 0 | WORD32 left_strd; |
8782 | | |
8783 | | /* left cu boundary */ |
8784 | 0 | if(0 == cu_pos_x_in_pix) |
8785 | 0 | { |
8786 | 0 | left_strd = ps_cu_nbr_prms->cu_left_stride; |
8787 | 0 | pu1_left = ps_cu_nbr_prms->pu1_cu_left + cu_pos_y_in_pix * left_strd; |
8788 | 0 | } |
8789 | 0 | else |
8790 | 0 | { |
8791 | 0 | pu1_left = pu1_cur_luma_recon - 1; |
8792 | 0 | left_strd = recon_luma_strd; |
8793 | 0 | } |
8794 | | |
8795 | | /* top cu boundary */ |
8796 | 0 | if(0 == cu_pos_y_in_pix) |
8797 | 0 | { |
8798 | 0 | pu1_top = ps_cu_nbr_prms->pu1_cu_top + cu_pos_x_in_pix; |
8799 | 0 | } |
8800 | 0 | else |
8801 | 0 | { |
8802 | 0 | pu1_top = pu1_cur_luma_recon - recon_luma_strd; |
8803 | 0 | } |
8804 | | |
8805 | | /* by default top left is set to cu top left */ |
8806 | 0 | pu1_top_left = ps_cu_nbr_prms->pu1_cu_top_left; |
8807 | | |
8808 | | /* top left based on position */ |
8809 | 0 | if((0 != cu_pos_y_in_pix) && (0 == cu_pos_x_in_pix)) |
8810 | 0 | { |
8811 | 0 | pu1_top_left = pu1_left - left_strd; |
8812 | 0 | } |
8813 | 0 | else if(0 != cu_pos_x_in_pix) |
8814 | 0 | { |
8815 | 0 | pu1_top_left = pu1_top - 1; |
8816 | 0 | } |
8817 | | |
8818 | | /* get the neighbour availability flags */ |
8819 | 0 | nbr_flags = ihevce_get_nbr_intra( |
8820 | 0 | &s_nbr, |
8821 | 0 | ps_ctxt->pu1_ctb_nbr_map, |
8822 | 0 | ps_ctxt->i4_nbr_map_strd, |
8823 | 0 | cu_pos_x_in_4x4, |
8824 | 0 | cu_pos_y_in_4x4, |
8825 | 0 | num_4x4_in_tu); |
8826 | |
|
8827 | 0 | if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data) |
8828 | 0 | { |
8829 | | /* copy the nbr flags for chroma reuse */ |
8830 | 0 | if(4 != trans_size) |
8831 | 0 | { |
8832 | 0 | *pu4_nbr_flags = nbr_flags; |
8833 | 0 | } |
8834 | 0 | else if(1 == chrm_present_flag) |
8835 | 0 | { |
8836 | | /* compute the avail flags assuming luma trans is 8x8 */ |
8837 | | /* get the neighbour availability flags */ |
8838 | 0 | *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu( |
8839 | 0 | ps_ctxt->pu1_ctb_nbr_map, |
8840 | 0 | ps_ctxt->i4_nbr_map_strd, |
8841 | 0 | cu_pos_x_in_4x4, |
8842 | 0 | cu_pos_y_in_4x4, |
8843 | 0 | (num_4x4_in_tu << 1), |
8844 | 0 | (num_4x4_in_tu << 1)); |
8845 | 0 | } |
8846 | | |
8847 | | /* call reference array substitution */ |
8848 | 0 | ihevc_intra_pred_luma_ref_substitution_fptr( |
8849 | 0 | pu1_top_left, |
8850 | 0 | pu1_top, |
8851 | 0 | pu1_left, |
8852 | 0 | left_strd, |
8853 | 0 | trans_size, |
8854 | 0 | nbr_flags, |
8855 | 0 | (UWORD8 *)ps_ctxt->pv_ref_sub_out, |
8856 | 0 | 1); |
8857 | | |
8858 | | /* call reference filtering */ |
8859 | 0 | ihevc_intra_pred_ref_filtering_fptr( |
8860 | 0 | (UWORD8 *)ps_ctxt->pv_ref_sub_out, |
8861 | 0 | trans_size, |
8862 | 0 | (UWORD8 *)ps_ctxt->pv_ref_filt_out, |
8863 | 0 | luma_pred_mode, |
8864 | 0 | ps_ctxt->i1_strong_intra_smoothing_enable_flag); |
8865 | | |
8866 | | /* use the look up to get the function idx */ |
8867 | 0 | luma_pred_func_idx = g_i4_ip_funcs[luma_pred_mode]; |
8868 | | |
8869 | | /* call the intra prediction function */ |
8870 | 0 | ps_ctxt->apf_lum_ip[luma_pred_func_idx]( |
8871 | 0 | (UWORD8 *)ps_ctxt->pv_ref_filt_out, |
8872 | 0 | 1, |
8873 | 0 | pu1_cur_pred, |
8874 | 0 | pred_strd, |
8875 | 0 | trans_size, |
8876 | 0 | luma_pred_mode); |
8877 | 0 | } |
8878 | 0 | } |
8879 | 0 | else if( |
8880 | 0 | (1 == chrm_present_flag) && |
8881 | 0 | (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data)) |
8882 | 0 | { |
8883 | 0 | WORD32 temp_num_4x4_in_tu = num_4x4_in_tu; |
8884 | |
|
8885 | 0 | if(4 == trans_size) /* compute the avail flags assuming luma trans is 8x8 */ |
8886 | 0 | { |
8887 | 0 | temp_num_4x4_in_tu = num_4x4_in_tu << 1; |
8888 | 0 | } |
8889 | |
|
8890 | 0 | *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu( |
8891 | 0 | ps_ctxt->pu1_ctb_nbr_map, |
8892 | 0 | ps_ctxt->i4_nbr_map_strd, |
8893 | 0 | cu_pos_x_in_4x4, |
8894 | 0 | cu_pos_y_in_4x4, |
8895 | 0 | temp_num_4x4_in_tu, |
8896 | 0 | temp_num_4x4_in_tu); |
8897 | 0 | } |
8898 | | |
8899 | | /* Get the pred mode for scan idx calculation, even if pred is not required */ |
8900 | 0 | chroma_pred_mode = ps_best_cu_prms->u1_chroma_intra_pred_actual_mode; |
8901 | 0 | } |
8902 | |
|
8903 | 0 | if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data) |
8904 | 0 | { |
8905 | 0 | WORD32 temp_bits; |
8906 | 0 | LWORD64 temp_cost; |
8907 | 0 | UWORD32 u4_tu_sad; |
8908 | 0 | WORD32 perform_sbh, perform_rdoq; |
8909 | |
|
8910 | 0 | if(PRED_MODE_INTRA == packed_pred_mode) |
8911 | 0 | { |
8912 | | /* for luma 4x4 and 8x8 transforms based on intra pred mode scan is chosen*/ |
8913 | 0 | if(trans_size < 16) |
8914 | 0 | { |
8915 | | /* for modes from 22 upto 30 horizontal scan is used */ |
8916 | 0 | if((luma_pred_mode > 21) && (luma_pred_mode < 31)) |
8917 | 0 | { |
8918 | 0 | ps_ctxt->i4_scan_idx = SCAN_HORZ; |
8919 | 0 | } |
8920 | | /* for modes from 6 upto 14 vertical scan is used */ |
8921 | 0 | else if((luma_pred_mode > 5) && (luma_pred_mode < 15)) |
8922 | 0 | { |
8923 | 0 | ps_ctxt->i4_scan_idx = SCAN_VERT; |
8924 | 0 | } |
8925 | 0 | } |
8926 | 0 | } |
8927 | | |
8928 | | /* RDOPT copy States : TU init (best until prev TU) to current */ |
8929 | 0 | COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
8930 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx] |
8931 | 0 | .s_cabac_ctxt.au1_ctxt_models[0] + |
8932 | 0 | IHEVC_CAB_COEFFX_PREFIX, |
8933 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
8934 | 0 | IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
8935 | |
|
8936 | 0 | if(ps_prms->u1_recompute_sbh_and_rdoq) |
8937 | 0 | { |
8938 | 0 | perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH); |
8939 | 0 | perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ); |
8940 | 0 | } |
8941 | 0 | else |
8942 | 0 | { |
8943 | | /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/ |
8944 | 0 | perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh; |
8945 | | /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean |
8946 | | we would have to do RDOQ again.*/ |
8947 | 0 | perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq; |
8948 | 0 | } |
8949 | |
|
8950 | | #if DISABLE_RDOQ_INTRA |
8951 | | if(PRED_MODE_INTRA == packed_pred_mode) |
8952 | | { |
8953 | | perform_rdoq = 0; |
8954 | | } |
8955 | | #endif |
8956 | | /*If BEST candidate RDOQ is enabled, Either no coef level rdoq or CU level rdoq has to be enabled |
8957 | | so that all candidates and best candidate are quantized with same rounding factor */ |
8958 | 0 | if(1 == perform_rdoq) |
8959 | 0 | { |
8960 | 0 | ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING); |
8961 | 0 | } |
8962 | | |
8963 | 0 | cbf = ihevce_t_q_iq_ssd_scan_fxn( |
8964 | 0 | ps_ctxt, |
8965 | 0 | pu1_cur_pred, |
8966 | 0 | pred_strd, |
8967 | 0 | pu1_cur_src, |
8968 | 0 | src_strd, |
8969 | 0 | pi2_cur_deq_data, |
8970 | 0 | cu_size, /*deq_data stride is cu_size*/ |
8971 | 0 | pu1_cur_luma_recon, |
8972 | 0 | recon_luma_strd, |
8973 | 0 | pu1_final_ecd_data, |
8974 | 0 | pu1_csbf_buf, |
8975 | 0 | csbf_strd, |
8976 | 0 | trans_size, |
8977 | 0 | packed_pred_mode, |
8978 | 0 | &temp_cost, |
8979 | 0 | &num_bytes, |
8980 | 0 | &temp_bits, |
8981 | 0 | &u4_tu_sad, |
8982 | 0 | &zero_cols, |
8983 | 0 | &zero_rows, |
8984 | 0 | &au1_is_recon_available[0], |
8985 | 0 | perform_rdoq, //(BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level), |
8986 | 0 | perform_sbh, |
8987 | 0 | #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
8988 | 0 | !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT |
8989 | 0 | : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * |
8990 | 0 | (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) / |
8991 | 0 | 100.0, |
8992 | 0 | ps_prms->u1_is_cu_noisy, |
8993 | 0 | #endif |
8994 | 0 | u1_compute_spatial_ssd_luma ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD, |
8995 | 0 | 1 /*early cbf*/ |
8996 | 0 | ); //(BEST_CAND_SBH == ps_ctxt->i4_sbh_level)); |
8997 | | |
8998 | | /* Accumulate luma residual bits */ |
8999 | 0 | ps_best_cu_prms->u4_cu_luma_res_bits += temp_bits; |
9000 | | |
9001 | | /* RDOPT copy States : New updated after curr TU to TU init */ |
9002 | 0 | if(0 != cbf) |
9003 | 0 | { |
9004 | | /* update to new state only if CBF is non zero */ |
9005 | 0 | COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
9006 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
9007 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx] |
9008 | 0 | .s_cabac_ctxt.au1_ctxt_models[0] + |
9009 | 0 | IHEVC_CAB_COEFFX_PREFIX, |
9010 | 0 | IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
9011 | 0 | } |
9012 | | |
9013 | | /* accumulate the TU sad into cu sad */ |
9014 | 0 | ps_best_cu_prms->u4_cu_sad += u4_tu_sad; |
9015 | 0 | ps_tu->b1_y_cbf = cbf; |
9016 | 0 | ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = num_bytes; |
9017 | | |
9018 | | /* If somebody updates cbf (RDOQ or SBH), update in nbr str. for BS */ |
9019 | 0 | if((ps_prms->u1_will_cabac_state_change) && (!ps_prms->u1_is_first_pass)) |
9020 | 0 | { |
9021 | 0 | WORD32 num_4x4_in_cu = u1_cu_size >> 2; |
9022 | 0 | nbr_4x4_t *ps_cur_nbr_4x4 = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0]; |
9023 | 0 | ps_cur_nbr_4x4 = (ps_cur_nbr_4x4 + (cu_pos_x_in_pix >> 2)); |
9024 | 0 | ps_cur_nbr_4x4 += ((cu_pos_y_in_pix >> 2) * num_4x4_in_cu); |
9025 | | /* replicate the nbr 4x4 structure for all 4x4 blocks of the current TU */ |
9026 | 0 | ps_cur_nbr_4x4->b1_y_cbf = cbf; |
9027 | | /*copy the cu qp. This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/ |
9028 | 0 | ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp; |
9029 | | /* Qp and cbf are stored for the all 4x4 in TU */ |
9030 | 0 | { |
9031 | 0 | WORD32 i, j; |
9032 | 0 | nbr_4x4_t *ps_tmp_4x4; |
9033 | 0 | ps_tmp_4x4 = ps_cur_nbr_4x4; |
9034 | |
|
9035 | 0 | for(i = 0; i < num_4x4_in_tu; i++) |
9036 | 0 | { |
9037 | 0 | for(j = 0; j < num_4x4_in_tu; j++) |
9038 | 0 | { |
9039 | 0 | ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp; |
9040 | 0 | ps_tmp_4x4[j].b1_y_cbf = cbf; |
9041 | 0 | } |
9042 | | /* row level update*/ |
9043 | 0 | ps_tmp_4x4 += num_4x4_in_cu; |
9044 | 0 | } |
9045 | 0 | } |
9046 | 0 | } |
9047 | 0 | } |
9048 | 0 | else |
9049 | 0 | { |
9050 | 0 | zero_cols = ps_tu_enc_loop_temp_prms->u4_luma_zero_col; |
9051 | 0 | zero_rows = ps_tu_enc_loop_temp_prms->u4_luma_zero_row; |
9052 | |
|
9053 | 0 | if(ps_prms->u1_will_cabac_state_change) |
9054 | 0 | { |
9055 | 0 | num_bytes = ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed; |
9056 | 0 | } |
9057 | 0 | else |
9058 | 0 | { |
9059 | 0 | num_bytes = 0; |
9060 | 0 | } |
9061 | | |
9062 | | /* copy luma ecd data to final buffer */ |
9063 | 0 | memcpy(pu1_final_ecd_data, pu1_old_ecd_data, num_bytes); |
9064 | |
|
9065 | 0 | pu1_old_ecd_data += num_bytes; |
9066 | |
|
9067 | 0 | au1_is_recon_available[0] = 0; |
9068 | 0 | } |
9069 | | |
9070 | | /**-------- Compute Recon data (Do IT & Recon) : Luma -----------**/ |
9071 | 0 | if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data && |
9072 | 0 | (!u1_compute_spatial_ssd_luma || |
9073 | 0 | (!au1_is_recon_available[0] && u1_compute_spatial_ssd_luma))) |
9074 | 0 | { |
9075 | 0 | if(!ps_recon_datastore->u1_is_lumaRecon_available || |
9076 | 0 | (ps_recon_datastore->u1_is_lumaRecon_available && |
9077 | 0 | (UCHAR_MAX == ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]))) |
9078 | 0 | { |
9079 | 0 | ihevce_it_recon_fxn( |
9080 | 0 | ps_ctxt, |
9081 | 0 | pi2_cur_deq_data, |
9082 | 0 | cu_size, |
9083 | 0 | pu1_cur_pred, |
9084 | 0 | pred_strd, |
9085 | 0 | pu1_cur_luma_recon, |
9086 | 0 | recon_luma_strd, |
9087 | 0 | pu1_final_ecd_data, |
9088 | 0 | trans_size, |
9089 | 0 | packed_pred_mode, |
9090 | 0 | ps_tu->b1_y_cbf, |
9091 | 0 | zero_cols, |
9092 | 0 | zero_rows); |
9093 | 0 | } |
9094 | 0 | else if( |
9095 | 0 | ps_recon_datastore->u1_is_lumaRecon_available && |
9096 | 0 | (UCHAR_MAX != ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr])) |
9097 | 0 | { |
9098 | 0 | UWORD8 *pu1_recon_src = |
9099 | 0 | ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs |
9100 | 0 | [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]]) + |
9101 | 0 | cu_pos_x_in_pix + cu_pos_y_in_pix * ps_recon_datastore->i4_lumaRecon_stride; |
9102 | |
|
9103 | 0 | ps_ctxt->s_cmn_opt_func.pf_copy_2d( |
9104 | 0 | pu1_cur_luma_recon, |
9105 | 0 | recon_luma_strd, |
9106 | 0 | pu1_recon_src, |
9107 | 0 | ps_recon_datastore->i4_lumaRecon_stride, |
9108 | 0 | trans_size, |
9109 | 0 | trans_size); |
9110 | 0 | } |
9111 | 0 | } |
9112 | |
|
9113 | 0 | if(ps_prms->u1_will_cabac_state_change) |
9114 | 0 | { |
9115 | 0 | ps_tu_enc_loop->i4_luma_coeff_offset = total_bytes; |
9116 | 0 | } |
9117 | |
|
9118 | 0 | pu1_final_ecd_data += num_bytes; |
9119 | | /* update total bytes consumed */ |
9120 | 0 | total_bytes += num_bytes; |
9121 | |
|
9122 | 0 | u1_is_cu_coded |= ps_tu->b1_y_cbf; |
9123 | | |
9124 | | /***************** Compute T,Q,IQ,IT & Recon for Chroma ********************/ |
9125 | 0 | if(1 == chrm_present_flag) |
9126 | 0 | { |
9127 | 0 | pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix; |
9128 | 0 | pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) + |
9129 | 0 | (u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd)); |
9130 | |
|
9131 | 0 | pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix; |
9132 | 0 | pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) + |
9133 | 0 | (u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd)); |
9134 | |
|
9135 | 0 | pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix; |
9136 | 0 | pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) + |
9137 | 0 | (u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd)); |
9138 | |
|
9139 | 0 | pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix; |
9140 | 0 | pi2_cur_deq_data_chrm += ((cu_pos_y_in_pix >> 1) * cu_size) + |
9141 | 0 | (u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size)); |
9142 | |
|
9143 | 0 | if(INCLUDE_CHROMA_DURING_TU_RECURSION && |
9144 | 0 | (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0) && |
9145 | 0 | (PRED_MODE_INTRA != packed_pred_mode)) |
9146 | 0 | { |
9147 | 0 | WORD32 i4_num_bytes; |
9148 | 0 | UWORD8 *pu1_chroma_pred; |
9149 | 0 | UWORD8 *pu1_chroma_recon; |
9150 | 0 | WORD16 *pi2_chroma_deq; |
9151 | 0 | UWORD32 u4_zero_col; |
9152 | 0 | UWORD32 u4_zero_row; |
9153 | |
|
9154 | 0 | for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++) |
9155 | 0 | { |
9156 | 0 | WORD32 chroma_trans_size = MAX(4, trans_size >> 1); |
9157 | 0 | WORD32 i4_subtu_pos_x = cu_pos_x_in_pix; |
9158 | 0 | WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size); |
9159 | |
|
9160 | 0 | if(0 == u1_is_422) |
9161 | 0 | { |
9162 | 0 | i4_subtu_pos_y >>= 1; |
9163 | 0 | } |
9164 | |
|
9165 | 0 | pu1_chroma_pred = |
9166 | 0 | pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd); |
9167 | 0 | pu1_chroma_recon = pu1_cur_chroma_recon + |
9168 | 0 | (i4_subtu_idx * chroma_trans_size * recon_chrma_strd); |
9169 | 0 | pi2_chroma_deq = |
9170 | 0 | pi2_cur_deq_data_chrm + (i4_subtu_idx * chroma_trans_size * cu_size); |
9171 | |
|
9172 | 0 | u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx]; |
9173 | 0 | u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx]; |
9174 | |
|
9175 | 0 | if(ps_prms->u1_will_cabac_state_change) |
9176 | 0 | { |
9177 | 0 | i4_num_bytes = |
9178 | 0 | ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx]; |
9179 | 0 | } |
9180 | 0 | else |
9181 | 0 | { |
9182 | 0 | i4_num_bytes = 0; |
9183 | 0 | } |
9184 | |
|
9185 | 0 | memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes); |
9186 | |
|
9187 | 0 | pu1_old_ecd_data += i4_num_bytes; |
9188 | |
|
9189 | 0 | au1_is_recon_available[U_PLANE] = 0; |
9190 | |
|
9191 | 0 | if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data && |
9192 | 0 | (!u1_compute_spatial_ssd_chroma || |
9193 | 0 | (!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma))) |
9194 | 0 | { |
9195 | 0 | if(!ps_recon_datastore->au1_is_chromaRecon_available[0] || |
9196 | 0 | (ps_recon_datastore->au1_is_chromaRecon_available[0] && |
9197 | 0 | (UCHAR_MAX == |
9198 | 0 | ps_recon_datastore |
9199 | 0 | ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))) |
9200 | 0 | { |
9201 | 0 | ihevce_chroma_it_recon_fxn( |
9202 | 0 | ps_ctxt, |
9203 | 0 | pi2_chroma_deq, |
9204 | 0 | cu_size, |
9205 | 0 | pu1_chroma_pred, |
9206 | 0 | pred_chrm_strd, |
9207 | 0 | pu1_chroma_recon, |
9208 | 0 | recon_chrma_strd, |
9209 | 0 | pu1_final_ecd_data, |
9210 | 0 | chroma_trans_size, |
9211 | 0 | (i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1, |
9212 | 0 | u4_zero_col, |
9213 | 0 | u4_zero_row, |
9214 | 0 | U_PLANE); |
9215 | 0 | } |
9216 | 0 | else if( |
9217 | 0 | ps_recon_datastore->au1_is_chromaRecon_available[0] && |
9218 | 0 | (UCHAR_MAX != |
9219 | 0 | ps_recon_datastore |
9220 | 0 | ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])) |
9221 | 0 | { |
9222 | 0 | UWORD8 *pu1_recon_src = |
9223 | 0 | ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs |
9224 | 0 | [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon |
9225 | 0 | [U_PLANE][ctr][i4_subtu_idx]]) + |
9226 | 0 | i4_subtu_pos_x + |
9227 | 0 | i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride; |
9228 | |
|
9229 | 0 | ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( |
9230 | 0 | pu1_recon_src, |
9231 | 0 | ps_recon_datastore->i4_lumaRecon_stride, |
9232 | 0 | pu1_chroma_recon, |
9233 | 0 | recon_chrma_strd, |
9234 | 0 | chroma_trans_size, |
9235 | 0 | chroma_trans_size, |
9236 | 0 | U_PLANE); |
9237 | 0 | } |
9238 | 0 | } |
9239 | |
|
9240 | 0 | u1_is_cu_coded |= |
9241 | 0 | ((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf); |
9242 | |
|
9243 | 0 | pu1_final_ecd_data += i4_num_bytes; |
9244 | 0 | total_bytes += i4_num_bytes; |
9245 | 0 | } |
9246 | |
|
9247 | 0 | for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++) |
9248 | 0 | { |
9249 | 0 | WORD32 chroma_trans_size = MAX(4, trans_size >> 1); |
9250 | 0 | WORD32 i4_subtu_pos_x = cu_pos_x_in_pix; |
9251 | 0 | WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size); |
9252 | |
|
9253 | 0 | if(0 == u1_is_422) |
9254 | 0 | { |
9255 | 0 | i4_subtu_pos_y >>= 1; |
9256 | 0 | } |
9257 | |
|
9258 | 0 | pu1_chroma_pred = |
9259 | 0 | pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd); |
9260 | 0 | pu1_chroma_recon = pu1_cur_chroma_recon + |
9261 | 0 | (i4_subtu_idx * chroma_trans_size * recon_chrma_strd); |
9262 | 0 | pi2_chroma_deq = pi2_cur_deq_data_chrm + |
9263 | 0 | (i4_subtu_idx * chroma_trans_size * cu_size) + |
9264 | 0 | chroma_trans_size; |
9265 | |
|
9266 | 0 | u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx]; |
9267 | 0 | u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx]; |
9268 | |
|
9269 | 0 | if(ps_prms->u1_will_cabac_state_change) |
9270 | 0 | { |
9271 | 0 | i4_num_bytes = |
9272 | 0 | ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx]; |
9273 | 0 | } |
9274 | 0 | else |
9275 | 0 | { |
9276 | 0 | i4_num_bytes = 0; |
9277 | 0 | } |
9278 | |
|
9279 | 0 | memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes); |
9280 | |
|
9281 | 0 | pu1_old_ecd_data += i4_num_bytes; |
9282 | |
|
9283 | 0 | au1_is_recon_available[V_PLANE] = 0; |
9284 | |
|
9285 | 0 | if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data && |
9286 | 0 | (!u1_compute_spatial_ssd_chroma || |
9287 | 0 | (!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma))) |
9288 | 0 | { |
9289 | 0 | if(!ps_recon_datastore->au1_is_chromaRecon_available[0] || |
9290 | 0 | (ps_recon_datastore->au1_is_chromaRecon_available[0] && |
9291 | 0 | (UCHAR_MAX == |
9292 | 0 | ps_recon_datastore |
9293 | 0 | ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))) |
9294 | 0 | { |
9295 | 0 | ihevce_chroma_it_recon_fxn( |
9296 | 0 | ps_ctxt, |
9297 | 0 | pi2_chroma_deq, |
9298 | 0 | cu_size, |
9299 | 0 | pu1_chroma_pred, |
9300 | 0 | pred_chrm_strd, |
9301 | 0 | pu1_chroma_recon, |
9302 | 0 | recon_chrma_strd, |
9303 | 0 | pu1_final_ecd_data, |
9304 | 0 | chroma_trans_size, |
9305 | 0 | (i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1, |
9306 | 0 | u4_zero_col, |
9307 | 0 | u4_zero_row, |
9308 | 0 | V_PLANE); |
9309 | 0 | } |
9310 | 0 | else if( |
9311 | 0 | ps_recon_datastore->au1_is_chromaRecon_available[0] && |
9312 | 0 | (UCHAR_MAX != |
9313 | 0 | ps_recon_datastore |
9314 | 0 | ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])) |
9315 | 0 | { |
9316 | 0 | UWORD8 *pu1_recon_src = |
9317 | 0 | ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs |
9318 | 0 | [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon |
9319 | 0 | [V_PLANE][ctr][i4_subtu_idx]]) + |
9320 | 0 | i4_subtu_pos_x + |
9321 | 0 | i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride; |
9322 | |
|
9323 | 0 | ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( |
9324 | 0 | pu1_recon_src, |
9325 | 0 | ps_recon_datastore->i4_lumaRecon_stride, |
9326 | 0 | pu1_chroma_recon, |
9327 | 0 | recon_chrma_strd, |
9328 | 0 | chroma_trans_size, |
9329 | 0 | chroma_trans_size, |
9330 | 0 | V_PLANE); |
9331 | 0 | } |
9332 | 0 | } |
9333 | |
|
9334 | 0 | u1_is_cu_coded |= |
9335 | 0 | ((1 == i4_subtu_idx) ? ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf); |
9336 | |
|
9337 | 0 | pu1_final_ecd_data += i4_num_bytes; |
9338 | 0 | total_bytes += i4_num_bytes; |
9339 | 0 | } |
9340 | 0 | } |
9341 | 0 | else |
9342 | 0 | { |
9343 | 0 | WORD32 cb_zero_col, cb_zero_row, cr_zero_col, cr_zero_row; |
9344 | |
|
9345 | 0 | for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++) |
9346 | 0 | { |
9347 | 0 | WORD32 cb_cbf, cr_cbf; |
9348 | 0 | WORD32 cb_num_bytes, cr_num_bytes; |
9349 | |
|
9350 | 0 | WORD32 chroma_trans_size = MAX(4, trans_size >> 1); |
9351 | |
|
9352 | 0 | WORD32 i4_subtu_pos_x = cu_pos_x_in_pix; |
9353 | 0 | WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size); |
9354 | |
|
9355 | 0 | if(0 == u1_is_422) |
9356 | 0 | { |
9357 | 0 | i4_subtu_pos_y >>= 1; |
9358 | 0 | } |
9359 | |
|
9360 | 0 | pu1_cur_src_chrm += (i4_subtu_idx * chroma_trans_size * src_chrm_strd); |
9361 | 0 | pu1_cur_pred_chrm += (i4_subtu_idx * chroma_trans_size * pred_chrm_strd); |
9362 | 0 | pu1_cur_chroma_recon += (i4_subtu_idx * chroma_trans_size * recon_chrma_strd); |
9363 | 0 | pi2_cur_deq_data_chrm += (i4_subtu_idx * chroma_trans_size * cu_size); |
9364 | |
|
9365 | 0 | if((PRED_MODE_INTRA == packed_pred_mode) && |
9366 | 0 | (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data)) |
9367 | 0 | { |
9368 | 0 | WORD32 nbr_flags, left_strd_chrm, chrm_pred_func_idx; |
9369 | 0 | UWORD8 *pu1_left_chrm; |
9370 | 0 | UWORD8 *pu1_top_chrm; |
9371 | 0 | UWORD8 *pu1_top_left_chrm; |
9372 | |
|
9373 | 0 | nbr_flags = ihevce_get_intra_chroma_tu_nbr( |
9374 | 0 | *pu4_nbr_flags, i4_subtu_idx, chroma_trans_size, u1_is_422); |
9375 | | |
9376 | | /* left cu boundary */ |
9377 | 0 | if(0 == i4_subtu_pos_x) |
9378 | 0 | { |
9379 | 0 | left_strd_chrm = ps_chrm_cu_buf_prms->i4_cu_left_stride; |
9380 | 0 | pu1_left_chrm = |
9381 | 0 | ps_chrm_cu_buf_prms->pu1_cu_left + i4_subtu_pos_y * left_strd_chrm; |
9382 | 0 | } |
9383 | 0 | else |
9384 | 0 | { |
9385 | 0 | pu1_left_chrm = pu1_cur_chroma_recon - 2; |
9386 | 0 | left_strd_chrm = recon_chrma_strd; |
9387 | 0 | } |
9388 | | |
9389 | | /* top cu boundary */ |
9390 | 0 | if(0 == i4_subtu_pos_y) |
9391 | 0 | { |
9392 | 0 | pu1_top_chrm = ps_chrm_cu_buf_prms->pu1_cu_top + i4_subtu_pos_x; |
9393 | 0 | } |
9394 | 0 | else |
9395 | 0 | { |
9396 | 0 | pu1_top_chrm = pu1_cur_chroma_recon - recon_chrma_strd; |
9397 | 0 | } |
9398 | | |
9399 | | /* by default top left is set to cu top left */ |
9400 | 0 | pu1_top_left_chrm = ps_chrm_cu_buf_prms->pu1_cu_top_left; |
9401 | | |
9402 | | /* top left based on position */ |
9403 | 0 | if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x)) |
9404 | 0 | { |
9405 | 0 | pu1_top_left_chrm = pu1_left_chrm - left_strd_chrm; |
9406 | 0 | } |
9407 | 0 | else if(0 != i4_subtu_pos_x) |
9408 | 0 | { |
9409 | 0 | pu1_top_left_chrm = pu1_top_chrm - 2; |
9410 | 0 | } |
9411 | | |
9412 | | /* call the chroma reference array substitution */ |
9413 | 0 | ihevc_intra_pred_chroma_ref_substitution_fptr( |
9414 | 0 | pu1_top_left_chrm, |
9415 | 0 | pu1_top_chrm, |
9416 | 0 | pu1_left_chrm, |
9417 | 0 | left_strd_chrm, |
9418 | 0 | chroma_trans_size, |
9419 | 0 | nbr_flags, |
9420 | 0 | (UWORD8 *)ps_ctxt->pv_ref_sub_out, |
9421 | 0 | 1); |
9422 | | |
9423 | | /* use the look up to get the function idx */ |
9424 | 0 | chrm_pred_func_idx = g_i4_ip_funcs[chroma_pred_mode]; |
9425 | | |
9426 | | /* call the intra prediction function */ |
9427 | 0 | ps_ctxt->apf_chrm_ip[chrm_pred_func_idx]( |
9428 | 0 | (UWORD8 *)ps_ctxt->pv_ref_sub_out, |
9429 | 0 | 1, |
9430 | 0 | pu1_cur_pred_chrm, |
9431 | 0 | pred_chrm_strd, |
9432 | 0 | chroma_trans_size, |
9433 | 0 | chroma_pred_mode); |
9434 | 0 | } |
9435 | | |
9436 | | /**---------- Compute iq&coeff data if required : Chroma ------------**/ |
9437 | 0 | if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data) |
9438 | 0 | { |
9439 | 0 | WORD32 perform_sbh, perform_rdoq, temp_bits; |
9440 | |
|
9441 | 0 | if(ps_prms->u1_recompute_sbh_and_rdoq) |
9442 | 0 | { |
9443 | 0 | perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH); |
9444 | 0 | perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ); |
9445 | 0 | } |
9446 | 0 | else |
9447 | 0 | { |
9448 | | /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/ |
9449 | 0 | perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh; |
9450 | | /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean |
9451 | | we would have to do RDOQ again.*/ |
9452 | 0 | perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq; |
9453 | 0 | } |
9454 | | |
9455 | | /* populate the coeffs scan idx */ |
9456 | 0 | ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT; |
9457 | |
|
9458 | 0 | if(PRED_MODE_INTRA == packed_pred_mode) |
9459 | 0 | { |
9460 | | /* for 4x4 transforms the scan is chosen based on the intra pred mode */ |
9461 | 0 | if(4 == chroma_trans_size) |
9462 | 0 | { |
9463 | | /* for modes from 22 upto 30 horizontal scan is used */ |
9464 | 0 | if((chroma_pred_mode > 21) && (chroma_pred_mode < 31)) |
9465 | 0 | { |
9466 | 0 | ps_ctxt->i4_scan_idx = SCAN_HORZ; |
9467 | 0 | } |
9468 | | /* for modes from 6 up to 14 vertical scan is used */ |
9469 | 0 | else if((chroma_pred_mode > 5) && (chroma_pred_mode < 15)) |
9470 | 0 | { |
9471 | 0 | ps_ctxt->i4_scan_idx = SCAN_VERT; |
9472 | 0 | } |
9473 | 0 | } |
9474 | 0 | } |
9475 | |
|
9476 | | #if DISABLE_RDOQ_INTRA |
9477 | | if(PRED_MODE_INTRA == packed_pred_mode) |
9478 | | { |
9479 | | perform_rdoq = 0; |
9480 | | } |
9481 | | #endif |
9482 | | |
9483 | | /* RDOPT copy States : TU init (best until prev TU) to current */ |
9484 | 0 | COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
9485 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx] |
9486 | 0 | .s_cabac_ctxt.au1_ctxt_models[0] + |
9487 | 0 | IHEVC_CAB_COEFFX_PREFIX, |
9488 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
9489 | 0 | IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
9490 | |
|
9491 | 0 | ASSERT(rd_opt_best_idx == ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx); |
9492 | | /*If BEST candidate RDOQ is enabled, Either no coef level rdoq or CU level rdoq has to be enabled |
9493 | | so that all candidates and best candidate are quantized with same rounding factor */ |
9494 | 0 | if(1 == perform_rdoq) |
9495 | 0 | { |
9496 | 0 | ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING); |
9497 | 0 | } |
9498 | | |
9499 | 0 | if(!ps_best_cu_prms->u1_skip_flag || |
9500 | 0 | !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt) |
9501 | 0 | { |
9502 | | /* Cb */ |
9503 | 0 | cb_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn( |
9504 | 0 | ps_ctxt, |
9505 | 0 | pu1_cur_pred_chrm, |
9506 | 0 | pred_chrm_strd, |
9507 | 0 | pu1_cur_src_chrm, |
9508 | 0 | src_chrm_strd, |
9509 | 0 | pi2_cur_deq_data_chrm, |
9510 | 0 | cu_size, |
9511 | 0 | pu1_chrm_recon, |
9512 | 0 | recon_chrma_strd, |
9513 | 0 | pu1_final_ecd_data, |
9514 | 0 | pu1_csbf_buf, |
9515 | 0 | csbf_strd, |
9516 | 0 | chroma_trans_size, |
9517 | 0 | ps_ctxt->i4_scan_idx, |
9518 | 0 | (PRED_MODE_INTRA == packed_pred_mode), |
9519 | 0 | &cb_num_bytes, |
9520 | 0 | &temp_bits, |
9521 | 0 | &cb_zero_col, |
9522 | 0 | &cb_zero_row, |
9523 | 0 | &au1_is_recon_available[U_PLANE], |
9524 | 0 | perform_sbh, |
9525 | 0 | perform_rdoq, |
9526 | 0 | &i8_ssd, |
9527 | 0 | #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
9528 | 0 | !ps_ctxt->u1_is_refPic |
9529 | 0 | ? ALPHA_FOR_NOISE_TERM_IN_RDOPT |
9530 | 0 | : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * |
9531 | 0 | (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) / |
9532 | 0 | 100.0, |
9533 | 0 | ps_prms->u1_is_cu_noisy, |
9534 | 0 | #endif |
9535 | 0 | ps_best_cu_prms->u1_skip_flag && |
9536 | 0 | ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt, |
9537 | 0 | u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD |
9538 | 0 | : FREQUENCY_DOMAIN_SSD, |
9539 | 0 | U_PLANE); |
9540 | 0 | } |
9541 | 0 | else |
9542 | 0 | { |
9543 | 0 | cb_cbf = 0; |
9544 | 0 | temp_bits = 0; |
9545 | 0 | cb_num_bytes = 0; |
9546 | 0 | au1_is_recon_available[U_PLANE] = 0; |
9547 | 0 | cb_zero_col = 0; |
9548 | 0 | cb_zero_row = 0; |
9549 | 0 | } |
9550 | | |
9551 | | /* Accumulate chroma residual bits */ |
9552 | 0 | ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits; |
9553 | | |
9554 | | /* RDOPT copy States : New updated after curr TU to TU init */ |
9555 | 0 | if(0 != cb_cbf) |
9556 | 0 | { |
9557 | 0 | COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
9558 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
9559 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx] |
9560 | 0 | .s_cabac_ctxt.au1_ctxt_models[0] + |
9561 | 0 | IHEVC_CAB_COEFFX_PREFIX, |
9562 | 0 | IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
9563 | 0 | } |
9564 | | /* RDOPT copy States : Restoring back the Cb init state to Cr */ |
9565 | 0 | else |
9566 | 0 | { |
9567 | 0 | COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
9568 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx] |
9569 | 0 | .s_cabac_ctxt.au1_ctxt_models[0] + |
9570 | 0 | IHEVC_CAB_COEFFX_PREFIX, |
9571 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
9572 | 0 | IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
9573 | 0 | } |
9574 | |
|
9575 | 0 | if(!ps_best_cu_prms->u1_skip_flag || |
9576 | 0 | !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt) |
9577 | 0 | { |
9578 | | /* Cr */ |
9579 | 0 | cr_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn( |
9580 | 0 | ps_ctxt, |
9581 | 0 | pu1_cur_pred_chrm, |
9582 | 0 | pred_chrm_strd, |
9583 | 0 | pu1_cur_src_chrm, |
9584 | 0 | src_chrm_strd, |
9585 | 0 | pi2_cur_deq_data_chrm + chroma_trans_size, |
9586 | 0 | cu_size, |
9587 | 0 | pu1_chrm_recon, |
9588 | 0 | recon_chrma_strd, |
9589 | 0 | pu1_final_ecd_data + cb_num_bytes, |
9590 | 0 | pu1_csbf_buf, |
9591 | 0 | csbf_strd, |
9592 | 0 | chroma_trans_size, |
9593 | 0 | ps_ctxt->i4_scan_idx, |
9594 | 0 | (PRED_MODE_INTRA == packed_pred_mode), |
9595 | 0 | &cr_num_bytes, |
9596 | 0 | &temp_bits, |
9597 | 0 | &cr_zero_col, |
9598 | 0 | &cr_zero_row, |
9599 | 0 | &au1_is_recon_available[V_PLANE], |
9600 | 0 | perform_sbh, |
9601 | 0 | perform_rdoq, |
9602 | 0 | &i8_ssd, |
9603 | 0 | #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
9604 | 0 | !ps_ctxt->u1_is_refPic |
9605 | 0 | ? ALPHA_FOR_NOISE_TERM_IN_RDOPT |
9606 | 0 | : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * |
9607 | 0 | (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) / |
9608 | 0 | 100.0, |
9609 | 0 | ps_prms->u1_is_cu_noisy, |
9610 | 0 | #endif |
9611 | 0 | ps_best_cu_prms->u1_skip_flag && |
9612 | 0 | ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt, |
9613 | 0 | u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD |
9614 | 0 | : FREQUENCY_DOMAIN_SSD, |
9615 | 0 | V_PLANE); |
9616 | 0 | } |
9617 | 0 | else |
9618 | 0 | { |
9619 | 0 | cr_cbf = 0; |
9620 | 0 | temp_bits = 0; |
9621 | 0 | cr_num_bytes = 0; |
9622 | 0 | au1_is_recon_available[V_PLANE] = 0; |
9623 | 0 | cr_zero_col = 0; |
9624 | 0 | cr_zero_row = 0; |
9625 | 0 | } |
9626 | | |
9627 | | /* Accumulate chroma residual bits */ |
9628 | 0 | ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits; |
9629 | | |
9630 | | /* RDOPT copy States : New updated after curr TU to TU init */ |
9631 | 0 | if(0 != cr_cbf) |
9632 | 0 | { |
9633 | 0 | COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX( |
9634 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX, |
9635 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx] |
9636 | 0 | .s_cabac_ctxt.au1_ctxt_models[0] + |
9637 | 0 | IHEVC_CAB_COEFFX_PREFIX, |
9638 | 0 | IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX); |
9639 | 0 | } |
9640 | |
|
9641 | 0 | if(0 == i4_subtu_idx) |
9642 | 0 | { |
9643 | 0 | ps_tu->b1_cb_cbf = cb_cbf; |
9644 | 0 | ps_tu->b1_cr_cbf = cr_cbf; |
9645 | 0 | } |
9646 | 0 | else |
9647 | 0 | { |
9648 | 0 | ps_tu->b1_cb_cbf_subtu1 = cb_cbf; |
9649 | 0 | ps_tu->b1_cr_cbf_subtu1 = cr_cbf; |
9650 | 0 | } |
9651 | 0 | } |
9652 | 0 | else |
9653 | 0 | { |
9654 | 0 | cb_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx]; |
9655 | 0 | cb_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx]; |
9656 | 0 | cr_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx]; |
9657 | 0 | cr_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx]; |
9658 | |
|
9659 | 0 | if(ps_prms->u1_will_cabac_state_change) |
9660 | 0 | { |
9661 | 0 | cb_num_bytes = |
9662 | 0 | ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx]; |
9663 | 0 | } |
9664 | 0 | else |
9665 | 0 | { |
9666 | 0 | cb_num_bytes = 0; |
9667 | 0 | } |
9668 | |
|
9669 | 0 | if(ps_prms->u1_will_cabac_state_change) |
9670 | 0 | { |
9671 | 0 | cr_num_bytes = |
9672 | 0 | ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx]; |
9673 | 0 | } |
9674 | 0 | else |
9675 | 0 | { |
9676 | 0 | cr_num_bytes = 0; |
9677 | 0 | } |
9678 | | |
9679 | | /* copy cb ecd data to final buffer */ |
9680 | 0 | memcpy(pu1_final_ecd_data, pu1_chrm_old_ecd_data, cb_num_bytes); |
9681 | |
|
9682 | 0 | pu1_chrm_old_ecd_data += cb_num_bytes; |
9683 | | |
9684 | | /* copy cr ecd data to final buffer (placed right after the cb data) */ |
9685 | 0 | memcpy( |
9686 | 0 | (pu1_final_ecd_data + cb_num_bytes), |
9687 | 0 | pu1_chrm_old_ecd_data, |
9688 | 0 | cr_num_bytes); |
9689 | |
|
9690 | 0 | pu1_chrm_old_ecd_data += cr_num_bytes; |
9691 | |
|
9692 | 0 | au1_is_recon_available[U_PLANE] = 0; |
9693 | 0 | au1_is_recon_available[V_PLANE] = 0; |
9694 | 0 | } |
9695 | | |
9696 | | /**-------- Compute Recon data (Do IT & Recon) : Chroma -----------**/ |
9697 | 0 | if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data && |
9698 | 0 | (!u1_compute_spatial_ssd_chroma || |
9699 | 0 | (!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma))) |
9700 | 0 | { |
9701 | 0 | if(!ps_recon_datastore->au1_is_chromaRecon_available[0] || |
9702 | 0 | (ps_recon_datastore->au1_is_chromaRecon_available[0] && |
9703 | 0 | (UCHAR_MAX == |
9704 | 0 | ps_recon_datastore |
9705 | 0 | ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))) |
9706 | 0 | { |
9707 | 0 | ihevce_chroma_it_recon_fxn( |
9708 | 0 | ps_ctxt, |
9709 | 0 | pi2_cur_deq_data_chrm, |
9710 | 0 | cu_size, |
9711 | 0 | pu1_cur_pred_chrm, |
9712 | 0 | pred_chrm_strd, |
9713 | 0 | pu1_cur_chroma_recon, |
9714 | 0 | recon_chrma_strd, |
9715 | 0 | pu1_final_ecd_data, |
9716 | 0 | chroma_trans_size, |
9717 | 0 | (i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1, |
9718 | 0 | cb_zero_col, |
9719 | 0 | cb_zero_row, |
9720 | 0 | U_PLANE); |
9721 | 0 | } |
9722 | 0 | else if( |
9723 | 0 | ps_recon_datastore->au1_is_chromaRecon_available[0] && |
9724 | 0 | (UCHAR_MAX != |
9725 | 0 | ps_recon_datastore |
9726 | 0 | ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])) |
9727 | 0 | { |
9728 | 0 | UWORD8 *pu1_recon_src = |
9729 | 0 | ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs |
9730 | 0 | [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon |
9731 | 0 | [U_PLANE][ctr][i4_subtu_idx]]) + |
9732 | 0 | i4_subtu_pos_x + |
9733 | 0 | i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride; |
9734 | |
|
9735 | 0 | ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( |
9736 | 0 | pu1_recon_src, |
9737 | 0 | ps_recon_datastore->i4_lumaRecon_stride, |
9738 | 0 | pu1_cur_chroma_recon, |
9739 | 0 | recon_chrma_strd, |
9740 | 0 | chroma_trans_size, |
9741 | 0 | chroma_trans_size, |
9742 | 0 | U_PLANE); |
9743 | 0 | } |
9744 | 0 | } |
9745 | |
|
9746 | 0 | u1_is_cu_coded |= |
9747 | 0 | ((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf); |
9748 | |
|
9749 | 0 | if(ps_prms->u1_will_cabac_state_change) |
9750 | 0 | { |
9751 | 0 | ps_tu_enc_loop->ai4_cb_coeff_offset[i4_subtu_idx] = total_bytes; |
9752 | 0 | } |
9753 | |
|
9754 | 0 | pu1_final_ecd_data += cb_num_bytes; |
9755 | | /* update total bytes consumed */ |
9756 | 0 | total_bytes += cb_num_bytes; |
9757 | |
|
9758 | 0 | if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data && |
9759 | 0 | (!u1_compute_spatial_ssd_chroma || |
9760 | 0 | (!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma))) |
9761 | 0 | { |
9762 | 0 | if(!ps_recon_datastore->au1_is_chromaRecon_available[0] || |
9763 | 0 | (ps_recon_datastore->au1_is_chromaRecon_available[0] && |
9764 | 0 | (UCHAR_MAX == |
9765 | 0 | ps_recon_datastore |
9766 | 0 | ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))) |
9767 | 0 | { |
9768 | 0 | ihevce_chroma_it_recon_fxn( |
9769 | 0 | ps_ctxt, |
9770 | 0 | pi2_cur_deq_data_chrm + chroma_trans_size, |
9771 | 0 | cu_size, |
9772 | 0 | pu1_cur_pred_chrm, |
9773 | 0 | pred_chrm_strd, |
9774 | 0 | pu1_cur_chroma_recon, |
9775 | 0 | recon_chrma_strd, |
9776 | 0 | pu1_final_ecd_data, |
9777 | 0 | chroma_trans_size, |
9778 | 0 | (i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1, |
9779 | 0 | cr_zero_col, |
9780 | 0 | cr_zero_row, |
9781 | 0 | V_PLANE); |
9782 | 0 | } |
9783 | 0 | else if( |
9784 | 0 | ps_recon_datastore->au1_is_chromaRecon_available[0] && |
9785 | 0 | (UCHAR_MAX != |
9786 | 0 | ps_recon_datastore |
9787 | 0 | ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])) |
9788 | 0 | { |
9789 | 0 | UWORD8 *pu1_recon_src = |
9790 | 0 | ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs |
9791 | 0 | [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon |
9792 | 0 | [V_PLANE][ctr][i4_subtu_idx]]) + |
9793 | 0 | i4_subtu_pos_x + |
9794 | 0 | i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride; |
9795 | |
|
9796 | 0 | ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( |
9797 | 0 | pu1_recon_src, |
9798 | 0 | ps_recon_datastore->i4_lumaRecon_stride, |
9799 | 0 | pu1_cur_chroma_recon, |
9800 | 0 | recon_chrma_strd, |
9801 | 0 | chroma_trans_size, |
9802 | 0 | chroma_trans_size, |
9803 | 0 | V_PLANE); |
9804 | 0 | } |
9805 | 0 | } |
9806 | |
|
9807 | 0 | u1_is_cu_coded |= |
9808 | 0 | ((1 == i4_subtu_idx) ? ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf); |
9809 | |
|
9810 | 0 | if(ps_prms->u1_will_cabac_state_change) |
9811 | 0 | { |
9812 | 0 | ps_tu_enc_loop->ai4_cr_coeff_offset[i4_subtu_idx] = total_bytes; |
9813 | 0 | } |
9814 | |
|
9815 | 0 | pu1_final_ecd_data += cr_num_bytes; |
9816 | | /* update total bytes consumed */ |
9817 | 0 | total_bytes += cr_num_bytes; |
9818 | 0 | } |
9819 | 0 | } |
9820 | 0 | } |
9821 | 0 | else |
9822 | 0 | { |
9823 | 0 | ps_tu_enc_loop->ai4_cb_coeff_offset[0] = total_bytes; |
9824 | 0 | ps_tu_enc_loop->ai4_cr_coeff_offset[0] = total_bytes; |
9825 | 0 | ps_tu_enc_loop->ai4_cb_coeff_offset[1] = total_bytes; |
9826 | 0 | ps_tu_enc_loop->ai4_cr_coeff_offset[1] = total_bytes; |
9827 | 0 | ps_tu->b1_cb_cbf = 0; |
9828 | 0 | ps_tu->b1_cr_cbf = 0; |
9829 | 0 | ps_tu->b1_cb_cbf_subtu1 = 0; |
9830 | 0 | ps_tu->b1_cr_cbf_subtu1 = 0; |
9831 | 0 | } |
9832 | | |
9833 | | /* Update to next TU */ |
9834 | 0 | ps_tu_enc_loop++; |
9835 | 0 | ps_tu_enc_loop_temp_prms++; |
9836 | |
|
9837 | 0 | pu4_nbr_flags++; |
9838 | 0 | pu1_intra_pred_mode++; |
9839 | | |
9840 | | /* Do not set the nbr map for the last tu in cu */ |
9841 | 0 | if((num_tu_in_cu - 1) != ctr) |
9842 | 0 | { |
9843 | | /* set the neighbour map to 1 */ |
9844 | 0 | ihevce_set_nbr_map( |
9845 | 0 | ps_ctxt->pu1_ctb_nbr_map, |
9846 | 0 | ps_ctxt->i4_nbr_map_strd, |
9847 | 0 | cu_pos_x_in_4x4, |
9848 | 0 | cu_pos_y_in_4x4, |
9849 | 0 | (trans_size >> 2), |
9850 | 0 | 1); |
9851 | 0 | } |
9852 | 0 | } |
9853 | | |
9854 | 0 | if(ps_prms->u1_will_cabac_state_change) |
9855 | 0 | { |
9856 | 0 | ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded; |
9857 | | |
9858 | | /* Modify skip flag, if luma is skipped & Chroma is coded */ |
9859 | 0 | if((1 == u1_is_cu_coded) && (PRED_MODE_SKIP == packed_pred_mode)) |
9860 | 0 | { |
9861 | 0 | ps_best_cu_prms->u1_skip_flag = 0; |
9862 | 0 | } |
9863 | 0 | } |
9864 | | |
9865 | | /* during chroma evaluation if skip decision was over written */ |
9866 | | /* then the current skip candidate is set to a non skip candidate */ |
9867 | 0 | if(PRED_MODE_INTRA != packed_pred_mode) |
9868 | 0 | { |
9869 | 0 | ps_best_inter_cand->b1_skip_flag = ps_best_cu_prms->u1_skip_flag; |
9870 | 0 | } |
9871 | | |
9872 | | /**------------- Compute header data if required --------------**/ |
9873 | 0 | if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data) |
9874 | 0 | { |
9875 | 0 | WORD32 cbf_bits; |
9876 | 0 | WORD32 cu_bits; |
9877 | 0 | WORD32 unit_4x4_size = cu_size >> 2; |
9878 | | |
9879 | | /*Restoring the running reference into the best rdopt_ctxt cabac states which will then |
9880 | | be copied as the base reference for the next cu |
9881 | | Assumption : We are ensuring that the u1_eval_header_data flag is set to 1 only if either |
9882 | | luma and chroma are being reevaluated*/ |
9883 | 0 | COPY_CABAC_STATES( |
9884 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx] |
9885 | 0 | .s_cabac_ctxt.au1_ctxt_models[0], |
9886 | 0 | &ps_ctxt->au1_rdopt_init_ctxt_models[0], |
9887 | 0 | IHEVC_CAB_CTXT_END); |
9888 | | |
9889 | | /* get the neighbour availability flags for current cu */ |
9890 | 0 | ihevce_get_only_nbr_flag( |
9891 | 0 | &s_nbr, |
9892 | 0 | ps_ctxt->pu1_ctb_nbr_map, |
9893 | 0 | ps_ctxt->i4_nbr_map_strd, |
9894 | 0 | (cu_pos_x << 1), |
9895 | 0 | (cu_pos_y << 1), |
9896 | 0 | unit_4x4_size, |
9897 | 0 | unit_4x4_size); |
9898 | |
|
9899 | 0 | cu_bits = ihevce_entropy_rdo_encode_cu( |
9900 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt, |
9901 | 0 | ps_best_cu_prms, |
9902 | 0 | cu_pos_x, |
9903 | 0 | cu_pos_y, |
9904 | 0 | cu_size, |
9905 | 0 | ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail |
9906 | 0 | : s_nbr.u1_top_avail, |
9907 | 0 | s_nbr.u1_left_avail, |
9908 | 0 | (pu1_final_ecd_data - total_bytes), |
9909 | 0 | &cbf_bits); |
9910 | | |
9911 | | /* cbf bits are excluded from header bits, instead considered as texture bits */ |
9912 | 0 | ps_best_cu_prms->u4_cu_hdr_bits = cu_bits - cbf_bits; |
9913 | 0 | ps_best_cu_prms->u4_cu_cbf_bits = cbf_bits; |
9914 | 0 | } |
9915 | |
|
9916 | 0 | if(ps_prms->u1_will_cabac_state_change) |
9917 | 0 | { |
9918 | 0 | ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes; |
9919 | 0 | } |
9920 | 0 | } |
9921 | | |
9922 | | /*! |
9923 | | ****************************************************************************** |
9924 | | * \if Function name : ihevce_set_eval_flags \endif |
9925 | | * |
9926 | | * \brief |
9927 | | * Function which decides which eval flags have to be set based on present |
9928 | | * and RDOQ conditions |
9929 | | * |
9930 | | * \param[in] ps_ctxt : encoder ctxt pointer |
9931 | | * \param[in] enc_loop_cu_final_prms_t : pointer to final cu params |
9932 | | * |
9933 | | * \return |
9934 | | * None |
9935 | | * |
9936 | | * \author |
9937 | | * Ittiam |
9938 | | * |
9939 | | ***************************************************************************** |
9940 | | */ |
9941 | | void ihevce_set_eval_flags( |
9942 | | ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_final_prms_t *ps_enc_loop_bestprms) |
9943 | 0 | { |
9944 | 0 | WORD32 count = 0; |
9945 | |
|
9946 | 0 | ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0; |
9947 | |
|
9948 | 0 | ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data = |
9949 | 0 | !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt; |
9950 | |
|
9951 | 0 | if(ps_ctxt->u1_disable_intra_eval && (!(ps_ctxt->i4_deblk_pad_hpel_cur_pic & 0x1))) |
9952 | 0 | { |
9953 | 0 | ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 0; |
9954 | 0 | } |
9955 | 0 | else |
9956 | 0 | { |
9957 | 0 | ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1; |
9958 | 0 | } |
9959 | |
|
9960 | 0 | if((1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq) || |
9961 | 0 | (1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh)) |
9962 | 0 | { |
9963 | | /* When rdoq is enabled only for the best candidate, in case of in Intra nTU |
9964 | | RDOQ might have altered the coeffs of the neighbour CU. As a result, the pred |
9965 | | for the current CU will change. Therefore, we need to reevaluate the pred data*/ |
9966 | 0 | if((ps_enc_loop_bestprms->u2_num_tus_in_cu > 1) && |
9967 | 0 | (ps_enc_loop_bestprms->u1_intra_flag == 1)) |
9968 | 0 | { |
9969 | 0 | ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 1; |
9970 | 0 | ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data = 1; |
9971 | 0 | } |
9972 | 0 | if(ps_enc_loop_bestprms->u1_skip_flag == 1) |
9973 | 0 | { |
9974 | 0 | for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++) |
9975 | 0 | { |
9976 | 0 | ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] |
9977 | 0 | .b1_eval_luma_iq_and_coeff_data = 0; |
9978 | 0 | ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] |
9979 | 0 | .b1_eval_chroma_iq_and_coeff_data = 0; |
9980 | 0 | } |
9981 | 0 | } |
9982 | 0 | else |
9983 | 0 | { |
9984 | 0 | for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++) |
9985 | 0 | { |
9986 | 0 | ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] |
9987 | 0 | .b1_eval_luma_iq_and_coeff_data = 1; |
9988 | 0 | ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] |
9989 | 0 | .b1_eval_chroma_iq_and_coeff_data = 1; |
9990 | 0 | } |
9991 | 0 | } |
9992 | 0 | } |
9993 | 0 | else |
9994 | 0 | { |
9995 | 0 | switch(ps_ctxt->i4_quality_preset) |
9996 | 0 | { |
9997 | 0 | case IHEVCE_QUALITY_P0: |
9998 | 0 | case IHEVCE_QUALITY_P2: |
9999 | 0 | case IHEVCE_QUALITY_P3: |
10000 | 0 | { |
10001 | 0 | for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++) |
10002 | 0 | { |
10003 | 0 | ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] |
10004 | 0 | .b1_eval_luma_iq_and_coeff_data = 0; |
10005 | 0 | ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] |
10006 | 0 | .b1_eval_chroma_iq_and_coeff_data = |
10007 | 0 | !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt; |
10008 | 0 | } |
10009 | |
|
10010 | 0 | break; |
10011 | 0 | } |
10012 | 0 | case IHEVCE_QUALITY_P4: |
10013 | 0 | case IHEVCE_QUALITY_P5: |
10014 | 0 | { |
10015 | 0 | for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++) |
10016 | 0 | { |
10017 | 0 | ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] |
10018 | 0 | .b1_eval_luma_iq_and_coeff_data = 0; |
10019 | 0 | ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] |
10020 | 0 | .b1_eval_chroma_iq_and_coeff_data = |
10021 | 0 | !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt; |
10022 | 0 | } |
10023 | |
|
10024 | 0 | break; |
10025 | 0 | } |
10026 | 0 | case IHEVCE_QUALITY_P6: |
10027 | 0 | { |
10028 | 0 | for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++) |
10029 | 0 | { |
10030 | 0 | ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] |
10031 | 0 | .b1_eval_luma_iq_and_coeff_data = 0; |
10032 | 0 | #if !ENABLE_CHROMA_TRACKING_OF_LUMA_CBF_IN_XS25 |
10033 | 0 | ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] |
10034 | 0 | .b1_eval_chroma_iq_and_coeff_data = |
10035 | 0 | !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt; |
10036 | | #else |
10037 | | if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_temporal_layer_id > 1) && |
10038 | | (ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b3_size >= 2)) |
10039 | | { |
10040 | | ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] |
10041 | | .b1_eval_chroma_iq_and_coeff_data = |
10042 | | ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b1_y_cbf; |
10043 | | } |
10044 | | else |
10045 | | { |
10046 | | ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count] |
10047 | | .b1_eval_chroma_iq_and_coeff_data = |
10048 | | !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt; |
10049 | | } |
10050 | | #endif |
10051 | 0 | } |
10052 | |
|
10053 | 0 | break; |
10054 | 0 | } |
10055 | 0 | default: |
10056 | 0 | { |
10057 | 0 | break; |
10058 | 0 | } |
10059 | 0 | } |
10060 | 0 | } |
10061 | | |
10062 | | /* Not recomputing Luma pred-data and header data for any preset now */ |
10063 | 0 | ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1; |
10064 | 0 | } |
10065 | | |
10066 | | /** |
10067 | | ****************************************************************************** |
10068 | | * |
10069 | | * @brief Shrink's TU tree of inter CUs by merging redundnant child nodes |
10070 | | * (not coded children) into a parent node(not coded). |
10071 | | * |
10072 | | * @par Description |
10073 | | * This is required post RDO evaluation as TU decisions are |
10074 | | * pre-determined(pre RDO) based on recursive SATD, |
10075 | | * while the quad children TU's can be skipped during RDO |
10076 | | * |
10077 | | * The shrink process is applied iteratively till there are no |
10078 | | * more modes to shrink |
10079 | | * |
10080 | | * @param[inout] ps_tu_enc_loop |
10081 | | * pointer to tu enc loop params of inter cu |
10082 | | * |
10083 | | * @param[inout] ps_tu_enc_loop_temp_prms |
10084 | | * pointer to temp tu enc loop params of inter cu |
10085 | | * |
10086 | | * @param[in] num_tu_in_cu |
10087 | | * number of tus in cu |
10088 | | * |
10089 | | * @return modified number of tus in cu |
10090 | | * |
10091 | | ****************************************************************************** |
10092 | | */ |
10093 | | WORD32 ihevce_shrink_inter_tu_tree( |
10094 | | tu_enc_loop_out_t *ps_tu_enc_loop, |
10095 | | tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms, |
10096 | | recon_datastore_t *ps_recon_datastore, |
10097 | | WORD32 num_tu_in_cu, |
10098 | | UWORD8 u1_is_422) |
10099 | 0 | { |
10100 | 0 | WORD32 recurse = 1; |
10101 | 0 | WORD32 ctr; |
10102 | | |
10103 | | /* ------------- Quadtree TU Split Transform flag optimization ------------ */ |
10104 | | /* Post RDO, if all 4 child nodes are not coded the overheads of split TU */ |
10105 | | /* flags and cbf flags are saved by merging to parent node and marking */ |
10106 | | /* parent TU as not coded */ |
10107 | | /* */ |
10108 | | /* ParentTUSplit=1 */ |
10109 | | /* | */ |
10110 | | /* --------------------------------------------------------- */ |
10111 | | /* |C0(Not coded) | C1(Not coded) | C2(Not coded) | C3(Not coded) */ |
10112 | | /* || */ |
10113 | | /* \/ */ |
10114 | | /* */ |
10115 | | /* ParentTUSplit=0 (Not Coded) */ |
10116 | | /* */ |
10117 | | /* ------------- Quadtree TU Split Transform flag optimization ------------ */ |
10118 | 0 | while((num_tu_in_cu > 4) && recurse) |
10119 | 0 | { |
10120 | 0 | recurse = 0; |
10121 | | |
10122 | | /* Validate inter CU */ |
10123 | | //ASSERT(ps_tu_enc_loop[0].s_tu.s_tu.b1_intra_flag == 0); /*b1_intra_flag no longer a member of tu structure */ |
10124 | | |
10125 | | /* loop for all tu blocks in current cu */ |
10126 | 0 | for(ctr = 0; ctr < num_tu_in_cu;) |
10127 | 0 | { |
10128 | | /* Get current tu posx, posy and size */ |
10129 | 0 | WORD32 curr_pos_x = ps_tu_enc_loop[ctr].s_tu.b4_pos_x << 2; |
10130 | 0 | WORD32 curr_pos_y = ps_tu_enc_loop[ctr].s_tu.b4_pos_y << 2; |
10131 | | /* +1 is for parents size */ |
10132 | 0 | WORD32 parent_tu_size = 1 << (ps_tu_enc_loop[ctr].s_tu.b3_size + 2 + 1); |
10133 | | |
10134 | | /* eval merge if leaf nodes reached i.e all child tus are of same size and first tu pos is same as parent pos */ |
10135 | 0 | WORD32 eval_merge = ((curr_pos_x & (parent_tu_size - 1)) == 0); |
10136 | 0 | eval_merge &= ((curr_pos_y & (parent_tu_size - 1)) == 0); |
10137 | | |
10138 | | /* As TUs are published in encode order (Z SCAN), */ |
10139 | | /* Four consecutive TUS of same size implies we have hit leaf nodes. */ |
10140 | 0 | if(((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 1].s_tu.b3_size)) && |
10141 | 0 | ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 2].s_tu.b3_size)) && |
10142 | 0 | ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 3].s_tu.b3_size)) && |
10143 | 0 | eval_merge) |
10144 | 0 | { |
10145 | 0 | WORD32 merge_parent = 1; |
10146 | | |
10147 | | /* If any leaf noded is coded, it cannot be merged to parent */ |
10148 | 0 | if((ps_tu_enc_loop[ctr].s_tu.b1_y_cbf) || (ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf) || |
10149 | 0 | (ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf) || |
10150 | |
|
10151 | 0 | (ps_tu_enc_loop[ctr + 1].s_tu.b1_y_cbf) || |
10152 | 0 | (ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf) || |
10153 | 0 | (ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf) || |
10154 | |
|
10155 | 0 | (ps_tu_enc_loop[ctr + 2].s_tu.b1_y_cbf) || |
10156 | 0 | (ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf) || |
10157 | 0 | (ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf) || |
10158 | |
|
10159 | 0 | (ps_tu_enc_loop[ctr + 3].s_tu.b1_y_cbf) || |
10160 | 0 | (ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf) || |
10161 | 0 | (ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf)) |
10162 | 0 | { |
10163 | 0 | merge_parent = 0; |
10164 | 0 | } |
10165 | |
|
10166 | 0 | if(u1_is_422) |
10167 | 0 | { |
10168 | 0 | if((ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1) || |
10169 | 0 | (ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1) || |
10170 | |
|
10171 | 0 | (ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf_subtu1) || |
10172 | 0 | (ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf_subtu1) || |
10173 | |
|
10174 | 0 | (ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf_subtu1) || |
10175 | 0 | (ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf_subtu1) || |
10176 | |
|
10177 | 0 | (ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf_subtu1) || |
10178 | 0 | (ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf_subtu1)) |
10179 | 0 | { |
10180 | 0 | merge_parent = 0; |
10181 | 0 | } |
10182 | 0 | } |
10183 | |
|
10184 | 0 | if(merge_parent) |
10185 | 0 | { |
10186 | | /* Merge all the children (ctr,ctr+1,ctr+2,ctr+3) to parent (ctr) */ |
10187 | |
|
10188 | 0 | if(ps_recon_datastore->u1_is_lumaRecon_available) |
10189 | 0 | { |
10190 | 0 | ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX; |
10191 | |
|
10192 | 0 | memmove( |
10193 | 0 | &ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 1], |
10194 | 0 | &ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 4], |
10195 | 0 | (num_tu_in_cu - ctr - 4) * sizeof(UWORD8)); |
10196 | 0 | } |
10197 | |
|
10198 | 0 | if(ps_recon_datastore->au1_is_chromaRecon_available[0]) |
10199 | 0 | { |
10200 | 0 | ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][0] = |
10201 | 0 | UCHAR_MAX; |
10202 | 0 | ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][0] = |
10203 | 0 | UCHAR_MAX; |
10204 | |
|
10205 | 0 | memmove( |
10206 | 0 | &ps_recon_datastore |
10207 | 0 | ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][0], |
10208 | 0 | &ps_recon_datastore |
10209 | 0 | ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][0], |
10210 | 0 | (num_tu_in_cu - ctr - 4) * sizeof(UWORD8)); |
10211 | |
|
10212 | 0 | memmove( |
10213 | 0 | &ps_recon_datastore |
10214 | 0 | ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][0], |
10215 | 0 | &ps_recon_datastore |
10216 | 0 | ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][0], |
10217 | 0 | (num_tu_in_cu - ctr - 4) * sizeof(UWORD8)); |
10218 | |
|
10219 | 0 | if(u1_is_422) |
10220 | 0 | { |
10221 | 0 | ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][1] = |
10222 | 0 | UCHAR_MAX; |
10223 | 0 | ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][1] = |
10224 | 0 | UCHAR_MAX; |
10225 | |
|
10226 | 0 | memmove( |
10227 | 0 | &ps_recon_datastore |
10228 | 0 | ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][1], |
10229 | 0 | &ps_recon_datastore |
10230 | 0 | ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][1], |
10231 | 0 | (num_tu_in_cu - ctr - 4) * sizeof(UWORD8)); |
10232 | |
|
10233 | 0 | memmove( |
10234 | 0 | &ps_recon_datastore |
10235 | 0 | ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][1], |
10236 | 0 | &ps_recon_datastore |
10237 | 0 | ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][1], |
10238 | 0 | (num_tu_in_cu - ctr - 4) * sizeof(UWORD8)); |
10239 | 0 | } |
10240 | 0 | } |
10241 | | |
10242 | | /* Parent node size is one more than that of child */ |
10243 | 0 | ps_tu_enc_loop[ctr].s_tu.b3_size++; |
10244 | |
|
10245 | 0 | ctr++; |
10246 | | |
10247 | | /* move the subsequent TUs to next element */ |
10248 | 0 | ASSERT(num_tu_in_cu >= (ctr + 3)); |
10249 | 0 | memmove( |
10250 | 0 | (void *)(ps_tu_enc_loop + ctr), |
10251 | 0 | (void *)(ps_tu_enc_loop + ctr + 3), |
10252 | 0 | (num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_out_t)); |
10253 | | |
10254 | | /* Also memmove the temp TU params */ |
10255 | 0 | memmove( |
10256 | 0 | (void *)(ps_tu_enc_loop_temp_prms + ctr), |
10257 | 0 | (void *)(ps_tu_enc_loop_temp_prms + ctr + 3), |
10258 | 0 | (num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_temp_prms_t)); |
10259 | | |
10260 | | /* Number of TUs in CU are now less by 3 */ |
10261 | 0 | num_tu_in_cu -= 3; |
10262 | | |
10263 | | /* Recurse again as new parent also be can be merged later */ |
10264 | 0 | recurse = 1; |
10265 | 0 | } |
10266 | 0 | else |
10267 | 0 | { |
10268 | | /* Go to next set of leaf nodes */ |
10269 | 0 | ctr += 4; |
10270 | 0 | } |
10271 | 0 | } |
10272 | 0 | else |
10273 | 0 | { |
10274 | 0 | ctr++; |
10275 | 0 | } |
10276 | 0 | } |
10277 | 0 | } |
10278 | | |
10279 | | /* return the modified num TUs*/ |
10280 | 0 | ASSERT(num_tu_in_cu > 0); |
10281 | 0 | return (num_tu_in_cu); |
10282 | 0 | } |
10283 | | |
10284 | | UWORD8 ihevce_intra_mode_nxn_hash_updater( |
10285 | | UWORD8 *pu1_mode_array, UWORD8 *pu1_hash_table, UWORD8 u1_num_ipe_modes) |
10286 | 0 | { |
10287 | 0 | WORD32 i; |
10288 | 0 | WORD32 i4_mode; |
10289 | |
|
10290 | 0 | for(i = 0; i < MAX_INTRA_CU_CANDIDATES; i++) |
10291 | 0 | { |
10292 | 0 | if(pu1_mode_array[i] < 35) |
10293 | 0 | { |
10294 | 0 | if(pu1_mode_array[i] != 0) |
10295 | 0 | { |
10296 | 0 | i4_mode = pu1_mode_array[i] - 1; |
10297 | |
|
10298 | 0 | if(!pu1_hash_table[i4_mode]) |
10299 | 0 | { |
10300 | 0 | pu1_hash_table[i4_mode] = 1; |
10301 | 0 | pu1_mode_array[u1_num_ipe_modes] = i4_mode; |
10302 | 0 | u1_num_ipe_modes++; |
10303 | 0 | } |
10304 | 0 | } |
10305 | |
|
10306 | 0 | if(pu1_mode_array[i] != 34) |
10307 | 0 | { |
10308 | 0 | i4_mode = pu1_mode_array[i] + 1; |
10309 | |
|
10310 | 0 | if((!pu1_hash_table[i4_mode])) |
10311 | 0 | { |
10312 | 0 | pu1_hash_table[i4_mode] = 1; |
10313 | 0 | pu1_mode_array[u1_num_ipe_modes] = i4_mode; |
10314 | 0 | u1_num_ipe_modes++; |
10315 | 0 | } |
10316 | 0 | } |
10317 | 0 | } |
10318 | 0 | } |
10319 | |
|
10320 | 0 | if(!pu1_hash_table[INTRA_PLANAR]) |
10321 | 0 | { |
10322 | 0 | pu1_hash_table[INTRA_PLANAR] = 1; |
10323 | 0 | pu1_mode_array[u1_num_ipe_modes] = INTRA_PLANAR; |
10324 | 0 | u1_num_ipe_modes++; |
10325 | 0 | } |
10326 | |
|
10327 | 0 | if(!pu1_hash_table[INTRA_DC]) |
10328 | 0 | { |
10329 | 0 | pu1_hash_table[INTRA_DC] = 1; |
10330 | 0 | pu1_mode_array[u1_num_ipe_modes] = INTRA_DC; |
10331 | 0 | u1_num_ipe_modes++; |
10332 | 0 | } |
10333 | |
|
10334 | 0 | return u1_num_ipe_modes; |
10335 | 0 | } |
10336 | | |
#if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
/*!
******************************************************************************
* \if Function name : ihevce_determine_tu_tree_distribution \endif
*
* \brief
*    Invokes the recursive-TU SATD evaluator that matches the CU size on the
*    candidate's prediction; the evaluator is handed the candidate's
*    ai4_tu_split_flag array
*
* \param[in,out] ps_cu_data   : inter candidate; pred buffer and stride are
*                               read, ai4_tu_split_flag is passed on and
*                               ai4_tu_split_flag[0] may be forced to 1
* \param[in] ps_func_selector : function selector forwarded to the evaluator
* \param[in] pi2_scratch_mem  : scratch memory handed to the evaluator
* \param[in] pu1_inp          : source buffer
* \param[in] i4_inp_stride    : source buffer stride
* \param[in] i4_lambda        : lambda forwarded to the evaluator
* \param[in] u1_lambda_q_shift: lambda q-shift forwarded to the evaluator
* \param[in] u1_cu_size       : CU size (64, 32, 16 or 8)
* \param[in] u1_max_tr_depth  : max transform depth (capped to 1 for 64x64)
*
* \return
*    Value returned by the selected evaluator
*
*****************************************************************************
*/
WORD32 ihevce_determine_tu_tree_distribution(
    cu_inter_cand_t *ps_cu_data,
    me_func_selector_t *ps_func_selector,
    WORD16 *pi2_scratch_mem,
    UWORD8 *pu1_inp,
    WORD32 i4_inp_stride,
    WORD32 i4_lambda,
    UWORD8 u1_lambda_q_shift,
    UWORD8 u1_cu_size,
    UWORD8 u1_max_tr_depth)
{
    PF_SAD_FXN_TU_REC pf_err_compute[4];
    err_prms_t s_err_prms;
    WORD32 i4_satd;

    /* One evaluator per CU size */
    pf_err_compute[CU_8x8] = hme_evalsatd_pt_pu_8x8_tu_rec;
    pf_err_compute[CU_16x16] = hme_evalsatd_pt_pu_16x16_tu_rec;
    pf_err_compute[CU_32x32] = hme_evalsatd_pt_pu_32x32_tu_rec;
    pf_err_compute[CU_64x64] = hme_evalsatd_pt_pu_64x64_tu_rec;

    /* Source and prediction buffers */
    s_err_prms.pu1_inp = pu1_inp;
    s_err_prms.i4_inp_stride = i4_inp_stride;
    s_err_prms.pu1_ref = ps_cu_data->pu1_pred_data;
    s_err_prms.i4_ref_stride = ps_cu_data->i4_pred_data_stride;

    /* Result, split-flag and scratch buffers */
    s_err_prms.pi4_sad_grid = &i4_satd;
    s_err_prms.pi4_tu_split_flags = ps_cu_data->ai4_tu_split_flag;
    s_err_prms.pu1_wkg_mem = (UWORD8 *)pi2_scratch_mem;

    /* A 64x64 CU is evaluated with a transform depth of at most 1 */
    s_err_prms.u1_max_tr_depth =
        (64 == u1_cu_size) ? MIN(1, u1_max_tr_depth) : u1_max_tr_depth;

    i4_satd = pf_err_compute[hme_get_range(u1_cu_size) - 4](
        &s_err_prms, i4_lambda, u1_lambda_q_shift, 0, ps_func_selector);

    /* With depth 0, a CU below 64x64 whose part size is non-zero gets */
    /* its root split flag forced to 1 */
    if((u1_cu_size != 64) && (0 == u1_max_tr_depth) && (ps_cu_data->b3_part_size != 0))
    {
        ps_cu_data->ai4_tu_split_flag[0] = 1;
    }

    return i4_satd;
}
#endif
10388 | | |
10389 | | void ihevce_populate_nbr_4x4_with_pu_data( |
10390 | | nbr_4x4_t *ps_nbr_4x4, pu_t *ps_pu, WORD32 i4_nbr_buf_stride) |
10391 | 0 | { |
10392 | 0 | WORD32 i, j; |
10393 | |
|
10394 | 0 | nbr_4x4_t *ps_tmp_4x4 = ps_nbr_4x4; |
10395 | |
|
10396 | 0 | WORD32 ht = (ps_pu->b4_ht + 1); |
10397 | 0 | WORD32 wd = (ps_pu->b4_wd + 1); |
10398 | |
|
10399 | 0 | ps_nbr_4x4->b1_intra_flag = 0; |
10400 | 0 | ps_nbr_4x4->b1_pred_l0_flag = !(ps_pu->b2_pred_mode & 1); |
10401 | 0 | ps_nbr_4x4->b1_pred_l1_flag = (ps_pu->b2_pred_mode > PRED_L0); |
10402 | 0 | ps_nbr_4x4->mv = ps_pu->mv; |
10403 | |
|
10404 | 0 | for(i = 0; i < ht; i++) |
10405 | 0 | { |
10406 | 0 | for(j = 0; j < wd; j++) |
10407 | 0 | { |
10408 | 0 | ps_tmp_4x4[j] = *ps_nbr_4x4; |
10409 | 0 | } |
10410 | |
|
10411 | 0 | ps_tmp_4x4 += i4_nbr_buf_stride; |
10412 | 0 | } |
10413 | 0 | } |
10414 | | |
10415 | | void ihevce_call_luma_inter_pred_rdopt_pass1( |
10416 | | ihevce_enc_loop_ctxt_t *ps_ctxt, cu_inter_cand_t *ps_inter_cand, WORD32 cu_size) |
10417 | 0 | { |
10418 | 0 | pu_t *ps_pu; |
10419 | 0 | UWORD8 *pu1_pred; |
10420 | 0 | WORD32 pred_stride, ctr, num_cu_part, skip_or_merge_flag = 0; |
10421 | 0 | WORD32 inter_pu_wd, inter_pu_ht; |
10422 | |
|
10423 | 0 | pu1_pred = ps_inter_cand->pu1_pred_data_scr; |
10424 | 0 | pred_stride = ps_inter_cand->i4_pred_data_stride; |
10425 | 0 | num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1; |
10426 | |
|
10427 | 0 | for(ctr = 0; ctr < num_cu_part; ctr++) |
10428 | 0 | { |
10429 | 0 | ps_pu = &ps_inter_cand->as_inter_pu[ctr]; |
10430 | | |
10431 | | /* IF AMP then each partitions can have diff wd ht */ |
10432 | 0 | inter_pu_wd = (ps_pu->b4_wd + 1) << 2; |
10433 | 0 | inter_pu_ht = (ps_pu->b4_ht + 1) << 2; |
10434 | |
|
10435 | 0 | skip_or_merge_flag = ps_inter_cand->b1_skip_flag | ps_pu->b1_merge_flag; |
10436 | | //if(0 == skip_or_merge_flag) |
10437 | 0 | { |
10438 | 0 | ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 1); |
10439 | 0 | } |
10440 | 0 | if((2 == num_cu_part) && (0 == ctr)) |
10441 | 0 | { |
10442 | | /* 2Nx__ partion case */ |
10443 | 0 | if(inter_pu_wd == cu_size) |
10444 | 0 | { |
10445 | 0 | pu1_pred += (inter_pu_ht * pred_stride); |
10446 | 0 | } |
10447 | | |
10448 | | /* __x2N partion case */ |
10449 | 0 | if(inter_pu_ht == cu_size) |
10450 | 0 | { |
10451 | 0 | pu1_pred += inter_pu_wd; |
10452 | 0 | } |
10453 | 0 | } |
10454 | 0 | } |
10455 | 0 | } |
10456 | | |
10457 | | LWORD64 ihevce_it_recon_ssd( |
10458 | | ihevce_enc_loop_ctxt_t *ps_ctxt, |
10459 | | UWORD8 *pu1_src, |
10460 | | WORD32 i4_src_strd, |
10461 | | UWORD8 *pu1_pred, |
10462 | | WORD32 i4_pred_strd, |
10463 | | WORD16 *pi2_deq_data, |
10464 | | WORD32 i4_deq_data_strd, |
10465 | | UWORD8 *pu1_recon, |
10466 | | WORD32 i4_recon_stride, |
10467 | | UWORD8 *pu1_ecd_data, |
10468 | | UWORD8 u1_trans_size, |
10469 | | UWORD8 u1_pred_mode, |
10470 | | WORD32 i4_cbf, |
10471 | | WORD32 i4_zero_col, |
10472 | | WORD32 i4_zero_row, |
10473 | | CHROMA_PLANE_ID_T e_chroma_plane) |
10474 | 0 | { |
10475 | 0 | if(NULL_PLANE == e_chroma_plane) |
10476 | 0 | { |
10477 | 0 | ihevce_it_recon_fxn( |
10478 | 0 | ps_ctxt, |
10479 | 0 | pi2_deq_data, |
10480 | 0 | i4_deq_data_strd, |
10481 | 0 | pu1_pred, |
10482 | 0 | i4_pred_strd, |
10483 | 0 | pu1_recon, |
10484 | 0 | i4_recon_stride, |
10485 | 0 | pu1_ecd_data, |
10486 | 0 | u1_trans_size, |
10487 | 0 | u1_pred_mode, |
10488 | 0 | i4_cbf, |
10489 | 0 | i4_zero_col, |
10490 | 0 | i4_zero_row); |
10491 | |
|
10492 | 0 | return ps_ctxt->s_cmn_opt_func.pf_ssd_calculator( |
10493 | 0 | pu1_recon, pu1_src, i4_recon_stride, i4_src_strd, u1_trans_size, u1_trans_size, |
10494 | 0 | e_chroma_plane); |
10495 | 0 | } |
10496 | 0 | else |
10497 | 0 | { |
10498 | 0 | ihevce_chroma_it_recon_fxn( |
10499 | 0 | ps_ctxt, |
10500 | 0 | pi2_deq_data, |
10501 | 0 | i4_deq_data_strd, |
10502 | 0 | pu1_pred, |
10503 | 0 | i4_pred_strd, |
10504 | 0 | pu1_recon, |
10505 | 0 | i4_recon_stride, |
10506 | 0 | pu1_ecd_data, |
10507 | 0 | u1_trans_size, |
10508 | 0 | i4_cbf, |
10509 | 0 | i4_zero_col, |
10510 | 0 | i4_zero_row, |
10511 | 0 | e_chroma_plane); |
10512 | |
|
10513 | 0 | return ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator( |
10514 | 0 | pu1_recon, |
10515 | 0 | pu1_src, |
10516 | 0 | i4_recon_stride, |
10517 | 0 | i4_src_strd, |
10518 | 0 | u1_trans_size, |
10519 | 0 | u1_trans_size, |
10520 | 0 | e_chroma_plane); |
10521 | 0 | } |
10522 | 0 | } |
10523 | | |
10524 | | /*! |
10525 | | ****************************************************************************** |
10526 | | * \if Function name : ihevce_chroma_t_q_iq_ssd_scan_fxn \endif |
10527 | | * |
10528 | | * \brief |
10529 | | * Transform unit level (Chroma) enc_loop function |
10530 | | * |
10531 | | * \param[in] ps_ctxt enc_loop module ctxt pointer |
10532 | | * \param[in] pu1_pred pointer to predicted data buffer |
10533 | | * \param[in] pred_strd predicted buffer stride |
10534 | | * \param[in] pu1_src pointer to source data buffer |
10535 | | * \param[in] src_strd source buffer stride |
10536 | | * \param[in] pi2_deq_data pointer to store iq data |
10537 | | * \param[in] deq_data_strd iq data buffer stride |
10538 | | * \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod) |
10539 | | * \param[out] pu1_csbf_buf pointer to store the csbf for all 4x4 in a current |
10540 | | * block |
10541 | | * \param[out] csbf_strd csbf buffer stride |
10542 | | * \param[in] trans_size transform size (4, 8, 16) |
10543 | | * \param[in] intra_flag 0:Inter/Skip 1:Intra |
10544 | | * \param[out] pi4_coeff_off pointer to store the number of bytes produced in |
10545 | | * coeff buffer |
10546 | | the current TU in RDopt Mode |
10547 | | * \param[out] pi4_zero_col pointer to store the zero_col info for the TU |
10548 | | * \param[out] pi4_zero_row pointer to store the zero_row info for the TU |
10549 | | * |
10550 | | * \return |
10551 | | * CBF of the current block |
10552 | | * |
10553 | | * \author |
10554 | | * Ittiam |
10555 | | * |
10556 | | ***************************************************************************** |
10557 | | */ |
10558 | | WORD32 ihevce_chroma_t_q_iq_ssd_scan_fxn( |
10559 | | ihevce_enc_loop_ctxt_t *ps_ctxt, |
10560 | | UWORD8 *pu1_pred, |
10561 | | WORD32 pred_strd, |
10562 | | UWORD8 *pu1_src, |
10563 | | WORD32 src_strd, |
10564 | | WORD16 *pi2_deq_data, |
10565 | | WORD32 deq_data_strd, |
10566 | | UWORD8 *pu1_recon, |
10567 | | WORD32 i4_recon_stride, |
10568 | | UWORD8 *pu1_ecd_data, |
10569 | | UWORD8 *pu1_csbf_buf, |
10570 | | WORD32 csbf_strd, |
10571 | | WORD32 trans_size, |
10572 | | WORD32 i4_scan_idx, |
10573 | | WORD32 intra_flag, |
10574 | | WORD32 *pi4_coeff_off, |
10575 | | WORD32 *pi4_tu_bits, |
10576 | | WORD32 *pi4_zero_col, |
10577 | | WORD32 *pi4_zero_row, |
10578 | | UWORD8 *pu1_is_recon_available, |
10579 | | WORD32 i4_perform_sbh, |
10580 | | WORD32 i4_perform_rdoq, |
10581 | | LWORD64 *pi8_cost, |
10582 | | #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
10583 | | WORD32 i4_alpha_stim_multiplier, |
10584 | | UWORD8 u1_is_cu_noisy, |
10585 | | #endif |
10586 | | UWORD8 u1_is_skip, |
10587 | | SSD_TYPE_T e_ssd_type, |
10588 | | CHROMA_PLANE_ID_T e_chroma_plane) |
10589 | 0 | { |
10590 | 0 | WORD32 trans_idx, cbf, u4_blk_sad; |
10591 | 0 | WORD16 *pi2_quant_coeffs; |
10592 | 0 | WORD16 *pi2_trans_values; |
10593 | 0 | WORD32 quant_scale_mat_offset; |
10594 | 0 | WORD32 *pi4_trans_scratch; |
10595 | 0 | WORD32 *pi4_subBlock2csbfId_map = NULL; |
10596 | |
|
10597 | | #if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3 |
10598 | | WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i; |
10599 | | #endif |
10600 | |
|
10601 | 0 | rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt; |
10602 | |
|
10603 | 0 | WORD32 i4_perform_zcbf = (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE) || |
10604 | 0 | (!intra_flag && ENABLE_INTER_ZCU_COST); |
10605 | 0 | WORD32 i4_perform_coeff_level_rdoq = |
10606 | 0 | (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING) && |
10607 | 0 | (ps_ctxt->i4_chroma_quant_rounding_level == CHROMA_QUANT_ROUNDING); |
10608 | |
|
10609 | 0 | ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE)); |
10610 | 0 | ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW); |
10611 | | |
10612 | 0 | *pi4_coeff_off = 0; |
10613 | 0 | *pi4_tu_bits = 0; |
10614 | 0 | pu1_is_recon_available[0] = 0; |
10615 | |
|
10616 | 0 | pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0]; |
10617 | 0 | pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0]; |
10618 | 0 | pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2); |
10619 | |
|
10620 | 0 | if(2 == trans_size) |
10621 | 0 | { |
10622 | 0 | trans_size = 4; |
10623 | 0 | } |
10624 | | |
10625 | | /* translate the transform size to index */ |
10626 | 0 | trans_idx = trans_size >> 2; |
10627 | |
|
10628 | 0 | if(16 == trans_size) |
10629 | 0 | { |
10630 | 0 | trans_idx = 3; |
10631 | 0 | } |
10632 | |
|
10633 | 0 | if(u1_is_skip) |
10634 | 0 | { |
10635 | 0 | pi8_cost[0] = ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator( |
10636 | 0 | pu1_pred, |
10637 | 0 | pu1_src, |
10638 | 0 | pred_strd, |
10639 | 0 | src_strd, |
10640 | 0 | trans_size, |
10641 | 0 | trans_size, |
10642 | 0 | e_chroma_plane); |
10643 | |
|
10644 | 0 | if(e_ssd_type == SPATIAL_DOMAIN_SSD) |
10645 | 0 | { |
10646 | | /* buffer copy from pred to recon */ |
10647 | 0 | ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( |
10648 | 0 | pu1_pred, |
10649 | 0 | pred_strd, |
10650 | 0 | pu1_recon, |
10651 | 0 | i4_recon_stride, |
10652 | 0 | trans_size, |
10653 | 0 | trans_size, |
10654 | 0 | e_chroma_plane); |
10655 | |
|
10656 | 0 | pu1_is_recon_available[0] = 1; |
10657 | 0 | } |
10658 | |
|
10659 | 0 | #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
10660 | 0 | if(u1_is_cu_noisy && i4_alpha_stim_multiplier) |
10661 | 0 | { |
10662 | 0 | pi8_cost[0] = ihevce_inject_stim_into_distortion( |
10663 | 0 | pu1_src, |
10664 | 0 | src_strd, |
10665 | 0 | pu1_pred, |
10666 | 0 | pred_strd, |
10667 | 0 | pi8_cost[0], |
10668 | 0 | i4_alpha_stim_multiplier, |
10669 | 0 | trans_size, |
10670 | 0 | 0, |
10671 | 0 | ps_ctxt->u1_enable_psyRDOPT, |
10672 | 0 | e_chroma_plane); |
10673 | 0 | } |
10674 | 0 | #endif |
10675 | |
|
10676 | 0 | #if ENABLE_INTER_ZCU_COST |
10677 | | #if !WEIGH_CHROMA_COST |
10678 | | /* cbf = 0, accumulate cu not coded cost */ |
10679 | | ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0]; |
10680 | | #else |
10681 | 0 | ps_ctxt->i8_cu_not_coded_cost += (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor + |
10682 | 0 | (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> |
10683 | 0 | CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT; |
10684 | 0 | #endif |
10685 | 0 | #endif |
10686 | |
|
10687 | 0 | return 0; |
10688 | 0 | } |
10689 | | |
10690 | 0 | if(intra_flag == 1) |
10691 | 0 | { |
10692 | 0 | quant_scale_mat_offset = 0; |
10693 | |
|
10694 | | #if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3 |
10695 | | ai4_quant_rounding_factors[0][0] = |
10696 | | MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3); |
10697 | | |
10698 | | for(i = 0; i < trans_size * trans_size; i++) |
10699 | | { |
10700 | | ai4_quant_rounding_factors[1][i] = |
10701 | | MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3][i], |
10702 | | (1 << QUANT_ROUND_FACTOR_Q) / 3); |
10703 | | ai4_quant_rounding_factors[2][i] = |
10704 | | MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3][i], |
10705 | | (1 << QUANT_ROUND_FACTOR_Q) / 3); |
10706 | | } |
10707 | | #endif |
10708 | 0 | } |
10709 | 0 | else |
10710 | 0 | { |
10711 | 0 | quant_scale_mat_offset = NUM_TRANS_TYPES; |
10712 | 0 | } |
10713 | |
|
10714 | 0 | switch(trans_size) |
10715 | 0 | { |
10716 | 0 | case 4: |
10717 | 0 | { |
10718 | 0 | pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU; |
10719 | |
|
10720 | 0 | break; |
10721 | 0 | } |
10722 | 0 | case 8: |
10723 | 0 | { |
10724 | 0 | pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU; |
10725 | |
|
10726 | 0 | break; |
10727 | 0 | } |
10728 | 0 | case 16: |
10729 | 0 | { |
10730 | 0 | pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU; |
10731 | |
|
10732 | 0 | break; |
10733 | 0 | } |
10734 | 0 | case 32: |
10735 | 0 | { |
10736 | 0 | pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU; |
10737 | |
|
10738 | 0 | break; |
10739 | 0 | } |
10740 | 0 | } |
10741 | | |
10742 | | /* ---------- call residue and transform block ------- */ |
10743 | 0 | u4_blk_sad = ps_ctxt->apf_chrm_resd_trns[trans_idx - 1]( |
10744 | 0 | pu1_src, |
10745 | 0 | pu1_pred, |
10746 | 0 | pi4_trans_scratch, |
10747 | 0 | pi2_trans_values, |
10748 | 0 | src_strd, |
10749 | 0 | pred_strd, |
10750 | 0 | trans_size, |
10751 | 0 | e_chroma_plane); |
10752 | 0 | (void)u4_blk_sad; |
10753 | | /* -------- calculate SSD in Transform Domain ------ */ |
10754 | |
|
10755 | 0 | cbf = ps_ctxt->apf_quant_iquant_ssd |
10756 | 0 | [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2] |
10757 | |
|
10758 | 0 | (pi2_trans_values, |
10759 | 0 | ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset], |
10760 | 0 | pi2_quant_coeffs, |
10761 | 0 | pi2_deq_data, |
10762 | 0 | trans_size, |
10763 | 0 | ps_ctxt->i4_chrm_cu_qp_div6, |
10764 | 0 | ps_ctxt->i4_chrm_cu_qp_mod6, |
10765 | 0 | #if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3 |
10766 | 0 | ps_ctxt->i4_quant_rnd_factor[intra_flag], |
10767 | 0 | ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3], |
10768 | 0 | ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3], |
10769 | | #else |
10770 | | intra_flag ? ai4_quant_rounding_factors[0][0] : ps_ctxt->i4_quant_rnd_factor[intra_flag], |
10771 | | intra_flag ? ai4_quant_rounding_factors[1] |
10772 | | : ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3], |
10773 | | intra_flag ? ai4_quant_rounding_factors[2] |
10774 | | : ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3], |
10775 | | #endif |
10776 | 0 | trans_size, |
10777 | 0 | trans_size, |
10778 | 0 | deq_data_strd, |
10779 | 0 | pu1_csbf_buf, |
10780 | 0 | csbf_strd, |
10781 | 0 | pi4_zero_col, |
10782 | 0 | pi4_zero_row, |
10783 | 0 | ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset], |
10784 | 0 | pi8_cost); |
10785 | |
|
10786 | 0 | if(e_ssd_type != FREQUENCY_DOMAIN_SSD) |
10787 | 0 | { |
10788 | 0 | pi8_cost[0] = UINT_MAX; |
10789 | 0 | } |
10790 | |
|
10791 | 0 | if(0 != cbf) |
10792 | 0 | { |
10793 | 0 | if(i4_perform_sbh || i4_perform_rdoq) |
10794 | 0 | { |
10795 | 0 | ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd; |
10796 | 0 | ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size; |
10797 | |
|
10798 | 0 | ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_chrm_cu_qp_div6; |
10799 | 0 | ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_chrm_cu_qp_mod6; |
10800 | 0 | ps_rdoq_sbh_ctxt->i4_scan_idx = i4_scan_idx; |
10801 | 0 | ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost; |
10802 | 0 | ps_rdoq_sbh_ctxt->i4_trans_size = trans_size; |
10803 | |
|
10804 | 0 | ps_rdoq_sbh_ctxt->pi2_dequant_coeff = |
10805 | 0 | ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset]; |
10806 | 0 | ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data; |
10807 | 0 | ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs; |
10808 | 0 | ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values; |
10809 | 0 | ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf; |
10810 | 0 | ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map; |
10811 | |
|
10812 | 0 | if((!i4_perform_rdoq)) |
10813 | 0 | { |
10814 | 0 | ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt); |
10815 | |
|
10816 | 0 | pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost; |
10817 | 0 | } |
10818 | 0 | } |
10819 | | |
10820 | | /* ------- call coeffs scan function ------- */ |
10821 | 0 | *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs( |
10822 | 0 | pi2_quant_coeffs, |
10823 | 0 | pi4_subBlock2csbfId_map, |
10824 | 0 | i4_scan_idx, |
10825 | 0 | trans_size, |
10826 | 0 | pu1_ecd_data, |
10827 | 0 | pu1_csbf_buf, |
10828 | 0 | csbf_strd); |
10829 | 0 | } |
10830 | | |
10831 | | /* Normalize Cost. Note : trans_idx, not (trans_idx-1) */ |
10832 | 0 | pi8_cost[0] >>= ga_trans_shift[trans_idx]; |
10833 | |
|
10834 | 0 | #if RDOPT_ZERO_CBF_ENABLE |
10835 | 0 | if((0 != cbf)) |
10836 | 0 | { |
10837 | 0 | WORD32 tu_bits; |
10838 | 0 | LWORD64 zero_cbf_cost_u, curr_cb_cod_cost; |
10839 | |
|
10840 | 0 | zero_cbf_cost_u = 0; |
10841 | | |
10842 | | /*Populating the fields of rdoq_ctxt structure*/ |
10843 | 0 | if(i4_perform_rdoq) |
10844 | 0 | { |
10845 | | //memset(ps_rdoq_sbh_ctxt,0,sizeof(rdoq_sbh_ctxt_t)); |
10846 | | /* transform size to log2transform size */ |
10847 | 0 | GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size); |
10848 | 0 | ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1; |
10849 | |
|
10850 | 0 | ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_chroma_qf; |
10851 | 0 | ps_rdoq_sbh_ctxt->i4_is_luma = 0; |
10852 | 0 | ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx]; |
10853 | 0 | ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td = |
10854 | 0 | (1 << (ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td - 1)); |
10855 | 0 | ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0; |
10856 | 0 | ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col; |
10857 | 0 | ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row; |
10858 | 0 | } |
10859 | 0 | else if(i4_perform_zcbf) |
10860 | 0 | { |
10861 | | /* cost of zero cbf encoding */ |
10862 | 0 | zero_cbf_cost_u = |
10863 | |
|
10864 | 0 | ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator( |
10865 | 0 | pu1_pred, |
10866 | 0 | pu1_src, |
10867 | 0 | pred_strd, |
10868 | 0 | src_strd, |
10869 | 0 | trans_size, |
10870 | 0 | trans_size, |
10871 | 0 | e_chroma_plane); |
10872 | 0 | } |
10873 | | |
10874 | | /************************************************************************/ |
10875 | | /* call the entropy rdo encode to get the bit estimate for current tu */ |
10876 | | /* note that tu includes only residual coding bits and does not include */ |
10877 | | /* tu split, cbf and qp delta encoding bits for a TU */ |
10878 | | /************************************************************************/ |
10879 | 0 | if(i4_perform_rdoq) |
10880 | 0 | { |
10881 | 0 | tu_bits = ihevce_entropy_rdo_encode_tu_rdoq( |
10882 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt, |
10883 | 0 | pu1_ecd_data, |
10884 | 0 | trans_size, |
10885 | 0 | 0, |
10886 | 0 | ps_rdoq_sbh_ctxt, |
10887 | 0 | pi8_cost, |
10888 | 0 | &zero_cbf_cost_u, |
10889 | 0 | 0); |
10890 | | //Currently, we are not accounting for sign bit in RDOPT bits calculation when RDOQ is turned on |
10891 | |
|
10892 | 0 | if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0) |
10893 | 0 | { |
10894 | 0 | cbf = 0; |
10895 | | |
10896 | | /* num bytes is set to 0 */ |
10897 | 0 | *pi4_coeff_off = 0; |
10898 | 0 | } |
10899 | |
|
10900 | 0 | (*pi4_tu_bits) += tu_bits; |
10901 | |
|
10902 | 0 | if((i4_perform_sbh) && (0 != cbf)) |
10903 | 0 | { |
10904 | 0 | ps_rdoq_sbh_ctxt->i8_ssd_cost = pi8_cost[0]; |
10905 | |
|
10906 | 0 | ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt); |
10907 | |
|
10908 | 0 | pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost; |
10909 | 0 | } |
10910 | | |
10911 | | /*Add round value before normalizing*/ |
10912 | 0 | pi8_cost[0] += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td; |
10913 | 0 | pi8_cost[0] >>= ga_trans_shift[trans_idx]; |
10914 | |
|
10915 | 0 | if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1) |
10916 | 0 | { |
10917 | 0 | *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs( |
10918 | 0 | pi2_quant_coeffs, |
10919 | 0 | pi4_subBlock2csbfId_map, |
10920 | 0 | i4_scan_idx, |
10921 | 0 | trans_size, |
10922 | 0 | pu1_ecd_data, |
10923 | 0 | ps_rdoq_sbh_ctxt->pu1_csbf_buf, |
10924 | 0 | csbf_strd); |
10925 | 0 | } |
10926 | 0 | } |
10927 | 0 | else |
10928 | 0 | { |
10929 | | /************************************************************************/ |
10930 | | /* call the entropy rdo encode to get the bit estimate for current tu */ |
10931 | | /* note that tu includes only residual coding bits and does not include */ |
10932 | | /* tu split, cbf and qp delta encoding bits for a TU */ |
10933 | | /************************************************************************/ |
10934 | 0 | tu_bits = ihevce_entropy_rdo_encode_tu( |
10935 | 0 | &ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 0, i4_perform_sbh); |
10936 | |
|
10937 | 0 | (*pi4_tu_bits) += tu_bits; |
10938 | 0 | } |
10939 | |
|
10940 | 0 | if(e_ssd_type == SPATIAL_DOMAIN_SSD) |
10941 | 0 | { |
10942 | 0 | pi8_cost[0] = ihevce_it_recon_ssd( |
10943 | 0 | ps_ctxt, |
10944 | 0 | pu1_src, |
10945 | 0 | src_strd, |
10946 | 0 | pu1_pred, |
10947 | 0 | pred_strd, |
10948 | 0 | pi2_deq_data, |
10949 | 0 | deq_data_strd, |
10950 | 0 | pu1_recon, |
10951 | 0 | i4_recon_stride, |
10952 | 0 | pu1_ecd_data, |
10953 | 0 | trans_size, |
10954 | 0 | PRED_MODE_INTRA, |
10955 | 0 | cbf, |
10956 | 0 | pi4_zero_col[0], |
10957 | 0 | pi4_zero_row[0], |
10958 | 0 | e_chroma_plane); |
10959 | |
|
10960 | 0 | pu1_is_recon_available[0] = 1; |
10961 | 0 | } |
10962 | |
|
10963 | 0 | #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
10964 | 0 | if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier) |
10965 | 0 | { |
10966 | 0 | pi8_cost[0] = ihevce_inject_stim_into_distortion( |
10967 | 0 | pu1_src, |
10968 | 0 | src_strd, |
10969 | 0 | pu1_recon, |
10970 | 0 | i4_recon_stride, |
10971 | 0 | pi8_cost[0], |
10972 | 0 | i4_alpha_stim_multiplier, |
10973 | 0 | trans_size, |
10974 | 0 | 0, |
10975 | 0 | ps_ctxt->u1_enable_psyRDOPT, |
10976 | 0 | e_chroma_plane); |
10977 | 0 | } |
10978 | 0 | else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier) |
10979 | 0 | { |
10980 | 0 | pi8_cost[0] = ihevce_inject_stim_into_distortion( |
10981 | 0 | pu1_src, |
10982 | 0 | src_strd, |
10983 | 0 | pu1_pred, |
10984 | 0 | pred_strd, |
10985 | 0 | pi8_cost[0], |
10986 | 0 | i4_alpha_stim_multiplier, |
10987 | 0 | trans_size, |
10988 | 0 | 0, |
10989 | 0 | ps_ctxt->u1_enable_psyRDOPT, |
10990 | 0 | e_chroma_plane); |
10991 | 0 | } |
10992 | 0 | #endif |
10993 | |
|
10994 | 0 | curr_cb_cod_cost = pi8_cost[0]; |
10995 | | |
10996 | | /* add the SSD cost to bits estimate given by ECD */ |
10997 | 0 | curr_cb_cod_cost += |
10998 | 0 | COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT); |
10999 | |
|
11000 | 0 | if(i4_perform_zcbf) |
11001 | 0 | { |
11002 | 0 | #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
11003 | 0 | if(u1_is_cu_noisy && i4_alpha_stim_multiplier) |
11004 | 0 | { |
11005 | 0 | zero_cbf_cost_u = ihevce_inject_stim_into_distortion( |
11006 | 0 | pu1_src, |
11007 | 0 | src_strd, |
11008 | 0 | pu1_pred, |
11009 | 0 | pred_strd, |
11010 | 0 | zero_cbf_cost_u, |
11011 | 0 | !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS |
11012 | 0 | : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * |
11013 | 0 | (double)ALPHA_FOR_ZERO_CODING_DECISIONS) / |
11014 | 0 | 100.0, |
11015 | 0 | trans_size, |
11016 | 0 | 0, |
11017 | 0 | ps_ctxt->u1_enable_psyRDOPT, |
11018 | 0 | e_chroma_plane); |
11019 | 0 | } |
11020 | 0 | #endif |
11021 | | /* force the tu as zero cbf if zero_cbf_cost is lower */ |
11022 | 0 | if(zero_cbf_cost_u < curr_cb_cod_cost) |
11023 | 0 | { |
11024 | 0 | *pi4_coeff_off = 0; |
11025 | 0 | cbf = 0; |
11026 | 0 | (*pi4_tu_bits) = 0; |
11027 | 0 | pi8_cost[0] = zero_cbf_cost_u; |
11028 | |
|
11029 | 0 | pu1_is_recon_available[0] = 0; |
11030 | |
|
11031 | 0 | if(e_ssd_type == SPATIAL_DOMAIN_SSD) |
11032 | 0 | { |
11033 | 0 | ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy( |
11034 | 0 | pu1_pred, |
11035 | 0 | pred_strd, |
11036 | 0 | pu1_recon, |
11037 | 0 | i4_recon_stride, |
11038 | 0 | trans_size, |
11039 | 0 | trans_size, |
11040 | 0 | e_chroma_plane); |
11041 | |
|
11042 | 0 | pu1_is_recon_available[0] = 1; |
11043 | 0 | } |
11044 | 0 | } |
11045 | |
|
11046 | 0 | #if ENABLE_INTER_ZCU_COST |
11047 | 0 | if(!intra_flag) |
11048 | 0 | { |
11049 | | #if !WEIGH_CHROMA_COST |
11050 | | ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost_u; |
11051 | | #else |
11052 | 0 | ps_ctxt->i8_cu_not_coded_cost += (LWORD64)( |
11053 | 0 | (zero_cbf_cost_u * ps_ctxt->u4_chroma_cost_weighing_factor + |
11054 | 0 | (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> |
11055 | 0 | CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT); |
11056 | 0 | #endif |
11057 | 0 | } |
11058 | 0 | #endif |
11059 | 0 | } |
11060 | 0 | } |
11061 | 0 | else |
11062 | 0 | { |
11063 | 0 | if(e_ssd_type == SPATIAL_DOMAIN_SSD) |
11064 | 0 | { |
11065 | 0 | pi8_cost[0] = ihevce_it_recon_ssd( |
11066 | 0 | ps_ctxt, |
11067 | 0 | pu1_src, |
11068 | 0 | src_strd, |
11069 | 0 | pu1_pred, |
11070 | 0 | pred_strd, |
11071 | 0 | pi2_deq_data, |
11072 | 0 | deq_data_strd, |
11073 | 0 | pu1_recon, |
11074 | 0 | i4_recon_stride, |
11075 | 0 | pu1_ecd_data, |
11076 | 0 | trans_size, |
11077 | 0 | PRED_MODE_INTRA, |
11078 | 0 | cbf, |
11079 | 0 | pi4_zero_col[0], |
11080 | 0 | pi4_zero_row[0], |
11081 | 0 | e_chroma_plane); |
11082 | |
|
11083 | 0 | pu1_is_recon_available[0] = 1; |
11084 | 0 | } |
11085 | |
|
11086 | 0 | #if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS |
11087 | 0 | if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier) |
11088 | 0 | { |
11089 | 0 | pi8_cost[0] = ihevce_inject_stim_into_distortion( |
11090 | 0 | pu1_src, |
11091 | 0 | src_strd, |
11092 | 0 | pu1_recon, |
11093 | 0 | i4_recon_stride, |
11094 | 0 | pi8_cost[0], |
11095 | 0 | !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS |
11096 | 0 | : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * |
11097 | 0 | (double)ALPHA_FOR_ZERO_CODING_DECISIONS) / |
11098 | 0 | 100.0, |
11099 | 0 | trans_size, |
11100 | 0 | 0, |
11101 | 0 | ps_ctxt->u1_enable_psyRDOPT, |
11102 | 0 | e_chroma_plane); |
11103 | 0 | } |
11104 | 0 | else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier) |
11105 | 0 | { |
11106 | 0 | pi8_cost[0] = ihevce_inject_stim_into_distortion( |
11107 | 0 | pu1_src, |
11108 | 0 | src_strd, |
11109 | 0 | pu1_pred, |
11110 | 0 | pred_strd, |
11111 | 0 | pi8_cost[0], |
11112 | 0 | !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS |
11113 | 0 | : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) * |
11114 | 0 | (double)ALPHA_FOR_ZERO_CODING_DECISIONS) / |
11115 | 0 | 100.0, |
11116 | 0 | trans_size, |
11117 | 0 | 0, |
11118 | 0 | ps_ctxt->u1_enable_psyRDOPT, |
11119 | 0 | e_chroma_plane); |
11120 | 0 | } |
11121 | 0 | #endif |
11122 | |
|
11123 | 0 | #if ENABLE_INTER_ZCU_COST |
11124 | 0 | if(!intra_flag) |
11125 | 0 | { |
11126 | | #if !WEIGH_CHROMA_COST |
11127 | | /* cbf = 0, accumulate cu not coded cost */ |
11128 | | ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0]; |
11129 | | #else |
11130 | | /* cbf = 0, accumulate cu not coded cost */ |
11131 | |
|
11132 | 0 | ps_ctxt->i8_cu_not_coded_cost += (LWORD64)( |
11133 | 0 | (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor + |
11134 | 0 | (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >> |
11135 | 0 | CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT); |
11136 | 0 | #endif |
11137 | 0 | } |
11138 | 0 | #endif |
11139 | 0 | } |
11140 | 0 | #endif /* RDOPT_ZERO_CBF_ENABLE */ |
11141 | |
|
11142 | 0 | return (cbf); |
11143 | 0 | } |