/src/libhevc/encoder/ihevce_recur_bracketing.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2018 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | |
21 | | /*! |
22 | | ****************************************************************************** |
23 | | * \file ihevce_recur_bracketing.c |
24 | | * |
25 | | * \brief |
26 | | * This file contains interface functions of recursive bracketing |
27 | | * module |
28 | | * \date |
29 | | * 12/02/2012 |
30 | | * |
31 | | * \author |
32 | | * Ittiam |
33 | | * |
34 | | * List of Functions |
35 | | * |
36 | | * |
37 | | ****************************************************************************** |
38 | | */ |
39 | | |
40 | | /*****************************************************************************/ |
41 | | /* File Includes */ |
42 | | /*****************************************************************************/ |
43 | | /* System include files */ |
44 | | #include <stdio.h> |
45 | | #include <string.h> |
46 | | #include <stdlib.h> |
47 | | #include <assert.h> |
48 | | #include <stdarg.h> |
49 | | #include <math.h> |
50 | | |
51 | | /* User include files */ |
52 | | #include "ihevc_typedefs.h" |
53 | | #include "itt_video_api.h" |
54 | | #include "ihevce_api.h" |
55 | | |
56 | | #include "rc_cntrl_param.h" |
57 | | #include "rc_frame_info_collector.h" |
58 | | #include "rc_look_ahead_params.h" |
59 | | |
60 | | #include "ihevc_defs.h" |
61 | | #include "ihevc_structs.h" |
62 | | #include "ihevc_platform_macros.h" |
63 | | #include "ihevc_deblk.h" |
64 | | #include "ihevc_itrans_recon.h" |
65 | | #include "ihevc_chroma_itrans_recon.h" |
66 | | #include "ihevc_chroma_intra_pred.h" |
67 | | #include "ihevc_intra_pred.h" |
68 | | #include "ihevc_inter_pred.h" |
69 | | #include "ihevc_mem_fns.h" |
70 | | #include "ihevc_padding.h" |
71 | | #include "ihevc_weighted_pred.h" |
72 | | #include "ihevc_sao.h" |
73 | | #include "ihevc_resi_trans.h" |
74 | | #include "ihevc_quant_iquant_ssd.h" |
75 | | #include "ihevc_cabac_tables.h" |
76 | | |
77 | | #include "ihevce_defs.h" |
78 | | #include "ihevce_lap_enc_structs.h" |
79 | | #include "ihevce_multi_thrd_structs.h" |
80 | | #include "ihevce_me_common_defs.h" |
81 | | #include "ihevce_had_satd.h" |
82 | | #include "ihevce_error_codes.h" |
83 | | #include "ihevce_bitstream.h" |
84 | | #include "ihevce_cabac.h" |
85 | | #include "ihevce_rdoq_macros.h" |
86 | | #include "ihevce_function_selector.h" |
87 | | #include "ihevce_enc_structs.h" |
88 | | #include "ihevce_entropy_structs.h" |
89 | | #include "ihevce_cmn_utils_instr_set_router.h" |
90 | | #include "ihevce_enc_loop_structs.h" |
91 | | #include "ihevce_ipe_instr_set_router.h" |
92 | | #include "ihevce_ipe_structs.h" |
93 | | #include "ihevce_ipe_pass.h" |
94 | | #include "ihevce_recur_bracketing.h" |
95 | | #include "ihevce_nbr_avail.h" |
96 | | #include "ihevc_common_tables.h" |
97 | | #include "ihevce_decomp_pre_intra_structs.h" |
98 | | #include "ihevce_decomp_pre_intra_pass.h" |
99 | | |
100 | | #include "cast_types.h" |
101 | | #include "osal.h" |
102 | | #include "osal_defaults.h" |
103 | | |
104 | | /*****************************************************************************/ |
105 | | /* Constant Macros */ |
106 | | /*****************************************************************************/ |
/* Debug switch, set to 0 here. */
/* NOTE(review): no use of IP_DBG_L1_l2 is visible in this part of the file - confirm it is still needed */
#define IP_DBG_L1_l2 0
/* NOTE(review): presumably a bias applied when child CU costs are compared */
/* against the parent CU cost; the use site is not visible here - confirm  */
#define CHILD_BIAS 12
109 | | |
110 | | /*****************************************************************************/ |
111 | | /* Globals */ |
112 | | /*****************************************************************************/ |
113 | | extern pf_intra_pred g_apf_lum_ip[10]; |
114 | | |
115 | | extern WORD32 g_i4_ip_funcs[MAX_NUM_IP_MODES]; |
116 | | |
117 | | UWORD8 gau1_cu_pos_x[64] = { 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, |
118 | | 6, 7, 4, 5, 4, 5, 6, 7, 6, 7, 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, |
119 | | 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 4, 5, 4, 5, 6, 7, 6, 7 }; |
120 | | |
121 | | UWORD8 gau1_cu_pos_y[64] = { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3, 0, 0, 1, 1, 0, 0, |
122 | | 1, 1, 2, 2, 3, 3, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 5, 5, 6, 6, 7, 7, |
123 | | 6, 6, 7, 7, 4, 4, 5, 5, 4, 4, 5, 5, 6, 6, 7, 7, 6, 6, 7, 7 }; |
124 | | |
/* Clears bit number 'bit' in the lvalue 'x'. Both arguments are fully     */
/* parenthesised so that expressions such as (idx & 7) can be passed as    */
/* 'bit' without being re-associated with the shift operator.              */
/* Note: 'x' is evaluated twice, so it must not have side effects.         */
#define RESET_BIT(x, bit) ((x) = (x) & ~((WORD32)1 << (bit)))
126 | | |
127 | | /*****************************************************************************/ |
128 | | /* Function Definitions */ |
129 | | /*****************************************************************************/ |
130 | | |
131 | | /*! |
132 | | ****************************************************************************** |
133 | | * \if Function name : ihevce_update_cand_list \endif |
134 | | * |
135 | | * \brief |
136 | | * Final Candidate list population, nbr flag andd nbr mode update function |
137 | | * |
138 | | * \param[in] ps_row_cu : pointer to cu analyse struct |
139 | | * \param[in] ps_cu_node : pointer to cu node info buffer |
140 | | * \param[in] ps_ed_blk_l1 : pointer to level 1 and 2 decision buffer |
141 | | * \param[in] pu1_cand_mode_list : pointer to candidate list buffer |
142 | | * |
143 | | * \return |
144 | | * None |
145 | | * |
146 | | * \author |
147 | | * Ittiam |
148 | | * |
149 | | ***************************************************************************** |
150 | | */ |
151 | | void ihevce_update_cand_list( |
152 | | ihevce_ipe_cu_tree_t *ps_cu_node, ihevce_ed_blk_t *ps_ed_blk_l1, ihevce_ipe_ctxt_t *ps_ctxt) |
153 | 1.77M | { |
154 | 1.77M | WORD32 row, col, x, y, size; |
155 | | |
156 | | /* Candidate mode Update */ |
157 | 1.77M | (void)ps_ed_blk_l1; |
158 | | /* Update CTB mode map for the finalised CU */ |
159 | 1.77M | x = ((ps_cu_node->u2_x0 << 3) >> 2) + 1; |
160 | 1.77M | y = ((ps_cu_node->u2_y0 << 3) >> 2) + 1; |
161 | 1.77M | size = ps_cu_node->u1_cu_size >> 2; |
162 | 9.90M | for(row = y; row < (y + size); row++) |
163 | 8.13M | { |
164 | 55.5M | for(col = x; col < (x + size); col++) |
165 | 47.4M | { |
166 | 47.4M | ps_ctxt->au1_ctb_mode_map[row][col] = ps_cu_node->best_mode; |
167 | 47.4M | } |
168 | 8.13M | } |
169 | 1.77M | return; |
170 | 1.77M | } |
171 | | |
172 | | /*! |
173 | | ****************************************************************************** |
174 | | * \if Function name : ihevce_intra_populate_mode_bits_cost_bracketing \endif |
175 | | * |
176 | | * \brief |
177 | | * Mpm indx calc function based on left and top available modes |
178 | | * |
179 | | * \param[in] top_intra_mode : Top available intra mode |
180 | | * \param[in] left_intra_mode : Left available intra mode |
181 | | * \param[in] available_top : Top availability flag |
182 | | * \param[in] available_left : Left availability flag |
183 | | * \param[in] cu_pos_y : cu position wrt to CTB |
184 | | * \param[in] mode_bits_cost : pointer to mode bits buffer |
185 | | * \param[in] lambda : Lambda value (SAD/SATD) |
186 | | * \param[in] cand_mode_list : pointer to candidate list buffer |
187 | | * |
188 | | * \return |
189 | | * None |
190 | | * |
191 | | * \author |
192 | | * Ittiam |
193 | | * |
194 | | ***************************************************************************** |
195 | | */ |
196 | | void ihevce_intra_populate_mode_bits_cost_bracketing( |
197 | | WORD32 top_intra_mode, |
198 | | WORD32 left_intra_mode, |
199 | | WORD32 available_top, |
200 | | WORD32 available_left, |
201 | | WORD32 cu_pos_y, |
202 | | UWORD16 *mode_bits_cost, |
203 | | UWORD16 *mode_bits, |
204 | | WORD32 lambda, |
205 | | WORD32 *cand_mode_list) |
206 | 9.36M | { |
207 | | /* local variables */ |
208 | 9.36M | WORD32 i; |
209 | 9.36M | WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top; |
210 | | |
211 | 9.36M | UWORD16 one_bits_cost = |
212 | 9.36M | COMPUTE_RATE_COST_CLIP30(4, lambda, (LAMBDA_Q_SHIFT + 1)); //1.5 * lambda |
213 | 9.36M | UWORD16 two_bits_cost = |
214 | 9.36M | COMPUTE_RATE_COST_CLIP30(6, lambda, (LAMBDA_Q_SHIFT + 1)); //2.5 * lambda |
215 | 9.36M | UWORD16 five_bits_cost = |
216 | 9.36M | COMPUTE_RATE_COST_CLIP30(12, lambda, (LAMBDA_Q_SHIFT + 1)); //5.5 * lambda |
217 | | |
218 | 337M | for(i = 0; i < 35; i++) |
219 | 327M | { |
220 | 327M | mode_bits_cost[i] = five_bits_cost; |
221 | 327M | mode_bits[i] = 5; |
222 | 327M | } |
223 | | |
224 | | /* EIID: set availability flag to zero if modes are invalid. |
225 | | Required since some CU's might be skipped (though available) |
226 | | and their modes will be set to 255 (-1)*/ |
227 | 9.36M | if(35 < top_intra_mode || 0 > top_intra_mode) |
228 | 0 | available_top = 0; |
229 | 9.36M | if(35 < left_intra_mode || 0 > left_intra_mode) |
230 | 0 | available_left = 0; |
231 | | |
232 | | /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */ |
233 | | /* N = top */ |
234 | 9.36M | if(0 == available_top) |
235 | 1.26M | { |
236 | 1.26M | cand_intra_pred_mode_top = INTRA_DC; |
237 | 1.26M | } |
238 | | /* for neighbour != INTRA, setting DC is done outside */ |
239 | 8.10M | else if(0 == cu_pos_y) /* It's on the CTB boundary */ |
240 | 1.09M | { |
241 | 1.09M | cand_intra_pred_mode_top = INTRA_DC; |
242 | 1.09M | } |
243 | 7.00M | else |
244 | 7.00M | { |
245 | 7.00M | cand_intra_pred_mode_top = top_intra_mode; |
246 | 7.00M | } |
247 | | |
248 | | /* N = left */ |
249 | 9.36M | if(0 == available_left) |
250 | 1.11M | { |
251 | 1.11M | cand_intra_pred_mode_left = INTRA_DC; |
252 | | //cand_intra_pred_mode_left = cand_intra_pred_mode_top; |
253 | 1.11M | } |
254 | | /* for neighbour != INTRA, setting DC is done outside */ |
255 | 8.24M | else |
256 | 8.24M | { |
257 | 8.24M | cand_intra_pred_mode_left = left_intra_mode; |
258 | 8.24M | } |
259 | | |
260 | | /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */ |
261 | 9.36M | if(cand_intra_pred_mode_left == cand_intra_pred_mode_top) |
262 | 4.66M | { |
263 | 4.66M | if(cand_intra_pred_mode_left < 2) |
264 | 3.66M | { |
265 | 3.66M | cand_mode_list[0] = INTRA_PLANAR; |
266 | 3.66M | cand_mode_list[1] = INTRA_DC; |
267 | 3.66M | cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */ |
268 | 3.66M | } |
269 | 998k | else |
270 | 998k | { |
271 | 998k | cand_mode_list[0] = cand_intra_pred_mode_left; |
272 | 998k | cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32); |
273 | 998k | cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32); |
274 | 998k | } |
275 | 4.66M | } |
276 | 4.70M | else |
277 | 4.70M | { |
278 | 4.70M | if(0 == available_left) |
279 | 786k | { |
280 | 786k | cand_mode_list[0] = cand_intra_pred_mode_top; |
281 | 786k | cand_mode_list[1] = cand_intra_pred_mode_left; |
282 | 786k | } |
283 | 3.91M | else |
284 | 3.91M | { |
285 | 3.91M | cand_mode_list[0] = cand_intra_pred_mode_left; |
286 | 3.91M | cand_mode_list[1] = cand_intra_pred_mode_top; |
287 | 3.91M | } |
288 | 4.70M | if((cand_intra_pred_mode_left != INTRA_PLANAR) && |
289 | 3.05M | (cand_intra_pred_mode_top != INTRA_PLANAR)) |
290 | 2.16M | { |
291 | 2.16M | cand_mode_list[2] = INTRA_PLANAR; |
292 | 2.16M | } |
293 | 2.53M | else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC)) |
294 | 570k | { |
295 | 570k | cand_mode_list[2] = INTRA_DC; |
296 | 570k | } |
297 | 1.96M | else |
298 | 1.96M | { |
299 | 1.96M | cand_mode_list[2] = INTRA_ANGULAR(26); |
300 | 1.96M | } |
301 | 4.70M | } |
302 | 9.36M | mode_bits_cost[cand_mode_list[0]] = one_bits_cost; |
303 | 9.36M | mode_bits_cost[cand_mode_list[1]] = two_bits_cost; |
304 | 9.36M | mode_bits_cost[cand_mode_list[2]] = two_bits_cost; |
305 | | |
306 | 9.36M | mode_bits[cand_mode_list[0]] = 2; |
307 | 9.36M | mode_bits[cand_mode_list[1]] = 3; |
308 | 9.36M | mode_bits[cand_mode_list[2]] = 3; |
309 | 9.36M | } |
310 | | |
311 | | /*! |
312 | | ****************************************************************************** |
313 | | * \if Function name : ihevce_pu_calc_4x4_blk \endif |
314 | | * |
315 | | * \brief |
316 | | * 4x4 pu (8x8 CU) mode decision using step 8421 method |
317 | | * |
318 | | * \param[in] ps_cu_node : pointer to cu node info buffer |
319 | | * \param[in] pu1_src : pointer to src pixels |
320 | | * \param[in] src_stride : frm source stride |
321 | | * \param[in] ref : pointer to reference pixels for prediction |
322 | | * \param[in] cand_mode_list : pointer to candidate list buffer |
323 | | * \param[in] best_costs_4x4 : pointer to 3 best cost buffer |
324 | | * \param[in] best_modes_4x4 : pointer to 3 best mode buffer |
325 | | * |
326 | | * \return |
327 | | * None |
328 | | * |
329 | | * \author |
330 | | * Ittiam |
331 | | * |
332 | | ***************************************************************************** |
333 | | */ |
334 | | void ihevce_pu_calc_4x4_blk( |
335 | | ihevce_ipe_ctxt_t *ps_ctxt, |
336 | | ihevce_ipe_cu_tree_t *ps_cu_node, |
337 | | UWORD8 *pu1_src, |
338 | | WORD32 src_stride, |
339 | | UWORD8 *ref, |
340 | | UWORD16 *mode_bits_cost, |
341 | | WORD32 *best_costs_4x4, |
342 | | UWORD8 *best_modes_4x4, |
343 | | func_selector_t *ps_func_selector) |
344 | 2.31M | { |
345 | 2.31M | WORD16 *pi2_trans_tmp = ps_ctxt->pi2_trans_tmp; |
346 | 2.31M | WORD16 *pi2_trans_out = ps_ctxt->pi2_trans_out; |
347 | 2.31M | UWORD8 u1_use_satd = ps_ctxt->u1_use_satd; |
348 | 2.31M | UWORD8 u1_level_1_refine_on = ps_ctxt->u1_level_1_refine_on; |
349 | | |
350 | 2.31M | WORD32 i, j = 0, i_end; |
351 | 2.31M | UWORD8 mode, best_amode = 255; |
352 | 2.31M | UWORD8 pred[16]; |
353 | | |
354 | 2.31M | UWORD16 sad; |
355 | 2.31M | WORD32 sad_cost = 0; |
356 | 2.31M | WORD32 best_asad_cost = 0xFFFFF; |
357 | 2.31M | WORD32 temp; |
358 | 2.31M | UWORD8 modes_to_eval[5]; |
359 | 2.31M | WORD32 costs_4x4[5]; |
360 | 2.31M | UWORD8 modes_4x4[5] = { 0, 1, 2, 3, 4 }; |
361 | | |
362 | | /* LO resolution hence low resolution disable */ |
363 | 2.31M | WORD32 u1_low_resol = 0; |
364 | 2.31M | UWORD8 au1_best_modes[1] = { 0 }; |
365 | 2.31M | WORD32 ai4_best_sad_costs[1] = { 0 }; |
366 | | |
367 | 2.31M | WORD16 *pi2_tmp = &pi2_trans_tmp[0]; |
368 | | |
369 | 2.31M | ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list = |
370 | 2.31M | &ps_ctxt->s_ipe_optimised_function_list; |
371 | | |
372 | | //apf_resd_trns[0] = &ihevc_resi_trans_4x4_ttype1; |
373 | | //apf_resd_trns[0] = &ihevc_HAD_4x4_8bit; |
374 | | |
375 | 13.8M | for(i = 0; i < 5; i++) |
376 | 11.5M | { |
377 | 11.5M | costs_4x4[i] = MAX_INTRA_COST_IPE; |
378 | 11.5M | } |
379 | | |
380 | 2.31M | ps_ipe_optimised_function_list->pf_ed_4x4_find_best_modes( |
381 | 2.31M | pu1_src, |
382 | 2.31M | src_stride, |
383 | 2.31M | ref, |
384 | 2.31M | mode_bits_cost, |
385 | 2.31M | au1_best_modes, |
386 | 2.31M | ai4_best_sad_costs, |
387 | 2.31M | u1_low_resol, |
388 | 2.31M | ps_ipe_optimised_function_list->pf_4x4_sad_computer); |
389 | | |
390 | 2.31M | best_amode = au1_best_modes[0]; |
391 | 2.31M | best_asad_cost = ai4_best_sad_costs[0]; |
392 | | |
393 | 2.31M | ASSERT(best_amode != 255); |
394 | | /* Around best level 4 angular mode, search for best level 2 mode */ |
395 | 2.31M | modes_to_eval[0] = best_amode - 2; |
396 | 2.31M | modes_to_eval[1] = best_amode + 2; |
397 | 2.31M | i = 0; |
398 | 2.31M | i_end = 2; |
399 | 2.31M | if(best_amode == 2) |
400 | 170k | i = 1; |
401 | 2.14M | else if(best_amode == 34) |
402 | 78.1k | i_end = 1; |
403 | 6.69M | for(; i < i_end; i++) |
404 | 4.38M | { |
405 | 4.38M | mode = modes_to_eval[i]; |
406 | | |
407 | 4.38M | g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode); |
408 | | |
409 | 4.38M | sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer(pu1_src, &pred[0], src_stride, 4); |
410 | | |
411 | 4.38M | sad_cost = sad; |
412 | 4.38M | sad_cost += mode_bits_cost[mode]; |
413 | | |
414 | 4.38M | if(sad_cost < best_asad_cost) |
415 | 297k | { |
416 | 297k | best_amode = mode; |
417 | 297k | best_asad_cost = sad_cost; |
418 | 297k | } |
419 | 4.38M | } |
420 | | |
421 | | /* Around best level 2 angular mode, search for best level 1 mode */ |
422 | | /* Also evaluate for non-angular mode */ |
423 | | |
424 | 2.31M | i = 0; |
425 | | /*Level 1 refinement is disabled for ES preset */ |
426 | 2.31M | if(1 == u1_level_1_refine_on) |
427 | 2.31M | { |
428 | 2.31M | if(best_amode != 2) |
429 | 2.15M | modes_to_eval[i++] = best_amode - 1; |
430 | 2.31M | modes_to_eval[i++] = best_amode; |
431 | 2.31M | } |
432 | | |
433 | 2.31M | modes_to_eval[i++] = 0; |
434 | 2.31M | modes_to_eval[i++] = 1; |
435 | | |
436 | 2.31M | if(1 == u1_level_1_refine_on) |
437 | 2.31M | { |
438 | 2.31M | if(best_amode != 34) |
439 | 2.24M | modes_to_eval[i++] = best_amode + 1; |
440 | 2.31M | } |
441 | 2.31M | i_end = i; |
442 | 2.31M | i = 0; |
443 | | |
444 | 13.6M | for(; i < i_end; i++) |
445 | 11.3M | { |
446 | 11.3M | mode = modes_to_eval[i]; |
447 | | |
448 | 11.3M | g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode); |
449 | | |
450 | | /* Hard coding to use SATD */ |
451 | 11.3M | if(u1_use_satd) |
452 | 8.17M | { |
453 | 8.17M | ps_func_selector->ihevc_resi_trans_4x4_ttype1_fptr( |
454 | 8.17M | pu1_src, &pred[0], (WORD32 *)pi2_tmp, pi2_trans_out, src_stride, 4, 4, NULL_PLANE); |
455 | | |
456 | 8.17M | sad = ihevce_ipe_pass_satd(pi2_trans_out, 4, 4); |
457 | 8.17M | } |
458 | 3.16M | else |
459 | 3.16M | { |
460 | 3.16M | sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer( |
461 | 3.16M | pu1_src, &pred[0], src_stride, 4); |
462 | 3.16M | } |
463 | 11.3M | sad_cost = sad; |
464 | 11.3M | sad_cost += mode_bits_cost[mode]; |
465 | | |
466 | 11.3M | costs_4x4[i] = sad_cost; |
467 | 11.3M | } |
468 | | |
469 | | /* Arrange the reference array in ascending order */ |
470 | 11.3M | for(i = 0; i < (i_end - 1); i++) |
471 | 9.03M | { |
472 | 31.2M | for(j = i + 1; j < i_end; j++) |
473 | 22.2M | { |
474 | 22.2M | if(costs_4x4[i] > costs_4x4[j]) |
475 | 7.21M | { |
476 | 7.21M | temp = costs_4x4[i]; |
477 | 7.21M | costs_4x4[i] = costs_4x4[j]; |
478 | 7.21M | costs_4x4[j] = temp; |
479 | | |
480 | 7.21M | temp = modes_4x4[i]; |
481 | 7.21M | modes_4x4[i] = modes_4x4[j]; |
482 | 7.21M | modes_4x4[j] = temp; |
483 | 7.21M | } |
484 | 22.2M | } |
485 | 9.03M | } |
486 | 9.26M | for(i = 0; i < 3; i++) |
487 | 6.94M | { |
488 | 6.94M | best_costs_4x4[i] = costs_4x4[i]; |
489 | 6.94M | best_modes_4x4[i] = modes_to_eval[modes_4x4[i]]; |
490 | 6.94M | } |
491 | | |
492 | 2.31M | { |
493 | 2.31M | ps_cu_node->best_mode = best_modes_4x4[0]; |
494 | 2.31M | ps_cu_node->best_cost = best_costs_4x4[0]; |
495 | 2.31M | ps_cu_node->best_satd = best_costs_4x4[0] - mode_bits_cost[ps_cu_node->best_mode]; |
496 | 2.31M | } |
497 | 2.31M | } |
498 | | |
499 | | /*! |
500 | | ****************************************************************************** |
501 | | * \if Function name : ihevce_pu_calc_8x8_blk \endif |
502 | | * |
503 | | * \brief |
504 | | * 4x4 pu (8x8 CU) mode decision loop using step 8421 method |
505 | | * |
506 | | * \param[in] ps_curr_src : pointer to src pixels struct |
507 | | * \param[in] ps_ctxt : pointer to IPE context struct |
508 | | * \param[in] ps_cu_node : pointer to cu node info buffer |
509 | | * |
510 | | * \return |
511 | | * None |
512 | | * |
513 | | * \author |
514 | | * Ittiam |
515 | | * |
516 | | ***************************************************************************** |
517 | | */ |
518 | | void ihevce_pu_calc_8x8_blk( |
519 | | iv_enc_yuv_buf_t *ps_curr_src, |
520 | | ihevce_ipe_ctxt_t *ps_ctxt, |
521 | | ihevce_ipe_cu_tree_t *ps_cu_node, |
522 | | func_selector_t *ps_func_selector) |
523 | 578k | { |
524 | 578k | WORD32 i, j; |
525 | 578k | WORD32 nbr_flags; |
526 | 578k | nbr_avail_flags_t s_nbr; |
527 | 578k | WORD32 trans_size = ps_cu_node->ps_parent->u1_cu_size >> 1; |
528 | | |
529 | 578k | UWORD8 *pu1_src_4x4; |
530 | 578k | WORD32 xA, xB, yA, yB; |
531 | | //WORD32 x, y, size; |
532 | 578k | WORD32 top_intra_mode; |
533 | 578k | WORD32 left_intra_mode; |
534 | | // WORD8 *top_intra_mode_ptr; |
535 | | // WORD8 *left_intra_mode_ptr; |
536 | 578k | UWORD8 *pu1_orig; |
537 | 578k | WORD32 src_strd = ps_curr_src->i4_y_strd; |
538 | | |
539 | 578k | WORD32 cu_pos_x = ps_cu_node->ps_parent->u2_x0 << 1; |
540 | 578k | WORD32 cu_pos_y = ps_cu_node->ps_parent->u2_y0 << 1; |
541 | 578k | ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr; |
542 | | |
543 | 578k | ihevc_intra_pred_luma_ref_substitution_fptr = |
544 | 578k | ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr; |
545 | | |
546 | 578k | pu1_orig = (UWORD8 *)(ps_curr_src->pv_y_buf) + |
547 | 578k | ((ps_cu_node->ps_parent->u2_y0 << 3) * src_strd) + |
548 | 578k | (ps_cu_node->ps_parent->u2_x0 << 3); |
549 | 1.73M | for(i = 0; i < 2; i++) |
550 | 1.15M | { |
551 | 3.47M | for(j = 0; j < 2; j++) |
552 | 2.31M | { |
553 | 2.31M | WORD32 cand_mode_list[3]; |
554 | 2.31M | pu1_src_4x4 = pu1_orig + (i * trans_size * src_strd) + (j * trans_size); |
555 | | /* get the neighbour availability flags */ |
556 | 2.31M | nbr_flags = ihevce_get_nbr_intra( |
557 | 2.31M | &s_nbr, |
558 | 2.31M | ps_ctxt->pu1_ctb_nbr_map, |
559 | 2.31M | ps_ctxt->i4_nbr_map_strd, |
560 | 2.31M | cu_pos_x + ((j) * (trans_size >> 2)), |
561 | 2.31M | cu_pos_y + ((i) * (trans_size >> 2)), |
562 | 2.31M | trans_size >> 2); |
563 | | |
564 | | /* call the function which populates sad cost for all the modes */ |
565 | 2.31M | xA = ((ps_cu_node->ps_parent->u2_x0 << 3) >> 2) + j; |
566 | 2.31M | yA = ((ps_cu_node->ps_parent->u2_y0 << 3) >> 2) + 1 + i; |
567 | 2.31M | xB = xA + 1; |
568 | 2.31M | yB = yA - 1; |
569 | 2.31M | left_intra_mode = ps_ctxt->au1_ctb_mode_map[yA][xA]; |
570 | 2.31M | top_intra_mode = ps_ctxt->au1_ctb_mode_map[yB][xB]; |
571 | | |
572 | 2.31M | ihevce_intra_populate_mode_bits_cost_bracketing( |
573 | 2.31M | top_intra_mode, |
574 | 2.31M | left_intra_mode, |
575 | 2.31M | s_nbr.u1_top_avail, |
576 | 2.31M | s_nbr.u1_left_avail, |
577 | 2.31M | ps_cu_node->ps_parent->u2_y0, |
578 | 2.31M | &ps_ctxt->au2_mode_bits_cost_8x8pu[i * 2 + j][0], |
579 | 2.31M | &ps_ctxt->au2_mode_bits_8x8_pu[0], |
580 | 2.31M | ps_ctxt->i4_ol_sad_lambda, |
581 | 2.31M | cand_mode_list); |
582 | | |
583 | | /* call the function which populates ref data for intra predicion */ |
584 | 2.31M | ihevc_intra_pred_luma_ref_substitution_fptr( |
585 | 2.31M | pu1_src_4x4 - src_strd - 1, |
586 | 2.31M | pu1_src_4x4 - src_strd, |
587 | 2.31M | pu1_src_4x4 - 1, |
588 | 2.31M | src_strd, |
589 | 2.31M | 4, |
590 | 2.31M | nbr_flags, |
591 | 2.31M | &ps_ctxt->au1_ref_8x8pu[i * 2 + j][0], |
592 | 2.31M | 0); |
593 | | |
594 | 2.31M | ihevce_pu_calc_4x4_blk( |
595 | 2.31M | ps_ctxt, |
596 | 2.31M | ps_cu_node->ps_sub_cu[(i * 2) + j], |
597 | 2.31M | pu1_src_4x4, |
598 | 2.31M | src_strd, |
599 | 2.31M | &ps_ctxt->au1_ref_8x8pu[i * 2 + j][0], |
600 | 2.31M | &ps_ctxt->au2_mode_bits_cost_8x8pu[i * 2 + j][0], |
601 | 2.31M | &ps_cu_node->ps_sub_cu[(i * 2) + j]->au4_best_cost_1tu[0], |
602 | 2.31M | &ps_cu_node->ps_sub_cu[(i * 2) + j]->au1_best_mode_1tu[0], |
603 | 2.31M | ps_func_selector); |
604 | | |
605 | | /*&au4_cost_4x4[i*2 + j][0], |
606 | | &au1_modes_4x4[i*2 + j][0]);*/ //TTODO : mode will change for the four partition |
607 | | |
608 | 2.31M | ihevce_set_nbr_map( |
609 | 2.31M | ps_ctxt->pu1_ctb_nbr_map, |
610 | 2.31M | ps_ctxt->i4_nbr_map_strd, |
611 | 2.31M | cu_pos_x + ((j) * (trans_size >> 2)), |
612 | 2.31M | cu_pos_y + ((i) * (trans_size >> 2)), |
613 | 2.31M | (trans_size >> 2), |
614 | 2.31M | 1); |
615 | | |
616 | 2.31M | xA = ((ps_cu_node->ps_parent->u2_x0 << 3) >> 2) + 1 + j; |
617 | 2.31M | yA = ((ps_cu_node->ps_parent->u2_y0 << 3) >> 2) + 1 + i; |
618 | 2.31M | ps_ctxt->au1_ctb_mode_map[yA][xA] = ps_cu_node->ps_sub_cu[i * 2 + j]->best_mode; |
619 | 2.31M | ps_cu_node->ps_sub_cu[i * 2 + j]->u2_mode_bits_cost = |
620 | 2.31M | ps_ctxt->au2_mode_bits_8x8_pu[ps_cu_node->ps_sub_cu[i * 2 + j]->best_mode]; |
621 | 2.31M | } |
622 | 1.15M | } |
623 | 578k | } |
624 | | |
625 | | /*! |
626 | | ****************************************************************************** |
627 | | * \if Function name : ihevce_bracketing_analysis \endif |
628 | | * |
629 | | * \brief |
630 | | * Interface function that evaluates MAX cu and MAX - 1 cu, with MAX cu size |
631 | | * info decided coarse resolution mode decision. Compares the SATD/SAD cost btwn |
632 | | * 2 CUS and determines the actual CU size and best 3 modes to be given to rdopt |
633 | | * |
634 | | * \param[in] ps_ctxt : pointer to IPE context struct |
635 | | * \param[in] ps_cu_node : pointer to cu node info buffer |
636 | | * \param[in] ps_curr_src : pointer to src pixels struct |
637 | | * \param[in] ps_ctb_out : pointer to ip ctb out struct |
638 | | * \param[in] ps_row_cu : pointer to cu analyse struct |
639 | | * \param[in] ps_ed_l1_ctb : pointer to level 1 early deci struct |
640 | | * \param[in] ps_ed_l2_ctb : pointer to level 2 early deci struct |
641 | | * \param[in] ps_l0_ipe_out_ctb : pointer to ipe_l0_ctb_analyse_for_me_t struct |
642 | | * |
643 | | * \return |
644 | | * None |
645 | | * |
646 | | * \author |
647 | | * Ittiam |
648 | | * |
649 | | ***************************************************************************** |
650 | | */ |
651 | | void ihevce_bracketing_analysis( |
652 | | ihevce_ipe_ctxt_t *ps_ctxt, |
653 | | ihevce_ipe_cu_tree_t *ps_cu_node, |
654 | | iv_enc_yuv_buf_t *ps_curr_src, |
655 | | ctb_analyse_t *ps_ctb_out, |
656 | | //cu_analyse_t *ps_row_cu, |
657 | | ihevce_ed_blk_t *ps_ed_l1_ctb, |
658 | | ihevce_ed_blk_t *ps_ed_l2_ctb, |
659 | | ihevce_ed_ctb_l1_t *ps_ed_ctb_l1, |
660 | | ipe_l0_ctb_analyse_for_me_t *ps_l0_ipe_out_ctb) |
661 | 203k | { |
662 | 203k | WORD32 cu_pos_x = 0; |
663 | 203k | WORD32 cu_pos_y = 0; |
664 | | |
665 | 203k | UWORD8 u1_curr_ctb_wdt = ps_cu_node->u1_width; |
666 | 203k | UWORD8 u1_curr_ctb_hgt = ps_cu_node->u1_height; |
667 | 203k | WORD32 num_8x8_blks_x = (u1_curr_ctb_wdt >> 3); |
668 | 203k | WORD32 num_8x8_blks_y = (u1_curr_ctb_hgt >> 3); |
669 | | |
670 | 203k | ihevce_ed_blk_t *ps_ed_blk_l1 = ps_ed_l1_ctb; |
671 | 203k | ihevce_ed_blk_t *ps_ed_blk_l2 = ps_ed_l2_ctb; |
672 | | |
673 | 203k | WORD32 i; |
674 | 203k | WORD32 cand_mode_list[3]; |
675 | | //cu_analyse_t *ps_curr_cu = ps_row_cu; |
676 | 203k | WORD32 blk_cnt = 0; |
677 | 203k | WORD32 j = 0; |
678 | 203k | WORD32 merge_32x32_l1, merge_32x32_l2; |
679 | | |
680 | 203k | WORD32 i4_skip_intra_eval_32x32_l1; |
681 | | //EIID: flag indicating number of 16x16 blocks to be skipped for intra evaluation within 32x32 block |
682 | | |
683 | 203k | WORD32 parent_cost = 0; |
684 | 203k | WORD32 child_cost[4] = { 0 }; |
685 | 203k | WORD32 child_cost_least = 0; |
686 | 203k | WORD32 child_satd[4] = { 0 }; |
687 | 203k | WORD32 x, y, size; |
688 | 203k | WORD32 merge_64x64 = 1; |
689 | 203k | UWORD8 au1_best_32x32_modes[4]; |
690 | 203k | WORD32 au4_best_32x32_cost[4]; |
691 | 203k | WORD32 parent_best_mode; |
692 | 203k | UWORD8 best_mode; |
693 | | |
694 | 203k | WORD32 i4_quality_preset = ps_ctxt->i4_quality_preset; |
695 | | /* flag to control 1CU-4TU modes based on quality preset */ |
696 | | /* if set 1CU-4TU are explicity evaluated else 1CU-1TU modes are copied */ |
697 | 203k | WORD32 i4_enable_1cu_4tu = (i4_quality_preset == IHEVCE_QUALITY_P2) || |
698 | 176k | (i4_quality_preset == IHEVCE_QUALITY_P0); |
699 | | |
700 | | /* flag to control 4CU-16TU mode based on quality preset */ |
701 | | /* if set 4CU-16TU are explicity evaluated else 4CU-4TU modes are copied*/ |
702 | 203k | WORD32 i4_enable_4cu_16tu = (i4_quality_preset == IHEVCE_QUALITY_P2) || |
703 | 176k | (i4_quality_preset == IHEVCE_QUALITY_P0); |
704 | | |
705 | 203k | WORD32 i4_mod_factor_num, i4_mod_factor_den = QP_MOD_FACTOR_DEN; //2; |
706 | 203k | float f_strength; |
707 | | /* Accumalte satd */ |
708 | 203k | LWORD64 i8_frame_acc_satd_cost = 0, i8_frame_acc_satd_by_modqp_q10 = 0; |
709 | 203k | WORD32 i4_ctb_acc_satd = 0; |
710 | | |
711 | | /* Accumalate Mode bits cost */ |
712 | 203k | LWORD64 i8_frame_acc_mode_bits_cost = 0; |
713 | | |
714 | | /* Step2 is bypassed for parent, uses children modes*/ |
715 | 203k | WORD32 step2_bypass = 1; |
716 | | |
717 | 203k | if(1 == ps_ctxt->u1_disable_child_cu_decide) |
718 | 0 | step2_bypass = 0; |
719 | | |
720 | 203k | ps_cu_node->ps_parent = ps_ctxt->ps_ipe_cu_tree; |
721 | 1.01M | for(i = 0; i < 4; i++) |
722 | 813k | { |
723 | 813k | ps_cu_node->ps_sub_cu[i] = ps_ctxt->ps_ipe_cu_tree + 1 + i; |
724 | 813k | } |
725 | | |
726 | | /* Loop for all 8x8 block in a CTB */ |
727 | 203k | ps_ctb_out->u4_cu_split_flags = 0x1; |
728 | | |
729 | | /* Initialize intra 64x64, 32x32 and 16x16 costs to max value */ |
730 | 1.01M | for(i = 0; i < (MAX_CU_IN_CTB >> 4); i++) |
731 | 813k | { |
732 | 813k | ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[i] = MAX_INTRA_COST_IPE; |
733 | 813k | } |
734 | | |
735 | 3.45M | for(i = 0; i < (MAX_CU_IN_CTB >> 2); i++) |
736 | 3.25M | { |
737 | 3.25M | ps_l0_ipe_out_ctb->ai4_best16x16_intra_cost[i] = MAX_INTRA_COST_IPE; |
738 | 3.25M | } |
739 | | |
740 | 13.2M | for(i = 0; i < (MAX_CU_IN_CTB); i++) |
741 | 13.0M | { |
742 | 13.0M | ps_l0_ipe_out_ctb->ai4_best8x8_intra_cost[i] = MAX_INTRA_COST_IPE; |
743 | 13.0M | } |
744 | | |
745 | 203k | ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = MAX_INTRA_COST_IPE; |
746 | | |
747 | | /* by default 64x64 modes are set to default values DC and Planar */ |
748 | 203k | ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[0] = 0; |
749 | 203k | ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[1] = 1; |
750 | 203k | ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[2] = 255; |
751 | | |
752 | | /* by default 64x4 split is set to 1 */ |
753 | 203k | ps_l0_ipe_out_ctb->u1_split_flag = 1; |
754 | | |
755 | | /* Modulation factor calculated based on spatial variance instead of hardcoded val*/ |
756 | 203k | i4_mod_factor_num = ps_ctxt->ai4_mod_factor_derived_by_variance[1]; //16; |
757 | | |
758 | 203k | f_strength = ps_ctxt->f_strength; |
759 | | |
760 | | /* ------------------------------------------------ */ |
761 | | /* populate the early decisions done by L1 analysis */ |
762 | | /* ------------------------------------------------ */ |
763 | 3.45M | for(i = 0; i < (MAX_CU_IN_CTB >> 2); i++) |
764 | 3.25M | { |
765 | 3.25M | ps_l0_ipe_out_ctb->ai4_best_sad_8x8_l1_ipe[i] = ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[i]; |
766 | 3.25M | ps_l0_ipe_out_ctb->ai4_best_sad_cost_8x8_l1_ipe[i] = ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_ipe[i]; |
767 | 3.25M | ps_l0_ipe_out_ctb->ai4_best_sad_8x8_l1_me[i] = ps_ed_ctb_l1->i4_best_sad_8x8_l1_me[i]; |
768 | 3.25M | ps_l0_ipe_out_ctb->ai4_best_sad_cost_8x8_l1_me[i] = ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_me[i]; |
769 | 3.25M | } |
770 | | |
771 | | /* Init CTB level accumalated SATD and MPM bits */ |
772 | 203k | ps_l0_ipe_out_ctb->i4_ctb_acc_satd = 0; |
773 | 203k | ps_l0_ipe_out_ctb->i4_ctb_acc_mpm_bits = 0; |
774 | | |
775 | | /* ------------------------------------------------ */ |
776 | | /* Loop over all the blocks in current CTB */ |
777 | | /* ------------------------------------------------ */ |
778 | 203k | { |
779 | | /* 64 8x8 blocks should be encountered for the do,while loop to exit */ |
780 | 203k | do |
781 | 2.05M | { |
782 | 2.05M | intra32_analyse_t *ps_intra32_analyse; |
783 | 2.05M | intra16_analyse_t *ps_intra16_analyse; |
784 | 2.05M | WORD32 *pi4_intra_32_cost; |
785 | 2.05M | WORD32 *pi4_intra_16_cost; |
786 | 2.05M | WORD32 *pi4_intra_8_cost; |
787 | 2.05M | WORD32 merge_16x16_l1; |
788 | | |
789 | | /* Given the blk_cnt, get the CU's top-left 8x8 block's x and y positions within the CTB */ |
790 | 2.05M | cu_pos_x = gau1_cu_pos_x[blk_cnt]; |
791 | 2.05M | cu_pos_y = gau1_cu_pos_y[blk_cnt]; |
792 | | |
793 | | /* default value for 32x32 best mode - blk_cnt increases by 16 for each 32x32 */ |
794 | 2.05M | au1_best_32x32_modes[blk_cnt >> 4] = 255; |
795 | | |
796 | | /* get the corresponding intra 32 analyse pointer use (blk_cnt / 16) */ |
797 | | /* blk cnt is in terms of 8x8 units so a 32x32 will have 16 8x8 units */ |
798 | 2.05M | ps_intra32_analyse = &ps_l0_ipe_out_ctb->as_intra32_analyse[blk_cnt >> 4]; |
799 | | |
800 | | /* get the corresponding intra 16 analyse pointer: use ((blk_cnt & 0xF) / 4) */ |
801 | | /* blk cnt is in terms of 8x8 units so a 16x16 will have 4 8x8 units */ |
802 | 2.05M | ps_intra16_analyse = &ps_intra32_analyse->as_intra16_analyse[(blk_cnt & 0xF) >> 2]; |
803 | | |
804 | | /* Line below assumes min_cu_size of 8 - checks whether CU starts are within picture */ |
805 | 2.05M | if((cu_pos_x < num_8x8_blks_x) && (cu_pos_y < num_8x8_blks_y)) |
806 | 1.30M | { |
807 | | /* Reset to zero for every cu decision */ |
808 | 1.30M | merge_32x32_l1 = 0; |
809 | | |
810 | 1.30M | child_cost_least = 0; |
811 | | |
812 | | /* At L2, each 4x4 corresponds to 16x16 at L0. Every 4 16x16 stores a merge_success flag */ |
813 | 1.30M | ps_ed_blk_l2 = ps_ed_l2_ctb + (blk_cnt >> 2); |
814 | | |
815 | 1.30M | pi4_intra_32_cost = &ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[blk_cnt >> 4]; |
816 | | |
817 | | /* by default 32x32 modes are set to default values DC and Planar */ |
818 | 1.30M | ps_intra32_analyse->au1_best_modes_32x32_tu[0] = 0; |
819 | 1.30M | ps_intra32_analyse->au1_best_modes_32x32_tu[1] = 1; |
820 | 1.30M | ps_intra32_analyse->au1_best_modes_32x32_tu[2] = 255; |
821 | | |
822 | | /* By default 32x32 split is set to 1 */ |
823 | 1.30M | ps_intra32_analyse->b1_split_flag = 1; |
824 | | |
825 | 1.30M | ps_intra32_analyse->au1_best_modes_16x16_tu[0] = 0; |
826 | 1.30M | ps_intra32_analyse->au1_best_modes_16x16_tu[1] = 1; |
827 | 1.30M | ps_intra32_analyse->au1_best_modes_16x16_tu[2] = 255; |
828 | | |
829 | | /* 16x16 cost & 8x8 cost are stored in Raster scan order */ |
830 | | /* stride of 16x16 buffer is MAX_CU_IN_CTB_ROW >> 1 */ |
831 | | /* stride of 8x8 buffer is MAX_CU_IN_CTB_ROW */ |
832 | 1.30M | { |
833 | 1.30M | WORD32 pos_x_8x8, pos_y_8x8; |
834 | | |
835 | 1.30M | pos_x_8x8 = gau1_cu_pos_x[blk_cnt]; |
836 | 1.30M | pos_y_8x8 = gau1_cu_pos_y[blk_cnt]; |
837 | | |
838 | 1.30M | pi4_intra_16_cost = &ps_l0_ipe_out_ctb->ai4_best16x16_intra_cost[0]; |
839 | | |
840 | 1.30M | pi4_intra_16_cost += |
841 | 1.30M | ((pos_x_8x8 >> 1) + ((pos_y_8x8 >> 1) * (MAX_CU_IN_CTB_ROW >> 1))); |
842 | | |
843 | 1.30M | pi4_intra_8_cost = &ps_l0_ipe_out_ctb->ai4_best8x8_intra_cost[0]; |
844 | | |
845 | 1.30M | pi4_intra_8_cost += (pos_x_8x8 + (pos_y_8x8 * MAX_CU_IN_CTB_ROW)); |
846 | 1.30M | } |
847 | | |
848 | 1.30M | merge_32x32_l1 = 0; |
849 | 1.30M | merge_32x32_l2 = 0; |
850 | 1.30M | i4_skip_intra_eval_32x32_l1 = 0; |
851 | | |
852 | | /* Enable 16x16 merge iff sufficient 8x8 blocks remain in the current CTB */ |
853 | 1.30M | merge_16x16_l1 = 0; |
854 | 1.30M | if(((num_8x8_blks_x - cu_pos_x) >= 2) && ((num_8x8_blks_y - cu_pos_y) >= 2)) |
855 | 1.23M | { |
856 | 1.23M | #if !ENABLE_UNIFORM_CU_SIZE_8x8 |
857 | 1.23M | merge_16x16_l1 = ps_ed_blk_l1->merge_success; |
858 | | #else |
859 | | merge_16x16_l1 = 0; |
860 | | #endif |
861 | 1.23M | } |
862 | | |
863 | | /* Enable 32x32 merge iff sufficient 8x8 blocks remain in the current CTB */ |
864 | 1.30M | if(((num_8x8_blks_x - cu_pos_x) >= 4) && ((num_8x8_blks_y - cu_pos_y) >= 4)) |
865 | 997k | { |
866 | | /* Count how many of the 4 L1 (8x8) merge flags say merge */ |
867 | 4.98M | for(i = 0; i < 4; i++) |
868 | 3.99M | { |
869 | 3.99M | merge_32x32_l1 += (ps_ed_blk_l1 + (i * 4))->merge_success; |
870 | | |
871 | | //EIID: num 16x16 blocks for which inter_intra flag says eval only inter, i.e. skip intra eval |
872 | 3.99M | i4_skip_intra_eval_32x32_l1 += |
873 | 3.99M | ((ps_ed_blk_l1 + (i * 4))->intra_or_inter == 2) ? 1 : 0; |
874 | 3.99M | } |
875 | | |
876 | 997k | #if !ENABLE_UNIFORM_CU_SIZE_8x8 |
877 | | /* Check 1 flag from L2(16x16) say merge */ |
878 | 997k | merge_32x32_l2 = ps_ed_blk_l2->merge_success; |
879 | | #else |
880 | | merge_32x32_l1 = 0; |
881 | | merge_32x32_l2 = 0; |
882 | | #endif |
883 | 997k | } |
884 | | |
885 | 1.30M | #if DISABLE_L2_IPE_IN_PB_L1_IN_B |
886 | 1.30M | if((i4_quality_preset == IHEVCE_QUALITY_P6) && (ps_ctxt->i4_slice_type != ISLICE)) |
887 | 219k | { |
888 | 219k | merge_32x32_l2 = 0; |
889 | 219k | ps_ed_blk_l2->merge_success = 0; |
890 | 219k | } |
891 | 1.30M | #endif |
892 | | |
893 | 1.30M | ps_intra32_analyse->b1_valid_cu = 1; |
894 | | |
895 | | /* If Merge success from all 4 L1 and L2, max CU size 32x32 is chosen */ |
896 | | /* EIID: if all blocks to be skipped then skip entire 32x32 for intra eval, |
897 | | if no blocks to be skipped then eval entire 32x32, |
898 | | else break the merge and go to 16x16 level eval */ |
899 | 1.30M | if((merge_32x32_l1 == 4) && merge_32x32_l2 && |
900 | 595k | ((i4_skip_intra_eval_32x32_l1 == 0) || |
901 | 40.3k | (i4_skip_intra_eval_32x32_l1 == 4)) //comment this line to disable break-merge |
902 | 1.30M | ) |
903 | 587k | { |
904 | | #if IP_DBG_L1_l2 |
905 | | /* Populate params for 32x32 block analysis */ |
906 | | ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE; |
907 | | |
908 | | ps_cu_node->ps_parent->u1_cu_size = 32; |
909 | | ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[blk_cnt]; /* Populate properly */ |
910 | | ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[blk_cnt]; /* Populate properly */ |
911 | | ps_cu_node->ps_parent->best_mode = ps_ed_blk_l2->best_merge_mode; |
912 | | /* CU size 32x32 and fill the final cu params */ |
913 | | |
914 | | ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); |
915 | | |
916 | | /* Increment pointers */ |
917 | | ps_ed_blk_l1 += 16; |
918 | | blk_cnt += 16; |
919 | | ps_row_cu++; |
920 | | merge_64x64 &= 1; |
921 | | #else |
922 | | |
923 | | /* EIID: dont evaluate if all 4 blocks at L1 said inter is winning*/ |
924 | 587k | if(4 == i4_skip_intra_eval_32x32_l1 && (ps_ctxt->i4_slice_type != ISLICE)) |
925 | 32.4k | { |
926 | 32.4k | WORD32 i4_local_ctr1, i4_local_ctr2; |
927 | | |
928 | 32.4k | ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE; |
929 | | |
930 | 32.4k | ps_cu_node->ps_parent->u1_cu_size = 32; |
931 | 32.4k | ps_cu_node->ps_parent->u2_x0 = |
932 | 32.4k | gau1_cu_pos_x[blk_cnt]; /* Populate properly */ |
933 | 32.4k | ps_cu_node->ps_parent->u2_y0 = |
934 | 32.4k | gau1_cu_pos_y[blk_cnt]; /* Populate properly */ |
935 | 32.4k | ps_cu_node->ps_parent->best_mode = |
936 | 32.4k | INTRA_DC; //ps_ed_blk_l2->best_merge_mode; |
937 | | /* CU size 32x32 and fill the final cu params */ |
938 | | |
939 | | /* fill in the first modes as invalid */ |
940 | 32.4k | ps_cu_node->ps_parent->au1_best_mode_1tu[0] = INTRA_DC; |
941 | 32.4k | ps_cu_node->ps_parent->au1_best_mode_1tu[1] = |
942 | 32.4k | INTRA_DC; //for safery. Since update_cand_list will set num_modes as 3 |
943 | 32.4k | ps_cu_node->ps_parent->au1_best_mode_1tu[2] = INTRA_DC; |
944 | | |
945 | 32.4k | ps_cu_node->ps_parent->au1_best_mode_4tu[0] = INTRA_DC; |
946 | 32.4k | ps_cu_node->ps_parent->au1_best_mode_4tu[1] = INTRA_DC; |
947 | 32.4k | ps_cu_node->ps_parent->au1_best_mode_4tu[2] = INTRA_DC; |
948 | | |
949 | 32.4k | ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); |
950 | | |
951 | | //ps_row_cu->s_cu_intra_cand.b6_num_intra_cands = 0; |
952 | | //ps_row_cu->u1_num_intra_rdopt_cands = 0; |
953 | | |
954 | 32.4k | ps_intra32_analyse->b1_valid_cu = 0; |
955 | 32.4k | ps_intra32_analyse->b1_split_flag = 0; |
956 | 32.4k | ps_intra32_analyse->b1_merge_flag = 0; |
957 | | /*memset (&ps_intra32_analyse->au1_best_modes_32x32_tu, |
958 | | 255, |
959 | | NUM_BEST_MODES); |
960 | | memset (&ps_intra32_analyse->au1_best_modes_16x16_tu, |
961 | | 255, |
962 | | NUM_BEST_MODES);*/ |
963 | | //set only first mode since if it's 255. it wont go ahead |
964 | 32.4k | ps_intra32_analyse->au1_best_modes_32x32_tu[0] = 255; |
965 | 32.4k | ps_intra32_analyse->au1_best_modes_16x16_tu[0] = 255; |
966 | | |
967 | 32.4k | *pi4_intra_32_cost = MAX_INTRA_COST_IPE; |
968 | | |
969 | | /*since ME will start evaluating from bottom up, set the lower |
970 | | cu size data invalid */ |
971 | 162k | for(i4_local_ctr1 = 0; i4_local_ctr1 < 4; i4_local_ctr1++) |
972 | 129k | { |
973 | 129k | WORD32 *pi4_intra_8_cost_curr16; |
974 | | |
975 | 129k | ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] |
976 | 129k | .au1_best_modes_16x16_tu[0] = 255; |
977 | 129k | ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] |
978 | 129k | .au1_best_modes_8x8_tu[0] = 255; |
979 | 129k | ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1].b1_merge_flag = 0; |
980 | 129k | ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1].b1_valid_cu = 0; |
981 | 129k | ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1].b1_split_flag = 0; |
982 | | |
983 | 129k | pi4_intra_16_cost |
984 | 129k | [(i4_local_ctr1 & 1) + ((MAX_CU_IN_CTB_ROW >> 1) * |
985 | 129k | (i4_local_ctr1 >> 1))] = MAX_INTRA_COST_IPE; |
986 | | |
987 | 129k | pi4_intra_8_cost_curr16 = pi4_intra_8_cost + ((i4_local_ctr1 & 1) << 1); |
988 | 129k | pi4_intra_8_cost_curr16 += |
989 | 129k | ((i4_local_ctr1 >> 1) << 1) * MAX_CU_IN_CTB_ROW; |
990 | | |
991 | 649k | for(i4_local_ctr2 = 0; i4_local_ctr2 < 4; i4_local_ctr2++) |
992 | 519k | { |
993 | 519k | ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] |
994 | 519k | .as_intra8_analyse[i4_local_ctr2] |
995 | 519k | .au1_4x4_best_modes[0][0] = 255; |
996 | 519k | ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] |
997 | 519k | .as_intra8_analyse[i4_local_ctr2] |
998 | 519k | .au1_4x4_best_modes[1][0] = 255; |
999 | 519k | ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] |
1000 | 519k | .as_intra8_analyse[i4_local_ctr2] |
1001 | 519k | .au1_4x4_best_modes[2][0] = 255; |
1002 | 519k | ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] |
1003 | 519k | .as_intra8_analyse[i4_local_ctr2] |
1004 | 519k | .au1_4x4_best_modes[3][0] = 255; |
1005 | 519k | ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] |
1006 | 519k | .as_intra8_analyse[i4_local_ctr2] |
1007 | 519k | .au1_best_modes_8x8_tu[0] = 255; |
1008 | 519k | ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] |
1009 | 519k | .as_intra8_analyse[i4_local_ctr2] |
1010 | 519k | .au1_best_modes_4x4_tu[0] = 255; |
1011 | 519k | ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1] |
1012 | 519k | .as_intra8_analyse[i4_local_ctr2] |
1013 | 519k | .b1_valid_cu = 0; |
1014 | | |
1015 | 519k | pi4_intra_8_cost_curr16 |
1016 | 519k | [(i4_local_ctr2 & 1) + |
1017 | 519k | (MAX_CU_IN_CTB_ROW * (i4_local_ctr2 >> 1))] = |
1018 | 519k | MAX_INTRA_COST_IPE; |
1019 | 519k | } |
1020 | 129k | } |
1021 | | |
1022 | | /* set neighbours even if intra is not evaluated, since source is always available. */ |
1023 | 32.4k | ihevce_set_nbr_map( |
1024 | 32.4k | ps_ctxt->pu1_ctb_nbr_map, |
1025 | 32.4k | ps_ctxt->i4_nbr_map_strd, |
1026 | 32.4k | ps_cu_node->ps_parent->u2_x0 << 1, |
1027 | 32.4k | ps_cu_node->ps_parent->u2_y0 << 1, |
1028 | 32.4k | (ps_cu_node->ps_parent->u1_cu_size >> 2), |
1029 | 32.4k | 1); |
1030 | | |
1031 | | /* cost accumulation of best cu size candidate */ |
1032 | | /*i8_frame_acc_satd_cost += parent_cost;*/ |
1033 | | |
1034 | | /* Mode bits cost accumalation for best cu size and cu mode */ |
1035 | | /*i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost;*/ |
1036 | | |
1037 | | /*satd/mod_qp accumulation of best cu */ |
1038 | | /*i8_frame_acc_satd_by_modqp_q10 += ((LWORD64)ps_cu_node->ps_parent->best_satd << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3))/i4_q_scale_q3_mod;*/ |
1039 | | |
1040 | | /* Increment pointers */ |
1041 | 32.4k | ps_ed_blk_l1 += 16; |
1042 | 32.4k | blk_cnt += 16; |
1043 | | //ps_row_cu++; |
1044 | 32.4k | merge_64x64 = 0; |
1045 | | |
1046 | | /* increment for stat purpose only. Increment is valid only on single thread */ |
1047 | 32.4k | ps_ctxt->u4_num_16x16_skips_at_L0_IPE += 4; |
1048 | 32.4k | } |
1049 | 555k | else |
1050 | 555k | { |
1051 | | /* Revaluation of 4 16x16 blocks at 8x8 prediction level */ |
1052 | | //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map)); |
1053 | | |
1054 | 555k | if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) && |
1055 | 40.8k | (ps_ctxt->i4_slice_type == PSLICE)) |
1056 | 0 | { |
1057 | 0 | ps_ctxt->u1_disable_child_cu_decide = 1; |
1058 | 0 | step2_bypass = 0; |
1059 | 0 | } |
1060 | | |
1061 | | /* Based on the flag, Child modes decision can be disabled*/ |
1062 | 555k | if(0 == ps_ctxt->u1_disable_child_cu_decide) |
1063 | 555k | { |
1064 | 2.77M | for(j = 0; j < 4; j++) |
1065 | 2.22M | { |
1066 | 2.22M | ps_cu_node->ps_sub_cu[j]->u2_x0 = |
1067 | 2.22M | gau1_cu_pos_x[blk_cnt + (j * 4)]; /* Populate properly */ |
1068 | 2.22M | ps_cu_node->ps_sub_cu[j]->u2_y0 = |
1069 | 2.22M | gau1_cu_pos_y[blk_cnt + (j * 4)]; /* Populate properly */ |
1070 | 2.22M | ps_cu_node->ps_sub_cu[j]->u1_cu_size = 16; |
1071 | | |
1072 | 2.22M | { |
1073 | 2.22M | WORD32 best_ang_mode = |
1074 | 2.22M | (ps_ed_blk_l1 + (j * 4))->best_merge_mode; |
1075 | | |
1076 | 2.22M | if(best_ang_mode < 2) |
1077 | 2.14M | best_ang_mode = 26; |
1078 | | |
1079 | 2.22M | ihevce_mode_eval_filtering( |
1080 | 2.22M | ps_cu_node->ps_sub_cu[j], |
1081 | 2.22M | ps_cu_node, |
1082 | 2.22M | ps_ctxt, |
1083 | 2.22M | ps_curr_src, |
1084 | 2.22M | best_ang_mode, |
1085 | 2.22M | &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0], |
1086 | 2.22M | &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0], |
1087 | 2.22M | !step2_bypass, |
1088 | 2.22M | 1); |
1089 | | |
1090 | 2.22M | if(i4_enable_4cu_16tu) |
1091 | 1.29M | { |
1092 | 1.29M | ihevce_mode_eval_filtering( |
1093 | 1.29M | ps_cu_node->ps_sub_cu[j], |
1094 | 1.29M | ps_cu_node, |
1095 | 1.29M | ps_ctxt, |
1096 | 1.29M | ps_curr_src, |
1097 | 1.29M | best_ang_mode, |
1098 | 1.29M | &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0], |
1099 | 1.29M | &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0], |
1100 | 1.29M | !step2_bypass, |
1101 | 1.29M | 0); |
1102 | 1.29M | } |
1103 | 922k | else |
1104 | 922k | { |
1105 | | /* 4TU not evaluated : 4tu modes set same as 1tu modes */ |
1106 | 922k | memcpy( |
1107 | 922k | &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0], |
1108 | 922k | &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0], |
1109 | 922k | NUM_BEST_MODES); |
1110 | | |
1111 | | /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */ |
1112 | 922k | memcpy( |
1113 | 922k | &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0], |
1114 | 922k | &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0], |
1115 | 922k | NUM_BEST_MODES * sizeof(WORD32)); |
1116 | 922k | } |
1117 | | |
1118 | 2.22M | child_cost[j] = |
1119 | 2.22M | MIN(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0], |
1120 | 2.22M | ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0]); |
1121 | | |
1122 | | /* Child cost is sum of costs at 16x16 level */ |
1123 | 2.22M | child_cost_least += child_cost[j]; |
1124 | | |
1125 | | /* Select the best mode to be populated as top and left nbr depending on the |
1126 | | 4tu and 1tu cost */ |
1127 | 2.22M | if(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0] > |
1128 | 2.22M | ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0]) |
1129 | 5.70k | { |
1130 | 5.70k | ps_cu_node->ps_sub_cu[j]->best_mode = |
1131 | 5.70k | ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0]; |
1132 | 5.70k | } |
1133 | 2.21M | else |
1134 | 2.21M | { |
1135 | 2.21M | ps_cu_node->ps_sub_cu[j]->best_mode = |
1136 | 2.21M | ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0]; |
1137 | 2.21M | } |
1138 | | |
1139 | 2.22M | { /* Update the CTB nodes only for MAX - 1 CU nodes */ |
1140 | 2.22M | WORD32 xA, yA, row, col; |
1141 | 2.22M | xA = ((ps_cu_node->ps_sub_cu[j]->u2_x0 << 3) >> 2) + 1; |
1142 | 2.22M | yA = ((ps_cu_node->ps_sub_cu[j]->u2_y0 << 3) >> 2) + 1; |
1143 | 2.22M | size = ps_cu_node->ps_sub_cu[j]->u1_cu_size >> 2; |
1144 | 11.1M | for(row = yA; row < (yA + size); row++) |
1145 | 8.88M | { |
1146 | 44.4M | for(col = xA; col < (xA + size); col++) |
1147 | 35.5M | { |
1148 | 35.5M | ps_ctxt->au1_ctb_mode_map[row][col] = |
1149 | 35.5M | ps_cu_node->ps_sub_cu[j]->best_mode; |
1150 | 35.5M | } |
1151 | 8.88M | } |
1152 | 2.22M | } |
1153 | 2.22M | } |
1154 | | |
1155 | | /*Child SATD cost*/ |
1156 | 2.22M | child_satd[j] = ps_cu_node->ps_sub_cu[j]->best_satd; |
1157 | | |
1158 | | /* store the child 16x16 costs */ |
1159 | 2.22M | pi4_intra_16_cost[(j & 1) + ((MAX_CU_IN_CTB_ROW >> 1) * (j >> 1))] = |
1160 | 2.22M | child_cost[j]; |
1161 | | |
1162 | | /* set the CU valid flag */ |
1163 | 2.22M | ps_intra16_analyse[j].b1_valid_cu = 1; |
1164 | | |
1165 | | /* All 16x16 merge is valid, if Cu 32x32 is chosen */ |
1166 | | /* To be reset, if CU 64x64 is chosen */ |
1167 | 2.22M | ps_intra16_analyse[j].b1_merge_flag = 1; |
1168 | | |
1169 | | /* storing the modes to intra 16 analyse */ |
1170 | | /* store the best 16x16 modes 8x8 tu */ |
1171 | 2.22M | memcpy( |
1172 | 2.22M | &ps_intra16_analyse[j].au1_best_modes_8x8_tu[0], |
1173 | 2.22M | &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0], |
1174 | 2.22M | sizeof(UWORD8) * (NUM_BEST_MODES)); |
1175 | 2.22M | ps_intra16_analyse[j].au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255; |
1176 | | |
1177 | | /* store the best 16x16 modes 16x16 tu */ |
1178 | 2.22M | memcpy( |
1179 | 2.22M | &ps_intra16_analyse[j].au1_best_modes_16x16_tu[0], |
1180 | 2.22M | &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0], |
1181 | 2.22M | sizeof(UWORD8) * (NUM_BEST_MODES)); |
1182 | 2.22M | ps_intra16_analyse[j].au1_best_modes_16x16_tu[NUM_BEST_MODES] = 255; |
1183 | | |
1184 | | /* divide the 16x16 costs (pro rating) to 4 8x8 costs */ |
1185 | | /* store the same 16x16 modes as 4 8x8 child modes */ |
1186 | 2.22M | { |
1187 | 2.22M | WORD32 idx_8x8; |
1188 | 2.22M | WORD32 *pi4_intra_8_cost_curr16; |
1189 | 2.22M | intra8_analyse_t *ps_intra8_analyse; |
1190 | | |
1191 | 2.22M | pi4_intra_8_cost_curr16 = pi4_intra_8_cost + ((j & 1) << 1); |
1192 | 2.22M | pi4_intra_8_cost_curr16 += ((j >> 1) << 1) * MAX_CU_IN_CTB_ROW; |
1193 | | |
1194 | 11.1M | for(idx_8x8 = 0; idx_8x8 < 4; idx_8x8++) |
1195 | 8.88M | { |
1196 | 8.88M | pi4_intra_8_cost_curr16 |
1197 | 8.88M | [(idx_8x8 & 1) + (MAX_CU_IN_CTB_ROW * (idx_8x8 >> 1))] = |
1198 | 8.88M | (child_cost[j] + 3) >> 2; |
1199 | | |
1200 | 8.88M | ps_intra8_analyse = |
1201 | 8.88M | &ps_intra16_analyse[j].as_intra8_analyse[idx_8x8]; |
1202 | | |
1203 | 8.88M | ps_intra8_analyse->b1_enable_nxn = 0; |
1204 | 8.88M | ps_intra8_analyse->b1_valid_cu = 1; |
1205 | | |
1206 | | /* store the best 8x8 modes 8x8 tu */ |
1207 | 8.88M | memcpy( |
1208 | 8.88M | &ps_intra8_analyse->au1_best_modes_8x8_tu[0], |
1209 | 8.88M | &ps_intra16_analyse[j].au1_best_modes_8x8_tu[0], |
1210 | 8.88M | sizeof(UWORD8) * (NUM_BEST_MODES + 1)); |
1211 | | |
1212 | | /* store the best 8x8 modes 4x4 tu */ |
1213 | 8.88M | memcpy( |
1214 | 8.88M | &ps_intra8_analyse->au1_best_modes_4x4_tu[0], |
1215 | 8.88M | &ps_intra16_analyse[j].au1_best_modes_8x8_tu[0], |
1216 | 8.88M | sizeof(UWORD8) * (NUM_BEST_MODES + 1)); |
1217 | | |
1218 | | /* NXN modes not evaluated hence all entries set to 255 */ |
1219 | 8.88M | memset( |
1220 | 8.88M | &ps_intra8_analyse->au1_4x4_best_modes[0][0], |
1221 | 8.88M | 255, |
1222 | 8.88M | sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1)); |
1223 | 8.88M | } |
1224 | 2.22M | } |
1225 | 2.22M | } |
1226 | | |
1227 | 555k | ihevce_set_nbr_map( |
1228 | 555k | ps_ctxt->pu1_ctb_nbr_map, |
1229 | 555k | ps_ctxt->i4_nbr_map_strd, |
1230 | 555k | ps_cu_node->ps_sub_cu[0]->u2_x0 << 1, |
1231 | 555k | ps_cu_node->ps_sub_cu[0]->u2_y0 << 1, |
1232 | 555k | (ps_cu_node->ps_sub_cu[0]->u1_cu_size >> 1), |
1233 | 555k | 0); |
1234 | 555k | } |
1235 | 0 | #if 1 //DISBLE_CHILD_CU_EVAL_L0_IPE //1 |
1236 | 0 | else |
1237 | 0 | { |
1238 | 0 | for(j = 0; j < 4; j++) |
1239 | 0 | { |
1240 | 0 | WORD32 idx_8x8; |
1241 | 0 | intra8_analyse_t *ps_intra8_analyse; |
1242 | 0 | ps_intra16_analyse[j].au1_best_modes_8x8_tu[0] = 255; |
1243 | 0 | ps_intra16_analyse[j].au1_best_modes_16x16_tu[0] = 255; |
1244 | |
|
1245 | 0 | ps_intra16_analyse[j].b1_valid_cu = 0; |
1246 | |
|
1247 | 0 | for(idx_8x8 = 0; idx_8x8 < 4; idx_8x8++) |
1248 | 0 | { |
1249 | 0 | ps_intra8_analyse = |
1250 | 0 | &ps_intra16_analyse[j].as_intra8_analyse[idx_8x8]; |
1251 | |
|
1252 | 0 | ps_intra8_analyse->au1_best_modes_8x8_tu[0] = 255; |
1253 | 0 | ps_intra8_analyse->au1_best_modes_4x4_tu[0] = 255; |
1254 | |
|
1255 | 0 | ps_intra8_analyse->b1_enable_nxn = 0; |
1256 | 0 | ps_intra8_analyse->b1_valid_cu = 0; |
1257 | | |
1258 | | /* NXN modes not evaluated hence all entries set to 255 */ |
1259 | 0 | memset( |
1260 | 0 | &ps_intra8_analyse->au1_4x4_best_modes[0][0], |
1261 | 0 | 255, |
1262 | 0 | sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1)); |
1263 | 0 | } |
1264 | 0 | } |
1265 | |
|
1266 | 0 | child_cost_least = MAX_INTRA_COST_IPE; |
1267 | 0 | } |
1268 | 555k | #endif |
1269 | | |
1270 | | /* Populate params for 32x32 block analysis */ |
1271 | | |
1272 | 555k | ps_cu_node->ps_parent->u1_cu_size = 32; |
1273 | 555k | ps_cu_node->ps_parent->u2_x0 = |
1274 | 555k | gau1_cu_pos_x[blk_cnt]; /* Populate properly */ |
1275 | 555k | ps_cu_node->ps_parent->u2_y0 = |
1276 | 555k | gau1_cu_pos_y[blk_cnt]; /* Populate properly */ |
1277 | | |
1278 | | /* Revaluation for 32x32 parent block at 16x16 prediction level */ |
1279 | | //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map)); |
1280 | | |
1281 | 555k | { |
1282 | | /* Eval for TUSize = CuSize */ |
1283 | 555k | ihevce_mode_eval_filtering( |
1284 | 555k | ps_cu_node->ps_parent, |
1285 | 555k | ps_cu_node, |
1286 | 555k | ps_ctxt, |
1287 | 555k | ps_curr_src, |
1288 | 555k | 26, |
1289 | 555k | &ps_cu_node->ps_parent->au4_best_cost_1tu[0], |
1290 | 555k | &ps_cu_node->ps_parent->au1_best_mode_1tu[0], |
1291 | 555k | step2_bypass, |
1292 | 555k | 1); |
1293 | | |
1294 | 555k | if(i4_enable_1cu_4tu) |
1295 | 324k | { |
1296 | | /* Eval for TUSize = CuSize/2 */ |
1297 | 324k | ihevce_mode_eval_filtering( |
1298 | 324k | ps_cu_node->ps_parent, |
1299 | 324k | ps_cu_node, |
1300 | 324k | ps_ctxt, |
1301 | 324k | ps_curr_src, |
1302 | 324k | 26, |
1303 | 324k | &ps_cu_node->ps_parent->au4_best_cost_4tu[0], |
1304 | 324k | &ps_cu_node->ps_parent->au1_best_mode_4tu[0], |
1305 | 324k | step2_bypass, |
1306 | 324k | 0); |
1307 | 324k | } |
1308 | 230k | else |
1309 | 230k | { |
1310 | | /* 4TU not evaluated : 4tu modes set same as 1tu modes */ |
1311 | 230k | memcpy( |
1312 | 230k | &ps_cu_node->ps_parent->au1_best_mode_4tu[0], |
1313 | 230k | &ps_cu_node->ps_parent->au1_best_mode_1tu[0], |
1314 | 230k | NUM_BEST_MODES); |
1315 | | |
1316 | | /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */ |
1317 | 230k | memcpy( |
1318 | 230k | &ps_cu_node->ps_parent->au4_best_cost_4tu[0], |
1319 | 230k | &ps_cu_node->ps_parent->au4_best_cost_1tu[0], |
1320 | 230k | NUM_BEST_MODES * sizeof(WORD32)); |
1321 | 230k | } |
1322 | 555k | } |
1323 | | |
1324 | 555k | ps_ctxt->u1_disable_child_cu_decide = 0; |
1325 | 555k | step2_bypass = 1; |
1326 | | |
1327 | | /* Update parent cost */ |
1328 | 555k | parent_cost = |
1329 | 555k | MIN(ps_cu_node->ps_parent->au4_best_cost_4tu[0], |
1330 | 555k | ps_cu_node->ps_parent->au4_best_cost_1tu[0]); |
1331 | | |
1332 | | /* Select the best mode to be populated as top and left nbr depending on the |
1333 | | 4tu and 1tu cost */ |
1334 | 555k | if(ps_cu_node->ps_parent->au4_best_cost_4tu[0] > |
1335 | 555k | ps_cu_node->ps_parent->au4_best_cost_1tu[0]) |
1336 | 2.50k | { |
1337 | 2.50k | ps_cu_node->ps_parent->best_mode = |
1338 | 2.50k | ps_cu_node->ps_parent->au1_best_mode_1tu[0]; |
1339 | 2.50k | } |
1340 | 552k | else |
1341 | 552k | { |
1342 | 552k | ps_cu_node->ps_parent->best_mode = |
1343 | 552k | ps_cu_node->ps_parent->au1_best_mode_4tu[0]; |
1344 | 552k | } |
1345 | | |
1346 | | /* store the 32x32 cost */ |
1347 | 555k | *pi4_intra_32_cost = parent_cost; |
1348 | | |
1349 | | /* set the CU valid flag */ |
1350 | 555k | ps_intra32_analyse->b1_valid_cu = 1; |
1351 | | |
1352 | 555k | ps_intra32_analyse->b1_merge_flag = 1; |
1353 | | |
1354 | | /* storing the modes to intra 32 analyse */ |
1355 | 555k | { |
1356 | | /* store the best 32x32 modes 16x16 tu */ |
1357 | 555k | memcpy( |
1358 | 555k | &ps_intra32_analyse->au1_best_modes_16x16_tu[0], |
1359 | 555k | &ps_cu_node->ps_parent->au1_best_mode_4tu[0], |
1360 | 555k | sizeof(UWORD8) * (NUM_BEST_MODES)); |
1361 | 555k | ps_intra32_analyse->au1_best_modes_16x16_tu[NUM_BEST_MODES] = 255; |
1362 | | |
1363 | | /* store the best 32x32 modes 32x32 tu */ |
1364 | 555k | memcpy( |
1365 | 555k | &ps_intra32_analyse->au1_best_modes_32x32_tu[0], |
1366 | 555k | &ps_cu_node->ps_parent->au1_best_mode_1tu[0], |
1367 | 555k | sizeof(UWORD8) * (NUM_BEST_MODES)); |
1368 | 555k | ps_intra32_analyse->au1_best_modes_32x32_tu[NUM_BEST_MODES] = 255; |
1369 | 555k | } |
1370 | 555k | parent_best_mode = ps_cu_node->ps_parent->best_mode; |
1371 | 555k | if((parent_cost <= |
1372 | 555k | child_cost_least + (ps_ctxt->i4_ol_satd_lambda * CHILD_BIAS >> |
1373 | 555k | LAMBDA_Q_SHIFT))) //|| identical_modes) |
1374 | 504k | { |
1375 | 504k | WORD32 i4_q_scale_q3_mod; |
1376 | 504k | UWORD8 u1_cu_possible_qp; |
1377 | 504k | WORD32 i4_act_factor; |
1378 | | |
1379 | | /* CU size 32x32 and fill the final cu params */ |
1380 | | |
1381 | 504k | ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); |
1382 | | |
1383 | 504k | if((IHEVCE_QUALITY_P3 > i4_quality_preset)) |
1384 | 295k | { |
1385 | 1.47M | for(i = 0; i < 4; i++) |
1386 | 1.18M | { |
1387 | 1.18M | intra8_analyse_t *ps_intra8_analyse; |
1388 | 1.18M | ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[i]; |
1389 | 5.91M | for(j = 0; j < 4; j++) |
1390 | 4.72M | { |
1391 | | /* Populate best 3 nxn modes */ |
1392 | 4.72M | ps_intra8_analyse->au1_4x4_best_modes[j][0] = |
1393 | 4.72M | ps_cu_node->ps_sub_cu[i]->au1_best_mode_4tu[0]; |
1394 | 4.72M | ps_intra8_analyse->au1_4x4_best_modes[j][1] = |
1395 | 4.72M | ps_cu_node->ps_sub_cu[i] |
1396 | 4.72M | ->au1_best_mode_4tu[1]; //(ps_ed + 1)->best_mode; |
1397 | 4.72M | ps_intra8_analyse->au1_4x4_best_modes[j][2] = |
1398 | 4.72M | ps_cu_node->ps_sub_cu[i] |
1399 | 4.72M | ->au1_best_mode_4tu[2]; //(ps_ed + 2)->best_mode; |
1400 | 4.72M | ps_intra8_analyse->au1_4x4_best_modes[j][3] = 255; |
1401 | 4.72M | } |
1402 | 1.18M | } |
1403 | 295k | } |
1404 | | /* store the 32x32 non split flag */ |
1405 | 504k | ps_intra32_analyse->b1_split_flag = 0; |
1406 | 504k | ps_intra32_analyse->as_intra16_analyse[0].b1_split_flag = 0; |
1407 | 504k | ps_intra32_analyse->as_intra16_analyse[1].b1_split_flag = 0; |
1408 | 504k | ps_intra32_analyse->as_intra16_analyse[2].b1_split_flag = 0; |
1409 | 504k | ps_intra32_analyse->as_intra16_analyse[3].b1_split_flag = 0; |
1410 | | |
1411 | 504k | au1_best_32x32_modes[blk_cnt >> 4] = |
1412 | 504k | ps_cu_node->ps_parent->au1_best_mode_1tu[0]; |
1413 | | |
1414 | 504k | au4_best_32x32_cost[blk_cnt >> 4] = |
1415 | 504k | ps_cu_node->ps_parent->au4_best_cost_1tu[0]; |
1416 | | /*As 32*32 has won, pick L2 8x8 qp which maps |
1417 | | to L0 32x32 Qp*/ |
1418 | 504k | ASSERT(((blk_cnt >> 4) & 3) == (blk_cnt >> 4)); |
1419 | 504k | ASSERT(ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0] != -2); |
1420 | 504k | u1_cu_possible_qp = ihevce_cu_level_qp_mod( |
1421 | 504k | ps_ctxt->i4_qscale, |
1422 | 504k | ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0], |
1423 | 504k | ps_ctxt->ld_curr_frame_16x16_log_avg[0], |
1424 | 504k | f_strength, |
1425 | 504k | &i4_act_factor, |
1426 | 504k | &i4_q_scale_q3_mod, |
1427 | 504k | ps_ctxt->ps_rc_quant_ctxt); |
1428 | | /* cost accumulation of best cu size candidate */ |
1429 | 504k | i8_frame_acc_satd_cost += parent_cost; |
1430 | | |
1431 | | /* satd and mpm bits accumulation of best cu size candidate */ |
1432 | 504k | i4_ctb_acc_satd += ps_cu_node->ps_parent->best_satd; |
1433 | | |
1434 | | /* Mode bits cost accumulation for best cu size and cu mode */ |
1435 | 504k | i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost; |
1436 | | |
1437 | | /*satd/mod_qp accumulation of best cu */ |
1438 | 504k | i8_frame_acc_satd_by_modqp_q10 += |
1439 | 504k | ((LWORD64)ps_cu_node->ps_parent->best_satd |
1440 | 504k | << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / |
1441 | 504k | i4_q_scale_q3_mod; |
1442 | | |
1443 | | /* Increment pointers */ |
1444 | 504k | ps_ed_blk_l1 += 16; |
1445 | 504k | blk_cnt += 16; |
1446 | | //ps_row_cu++; |
1447 | 504k | merge_64x64 &= 1; |
1448 | 504k | } |
1449 | 50.6k | else |
1450 | 50.6k | { |
1451 | | /* store the 32x32 split flag */ |
1452 | 50.6k | ps_intra32_analyse->b1_split_flag = 1; |
1453 | | |
1454 | | /* CU size 16x16 and fill the final cu params for all 4 blocks */ |
1455 | 253k | for(j = 0; j < 4; j++) |
1456 | 202k | { |
1457 | 202k | WORD32 i4_q_scale_q3_mod; |
1458 | 202k | UWORD8 u1_cu_possible_qp; |
1459 | 202k | WORD32 i4_act_factor; |
1460 | | |
1461 | | /* Set CU split flag */ |
1462 | 202k | ASSERT(blk_cnt % 4 == 0); |
1463 | | |
1464 | 202k | ihevce_update_cand_list( |
1465 | 202k | ps_cu_node->ps_sub_cu[j], ps_ed_blk_l1, ps_ctxt); |
1466 | | |
1467 | | /* store the 16x16 non split flag */ |
1468 | 202k | ps_intra16_analyse[j].b1_split_flag = 0; |
1469 | | |
1470 | 202k | ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2)); |
1471 | 202k | ASSERT(ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0] != -2); |
1472 | | /*As 16*16 has won, pick L1 8x8 qp which maps |
1473 | | to L0 16x16 Qp*/ |
1474 | 202k | u1_cu_possible_qp = ihevce_cu_level_qp_mod( |
1475 | 202k | ps_ctxt->i4_qscale, |
1476 | 202k | ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0], |
1477 | 202k | ps_ctxt->ld_curr_frame_8x8_log_avg[0], |
1478 | 202k | f_strength, |
1479 | 202k | &i4_act_factor, |
1480 | 202k | &i4_q_scale_q3_mod, |
1481 | 202k | ps_ctxt->ps_rc_quant_ctxt); |
1482 | | |
1483 | | /*accum satd/qp for all child block*/ |
1484 | 202k | i8_frame_acc_satd_by_modqp_q10 += |
1485 | 202k | ((LWORD64)child_satd[j] |
1486 | 202k | << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / |
1487 | 202k | i4_q_scale_q3_mod; |
1488 | | |
1489 | | /* Accumulate mode bits for all child blocks */ |
1490 | 202k | i8_frame_acc_mode_bits_cost += |
1491 | 202k | ps_cu_node->ps_sub_cu[j]->u2_mode_bits_cost; |
1492 | | |
1493 | | /* satd and mpm bits accumulation of best cu size candidate */ |
1494 | 202k | i4_ctb_acc_satd += child_satd[j]; |
1495 | | |
1496 | | /* Increment pointers */ |
1497 | | //ps_row_cu++; |
1498 | 202k | ps_ed_blk_l1 += 4; |
1499 | 202k | blk_cnt += 4; |
1500 | 202k | } |
1501 | | |
1502 | | /* cost accumulation of best cu size candidate */ |
1503 | 50.6k | i8_frame_acc_satd_cost += child_cost_least; |
1504 | | |
1505 | | /* 64x64 merge is not possible */ |
1506 | 50.6k | merge_64x64 = 0; |
1507 | 50.6k | } |
1508 | | |
1509 | | //ps_ed_blk_l2 += 4; |
1510 | | |
1511 | 555k | } //end of EIID's else |
1512 | 587k | #endif |
1513 | 587k | } |
1514 | | /* If Merge success for L1 max CU size 16x16 is chosen */ |
1515 | 714k | else if(merge_16x16_l1) |
1516 | 506k | { |
1517 | | #if IP_DBG_L1_l2 |
1518 | | ps_cu_node->ps_parent->u1_cu_size = 16; |
1519 | | ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[blk_cnt]; /* Populate properly */ |
1520 | | ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[blk_cnt]; /* Populate properly */ |
1521 | | ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_merge_mode; |
1522 | | ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); |
1523 | | |
1524 | | blk_cnt += 4; |
1525 | | ps_ed_blk_l1 += 4; |
1526 | | ps_row_cu++; |
1527 | | merge_64x64 = 0; |
1528 | | #else |
1529 | | |
1530 | | /*EIID: evaluate only if L1 early-inter-intra decision is not favouring inter*/ |
1531 | | /* enable this only in B pictures */ |
1532 | 506k | if(ps_ed_blk_l1->intra_or_inter == 2 && (ps_ctxt->i4_slice_type != ISLICE)) |
1533 | 144k | { |
1534 | 144k | WORD32 i4_q_scale_q3_mod, i4_local_ctr; |
1535 | 144k | WORD8 i1_cu_possible_qp; |
1536 | 144k | WORD32 i4_act_factor; |
1537 | | /* make cost infinity. */ |
1538 | | /* make modes invalid */ |
1539 | | /* update loop variables */ |
1540 | | /* set other output variables */ |
1541 | | /* don't set neighbour flag so that next blocks won't access this cu */ |
1542 | | /* what happens to ctb_mode_map?? */ |
1543 | | |
1544 | 144k | ps_cu_node->ps_parent->u1_cu_size = 16; |
1545 | 144k | ps_cu_node->ps_parent->u2_x0 = |
1546 | 144k | gau1_cu_pos_x[blk_cnt]; /* Populate properly */ |
1547 | 144k | ps_cu_node->ps_parent->u2_y0 = |
1548 | 144k | gau1_cu_pos_y[blk_cnt]; /* Populate properly */ |
1549 | 144k | ps_cu_node->ps_parent->best_mode = |
1550 | 144k | INTRA_DC; //ps_ed_blk_l1->best_merge_mode; |
1551 | | |
1552 | | /* fill in the first modes as invalid */ |
1553 | | |
1554 | 144k | ps_cu_node->ps_parent->au1_best_mode_1tu[0] = INTRA_DC; |
1555 | 144k | ps_cu_node->ps_parent->au1_best_mode_1tu[1] = |
1556 | 144k | INTRA_DC; //for safery. Since update_cand_list will set num_modes as 3 |
1557 | 144k | ps_cu_node->ps_parent->au1_best_mode_1tu[2] = INTRA_DC; |
1558 | | |
1559 | 144k | ps_cu_node->ps_parent->au1_best_mode_4tu[0] = INTRA_DC; |
1560 | 144k | ps_cu_node->ps_parent->au1_best_mode_4tu[1] = INTRA_DC; |
1561 | 144k | ps_cu_node->ps_parent->au1_best_mode_4tu[2] = INTRA_DC; |
1562 | | |
1563 | 144k | ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); |
1564 | | |
1565 | | //ps_row_cu->s_cu_intra_cand.b6_num_intra_cands = 0; |
1566 | | //ps_row_cu->u1_num_intra_rdopt_cands = 0; |
1567 | | |
1568 | 144k | ps_intra32_analyse->b1_split_flag = 1; |
1569 | 144k | ps_intra32_analyse->b1_merge_flag = 0; |
1570 | | |
1571 | 144k | ps_intra16_analyse->b1_valid_cu = 0; |
1572 | 144k | ps_intra16_analyse->b1_split_flag = 0; |
1573 | 144k | ps_intra16_analyse->b1_merge_flag = 1; |
1574 | | //memset (&ps_intra16_analyse->au1_best_modes_16x16_tu, |
1575 | | // 255, |
1576 | | // NUM_BEST_MODES); |
1577 | | //memset (&ps_intra16_analyse->au1_best_modes_8x8_tu, |
1578 | | // 255, |
1579 | | // NUM_BEST_MODES); |
1580 | | //set only first mode since if it's 255, it won't go ahead |
1581 | 144k | ps_intra16_analyse->au1_best_modes_16x16_tu[0] = 255; |
1582 | 144k | ps_intra16_analyse->au1_best_modes_8x8_tu[0] = 255; |
1583 | 144k | *pi4_intra_16_cost = MAX_INTRA_COST_IPE; |
1584 | | |
1585 | | /*since ME will start evaluating from bottom up, set the lower |
1586 | | cu size data invalid */ |
1587 | 723k | for(i4_local_ctr = 0; i4_local_ctr < 4; i4_local_ctr++) |
1588 | 578k | { |
1589 | 578k | ps_intra16_analyse->as_intra8_analyse[i4_local_ctr] |
1590 | 578k | .au1_4x4_best_modes[0][0] = 255; |
1591 | 578k | ps_intra16_analyse->as_intra8_analyse[i4_local_ctr] |
1592 | 578k | .au1_4x4_best_modes[1][0] = 255; |
1593 | 578k | ps_intra16_analyse->as_intra8_analyse[i4_local_ctr] |
1594 | 578k | .au1_4x4_best_modes[2][0] = 255; |
1595 | 578k | ps_intra16_analyse->as_intra8_analyse[i4_local_ctr] |
1596 | 578k | .au1_4x4_best_modes[3][0] = 255; |
1597 | 578k | ps_intra16_analyse->as_intra8_analyse[i4_local_ctr] |
1598 | 578k | .au1_best_modes_8x8_tu[0] = 255; |
1599 | 578k | ps_intra16_analyse->as_intra8_analyse[i4_local_ctr] |
1600 | 578k | .au1_best_modes_4x4_tu[0] = 255; |
1601 | | |
1602 | 578k | pi4_intra_8_cost |
1603 | 578k | [(i4_local_ctr & 1) + (MAX_CU_IN_CTB_ROW * (i4_local_ctr >> 1))] = |
1604 | 578k | MAX_INTRA_COST_IPE; |
1605 | 578k | } |
1606 | | |
1607 | | /* set neighbours even if intra is not evaluated, since source is always available. */ |
1608 | 144k | ihevce_set_nbr_map( |
1609 | 144k | ps_ctxt->pu1_ctb_nbr_map, |
1610 | 144k | ps_ctxt->i4_nbr_map_strd, |
1611 | 144k | ps_cu_node->ps_parent->u2_x0 << 1, |
1612 | 144k | ps_cu_node->ps_parent->u2_y0 << 1, |
1613 | 144k | (ps_cu_node->ps_parent->u1_cu_size >> 2), |
1614 | 144k | 1); |
1615 | | |
1616 | | //what happens to RC variables?? |
1617 | | /* run only constant Qp */ |
1618 | 144k | ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2)); |
1619 | 144k | ASSERT(ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0] != -2); |
1620 | 144k | i1_cu_possible_qp = ihevce_cu_level_qp_mod( |
1621 | 144k | ps_ctxt->i4_qscale, |
1622 | 144k | ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0], |
1623 | 144k | ps_ctxt->ld_curr_frame_8x8_log_avg[0], |
1624 | 144k | f_strength, |
1625 | 144k | &i4_act_factor, |
1626 | 144k | &i4_q_scale_q3_mod, |
1627 | 144k | ps_ctxt->ps_rc_quant_ctxt); |
1628 | | |
1629 | | /* cost accumulation of best cu size candidate */ |
1630 | 144k | i8_frame_acc_satd_cost += 0; //parent_cost; //incorrect accumulation |
1631 | | |
1632 | | /*satd/mod_qp accumulation of best cu */ |
1633 | 144k | i8_frame_acc_satd_by_modqp_q10 += 0; //incorrect accumulation |
1634 | | //((LWORD64)ps_cu_node->ps_parent->best_satd << SATD_BY_ACT_Q_FAC)/i4_q_scale_q3_mod; |
1635 | | |
1636 | | /* Accumulate mode bits for all child blocks */ |
1637 | 144k | i8_frame_acc_mode_bits_cost += |
1638 | 144k | 0; //ps_cu_node->ps_parent->u2_mode_bits_cost; |
1639 | | //incorrect accumulation |
1640 | | |
1641 | 144k | blk_cnt += 4; |
1642 | 144k | ps_ed_blk_l1 += 4; |
1643 | | //ps_row_cu++; |
1644 | 144k | merge_64x64 = 0; |
1645 | | |
1646 | | /* increment for stat purpose only. Increment is valid only on single thread */ |
1647 | 144k | ps_ctxt->u4_num_16x16_skips_at_L0_IPE += 1; |
1648 | 144k | } |
1649 | 361k | else |
1650 | 361k | { |
1651 | | /* 64x64 merge is not possible */ |
1652 | 361k | merge_64x64 = 0; |
1653 | | |
1654 | | /* set the 32x32 split flag to 1 */ |
1655 | 361k | ps_intra32_analyse->b1_split_flag = 1; |
1656 | | |
1657 | 361k | ps_intra32_analyse->b1_merge_flag = 0; |
1658 | | |
1659 | 361k | ps_intra16_analyse->b1_merge_flag = 1; |
1660 | | |
1661 | 361k | if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) && |
1662 | 151k | (ps_ctxt->i4_slice_type == PSLICE)) |
1663 | 142k | { |
1664 | 142k | ps_ctxt->u1_disable_child_cu_decide = 1; |
1665 | 142k | step2_bypass = 0; |
1666 | 142k | } |
1667 | | //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map)); |
1668 | | /* Based on the flag, Child modes decision can be disabled*/ |
1669 | 361k | if(0 == ps_ctxt->u1_disable_child_cu_decide) |
1670 | 218k | { |
1671 | 1.09M | for(j = 0; j < 4; j++) |
1672 | 874k | { |
1673 | 874k | intra8_analyse_t *ps_intra8_analyse; |
1674 | 874k | WORD32 best_ang_mode = (ps_ed_blk_l1 + j)->best_mode; |
1675 | | |
1676 | 874k | if(best_ang_mode < 2) |
1677 | 733k | best_ang_mode = 26; |
1678 | | |
1679 | | //ps_cu_node->ps_sub_cu[j]->best_cost = MAX_INTRA_COST_IPE; |
1680 | | //ps_cu_node->ps_sub_cu[j]->best_mode = (ps_ed_blk_l1 + j)->best_mode; |
1681 | | |
1682 | 874k | ps_cu_node->ps_sub_cu[j]->u2_x0 = |
1683 | 874k | gau1_cu_pos_x[blk_cnt + j]; /* Populate properly */ |
1684 | 874k | ps_cu_node->ps_sub_cu[j]->u2_y0 = |
1685 | 874k | gau1_cu_pos_y[blk_cnt + j]; /* Populate properly */ |
1686 | 874k | ps_cu_node->ps_sub_cu[j]->u1_cu_size = 8; |
1687 | | |
1688 | 874k | ihevce_mode_eval_filtering( |
1689 | 874k | ps_cu_node->ps_sub_cu[j], |
1690 | 874k | ps_cu_node, |
1691 | 874k | ps_ctxt, |
1692 | 874k | ps_curr_src, |
1693 | 874k | best_ang_mode, |
1694 | 874k | &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0], |
1695 | 874k | &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0], |
1696 | 874k | !step2_bypass, |
1697 | 874k | 1); |
1698 | | |
1699 | 874k | if(i4_enable_4cu_16tu) |
1700 | 401k | { |
1701 | 401k | ihevce_mode_eval_filtering( |
1702 | 401k | ps_cu_node->ps_sub_cu[j], |
1703 | 401k | ps_cu_node, |
1704 | 401k | ps_ctxt, |
1705 | 401k | ps_curr_src, |
1706 | 401k | best_ang_mode, |
1707 | 401k | &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0], |
1708 | 401k | &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0], |
1709 | 401k | !step2_bypass, |
1710 | 401k | 0); |
1711 | 401k | } |
1712 | 472k | else |
1713 | 472k | { |
1714 | | /* 4TU not evaluated : 4tu modes set same as 1tu modes */ |
1715 | 472k | memcpy( |
1716 | 472k | &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0], |
1717 | 472k | &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0], |
1718 | 472k | NUM_BEST_MODES); |
1719 | | |
1720 | | /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */ |
1721 | 472k | memcpy( |
1722 | 472k | &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0], |
1723 | 472k | &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0], |
1724 | 472k | NUM_BEST_MODES * sizeof(WORD32)); |
1725 | 472k | } |
1726 | | |
1727 | 874k | child_cost[j] = |
1728 | 874k | MIN(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0], |
1729 | 874k | ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0]); |
1730 | | |
1731 | 874k | child_cost_least += child_cost[j]; |
1732 | | |
1733 | | /* Select the best mode to be populated as top and left nbr depending on the |
1734 | | 4tu and 1tu cost */ |
1735 | 874k | if(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0] > |
1736 | 874k | ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0]) |
1737 | 28.3k | { |
1738 | 28.3k | ps_cu_node->ps_sub_cu[j]->best_mode = |
1739 | 28.3k | ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0]; |
1740 | 28.3k | } |
1741 | 845k | else |
1742 | 845k | { |
1743 | 845k | ps_cu_node->ps_sub_cu[j]->best_mode = |
1744 | 845k | ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0]; |
1745 | 845k | } |
1746 | 874k | { /* Update the CTB nodes only for MAX - 1 CU nodes */ |
1747 | 874k | WORD32 xA, yA, row, col; |
1748 | 874k | xA = ((ps_cu_node->ps_sub_cu[j]->u2_x0 << 3) >> 2) + 1; |
1749 | 874k | yA = ((ps_cu_node->ps_sub_cu[j]->u2_y0 << 3) >> 2) + 1; |
1750 | 874k | size = ps_cu_node->ps_sub_cu[j]->u1_cu_size >> 2; |
1751 | 2.62M | for(row = yA; row < (yA + size); row++) |
1752 | 1.74M | { |
1753 | 5.24M | for(col = xA; col < (xA + size); col++) |
1754 | 3.49M | { |
1755 | 3.49M | ps_ctxt->au1_ctb_mode_map[row][col] = |
1756 | 3.49M | ps_cu_node->ps_sub_cu[j]->best_mode; |
1757 | 3.49M | } |
1758 | 1.74M | } |
1759 | 874k | } |
1760 | | |
1761 | | /*collect individual child satd for final SATD/qp accum*/ |
1762 | 874k | child_satd[j] = ps_cu_node->ps_sub_cu[j]->best_satd; |
1763 | | |
1764 | 874k | ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[j]; |
1765 | | |
1766 | | /* store the child 8x8 costs */ |
1767 | 874k | pi4_intra_8_cost[(j & 1) + (MAX_CU_IN_CTB_ROW * (j >> 1))] = |
1768 | 874k | child_cost[j]; |
1769 | | |
1770 | | /* set the CU valid flag */ |
1771 | 874k | ps_intra8_analyse->b1_valid_cu = 1; |
1772 | 874k | ps_intra8_analyse->b1_enable_nxn = 0; |
1773 | | |
1774 | | /* storing the modes to intra8 analyse */ |
1775 | | |
1776 | | /* store the best 8x8 modes 8x8 tu */ |
1777 | 874k | memcpy( |
1778 | 874k | &ps_intra8_analyse->au1_best_modes_8x8_tu[0], |
1779 | 874k | &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0], |
1780 | 874k | sizeof(UWORD8) * (NUM_BEST_MODES)); |
1781 | 874k | ps_intra8_analyse->au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255; |
1782 | | |
1783 | | /* store the best 8x8 modes 4x4 tu */ |
1784 | 874k | memcpy( |
1785 | 874k | &ps_intra8_analyse->au1_best_modes_4x4_tu[0], |
1786 | 874k | &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0], |
1787 | 874k | sizeof(UWORD8) * (NUM_BEST_MODES)); |
1788 | 874k | ps_intra8_analyse->au1_best_modes_4x4_tu[NUM_BEST_MODES] = 255; |
1789 | | |
1790 | | /* NXN modes not evaluated hence set to 255 */ |
1791 | 874k | memset( |
1792 | 874k | &ps_intra8_analyse->au1_4x4_best_modes[0][0], |
1793 | 874k | 255, |
1794 | 874k | sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1)); |
1795 | 874k | } |
1796 | | |
1797 | 218k | ihevce_set_nbr_map( |
1798 | 218k | ps_ctxt->pu1_ctb_nbr_map, |
1799 | 218k | ps_ctxt->i4_nbr_map_strd, |
1800 | 218k | ps_cu_node->ps_sub_cu[0]->u2_x0 << 1, |
1801 | 218k | ps_cu_node->ps_sub_cu[0]->u2_y0 << 1, |
1802 | 218k | (ps_cu_node->ps_sub_cu[0]->u1_cu_size >> 1), |
1803 | 218k | 0); |
1804 | 218k | } |
1805 | 142k | #if 1 //DISBLE_CHILD_CU_EVAL_L0_IPE //1 |
1806 | 142k | else |
1807 | 142k | { |
1808 | 714k | for(j = 0; j < 4; j++) |
1809 | 571k | { |
1810 | 571k | intra8_analyse_t *ps_intra8_analyse; |
1811 | 571k | ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[j]; |
1812 | 571k | ps_intra8_analyse->au1_best_modes_8x8_tu[0] = 255; |
1813 | 571k | ps_intra8_analyse->au1_best_modes_4x4_tu[0] = 255; |
1814 | | /* NXN modes not evaluated hence set to 255 */ |
1815 | 571k | memset( |
1816 | 571k | &ps_intra8_analyse->au1_4x4_best_modes[0][0], |
1817 | 571k | 255, |
1818 | 571k | sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1)); |
1819 | | |
1820 | 571k | ps_intra8_analyse->b1_valid_cu = 0; |
1821 | 571k | ps_intra8_analyse->b1_enable_nxn = 0; |
1822 | 571k | } |
1823 | 142k | child_cost_least = MAX_INTRA_COST_IPE; |
1824 | 142k | } |
1825 | 361k | #endif |
1826 | | //ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_mode; |
1827 | | //ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE; |
1828 | | |
1829 | 361k | ps_cu_node->ps_parent->u1_cu_size = 16; |
1830 | 361k | ps_cu_node->ps_parent->u2_x0 = |
1831 | 361k | gau1_cu_pos_x[blk_cnt]; /* Populate properly */ |
1832 | 361k | ps_cu_node->ps_parent->u2_y0 = |
1833 | 361k | gau1_cu_pos_y[blk_cnt]; /* Populate properly */ |
1834 | | |
1835 | | //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map)); |
1836 | | |
1837 | | /* Eval for TUSize = CuSize */ |
1838 | 361k | ihevce_mode_eval_filtering( |
1839 | 361k | ps_cu_node->ps_parent, |
1840 | 361k | ps_cu_node, |
1841 | 361k | ps_ctxt, |
1842 | 361k | ps_curr_src, |
1843 | 361k | 26, |
1844 | 361k | &ps_cu_node->ps_parent->au4_best_cost_1tu[0], |
1845 | 361k | &ps_cu_node->ps_parent->au1_best_mode_1tu[0], |
1846 | 361k | step2_bypass, |
1847 | 361k | 1); |
1848 | | |
1849 | 361k | if(i4_enable_1cu_4tu) |
1850 | 100k | { |
1851 | | /* Eval for TUSize = CuSize/2 */ |
1852 | 100k | ihevce_mode_eval_filtering( |
1853 | 100k | ps_cu_node->ps_parent, |
1854 | 100k | ps_cu_node, |
1855 | 100k | ps_ctxt, |
1856 | 100k | ps_curr_src, |
1857 | 100k | 26, |
1858 | 100k | &ps_cu_node->ps_parent->au4_best_cost_4tu[0], |
1859 | 100k | &ps_cu_node->ps_parent->au1_best_mode_4tu[0], |
1860 | 100k | step2_bypass, |
1861 | 100k | 0); |
1862 | 100k | } |
1863 | 260k | else |
1864 | 260k | { |
1865 | | /* 4TU not evaluated : 4tu modes set same as 1tu modes */ |
1866 | 260k | memcpy( |
1867 | 260k | &ps_cu_node->ps_parent->au1_best_mode_4tu[0], |
1868 | 260k | &ps_cu_node->ps_parent->au1_best_mode_1tu[0], |
1869 | 260k | NUM_BEST_MODES); |
1870 | | |
1871 | | /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */ |
1872 | 260k | memcpy( |
1873 | 260k | &ps_cu_node->ps_parent->au4_best_cost_4tu[0], |
1874 | 260k | &ps_cu_node->ps_parent->au4_best_cost_1tu[0], |
1875 | 260k | NUM_BEST_MODES * sizeof(WORD32)); |
1876 | 260k | } |
1877 | | |
1878 | 361k | ps_ctxt->u1_disable_child_cu_decide = 0; |
1879 | 361k | step2_bypass = 1; |
1880 | | |
1881 | | /* Update parent cost */ |
1882 | 361k | parent_cost = |
1883 | 361k | MIN(ps_cu_node->ps_parent->au4_best_cost_4tu[0], |
1884 | 361k | ps_cu_node->ps_parent->au4_best_cost_1tu[0]); |
1885 | | |
1886 | | /* Select the best mode to be populated as top and left nbr depending on the |
1887 | | 4tu and 1tu cost */ |
1888 | 361k | if(ps_cu_node->ps_parent->au4_best_cost_4tu[0] > |
1889 | 361k | ps_cu_node->ps_parent->au4_best_cost_1tu[0]) |
1890 | 8.45k | { |
1891 | 8.45k | ps_cu_node->ps_parent->best_mode = |
1892 | 8.45k | ps_cu_node->ps_parent->au1_best_mode_1tu[0]; |
1893 | 8.45k | } |
1894 | 352k | else |
1895 | 352k | { |
1896 | 352k | ps_cu_node->ps_parent->best_mode = |
1897 | 352k | ps_cu_node->ps_parent->au1_best_mode_4tu[0]; |
1898 | 352k | } |
1899 | | |
1900 | | /* store the 16x16 cost */ |
1901 | 361k | *pi4_intra_16_cost = parent_cost; |
1902 | | |
1903 | | /* accumulate the 32x32 cost */ |
1904 | 361k | if(MAX_INTRA_COST_IPE == *pi4_intra_32_cost) |
1905 | 88.5k | { |
1906 | 88.5k | *pi4_intra_32_cost = parent_cost; |
1907 | 88.5k | } |
1908 | 272k | else |
1909 | 272k | { |
1910 | 272k | *pi4_intra_32_cost += parent_cost; |
1911 | 272k | } |
1912 | | |
1913 | | /* set the CU valid flag */ |
1914 | 361k | ps_intra16_analyse->b1_valid_cu = 1; |
1915 | | |
1916 | | /* storing the modes to intra 16 analyse */ |
1917 | 361k | { |
1918 | | /* store the best 16x16 modes 16x16 tu */ |
1919 | 361k | memcpy( |
1920 | 361k | &ps_intra16_analyse->au1_best_modes_16x16_tu[0], |
1921 | 361k | &ps_cu_node->ps_parent->au1_best_mode_1tu[0], |
1922 | 361k | sizeof(UWORD8) * NUM_BEST_MODES); |
1923 | 361k | ps_intra16_analyse->au1_best_modes_16x16_tu[NUM_BEST_MODES] = 255; |
1924 | | |
1925 | | /* store the best 16x16 modes 8x8 tu */ |
1926 | 361k | memcpy( |
1927 | 361k | &ps_intra16_analyse->au1_best_modes_8x8_tu[0], |
1928 | 361k | &ps_cu_node->ps_parent->au1_best_mode_4tu[0], |
1929 | 361k | sizeof(UWORD8) * NUM_BEST_MODES); |
1930 | 361k | ps_intra16_analyse->au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255; |
1931 | 361k | } |
1932 | | |
1933 | 361k | parent_best_mode = ps_cu_node->ps_parent->best_mode; |
1934 | 361k | if(parent_cost <= |
1935 | 361k | child_cost_least + (ps_ctxt->i4_ol_satd_lambda * CHILD_BIAS >> |
1936 | 361k | LAMBDA_Q_SHIFT)) //|| identical_modes) |
1937 | 330k | { |
1938 | 330k | WORD32 i4_q_scale_q3_mod; |
1939 | 330k | WORD8 i1_cu_possible_qp; |
1940 | 330k | WORD32 i4_act_factor; |
1941 | | //choose parent CU |
1942 | | |
1943 | 330k | ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); |
1944 | | |
1945 | | /* set the 16x16 non split flag */ |
1946 | 330k | ps_intra16_analyse->b1_split_flag = 0; |
1947 | | |
1948 | | /*As 16*16 has won, pick L1 8x8 qp which maps |
1949 | | to L0 16x16 Qp*/ |
1950 | 330k | ASSERT(((blk_cnt >> 4) & 3) == (blk_cnt >> 4)); |
1951 | 330k | ASSERT(ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0] != -2); |
1952 | 330k | i1_cu_possible_qp = ihevce_cu_level_qp_mod( |
1953 | 330k | ps_ctxt->i4_qscale, |
1954 | 330k | ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0], |
1955 | 330k | ps_ctxt->ld_curr_frame_8x8_log_avg[0], |
1956 | 330k | f_strength, |
1957 | 330k | &i4_act_factor, |
1958 | 330k | &i4_q_scale_q3_mod, |
1959 | 330k | ps_ctxt->ps_rc_quant_ctxt); |
1960 | | |
1961 | | /* cost accumulation of best cu size candidate */ |
1962 | 330k | i8_frame_acc_satd_cost += parent_cost; |
1963 | | |
1964 | | /* satd and mpm bits accumulation of best cu size candidate */ |
1965 | 330k | i4_ctb_acc_satd += ps_cu_node->ps_parent->best_satd; |
1966 | | |
1967 | | /*satd/mod_qp accumulation of best cu */ |
1968 | 330k | i8_frame_acc_satd_by_modqp_q10 += |
1969 | 330k | ((LWORD64)ps_cu_node->ps_parent->best_satd |
1970 | 330k | << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / |
1971 | 330k | i4_q_scale_q3_mod; |
1972 | | |
1973 | | /* Accumulate mode bits of the parent CU (16x16 chosen over its children) */ |
1974 | 330k | i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost; |
1975 | | |
1976 | 330k | blk_cnt += 4; |
1977 | 330k | ps_ed_blk_l1 += 4; |
1978 | | //ps_row_cu++; |
1979 | 330k | } |
1980 | 30.7k | else |
1981 | 30.7k | { |
1982 | | //choose child CU |
1983 | 30.7k | WORD8 i1_cu_possible_qp; |
1984 | 30.7k | WORD32 i4_act_factor; |
1985 | 30.7k | WORD32 i4_q_scale_q3_mod; |
1986 | | |
1987 | 30.7k | ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2)); |
1988 | 30.7k | ASSERT(ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][1] != -2); |
1989 | 30.7k | i1_cu_possible_qp = ihevce_cu_level_qp_mod( |
1990 | 30.7k | ps_ctxt->i4_qscale, |
1991 | 30.7k | ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][1], |
1992 | 30.7k | ps_ctxt->ld_curr_frame_8x8_log_avg[1], |
1993 | 30.7k | f_strength, |
1994 | 30.7k | &i4_act_factor, |
1995 | 30.7k | &i4_q_scale_q3_mod, |
1996 | 30.7k | ps_ctxt->ps_rc_quant_ctxt); |
1997 | | |
1998 | | /* set the 16x16 split flag */ |
1999 | 30.7k | ps_intra16_analyse->b1_split_flag = 1; |
2000 | | |
2001 | 153k | for(j = 0; j < 4; j++) |
2002 | 122k | { |
2003 | 122k | ihevce_update_cand_list( |
2004 | 122k | ps_cu_node->ps_sub_cu[j], ps_ed_blk_l1, ps_ctxt); |
2005 | | |
2006 | 122k | if((IHEVCE_QUALITY_P3 > i4_quality_preset)) |
2007 | 53.7k | { |
2008 | 53.7k | WORD32 k; |
2009 | 53.7k | intra8_analyse_t *ps_intra8_analyse; |
2010 | 53.7k | ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[j]; |
2011 | | |
2012 | 268k | for(k = 0; k < 4; k++) |
2013 | 214k | { |
2014 | | /* Populate best 3 nxn modes */ |
2015 | 214k | ps_intra8_analyse->au1_4x4_best_modes[k][0] = |
2016 | 214k | ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0]; |
2017 | 214k | ps_intra8_analyse->au1_4x4_best_modes[k][1] = |
2018 | 214k | ps_cu_node->ps_sub_cu[j] |
2019 | 214k | ->au1_best_mode_4tu[1]; //(ps_ed + 1)->best_mode; |
2020 | 214k | ps_intra8_analyse->au1_4x4_best_modes[k][2] = |
2021 | 214k | ps_cu_node->ps_sub_cu[j] |
2022 | 214k | ->au1_best_mode_4tu[2]; //(ps_ed + 2)->best_mode; |
2023 | 214k | ps_intra8_analyse->au1_4x4_best_modes[k][3] = 255; |
2024 | 214k | } |
2025 | 53.7k | } |
2026 | | /*accum satd/qp for all child block*/ |
2027 | 122k | i8_frame_acc_satd_by_modqp_q10 += |
2028 | 122k | ((LWORD64)child_satd[j] |
2029 | 122k | << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / |
2030 | 122k | i4_q_scale_q3_mod; |
2031 | | |
2032 | | /* Accumulate mode bits for all child blocks */ |
2033 | 122k | i8_frame_acc_mode_bits_cost += |
2034 | 122k | ps_cu_node->ps_sub_cu[j]->u2_mode_bits_cost; |
2035 | | |
2036 | | /* satd and mpm bits accumulation of best cu size candidate */ |
2037 | 122k | i4_ctb_acc_satd += child_satd[j]; |
2038 | | |
2039 | 122k | blk_cnt += 1; |
2040 | 122k | ps_ed_blk_l1 += 1; |
2041 | | //ps_row_cu++; |
2042 | 122k | } |
2043 | | |
2044 | | /* cost accumulation of best cu size candidate */ |
2045 | 30.7k | i8_frame_acc_satd_cost += child_cost_least; |
2046 | 30.7k | } |
2047 | | |
2048 | 361k | } //else of EIID |
2049 | 506k | #endif |
2050 | 506k | } // if(merge_16x16_l1) |
2051 | | /* MAX CU SIZE 8x8 */ |
2052 | 208k | else |
2053 | 208k | { |
2054 | | #if IP_DBG_L1_l2 |
2055 | | for(i = 0; i < 4; i++) |
2056 | | { |
2057 | | ps_cu_node->ps_parent->u1_cu_size = 8; |
2058 | | ps_cu_node->ps_parent->u2_x0 = |
2059 | | gau1_cu_pos_x[blk_cnt]; /* Populate properly */ |
2060 | | ps_cu_node->ps_parent->u2_y0 = |
2061 | | gau1_cu_pos_y[blk_cnt]; /* Populate properly */ |
2062 | | ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_mode; |
2063 | | |
2064 | | ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); |
2065 | | blk_cnt++; |
2066 | | ps_ed_blk_l1++; |
2067 | | ps_row_cu++; |
2068 | | merge_64x64 = 0; |
2069 | | } |
2070 | | #else |
2071 | | |
2072 | | /* EIID: Skip all 4 8x8 block if L1 decisions says skip intra */ |
2073 | 208k | if(ps_ed_blk_l1->intra_or_inter == 2 && (ps_ctxt->i4_slice_type != ISLICE)) |
2074 | 34.1k | { |
2075 | 34.1k | WORD32 i4_q_scale_q3_mod; |
2076 | 34.1k | WORD8 i1_cu_possible_qp; |
2077 | 34.1k | WORD32 i4_act_factor; |
2078 | | |
2079 | 34.1k | merge_64x64 = 0; |
2080 | | |
2081 | 34.1k | ps_intra32_analyse->b1_merge_flag = 0; |
2082 | | |
2083 | 34.1k | ps_intra16_analyse->au1_best_modes_8x8_tu[0] = 255; |
2084 | 34.1k | ps_intra16_analyse->au1_best_modes_8x8_tu[1] = 255; |
2085 | 34.1k | ps_intra16_analyse->au1_best_modes_8x8_tu[2] = 255; |
2086 | | |
2087 | 34.1k | ps_intra16_analyse->au1_best_modes_16x16_tu[0] = 255; |
2088 | 34.1k | ps_intra16_analyse->au1_best_modes_16x16_tu[1] = 255; |
2089 | 34.1k | ps_intra16_analyse->au1_best_modes_16x16_tu[2] = 255; |
2090 | 34.1k | ps_intra16_analyse->b1_split_flag = 1; |
2091 | 34.1k | ps_intra16_analyse->b1_valid_cu = 0; |
2092 | 34.1k | ps_intra16_analyse->b1_merge_flag = 0; |
2093 | | |
2094 | 170k | for(i = 0; i < 4; i++) |
2095 | 136k | { |
2096 | 136k | intra8_analyse_t *ps_intra8_analyse; |
2097 | 136k | WORD32 ctr_sub_cu; |
2098 | | |
2099 | 136k | cu_pos_x = gau1_cu_pos_x[blk_cnt]; |
2100 | 136k | cu_pos_y = gau1_cu_pos_y[blk_cnt]; |
2101 | | |
2102 | 136k | if((cu_pos_x < num_8x8_blks_x) && (cu_pos_y < num_8x8_blks_y)) |
2103 | 121k | { |
2104 | 121k | ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[i]; |
2105 | | |
2106 | 121k | ps_intra8_analyse->b1_valid_cu = 0; |
2107 | 121k | ps_intra8_analyse->b1_enable_nxn = 0; |
2108 | 121k | ps_intra8_analyse->au1_4x4_best_modes[0][0] = 255; |
2109 | 121k | ps_intra8_analyse->au1_4x4_best_modes[1][0] = 255; |
2110 | 121k | ps_intra8_analyse->au1_4x4_best_modes[2][0] = 255; |
2111 | 121k | ps_intra8_analyse->au1_4x4_best_modes[3][0] = 255; |
2112 | 121k | ps_intra8_analyse->au1_best_modes_4x4_tu[0] = 255; |
2113 | 121k | ps_intra8_analyse->au1_best_modes_8x8_tu[0] = 255; |
2114 | | |
2115 | 121k | ps_cu_node->ps_parent->u1_cu_size = 8; |
2116 | 121k | ps_cu_node->ps_parent->u2_x0 = |
2117 | 121k | gau1_cu_pos_x[blk_cnt]; /* Populate properly */ |
2118 | 121k | ps_cu_node->ps_parent->u2_y0 = |
2119 | 121k | gau1_cu_pos_y[blk_cnt]; /* Populate properly */ |
2120 | 121k | ps_cu_node->ps_parent->best_mode = |
2121 | 121k | INTRA_DC; //ps_ed_blk_l1->best_mode; |
2122 | | |
2123 | | /* fill in the first modes as invalid */ |
2124 | | |
2125 | 121k | ps_cu_node->ps_parent->au1_best_mode_1tu[0] = INTRA_DC; |
2126 | 121k | ps_cu_node->ps_parent->au1_best_mode_1tu[1] = |
2127 | 121k | INTRA_DC; //for safery. Since update_cand_list will set num_modes as 3 |
2128 | 121k | ps_cu_node->ps_parent->au1_best_mode_1tu[2] = INTRA_DC; |
2129 | | |
2130 | 121k | ps_cu_node->ps_parent->au1_best_mode_4tu[0] = INTRA_DC; |
2131 | 121k | ps_cu_node->ps_parent->au1_best_mode_4tu[1] = INTRA_DC; |
2132 | 121k | ps_cu_node->ps_parent->au1_best_mode_4tu[2] = INTRA_DC; |
2133 | | |
2134 | 121k | ihevce_update_cand_list( |
2135 | 121k | ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); |
2136 | | |
2137 | | //ps_row_cu->s_cu_intra_cand.b6_num_intra_cands = 0; |
2138 | | //ps_row_cu->u1_num_intra_rdopt_cands = 0; |
2139 | | |
2140 | 607k | for(ctr_sub_cu = 0; ctr_sub_cu < 4; ctr_sub_cu++) |
2141 | 486k | { |
2142 | 486k | ps_cu_node->ps_sub_cu[ctr_sub_cu]->au1_best_mode_1tu[0] = |
2143 | 486k | INTRA_DC; |
2144 | 486k | ps_cu_node->ps_sub_cu[ctr_sub_cu]->au1_best_mode_4tu[0] = |
2145 | 486k | INTRA_DC; |
2146 | 486k | ps_cu_node->ps_sub_cu[ctr_sub_cu]->au4_best_cost_1tu[0] = |
2147 | 486k | MAX_INTRA_COST_IPE; |
2148 | | |
2149 | 486k | ps_cu_node->ps_sub_cu[ctr_sub_cu]->au4_best_cost_4tu[0] = |
2150 | 486k | MAX_INTRA_COST_IPE; |
2151 | 486k | ps_cu_node->ps_sub_cu[ctr_sub_cu]->best_cost = |
2152 | 486k | MAX_INTRA_COST_IPE; |
2153 | 486k | } |
2154 | | |
2155 | 121k | pi4_intra_8_cost[(i & 1) + (MAX_CU_IN_CTB_ROW * (i >> 1))] = |
2156 | 121k | MAX_INTRA_COST_IPE; |
2157 | | |
2158 | 121k | ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2)); |
2159 | 121k | ASSERT(ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1] != -2); |
2160 | 121k | i1_cu_possible_qp = ihevce_cu_level_qp_mod( |
2161 | 121k | ps_ctxt->i4_qscale, |
2162 | 121k | ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1], |
2163 | 121k | ps_ctxt->ld_curr_frame_8x8_log_avg[1], |
2164 | 121k | f_strength, |
2165 | 121k | &i4_act_factor, |
2166 | 121k | &i4_q_scale_q3_mod, |
2167 | 121k | ps_ctxt->ps_rc_quant_ctxt); |
2168 | | |
2169 | | /* set neighbours even if intra is not evaluated, since source is always available. */ |
2170 | 121k | ihevce_set_nbr_map( |
2171 | 121k | ps_ctxt->pu1_ctb_nbr_map, |
2172 | 121k | ps_ctxt->i4_nbr_map_strd, |
2173 | 121k | ps_cu_node->ps_parent->u2_x0 << 1, |
2174 | 121k | ps_cu_node->ps_parent->u2_y0 << 1, |
2175 | 121k | (ps_cu_node->ps_parent->u1_cu_size >> 2), |
2176 | 121k | 1); |
2177 | | |
2178 | | //ps_row_cu++; |
2179 | 121k | } |
2180 | 136k | blk_cnt++; |
2181 | 136k | ps_ed_blk_l1++; |
2182 | 136k | } |
2183 | 34.1k | } |
2184 | 174k | else |
2185 | 174k | { |
2186 | | //cu_intra_cand_t *ps_cu_intra_cand; |
2187 | 174k | WORD8 i1_cu_possible_qp; |
2188 | 174k | WORD32 i4_act_factor; |
2189 | 174k | WORD32 i4_q_scale_q3_mod; |
2190 | | |
2191 | 174k | ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2)); |
2192 | 174k | ASSERT(ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1] != -2); |
2193 | 174k | i1_cu_possible_qp = ihevce_cu_level_qp_mod( |
2194 | 174k | ps_ctxt->i4_qscale, |
2195 | 174k | ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1], |
2196 | 174k | ps_ctxt->ld_curr_frame_8x8_log_avg[1], |
2197 | 174k | f_strength, |
2198 | 174k | &i4_act_factor, |
2199 | 174k | &i4_q_scale_q3_mod, |
2200 | 174k | ps_ctxt->ps_rc_quant_ctxt); |
2201 | | |
2202 | | /* 64x64 merge is not possible */ |
2203 | 174k | merge_64x64 = 0; |
2204 | | |
2205 | 174k | ps_intra32_analyse->b1_merge_flag = 0; |
2206 | | |
2207 | 174k | ps_intra16_analyse->b1_merge_flag = 0; |
2208 | | |
2209 | | /* by default 16x16 modes are set to default values DC and Planar */ |
2210 | 174k | ps_intra16_analyse->au1_best_modes_8x8_tu[0] = 0; |
2211 | 174k | ps_intra16_analyse->au1_best_modes_8x8_tu[1] = 1; |
2212 | 174k | ps_intra16_analyse->au1_best_modes_8x8_tu[2] = 255; |
2213 | | |
2214 | 174k | ps_intra16_analyse->au1_best_modes_16x16_tu[0] = 0; |
2215 | 174k | ps_intra16_analyse->au1_best_modes_16x16_tu[1] = 1; |
2216 | 174k | ps_intra16_analyse->au1_best_modes_16x16_tu[2] = 255; |
2217 | 174k | ps_intra16_analyse->b1_split_flag = 1; |
2218 | 174k | ps_intra16_analyse->b1_valid_cu = 1; |
2219 | | |
2220 | 871k | for(i = 0; i < 4; i++) |
2221 | 697k | { |
2222 | 697k | intra8_analyse_t *ps_intra8_analyse; |
2223 | 697k | cu_pos_x = gau1_cu_pos_x[blk_cnt]; |
2224 | 697k | cu_pos_y = gau1_cu_pos_y[blk_cnt]; |
2225 | 697k | if((cu_pos_x < num_8x8_blks_x) && (cu_pos_y < num_8x8_blks_y)) |
2226 | 578k | { |
2227 | | //ps_cu_intra_cand = &ps_row_cu->s_cu_intra_cand; |
2228 | | //ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE; |
2229 | | |
2230 | | //ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_mode; |
2231 | | |
2232 | 578k | child_cost_least = 0; |
2233 | | |
2234 | 578k | ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[i]; |
2235 | 578k | ps_cu_node->ps_parent->u1_cu_size = 8; |
2236 | 578k | ps_cu_node->ps_parent->u2_x0 = |
2237 | 578k | gau1_cu_pos_x[blk_cnt]; /* Populate properly */ |
2238 | 578k | ps_cu_node->ps_parent->u2_y0 = |
2239 | 578k | gau1_cu_pos_y[blk_cnt]; /* Populate properly */ |
2240 | | |
2241 | | //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map)); |
2242 | | |
2243 | | /*EARLY DECISION 8x8 block */ |
2244 | 578k | ihevce_pu_calc_8x8_blk( |
2245 | 578k | ps_curr_src, ps_ctxt, ps_cu_node, ps_ctxt->ps_func_selector); |
2246 | 2.89M | for(j = 0; j < 4; j++) |
2247 | 2.31M | { |
2248 | 2.31M | child_cost_least += ps_cu_node->ps_sub_cu[j]->best_cost; |
2249 | 2.31M | child_satd[j] = ps_cu_node->ps_sub_cu[j]->best_satd; |
2250 | 2.31M | } |
2251 | | |
2252 | | /* Based on the flag, CU = 4TU modes decision can be disabled, CU = 4PU is retained */ |
2253 | 578k | if(0 == ps_ctxt->u1_disable_child_cu_decide) |
2254 | 578k | { |
2255 | 578k | ihevce_set_nbr_map( |
2256 | 578k | ps_ctxt->pu1_ctb_nbr_map, |
2257 | 578k | ps_ctxt->i4_nbr_map_strd, |
2258 | 578k | ps_cu_node->ps_parent->u2_x0 << 1, |
2259 | 578k | ps_cu_node->ps_parent->u2_y0 << 1, |
2260 | 578k | (ps_cu_node->ps_parent->u1_cu_size >> 2), |
2261 | 578k | 0); |
2262 | | |
2263 | | //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map)); |
2264 | | |
2265 | | /* Eval for TUSize = CuSize */ |
2266 | 578k | ihevce_mode_eval_filtering( |
2267 | 578k | ps_cu_node->ps_parent, |
2268 | 578k | ps_cu_node, |
2269 | 578k | ps_ctxt, |
2270 | 578k | ps_curr_src, |
2271 | 578k | 26, |
2272 | 578k | &ps_cu_node->ps_parent->au4_best_cost_1tu[0], |
2273 | 578k | &ps_cu_node->ps_parent->au1_best_mode_1tu[0], |
2274 | 578k | step2_bypass, |
2275 | 578k | 1); |
2276 | | |
2277 | 578k | if(i4_enable_1cu_4tu) |
2278 | 314k | { |
2279 | | /* Eval for TUSize = CuSize/2 */ |
2280 | 314k | ihevce_mode_eval_filtering( |
2281 | 314k | ps_cu_node->ps_parent, |
2282 | 314k | ps_cu_node, |
2283 | 314k | ps_ctxt, |
2284 | 314k | ps_curr_src, |
2285 | 314k | 26, |
2286 | 314k | &ps_cu_node->ps_parent->au4_best_cost_4tu[0], |
2287 | 314k | &ps_cu_node->ps_parent->au1_best_mode_4tu[0], |
2288 | 314k | step2_bypass, |
2289 | 314k | 0); |
2290 | 314k | } |
2291 | 263k | else |
2292 | 263k | { |
2293 | | /* 4TU not evaluated : 4tu modes set same as 1tu modes */ |
2294 | 263k | memcpy( |
2295 | 263k | &ps_cu_node->ps_parent->au1_best_mode_4tu[0], |
2296 | 263k | &ps_cu_node->ps_parent->au1_best_mode_1tu[0], |
2297 | 263k | NUM_BEST_MODES); |
2298 | | |
2299 | | /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */ |
2300 | 263k | memcpy( |
2301 | 263k | &ps_cu_node->ps_parent->au4_best_cost_4tu[0], |
2302 | 263k | &ps_cu_node->ps_parent->au4_best_cost_1tu[0], |
2303 | 263k | NUM_BEST_MODES * sizeof(WORD32)); |
2304 | 263k | } |
2305 | | |
2306 | | /* Update parent cost */ |
2307 | 578k | parent_cost = |
2308 | 578k | MIN(ps_cu_node->ps_parent->au4_best_cost_4tu[0], |
2309 | 578k | ps_cu_node->ps_parent->au4_best_cost_1tu[0]); |
2310 | | |
2311 | | /* Select the best mode to be populated as top and left nbr depending on the |
2312 | | 4tu and 1tu cost */ |
2313 | 578k | if(ps_cu_node->ps_parent->au4_best_cost_4tu[0] > |
2314 | 578k | ps_cu_node->ps_parent->au4_best_cost_1tu[0]) |
2315 | 47.7k | { |
2316 | 47.7k | ps_cu_node->ps_parent->best_mode = |
2317 | 47.7k | ps_cu_node->ps_parent->au1_best_mode_1tu[0]; |
2318 | 47.7k | } |
2319 | 531k | else |
2320 | 531k | { |
2321 | 531k | ps_cu_node->ps_parent->best_mode = |
2322 | 531k | ps_cu_node->ps_parent->au1_best_mode_4tu[0]; |
2323 | 531k | } |
2324 | 578k | } |
2325 | | |
2326 | | /* set the CU valid flag */ |
2327 | 578k | ps_intra8_analyse->b1_valid_cu = 1; |
2328 | 578k | ps_intra8_analyse->b1_enable_nxn = 0; |
2329 | | |
2330 | | /* storing the modes to intra 8 analyse */ |
2331 | | |
2332 | | /* store the best 8x8 modes 8x8 tu */ |
2333 | 578k | memcpy( |
2334 | 578k | &ps_intra8_analyse->au1_best_modes_8x8_tu[0], |
2335 | 578k | &ps_cu_node->ps_parent->au1_best_mode_1tu[0], |
2336 | 578k | sizeof(UWORD8) * (NUM_BEST_MODES)); |
2337 | 578k | ps_intra8_analyse->au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255; |
2338 | | |
2339 | | /* store the best 8x8 modes 4x4 tu */ |
2340 | 578k | memcpy( |
2341 | 578k | &ps_intra8_analyse->au1_best_modes_4x4_tu[0], |
2342 | 578k | &ps_cu_node->ps_parent->au1_best_mode_4tu[0], |
2343 | 578k | sizeof(UWORD8) * (NUM_BEST_MODES)); |
2344 | 578k | ps_intra8_analyse->au1_best_modes_4x4_tu[NUM_BEST_MODES] = 255; |
2345 | | |
2346 | | /*As 8*8 has won, pick L1 4x4 qp which is equal to |
2347 | | L1 8x8 Qp*/ |
2348 | | //ps_row_cu->u1_cu_possible_qp[0] = u1_cu_possible_qp; |
2349 | | //ps_row_cu->i4_act_factor[0][1] = i4_act_factor; |
2350 | | |
2351 | 578k | parent_best_mode = ps_cu_node->ps_parent->best_mode; |
2352 | 578k | if(parent_cost <= |
2353 | 578k | child_cost_least + |
2354 | 578k | (ps_ctxt->i4_ol_satd_lambda * CHILD_BIAS >> LAMBDA_Q_SHIFT)) |
2355 | 317k | { |
2356 | | /*CU = 4TU */ |
2357 | 317k | ihevce_update_cand_list( |
2358 | 317k | ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt); |
2359 | | |
2360 | | /* store the child 8x8 costs */ |
2361 | 317k | pi4_intra_8_cost[(i & 1) + (MAX_CU_IN_CTB_ROW * (i >> 1))] = |
2362 | 317k | parent_cost; |
2363 | | |
2364 | | /* cost accumulation of best cu size candidate */ |
2365 | 317k | i8_frame_acc_satd_cost += parent_cost; |
2366 | | |
2367 | | /*satd/mod_qp accumulation of best cu */ |
2368 | 317k | i8_frame_acc_satd_by_modqp_q10 += |
2369 | 317k | ((LWORD64)ps_cu_node->ps_parent->best_satd |
2370 | 317k | << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / |
2371 | 317k | i4_q_scale_q3_mod; |
2372 | | |
2373 | | /* Accumalate mode bits for all child blocks */ |
2374 | 317k | i8_frame_acc_mode_bits_cost += |
2375 | 317k | ps_cu_node->ps_parent->u2_mode_bits_cost; |
2376 | | |
2377 | | /* satd and mpm bits accumalation of best cu size candiate */ |
2378 | 317k | i4_ctb_acc_satd += ps_cu_node->ps_parent->best_satd; |
2379 | | |
2380 | | /* accumulate the 16x16 cost*/ |
2381 | 317k | if(MAX_INTRA_COST_IPE == *pi4_intra_16_cost) |
2382 | 91.1k | { |
2383 | 91.1k | *pi4_intra_16_cost = parent_cost; |
2384 | 91.1k | } |
2385 | 226k | else |
2386 | 226k | { |
2387 | 226k | *pi4_intra_16_cost += parent_cost; |
2388 | 226k | } |
2389 | | |
2390 | | /* accumulate the 32x32 cost*/ |
2391 | 317k | if(MAX_INTRA_COST_IPE == *pi4_intra_32_cost) |
2392 | 37.2k | { |
2393 | 37.2k | *pi4_intra_32_cost = parent_cost; |
2394 | 37.2k | } |
2395 | 280k | else |
2396 | 280k | { |
2397 | 280k | *pi4_intra_32_cost += parent_cost; |
2398 | 280k | } |
2399 | 317k | } |
2400 | 261k | else |
2401 | 261k | { |
2402 | | /*CU = 4PU*/ |
2403 | | //ps_row_cu->b3_cu_pos_x = (UWORD8) ps_cu_node->ps_parent->u2_x0; |
2404 | | //ps_row_cu->b3_cu_pos_y = (UWORD8) ps_cu_node->ps_parent->u2_y0; |
2405 | | //ps_row_cu->u1_cu_size = ps_cu_node->ps_parent->u1_cu_size; |
2406 | | |
2407 | | /* store the child 8x8 costs with 4x4 pu summed cost */ |
2408 | 261k | pi4_intra_8_cost[(i & 1) + (MAX_CU_IN_CTB_ROW * (i >> 1))] = |
2409 | 261k | (child_cost_least); |
2410 | | |
2411 | | /* accumulate the 16x16 cost*/ |
2412 | 261k | if(MAX_INTRA_COST_IPE == *pi4_intra_16_cost) |
2413 | 83.2k | { |
2414 | 83.2k | *pi4_intra_16_cost = child_cost_least; |
2415 | 83.2k | } |
2416 | 177k | else |
2417 | 177k | { |
2418 | 177k | *pi4_intra_16_cost += child_cost_least; |
2419 | 177k | } |
2420 | | |
2421 | | /* cost accumalation of best cu size candiate */ |
2422 | 261k | i8_frame_acc_satd_cost += child_cost_least; |
2423 | | |
2424 | 1.30M | for(j = 0; j < 4; j++) |
2425 | 1.04M | { |
2426 | | /*satd/qp accumualtion*/ |
2427 | 1.04M | i8_frame_acc_satd_by_modqp_q10 += |
2428 | 1.04M | ((LWORD64)child_satd[j] |
2429 | 1.04M | << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / |
2430 | 1.04M | i4_q_scale_q3_mod; |
2431 | | |
2432 | | /* Accumalate mode bits for all child blocks */ |
2433 | 1.04M | i8_frame_acc_mode_bits_cost += |
2434 | 1.04M | ps_cu_node->ps_sub_cu[j]->u2_mode_bits_cost; |
2435 | | |
2436 | | /* satd and mpm bits accumalation of best cu size candiate */ |
2437 | 1.04M | i4_ctb_acc_satd += child_satd[j]; |
2438 | 1.04M | } |
2439 | | |
2440 | | /* accumulate the 32x32 cost*/ |
2441 | 261k | if(MAX_INTRA_COST_IPE == *pi4_intra_32_cost) |
2442 | 32.9k | { |
2443 | 32.9k | *pi4_intra_32_cost = child_cost_least; |
2444 | 32.9k | } |
2445 | 228k | else |
2446 | 228k | { |
2447 | 228k | *pi4_intra_32_cost += child_cost_least; |
2448 | 228k | } |
2449 | | |
2450 | 261k | ps_intra8_analyse->b1_enable_nxn = 1; |
2451 | | |
2452 | | /* Insert the best 8x8 modes unconditionally */ |
2453 | | |
2454 | 261k | x = ((ps_cu_node->u2_x0 << 3) >> 2) + 1; |
2455 | 261k | y = ((ps_cu_node->u2_y0 << 3) >> 2) + 1; |
2456 | 261k | size = ps_cu_node->u1_cu_size >> 2; |
2457 | | |
2458 | 261k | ps_ctxt->au1_ctb_mode_map[y][x] = |
2459 | 261k | ps_cu_node->ps_sub_cu[0]->best_mode; |
2460 | 261k | ps_ctxt->au1_ctb_mode_map[y][x + 1] = |
2461 | 261k | ps_cu_node->ps_sub_cu[1]->best_mode; |
2462 | 261k | ps_ctxt->au1_ctb_mode_map[y + 1][x] = |
2463 | 261k | ps_cu_node->ps_sub_cu[2]->best_mode; |
2464 | 261k | ps_ctxt->au1_ctb_mode_map[y + 1][x + 1] = |
2465 | 261k | ps_cu_node->ps_sub_cu[3]->best_mode; |
2466 | 261k | } |
2467 | | /* NXN mode population */ |
2468 | 2.89M | for(j = 0; j < 4; j++) |
2469 | 2.31M | { |
2470 | 2.31M | cand_mode_list[0] = |
2471 | 2.31M | ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0]; |
2472 | 2.31M | cand_mode_list[1] = |
2473 | 2.31M | ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[1]; |
2474 | 2.31M | cand_mode_list[2] = |
2475 | 2.31M | ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[2]; |
2476 | | |
2477 | 2.31M | if(1) |
2478 | 2.31M | { |
2479 | | /* Populate best 3 nxn modes */ |
2480 | 2.31M | ps_intra8_analyse->au1_4x4_best_modes[j][0] = |
2481 | 2.31M | cand_mode_list[0]; |
2482 | 2.31M | ps_intra8_analyse->au1_4x4_best_modes[j][1] = |
2483 | 2.31M | cand_mode_list[1]; //(ps_ed + 1)->best_mode; |
2484 | 2.31M | ps_intra8_analyse->au1_4x4_best_modes[j][2] = |
2485 | 2.31M | cand_mode_list[2]; //(ps_ed + 2)->best_mode; |
2486 | 2.31M | ps_intra8_analyse->au1_4x4_best_modes[j][3] = 255; |
2487 | | |
2488 | | //memcpy(ps_intra8_analyse->au1_4x4_best_modes[j], ps_row_cu->s_cu_intra_cand.au1_intra_luma_modes_nxn[j], 4); |
2489 | 2.31M | } |
2490 | | /* For HQ, all 35 modes to be used for RDOPT, removed from here for memory clean-up */ |
2491 | | |
2492 | 0 | else /* IHEVCE_QUALITY_P0 == i4_quality_preset */ |
2493 | 0 | { |
2494 | 0 | /* To indicate to enc loop that NXN is enabled in HIGH QUALITY for CU 8x8 */ |
2495 | 0 | ps_intra8_analyse->au1_4x4_best_modes[j][0] = 0; |
2496 | 0 | } |
2497 | | |
2498 | 2.31M | ps_intra8_analyse |
2499 | 2.31M | ->au1_4x4_best_modes[j][MAX_INTRA_CU_CANDIDATES] = 255; |
2500 | 2.31M | } |
2501 | | |
2502 | | //ps_row_cu++; |
2503 | 578k | } |
2504 | 118k | else |
2505 | 118k | { |
2506 | | /* For Incomplete CTB, 16x16 is not valid */ |
2507 | 118k | ps_intra16_analyse->b1_valid_cu = 0; |
2508 | 118k | } |
2509 | 697k | blk_cnt++; |
2510 | 697k | ps_ed_blk_l1++; |
2511 | 697k | } |
2512 | | //ps_ed_blk_l2 ++; |
2513 | 174k | } //else of EIID |
2514 | 208k | #endif |
2515 | 208k | } |
2516 | 1.30M | } |
2517 | 750k | else |
2518 | 750k | { |
2519 | | /* For incomplete CTB, init valid CU to 0 */ |
2520 | 750k | ps_ed_blk_l1++; |
2521 | 750k | ps_intra32_analyse->b1_valid_cu = 0; |
2522 | 750k | ps_intra16_analyse[0].b1_valid_cu = 0; |
2523 | 750k | blk_cnt++; |
2524 | 750k | merge_64x64 = 0; |
2525 | 750k | } |
2526 | 2.05M | } while(blk_cnt != MAX_CTB_SIZE); |
2527 | | /* if 64x64 merge is possible then check for 32x32 having same best modes */ |
2528 | 203k | if(1 == merge_64x64) |
2529 | 66.2k | { |
2530 | 66.2k | WORD32 act_mode = au1_best_32x32_modes[0]; |
2531 | | |
2532 | 66.2k | ps_ed_blk_l2 = ps_ed_l2_ctb; |
2533 | 66.2k | best_mode = ps_ed_blk_l2->best_mode; |
2534 | 66.2k | merge_64x64 = |
2535 | 66.2k | ((act_mode == au1_best_32x32_modes[0]) + (act_mode == au1_best_32x32_modes[1]) + |
2536 | 66.2k | (act_mode == au1_best_32x32_modes[2]) + |
2537 | 66.2k | (act_mode == au1_best_32x32_modes[3]) == |
2538 | 66.2k | 4); |
2539 | 66.2k | if(merge_64x64 == 1) |
2540 | 60.5k | best_mode = au1_best_32x32_modes[0]; |
2541 | 5.71k | else |
2542 | 5.71k | best_mode = ps_ed_blk_l2->best_mode; |
2543 | | /* All 32x32 costs are accumalated to 64x64 cost */ |
2544 | 66.2k | ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = 0; |
2545 | 331k | for(i = 0; i < 4; i++) |
2546 | 265k | { |
2547 | 265k | ps_l0_ipe_out_ctb->i4_best64x64_intra_cost += |
2548 | 265k | ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[i]; |
2549 | 265k | } |
2550 | | |
2551 | | /* If all modes of 32x32 block is not same */ |
2552 | 66.2k | if(0 == merge_64x64) |
2553 | 5.71k | { |
2554 | | /*Compute CHILD cost for 32x32 */ |
2555 | 5.71k | WORD32 child_cost_64x64 = au4_best_32x32_cost[0] + au4_best_32x32_cost[1] + |
2556 | 5.71k | au4_best_32x32_cost[2] + au4_best_32x32_cost[3]; |
2557 | 5.71k | WORD32 cost = MAX_INTRA_COST_IPE; |
2558 | | |
2559 | 5.71k | WORD32 best_mode_temp = 0; |
2560 | | /*Compute 64x64 cost for each mode of 32x32*/ |
2561 | 28.5k | for(i = 0; i < 4; i++) |
2562 | 22.8k | { |
2563 | 22.8k | WORD32 mode = au1_best_32x32_modes[i]; |
2564 | 22.8k | if(mode < 2) |
2565 | 12.1k | mode = 26; |
2566 | 22.8k | ps_cu_node->ps_parent->u1_cu_size = 64; |
2567 | 22.8k | ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[0]; /* Populate properly */ |
2568 | 22.8k | ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[0]; /* Populate properly */ |
2569 | | |
2570 | 22.8k | ihevce_set_nbr_map( |
2571 | 22.8k | ps_ctxt->pu1_ctb_nbr_map, |
2572 | 22.8k | ps_ctxt->i4_nbr_map_strd, |
2573 | 22.8k | (ps_cu_node->ps_parent->u2_x0 << 1), |
2574 | 22.8k | (ps_cu_node->ps_parent->u2_y0 << 1), |
2575 | 22.8k | (ps_cu_node->ps_parent->u1_cu_size >> 2), |
2576 | 22.8k | 0); |
2577 | | |
2578 | 22.8k | ihevce_mode_eval_filtering( |
2579 | 22.8k | ps_cu_node->ps_parent, |
2580 | 22.8k | ps_cu_node, |
2581 | 22.8k | ps_ctxt, |
2582 | 22.8k | ps_curr_src, |
2583 | 22.8k | mode, |
2584 | 22.8k | &ps_cu_node->ps_parent->au4_best_cost_1tu[0], |
2585 | 22.8k | &ps_cu_node->ps_parent->au1_best_mode_1tu[0], |
2586 | 22.8k | !step2_bypass, |
2587 | 22.8k | 0); |
2588 | | |
2589 | 22.8k | parent_cost = ps_cu_node->ps_parent->best_cost; |
2590 | 22.8k | if(cost > parent_cost) |
2591 | 6.82k | { |
2592 | 6.82k | cost = parent_cost; |
2593 | 6.82k | best_mode_temp = ps_cu_node->ps_parent->best_mode; |
2594 | 6.82k | } |
2595 | 22.8k | } |
2596 | 5.71k | if(cost < child_cost_64x64) |
2597 | 2.41k | { |
2598 | 2.41k | merge_64x64 = 1; |
2599 | 2.41k | best_mode = best_mode_temp; |
2600 | | |
2601 | | /* Update 64x64 cost if CU 64x64 is chosen */ |
2602 | 2.41k | ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = cost; |
2603 | | |
2604 | | /* Accumalate the least cost for CU 64x64 */ |
2605 | 2.41k | i8_frame_acc_satd_cost = cost; |
2606 | 2.41k | i8_frame_acc_mode_bits_cost = ps_cu_node->ps_parent->u2_mode_bits_cost; |
2607 | | |
2608 | | /* satd and mpm bits accumalation of best cu size candiate */ |
2609 | 2.41k | i4_ctb_acc_satd = ps_cu_node->ps_parent->best_satd; |
2610 | 2.41k | } |
2611 | 5.71k | } |
2612 | 66.2k | } |
2613 | | |
2614 | 203k | if(merge_64x64) |
2615 | 62.9k | { |
2616 | 62.9k | WORD32 i, j; |
2617 | 62.9k | intra32_analyse_t *ps_intra32_analyse; |
2618 | 62.9k | intra16_analyse_t *ps_intra16_analyse; |
2619 | 62.9k | WORD32 row, col; |
2620 | 62.9k | WORD32 i4_q_scale_q3_mod; |
2621 | 62.9k | WORD8 i1_cu_possible_qp; |
2622 | 62.9k | WORD32 i4_act_factor; |
2623 | | //ps_row_cu = ps_curr_cu; |
2624 | 62.9k | ps_ctb_out->u4_cu_split_flags = 0x0; |
2625 | 62.9k | ps_ed_blk_l1 = ps_ed_l1_ctb; |
2626 | 62.9k | ps_ed_blk_l2 = ps_ed_l2_ctb; |
2627 | | |
2628 | 62.9k | ps_l0_ipe_out_ctb->u1_split_flag = 0; |
2629 | | |
2630 | | /* If CU size of 64x64 is chosen, disable the merge flag of every 16x16 block */ |
2631 | 314k | for(i = 0; i < 4; i++) |
2632 | 251k | { |
2633 | | /* get the corresponding intra 32 analyse pointer use (blk_cnt / 16) */ |
2634 | | /* blk cnt is in terms of 8x8 units so a 32x32 will have 16 8x8 units */ |
2635 | 251k | ps_intra32_analyse = &ps_l0_ipe_out_ctb->as_intra32_analyse[i]; |
2636 | | |
2637 | 1.25M | for(j = 0; j < 4; j++) |
2638 | 1.00M | { |
2639 | | /* get the corresponding intra 16 analyse pointer use (blk_cnt & 0xF / 4)*/ |
2640 | | /* blk cnt is in terms of 8x8 units so a 16x16 will have 4 8x8 units */ |
2641 | 1.00M | ps_intra16_analyse = &ps_intra32_analyse->as_intra16_analyse[j]; |
2642 | 1.00M | ps_intra16_analyse->b1_merge_flag = 0; |
2643 | 1.00M | } |
2644 | 251k | } |
2645 | | |
2646 | | /* CU size 64x64 and fill the final cu params */ |
2647 | | //ps_row_cu->b3_cu_pos_x = gau1_cu_pos_x[0]; |
2648 | | //ps_row_cu->b3_cu_pos_y = gau1_cu_pos_y[0]; |
2649 | | //ps_row_cu->u1_cu_size = 64; |
2650 | | |
2651 | | /* Candidate mode Update */ |
2652 | 62.9k | cand_mode_list[0] = best_mode; |
2653 | 62.9k | if(cand_mode_list[0] > 1) |
2654 | 9.28k | { |
2655 | 9.28k | if(cand_mode_list[0] == 2) |
2656 | 1.02k | { |
2657 | 1.02k | cand_mode_list[1] = 34; |
2658 | 1.02k | cand_mode_list[2] = 3; |
2659 | 1.02k | } |
2660 | 8.26k | else if(cand_mode_list[0] == 34) |
2661 | 10 | { |
2662 | 10 | cand_mode_list[1] = 2; |
2663 | 10 | cand_mode_list[2] = 33; |
2664 | 10 | } |
2665 | 8.25k | else |
2666 | 8.25k | { |
2667 | 8.25k | cand_mode_list[1] = cand_mode_list[0] - 1; |
2668 | 8.25k | cand_mode_list[2] = cand_mode_list[0] + 1; |
2669 | 8.25k | } |
2670 | | //cand_mode_list[1] = ps_ed_blk_l1->nang_attr.best_mode; |
2671 | | //cand_mode_list[2] = ps_ed_blk_l1->ang_attr.best_mode; |
2672 | 9.28k | } |
2673 | 53.6k | else |
2674 | 53.6k | { |
2675 | 53.6k | cand_mode_list[0] = 0; |
2676 | 53.6k | cand_mode_list[1] = 1; |
2677 | 53.6k | cand_mode_list[2] = 26; |
2678 | | //cand_mode_list[2] = ps_ed_blk_l1->nang_attr.best_mode; |
2679 | 53.6k | } |
2680 | | |
2681 | | /* All 32x32 costs are accumalated to 64x64 cost */ |
2682 | 62.9k | ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = 0; |
2683 | 314k | for(i = 0; i < 4; i++) |
2684 | 251k | { |
2685 | 251k | ps_l0_ipe_out_ctb->i4_best64x64_intra_cost += |
2686 | 251k | ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[i]; |
2687 | 251k | } |
2688 | | /* by default 64x64 modes are set to default values DC and Planar */ |
2689 | 62.9k | ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[0] = cand_mode_list[0]; |
2690 | 62.9k | ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[1] = cand_mode_list[1]; |
2691 | 62.9k | ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[2] = cand_mode_list[2]; |
2692 | 62.9k | ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[3] = 255; |
2693 | | |
2694 | | /* Update CTB mode map for the finalised CU */ |
2695 | 62.9k | x = ((ps_cu_node->u2_x0 << 3) >> 2) + 1; |
2696 | 62.9k | y = ((ps_cu_node->u2_y0 << 3) >> 2) + 1; |
2697 | 62.9k | size = ps_cu_node->u1_cu_size >> 2; |
2698 | | |
2699 | 585k | for(row = y; row < (y + size); row++) |
2700 | 523k | { |
2701 | 5.01M | for(col = x; col < (x + size); col++) |
2702 | 4.49M | { |
2703 | 4.49M | ps_ctxt->au1_ctb_mode_map[row][col] = best_mode; |
2704 | 4.49M | } |
2705 | 523k | } |
2706 | | |
2707 | 62.9k | ihevce_set_nbr_map( |
2708 | 62.9k | ps_ctxt->pu1_ctb_nbr_map, |
2709 | 62.9k | ps_ctxt->i4_nbr_map_strd, |
2710 | 62.9k | (ps_cu_node->u2_x0 << 1), |
2711 | 62.9k | (ps_cu_node->u2_y0 << 1), |
2712 | 62.9k | (ps_cu_node->u1_cu_size >> 2), |
2713 | 62.9k | 1); |
2714 | | |
2715 | | /*As 64*64 has won, pick L1 32x32 qp*/ |
2716 | | //ASSERT(((blk_cnt>>6) & 0xF) == (blk_cnt>>6)); |
2717 | | //ASSERT((blk_cnt>>6) == 0); |
2718 | 62.9k | ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][0] != -2); |
2719 | 62.9k | i1_cu_possible_qp = ihevce_cu_level_qp_mod( |
2720 | 62.9k | ps_ctxt->i4_qscale, |
2721 | 62.9k | ps_ed_ctb_l1->i4_32x32_satd[0][0], |
2722 | 62.9k | ps_ctxt->ld_curr_frame_32x32_log_avg[0], |
2723 | 62.9k | f_strength, |
2724 | 62.9k | &i4_act_factor, |
2725 | 62.9k | &i4_q_scale_q3_mod, |
2726 | 62.9k | ps_ctxt->ps_rc_quant_ctxt); |
2727 | | |
2728 | 62.9k | i8_frame_acc_satd_by_modqp_q10 = |
2729 | 62.9k | (i8_frame_acc_satd_cost << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) / |
2730 | 62.9k | i4_q_scale_q3_mod; |
2731 | | /* Increment pointers */ |
2732 | 62.9k | ps_ed_blk_l1 += 64; |
2733 | 62.9k | ps_ed_blk_l2 += 16; |
2734 | | //ps_row_cu++; |
2735 | 62.9k | } |
2736 | 203k | } |
2737 | | |
2738 | | //ps_ctb_out->u1_num_cus_in_ctb = (UWORD8)(ps_row_cu - ps_curr_cu); |
2739 | | |
2740 | 203k | { |
2741 | 203k | WORD32 i4_i, i4_j; |
2742 | 203k | WORD32 dummy; |
2743 | 203k | WORD8 i1_cu_qp; |
2744 | 203k | (void)i1_cu_qp; |
2745 | | /*MAM_VAR_L1*/ |
2746 | 609k | for(i4_j = 0; i4_j < 2; i4_j++) |
2747 | 406k | { |
2748 | 406k | i4_mod_factor_num = ps_ctxt->ai4_mod_factor_derived_by_variance[i4_j]; |
2749 | 406k | f_strength = ps_ctxt->f_strength; |
2750 | | |
2751 | | //i4_mod_factor_num = 4; |
2752 | | |
2753 | 406k | ps_ed_blk_l1 = ps_ed_l1_ctb; |
2754 | 406k | ps_ed_blk_l2 = ps_ed_l2_ctb; |
2755 | | //ps_row_cu = ps_curr_cu; |
2756 | | |
2757 | | /*Valid only for complete CTB */ |
2758 | 406k | if((64 == u1_curr_ctb_wdt) && (64 == u1_curr_ctb_hgt)) |
2759 | 361k | { |
2760 | 361k | ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][0] != -2); |
2761 | 361k | ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][1] != -2); |
2762 | 361k | ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][2] != -2); |
2763 | 361k | ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][3] != -2); |
2764 | | |
2765 | 361k | i1_cu_qp = ihevce_cu_level_qp_mod( |
2766 | 361k | ps_ctxt->i4_qscale, |
2767 | 361k | ps_ed_ctb_l1->i4_32x32_satd[0][0], |
2768 | 361k | ps_ctxt->ld_curr_frame_32x32_log_avg[0], |
2769 | 361k | f_strength, |
2770 | 361k | &ps_l0_ipe_out_ctb->i4_64x64_act_factor[0][i4_j], |
2771 | 361k | &dummy, |
2772 | 361k | ps_ctxt->ps_rc_quant_ctxt); |
2773 | | |
2774 | 361k | i1_cu_qp = ihevce_cu_level_qp_mod( |
2775 | 361k | ps_ctxt->i4_qscale, |
2776 | 361k | ps_ed_ctb_l1->i4_32x32_satd[0][1], |
2777 | 361k | ps_ctxt->ld_curr_frame_32x32_log_avg[1], |
2778 | 361k | f_strength, |
2779 | 361k | &ps_l0_ipe_out_ctb->i4_64x64_act_factor[1][i4_j], |
2780 | 361k | &dummy, |
2781 | 361k | ps_ctxt->ps_rc_quant_ctxt); |
2782 | 361k | i1_cu_qp = ihevce_cu_level_qp_mod( |
2783 | 361k | ps_ctxt->i4_qscale, |
2784 | 361k | ps_ed_ctb_l1->i4_32x32_satd[0][2], |
2785 | 361k | ps_ctxt->ld_curr_frame_32x32_log_avg[2], |
2786 | 361k | f_strength, |
2787 | 361k | &ps_l0_ipe_out_ctb->i4_64x64_act_factor[2][i4_j], |
2788 | 361k | &dummy, |
2789 | 361k | ps_ctxt->ps_rc_quant_ctxt); |
2790 | | |
2791 | 361k | i1_cu_qp = ihevce_cu_level_qp_mod( |
2792 | 361k | ps_ctxt->i4_qscale, |
2793 | 361k | ps_ed_ctb_l1->i4_32x32_satd[0][3], |
2794 | 361k | 2.0 + ps_ctxt->ld_curr_frame_16x16_log_avg[0], |
2795 | 361k | f_strength, |
2796 | 361k | &ps_l0_ipe_out_ctb->i4_64x64_act_factor[3][i4_j], |
2797 | 361k | &dummy, |
2798 | 361k | ps_ctxt->ps_rc_quant_ctxt); |
2799 | | |
2800 | 361k | ASSERT(ps_l0_ipe_out_ctb->i4_64x64_act_factor[3][i4_j] > 0); |
2801 | 361k | } |
2802 | 44.5k | else |
2803 | 44.5k | { |
2804 | 44.5k | ps_l0_ipe_out_ctb->i4_64x64_act_factor[0][i4_j] = 1024; |
2805 | 44.5k | ps_l0_ipe_out_ctb->i4_64x64_act_factor[1][i4_j] = 1024; |
2806 | 44.5k | ps_l0_ipe_out_ctb->i4_64x64_act_factor[2][i4_j] = 1024; |
2807 | 44.5k | ps_l0_ipe_out_ctb->i4_64x64_act_factor[3][i4_j] = 1024; |
2808 | 44.5k | } |
2809 | | |
2810 | | /*Store the 8x8 Qps from L2 (in raster order) as output of intra prediction |
2811 | | for the usage by ME*/ |
2812 | | |
2813 | 406k | { |
2814 | 406k | WORD32 pos_x_32, pos_y_32, pos; |
2815 | | //WORD32 i4_incomplete_ctb_val_8; |
2816 | 406k | pos_x_32 = u1_curr_ctb_wdt / 16; |
2817 | 406k | pos_y_32 = u1_curr_ctb_hgt / 16; |
2818 | | |
2819 | 406k | pos = (pos_x_32 < pos_y_32) ? pos_x_32 : pos_y_32; |
2820 | | |
2821 | 2.03M | for(i4_i = 0; i4_i < 4; i4_i++) |
2822 | 1.62M | { |
2823 | 1.62M | if(i4_i < pos) |
2824 | 1.49M | { |
2825 | 1.49M | ASSERT(ps_ed_ctb_l1->i4_16x16_satd[i4_i][0] != -2); |
2826 | 1.49M | ASSERT(ps_ed_ctb_l1->i4_16x16_satd[i4_i][1] != -2); |
2827 | 1.49M | ASSERT(ps_ed_ctb_l1->i4_16x16_satd[i4_i][2] != -2); |
2828 | 1.49M | i1_cu_qp = ihevce_cu_level_qp_mod( |
2829 | 1.49M | ps_ctxt->i4_qscale, |
2830 | 1.49M | ps_ed_ctb_l1->i4_16x16_satd[i4_i][0], |
2831 | 1.49M | ps_ctxt->ld_curr_frame_16x16_log_avg[0], |
2832 | 1.49M | f_strength, |
2833 | 1.49M | &ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][0][i4_j], |
2834 | 1.49M | &dummy, |
2835 | 1.49M | ps_ctxt->ps_rc_quant_ctxt); |
2836 | 1.49M | i1_cu_qp = ihevce_cu_level_qp_mod( |
2837 | 1.49M | ps_ctxt->i4_qscale, |
2838 | 1.49M | ps_ed_ctb_l1->i4_16x16_satd[i4_i][1], |
2839 | 1.49M | ps_ctxt->ld_curr_frame_16x16_log_avg[1], |
2840 | 1.49M | f_strength, |
2841 | 1.49M | &ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][1][i4_j], |
2842 | 1.49M | &dummy, |
2843 | 1.49M | ps_ctxt->ps_rc_quant_ctxt); |
2844 | 1.49M | i1_cu_qp = ihevce_cu_level_qp_mod( |
2845 | 1.49M | ps_ctxt->i4_qscale, |
2846 | 1.49M | ps_ed_ctb_l1->i4_16x16_satd[i4_i][2], |
2847 | 1.49M | ps_ctxt->ld_curr_frame_16x16_log_avg[2], |
2848 | 1.49M | f_strength, |
2849 | 1.49M | &ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][2][i4_j], |
2850 | 1.49M | &dummy, |
2851 | 1.49M | ps_ctxt->ps_rc_quant_ctxt); |
2852 | 1.49M | } |
2853 | 126k | else |
2854 | 126k | { |
2855 | | /*For incomplete CTB */ |
2856 | 126k | ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][0][i4_j] = 1024; |
2857 | 126k | ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][1][i4_j] = 1024; |
2858 | 126k | ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][2][i4_j] = 1024; |
2859 | 126k | } |
2860 | 1.62M | } |
2861 | 406k | } |
2862 | | |
2863 | | /*Store the 8x8 Qps from L1 (in raster order) as output of intra prediction |
2864 | | for the usage by ME*/ |
2865 | 406k | { |
2866 | 406k | WORD32 pos_x_16, pos_y_16, pos; |
2867 | | //WORD32 i4_incomplete_ctb_val_8; |
2868 | 406k | pos_x_16 = u1_curr_ctb_wdt / 4; |
2869 | 406k | pos_y_16 = u1_curr_ctb_hgt / 4; |
2870 | | |
2871 | 406k | pos = (pos_x_16 < pos_y_16) ? pos_x_16 : pos_y_16; |
2872 | 6.91M | for(i4_i = 0; i4_i < 16; i4_i++) |
2873 | 6.50M | { |
2874 | 6.50M | if(i4_i < pos) |
2875 | 6.06M | { |
2876 | 6.06M | ASSERT(ps_ed_ctb_l1->i4_8x8_satd[i4_i][0] != -2); |
2877 | 6.06M | ASSERT(ps_ed_ctb_l1->i4_8x8_satd[i4_i][1] != -2); |
2878 | 6.06M | i1_cu_qp = ihevce_cu_level_qp_mod( |
2879 | 6.06M | ps_ctxt->i4_qscale, |
2880 | 6.06M | ps_ed_ctb_l1->i4_8x8_satd[i4_i][0], |
2881 | 6.06M | ps_ctxt->ld_curr_frame_8x8_log_avg[0], |
2882 | 6.06M | f_strength, |
2883 | 6.06M | &ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][0][i4_j], |
2884 | 6.06M | &dummy, |
2885 | 6.06M | ps_ctxt->ps_rc_quant_ctxt); |
2886 | 6.06M | i1_cu_qp = ihevce_cu_level_qp_mod( |
2887 | 6.06M | ps_ctxt->i4_qscale, |
2888 | 6.06M | ps_ed_ctb_l1->i4_8x8_satd[i4_i][1], |
2889 | 6.06M | ps_ctxt->ld_curr_frame_8x8_log_avg[1], |
2890 | 6.06M | f_strength, |
2891 | 6.06M | &ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][1][i4_j], |
2892 | 6.06M | &dummy, |
2893 | 6.06M | ps_ctxt->ps_rc_quant_ctxt); |
2894 | 6.06M | } |
2895 | 437k | else |
2896 | 437k | { |
2897 | | /*For incomplete CTB */ |
2898 | 437k | ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][0][i4_j] = 1024; |
2899 | 437k | ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][1][i4_j] = 1024; |
2900 | 437k | } |
2901 | 6.50M | } |
2902 | 406k | } |
2903 | 406k | } //for loop |
2904 | | |
2905 | | /* Accumalate the cost of ctb to the total cost */ |
2906 | 203k | ps_ctxt->i8_frame_acc_satd_cost += i8_frame_acc_satd_cost; |
2907 | 203k | ps_ctxt->i8_frame_acc_satd_by_modqp_q10 += i8_frame_acc_satd_by_modqp_q10; |
2908 | | |
2909 | 203k | ps_ctxt->i8_frame_acc_mode_bits_cost += i8_frame_acc_mode_bits_cost; |
2910 | | |
2911 | | /* satd and mpm bits accumalation of best cu size candiate for the ctb */ |
2912 | 203k | ps_l0_ipe_out_ctb->i4_ctb_acc_satd = i4_ctb_acc_satd; |
2913 | 203k | ps_l0_ipe_out_ctb->i4_ctb_acc_mpm_bits = i8_frame_acc_mode_bits_cost; |
2914 | | |
2915 | 203k | ps_ctxt->i8_frame_acc_satd += i4_ctb_acc_satd; |
2916 | 203k | } |
2917 | | |
2918 | 203k | { |
2919 | 203k | WORD32 ctr_8x8; |
2920 | 3.45M | for(ctr_8x8 = 0; ctr_8x8 < (MAX_CU_IN_CTB >> 2); ctr_8x8++) |
2921 | 3.25M | { |
2922 | | /*Accumalate activity factor for Intra and Inter*/ |
2923 | 3.25M | if(ps_l0_ipe_out_ctb->ai4_best_sad_cost_8x8_l1_ipe[ctr_8x8] < |
2924 | 3.25M | ps_ed_ctb_l1->i4_sad_me_for_ref[ctr_8x8]) |
2925 | 64.3k | { |
2926 | 64.3k | ps_l0_ipe_out_ctb->ai4_8x8_act_factor[ctr_8x8] = |
2927 | 64.3k | ps_l0_ipe_out_ctb->i4_16x16_act_factor[ctr_8x8][1][0]; |
2928 | 64.3k | } |
2929 | 3.18M | else |
2930 | 3.18M | { |
2931 | 3.18M | ps_l0_ipe_out_ctb->ai4_8x8_act_factor[ctr_8x8] = |
2932 | 3.18M | ps_l0_ipe_out_ctb->i4_16x16_act_factor[ctr_8x8][1][0]; |
2933 | 3.18M | } |
2934 | | |
2935 | | /*Accumalate activity factor at frame level*/ |
2936 | 3.25M | ps_ctxt->i8_frame_acc_act_factor += ps_l0_ipe_out_ctb->ai4_8x8_act_factor[ctr_8x8]; |
2937 | 3.25M | } |
2938 | 203k | } |
2939 | 203k | return; |
2940 | 203k | } |
2941 | | |
2942 | | WORD32 ihevce_nxn_sad_computer( |
2943 | | UWORD8 *pu1_inp, WORD32 i4_inp_stride, UWORD8 *pu1_ref, WORD32 i4_ref_stride, WORD32 trans_size) |
2944 | 1.90M | { |
2945 | 1.90M | WORD32 wd, ht, i, j; |
2946 | 1.90M | WORD32 sad = 0; |
2947 | | |
2948 | 1.90M | wd = trans_size; |
2949 | 1.90M | ht = trans_size; |
2950 | | |
2951 | 28.6M | for(i = 0; i < ht; i++) |
2952 | 26.7M | { |
2953 | 495M | for(j = 0; j < wd; j++) |
2954 | 468M | { |
2955 | 468M | sad += (ABS(((WORD32)pu1_inp[j] - (WORD32)pu1_ref[j]))); |
2956 | 468M | } |
2957 | 26.7M | pu1_inp += i4_inp_stride; |
2958 | 26.7M | pu1_ref += i4_ref_stride; |
2959 | 26.7M | } |
2960 | | |
2961 | 1.90M | return sad; |
2962 | 1.90M | } |
2963 | | |
2964 | | /*! |
2965 | | ****************************************************************************** |
2966 | | * \if Function name : ihevce_mode_eval_filtering \endif |
2967 | | * |
2968 | | * \brief |
2969 | | * Evaluates best 3 modes for the given CU size with probable modes from the |
2970 | | * early decision structure, the mpm candidates and the dc and planar modes |
2971 | | * |
2972 | | * \param[in] ps_cu_node : pointer to MAX cu node info buffer |
2973 | | * \param[in] ps_child_cu_node : pointer to (MAX - 1) cu node info buffer |
2974 | | * \param[in] ps_ctxt : pointer to IPE context struct |
2975 | | * \param[in] ps_curr_src : pointer to src pixels struct |
2976 | | * \param[in] best_amode : best angular mode from l1 layer or |
2977 | | from (MAX - 1) CU mode |
2978 | | * \param[in] best_costs_4x4 : pointer to 3 best cost buffer |
2979 | | * \param[in] best_modes_4x4 : pointer to 3 best mode buffer |
2980 | | * \param[in] step2_bypass : if 0, (MAX - 1) CU is evaluated |
2981 | | * if 1, (MAX CU) suggested is evaluated |
2982 | | * \param[in] tu_eq_cu : indicates if tu size is same as cu or cu/2 |
2983 | | * |
2984 | | * \return |
2985 | | * None |
2986 | | * |
2987 | | * \author |
2988 | | * Ittiam |
2989 | | * |
2990 | | ***************************************************************************** |
2991 | | */ |
2992 | | void ihevce_mode_eval_filtering( |
2993 | | ihevce_ipe_cu_tree_t *ps_cu_node, |
2994 | | ihevce_ipe_cu_tree_t *ps_child_cu_node, |
2995 | | ihevce_ipe_ctxt_t *ps_ctxt, |
2996 | | iv_enc_yuv_buf_t *ps_curr_src, |
2997 | | WORD32 best_amode, |
2998 | | WORD32 *best_costs_4x4, |
2999 | | UWORD8 *best_modes_4x4, |
3000 | | WORD32 step2_bypass, |
3001 | | WORD32 tu_eq_cu) |
3002 | 7.05M | { |
3003 | 7.05M | UWORD8 *pu1_origin, *pu1_orig; |
3004 | 7.05M | WORD32 src_strd = ps_curr_src->i4_y_strd; |
3005 | 7.05M | WORD32 nbr_flags; |
3006 | 7.05M | nbr_avail_flags_t s_nbr; |
3007 | 7.05M | WORD32 trans_size = tu_eq_cu ? ps_cu_node->u1_cu_size : ps_cu_node->u1_cu_size >> 1; |
3008 | 7.05M | WORD32 num_tu_in_x = tu_eq_cu ? 1 : 2; |
3009 | 7.05M | WORD32 num_tu_in_y = tu_eq_cu ? 1 : 2; |
3010 | 7.05M | UWORD8 mode; |
3011 | | |
3012 | 7.05M | WORD32 cost_ang_mode = MAX_INTRA_COST_IPE; |
3013 | 7.05M | WORD32 filter_flag; |
3014 | 7.05M | WORD32 cost_amode_step2[7] = { 0 }; |
3015 | | /*WORD32 best_sad[5]; // NOTE_A01: Not getting consumed at present */ |
3016 | 7.05M | WORD32 sad = 0; |
3017 | 7.05M | WORD32 cu_pos_x, cu_pos_y; |
3018 | 7.05M | WORD32 temp; |
3019 | 7.05M | WORD32 i = 0, j, k, i_end, z; |
3020 | | //WORD32 row, col, size; |
3021 | 7.05M | UWORD8 *pu1_ref; |
3022 | 7.05M | WORD32 xA, yA, xB, yB; |
3023 | 7.05M | WORD32 top_intra_mode; |
3024 | 7.05M | WORD32 left_intra_mode; |
3025 | 7.05M | UWORD8 *pu1_ref_orig = &ps_ctxt->au1_ref_samples[0]; |
3026 | 7.05M | UWORD8 *pu1_ref_filt = &ps_ctxt->au1_filt_ref_samples[0]; |
3027 | | |
3028 | 7.05M | UWORD8 modes_4x4[5] = { 0, 1, 2, 3, 4 }; |
3029 | 7.05M | WORD32 count; |
3030 | | |
3031 | 7.05M | pf_ipe_res_trans_had apf_resd_trns_had[4]; |
3032 | | |
3033 | 7.05M | WORD32 cand_mode_satd_list[3]; |
3034 | 7.05M | ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr; |
3035 | | |
3036 | 7.05M | ihevc_intra_pred_luma_ref_substitution_fptr = |
3037 | 7.05M | ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr; |
3038 | | |
3039 | 7.05M | apf_resd_trns_had[0] = ps_ctxt->s_cmn_opt_func.pf_HAD_4x4_8bit; |
3040 | 7.05M | apf_resd_trns_had[1] = ps_ctxt->s_cmn_opt_func.pf_HAD_8x8_8bit; |
3041 | 7.05M | apf_resd_trns_had[2] = ps_ctxt->s_cmn_opt_func.pf_HAD_16x16_8bit; |
3042 | 7.05M | apf_resd_trns_had[3] = ps_ctxt->s_cmn_opt_func.pf_HAD_32x32_8bit; |
3043 | | |
3044 | | /* initialize modes_to_eval as zero */ |
3045 | 7.05M | memset(&ps_ctxt->au1_modes_to_eval, 0, MAX_NUM_IP_MODES); |
3046 | | |
3047 | | /* Compute the Parent Cost */ |
3048 | | |
3049 | | /* Pointer to top-left of the CU - y0,x0 in 8x8 granularity */ |
3050 | 7.05M | pu1_orig = (UWORD8 *)(ps_curr_src->pv_y_buf) + ((ps_cu_node->u2_y0 << 3) * src_strd) + |
3051 | 7.05M | (ps_cu_node->u2_x0 << 3); |
3052 | | |
3053 | | /* Get position of CU within CTB at 4x4 granularity */ |
3054 | 7.05M | cu_pos_x = ps_cu_node->u2_x0 << 1; |
3055 | 7.05M | cu_pos_y = ps_cu_node->u2_y0 << 1; |
3056 | | |
3057 | | /* get the neighbour availability flags */ |
3058 | 7.05M | ihevce_get_only_nbr_flag( |
3059 | 7.05M | &s_nbr, |
3060 | 7.05M | ps_ctxt->pu1_ctb_nbr_map, |
3061 | 7.05M | ps_ctxt->i4_nbr_map_strd, |
3062 | 7.05M | cu_pos_x, |
3063 | 7.05M | cu_pos_y, |
3064 | 7.05M | trans_size >> 2, |
3065 | 7.05M | trans_size >> 2); |
3066 | | |
3067 | | /* Traverse for all 4 child blocks in the parent block */ |
3068 | 7.05M | xA = (ps_cu_node->u2_x0 << 3) >> 2; |
3069 | 7.05M | yA = ((ps_cu_node->u2_y0 << 3) >> 2) + 1; |
3070 | 7.05M | xB = xA + 1; |
3071 | 7.05M | yB = yA - 1; |
3072 | 7.05M | left_intra_mode = ps_ctxt->au1_ctb_mode_map[yA][xA]; |
3073 | 7.05M | top_intra_mode = ps_ctxt->au1_ctb_mode_map[yB][xB]; |
3074 | | /* call the function which populates sad cost for all the modes */ |
3075 | | |
3076 | 7.05M | ihevce_intra_populate_mode_bits_cost_bracketing( |
3077 | 7.05M | top_intra_mode, |
3078 | 7.05M | left_intra_mode, |
3079 | 7.05M | s_nbr.u1_top_avail, |
3080 | 7.05M | s_nbr.u1_left_avail, |
3081 | 7.05M | ps_cu_node->u2_y0, |
3082 | 7.05M | &ps_ctxt->au2_mode_bits_satd_cost[0], |
3083 | 7.05M | &ps_ctxt->au2_mode_bits_satd[0], |
3084 | 7.05M | ps_ctxt->i4_ol_satd_lambda, |
3085 | 7.05M | cand_mode_satd_list); |
3086 | | |
3087 | 16.5M | for(k = 0; k < num_tu_in_y; k++) |
3088 | 9.51M | { |
3089 | 23.9M | for(j = 0; j < num_tu_in_x; j++) |
3090 | 14.4M | { |
3091 | | /* get the neighbour availability flags */ |
3092 | 14.4M | nbr_flags = ihevce_get_nbr_intra( |
3093 | 14.4M | &s_nbr, |
3094 | 14.4M | ps_ctxt->pu1_ctb_nbr_map, |
3095 | 14.4M | ps_ctxt->i4_nbr_map_strd, |
3096 | 14.4M | cu_pos_x + ((j) * (trans_size >> 2)), |
3097 | 14.4M | cu_pos_y + ((k) * (trans_size >> 2)), |
3098 | 14.4M | trans_size >> 2); |
3099 | | |
3100 | 14.4M | pu1_origin = pu1_orig + (k * trans_size * src_strd) + (j * trans_size); |
3101 | | |
3102 | | /* Create reference samples array */ |
3103 | 14.4M | ihevc_intra_pred_luma_ref_substitution_fptr( |
3104 | 14.4M | pu1_origin - src_strd - 1, |
3105 | 14.4M | pu1_origin - src_strd, |
3106 | 14.4M | pu1_origin - 1, |
3107 | 14.4M | src_strd, |
3108 | 14.4M | trans_size, |
3109 | 14.4M | nbr_flags, |
3110 | 14.4M | pu1_ref_orig, |
3111 | 14.4M | 0); |
3112 | | |
3113 | | /* Perform reference samples filtering */ |
3114 | 14.4M | ihevce_intra_pred_ref_filtering(pu1_ref_orig, trans_size, pu1_ref_filt); |
3115 | | |
3116 | 14.4M | ihevce_set_nbr_map( |
3117 | 14.4M | ps_ctxt->pu1_ctb_nbr_map, |
3118 | 14.4M | ps_ctxt->i4_nbr_map_strd, |
3119 | 14.4M | cu_pos_x + ((j) * (trans_size >> 2)), |
3120 | 14.4M | cu_pos_y + ((k) * (trans_size >> 2)), |
3121 | 14.4M | (trans_size >> 2), |
3122 | 14.4M | 1); |
3123 | | |
3124 | 14.4M | pu1_ref_orig += (4 * MAX_CTB_SIZE + 1); |
3125 | 14.4M | pu1_ref_filt += (4 * MAX_CTB_SIZE + 1); |
3126 | 14.4M | } |
3127 | 9.51M | } |
3128 | | |
3129 | | /* Re-evaluation for angular mode */ |
3130 | | //if(ps_ed_blk->ang_attr.mode_present == 1) |
3131 | | //if(((best_amode & 0x1) != 1)) |
3132 | | |
3133 | 7.05M | { |
3134 | 7.05M | WORD32 u1_trans_idx = trans_size >> 3; |
3135 | 7.05M | if(trans_size == 32) |
3136 | 577k | u1_trans_idx = 3; |
3137 | | //best_amode = ps_ed_blk->ang_attr.best_mode; |
3138 | | |
3139 | 7.05M | i = 0; |
3140 | 7.05M | if(!step2_bypass) |
3141 | 4.95M | { |
3142 | | /* Around best level 4 angular mode, search for best level 2 mode */ |
3143 | 4.95M | ASSERT((best_amode >= 2) && (best_amode <= 34)); |
3144 | | |
3145 | 4.95M | if(ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P3) |
3146 | 3.93M | { |
3147 | 3.93M | if(best_amode >= 4) |
3148 | 3.91M | ps_ctxt->au1_modes_to_eval_temp[i++] = best_amode - 2; |
3149 | 3.93M | } |
3150 | | |
3151 | 4.95M | ps_ctxt->au1_modes_to_eval_temp[i++] = best_amode; |
3152 | | |
3153 | 4.95M | if(ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P3) |
3154 | 3.93M | { |
3155 | 3.93M | if(best_amode <= 32) |
3156 | 3.92M | ps_ctxt->au1_modes_to_eval_temp[i++] = best_amode + 2; |
3157 | 3.93M | } |
3158 | 4.95M | } |
3159 | 2.09M | else |
3160 | 2.09M | { |
3161 | 2.09M | ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[0]->best_mode; |
3162 | 2.09M | ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[1]->best_mode; |
3163 | 2.09M | ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[2]->best_mode; |
3164 | 2.09M | ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[3]->best_mode; |
3165 | 2.09M | } |
3166 | | |
3167 | | /* Add the left and top MPM modes for computation*/ |
3168 | | |
3169 | 7.05M | ps_ctxt->au1_modes_to_eval_temp[i++] = cand_mode_satd_list[0]; |
3170 | 7.05M | ps_ctxt->au1_modes_to_eval_temp[i++] = cand_mode_satd_list[1]; |
3171 | | |
3172 | 7.05M | i_end = i; |
3173 | 7.05M | count = 0; |
3174 | | |
3175 | | /* Copy each distinct mode greater than 1 from au1_modes_to_eval_temp[] into au1_modes_to_eval[] */ |
3176 | 42.3M | for(j = 0; j < i_end; j++) |
3177 | 35.2M | { |
3178 | 77.3M | for(k = 0; k < count; k++) |
3179 | 44.8M | { |
3180 | 44.8M | if(ps_ctxt->au1_modes_to_eval_temp[j] == ps_ctxt->au1_modes_to_eval[k]) |
3181 | 2.75M | break; |
3182 | 44.8M | } |
3183 | 35.2M | if((k == count) && (ps_ctxt->au1_modes_to_eval_temp[j] > 1)) |
3184 | 16.6M | { |
3185 | 16.6M | ps_ctxt->au1_modes_to_eval[count] = ps_ctxt->au1_modes_to_eval_temp[j]; |
3186 | 16.6M | count++; |
3187 | 16.6M | } |
3188 | 35.2M | } |
3189 | 7.05M | i_end = count; |
3190 | 7.05M | if(count == 0) |
3191 | 994k | { |
3192 | 994k | ps_ctxt->au1_modes_to_eval[0] = 26; |
3193 | 994k | i_end = 1; |
3194 | 994k | } |
3195 | | |
3196 | 24.6M | for(i = 0; i < i_end; i++) |
3197 | 17.6M | { |
3198 | 17.6M | pu1_ref_orig = &ps_ctxt->au1_ref_samples[0]; |
3199 | 17.6M | pu1_ref_filt = &ps_ctxt->au1_filt_ref_samples[0]; |
3200 | | |
3201 | 17.6M | mode = ps_ctxt->au1_modes_to_eval[i]; |
3202 | 17.6M | ASSERT((mode >= 2) && (mode <= 34)); |
3203 | 17.6M | cost_amode_step2[i] = ps_ctxt->au2_mode_bits_satd_cost[mode]; |
3204 | 17.6M | filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(trans_size) - 2)); |
3205 | | |
3206 | 42.0M | for(k = 0; k < num_tu_in_y; k++) |
3207 | 24.4M | { |
3208 | 62.7M | for(j = 0; j < num_tu_in_x; j++) |
3209 | 38.2M | { |
3210 | 38.2M | pu1_origin = pu1_orig + (k * trans_size * src_strd) + (j * trans_size); |
3211 | | |
3212 | 38.2M | if(0 == filter_flag) |
3213 | 33.9M | pu1_ref = pu1_ref_orig; |
3214 | 4.29M | else |
3215 | 4.29M | pu1_ref = pu1_ref_filt; |
3216 | | |
3217 | 38.2M | g_apf_lum_ip[g_i4_ip_funcs[mode]]( |
3218 | 38.2M | pu1_ref, 0, &ps_ctxt->au1_pred_samples[0], trans_size, trans_size, mode); |
3219 | | |
3220 | 38.2M | if(ps_ctxt->u1_use_satd) |
3221 | 36.3M | { |
3222 | 36.3M | sad = apf_resd_trns_had[u1_trans_idx]( |
3223 | 36.3M | pu1_origin, |
3224 | 36.3M | ps_curr_src->i4_y_strd, |
3225 | 36.3M | &ps_ctxt->au1_pred_samples[0], |
3226 | 36.3M | trans_size, |
3227 | 36.3M | NULL, |
3228 | 36.3M | 0 |
3229 | | |
3230 | 36.3M | ); |
3231 | 36.3M | } |
3232 | 1.90M | else |
3233 | 1.90M | { |
3234 | 1.90M | sad = ps_ctxt->s_ipe_optimised_function_list.pf_nxn_sad_computer( |
3235 | 1.90M | pu1_origin, |
3236 | 1.90M | ps_curr_src->i4_y_strd, |
3237 | 1.90M | &ps_ctxt->au1_pred_samples[0], |
3238 | 1.90M | trans_size, |
3239 | 1.90M | trans_size); |
3240 | 1.90M | } |
3241 | | |
3242 | 38.2M | cost_amode_step2[i] += sad; |
3243 | | |
3244 | 38.2M | pu1_ref_orig += (4 * MAX_CTB_SIZE + 1); |
3245 | 38.2M | pu1_ref_filt += (4 * MAX_CTB_SIZE + 1); |
3246 | 38.2M | } |
3247 | 24.4M | } |
3248 | 17.6M | } |
3249 | 7.05M | best_amode = ps_ctxt->au1_modes_to_eval[0]; |
3250 | | /*Init cost indx */ |
3251 | 7.05M | cost_ang_mode = MAX_INTRA_COST_IPE; //cost_amode_step2[0]; |
3252 | 24.6M | for(z = 0; z < i_end; z++) |
3253 | 17.6M | { |
3254 | | /* Track the least cost in cost_ang_mode and its mode in best_amode; on a cost tie the smaller mode number wins */ |
3255 | 17.6M | if(cost_ang_mode >= cost_amode_step2[z]) |
3256 | 12.6M | { |
3257 | 12.6M | if(cost_ang_mode == cost_amode_step2[z]) |
3258 | 1.21M | { |
3259 | 1.21M | if(best_amode > ps_ctxt->au1_modes_to_eval[z]) |
3260 | 15.9k | best_amode = ps_ctxt->au1_modes_to_eval[z]; |
3261 | 1.21M | } |
3262 | 11.4M | else |
3263 | 11.4M | { |
3264 | 11.4M | best_amode = ps_ctxt->au1_modes_to_eval[z]; |
3265 | 11.4M | } |
3266 | 12.6M | cost_ang_mode = cost_amode_step2[z]; |
3267 | 12.6M | } |
3268 | 17.6M | } |
3269 | | |
3270 | | /*Modify mode bits for the angular modes */ |
3271 | 7.05M | } |
3272 | | |
3273 | 7.05M | { |
3274 | | /* Step - I modification */ |
3275 | 7.05M | ASSERT((best_amode >= 2) && (best_amode <= 34)); |
3276 | 7.05M | i_end = 0; |
3277 | 7.05M | z = 0; |
3278 | | |
3279 | | /* Around best level 3 angular mode, search for best level 1 mode */ |
3280 | 7.05M | ps_ctxt->au1_modes_to_eval[i_end++] = 0; |
3281 | 7.05M | ps_ctxt->au1_modes_to_eval[i_end++] = 1; |
3282 | | |
3283 | 7.05M | if(best_amode != 2) |
3284 | 6.86M | ps_ctxt->au1_modes_to_eval[i_end++] = best_amode - 1; |
3285 | | |
3286 | 7.05M | ps_ctxt->au1_modes_to_eval[i_end++] = best_amode; |
3287 | | |
3288 | 7.05M | if(best_amode != 34) |
3289 | 6.99M | ps_ctxt->au1_modes_to_eval[i_end++] = best_amode + 1; |
3290 | | |
3291 | | /* Inserting step_2's best mode at last to avoid |
3292 | | recalculation of its SATD cost */ |
3293 | | |
3294 | | //ps_ctxt->au1_modes_to_eval[i_end] = best_amode; //Bugfix: HSAD compared with SAD |
3295 | | //cost_amode_step2[i_end] = cost_ang_mode; |
3296 | | |
3297 | | /*best_sad[i_end] = cost_ang_mode |
3298 | | - mode_bits_satd_cost[best_amode]; //See NOTE_A01 above */ |
3299 | | |
3300 | 7.05M | cost_ang_mode = MAX_INTRA_COST_IPE; /* Init cost */ |
3301 | | |
3302 | 42.0M | for(i = 0; i < i_end; i++) |
3303 | 35.0M | { |
3304 | 35.0M | WORD32 u1_trans_idx = trans_size >> 3; |
3305 | 35.0M | if(trans_size == 32) |
3306 | 2.87M | u1_trans_idx = 3; |
3307 | 35.0M | pu1_ref_orig = &ps_ctxt->au1_ref_samples[0]; |
3308 | 35.0M | pu1_ref_filt = &ps_ctxt->au1_filt_ref_samples[0]; |
3309 | | |
3310 | | /*best_sad[i] = 0; //See NOTE_A01 above */ |
3311 | 35.0M | mode = ps_ctxt->au1_modes_to_eval[i]; |
3312 | 35.0M | cost_amode_step2[i] = ps_ctxt->au2_mode_bits_satd_cost[mode]; |
3313 | 35.0M | filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(trans_size) - 2)); |
3314 | | |
3315 | 82.2M | for(k = 0; k < num_tu_in_y; k++) |
3316 | 47.2M | { |
3317 | 118M | for(j = 0; j < num_tu_in_x; j++) |
3318 | 71.6M | { |
3319 | 71.6M | pu1_origin = pu1_orig + (k * trans_size * src_strd) + (j * trans_size); |
3320 | | |
3321 | 71.6M | if(0 == filter_flag) |
3322 | 57.6M | pu1_ref = pu1_ref_orig; |
3323 | 13.9M | else |
3324 | 13.9M | pu1_ref = pu1_ref_filt; |
3325 | | |
3326 | 71.6M | g_apf_lum_ip[g_i4_ip_funcs[mode]]( |
3327 | 71.6M | pu1_ref, 0, &ps_ctxt->au1_pred_samples[0], trans_size, trans_size, mode); |
3328 | | |
3329 | | //if(trans_size != 4) |
3330 | 71.6M | { |
3331 | 71.6M | sad = apf_resd_trns_had[u1_trans_idx]( |
3332 | 71.6M | pu1_origin, |
3333 | 71.6M | ps_curr_src->i4_y_strd, |
3334 | 71.6M | &ps_ctxt->au1_pred_samples[0], |
3335 | 71.6M | trans_size, |
3336 | 71.6M | NULL, |
3337 | 71.6M | 0); |
3338 | 71.6M | } |
3339 | | |
3340 | | /* accumulating SATD though the name says it is sad */ |
3341 | 71.6M | cost_amode_step2[i] += sad; |
3342 | | /*best_sad[i] +=sad; //See NOTE_A01 above */ |
3343 | 71.6M | pu1_ref_orig += (4 * MAX_CTB_SIZE + 1); |
3344 | 71.6M | pu1_ref_filt += (4 * MAX_CTB_SIZE + 1); |
3345 | 71.6M | } |
3346 | 47.2M | } |
3347 | 35.0M | } |
3348 | | /* Updating i_end for the step_2's inserted mode*/ |
3349 | | // i_end++; |
3350 | | |
3351 | | /* Sort the costs in ascending order, applying the same swaps to modes_4x4 */ |
3352 | | |
3353 | 35.0M | for(i = 0; i < (i_end - 1); i++) |
3354 | 27.9M | { |
3355 | 97.4M | for(j = i + 1; j < i_end; j++) |
3356 | 69.5M | { |
3357 | 69.5M | if(cost_amode_step2[i] > cost_amode_step2[j]) |
3358 | 13.6M | { |
3359 | 13.6M | temp = cost_amode_step2[i]; |
3360 | 13.6M | cost_amode_step2[i] = cost_amode_step2[j]; |
3361 | 13.6M | cost_amode_step2[j] = temp; |
3362 | | |
3363 | 13.6M | temp = modes_4x4[i]; |
3364 | 13.6M | modes_4x4[i] = modes_4x4[j]; |
3365 | 13.6M | modes_4x4[j] = temp; |
3366 | 13.6M | } |
3367 | 69.5M | } |
3368 | 27.9M | } |
3369 | | |
3370 | | /* Least cost is now in cost_amode_step2[0]; modes_4x4[0] indexes its mode in au1_modes_to_eval */ |
3371 | 7.05M | best_amode = ps_ctxt->au1_modes_to_eval[modes_4x4[0]]; |
3372 | 7.05M | cost_ang_mode = cost_amode_step2[0]; |
3373 | 7.05M | ps_cu_node->best_satd = cost_ang_mode - ps_ctxt->au2_mode_bits_satd_cost[best_amode]; |
3374 | 7.05M | ps_cu_node->best_cost = cost_amode_step2[0]; |
3375 | 7.05M | ps_cu_node->best_mode = ps_ctxt->au1_modes_to_eval[modes_4x4[0]]; |
3376 | 7.05M | ps_cu_node->best_satd = |
3377 | 7.05M | ps_cu_node->best_cost - ps_ctxt->au2_mode_bits_satd_cost[ps_cu_node->best_mode]; |
3378 | | |
3379 | | /* Store the best mode's bits cost for RC */ |
3380 | 7.05M | ps_cu_node->u2_mode_bits_cost = ps_ctxt->au2_mode_bits_satd[ps_cu_node->best_mode]; |
3381 | | |
3382 | | /* Store the best three candidates */ |
3383 | 28.2M | for(i = 0; i < 3; i++) |
3384 | 21.1M | { |
3385 | 21.1M | best_costs_4x4[i] = cost_amode_step2[i]; |
3386 | 21.1M | best_modes_4x4[i] = ps_ctxt->au1_modes_to_eval[modes_4x4[i]]; |
3387 | 21.1M | } |
3388 | 7.05M | } |
3389 | | |
3390 | 7.05M | return; |
3391 | 7.05M | } |