/src/libhevc/encoder/ihevce_decomp_pre_intra_pass.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2018 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | |
21 | | /*! |
22 | | ****************************************************************************** |
23 | | * \file ihevce_decomp_pre_intra_pass.c |
24 | | * |
25 | | * \brief |
26 | | * This file contains definitions related to frame decomposition done during |
27 | | * pre intra processing |
28 | | * |
29 | | * \date |
30 | | * 19/02/2013 |
31 | | * |
32 | | * \author |
33 | | * Ittiam |
34 | | * |
35 | | * List of Functions |
36 | | * ihevce_intra_populate_mode_bits_cost() |
37 | | * ihevce_8x8_sad_computer() |
38 | | * ihevce_4x4_sad_computer() |
39 | | * ihevce_ed_4x4_find_best_modes() |
40 | | * ihevce_ed_calc_4x4_blk() |
41 | | * ihevce_ed_calc_8x8_blk() |
42 | | * ihevce_ed_calc_incomplete_ctb() |
43 | | * ihevce_cu_level_qp_mod() |
44 | | * ihevce_ed_calc_ctb() |
45 | | * ihevce_ed_frame_init() |
46 | | * ihevce_scale_by_2() |
47 | | * ihevce_decomp_pre_intra_process_row() |
48 | | * ihevce_decomp_pre_intra_process() |
49 | | * ihevce_decomp_pre_intra_get_num_mem_recs() |
50 | | * ihevce_decomp_pre_intra_get_mem_recs() |
51 | | * ihevce_decomp_pre_intra_init() |
52 | | * ihevce_decomp_pre_intra_frame_init() |
53 | | * ihevce_merge_sort() |
54 | | * ihevce_decomp_pre_intra_curr_frame_pre_intra_deinit() |
55 | | * |
56 | | ****************************************************************************** |
57 | | */ |
58 | | |
59 | | /*****************************************************************************/ |
60 | | /* File Includes */ |
61 | | /*****************************************************************************/ |
62 | | /* System include files */ |
63 | | #include <stdio.h> |
64 | | #include <string.h> |
65 | | #include <stdlib.h> |
66 | | #include <assert.h> |
67 | | #include <stdarg.h> |
68 | | #include <stdint.h> |
69 | | #include <math.h> |
70 | | #include <limits.h> |
71 | | |
72 | | /* User include files */ |
73 | | #include "ihevc_typedefs.h" |
74 | | #include "itt_video_api.h" |
75 | | #include "ihevce_api.h" |
76 | | |
77 | | #include "rc_cntrl_param.h" |
78 | | #include "rc_frame_info_collector.h" |
79 | | #include "rc_look_ahead_params.h" |
80 | | |
81 | | #include "ihevc_defs.h" |
82 | | #include "ihevc_debug.h" |
83 | | #include "ihevc_structs.h" |
84 | | #include "ihevc_platform_macros.h" |
85 | | #include "ihevc_deblk.h" |
86 | | #include "ihevc_itrans_recon.h" |
87 | | #include "ihevc_chroma_itrans_recon.h" |
88 | | #include "ihevc_chroma_intra_pred.h" |
89 | | #include "ihevc_intra_pred.h" |
90 | | #include "ihevc_inter_pred.h" |
91 | | #include "ihevc_mem_fns.h" |
92 | | #include "ihevc_padding.h" |
93 | | #include "ihevc_weighted_pred.h" |
94 | | #include "ihevc_sao.h" |
95 | | #include "ihevc_resi_trans.h" |
96 | | #include "ihevc_quant_iquant_ssd.h" |
97 | | #include "ihevc_cabac_tables.h" |
98 | | |
99 | | #include "ihevce_defs.h" |
100 | | #include "ihevce_hle_interface.h" |
101 | | #include "ihevce_lap_enc_structs.h" |
102 | | #include "ihevce_multi_thrd_structs.h" |
103 | | #include "ihevce_multi_thrd_funcs.h" |
104 | | #include "ihevce_me_common_defs.h" |
105 | | #include "ihevce_had_satd.h" |
106 | | #include "ihevce_error_codes.h" |
107 | | #include "ihevce_bitstream.h" |
108 | | #include "ihevce_cabac.h" |
109 | | #include "ihevce_rdoq_macros.h" |
110 | | #include "ihevce_function_selector.h" |
111 | | #include "ihevce_enc_structs.h" |
112 | | #include "ihevce_entropy_structs.h" |
113 | | #include "ihevce_cmn_utils_instr_set_router.h" |
114 | | #include "ihevce_ipe_instr_set_router.h" |
115 | | #include "ihevce_decomp_pre_intra_structs.h" |
116 | | #include "ihevce_decomp_pre_intra_pass.h" |
117 | | #include "ihevce_enc_loop_structs.h" |
118 | | #include "hme_datatype.h" |
119 | | #include "hme_interface.h" |
120 | | #include "hme_common_defs.h" |
121 | | #include "ihevce_global_tables.h" |
122 | | |
123 | | /*****************************************************************************/ |
124 | | /* Global variables */ |
125 | | /*****************************************************************************/ |
126 | | |
127 | | /** |
128 | | ***************************************************************************** |
129 | | * @brief subset of intra modes to be evaluated during pre enc intra process |
130 | | ***************************************************************************** |
131 | | */ |
132 | | static const UWORD8 gau1_modes_to_eval[11] = { 0, 1, 26, 2, 6, 10, 14, 18, 22, 30, 34 }; |
133 | | |
134 | | /** |
135 | | ***************************************************************************** |
136 | | * @brief list of pointers to luma intra pred functions |
137 | | ***************************************************************************** |
138 | | */ |
139 | | pf_intra_pred g_apf_lum_ip[NUM_IP_FUNCS]; |
140 | | |
141 | | /*****************************************************************************/ |
142 | | /* Function Definitions */ |
143 | | /*****************************************************************************/ |
144 | | |
145 | | /*! |
146 | | ****************************************************************************** |
147 | | * \if Function name : ihevce_intra_populate_mode_bits_cost \endif |
148 | | * |
149 | | * \brief: look-up table of cost of signalling an intra mode in the |
150 | | * bitstream |
151 | | * |
152 | | ***************************************************************************** |
153 | | */ |
154 | | static void ihevce_intra_populate_mode_bits_cost(UWORD16 *mode_bits_cost, WORD32 lambda) |
155 | 0 | { |
156 | 0 | WORD32 i; |
157 | | // 5.5 * lambda |
158 | 0 | UWORD16 five_bits_cost = COMPUTE_RATE_COST_CLIP30(11, lambda, (LAMBDA_Q_SHIFT + 1)); |
159 | |
|
160 | 0 | for(i = 0; i < NUM_MODES; i++) |
161 | 0 | { |
162 | 0 | mode_bits_cost[i] = five_bits_cost; |
163 | 0 | } |
164 | 0 | } |
165 | | |
166 | | /*! |
167 | | ****************************************************************************** |
168 | | * \if Function name : ihevce_8x8_sad_computer \endif |
169 | | * |
170 | | * \brief: compute sad between 2 8x8 blocks |
171 | | * |
172 | | ***************************************************************************** |
173 | | */ |
174 | | UWORD16 ihevce_8x8_sad_computer(UWORD8 *src, UWORD8 *pred, WORD32 src_strd, WORD32 pred_strd) |
175 | 0 | { |
176 | 0 | UWORD16 sad = 0; |
177 | 0 | WORD32 i, j; |
178 | |
|
179 | 0 | for(i = 0; i < 8; i++) |
180 | 0 | { |
181 | 0 | for(j = 0; j < 8; j++) |
182 | 0 | { |
183 | 0 | sad += ABS(src[j] - pred[j]); |
184 | 0 | } |
185 | 0 | src += src_strd; |
186 | 0 | pred += pred_strd; |
187 | 0 | } |
188 | |
|
189 | 0 | return sad; |
190 | 0 | } |
191 | | |
192 | | /*! |
193 | | ****************************************************************************** |
194 | | * \if Function name : ihevce_4x4_sad_computer \endif |
195 | | * |
196 | | * \brief: compute sad between 2 4x4 blocks |
197 | | * |
198 | | ***************************************************************************** |
199 | | */ |
200 | | UWORD16 ihevce_4x4_sad_computer(UWORD8 *src, UWORD8 *pred, WORD32 src_strd, WORD32 pred_strd) |
201 | 0 | { |
202 | 0 | UWORD16 sad = 0; |
203 | 0 | WORD32 i, j; |
204 | |
|
205 | 0 | for(i = 0; i < 4; i++) |
206 | 0 | { |
207 | 0 | for(j = 0; j < 4; j++) |
208 | 0 | { |
209 | 0 | sad += ABS(src[j] - pred[j]); |
210 | 0 | } |
211 | 0 | src += src_strd; |
212 | 0 | pred += pred_strd; |
213 | 0 | } |
214 | |
|
215 | 0 | return sad; |
216 | 0 | } |
217 | | |
218 | | /*! |
219 | | ****************************************************************************** |
220 | | * \if Function name : ihevce_ed_4x4_find_best_modes \endif |
221 | | * |
222 | | * \brief: evaluate input 4x4 block for pre-selected list intra modes and |
223 | | * return best sad, cost |
224 | | * |
225 | | ***************************************************************************** |
226 | | */ |
227 | | void ihevce_ed_4x4_find_best_modes( |
228 | | UWORD8 *pu1_src, |
229 | | WORD32 src_stride, |
230 | | UWORD8 *ref, |
231 | | UWORD16 *mode_bits_cost, |
232 | | UWORD8 *pu1_best_modes, |
233 | | WORD32 *pu1_best_sad_costs, |
234 | | WORD32 u1_low_resol, |
235 | | FT_SAD_COMPUTER *pf_4x4_sad_computer) |
236 | 0 | { |
237 | 0 | WORD32 i; |
238 | 0 | UWORD8 mode = 0, best_amode = 0, best_nmode = 0; |
239 | 0 | UWORD8 pred[16]; |
240 | 0 | WORD32 sad = 0; |
241 | 0 | WORD32 sad_cost = 0; |
242 | 0 | WORD32 best_asad_cost = 0xFFFFF; |
243 | 0 | WORD32 best_nsad_cost = 0xFFFFF; |
244 | | |
245 | | /* If lower layers, l1 or l2, all the 11 modes are evaluated */ |
246 | | /* If L0 layer, all modes excluding DC and Planar are evaluated */ |
247 | 0 | if(1 == u1_low_resol) |
248 | 0 | i = 0; |
249 | 0 | else |
250 | 0 | i = 2; |
251 | | |
252 | | /* Find the best non-angular and angular mode till level 4 */ |
253 | 0 | for(; i < 11; i++) |
254 | 0 | { |
255 | 0 | mode = gau1_modes_to_eval[i]; |
256 | 0 | g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode); |
257 | 0 | sad = pf_4x4_sad_computer(pu1_src, pred, src_stride, 4); |
258 | 0 | sad_cost = sad + mode_bits_cost[mode]; |
259 | 0 | if(mode < 2) |
260 | 0 | { |
261 | 0 | if(sad_cost < best_nsad_cost) |
262 | 0 | { |
263 | 0 | best_nmode = mode; |
264 | 0 | best_nsad_cost = sad_cost; |
265 | 0 | } |
266 | 0 | } |
267 | 0 | else |
268 | 0 | { |
269 | 0 | if(sad_cost < best_asad_cost) |
270 | 0 | { |
271 | 0 | best_amode = mode; |
272 | 0 | best_asad_cost = sad_cost; |
273 | 0 | } |
274 | 0 | } |
275 | 0 | } |
276 | |
|
277 | 0 | pu1_best_modes[0] = best_amode; |
278 | 0 | pu1_best_sad_costs[0] = best_asad_cost; |
279 | |
|
280 | 0 | if(1 == u1_low_resol) |
281 | 0 | { |
282 | 0 | pu1_best_modes[1] = best_nmode; |
283 | 0 | pu1_best_sad_costs[1] = best_nsad_cost; |
284 | 0 | } |
285 | 0 | } |
286 | | |
287 | | /*! |
288 | | ****************************************************************************** |
289 | | * \if Function name : ihevce_ed_calc_4x4_blk \endif |
290 | | * |
291 | | * \brief: evaluate input 4x4 block for all intra modes and return best sad & |
292 | | * cost |
293 | | * |
294 | | ***************************************************************************** |
295 | | */ |
296 | | static void ihevce_ed_calc_4x4_blk( |
297 | | ihevce_ed_blk_t *ps_ed, |
298 | | UWORD8 *pu1_src, |
299 | | WORD32 src_stride, |
300 | | UWORD8 *ref, |
301 | | UWORD16 *mode_bits_cost, |
302 | | WORD32 *pi4_best_satd, |
303 | | WORD32 i4_quality_preset, |
304 | | WORD32 *pi4_best_sad_cost, |
305 | | ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list) |
306 | 0 | { |
307 | 0 | WORD32 i, i_end; |
308 | 0 | UWORD8 mode, best_amode, best_nmode; |
309 | 0 | UWORD8 pred[16]; |
310 | 0 | UWORD16 sad; |
311 | 0 | WORD32 sad_cost = 0; |
312 | 0 | WORD32 best_asad_cost = 0xFFFFF; |
313 | 0 | WORD32 best_nsad_cost = 0xFFFFF; |
314 | 0 | UWORD8 au1_best_modes[2]; |
315 | 0 | WORD32 ai4_best_sad_costs[2]; |
316 | | /* L1/L2 resolution hence low resolution enable */ |
317 | 0 | const WORD32 u1_low_resol = 1; |
318 | 0 | UWORD8 modes_to_eval[2]; |
319 | |
|
320 | 0 | ps_ipe_optimised_function_list->pf_ed_4x4_find_best_modes( |
321 | 0 | pu1_src, |
322 | 0 | src_stride, |
323 | 0 | ref, |
324 | 0 | mode_bits_cost, |
325 | 0 | au1_best_modes, |
326 | 0 | ai4_best_sad_costs, |
327 | 0 | u1_low_resol, |
328 | 0 | ps_ipe_optimised_function_list->pf_4x4_sad_computer); |
329 | |
|
330 | 0 | best_nmode = au1_best_modes[1]; |
331 | 0 | best_amode = au1_best_modes[0]; |
332 | 0 | best_nsad_cost = ai4_best_sad_costs[1]; |
333 | 0 | best_asad_cost = ai4_best_sad_costs[0]; |
334 | 0 | *pi4_best_satd = best_asad_cost - mode_bits_cost[best_amode]; |
335 | | |
336 | | /* Around best level 4 angular mode, search for best level 2 mode */ |
337 | 0 | modes_to_eval[0] = best_amode - 2; |
338 | 0 | modes_to_eval[1] = best_amode + 2; |
339 | 0 | i = 0; |
340 | 0 | i_end = 2; |
341 | 0 | if(best_amode == 2) |
342 | 0 | i = 1; |
343 | 0 | else if(best_amode == 34) |
344 | 0 | i_end = 1; |
345 | 0 | for(; i < i_end; i++) |
346 | 0 | { |
347 | 0 | mode = modes_to_eval[i]; |
348 | 0 | g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode); |
349 | 0 | sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer(pu1_src, pred, src_stride, 4); |
350 | 0 | sad_cost = sad + mode_bits_cost[mode]; |
351 | 0 | if(sad_cost < best_asad_cost) |
352 | 0 | { |
353 | 0 | best_amode = mode; |
354 | 0 | best_asad_cost = sad_cost; |
355 | 0 | *pi4_best_satd = sad; |
356 | 0 | } |
357 | 0 | } |
358 | |
|
359 | 0 | if(i4_quality_preset < IHEVCE_QUALITY_P4) |
360 | 0 | { |
361 | | /* Around best level 2 angular mode, search for best level 1 mode */ |
362 | 0 | modes_to_eval[0] = best_amode - 1; |
363 | 0 | modes_to_eval[1] = best_amode + 1; |
364 | 0 | i = 0; |
365 | 0 | i_end = 2; |
366 | 0 | if(best_amode == 2) |
367 | 0 | i = 1; |
368 | 0 | else if(best_amode == 34) |
369 | 0 | i_end = 1; |
370 | 0 | for(; i < i_end; i++) |
371 | 0 | { |
372 | 0 | mode = modes_to_eval[i]; |
373 | 0 | g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode); |
374 | 0 | sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer(pu1_src, pred, src_stride, 4); |
375 | 0 | sad_cost = sad + mode_bits_cost[mode]; |
376 | 0 | if(sad_cost < best_asad_cost) |
377 | 0 | { |
378 | 0 | best_amode = mode; |
379 | 0 | best_asad_cost = sad_cost; |
380 | 0 | *pi4_best_satd = sad; |
381 | 0 | } |
382 | 0 | } |
383 | 0 | } |
384 | |
|
385 | 0 | if(best_asad_cost < best_nsad_cost) |
386 | 0 | { |
387 | 0 | ps_ed->best_mode = best_amode; |
388 | 0 | *pi4_best_sad_cost = best_asad_cost; |
389 | 0 | } |
390 | 0 | else |
391 | 0 | { |
392 | 0 | ps_ed->best_mode = best_nmode; |
393 | 0 | *pi4_best_sad_cost = best_nsad_cost; |
394 | 0 | } |
395 | 0 | ps_ed->intra_or_inter = 0; |
396 | 0 | ps_ed->merge_success = 0; |
397 | 0 | } |
398 | | |
399 | | /*! |
400 | | ****************************************************************************** |
401 | | * \if Function name : ihevce_ed_calc_8x8_blk \endif |
402 | | * |
403 | | * \brief: evaluate input 8x8 block for intra modes basing on the intra mode |
404 | | * decisions made at 4x4 level. This function also makes a decision whether |
405 | | * to split blk in to 4x4 partitions or not. |
406 | | * |
407 | | ***************************************************************************** |
408 | | */ |
409 | | static void ihevce_ed_calc_8x8_blk( |
410 | | ihevce_ed_ctxt_t *ps_ed_ctxt, |
411 | | ihevce_ed_blk_t *ps_ed_8x8, |
412 | | UWORD8 *pu1_src, |
413 | | WORD32 src_stride, |
414 | | WORD32 *nbr_flags_ptr, |
415 | | WORD32 lambda, |
416 | | WORD32 *pi4_best_satd, |
417 | | WORD32 i4_layer_id, |
418 | | WORD32 i4_quality_preset, |
419 | | WORD32 *pi4_best_sad_cost_8x8_l1_ipe, |
420 | | WORD32 *pi4_best_sad_8x8_l1_ipe, |
421 | | ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list, |
422 | | ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list) |
423 | 0 | { |
424 | 0 | ihevce_ed_blk_t *ps_ed_4x4 = ps_ed_8x8; |
425 | 0 | UWORD8 *pu1_src_arr[4]; |
426 | 0 | WORD32 ai4_4x4_best_sad_cost[4]; |
427 | 0 | WORD32 nbr_flags_c, nbr_flags_r; |
428 | 0 | UWORD8 *pu1_src_4x4; |
429 | 0 | WORD32 i, j; |
430 | 0 | func_selector_t *ps_func_selector = ps_ed_ctxt->ps_func_selector; |
431 | 0 | ihevc_intra_pred_luma_ref_substitution_ft *pf_intra_pred_luma_ref_substitution = |
432 | 0 | ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr; |
433 | | |
434 | | /* linearize ref samples for ipe of 8x8 block */ |
435 | 0 | nbr_flags_c = nbr_flags_ptr[0]; |
436 | 0 | nbr_flags_r = nbr_flags_ptr[1]; |
437 | 0 | if(CHECK_TR_AVAILABLE(nbr_flags_r)) |
438 | 0 | { |
439 | 0 | SET_TR_AVAILABLE(nbr_flags_c); |
440 | 0 | } |
441 | 0 | else |
442 | 0 | { |
443 | 0 | SET_TR_UNAVAILABLE(nbr_flags_c); |
444 | 0 | } |
445 | |
|
446 | 0 | pf_intra_pred_luma_ref_substitution( |
447 | 0 | pu1_src - src_stride - 1, |
448 | 0 | pu1_src - src_stride, |
449 | 0 | pu1_src - 1, |
450 | 0 | src_stride, |
451 | 0 | 8, |
452 | 0 | nbr_flags_c, |
453 | 0 | &ps_ed_ctxt->au1_ref_8x8[0][0], |
454 | 0 | 0); |
455 | |
|
456 | 0 | for(i = 0; i < 2; i++) |
457 | 0 | { |
458 | 0 | pu1_src_4x4 = pu1_src + i * 4 * src_stride; |
459 | 0 | for(j = 0; j < 2; j++) |
460 | 0 | { |
461 | 0 | WORD32 i4_best_satd; |
462 | |
|
463 | 0 | pu1_src_arr[i * 2 + j] = pu1_src_4x4; |
464 | 0 | nbr_flags_c = nbr_flags_ptr[i * 8 + j]; |
465 | | |
466 | | /* linearize ref samples for ipe of 4x4 block */ |
467 | 0 | pf_intra_pred_luma_ref_substitution( |
468 | 0 | pu1_src_4x4 - src_stride - 1, |
469 | 0 | pu1_src_4x4 - src_stride, |
470 | 0 | pu1_src_4x4 - 1, |
471 | 0 | src_stride, |
472 | 0 | 4, |
473 | 0 | nbr_flags_c, |
474 | 0 | &ps_ed_ctxt->au1_ref_full_ctb[i * 2 + j][0], |
475 | 0 | 0); |
476 | | |
477 | | /* populates mode bits cost */ |
478 | 0 | ihevce_intra_populate_mode_bits_cost( |
479 | 0 | &ps_ed_ctxt->au2_mode_bits_cost_full_ctb[i * 2 + j][0], lambda); |
480 | |
|
481 | 0 | ihevce_ed_calc_4x4_blk( |
482 | 0 | ps_ed_4x4, |
483 | 0 | pu1_src_4x4, |
484 | 0 | src_stride, |
485 | 0 | &ps_ed_ctxt->au1_ref_full_ctb[i * 2 + j][0], |
486 | 0 | &ps_ed_ctxt->au2_mode_bits_cost_full_ctb[i * 2 + j][0], |
487 | 0 | &i4_best_satd, |
488 | 0 | i4_quality_preset, |
489 | 0 | &ai4_4x4_best_sad_cost[i * 2 + j], |
490 | 0 | ps_ipe_optimised_function_list); |
491 | |
|
492 | 0 | pu1_src_4x4 += 4; |
493 | 0 | ps_ed_4x4 += 1; |
494 | 0 | } |
495 | 0 | } |
496 | | |
497 | | /* 8x8 merge */ |
498 | 0 | { |
499 | 0 | UWORD8 pred[64]; |
500 | 0 | WORD32 merge_success; |
501 | 0 | WORD32 sad, satd, cost; |
502 | 0 | UWORD16 u2_sum_best_4x4_sad_cost = 0; |
503 | 0 | UWORD16 u2_sum_best_4x4_satd_cost = 0; |
504 | 0 | WORD32 i4_best_8x8_sad, i4_best_8x8_satd = 0; |
505 | 0 | UWORD16 u2_best_8x8_cost = (UWORD16)(-1); |
506 | 0 | UWORD8 u1_best_8x8_mode; |
507 | 0 | UWORD8 modes_to_eval[6]; |
508 | 0 | UWORD8 u1_cond_4x4_satd; |
509 | 0 | UWORD8 mode; |
510 | | |
511 | | /* init */ |
512 | 0 | ps_ed_4x4 = ps_ed_8x8; |
513 | 0 | u1_best_8x8_mode = mode = ps_ed_4x4[0].best_mode; |
514 | 0 | merge_success = |
515 | 0 | (((ps_ed_4x4[0].best_mode == ps_ed_4x4[1].best_mode) + |
516 | 0 | (ps_ed_4x4[0].best_mode == ps_ed_4x4[2].best_mode) + |
517 | 0 | (ps_ed_4x4[0].best_mode == ps_ed_4x4[3].best_mode)) == 3); |
518 | 0 | *pi4_best_satd = 0; |
519 | |
|
520 | 0 | for(i = 0; i < 4; i++) |
521 | 0 | { |
522 | 0 | u2_sum_best_4x4_sad_cost += ai4_4x4_best_sad_cost[i]; |
523 | 0 | modes_to_eval[i] = ps_ed_4x4[i].best_mode; |
524 | 0 | } |
525 | |
|
526 | 0 | u1_cond_4x4_satd = ((1 == i4_layer_id) || (!merge_success && i4_quality_preset < IHEVCE_QUALITY_P4)); |
527 | 0 | if(u1_cond_4x4_satd) |
528 | 0 | { |
529 | | /* Get SATD for 4x4 blocks */ |
530 | 0 | for(i = 0; i < 4; i++) |
531 | 0 | { |
532 | 0 | mode = modes_to_eval[i]; |
533 | 0 | g_apf_lum_ip[g_i4_ip_funcs[mode]]( |
534 | 0 | &ps_ed_ctxt->au1_ref_full_ctb[i][0], 0, &pred[0], 4, 4, mode); |
535 | |
|
536 | 0 | satd = ps_cmn_utils_optimised_function_list->pf_HAD_4x4_8bit( |
537 | 0 | pu1_src_arr[i], src_stride, &pred[0], 4, NULL, 0); |
538 | |
|
539 | 0 | (ps_ed_4x4 + i)->i4_4x4_satd = satd; |
540 | |
|
541 | 0 | u2_sum_best_4x4_satd_cost += |
542 | 0 | (satd + ps_ed_ctxt->au2_mode_bits_cost_full_ctb[i][mode]); |
543 | 0 | *pi4_best_satd += satd; |
544 | 0 | } |
545 | 0 | } |
546 | |
|
547 | 0 | if(!merge_success) |
548 | 0 | { |
549 | 0 | UWORD8 i1_start; /* no of modes to evaluate */ |
550 | 0 | UWORD8 ai1_modes[6]; |
551 | 0 | WORD32 i4_merge_success_stage2 = 0; |
552 | | |
553 | | /* Prepare 6 candidates for 8x8 block. Two are DC and planar */ |
554 | 0 | ai1_modes[4] = 0; |
555 | 0 | ai1_modes[5] = 1; |
556 | 0 | i1_start = 4; |
557 | | |
558 | | /* Assign along with removing duplicates rest 4 candidates. */ |
559 | 0 | for(i = 3; i >= 0; i--) |
560 | 0 | { |
561 | 0 | WORD8 i1_fresh_mode_flag = 1; |
562 | |
|
563 | 0 | mode = modes_to_eval[i]; |
564 | | /* Check if duplicate already exists in ai1_modes */ |
565 | 0 | for(j = i1_start; j < 6; j++) |
566 | 0 | { |
567 | 0 | if(mode == ai1_modes[j]) |
568 | 0 | i1_fresh_mode_flag = 0; |
569 | 0 | } |
570 | 0 | if(i1_fresh_mode_flag) |
571 | 0 | { |
572 | 0 | i1_start--; |
573 | 0 | ai1_modes[i1_start] = mode; |
574 | 0 | } |
575 | 0 | } |
576 | |
|
577 | 0 | if(i4_quality_preset < IHEVCE_QUALITY_P4) |
578 | 0 | { |
579 | | // 7.5 * lambda to incorporate transform flags |
580 | 0 | u2_sum_best_4x4_satd_cost += |
581 | 0 | (COMPUTE_RATE_COST_CLIP30(12, lambda, (LAMBDA_Q_SHIFT + 1))); |
582 | | |
583 | | /* loop over all modes for calculating SATD */ |
584 | 0 | for(i = i1_start; i < 6; i++) |
585 | 0 | { |
586 | 0 | mode = ai1_modes[i]; |
587 | 0 | g_apf_lum_ip[g_i4_ip_funcs[mode]]( |
588 | 0 | &ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred[0], 8, 8, mode); |
589 | |
|
590 | 0 | satd = ps_cmn_utils_optimised_function_list->pf_HAD_8x8_8bit( |
591 | 0 | pu1_src_arr[0], src_stride, &pred[0], 8, NULL, 0); |
592 | |
|
593 | 0 | cost = satd + ps_ed_ctxt->au2_mode_bits_cost_full_ctb[0][mode]; |
594 | | |
595 | | /* Update data corresponding to least 8x8 cost */ |
596 | 0 | if(cost <= u2_best_8x8_cost) |
597 | 0 | { |
598 | 0 | u2_best_8x8_cost = cost; |
599 | 0 | i4_best_8x8_satd = satd; |
600 | 0 | u1_best_8x8_mode = mode; |
601 | 0 | } |
602 | 0 | } |
603 | | |
604 | | /* 8x8 vs 4x4 decision based on SATD values */ |
605 | 0 | if((u2_best_8x8_cost <= u2_sum_best_4x4_satd_cost) || (u2_best_8x8_cost <= 300)) |
606 | 0 | { |
607 | 0 | i4_merge_success_stage2 = 1; |
608 | 0 | } |
609 | | |
610 | | /* Find the SAD based cost for 8x8 block for best mode */ |
611 | 0 | if(1 == i4_layer_id) |
612 | 0 | { |
613 | 0 | UWORD8 i4_best_8x8_mode = u1_best_8x8_mode; |
614 | 0 | WORD32 i4_best_8x8_sad_curr; |
615 | |
|
616 | 0 | g_apf_lum_ip[g_i4_ip_funcs[i4_best_8x8_mode]]( |
617 | 0 | &ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred[0], 8, 8, i4_best_8x8_mode); |
618 | |
|
619 | 0 | i4_best_8x8_sad_curr = ps_ipe_optimised_function_list->pf_8x8_sad_computer( |
620 | 0 | pu1_src_arr[0], &pred[0], src_stride, 8); |
621 | |
|
622 | 0 | *pi4_best_sad_cost_8x8_l1_ipe = |
623 | 0 | i4_best_8x8_sad_curr + |
624 | 0 | ps_ed_ctxt->au2_mode_bits_cost_full_ctb[0][i4_best_8x8_mode]; |
625 | 0 | *pi4_best_sad_8x8_l1_ipe = i4_best_8x8_sad_curr; |
626 | 0 | } |
627 | 0 | } |
628 | 0 | else /*If high_speed or extreme speed*/ |
629 | 0 | { |
630 | | // 7.5 * lambda to incorporate transform flags |
631 | 0 | u2_sum_best_4x4_sad_cost += |
632 | 0 | (COMPUTE_RATE_COST_CLIP30(12, lambda, (LAMBDA_Q_SHIFT + 1))); |
633 | | |
634 | | /*Loop over all modes for calculating SAD*/ |
635 | 0 | for(i = i1_start; i < 6; i++) |
636 | 0 | { |
637 | 0 | mode = ai1_modes[i]; |
638 | 0 | g_apf_lum_ip[g_i4_ip_funcs[mode]]( |
639 | 0 | &ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred[0], 8, 8, mode); |
640 | |
|
641 | 0 | sad = ps_ipe_optimised_function_list->pf_8x8_sad_computer( |
642 | 0 | pu1_src_arr[0], &pred[0], src_stride, 8); |
643 | |
|
644 | 0 | cost = sad + ps_ed_ctxt->au2_mode_bits_cost_full_ctb[0][mode]; |
645 | | |
646 | | /*Find the data correspoinding to least cost */ |
647 | 0 | if(cost <= u2_best_8x8_cost) |
648 | 0 | { |
649 | 0 | u2_best_8x8_cost = cost; |
650 | 0 | i4_best_8x8_sad = sad; |
651 | 0 | u1_best_8x8_mode = mode; |
652 | 0 | } |
653 | 0 | } |
654 | | |
655 | | /* 8x8 vs 4x4 decision based on SAD values */ |
656 | 0 | if((u2_best_8x8_cost <= u2_sum_best_4x4_sad_cost) || (u2_best_8x8_cost <= 300)) |
657 | 0 | { |
658 | 0 | i4_merge_success_stage2 = 1; |
659 | 0 | if(1 == i4_layer_id) |
660 | 0 | { |
661 | 0 | g_apf_lum_ip[g_i4_ip_funcs[u1_best_8x8_mode]]( |
662 | 0 | &ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred[0], 8, 8, u1_best_8x8_mode); |
663 | 0 | i4_best_8x8_satd = ps_cmn_utils_optimised_function_list->pf_HAD_8x8_8bit( |
664 | 0 | pu1_src_arr[0], src_stride, &pred[0], 8, NULL, 0); |
665 | 0 | } |
666 | 0 | } |
667 | |
|
668 | 0 | if(1 == i4_layer_id) |
669 | 0 | { |
670 | 0 | *pi4_best_sad_cost_8x8_l1_ipe = u2_best_8x8_cost; |
671 | 0 | *pi4_best_sad_8x8_l1_ipe = i4_best_8x8_sad; |
672 | 0 | } |
673 | 0 | } |
674 | 0 | if(i4_merge_success_stage2) |
675 | 0 | { |
676 | 0 | ps_ed_4x4->merge_success = 1; |
677 | 0 | ps_ed_4x4->best_merge_mode = u1_best_8x8_mode; |
678 | 0 | *pi4_best_satd = i4_best_8x8_satd; |
679 | 0 | } |
680 | 0 | } |
681 | 0 | else |
682 | 0 | { |
683 | 0 | ps_ed_4x4->merge_success = 1; |
684 | 0 | ps_ed_4x4->best_merge_mode = u1_best_8x8_mode; |
685 | |
|
686 | 0 | if(1 == i4_layer_id) |
687 | 0 | { |
688 | 0 | mode = u1_best_8x8_mode; |
689 | 0 | g_apf_lum_ip[g_i4_ip_funcs[mode]]( |
690 | 0 | &ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred[0], 8, 8, mode); |
691 | |
|
692 | 0 | i4_best_8x8_sad = ps_ipe_optimised_function_list->pf_8x8_sad_computer( |
693 | 0 | pu1_src_arr[0], &pred[0], src_stride, 8); |
694 | |
|
695 | 0 | *pi4_best_sad_cost_8x8_l1_ipe = |
696 | 0 | i4_best_8x8_sad + ps_ed_ctxt->au2_mode_bits_cost_full_ctb[0][mode]; |
697 | 0 | *pi4_best_sad_8x8_l1_ipe = i4_best_8x8_sad; |
698 | |
|
699 | 0 | i4_best_8x8_satd = ps_cmn_utils_optimised_function_list->pf_HAD_8x8_8bit( |
700 | 0 | pu1_src_arr[0], src_stride, &pred[0], 8, NULL, 0); |
701 | 0 | } |
702 | 0 | *pi4_best_satd = i4_best_8x8_satd; |
703 | 0 | } |
704 | 0 | } |
705 | 0 | } |
706 | | |
707 | | /*! |
708 | | ****************************************************************************** |
709 | | * \if Function name : ihevce_ed_calc_ctb \endif |
710 | | * |
711 | | * \brief: performs L1/L2 8x8 and 4x4 intra mode analysis |
712 | | * |
713 | | ***************************************************************************** |
714 | | */ |
715 | | void ihevce_ed_calc_ctb( |
716 | | ihevce_ed_ctxt_t *ps_ed_ctxt, |
717 | | ihevce_ed_blk_t *ps_ed_ctb, |
718 | | ihevce_ed_ctb_l1_t *ps_ed_ctb_l1, |
719 | | UWORD8 *pu1_src, |
720 | | WORD32 src_stride, |
721 | | WORD32 num_4x4_blks_x, |
722 | | WORD32 num_4x4_blks_y, |
723 | | WORD32 *nbr_flags, |
724 | | WORD32 i4_layer_id, |
725 | | ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list, |
726 | | ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list) |
727 | 0 | { |
728 | 0 | ihevce_ed_blk_t *ps_ed_8x8; |
729 | 0 | UWORD8 *pu1_src_8x8; |
730 | 0 | WORD32 *nbr_flags_ptr; |
731 | 0 | WORD32 lambda = ps_ed_ctxt->lambda; |
732 | 0 | WORD32 i, j; |
733 | 0 | WORD32 z_scan_idx = 0; |
734 | 0 | WORD32 z_scan_act_idx = 0; |
735 | |
|
736 | 0 | if(i4_layer_id == 1) |
737 | 0 | { |
738 | 0 | WORD32 i4_i; |
739 | |
|
740 | 0 | for(i4_i = 0; i4_i < 64; i4_i++) |
741 | 0 | { |
742 | 0 | (ps_ed_ctb + i4_i)->i4_4x4_satd = -1; |
743 | 0 | } |
744 | |
|
745 | 0 | for(i4_i = 0; i4_i < 16; i4_i++) |
746 | 0 | { |
747 | 0 | ps_ed_ctb_l1->i4_sum_4x4_satd[i4_i] = -2; |
748 | 0 | ps_ed_ctb_l1->i4_min_4x4_satd[i4_i] = 0x7FFFFFFF; |
749 | 0 | ps_ed_ctb_l1->i4_8x8_satd[i4_i][0] = -2; |
750 | 0 | ps_ed_ctb_l1->i4_8x8_satd[i4_i][1] = -2; |
751 | 0 | } |
752 | |
|
753 | 0 | for(i4_i = 0; i4_i < 4; i4_i++) |
754 | 0 | { |
755 | 0 | ps_ed_ctb_l1->i4_16x16_satd[i4_i][0] = -2; |
756 | 0 | ps_ed_ctb_l1->i4_16x16_satd[i4_i][1] = -2; |
757 | 0 | ps_ed_ctb_l1->i4_16x16_satd[i4_i][2] = -2; |
758 | 0 | } |
759 | 0 | ps_ed_ctb_l1->i4_32x32_satd[0][0] = -2; |
760 | 0 | ps_ed_ctb_l1->i4_32x32_satd[0][1] = -2; |
761 | 0 | ps_ed_ctb_l1->i4_32x32_satd[0][2] = -2; |
762 | 0 | ps_ed_ctb_l1->i4_32x32_satd[0][3] = -2; |
763 | |
|
764 | 0 | for(i4_i = 0; i4_i < 16; i4_i++) |
765 | 0 | { |
766 | 0 | ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_me[i4_i] = -1; |
767 | 0 | ps_ed_ctb_l1->i4_sad_cost_me_for_ref[i4_i] = -1; |
768 | 0 | ps_ed_ctb_l1->i4_sad_me_for_ref[i4_i] = -1; |
769 | 0 | ps_ed_ctb_l1->i4_best_sad_8x8_l1_me[i4_i] = -1; |
770 | |
|
771 | 0 | ps_ed_ctb_l1->i4_best_sad_8x8_l1_me_for_decide[i4_i] = -1; |
772 | |
|
773 | 0 | ps_ed_ctb_l1->i4_best_satd_8x8[i4_i] = -1; |
774 | 0 | ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_ipe[i4_i] = -1; |
775 | 0 | ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[i4_i] = -1; |
776 | 0 | } |
777 | 0 | } |
778 | |
|
779 | 0 | ASSERT((num_4x4_blks_x & 1) == 0); |
780 | 0 | ASSERT((num_4x4_blks_y & 1) == 0); |
781 | 0 | for(i = 0; i < num_4x4_blks_y / 2; i++) |
782 | 0 | { |
783 | 0 | pu1_src_8x8 = pu1_src + i * 2 * 4 * src_stride; |
784 | 0 | nbr_flags_ptr = &nbr_flags[0] + 2 * 8 * i; |
785 | |
|
786 | 0 | for(j = 0; j < num_4x4_blks_x / 2; j++) |
787 | 0 | { |
788 | 0 | WORD32 i4_best_satd; |
789 | 0 | WORD32 i4_best_sad_cost_8x8_l1_ipe; |
790 | 0 | WORD32 i4_best_sad_8x8_l1_ipe; |
791 | |
|
792 | 0 | z_scan_idx = gau1_ctb_raster_to_zscan[i * 2 * 16 + j * 2]; |
793 | 0 | z_scan_act_idx = gau1_ctb_raster_to_zscan[i * 16 + j]; |
794 | 0 | ASSERT(z_scan_act_idx <= 15); |
795 | | |
796 | 0 | ps_ed_8x8 = ps_ed_ctb + z_scan_idx; |
797 | 0 | ihevce_ed_calc_8x8_blk( |
798 | 0 | ps_ed_ctxt, |
799 | 0 | ps_ed_8x8, |
800 | 0 | pu1_src_8x8, |
801 | 0 | src_stride, |
802 | 0 | nbr_flags_ptr, |
803 | 0 | lambda, |
804 | 0 | &i4_best_satd, |
805 | 0 | i4_layer_id, |
806 | 0 | ps_ed_ctxt->i4_quality_preset, |
807 | 0 | &i4_best_sad_cost_8x8_l1_ipe, |
808 | 0 | &i4_best_sad_8x8_l1_ipe, |
809 | 0 | ps_ipe_optimised_function_list, |
810 | 0 | ps_cmn_utils_optimised_function_list); |
811 | 0 | ASSERT(i4_best_satd >= 0); |
812 | | |
813 | 0 | if(i4_layer_id == 1) |
814 | 0 | { |
815 | 0 | ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_ipe[z_scan_act_idx] = |
816 | 0 | i4_best_sad_cost_8x8_l1_ipe; |
817 | 0 | ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[z_scan_act_idx] = i4_best_sad_8x8_l1_ipe; |
818 | 0 | ps_ed_ctb_l1->i4_best_satd_8x8[z_scan_act_idx] = i4_best_satd; |
819 | 0 | ps_ed_ctxt->i8_sum_best_satd += i4_best_satd; |
820 | 0 | ps_ed_ctxt->i8_sum_sq_best_satd += (i4_best_satd * i4_best_satd); |
821 | 0 | } |
822 | 0 | pu1_src_8x8 += 8; |
823 | 0 | nbr_flags_ptr += 2; |
824 | 0 | } |
825 | 0 | } |
826 | 0 | } |
827 | | |
/*!
******************************************************************************
* \if Function name : fast_log2 \endif
*
* \brief: cheap approximation of log2(val) for positive, normal floats
*
* The 8-bit exponent field of the float supplies the integer part. The
* exponent field is then overwritten so that the value lies in [1, 2), and a
* quadratic in that value supplies the remainder. Zero, negative, denormal,
* infinite and NaN inputs are not treated specially.
*
* \param[in] val : value whose base-2 logarithm is wanted
*
* \return approximate log2(val)
*
*****************************************************************************
*/
float fast_log2(float val)
{
    union
    {
        float val;
        int32_t x;
    } bits = { val };
    const int32_t exp_field_mask = 255 << 23;
    const int32_t biased_exp = (bits.x >> 23) & 255;
    float exp_part = (float)(biased_exp - 128);
    float mant;

    /* force the exponent field to 127 so the float reads as 1.mantissa */
    bits.x = (bits.x & ~exp_field_mask) + (127 << 23);
    mant = bits.val;

    /* quadratic evaluates to 1 at mant == 1, which cancels the 128 bias */
    exp_part += ((-1.0f / 3) * mant + 2) * mant - 2.0f / 3;
    return exp_part;
}
838 | | |
839 | | /*! |
840 | | ****************************************************************************** |
841 | | * \if Function name : ihevce_cu_level_qp_mod \endif |
842 | | * |
843 | | * \brief: Performs CU level QP modulation |
844 | | * |
845 | | ***************************************************************************** |
846 | | */ |
847 | | WORD32 ihevce_cu_level_qp_mod( |
848 | | WORD32 frm_qscale, |
849 | | WORD32 cu_satd, |
850 | | long double frm_avg_activity, |
851 | | float f_mod_strength, |
852 | | WORD32 *pi4_act_factor, |
853 | | WORD32 *pi4_q_scale_mod, |
854 | | rc_quant_t *rc_quant_ctxt) |
855 | 0 | { |
856 | 0 | WORD32 cu_qscale; |
857 | 0 | WORD32 cu_qp; |
858 | |
|
859 | 0 | *pi4_act_factor = (1 << QP_LEVEL_MOD_ACT_FACTOR); |
860 | 0 | if(cu_satd != -1 && (WORD32)frm_avg_activity != 0) |
861 | 0 | { |
862 | 0 | ULWORD64 sq_cur_satd = (cu_satd * cu_satd); |
863 | 0 | float log2_sq_cur_satd = fast_log2(1 + sq_cur_satd); |
864 | 0 | WORD32 qp_offset = f_mod_strength * (log2_sq_cur_satd - frm_avg_activity); |
865 | |
|
866 | 0 | ASSERT(USE_SQRT_AVG_OF_SATD_SQR); |
867 | 0 | qp_offset = CLIP3(qp_offset, MIN_QP_MOD_OFFSET, MAX_QP_MOD_OFFSET); |
868 | 0 | *pi4_act_factor *= gad_look_up_activity[qp_offset + ABS(MIN_QP_MOD_OFFSET)]; |
869 | 0 | ASSERT(*pi4_act_factor > 0); |
870 | 0 | cu_qscale = ((frm_qscale * (*pi4_act_factor)) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))); |
871 | 0 | cu_qscale >>= QP_LEVEL_MOD_ACT_FACTOR; |
872 | 0 | } |
873 | 0 | else |
874 | 0 | { |
875 | 0 | cu_qscale = frm_qscale; |
876 | 0 | } |
877 | 0 | cu_qscale = CLIP3(cu_qscale, rc_quant_ctxt->i2_min_qscale, rc_quant_ctxt->i2_max_qscale); |
878 | 0 | cu_qp = rc_quant_ctxt->pi4_qscale_to_qp[cu_qscale]; |
879 | 0 | cu_qp = CLIP3(cu_qp, rc_quant_ctxt->i2_min_qp, rc_quant_ctxt->i2_max_qp); |
880 | 0 | *pi4_q_scale_mod = cu_qscale; |
881 | |
|
882 | 0 | return (cu_qp); |
883 | 0 | } |
884 | | |
885 | | /*! |
886 | | ****************************************************************************** |
887 | | * \if Function name : ihevce_ed_frame_init \endif |
888 | | * |
889 | | * \brief: Initialize frame context for early decision |
890 | | * |
891 | | ***************************************************************************** |
892 | | */ |
893 | | void ihevce_ed_frame_init(void *pv_ed_ctxt, WORD32 i4_layer_no) |
894 | 0 | { |
895 | 0 | ihevce_ed_ctxt_t *ps_ed_ctxt = (ihevce_ed_ctxt_t *)pv_ed_ctxt; |
896 | |
|
897 | 0 | g_apf_lum_ip[IP_FUNC_MODE_0] = ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_planar_fptr; |
898 | 0 | g_apf_lum_ip[IP_FUNC_MODE_1] = ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_dc_fptr; |
899 | 0 | g_apf_lum_ip[IP_FUNC_MODE_2] = ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode2_fptr; |
900 | 0 | g_apf_lum_ip[IP_FUNC_MODE_3TO9] = |
901 | 0 | ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr; |
902 | 0 | g_apf_lum_ip[IP_FUNC_MODE_10] = ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_horz_fptr; |
903 | 0 | g_apf_lum_ip[IP_FUNC_MODE_11TO17] = |
904 | 0 | ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr; |
905 | 0 | g_apf_lum_ip[IP_FUNC_MODE_18_34] = |
906 | 0 | ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr; |
907 | 0 | g_apf_lum_ip[IP_FUNC_MODE_19TO25] = |
908 | 0 | ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr; |
909 | 0 | g_apf_lum_ip[IP_FUNC_MODE_26] = ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_ver_fptr; |
910 | 0 | g_apf_lum_ip[IP_FUNC_MODE_27TO33] = |
911 | 0 | ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr; |
912 | |
|
913 | 0 | if(i4_layer_no == 1) |
914 | 0 | { |
915 | 0 | ps_ed_ctxt->i8_sum_best_satd = 0; |
916 | 0 | ps_ed_ctxt->i8_sum_sq_best_satd = 0; |
917 | 0 | } |
918 | 0 | } |
919 | | |
920 | | /** |
921 | | ******************************************************************************** |
922 | | * |
923 | | * @brief downscales by 2 in horz and vertical direction, creates output of |
924 | | * size wd/2 * ht/2 |
925 | | * |
926 | | * @param[in] pu1_src : source pointer |
927 | | * @param[in] src_stride : source stride |
928 | | * @param[out] pu1_dst : destination pointer. Starting of a row. |
929 | | * @param[in] dst_stride : destination stride |
930 | | * @param[in] wd : width |
931 | | * @param[in] ht : height |
932 | | * @param[in] pu1_wkg_mem : working memory (at least of size CEIL16(wd) * ht) |
933 | | * @param[in] ht_offset : height offset of the block to be scaled |
934 | | * @param[in] block_ht : height of the block to be scaled |
935 | | * @param[in] wd_offset : width offset of the block to be scaled |
936 | | * @param[in] block_wd : width of the block to be scaled |
937 | | * |
938 | | * @return void |
939 | | * |
940 | | * @remarks Assumes block_ht is a multiple of 2. LANCZOS_SCALER |
941 | | * |
942 | | ******************************************************************************** |
943 | | */ |
944 | | void ihevce_scaling_filter_mxn( |
945 | | UWORD8 *pu1_src, |
946 | | WORD32 src_strd, |
947 | | UWORD8 *pu1_scrtch, |
948 | | WORD32 scrtch_strd, |
949 | | UWORD8 *pu1_dst, |
950 | | WORD32 dst_strd, |
951 | | WORD32 ht, |
952 | | WORD32 wd) |
953 | 0 | { |
954 | 0 | #define FILT_TAP_Q 8 |
955 | 0 | #define N_TAPS 7 |
956 | 0 | const WORD16 i4_ftaps[N_TAPS] = { -18, 0, 80, 132, 80, 0, -18 }; |
957 | 0 | WORD32 i, j; |
958 | 0 | WORD32 tmp; |
959 | 0 | UWORD8 *pu1_src_tmp = pu1_src - 3 * src_strd; |
960 | 0 | UWORD8 *pu1_scrtch_tmp = pu1_scrtch; |
961 | | |
962 | | /* horizontal filtering */ |
963 | 0 | for(i = -3; i < ht + 2; i++) |
964 | 0 | { |
965 | 0 | for(j = 0; j < wd; j += 2) |
966 | 0 | { |
967 | 0 | tmp = (i4_ftaps[3] * pu1_src_tmp[j] + |
968 | 0 | i4_ftaps[2] * (pu1_src_tmp[j - 1] + pu1_src_tmp[j + 1]) + |
969 | 0 | i4_ftaps[1] * (pu1_src_tmp[j + 2] + pu1_src_tmp[j - 2]) + |
970 | 0 | i4_ftaps[0] * (pu1_src_tmp[j + 3] + pu1_src_tmp[j - 3]) + |
971 | 0 | (1 << (FILT_TAP_Q - 1))) >> |
972 | 0 | FILT_TAP_Q; |
973 | 0 | pu1_scrtch_tmp[j >> 1] = CLIP_U8(tmp); |
974 | 0 | } |
975 | 0 | pu1_scrtch_tmp += scrtch_strd; |
976 | 0 | pu1_src_tmp += src_strd; |
977 | 0 | } |
978 | | /* vertical filtering */ |
979 | 0 | pu1_scrtch_tmp = pu1_scrtch + 3 * scrtch_strd; |
980 | 0 | for(i = 0; i < ht; i += 2) |
981 | 0 | { |
982 | 0 | for(j = 0; j < (wd >> 1); j++) |
983 | 0 | { |
984 | 0 | tmp = |
985 | 0 | (i4_ftaps[3] * pu1_scrtch_tmp[j] + |
986 | 0 | i4_ftaps[2] * (pu1_scrtch_tmp[j + scrtch_strd] + pu1_scrtch_tmp[j - scrtch_strd]) + |
987 | 0 | i4_ftaps[1] * |
988 | 0 | (pu1_scrtch_tmp[j + 2 * scrtch_strd] + pu1_scrtch_tmp[j - 2 * scrtch_strd]) + |
989 | 0 | i4_ftaps[0] * |
990 | 0 | (pu1_scrtch_tmp[j + 3 * scrtch_strd] + pu1_scrtch_tmp[j - 3 * scrtch_strd]) + |
991 | 0 | (1 << (FILT_TAP_Q - 1))) >> |
992 | 0 | FILT_TAP_Q; |
993 | 0 | pu1_dst[j] = CLIP_U8(tmp); |
994 | 0 | } |
995 | 0 | pu1_dst += dst_strd; |
996 | 0 | pu1_scrtch_tmp += (scrtch_strd << 1); |
997 | 0 | } |
998 | 0 | } |
999 | | |
1000 | | void ihevce_scale_by_2( |
1001 | | UWORD8 *pu1_src, |
1002 | | WORD32 src_strd, |
1003 | | UWORD8 *pu1_dst, |
1004 | | WORD32 dst_strd, |
1005 | | WORD32 wd, |
1006 | | WORD32 ht, |
1007 | | UWORD8 *pu1_wkg_mem, |
1008 | | WORD32 ht_offset, |
1009 | | WORD32 block_ht, |
1010 | | WORD32 wd_offset, |
1011 | | WORD32 block_wd, |
1012 | | FT_COPY_2D *pf_copy_2d, |
1013 | | FT_SCALING_FILTER_BY_2 *pf_scaling_filter_mxn) |
1014 | 0 | { |
1015 | 0 | #define N_TAPS 7 |
1016 | 0 | #define MAX_BLK_SZ (MAX_CTB_SIZE + ((N_TAPS >> 1) << 1)) |
1017 | 0 | UWORD8 au1_cpy[MAX_BLK_SZ * MAX_BLK_SZ]; |
1018 | 0 | UWORD32 cpy_strd = MAX_BLK_SZ; |
1019 | 0 | UWORD8 *pu1_cpy = au1_cpy + cpy_strd * (N_TAPS >> 1) + (N_TAPS >> 1); |
1020 | |
|
1021 | 0 | UWORD8 *pu1_in, *pu1_out; |
1022 | 0 | WORD32 in_strd, wkg_mem_strd; |
1023 | |
|
1024 | 0 | WORD32 row_start, row_end; |
1025 | 0 | WORD32 col_start, col_end; |
1026 | 0 | WORD32 i, fun_select; |
1027 | 0 | WORD32 ht_tmp, wd_tmp; |
1028 | 0 | FT_SCALING_FILTER_BY_2 *ihevce_scaling_filters[2]; |
1029 | |
|
1030 | 0 | assert((wd & 1) == 0); |
1031 | 0 | assert((ht & 1) == 0); |
1032 | 0 | assert(block_wd <= MAX_CTB_SIZE); |
1033 | 0 | assert(block_ht <= MAX_CTB_SIZE); |
1034 | | |
1035 | | /* function pointers for filtering different dimensions */ |
1036 | 0 | ihevce_scaling_filters[0] = ihevce_scaling_filter_mxn; |
1037 | 0 | ihevce_scaling_filters[1] = pf_scaling_filter_mxn; |
1038 | | |
1039 | | /* handle boundary blks */ |
1040 | 0 | col_start = (wd_offset < (N_TAPS >> 1)) ? 1 : 0; |
1041 | 0 | row_start = (ht_offset < (N_TAPS >> 1)) ? 1 : 0; |
1042 | 0 | col_end = ((wd_offset + block_wd) > (wd - (N_TAPS >> 1))) ? 1 : 0; |
1043 | 0 | row_end = ((ht_offset + block_ht) > (ht - (N_TAPS >> 1))) ? 1 : 0; |
1044 | 0 | if(col_end && (wd % block_wd != 0)) |
1045 | 0 | { |
1046 | 0 | block_wd = (wd % block_wd); |
1047 | 0 | } |
1048 | 0 | if(row_end && (ht % block_ht != 0)) |
1049 | 0 | { |
1050 | 0 | block_ht = (ht % block_ht); |
1051 | 0 | } |
1052 | | |
1053 | | /* boundary blks needs to be padded, copy src to tmp buffer */ |
1054 | 0 | if(col_start || col_end || row_end || row_start) |
1055 | 0 | { |
1056 | 0 | UWORD8 *pu1_src_tmp = pu1_src + wd_offset + ht_offset * src_strd; |
1057 | |
|
1058 | 0 | pu1_cpy -= (3 * (1 - col_start) + cpy_strd * 3 * (1 - row_start)); |
1059 | 0 | pu1_src_tmp -= (3 * (1 - col_start) + src_strd * 3 * (1 - row_start)); |
1060 | 0 | ht_tmp = block_ht + 3 * (1 - row_start) + 3 * (1 - row_end); |
1061 | 0 | wd_tmp = block_wd + 3 * (1 - col_start) + 3 * (1 - col_end); |
1062 | 0 | pf_copy_2d(pu1_cpy, cpy_strd, pu1_src_tmp, src_strd, wd_tmp, ht_tmp); |
1063 | 0 | pu1_in = au1_cpy + cpy_strd * 3 + 3; |
1064 | 0 | in_strd = cpy_strd; |
1065 | 0 | } |
1066 | 0 | else |
1067 | 0 | { |
1068 | 0 | pu1_in = pu1_src + wd_offset + ht_offset * src_strd; |
1069 | 0 | in_strd = src_strd; |
1070 | 0 | } |
1071 | | |
1072 | | /*top padding*/ |
1073 | 0 | if(row_start) |
1074 | 0 | { |
1075 | 0 | UWORD8 *pu1_cpy_tmp = au1_cpy + cpy_strd * 3; |
1076 | |
|
1077 | 0 | pu1_cpy = au1_cpy + cpy_strd * (3 - 1); |
1078 | 0 | memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6); |
1079 | 0 | pu1_cpy -= cpy_strd; |
1080 | 0 | memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6); |
1081 | 0 | pu1_cpy -= cpy_strd; |
1082 | 0 | memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6); |
1083 | 0 | } |
1084 | | |
1085 | | /*bottom padding*/ |
1086 | 0 | if(row_end) |
1087 | 0 | { |
1088 | 0 | UWORD8 *pu1_cpy_tmp = au1_cpy + cpy_strd * 3 + (block_ht - 1) * cpy_strd; |
1089 | |
|
1090 | 0 | pu1_cpy = pu1_cpy_tmp + cpy_strd; |
1091 | 0 | memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6); |
1092 | 0 | pu1_cpy += cpy_strd; |
1093 | 0 | memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6); |
1094 | 0 | pu1_cpy += cpy_strd; |
1095 | 0 | memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6); |
1096 | 0 | } |
1097 | | |
1098 | | /*left padding*/ |
1099 | 0 | if(col_start) |
1100 | 0 | { |
1101 | 0 | UWORD8 *pu1_cpy_tmp = au1_cpy + 3; |
1102 | |
|
1103 | 0 | pu1_cpy = au1_cpy; |
1104 | 0 | for(i = 0; i < block_ht + 6; i++) |
1105 | 0 | { |
1106 | 0 | pu1_cpy[0] = pu1_cpy[1] = pu1_cpy[2] = pu1_cpy_tmp[0]; |
1107 | 0 | pu1_cpy += cpy_strd; |
1108 | 0 | pu1_cpy_tmp += cpy_strd; |
1109 | 0 | } |
1110 | 0 | } |
1111 | | |
1112 | | /*right padding*/ |
1113 | 0 | if(col_end) |
1114 | 0 | { |
1115 | 0 | UWORD8 *pu1_cpy_tmp = au1_cpy + 3 + block_wd - 1; |
1116 | |
|
1117 | 0 | pu1_cpy = au1_cpy + 3 + block_wd; |
1118 | 0 | for(i = 0; i < block_ht + 6; i++) |
1119 | 0 | { |
1120 | 0 | pu1_cpy[0] = pu1_cpy[1] = pu1_cpy[2] = pu1_cpy_tmp[0]; |
1121 | 0 | pu1_cpy += cpy_strd; |
1122 | 0 | pu1_cpy_tmp += cpy_strd; |
1123 | 0 | } |
1124 | 0 | } |
1125 | |
|
1126 | 0 | wkg_mem_strd = block_wd >> 1; |
1127 | 0 | pu1_out = pu1_dst + (wd_offset >> 1); |
1128 | 0 | fun_select = (block_wd % 16 == 0); |
1129 | 0 | ihevce_scaling_filters[fun_select]( |
1130 | 0 | pu1_in, in_strd, pu1_wkg_mem, wkg_mem_strd, pu1_out, dst_strd, block_ht, block_wd); |
1131 | | |
1132 | | /* Left padding of 16 for 1st block of every row */ |
1133 | 0 | if(wd_offset == 0) |
1134 | 0 | { |
1135 | 0 | UWORD8 u1_val; |
1136 | 0 | WORD32 pad_wd = 16; |
1137 | 0 | WORD32 pad_ht = block_ht >> 1; |
1138 | 0 | UWORD8 *dst = pu1_dst; |
1139 | |
|
1140 | 0 | for(i = 0; i < pad_ht; i++) |
1141 | 0 | { |
1142 | 0 | u1_val = dst[0]; |
1143 | 0 | memset(&dst[-pad_wd], u1_val, pad_wd); |
1144 | 0 | dst += dst_strd; |
1145 | 0 | } |
1146 | 0 | } |
1147 | |
|
1148 | 0 | if(wd == wd_offset + block_wd) |
1149 | 0 | { |
1150 | | /* Right padding of (16 + (CEIL16(wd/2))-wd/2) for last block of every row */ |
1151 | | /* Right padding is done only after processing of last block of that row is done*/ |
1152 | 0 | UWORD8 u1_val; |
1153 | 0 | WORD32 pad_wd = 16 + CEIL16((wd >> 1)) - (wd >> 1) + 4; |
1154 | 0 | WORD32 pad_ht = block_ht >> 1; |
1155 | 0 | UWORD8 *dst = pu1_dst + (wd >> 1) - 1; |
1156 | |
|
1157 | 0 | for(i = 0; i < pad_ht; i++) |
1158 | 0 | { |
1159 | 0 | u1_val = dst[0]; |
1160 | 0 | memset(&dst[1], u1_val, pad_wd); |
1161 | 0 | dst += dst_strd; |
1162 | 0 | } |
1163 | |
|
1164 | 0 | if(ht_offset == 0) |
1165 | 0 | { |
1166 | | /* Top padding of 16 is done for 1st row only after we reach end of that row */ |
1167 | 0 | pad_wd = dst_strd; |
1168 | 0 | pad_ht = 16; |
1169 | 0 | dst = pu1_dst - 16; |
1170 | 0 | for(i = 1; i <= pad_ht; i++) |
1171 | 0 | { |
1172 | 0 | memcpy(dst - (i * dst_strd), dst, pad_wd); |
1173 | 0 | } |
1174 | 0 | } |
1175 | | |
1176 | | /* Bottom padding of (16 + (CEIL16(ht/2)) - ht/2) is done only if we have |
1177 | | reached end of frame */ |
1178 | 0 | if(ht - ht_offset - block_ht == 0) |
1179 | 0 | { |
1180 | 0 | pad_wd = dst_strd; |
1181 | 0 | pad_ht = 16 + CEIL16((ht >> 1)) - (ht >> 1) + 4; |
1182 | 0 | dst = pu1_dst + (((block_ht >> 1) - 1) * dst_strd) - 16; |
1183 | 0 | for(i = 1; i <= pad_ht; i++) |
1184 | 0 | memcpy(dst + (i * dst_strd), dst, pad_wd); |
1185 | 0 | } |
1186 | 0 | } |
1187 | 0 | } |
1188 | | |
1189 | | /*! |
1190 | | ****************************************************************************** |
1191 | | * \if Function name : ihevce_decomp_pre_intra_process_row \endif |
1192 | | * |
1193 | | * \brief |
1194 | | * Row level function which down scales a given row by 2 in horz and vertical |
1195 | | * direction creates output of size wd/2 * ht/2. When decomposition is done |
1196 | | * from L1 to L2 pre intra analysis is done on L1 |
1197 | | * |
1198 | | ***************************************************************************** |
1199 | | */ |
1200 | | void ihevce_decomp_pre_intra_process_row( |
1201 | | UWORD8 *pu1_src, |
1202 | | WORD32 src_stride, |
1203 | | UWORD8 *pu1_dst_decomp, |
1204 | | WORD32 dst_stride, |
1205 | | WORD32 layer_wd, |
1206 | | WORD32 layer_ht, |
1207 | | UWORD8 *pu1_wkg_mem, |
1208 | | WORD32 ht_offset, |
1209 | | WORD32 block_ht, |
1210 | | WORD32 block_wd, |
1211 | | WORD32 num_col_blks, |
1212 | | WORD32 layer_no, |
1213 | | ihevce_ed_ctxt_t *ps_ed_ctxt, |
1214 | | ihevce_ed_blk_t *ps_ed_row, |
1215 | | ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row, |
1216 | | WORD32 num_4x4_blks_ctb_y, |
1217 | | WORD32 num_4x4_blks_last_ctb_x, |
1218 | | WORD32 skip_decomp, |
1219 | | WORD32 skip_pre_intra, |
1220 | | WORD32 row_block_no, |
1221 | | ctb_analyse_t *ps_ctb_analyse, |
1222 | | ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list, |
1223 | | ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list) |
1224 | 0 | { |
1225 | 0 | WORD32 do_pre_intra_analysis = ((layer_no == 1) || (layer_no == 2)) && (!skip_pre_intra); |
1226 | 0 | WORD32 col_block_no; |
1227 | 0 | WORD32 i, j; |
1228 | |
|
1229 | 0 | if(!skip_decomp) |
1230 | 0 | { |
1231 | 0 | ctb_analyse_t *ps_ctb_analyse_curr = ps_ctb_analyse + row_block_no * num_col_blks; |
1232 | |
|
1233 | 0 | for(col_block_no = 0; col_block_no < num_col_blks; col_block_no++) |
1234 | 0 | { |
1235 | 0 | ihevce_scale_by_2( |
1236 | 0 | pu1_src, |
1237 | 0 | src_stride, |
1238 | 0 | pu1_dst_decomp, |
1239 | 0 | dst_stride, |
1240 | 0 | layer_wd, |
1241 | 0 | layer_ht, |
1242 | 0 | pu1_wkg_mem, |
1243 | 0 | ht_offset, |
1244 | 0 | block_ht, |
1245 | 0 | block_wd * col_block_no, |
1246 | 0 | block_wd, |
1247 | 0 | ps_cmn_utils_optimised_function_list->pf_copy_2d, |
1248 | 0 | ps_ipe_optimised_function_list->pf_scaling_filter_mxn); |
1249 | | |
1250 | | /* Disable noise detection */ |
1251 | 0 | memset( |
1252 | 0 | ps_ctb_analyse_curr->s_ctb_noise_params.au1_is_8x8Blk_noisy, |
1253 | 0 | 0, |
1254 | 0 | sizeof(ps_ctb_analyse_curr->s_ctb_noise_params.au1_is_8x8Blk_noisy)); |
1255 | |
|
1256 | 0 | ps_ctb_analyse_curr->s_ctb_noise_params.i4_noise_present = 0; |
1257 | |
|
1258 | 0 | ps_ctb_analyse_curr++; |
1259 | 0 | } |
1260 | 0 | } |
1261 | |
|
1262 | 0 | if(do_pre_intra_analysis) |
1263 | 0 | { |
1264 | 0 | ihevce_ed_blk_t *ps_ed_ctb = ps_ed_row; |
1265 | 0 | ihevce_ed_ctb_l1_t *ps_ed_ctb_l1 = ps_ed_ctb_l1_row; |
1266 | 0 | WORD32 *nbr_flags_ptr = &ps_ed_ctxt->ai4_nbr_flags[0]; |
1267 | 0 | UWORD8 *pu1_src_pre_intra = pu1_src + (ht_offset * src_stride); |
1268 | 0 | WORD32 num_4x4_blks_in_ctb = block_wd >> 2; |
1269 | 0 | WORD32 src_inc_pre_intra = num_4x4_blks_in_ctb * 4; |
1270 | 0 | WORD32 inc_ctb = num_4x4_blks_in_ctb * num_4x4_blks_in_ctb; |
1271 | | |
1272 | | /* To analyse any given CTB we need to set the availability flags of the |
1273 | | * following neighbouring CTB: BL,L,TL,T,TR */ |
1274 | | /* copy the neighbor flags for a general ctb (ctb inside the frame); not any corners */ |
1275 | 0 | memcpy( |
1276 | 0 | ps_ed_ctxt->ai4_nbr_flags, |
1277 | 0 | gau4_nbr_flags_8x8_4x4blks, |
1278 | 0 | sizeof(gau4_nbr_flags_8x8_4x4blks)); |
1279 | | |
1280 | | /* set top flags unavailable for first ctb row */ |
1281 | 0 | if(ht_offset == 0) |
1282 | 0 | { |
1283 | 0 | for(j = 0; j < num_4x4_blks_in_ctb; j++) |
1284 | 0 | { |
1285 | 0 | SET_T_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j]); |
1286 | 0 | SET_TR_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j]); |
1287 | 0 | SET_TL_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j]); |
1288 | 0 | } |
1289 | 0 | } |
1290 | | |
1291 | | /* set bottom left flags as not available for last row */ |
1292 | 0 | if(ht_offset + block_ht >= layer_ht) |
1293 | 0 | { |
1294 | 0 | for(j = 0; j < num_4x4_blks_in_ctb; j++) |
1295 | 0 | { |
1296 | 0 | SET_BL_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[(num_4x4_blks_ctb_y - 1) * 8 + j]); |
1297 | 0 | } |
1298 | 0 | } |
1299 | | |
1300 | | /* set left flags unavailable for 1st ctb col */ |
1301 | 0 | for(j = 0; j < num_4x4_blks_ctb_y; j++) |
1302 | 0 | { |
1303 | 0 | SET_L_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]); |
1304 | 0 | SET_BL_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]); |
1305 | 0 | SET_TL_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]); |
1306 | 0 | } |
1307 | |
|
1308 | 0 | for(col_block_no = 0; col_block_no < num_col_blks; col_block_no++) |
1309 | 0 | { |
1310 | 0 | if(col_block_no == 1) |
1311 | 0 | { |
1312 | | /* For the rest of the ctbs, set left flags available */ |
1313 | 0 | for(j = 0; j < num_4x4_blks_ctb_y; j++) |
1314 | 0 | { |
1315 | 0 | SET_L_AVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]); |
1316 | 0 | } |
1317 | 0 | for(j = 0; j < num_4x4_blks_ctb_y - 1; j++) |
1318 | 0 | { |
1319 | 0 | SET_BL_AVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]); |
1320 | 0 | SET_TL_AVAILABLE(ps_ed_ctxt->ai4_nbr_flags[(j + 1) * 8]); |
1321 | 0 | } |
1322 | 0 | if(ht_offset != 0) |
1323 | 0 | { |
1324 | 0 | SET_TL_AVAILABLE(ps_ed_ctxt->ai4_nbr_flags[0]); |
1325 | 0 | } |
1326 | 0 | } |
1327 | |
|
1328 | 0 | if(col_block_no == num_col_blks - 1) |
1329 | 0 | { |
1330 | | /* set top right flags unavailable for last ctb col */ |
1331 | 0 | for(i = 0; i < num_4x4_blks_ctb_y; i++) |
1332 | 0 | { |
1333 | 0 | SET_TR_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[i * 8 + num_4x4_blks_last_ctb_x - 1]); |
1334 | 0 | } |
1335 | 0 | } |
1336 | | |
1337 | | /* Call intra analysis for the ctb */ |
1338 | 0 | ihevce_ed_calc_ctb( |
1339 | 0 | ps_ed_ctxt, |
1340 | 0 | ps_ed_ctb, |
1341 | 0 | ps_ed_ctb_l1, |
1342 | 0 | pu1_src_pre_intra, |
1343 | 0 | src_stride, |
1344 | 0 | (col_block_no == num_col_blks - 1) ? num_4x4_blks_last_ctb_x : num_4x4_blks_in_ctb, |
1345 | 0 | num_4x4_blks_ctb_y, |
1346 | 0 | nbr_flags_ptr, |
1347 | 0 | layer_no, |
1348 | 0 | ps_ipe_optimised_function_list, |
1349 | 0 | ps_cmn_utils_optimised_function_list); |
1350 | 0 | pu1_src_pre_intra += src_inc_pre_intra; |
1351 | 0 | ps_ed_ctb += inc_ctb; |
1352 | 0 | ps_ed_ctb_l1 += 1; |
1353 | 0 | } |
1354 | 0 | } |
1355 | 0 | } |
1356 | | |
1357 | | /*! |
1358 | | ****************************************************************************** |
1359 | | * \if Function name : ihevce_decomp_pre_intra_process \endif |
1360 | | * |
1361 | | * \brief |
1362 | | * Frame level function to decompose given layer L0 into coarser layers and |
1363 | | * perform intra analysis on layers below L0 |
1364 | | * |
1365 | | ***************************************************************************** |
1366 | | */ |
1367 | | void ihevce_decomp_pre_intra_process( |
1368 | | void *pv_ctxt, |
1369 | | ihevce_lap_output_params_t *ps_lap_out_prms, |
1370 | | frm_ctb_ctxt_t *ps_frm_ctb_prms, |
1371 | | void *pv_multi_thrd_ctxt, |
1372 | | WORD32 thrd_id, |
1373 | | WORD32 i4_ping_pong) |
1374 | 0 | { |
1375 | 0 | ihevce_decomp_pre_intra_master_ctxt_t *ps_master_ctxt = pv_ctxt; |
1376 | 0 | ihevce_decomp_pre_intra_ctxt_t *ps_ctxt = ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[thrd_id]; |
1377 | 0 | multi_thrd_ctxt_t *ps_multi_thrd = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt; |
1378 | 0 | WORD32 i4_num_layers = ps_ctxt->i4_num_layers; |
1379 | 0 | UWORD8 *pu1_wkg_mem = ps_ctxt->au1_wkg_mem; |
1380 | 0 | ihevce_ed_ctxt_t *ps_ed_ctxt = ps_ctxt->ps_ed_ctxt; |
1381 | 0 | ihevce_ed_ctb_l1_t *ps_ed_ctb_l1 = ps_ed_ctxt->ps_ed_ctb_l1; |
1382 | 0 | ihevce_ed_blk_t *ps_ed; |
1383 | 0 | WORD32 i4_layer_no; |
1384 | 0 | WORD32 end_of_layer; |
1385 | 0 | UWORD8 *pu1_src, *pu1_dst; |
1386 | 0 | WORD32 src_stride, dst_stride; |
1387 | 0 | WORD32 i4_layer_wd, i4_layer_ht; |
1388 | 0 | WORD32 ht_offset, block_ht, row_block_no, num_row_blocks; |
1389 | 0 | WORD32 block_wd, num_col_blks; |
1390 | 0 | WORD32 skip_decomp, skip_pre_intra; |
1391 | 0 | WORD32 inc_ctb; |
1392 | |
|
1393 | 0 | ASSERT(i4_num_layers >= 3); |
1394 | 0 | ps_ctxt->as_layers[0].pu1_inp = (UWORD8 *)ps_lap_out_prms->s_input_buf.pv_y_buf; |
1395 | 0 | ps_ctxt->as_layers[0].i4_inp_stride = ps_lap_out_prms->s_input_buf.i4_y_strd; |
1396 | 0 | ps_ctxt->as_layers[0].i4_actual_wd = ps_lap_out_prms->s_input_buf.i4_y_wd; |
1397 | 0 | ps_ctxt->as_layers[0].i4_actual_ht = ps_lap_out_prms->s_input_buf.i4_y_ht; |
1398 | | |
1399 | | /* This loop does decomp & intra by picking jobs from job queue */ |
1400 | 0 | for(i4_layer_no = 0; i4_layer_no < i4_num_layers; i4_layer_no++) |
1401 | 0 | { |
1402 | 0 | WORD32 idx = 0; |
1403 | |
|
1404 | 0 | src_stride = ps_ctxt->as_layers[i4_layer_no].i4_inp_stride; |
1405 | 0 | pu1_src = ps_ctxt->as_layers[i4_layer_no].pu1_inp; |
1406 | 0 | i4_layer_wd = ps_ctxt->as_layers[i4_layer_no].i4_actual_wd; |
1407 | 0 | i4_layer_ht = ps_ctxt->as_layers[i4_layer_no].i4_actual_ht; |
1408 | 0 | pu1_dst = ps_ctxt->as_layers[i4_layer_no + 1].pu1_inp; |
1409 | 0 | dst_stride = ps_ctxt->as_layers[i4_layer_no + 1].i4_inp_stride; |
1410 | 0 | block_wd = ps_ctxt->as_layers[i4_layer_no].i4_decomp_blk_wd; |
1411 | 0 | block_ht = ps_ctxt->as_layers[i4_layer_no].i4_decomp_blk_ht; |
1412 | 0 | num_col_blks = ps_ctxt->as_layers[i4_layer_no].i4_num_col_blks; |
1413 | 0 | num_row_blocks = ps_ctxt->as_layers[i4_layer_no].i4_num_row_blks; |
1414 | 0 | inc_ctb = (block_wd >> 2) * (block_wd >> 2); |
1415 | 0 | end_of_layer = 0; |
1416 | 0 | skip_pre_intra = 1; |
1417 | 0 | skip_decomp = 0; |
1418 | 0 | if(i4_layer_no >= (ps_ctxt->i4_num_layers - 1)) |
1419 | 0 | { |
1420 | 0 | skip_decomp = 1; |
1421 | 0 | } |
1422 | | |
1423 | | /* ------------ Loop over all the CTB rows & perform Decomp --------------- */ |
1424 | 0 | while(0 == end_of_layer) |
1425 | 0 | { |
1426 | 0 | job_queue_t *ps_pre_enc_job; |
1427 | 0 | WORD32 num_4x4_blks_ctb_y = 0, num_4x4_blks_last_ctb_x = 0; |
1428 | | |
1429 | | /* Get the current row from the job queue */ |
1430 | 0 | ps_pre_enc_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job( |
1431 | 0 | pv_multi_thrd_ctxt, (DECOMP_JOB_LYR0 + i4_layer_no), 1, i4_ping_pong); |
1432 | | |
1433 | | /* If all rows are done, set the end of layer flag to 1, */ |
1434 | 0 | if(NULL == ps_pre_enc_job) |
1435 | 0 | { |
1436 | 0 | end_of_layer = 1; |
1437 | 0 | } |
1438 | 0 | else |
1439 | 0 | { |
1440 | | /* Obtain the current row's details from the job */ |
1441 | 0 | row_block_no = ps_pre_enc_job->s_job_info.s_decomp_job_info.i4_vert_unit_row_no; |
1442 | 0 | ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx] = row_block_no; |
1443 | 0 | ht_offset = row_block_no * block_ht; |
1444 | |
|
1445 | 0 | if(row_block_no < (num_row_blocks)) |
1446 | 0 | { |
1447 | 0 | pu1_dst = ps_ctxt->as_layers[i4_layer_no + 1].pu1_inp + |
1448 | 0 | ((block_ht >> 1) * dst_stride * row_block_no); |
1449 | | |
1450 | | /* call the row level processing function */ |
1451 | 0 | ihevce_decomp_pre_intra_process_row( |
1452 | 0 | pu1_src, |
1453 | 0 | src_stride, |
1454 | 0 | pu1_dst, |
1455 | 0 | dst_stride, |
1456 | 0 | i4_layer_wd, |
1457 | 0 | i4_layer_ht, |
1458 | 0 | pu1_wkg_mem, |
1459 | 0 | ht_offset, |
1460 | 0 | block_ht, |
1461 | 0 | block_wd, |
1462 | 0 | num_col_blks, |
1463 | 0 | i4_layer_no, |
1464 | 0 | ps_ed_ctxt, |
1465 | 0 | ps_ed, |
1466 | 0 | ps_ed_ctb_l1, |
1467 | 0 | num_4x4_blks_ctb_y, |
1468 | 0 | num_4x4_blks_last_ctb_x, |
1469 | 0 | skip_decomp, |
1470 | 0 | skip_pre_intra, |
1471 | 0 | row_block_no, |
1472 | 0 | ps_ctxt->ps_ctb_analyse, |
1473 | 0 | &ps_ctxt->s_ipe_optimised_function_list, |
1474 | 0 | &ps_ctxt->s_cmn_opt_func); |
1475 | 0 | } |
1476 | 0 | idx++; |
1477 | | /* set the output dependency */ |
1478 | 0 | ihevce_pre_enc_grp_job_set_out_dep( |
1479 | 0 | pv_multi_thrd_ctxt, ps_pre_enc_job, i4_ping_pong); |
1480 | 0 | } |
1481 | 0 | } |
1482 | 0 | ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed = idx; |
1483 | | |
1484 | | /* ------------ For the same rows perform preintra if required --------------- */ |
1485 | 0 | ihevce_ed_frame_init(ps_ed_ctxt, i4_layer_no); |
1486 | |
|
1487 | 0 | if((1 == i4_layer_no) && (IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset)) |
1488 | 0 | { |
1489 | 0 | WORD32 vert_ctr, ctb_ctr, i; |
1490 | 0 | WORD32 ctb_ctr_blks = ps_ctxt->as_layers[1].i4_num_col_blks; |
1491 | 0 | WORD32 vert_ctr_blks = ps_ctxt->as_layers[1].i4_num_row_blks; |
1492 | |
|
1493 | 0 | if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) && |
1494 | 0 | (ps_lap_out_prms->i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE)) |
1495 | 0 | { |
1496 | 0 | for(vert_ctr = 0; vert_ctr < vert_ctr_blks; vert_ctr++) |
1497 | 0 | { |
1498 | 0 | ihevce_ed_ctb_l1_t *ps_ed_ctb_row_l1 = |
1499 | 0 | ps_ctxt->ps_ed_ctb_l1 + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz; |
1500 | |
|
1501 | 0 | for(ctb_ctr = 0; ctb_ctr < ctb_ctr_blks; ctb_ctr++) |
1502 | 0 | { |
1503 | 0 | ihevce_ed_ctb_l1_t *ps_ed_ctb_curr_l1 = ps_ed_ctb_row_l1 + ctb_ctr; |
1504 | |
|
1505 | 0 | for(i = 0; i < 16; i++) |
1506 | 0 | { |
1507 | 0 | ps_ed_ctb_curr_l1->i4_best_sad_cost_8x8_l1_ipe[i] = 0x7fffffff; |
1508 | 0 | ps_ed_ctb_curr_l1->i4_best_sad_8x8_l1_ipe[i] = 0x7fffffff; |
1509 | 0 | } |
1510 | 0 | } |
1511 | 0 | } |
1512 | 0 | } |
1513 | 0 | } |
1514 | |
|
1515 | 0 | #if DISABLE_L2_IPE_IN_PB_L1_IN_B |
1516 | 0 | if(((2 == i4_layer_no) && (ps_lap_out_prms->i4_pic_type == IV_I_FRAME || |
1517 | 0 | ps_lap_out_prms->i4_pic_type == IV_IDR_FRAME)) || |
1518 | 0 | ((1 == i4_layer_no) && |
1519 | 0 | (ps_lap_out_prms->i4_temporal_lyr_id <= TEMPORAL_LAYER_DISABLE)) || |
1520 | 0 | ((IHEVCE_QUALITY_P6 != ps_ctxt->i4_quality_preset) && (0 != i4_layer_no))) |
1521 | | #else |
1522 | | if((0 != i4_layer_no) && |
1523 | | (1 != ((IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) && |
1524 | | (ps_lap_out_prms->i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE)))) |
1525 | | #endif |
1526 | 0 | { |
1527 | 0 | WORD32 i4_num_rows = ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed; |
1528 | |
|
1529 | 0 | ps_ed_ctxt->lambda = ps_ctxt->ai4_lambda[i4_layer_no]; |
1530 | 0 | if(0 == i4_layer_no) |
1531 | 0 | { |
1532 | 0 | ps_ed_ctxt->ps_ed_pic = NULL; |
1533 | 0 | ps_ed_ctxt->ps_ed = NULL; |
1534 | 0 | ps_ed_ctxt->ps_ed_ctb_l1_pic = NULL; |
1535 | 0 | ps_ed_ctxt->ps_ed_ctb_l1 = NULL; |
1536 | 0 | } |
1537 | 0 | else if(1 == i4_layer_no) |
1538 | 0 | { |
1539 | 0 | ps_ed_ctxt->ps_ed_pic = ps_ctxt->ps_layer1_buf; |
1540 | 0 | ps_ed_ctxt->ps_ed = ps_ctxt->ps_layer1_buf; |
1541 | 0 | ps_ed_ctxt->ps_ed_ctb_l1_pic = ps_ctxt->ps_ed_ctb_l1; |
1542 | 0 | ps_ed_ctxt->ps_ed_ctb_l1 = ps_ctxt->ps_ed_ctb_l1; |
1543 | 0 | } |
1544 | 0 | else if(2 == i4_layer_no) |
1545 | 0 | { |
1546 | 0 | ps_ed_ctxt->ps_ed_pic = ps_ctxt->ps_layer2_buf; |
1547 | 0 | ps_ed_ctxt->ps_ed = ps_ctxt->ps_layer2_buf; |
1548 | 0 | ps_ed_ctxt->ps_ed_ctb_l1_pic = NULL; |
1549 | 0 | ps_ed_ctxt->ps_ed_ctb_l1 = NULL; |
1550 | 0 | } |
1551 | |
|
1552 | 0 | skip_decomp = 1; |
1553 | 0 | skip_pre_intra = 0; |
1554 | |
|
1555 | 0 | for(idx = 0; idx < i4_num_rows; idx++) |
1556 | 0 | { |
1557 | 0 | WORD32 num_4x4_blks_ctb_y = 0, num_4x4_blks_last_ctb_x = 0; |
1558 | | |
1559 | | /* Obtain the current row's details from the job */ |
1560 | 0 | row_block_no = ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx]; |
1561 | 0 | ht_offset = row_block_no * block_ht; |
1562 | |
|
1563 | 0 | if(row_block_no < (num_row_blocks)) |
1564 | 0 | { |
1565 | 0 | pu1_dst = ps_ctxt->as_layers[i4_layer_no + 1].pu1_inp + |
1566 | 0 | ((block_ht >> 1) * dst_stride * row_block_no); |
1567 | |
|
1568 | 0 | if(i4_layer_no == 1 || i4_layer_no == 2) |
1569 | 0 | { |
1570 | 0 | ps_ed = ps_ed_ctxt->ps_ed + (row_block_no * inc_ctb * (num_col_blks)); |
1571 | 0 | ps_ed_ctb_l1 = ps_ed_ctxt->ps_ed_ctb_l1 + (row_block_no * num_col_blks); |
1572 | 0 | ps_ed_ctxt->i4_quality_preset = ps_ctxt->i4_quality_preset; |
1573 | 0 | num_4x4_blks_last_ctb_x = block_wd >> 2; |
1574 | 0 | num_4x4_blks_ctb_y = block_ht >> 2; |
1575 | 0 | if(row_block_no == num_row_blocks - 1) |
1576 | 0 | { |
1577 | 0 | if(i4_layer_ht % block_ht) |
1578 | 0 | { |
1579 | 0 | num_4x4_blks_ctb_y = ((i4_layer_ht % block_ht) + 3) >> 2; |
1580 | 0 | } |
1581 | 0 | } |
1582 | 0 | if(i4_layer_wd % block_wd) |
1583 | 0 | { |
1584 | 0 | num_4x4_blks_last_ctb_x = ((i4_layer_wd % block_wd) + 3) >> 2; |
1585 | 0 | } |
1586 | 0 | } |
1587 | | |
1588 | | /* call the row level processing function */ |
1589 | 0 | ihevce_decomp_pre_intra_process_row( |
1590 | 0 | pu1_src, |
1591 | 0 | src_stride, |
1592 | 0 | pu1_dst, |
1593 | 0 | dst_stride, |
1594 | 0 | i4_layer_wd, |
1595 | 0 | i4_layer_ht, |
1596 | 0 | pu1_wkg_mem, |
1597 | 0 | ht_offset, |
1598 | 0 | block_ht, |
1599 | 0 | block_wd, |
1600 | 0 | num_col_blks, |
1601 | 0 | i4_layer_no, |
1602 | 0 | ps_ed_ctxt, |
1603 | 0 | ps_ed, |
1604 | 0 | ps_ed_ctb_l1, |
1605 | 0 | num_4x4_blks_ctb_y, |
1606 | 0 | num_4x4_blks_last_ctb_x, |
1607 | 0 | skip_decomp, |
1608 | 0 | skip_pre_intra, |
1609 | 0 | row_block_no, |
1610 | 0 | NULL, |
1611 | 0 | &ps_ctxt->s_ipe_optimised_function_list, |
1612 | 0 | &ps_ctxt->s_cmn_opt_func); |
1613 | 0 | } |
1614 | |
|
1615 | 0 | if(1 == i4_layer_no) |
1616 | 0 | { |
1617 | 0 | ps_multi_thrd->aai4_l1_pre_intra_done[i4_ping_pong][row_block_no] = 1; |
1618 | 0 | } |
1619 | 0 | } |
1620 | 0 | for(idx = 0; idx < MAX_NUM_CTB_ROWS_FRM; idx++) |
1621 | 0 | { |
1622 | 0 | ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx] = -1; |
1623 | 0 | } |
1624 | 0 | ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed = 0; |
1625 | 0 | } |
1626 | |
|
1627 | 0 | #if DISABLE_L2_IPE_IN_PB_L1_IN_B |
1628 | 0 | if((IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) && |
1629 | 0 | (((i4_layer_no == 2) && (ps_lap_out_prms->i4_pic_type == ISLICE)) || |
1630 | 0 | ((i4_layer_no == 1) && (ps_lap_out_prms->i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE)))) |
1631 | 0 | { |
1632 | 0 | WORD32 i4_num_rows = ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed; |
1633 | 0 | if(1 == i4_layer_no) |
1634 | 0 | { |
1635 | 0 | for(idx = 0; idx < i4_num_rows; idx++) |
1636 | 0 | { |
1637 | 0 | row_block_no = ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx]; |
1638 | |
|
1639 | 0 | { |
1640 | 0 | ps_multi_thrd->aai4_l1_pre_intra_done[i4_ping_pong][row_block_no] = 1; |
1641 | 0 | } |
1642 | 0 | } |
1643 | 0 | } |
1644 | 0 | for(idx = 0; idx < MAX_NUM_CTB_ROWS_FRM; idx++) |
1645 | 0 | { |
1646 | 0 | ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx] = -1; |
1647 | 0 | } |
1648 | 0 | ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed = 0; |
1649 | 0 | } |
1650 | | #else |
1651 | | if((i4_layer_no != 0) && ((IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) && |
1652 | | (ps_lap_out_prms->i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE))) |
1653 | | { |
1654 | | WORD32 i4_num_rows = ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed; |
1655 | | for(idx = 0; idx < i4_num_rows; idx++) |
1656 | | { |
1657 | | row_block_no = ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx]; |
1658 | | if(1 == i4_layer_no) |
1659 | | { |
1660 | | ps_multi_thrd->aai4_l1_pre_intra_done[i4_ping_pong][row_block_no] = 1; |
1661 | | } |
1662 | | } |
1663 | | for(idx = 0; idx < MAX_NUM_CTB_ROWS_FRM; idx++) |
1664 | | { |
1665 | | ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx] = -1; |
1666 | | } |
1667 | | ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed = 0; |
1668 | | } |
1669 | | #endif |
1670 | 0 | } |
1671 | 0 | } |
1672 | | |
1673 | | /*! |
1674 | | ************************************************************************ |
1675 | | * \brief |
1676 | | * return number of records used by decomp pre intra |
1677 | | * |
1678 | | ************************************************************************ |
1679 | | */ |
1680 | | WORD32 ihevce_decomp_pre_intra_get_num_mem_recs(void) |
1681 | 0 | { |
1682 | 0 | return (NUM_DECOMP_PRE_INTRA_MEM_RECS); |
1683 | 0 | } |
1684 | | |
1685 | | /*! |
1686 | | ************************************************************************ |
1687 | | * @brief |
1688 | | * return each record attributes of decomp pre intra |
1689 | | ************************************************************************ |
1690 | | */ |
1691 | | WORD32 ihevce_decomp_pre_intra_get_mem_recs( |
1692 | | iv_mem_rec_t *ps_mem_tab, WORD32 i4_num_proc_thrds, WORD32 i4_mem_space) |
1693 | 0 | { |
1694 | | /* memories should be requested assuming worst case requirememnts */ |
1695 | | |
1696 | | /* Module context structure */ |
1697 | 0 | ps_mem_tab[DECOMP_PRE_INTRA_CTXT].i4_mem_size = sizeof(ihevce_decomp_pre_intra_master_ctxt_t); |
1698 | 0 | ps_mem_tab[DECOMP_PRE_INTRA_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
1699 | 0 | ps_mem_tab[DECOMP_PRE_INTRA_CTXT].i4_mem_alignment = 8; |
1700 | | |
1701 | | /* Thread context structure */ |
1702 | 0 | ps_mem_tab[DECOMP_PRE_INTRA_THRDS_CTXT].i4_mem_size = |
1703 | 0 | i4_num_proc_thrds * sizeof(ihevce_decomp_pre_intra_ctxt_t); |
1704 | 0 | ps_mem_tab[DECOMP_PRE_INTRA_THRDS_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
1705 | 0 | ps_mem_tab[DECOMP_PRE_INTRA_THRDS_CTXT].i4_mem_alignment = 8; |
1706 | | |
1707 | | /* early decision context structure */ |
1708 | 0 | ps_mem_tab[DECOMP_PRE_INTRA_ED_CTXT].i4_mem_size = i4_num_proc_thrds * sizeof(ihevce_ed_ctxt_t); |
1709 | 0 | ps_mem_tab[DECOMP_PRE_INTRA_ED_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; |
1710 | 0 | ps_mem_tab[DECOMP_PRE_INTRA_ED_CTXT].i4_mem_alignment = 8; |
1711 | |
|
1712 | 0 | return (NUM_DECOMP_PRE_INTRA_MEM_RECS); |
1713 | 0 | } |
1714 | | |
1715 | | /*! |
1716 | | ************************************************************************ |
1717 | | * @brief |
1718 | | * Init decomp pre intra context |
1719 | | ************************************************************************ |
1720 | | */ |
1721 | | void *ihevce_decomp_pre_intra_init( |
1722 | | iv_mem_rec_t *ps_mem_tab, |
1723 | | ihevce_static_cfg_params_t *ps_init_prms, |
1724 | | WORD32 i4_num_proc_thrds, |
1725 | | func_selector_t *ps_func_selector, |
1726 | | WORD32 i4_resolution_id, |
1727 | | UWORD8 u1_is_popcnt_available) |
1728 | 0 | { |
1729 | 0 | ihevce_decomp_pre_intra_master_ctxt_t *ps_mstr_ctxt = ps_mem_tab[DECOMP_PRE_INTRA_CTXT].pv_base; |
1730 | 0 | ihevce_decomp_pre_intra_ctxt_t *ps_ctxt = ps_mem_tab[DECOMP_PRE_INTRA_THRDS_CTXT].pv_base; |
1731 | 0 | ihevce_ed_ctxt_t *ps_ed_ctxt = ps_mem_tab[DECOMP_PRE_INTRA_ED_CTXT].pv_base; |
1732 | 0 | ihevce_tgt_params_t *ps_tgt_prms = &ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]; |
1733 | 0 | WORD32 min_cu_size = 1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size; |
1734 | 0 | WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS]; |
1735 | 0 | WORD32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS]; |
1736 | 0 | WORD32 n_tot_layers; |
1737 | 0 | WORD32 i, j, k; |
1738 | | |
1739 | | /* Get the height and width of each layer */ |
1740 | 0 | *a_wd = ps_tgt_prms->i4_width + SET_CTB_ALIGN(ps_tgt_prms->i4_width, min_cu_size); |
1741 | 0 | *a_ht = ps_tgt_prms->i4_height + SET_CTB_ALIGN(ps_tgt_prms->i4_height, min_cu_size); |
1742 | 0 | n_tot_layers = hme_derive_num_layers(1, a_wd, a_ht, a_disp_wd, a_disp_ht); |
1743 | 0 | ps_mstr_ctxt->i4_num_proc_thrds = i4_num_proc_thrds; |
1744 | 0 | for(i = 0; i < ps_mstr_ctxt->i4_num_proc_thrds; i++) |
1745 | 0 | { |
1746 | 0 | ps_mstr_ctxt->aps_decomp_pre_intra_thrd_ctxt[i] = ps_ctxt; |
1747 | 0 | ps_ctxt->i4_num_layers = n_tot_layers; |
1748 | 0 | ps_ctxt->ps_ed_ctxt = ps_ed_ctxt; |
1749 | 0 | for(j = 0; j < n_tot_layers; j++) |
1750 | 0 | { |
1751 | | /** If CTB size= 64, decomp_blk_wd = 64 for L0, 32 for L1 , 16 for L2, 8 for L3 */ |
1752 | 0 | WORD32 max_ctb_size = 1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size; |
1753 | 0 | WORD32 decomp_blk_wd = max_ctb_size >> j; |
1754 | 0 | WORD32 decomp_blk_ht = max_ctb_size >> j; |
1755 | |
|
1756 | 0 | ps_ctxt->as_layers[j].i4_actual_wd = a_wd[j]; |
1757 | 0 | ps_ctxt->as_layers[j].i4_actual_ht = a_ht[j]; |
1758 | 0 | if(0 == j) |
1759 | 0 | { |
1760 | 0 | ps_ctxt->as_layers[j].i4_padded_ht = a_ht[j]; |
1761 | 0 | ps_ctxt->as_layers[j].i4_padded_wd = a_wd[j]; |
1762 | 0 | } |
1763 | 0 | else |
1764 | 0 | { |
1765 | 0 | ps_ctxt->as_layers[j].i4_padded_ht = a_ht[j] + 32 + 4; |
1766 | 0 | ps_ctxt->as_layers[j].i4_padded_wd = a_wd[j] + 32 + 4; |
1767 | 0 | } |
1768 | 0 | ps_ctxt->as_layers[j].pu1_inp = NULL; |
1769 | 0 | ps_ctxt->as_layers[j].i4_inp_stride = 0; |
1770 | 0 | ps_ctxt->as_layers[j].i4_decomp_blk_ht = decomp_blk_ht; |
1771 | 0 | ps_ctxt->as_layers[j].i4_decomp_blk_wd = decomp_blk_wd; |
1772 | 0 | ps_ctxt->as_layers[j].i4_num_row_blks = ((a_ht[j] + (decomp_blk_ht - 1)) / decomp_blk_ht); |
1773 | 0 | ps_ctxt->as_layers[j].i4_num_col_blks = ((a_wd[j] + (decomp_blk_wd - 1)) / decomp_blk_wd); |
1774 | 0 | for(k = 0; k < MAX_NUM_CTB_ROWS_FRM; k++) |
1775 | 0 | { |
1776 | 0 | ps_ctxt->as_layers[j].ai4_curr_row_no[k] = -1; |
1777 | 0 | } |
1778 | 0 | ps_ctxt->as_layers[j].i4_num_rows_processed = 0; |
1779 | 0 | } |
1780 | 0 | ps_ctxt->i4_quality_preset = ps_tgt_prms->i4_quality_preset; |
1781 | 0 | if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P7) |
1782 | 0 | { |
1783 | 0 | ps_ctxt->i4_quality_preset = IHEVCE_QUALITY_P6; |
1784 | 0 | } |
1785 | 0 | if(ps_init_prms->s_coding_tools_prms.i4_vqet & |
1786 | 0 | (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) |
1787 | 0 | { |
1788 | 0 | if(ps_init_prms->s_coding_tools_prms.i4_vqet & |
1789 | 0 | (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)) |
1790 | 0 | { |
1791 | 0 | ps_ctxt->i4_enable_noise_detection = 1; |
1792 | 0 | } |
1793 | 0 | else |
1794 | 0 | { |
1795 | 0 | ps_ctxt->i4_enable_noise_detection = 0; |
1796 | 0 | } |
1797 | 0 | } |
1798 | 0 | else |
1799 | 0 | { |
1800 | 0 | ps_ctxt->i4_enable_noise_detection = 0; |
1801 | 0 | } |
1802 | 0 | ihevce_cmn_utils_instr_set_router( |
1803 | 0 | &ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type); |
1804 | 0 | ihevce_ipe_instr_set_router( |
1805 | 0 | &ps_ctxt->s_ipe_optimised_function_list, ps_init_prms->e_arch_type); |
1806 | |
|
1807 | 0 | ps_ed_ctxt->ps_func_selector = ps_func_selector; |
1808 | |
|
1809 | 0 | ps_ctxt++; |
1810 | 0 | ps_ed_ctxt++; |
1811 | 0 | } |
1812 | | /* return the handle to caller */ |
1813 | 0 | return ((void *)ps_mstr_ctxt); |
1814 | 0 | } |
1815 | | |
1816 | | /*! |
1817 | | ************************************************************************ |
1818 | | * @brief |
1819 | | * Init decomp pre intra layer buffers |
1820 | | ************************************************************************ |
1821 | | */ |
1822 | | void ihevce_decomp_pre_intra_frame_init( |
1823 | | void *pv_ctxt, |
1824 | | UWORD8 **ppu1_decomp_lyr_bufs, |
1825 | | WORD32 *pi4_lyr_buf_stride, |
1826 | | ihevce_ed_blk_t *ps_layer1_buf, |
1827 | | ihevce_ed_blk_t *ps_layer2_buf, |
1828 | | ihevce_ed_ctb_l1_t *ps_ed_ctb_l1, |
1829 | | WORD32 i4_ol_sad_lambda_qf, |
1830 | | ctb_analyse_t *ps_ctb_analyse) |
1831 | 0 | { |
1832 | 0 | ihevce_decomp_pre_intra_master_ctxt_t *ps_master_ctxt = pv_ctxt; |
1833 | 0 | ihevce_decomp_pre_intra_ctxt_t *ps_ctxt; |
1834 | 0 | WORD32 i, j; |
1835 | |
|
1836 | 0 | for(i = 0; i < ps_master_ctxt->i4_num_proc_thrds; i++) |
1837 | 0 | { |
1838 | 0 | ps_ctxt = ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[i]; |
1839 | | |
1840 | | /* L0 layer (actual input) is registered in process call */ |
1841 | 0 | for(j = 1; j < ps_ctxt->i4_num_layers; j++) |
1842 | 0 | { |
1843 | 0 | ps_ctxt->as_layers[j].i4_inp_stride = pi4_lyr_buf_stride[j - 1]; |
1844 | 0 | ps_ctxt->as_layers[j].pu1_inp = ppu1_decomp_lyr_bufs[j - 1]; |
1845 | | |
1846 | | /* Populating the buffer pointers for layer1 and layer2 buffers to store the |
1847 | | structure for each 4x4 block after pre intra analysis on their respective layers */ |
1848 | 0 | if(j == 1) |
1849 | 0 | { |
1850 | 0 | WORD32 sad_lambda_l1 = (3 * i4_ol_sad_lambda_qf >> 2); |
1851 | 0 | WORD32 temp = 1 << LAMBDA_Q_SHIFT; |
1852 | 0 | WORD32 lambda = ((temp) > sad_lambda_l1) ? temp : sad_lambda_l1; |
1853 | |
|
1854 | 0 | ps_ctxt->ps_layer1_buf = ps_layer1_buf; |
1855 | 0 | ps_ctxt->ps_ed_ctb_l1 = ps_ed_ctb_l1; |
1856 | 0 | ps_ctxt->ai4_lambda[j] = lambda; |
1857 | 0 | } |
1858 | 0 | else if(j == 2) |
1859 | 0 | { |
1860 | 0 | WORD32 sad_lambda_l2 = i4_ol_sad_lambda_qf >> 1; |
1861 | 0 | WORD32 temp = 1 << LAMBDA_Q_SHIFT; |
1862 | 0 | WORD32 lambda = ((temp) > sad_lambda_l2) ? temp : sad_lambda_l2; |
1863 | |
|
1864 | 0 | ps_ctxt->ps_layer2_buf = ps_layer2_buf; |
1865 | 0 | ps_ctxt->ai4_lambda[j] = lambda; |
1866 | 0 | } |
1867 | 0 | else |
1868 | 0 | { |
1869 | 0 | ps_ctxt->ai4_lambda[j] = -1; |
1870 | 0 | } |
1871 | 0 | } |
1872 | | |
1873 | | /* make the ps_ctb_analyse refernce as a part of the private context */ |
1874 | 0 | ps_ctxt->ps_ctb_analyse = ps_ctb_analyse; |
1875 | 0 | } |
1876 | 0 | } |
1877 | | |
1878 | | /** |
1879 | | ******************************************************************************* |
1880 | | * |
1881 | | * @brief Merge Sort function. |
1882 | | * |
1883 | | * @par Description: |
1884 | | * This function sorts the data in the input array in ascending |
1885 | | * order using merge sort algorithm. Intermediate data obtained in |
1886 | | * merge sort are stored in output 2-D array. |
1887 | | * |
1888 | | * @param[in] |
1889 | | * pi4_input_val : Input 1-D array |
1890 | | * aai4_output_val: Output 2-D array containing elements sorted in sets of |
1891 | | * 4,16,64 etc. |
1892 | | * i4_length : length of the array |
1893 | | * i4_ip_sort_level: Input sort level. Specifies the level upto which array is sorted. |
1894 | | * It should be 1 if the array is unsorted. Should be 4 if array is sorted |
1895 | | * in sets of 4. |
1896 | | * i4_op_sort_level: Output sort level. Specify the level upto which sorting is required. |
1897 | | * If it is given as length of array it sorts for whole array. |
1898 | | * |
1899 | | ******************************************************************************* |
1900 | | */ |
1901 | | void ihevce_merge_sort( |
1902 | | WORD32 *pi4_input_val, |
1903 | | WORD32 aai4_output_val[][64], |
1904 | | WORD32 i4_length, |
1905 | | WORD32 i4_ip_sort_level, |
1906 | | WORD32 i4_op_sort_level) |
1907 | 0 | { |
1908 | 0 | WORD32 i, j, k; |
1909 | 0 | WORD32 count, level; |
1910 | 0 | WORD32 temp[64]; |
1911 | 0 | WORD32 *pi4_temp_buf_cpy; |
1912 | 0 | WORD32 *pi4_temp = &temp[0]; |
1913 | 0 | WORD32 calc_level; |
1914 | |
|
1915 | 0 | pi4_temp_buf_cpy = pi4_temp; |
1916 | |
|
1917 | 0 | GETRANGE(calc_level, i4_op_sort_level / i4_ip_sort_level); |
1918 | |
|
1919 | 0 | calc_level = calc_level - 1; |
1920 | | |
1921 | | /*** This function is written under the assumption that we need only intermediate values of |
1922 | | sort in the range of 4,16,64 etc. ***/ |
1923 | 0 | ASSERT((calc_level % 2) == 0); |
1924 | | |
1925 | | /** One iteration of this for loop does 1 sets of sort and produces one intermediate value in 2 iterations **/ |
1926 | 0 | for(level = 0; level < calc_level; level++) |
1927 | 0 | { |
1928 | | /** Merges adjacent sets of elements based on current sort level **/ |
1929 | 0 | for(count = 0; count < i4_length; (count = count + (i4_ip_sort_level * 2))) |
1930 | 0 | { |
1931 | 0 | i = 0; |
1932 | 0 | j = 0; |
1933 | 0 | if(pi4_input_val[i4_ip_sort_level - 1] < pi4_input_val[i4_ip_sort_level]) |
1934 | 0 | { |
1935 | | /*** Condition for early exit ***/ |
1936 | 0 | memcpy(&pi4_temp[0], pi4_input_val, sizeof(WORD32) * i4_ip_sort_level * 2); |
1937 | 0 | } |
1938 | 0 | else |
1939 | 0 | { |
1940 | 0 | for(k = 0; k < (i4_ip_sort_level * 2); k++) |
1941 | 0 | { |
1942 | 0 | if((i < i4_ip_sort_level) && (j < i4_ip_sort_level)) |
1943 | 0 | { |
1944 | 0 | if(pi4_input_val[i] > pi4_input_val[j + i4_ip_sort_level]) |
1945 | 0 | { |
1946 | | /** copy to output array **/ |
1947 | 0 | pi4_temp[k] = pi4_input_val[j + i4_ip_sort_level]; |
1948 | 0 | j++; |
1949 | 0 | } |
1950 | 0 | else |
1951 | 0 | { |
1952 | | /** copy to output array **/ |
1953 | 0 | pi4_temp[k] = pi4_input_val[i]; |
1954 | 0 | i++; |
1955 | 0 | } |
1956 | 0 | } |
1957 | 0 | else if(i == i4_ip_sort_level) |
1958 | 0 | { |
1959 | | /** copy the remaining data to output array **/ |
1960 | 0 | pi4_temp[k] = pi4_input_val[j + i4_ip_sort_level]; |
1961 | 0 | j++; |
1962 | 0 | } |
1963 | 0 | else |
1964 | 0 | { |
1965 | | /** copy the remaining data to output array **/ |
1966 | 0 | pi4_temp[k] = pi4_input_val[i]; |
1967 | 0 | i++; |
1968 | 0 | } |
1969 | 0 | } |
1970 | 0 | } |
1971 | 0 | pi4_input_val += (i4_ip_sort_level * 2); |
1972 | 0 | pi4_temp += (i4_ip_sort_level * 2); |
1973 | 0 | } |
1974 | 0 | pi4_input_val = pi4_temp - i4_length; |
1975 | |
|
1976 | 0 | if(level % 2) |
1977 | 0 | { |
1978 | | /** Assign a temp address for storing next sort level output as we will not need this data as output **/ |
1979 | 0 | pi4_temp = pi4_temp_buf_cpy; |
1980 | 0 | } |
1981 | 0 | else |
1982 | 0 | { |
1983 | | /** Assign address for storing the intermediate data into output 2-D array **/ |
1984 | 0 | pi4_temp = aai4_output_val[level / 2]; |
1985 | 0 | } |
1986 | 0 | i4_ip_sort_level *= 2; |
1987 | 0 | } |
1988 | 0 | } |
1989 | | |
1990 | | /*! |
1991 | | ************************************************************************ |
1992 | | * @brief |
1993 | | * Calculate the average activities at 16*16 (8*8 in L1) and 32*32 |
1994 | | * (8*8 in L2) block sizes. As this function accumulates activities |
1995 | | * across blocks of a frame, this needs to be called by only one thread |
1996 | | * and only after ensuring the processing of entire frame is done |
1997 | | ************************************************************************ |
1998 | | */ |
1999 | | void ihevce_decomp_pre_intra_curr_frame_pre_intra_deinit( |
2000 | | void *pv_pre_intra_ctxt, |
2001 | | pre_enc_me_ctxt_t *ps_curr_out, |
2002 | | frm_ctb_ctxt_t *ps_frm_ctb_prms) |
2003 | 0 | { |
2004 | 0 | ihevce_decomp_pre_intra_master_ctxt_t *ps_master_ctxt = pv_pre_intra_ctxt; |
2005 | 0 | ihevce_decomp_pre_intra_ctxt_t *ps_ctxt = ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[0]; |
2006 | |
|
2007 | 0 | ULWORD64 u8_frame_8x8_sum_act_sqr = 0; |
2008 | 0 | LWORD64 ai8_frame_8x8_sum_act_sqr[2] = { 0, 0 }; |
2009 | 0 | WORD32 ai4_frame_8x8_sum_act[2] = { 0, 0 }; |
2010 | 0 | WORD32 ai4_frame_8x8_sum_blks[2] = { 0, 0 }; |
2011 | |
|
2012 | 0 | LWORD64 ai8_frame_16x16_sum_act_sqr[3] = { 0, 0, 0 }; |
2013 | 0 | WORD32 ai4_frame_16x16_sum_act[3] = { 0, 0, 0 }; |
2014 | 0 | WORD32 ai4_frame_16x16_sum_blks[3] = { 0, 0, 0 }; |
2015 | |
|
2016 | 0 | LWORD64 ai8_frame_32x32_sum_act_sqr[3] = { 0, 0, 0 }; |
2017 | 0 | WORD32 ai4_frame_32x32_sum_act[3] = { 0, 0, 0 }; |
2018 | 0 | WORD32 ai4_frame_32x32_sum_blks[3] = { 0, 0, 0 }; |
2019 | |
|
2020 | 0 | ihevce_ed_ctb_l1_t *ps_ed_ctb_pic_l1 = ps_curr_out->ps_ed_ctb_l1; |
2021 | 0 | ihevce_ed_blk_t *ps_ed_blk_l1 = ps_curr_out->ps_layer1_buf; |
2022 | 0 | WORD32 ctb_wd = ps_ctxt->as_layers[1].i4_decomp_blk_wd; |
2023 | 0 | WORD32 h_ctb_cnt = ps_ctxt->as_layers[1].i4_num_col_blks; |
2024 | 0 | WORD32 v_ctb_cnt = ps_ctxt->as_layers[1].i4_num_row_blks; |
2025 | 0 | WORD32 sub_blk_cnt = ((ctb_wd >> 2) * (ctb_wd >> 2)); |
2026 | 0 | WORD32 i4_avg_noise_satd; |
2027 | 0 | WORD32 ctb_ctr, vert_ctr; |
2028 | 0 | WORD32 i, j, k; |
2029 | |
|
2030 | 0 | { |
2031 | | /* Calculate min noise threshold */ |
2032 | | /* Min noise threshold is calculated by taking average of lowest 1% satd val in |
2033 | | * the complete 4x4 frame satds */ |
2034 | 0 | #define MAX_SATD 64 |
2035 | 0 | #define SATD_NOISE_FLOOR_THRESHOLD 16 |
2036 | 0 | #define MIN_BLKS 2 |
2037 | 0 | WORD32 i4_layer_wd = ps_ctxt->as_layers[1].i4_actual_wd; |
2038 | 0 | WORD32 i4_layer_ht = ps_ctxt->as_layers[1].i4_actual_ht; |
2039 | 0 | WORD32 i4_min_blk = ((MIN_BLKS * (i4_layer_wd >> 1) * (i4_layer_ht >> 1)) / 100); |
2040 | 0 | WORD32 i4_total_blks = 0; |
2041 | 0 | WORD32 satd_hist[MAX_SATD]; |
2042 | 0 | LWORD64 i8_acc_satd = 0; |
2043 | |
|
2044 | 0 | memset(satd_hist, 0, sizeof(satd_hist)); |
2045 | 0 | for(i = 0; i < sub_blk_cnt * h_ctb_cnt * v_ctb_cnt; i++) |
2046 | 0 | { |
2047 | 0 | if(ps_ed_blk_l1[i].i4_4x4_satd >= 0 && ps_ed_blk_l1[i].i4_4x4_satd < MAX_SATD) |
2048 | 0 | { |
2049 | 0 | satd_hist[ps_ed_blk_l1[i].i4_4x4_satd]++; |
2050 | 0 | } |
2051 | 0 | } |
2052 | 0 | for(i = 0; i < MAX_SATD && i4_total_blks <= i4_min_blk; i++) |
2053 | 0 | { |
2054 | 0 | i4_total_blks += satd_hist[i]; |
2055 | 0 | i8_acc_satd += (i * satd_hist[i]); |
2056 | 0 | } |
2057 | 0 | if(i4_total_blks < i4_min_blk) |
2058 | 0 | { |
2059 | 0 | i4_avg_noise_satd = SATD_NOISE_FLOOR_THRESHOLD; |
2060 | 0 | } |
2061 | 0 | else |
2062 | 0 | { |
2063 | 0 | i4_avg_noise_satd = (WORD32)(i8_acc_satd + (i4_total_blks >> 1)) / i4_total_blks; |
2064 | 0 | } |
2065 | 0 | ps_curr_out->i4_avg_noise_thrshld_4x4 = i4_avg_noise_satd; |
2066 | 0 | } |
2067 | |
|
2068 | 0 | for(vert_ctr = 0; vert_ctr < v_ctb_cnt; vert_ctr++) |
2069 | 0 | { |
2070 | 0 | ihevce_ed_ctb_l1_t *ps_ed_ctb_row_l1 = |
2071 | 0 | ps_ed_ctb_pic_l1 + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz; |
2072 | 0 | ihevce_ed_blk_t *ps_ed = ps_ed_blk_l1 + (vert_ctr * sub_blk_cnt * h_ctb_cnt); |
2073 | |
|
2074 | 0 | for(ctb_ctr = 0; ctb_ctr < h_ctb_cnt; ctb_ctr++, ps_ed += sub_blk_cnt) |
2075 | 0 | { |
2076 | 0 | ihevce_ed_ctb_l1_t *ps_ed_ctb_curr_l1 = ps_ed_ctb_row_l1 + ctb_ctr; |
2077 | 0 | WORD8 b8_satd_eval[4]; |
2078 | 0 | WORD32 ai4_satd_4x4[64]; |
2079 | 0 | WORD32 ai4_satd_8x8[16]; // derived from accumulating 4x4 satds |
2080 | 0 | WORD32 ai4_satd_16x16[4] = { 0 }; // derived from accumulating 8x8 satds |
2081 | 0 | WORD32 i4_satd_32x32 = 0; // derived from accumulating 8x8 satds |
2082 | | /* This 2-D array will contain 4x4 satds sorted in ascending order in sets |
2083 | | * of 4, 16, 64 For example : '5 10 2 7 6 12 3 1' array input will return |
2084 | | * '2 5 7 10 1 3 6 12' if sorted in sets of 4 */ |
2085 | 0 | WORD32 aai4_sort_4_16_64_satd[3][64]; |
2086 | | /* This 2-D array will contain 8x8 satds sorted in ascending order in sets of |
2087 | | * 4, 16***/ |
2088 | 0 | WORD32 aai4_sort_4_16_satd[2][64]; |
2089 | |
|
2090 | 0 | memset(b8_satd_eval, 1, sizeof(b8_satd_eval)); |
2091 | 0 | for(i = 0; i < 4; i++) |
2092 | 0 | { |
2093 | 0 | ihevce_ed_blk_t *ps_ed_b32 = &ps_ed[i * 16]; |
2094 | |
|
2095 | 0 | for(j = 0; j < 4; j++) |
2096 | 0 | { |
2097 | 0 | ihevce_ed_blk_t *ps_ed_b16 = &ps_ed_b32[j * 4]; |
2098 | 0 | WORD32 satd_sum = 0; |
2099 | 0 | WORD32 blk_cnt = 0; |
2100 | |
|
2101 | 0 | for(k = 0; k < 4; k++) |
2102 | 0 | { |
2103 | 0 | ihevce_ed_blk_t *ps_ed_b4 = &ps_ed_b16[k]; |
2104 | |
|
2105 | 0 | if(-1 != ps_ed_b4->i4_4x4_satd) |
2106 | 0 | { |
2107 | 0 | #define SUB_NOISE_THRSHLD 0 |
2108 | | #if SUB_NOISE_THRSHLD |
2109 | | ps_ed_b4->i4_4x4_satd = ps_ed_b4->i4_4x4_satd - i4_avg_noise_satd; |
2110 | | if(ps_ed_b4->i4_4x4_satd < 0) |
2111 | | { |
2112 | | ps_ed_b4->i4_4x4_satd = 0; |
2113 | | } |
2114 | | #else |
2115 | 0 | if(ps_ed_b4->i4_4x4_satd < i4_avg_noise_satd) |
2116 | 0 | { |
2117 | 0 | ps_ed_b4->i4_4x4_satd = i4_avg_noise_satd; |
2118 | 0 | } |
2119 | 0 | #endif |
2120 | 0 | blk_cnt++; |
2121 | 0 | satd_sum += ps_ed_b4->i4_4x4_satd; |
2122 | 0 | } |
2123 | 0 | ai4_satd_4x4[i * 16 + j * 4 + k] = ps_ed_b4->i4_4x4_satd; |
2124 | 0 | } |
2125 | 0 | ASSERT(blk_cnt == 0 || blk_cnt == 4); |
2126 | 0 | if(blk_cnt == 0) |
2127 | 0 | { |
2128 | 0 | satd_sum = -1; |
2129 | 0 | } |
2130 | 0 | ai4_satd_8x8[i * 4 + j] = satd_sum; |
2131 | 0 | ai4_satd_16x16[i] += satd_sum; |
2132 | 0 | i4_satd_32x32 += satd_sum; |
2133 | 0 | ps_ed_ctb_curr_l1->i4_sum_4x4_satd[i * 4 + j] = satd_sum; |
2134 | 0 | } |
2135 | 0 | } |
2136 | | |
2137 | 0 | { |
2138 | | /* This function will sort 64 elements in array ai4_satd_4x4 in ascending order |
2139 | | * to 3 arrays in sets of 4, 16, 64 into the 2-D array aai4_min_4_16_64_satd */ |
2140 | 0 | WORD32 array_length = sizeof(ai4_satd_4x4) / sizeof(WORD32); |
2141 | 0 | ihevce_merge_sort( |
2142 | 0 | &ai4_satd_4x4[0], aai4_sort_4_16_64_satd, array_length, 1, 64); |
2143 | | |
2144 | | /* This function will sort 64 elements in array ai4_satd_8x8 in ascending order |
2145 | | * to 2 arrays in sets of 4, 16 into the 2-D array aai4_sum_4_16_satd_ctb */ |
2146 | 0 | array_length = sizeof(ai4_satd_8x8) / sizeof(WORD32); |
2147 | 0 | ihevce_merge_sort( |
2148 | 0 | &ai4_satd_8x8[0], aai4_sort_4_16_satd, array_length, 1, 16); |
2149 | 0 | } |
2150 | | |
2151 | | /* Populate avg satd to calculate modulation index and activity factors */ |
2152 | | /* 16x16 */ |
2153 | 0 | for(i = 0; i < 4; i++) |
2154 | 0 | { |
2155 | 0 | for(j = 0; j < 4; j++) |
2156 | 0 | { |
2157 | 0 | WORD32 satd_sum = ps_ed_ctb_curr_l1->i4_sum_4x4_satd[i * 4 + j]; |
2158 | 0 | WORD32 satd_min = aai4_sort_4_16_64_satd[0][i * 16 + j * 4 + MEDIAN_CU_TU]; |
2159 | |
|
2160 | 0 | ASSERT(-2 != satd_sum); |
2161 | 0 | ps_ed_ctb_curr_l1->i4_min_4x4_satd[i * 4 + j] = satd_min; |
2162 | |
|
2163 | 0 | if(-1 != satd_sum) |
2164 | 0 | { |
2165 | 0 | ps_ed_ctb_curr_l1->i4_8x8_satd[i * 4 + j][0] = satd_sum; |
2166 | 0 | ps_ed_ctb_curr_l1->i4_8x8_satd[i * 4 + j][1] = satd_min; |
2167 | |
|
2168 | 0 | u8_frame_8x8_sum_act_sqr += (satd_sum * satd_sum); |
2169 | 0 | ai4_frame_8x8_sum_act[0] += satd_sum; |
2170 | 0 | ai8_frame_8x8_sum_act_sqr[0] += (satd_sum * satd_sum); |
2171 | 0 | ai4_frame_8x8_sum_blks[0] += 1; |
2172 | 0 | ai4_frame_8x8_sum_act[1] += satd_min; |
2173 | 0 | ai8_frame_8x8_sum_act_sqr[1] += (satd_min * satd_min); |
2174 | 0 | ai4_frame_8x8_sum_blks[1] += 1; |
2175 | 0 | } |
2176 | 0 | else |
2177 | 0 | { |
2178 | 0 | ps_ed_ctb_curr_l1->i4_8x8_satd[i * 4 + j][0] = -1; |
2179 | 0 | ps_ed_ctb_curr_l1->i4_8x8_satd[i * 4 + j][1] = -1; |
2180 | 0 | b8_satd_eval[i] = 0; |
2181 | 0 | } |
2182 | 0 | } |
2183 | | |
2184 | 0 | if(b8_satd_eval[i]) |
2185 | 0 | { |
2186 | 0 | ps_ed_ctb_curr_l1->i4_16x16_satd[i][0] = ai4_satd_16x16[i]; |
2187 | 0 | ps_ed_ctb_curr_l1->i4_16x16_satd[i][1] = aai4_sort_4_16_satd[0][i * 4 + MEDIAN_CU_TU]; |
2188 | 0 | ps_ed_ctb_curr_l1->i4_16x16_satd[i][2] = aai4_sort_4_16_64_satd[1][i * 16 + MEDIAN_CU_TU_BY_2]; |
2189 | |
|
2190 | 0 | for(k = 0; k < 3; k++) |
2191 | 0 | { |
2192 | 0 | WORD32 satd = ps_ed_ctb_curr_l1->i4_16x16_satd[i][k]; |
2193 | |
|
2194 | 0 | ai4_frame_16x16_sum_act[k] += satd; |
2195 | 0 | ai8_frame_16x16_sum_act_sqr[k] += (satd * satd); |
2196 | 0 | ai4_frame_16x16_sum_blks[k] += 1; |
2197 | 0 | } |
2198 | 0 | } |
2199 | 0 | else |
2200 | 0 | { |
2201 | 0 | ps_ed_ctb_curr_l1->i4_16x16_satd[i][0] = -1; |
2202 | 0 | ps_ed_ctb_curr_l1->i4_16x16_satd[i][1] = -1; |
2203 | 0 | ps_ed_ctb_curr_l1->i4_16x16_satd[i][2] = -1; |
2204 | 0 | } |
2205 | 0 | } |
2206 | | |
2207 | | /*32x32*/ |
2208 | 0 | if(b8_satd_eval[0] && b8_satd_eval[1] && b8_satd_eval[2] && b8_satd_eval[3]) |
2209 | 0 | { |
2210 | 0 | WORD32 aai4_sort_4_satd[1][64]; |
2211 | 0 | WORD32 array_length = sizeof(ai4_satd_16x16) / sizeof(WORD32); |
2212 | 0 | WORD32 satd; |
2213 | | |
2214 | | /* Sort 4 elements in ascending order */ |
2215 | 0 | ihevce_merge_sort(ai4_satd_16x16, aai4_sort_4_satd, array_length, 1, 4); |
2216 | |
|
2217 | 0 | ps_ed_ctb_curr_l1->i4_32x32_satd[0][0] = aai4_sort_4_satd[0][MEDIAN_CU_TU]; |
2218 | 0 | ps_ed_ctb_curr_l1->i4_32x32_satd[0][1] = aai4_sort_4_16_satd[1][MEDIAN_CU_TU_BY_2]; |
2219 | 0 | ps_ed_ctb_curr_l1->i4_32x32_satd[0][2] = aai4_sort_4_16_64_satd[2][MEDIAN_CU_TU_BY_4]; |
2220 | 0 | ps_ed_ctb_curr_l1->i4_32x32_satd[0][3] = i4_satd_32x32; |
2221 | |
|
2222 | 0 | for(k = 0; k < 3; k++) |
2223 | 0 | { |
2224 | 0 | WORD32 satd = ps_ed_ctb_curr_l1->i4_32x32_satd[0][k]; |
2225 | |
|
2226 | 0 | ai4_frame_32x32_sum_act[k] += satd; |
2227 | 0 | ai8_frame_32x32_sum_act_sqr[k] += (satd * satd); |
2228 | 0 | ai4_frame_32x32_sum_blks[k] += 1; |
2229 | 0 | } |
2230 | 0 | } |
2231 | 0 | else |
2232 | 0 | { |
2233 | 0 | ps_ed_ctb_curr_l1->i4_32x32_satd[0][0] = -1; |
2234 | 0 | ps_ed_ctb_curr_l1->i4_32x32_satd[0][1] = -1; |
2235 | 0 | ps_ed_ctb_curr_l1->i4_32x32_satd[0][2] = -1; |
2236 | 0 | ps_ed_ctb_curr_l1->i4_32x32_satd[0][3] = -1; |
2237 | 0 | } |
2238 | 0 | } |
2239 | 0 | } |
2240 | | |
2241 | 0 | for(i = 0; i < 2; i++) |
2242 | 0 | { |
2243 | | /*8x8*/ |
2244 | 0 | #if USE_SQRT_AVG_OF_SATD_SQR |
2245 | 0 | ps_curr_out->i8_curr_frame_8x8_sum_act[i] = ai8_frame_8x8_sum_act_sqr[i]; |
2246 | | #else |
2247 | | ps_curr_out->i8_curr_frame_8x8_sum_act[i] = ai4_frame_8x8_sum_act[i]; |
2248 | | #endif |
2249 | 0 | ps_curr_out->i4_curr_frame_8x8_sum_act_for_strength[i] = ai4_frame_8x8_sum_act[i]; |
2250 | 0 | ps_curr_out->i4_curr_frame_8x8_num_blks[i] = ai4_frame_8x8_sum_blks[i]; |
2251 | 0 | ps_curr_out->u8_curr_frame_8x8_sum_act_sqr = u8_frame_8x8_sum_act_sqr; |
2252 | | |
2253 | | /*16x16*/ |
2254 | 0 | #if USE_SQRT_AVG_OF_SATD_SQR |
2255 | 0 | ps_curr_out->i8_curr_frame_16x16_sum_act[i] = ai8_frame_16x16_sum_act_sqr[i]; |
2256 | | #else |
2257 | | ps_curr_out->i8_curr_frame_16x16_sum_act[i] = ai4_frame_16x16_sum_act[i]; |
2258 | | #endif |
2259 | 0 | ps_curr_out->i4_curr_frame_16x16_num_blks[i] = ai4_frame_16x16_sum_blks[i]; |
2260 | | |
2261 | | /*32x32*/ |
2262 | 0 | #if USE_SQRT_AVG_OF_SATD_SQR |
2263 | 0 | ps_curr_out->i8_curr_frame_32x32_sum_act[i] = ai8_frame_32x32_sum_act_sqr[i]; |
2264 | | #else |
2265 | | ps_curr_out->i8_curr_frame_32x32_sum_act[i] = ai4_frame_32x32_sum_act[i]; |
2266 | | #endif |
2267 | 0 | ps_curr_out->i4_curr_frame_32x32_num_blks[i] = ai4_frame_32x32_sum_blks[i]; |
2268 | 0 | } |
2269 | | |
2270 | | /*16x16*/ |
2271 | 0 | #if USE_SQRT_AVG_OF_SATD_SQR |
2272 | 0 | ps_curr_out->i8_curr_frame_16x16_sum_act[2] = ai8_frame_16x16_sum_act_sqr[2]; |
2273 | | #else |
2274 | | ps_curr_out->i8_curr_frame_16x16_sum_act[2] = ai4_frame_16x16_sum_act[2]; |
2275 | | #endif |
2276 | 0 | ps_curr_out->i4_curr_frame_16x16_num_blks[2] = ai4_frame_16x16_sum_blks[2]; |
2277 | | |
2278 | | /*32x32*/ |
2279 | 0 | #if USE_SQRT_AVG_OF_SATD_SQR |
2280 | 0 | ps_curr_out->i8_curr_frame_32x32_sum_act[2] = ai8_frame_32x32_sum_act_sqr[2]; |
2281 | | #else |
2282 | | ps_curr_out->i8_curr_frame_32x32_sum_act[2] = ai4_frame_32x32_sum_act[2]; |
2283 | | #endif |
2284 | 0 | ps_curr_out->i4_curr_frame_32x32_num_blks[2] = ai4_frame_32x32_sum_blks[2]; |
2285 | 0 | } |
2286 | | |
2287 | | /*! |
2288 | | ************************************************************************ |
2289 | | * @brief |
2290 | | * accumulate L1 intra satd across all threads. |
2291 | | * Note: call to this function has to be made after all threads have |
2292 | | * finished preintra processing |
2293 | | * |
2294 | | ************************************************************************ |
2295 | | */ |
2296 | | LWORD64 ihevce_decomp_pre_intra_get_frame_satd(void *pv_ctxt, WORD32 *wd, WORD32 *ht) |
2297 | 0 | { |
2298 | 0 | ihevce_decomp_pre_intra_master_ctxt_t *ps_master_ctxt = pv_ctxt; |
2299 | 0 | ihevce_decomp_pre_intra_ctxt_t *ps_ctxt = ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[0]; |
2300 | 0 | LWORD64 satd_sum = ps_ctxt->ps_ed_ctxt->i8_sum_best_satd; |
2301 | 0 | WORD32 i; |
2302 | |
|
2303 | 0 | *wd = ps_ctxt->as_layers[1].i4_actual_wd; |
2304 | 0 | *ht = ps_ctxt->as_layers[1].i4_actual_ht; |
2305 | 0 | for(i = 1; i < ps_master_ctxt->i4_num_proc_thrds; i++) |
2306 | 0 | { |
2307 | 0 | ps_ctxt = ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[i]; |
2308 | 0 | satd_sum += ps_ctxt->ps_ed_ctxt->i8_sum_best_satd; |
2309 | 0 | } |
2310 | |
|
2311 | 0 | return satd_sum; |
2312 | 0 | } |
2313 | | |
2314 | | LWORD64 ihevce_decomp_pre_intra_get_frame_satd_squared(void *pv_ctxt, WORD32 *wd, WORD32 *ht) |
2315 | 0 | { |
2316 | 0 | ihevce_decomp_pre_intra_master_ctxt_t *ps_master_ctxt = pv_ctxt; |
2317 | 0 | ihevce_decomp_pre_intra_ctxt_t *ps_ctxt = ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[0]; |
2318 | 0 | LWORD64 satd_sum = ps_ctxt->ps_ed_ctxt->i8_sum_sq_best_satd; |
2319 | 0 | WORD32 i; |
2320 | |
|
2321 | 0 | *wd = ps_ctxt->as_layers[1].i4_actual_wd; |
2322 | 0 | *ht = ps_ctxt->as_layers[1].i4_actual_ht; |
2323 | 0 | for(i = 1; i < ps_master_ctxt->i4_num_proc_thrds; i++) |
2324 | 0 | { |
2325 | 0 | ps_ctxt = ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[i]; |
2326 | 0 | satd_sum += ps_ctxt->ps_ed_ctxt->i8_sum_sq_best_satd; |
2327 | 0 | } |
2328 | |
|
2329 | 0 | return satd_sum; |
2330 | 0 | } |