/src/libhevc/encoder/ihevce_common_utils.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2018 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | /*! |
21 | | ****************************************************************************** |
22 | | * \file ihevce_common_utils.c |
23 | | * |
24 | | * \brief |
25 | | * Contains definitions of common utility functions used across encoder |
26 | | * |
27 | | * \date |
28 | | * 18/09/2012 |
29 | | * |
30 | | * \author |
31 | | * ittiam |
32 | | * |
33 | | * List of Functions |
34 | | * ihevce_copy_2d() |
35 | | * ihevce_hbd_copy_2d() |
36 | | * ihevce_2d_square_copy_luma() |
37 | | * ihevce_wt_avg_2d() |
38 | | * ihevce_itrans_recon_dc_compute() |
39 | | * ihevce_itrans_recon_dc() |
40 | | * ihevce_hbd_itrans_recon_dc() |
41 | | * ihevce_truncate_16bit_data_to_8bit() |
42 | | * ihevce_convert_16bit_recon_to_8bit() |
43 | | * ihevce_convert_16bit_input_to_8bit() |
44 | | * ihevce_find_num_clusters_of_identical_points_1D() |
45 | | * ihevce_hbd_compute_ssd() |
46 | | * ihevce_compare_pu_mv_t() |
47 | | * ihevce_set_pred_buf_as_free() |
48 | | * ihevce_get_free_pred_buf_indices() |
49 | | * ihevce_scale_mv() |
50 | | * ihevce_osal_alloc() |
51 | | * ihevce_osal_free() |
52 | | * ihevce_osal_init() |
53 | | * ihevce_osal_delete() |
54 | | * ihevce_sum_abs_seq() |
55 | | * ihevce_ssd_calculator() |
56 | | * ihevce_chroma_interleave_ssd_calculator() |
57 | | * ihevce_ssd_and_sad_calculator() |
58 | | * ihevce_chroma_interleave_2d_copy() |
59 | | * ihevce_hbd_chroma_interleave_2d_copy() |
60 | | * ihevce_hbd_chroma_interleave_ssd_calculator() |
61 | | * ihevce_get_chroma_eo_sao_params() |
62 | | * ihevce_get_chroma_eo_sao_params_hbd() |
63 | | * ihevce_compute_area_of_valid_cus_in_ctb() |
64 | | * ihevce_create_cuNode_children() |
65 | | * ihevce_cu_tree_init() |
66 | | * |
67 | | ****************************************************************************** |
68 | | */ |
69 | | |
70 | | /*****************************************************************************/ |
71 | | /* File Includes */ |
72 | | /*****************************************************************************/ |
73 | | |
74 | | /* System include files */ |
75 | | #include <stdio.h> |
76 | | #include <stdlib.h> |
77 | | #include <assert.h> |
78 | | #include <string.h> |
79 | | |
80 | | /* User include files */ |
81 | | #include "ihevc_typedefs.h" |
82 | | #include "itt_video_api.h" |
83 | | #include "ihevce_api.h" |
84 | | |
85 | | #include "rc_cntrl_param.h" |
86 | | #include "rc_frame_info_collector.h" |
87 | | #include "rc_look_ahead_params.h" |
88 | | |
89 | | #include "ihevc_defs.h" |
90 | | #include "ihevc_debug.h" |
91 | | #include "ihevc_structs.h" |
92 | | #include "ihevc_platform_macros.h" |
93 | | #include "ihevc_deblk.h" |
94 | | #include "ihevc_itrans_recon.h" |
95 | | #include "ihevc_chroma_itrans_recon.h" |
96 | | #include "ihevc_chroma_intra_pred.h" |
97 | | #include "ihevc_intra_pred.h" |
98 | | #include "ihevc_inter_pred.h" |
99 | | #include "ihevc_mem_fns.h" |
100 | | #include "ihevc_padding.h" |
101 | | #include "ihevc_weighted_pred.h" |
102 | | #include "ihevc_sao.h" |
103 | | #include "ihevc_resi_trans.h" |
104 | | #include "ihevc_quant_iquant_ssd.h" |
105 | | #include "ihevc_cabac_tables.h" |
106 | | |
107 | | #include "ihevce_defs.h" |
108 | | #include "ihevce_hle_interface.h" |
109 | | #include "ihevce_lap_enc_structs.h" |
110 | | #include "ihevce_multi_thrd_structs.h" |
111 | | #include "ihevce_me_common_defs.h" |
112 | | #include "ihevce_had_satd.h" |
113 | | #include "ihevce_error_codes.h" |
114 | | #include "ihevce_bitstream.h" |
115 | | #include "ihevce_cabac.h" |
116 | | #include "ihevce_rdoq_macros.h" |
117 | | #include "ihevce_function_selector.h" |
118 | | #include "ihevce_enc_structs.h" |
119 | | #include "ihevce_entropy_structs.h" |
120 | | #include "ihevce_cmn_utils_instr_set_router.h" |
121 | | #include "ihevce_enc_loop_structs.h" |
122 | | #include "ihevce_common_utils.h" |
123 | | #include "ihevce_global_tables.h" |
124 | | |
125 | | #include "cast_types.h" |
126 | | #include "osal.h" |
127 | | #include "osal_defaults.h" |
128 | | |
129 | | /*****************************************************************************/ |
130 | | /* Function Definitions */ |
131 | | /*****************************************************************************/ |
132 | | |
133 | | /** |
134 | | ****************************************************************************** |
135 | | * |
136 | | * @brief Performs the 2D copy |
137 | | * |
138 | | * @par Description |
139 | | * This routine Performs the 2D copy |
140 | | * |
141 | | * @param[inout] pu1_dst |
142 | | * pointer to the destination buffer |
143 | | * |
144 | | * @param[in] dst_strd |
145 | | * destination stride in terms of the size of input/output unit |
146 | | * |
147 | | * @param[inout] pu1_src |
148 | | * pointer to the source buffer |
149 | | * |
150 | | * @param[in] src_strd |
151 | | * source stride in terms of the size of input/output unit |
152 | | * |
153 | | * @param[in] blk_wd |
154 | | * number of samples to copy in a row |
155 | | * |
156 | | * @param[in] blk_ht |
157 | | * number of rows to copy |
158 | | * |
159 | | ****************************************************************************** |
160 | | */ |
161 | | void ihevce_copy_2d( |
162 | | UWORD8 *pu1_dst, |
163 | | WORD32 dst_stride, |
164 | | UWORD8 *pu1_src, |
165 | | WORD32 src_stride, |
166 | | WORD32 blk_wd, |
167 | | WORD32 blk_ht) |
168 | 31.2M | { |
169 | 31.2M | WORD32 i; |
170 | | |
171 | 418M | for(i = 0; i < blk_ht; i++) |
172 | 386M | { |
173 | 386M | memcpy(pu1_dst, pu1_src, blk_wd); |
174 | 386M | pu1_dst += dst_stride; |
175 | 386M | pu1_src += src_stride; |
176 | 386M | } |
177 | 31.2M | } |
178 | | |
179 | | /** |
180 | | ****************************************************************************** |
181 | | * |
182 | | * @brief Performs the 2D copy of luma data |
183 | | * |
184 | | * @par Description |
185 | | * This routine performs the 2D square copy of luma data |
186 | | * |
187 | | * @param[inout] p_dst |
188 | | * pointer to the destination buffer |
189 | | * |
190 | | * @param[in] dst_strd |
191 | | * destination stride in terms of the size of input/output unit |
192 | | * |
193 | | * @param[inout] p_src |
194 | | * pointer to the source buffer |
195 | | * |
196 | | * @param[in] src_strd |
197 | | * source stride in terms of the size of input/output unit |
198 | | * |
199 | | * @param[in] num_cols_to_copy |
200 | | * number of units in a line to copy from src to dst buffer |
201 | | * Assumption : num_cols_to_copy <= min (dst_strd, src_strd) |
202 | | * |
203 | | * @param[in] unit_size |
204 | | * size of the unit in bytes |
205 | | * |
206 | | * @return none |
207 | | * |
208 | | * Assumptions : num_cols_to_copy = num_lines_to_copy, |
209 | | * num_lines_to_copy can have {4, 16, 32, 64} |
210 | | * |
211 | | ****************************************************************************** |
212 | | */ |
213 | | void ihevce_2d_square_copy_luma( |
214 | | void *p_dst, |
215 | | WORD32 dst_strd, |
216 | | void *p_src, |
217 | | WORD32 src_strd, |
218 | | WORD32 num_cols_to_copy, |
219 | | WORD32 unit_size) |
220 | 18.3M | { |
221 | 18.3M | UWORD8 *pu1_dst = (UWORD8 *)p_dst; |
222 | 18.3M | UWORD8 *pu1_src = (UWORD8 *)p_src; |
223 | 18.3M | WORD32 i; |
224 | | |
225 | 240M | for(i = 0; i < num_cols_to_copy; i++) |
226 | 222M | { |
227 | 222M | memcpy(pu1_dst, pu1_src, (num_cols_to_copy * unit_size)); |
228 | 222M | pu1_dst += (dst_strd * unit_size); |
229 | 222M | pu1_src += (src_strd * unit_size); |
230 | 222M | } |
231 | 18.3M | } |
232 | | |
233 | | /** |
234 | | ******************************************************************************** |
235 | | * |
236 | | * @brief Weighted pred of 2 predictor buffers as per spec |
237 | | * |
238 | | * @param[in] pu1_pred0 : Pred0 buffer |
239 | | * |
240 | | * @param[in] pu1_pred1 : Pred1 buffer |
241 | | * |
242 | | * @param[in] pred0_strd : Stride of pred0 buffer |
243 | | * |
244 | | * @param[in] pred1_strd : Stride of pred1 buffer |
245 | | * |
246 | | * @param[in] wd : Width of pred block |
247 | | * |
248 | | * @param[in] ht : Height of pred block |
249 | | * |
250 | | * @param[out] pu1_dst : Destination buffer that will hold result |
251 | | * |
252 | | * @param[in] dst_strd : Stride of dest buffer |
253 | | * |
254 | | * @param[in] w0 : Weighting factor of Pred0 |
255 | | * |
256 | | * @param[in] w1 : weighting factor of pred1 |
257 | | * |
258 | | * @param[in] o0 : offset for pred0 |
259 | | * |
260 | | * @param[in] o1 : offset for pred1 |
261 | | * |
262 | | * @param[in] log_wdc : shift factor as per spec |
263 | | * |
264 | | * @return none |
265 | | * |
266 | | ******************************************************************************** |
267 | | */ |
268 | | void ihevce_wt_avg_2d( |
269 | | UWORD8 *pu1_pred0, |
270 | | UWORD8 *pu1_pred1, |
271 | | WORD32 pred0_strd, |
272 | | WORD32 pred1_strd, |
273 | | WORD32 wd, |
274 | | WORD32 ht, |
275 | | UWORD8 *pu1_dst, |
276 | | WORD32 dst_strd, |
277 | | WORD32 w0, |
278 | | WORD32 w1, |
279 | | WORD32 o0, |
280 | | WORD32 o1, |
281 | | WORD32 log_wdc) |
282 | 3.28M | { |
283 | | /* Total Rounding term to be added, including offset */ |
284 | 3.28M | WORD32 rnd = (o0 + o1 + 1) >> 1; // << log_wdc; |
285 | | /* Downshift */ |
286 | 3.28M | WORD32 shift = log_wdc + 1; |
287 | | /* loop counters */ |
288 | 3.28M | WORD32 i, j; |
289 | | |
290 | | /* Dst = ((w0*p0 + w1*p1) + ((o0 + o1 + 1) << logWDc)) >> (logWDc + 1) */ |
291 | | /* In above formula, the additive term is constant and is evaluated */ |
292 | | /* outside loop and stored as "rnd". */ |
293 | 39.1M | for(i = 0; i < ht; i++) |
294 | 35.8M | { |
295 | 634M | for(j = 0; j < wd; j++) |
296 | 598M | { |
297 | 598M | WORD32 tmp; |
298 | 598M | tmp = IHEVCE_WT_PRED(pu1_pred0[j], pu1_pred1[j], w0, w1, rnd, shift); |
299 | 598M | pu1_dst[j] = (UWORD8)(CLIP3(tmp, 0, 255)); |
300 | 598M | } |
301 | 35.8M | pu1_pred0 += pred0_strd; |
302 | 35.8M | pu1_pred1 += pred1_strd; |
303 | 35.8M | pu1_dst += dst_strd; |
304 | 35.8M | } |
305 | 3.28M | } |
306 | | /** |
307 | | ****************************************************************************** |
308 | | * |
309 | | * @brief Performs the Recon for DC only coefficient case |
310 | | * |
311 | | * @par Description |
312 | | * This routine performs the Recon for DC only coefficient case |
313 | | * |
314 | | * @param[inout] pu1_dst |
315 | | * pointer to the destination buffer |
316 | | * |
317 | | * @param[in] pu1_pred |
318 | | * pointer to the pred buffer |
319 | | * |
320 | | * @param[in] dst_strd |
321 | | * destination stride |
322 | | * |
323 | | * @param[in] pred_strd |
324 | | * pred buffer stride |
325 | | * |
326 | | * @param[in] trans_size |
327 | | * transform size |
328 | | * |
329 | | * @param[in] col_mult |
330 | | * chroma multiplier |
331 | | * |
332 | | * @param[in] dc_value |
333 | | * residue value |
334 | | * |
335 | | * @return none |
336 | | * |
337 | | ****************************************************************************** |
338 | | */ |
339 | | static INLINE void ihevce_itrans_recon_dc_compute( |
340 | | UWORD8 *pu1_dst, |
341 | | UWORD8 *pu1_pred, |
342 | | WORD32 dst_strd, |
343 | | WORD32 pred_strd, |
344 | | WORD32 trans_size, |
345 | | WORD32 col_mult, |
346 | | WORD32 dc_value) |
347 | 512k | { |
348 | 512k | WORD32 row, col; |
349 | | |
350 | 5.52M | for(row = 0; row < trans_size; row++) |
351 | 5.01M | { |
352 | 87.7M | for(col = 0; col < trans_size; col++) |
353 | 82.7M | { |
354 | 82.7M | pu1_dst[row * dst_strd + col * col_mult] = |
355 | 82.7M | CLIP_U8(pu1_pred[row * pred_strd + col * col_mult] + dc_value); |
356 | 82.7M | } |
357 | 5.01M | } |
358 | 512k | } |
359 | | |
360 | | /** |
361 | | ****************************************************************************** |
362 | | * |
363 | | * @brief Performs the IQ+IT+Recon for DC only coefficient case |
364 | | * |
365 | | * @par Description |
366 | | * This routine performs the IQ+IT+Recon for DC only coefficient case |
367 | | * |
368 | | * @param[in] pu1_pred |
369 | | * pointer to the pred buffer |
370 | | * |
371 | | * @param[in] pred_strd |
372 | | * pred buffer stride |
373 | | * |
374 | | * @param[inout] pu1_dst |
375 | | * pointer to the destination buffer |
376 | | * |
377 | | * @param[in] dst_strd |
378 | | * destination stride |
379 | | * |
380 | | * @param[in] trans_size |
381 | | * transform size |
382 | | * |
383 | | * @param[in] i2_deq_value |
384 | | * Dequant Coeffs |
385 | | * |
386 | | * @param[in] chroma plane |
387 | | * -1 : luma, 0 : chroma U, 1 : chroma V |
388 | | * |
389 | | * @return none |
390 | | * |
391 | | ****************************************************************************** |
392 | | */ |
393 | | void ihevce_itrans_recon_dc( |
394 | | UWORD8 *pu1_pred, |
395 | | WORD32 pred_strd, |
396 | | UWORD8 *pu1_dst, |
397 | | WORD32 dst_strd, |
398 | | WORD32 trans_size, |
399 | | WORD16 i2_deq_value, |
400 | | CHROMA_PLANE_ID_T e_chroma_plane) |
401 | 512k | { |
402 | 512k | WORD32 add, shift; |
403 | 512k | WORD32 dc_value; |
404 | 512k | UWORD8 *pu1_pred_tmp, *pu1_dst_tmp; |
405 | 512k | WORD32 col_mult; |
406 | | |
407 | 512k | assert(e_chroma_plane == NULL_PLANE || e_chroma_plane == U_PLANE || e_chroma_plane == V_PLANE); |
408 | 512k | if(e_chroma_plane == NULL_PLANE) |
409 | 197k | { |
410 | 197k | pu1_pred_tmp = pu1_pred; |
411 | 197k | pu1_dst_tmp = pu1_dst; |
412 | 197k | col_mult = 1; |
413 | 197k | } |
414 | 315k | else |
415 | 315k | { |
416 | 315k | col_mult = 2; |
417 | 315k | pu1_pred_tmp = pu1_pred + e_chroma_plane; |
418 | 315k | pu1_dst_tmp = pu1_dst + e_chroma_plane; |
419 | 315k | } |
420 | | |
421 | 512k | shift = IT_SHIFT_STAGE_1; |
422 | 512k | add = 1 << (shift - 1); |
423 | 512k | dc_value = CLIP_S16((i2_deq_value * 64 + add) >> shift); |
424 | 512k | shift = IT_SHIFT_STAGE_2; |
425 | 512k | add = 1 << (shift - 1); |
426 | 512k | dc_value = CLIP_S16((dc_value * 64 + add) >> shift); |
427 | 512k | ihevce_itrans_recon_dc_compute( |
428 | 512k | pu1_dst_tmp, pu1_pred_tmp, dst_strd, pred_strd, trans_size, col_mult, dc_value); |
429 | 512k | } |
430 | | |
431 | | /*! |
432 | | ****************************************************************************** |
433 | | * \if Function name : ihevce_find_num_clusters_of_identical_points_1D \endif |
434 | | * |
435 | | * \brief |
436 | | * |
437 | | * |
438 | | ***************************************************************************** |
439 | | */ |
440 | | WORD32 ihevce_find_num_clusters_of_identical_points_1D( |
441 | | UWORD8 *pu1_inp_array, |
442 | | UWORD8 *pu1_out_array, |
443 | | UWORD8 *pu1_freq_of_out_data_in_inp, |
444 | | WORD32 i4_num_inp_array_elements) |
445 | 1.52M | { |
446 | 1.52M | WORD32 i; |
447 | 1.52M | UWORD8 u1_value = pu1_inp_array[0]; |
448 | 1.52M | WORD32 i4_num_clusters = i4_num_inp_array_elements; |
449 | 1.52M | WORD32 i4_output_array_idx = 1; |
450 | | |
451 | 1.52M | pu1_freq_of_out_data_in_inp[0] = 1; |
452 | 1.52M | pu1_out_array[0] = u1_value; |
453 | | |
454 | 1.52M | if(1 == i4_num_inp_array_elements) |
455 | 281k | { |
456 | 281k | return 1; |
457 | 281k | } |
458 | | |
459 | 4.02M | for(i = 1; i < i4_num_inp_array_elements; i++) |
460 | 2.77M | { |
461 | 2.77M | if(pu1_inp_array[i] == u1_value) |
462 | 1.10M | { |
463 | 1.10M | pu1_freq_of_out_data_in_inp[0]++; |
464 | 1.10M | i4_num_clusters--; |
465 | 1.10M | } |
466 | 1.67M | else |
467 | 1.67M | { |
468 | 1.67M | pu1_out_array[i4_output_array_idx] = pu1_inp_array[i]; |
469 | | |
470 | 1.67M | i4_output_array_idx++; |
471 | 1.67M | } |
472 | 2.77M | } |
473 | | |
474 | 1.24M | if(i4_num_clusters > 1) |
475 | 868k | { |
476 | 868k | WORD32 i4_num_sub_clusters; |
477 | | |
478 | 868k | i4_num_sub_clusters = ihevce_find_num_clusters_of_identical_points_1D( |
479 | 868k | &pu1_out_array[1], |
480 | 868k | &pu1_out_array[1], |
481 | 868k | &pu1_freq_of_out_data_in_inp[1], |
482 | 868k | i4_num_clusters - 1); |
483 | | |
484 | 868k | i4_num_clusters = 1 + i4_num_sub_clusters; |
485 | 868k | } |
486 | | |
487 | 1.24M | return i4_num_clusters; |
488 | 1.52M | } |
489 | | |
490 | | /** |
491 | | ******************************************************************************* |
492 | | * |
493 | | * @brief Compare Motion vectors function |
494 | | * |
495 | | * @par Description: |
496 | | * Checks if MVs and Reference idx are excatly matching. |
497 | | * |
498 | | * @param[inout] ps_1 |
499 | | * motion vector 1 to be compared |
500 | | * |
501 | | * @param[in] ps_2 |
502 | | * motion vector 2 to be compared |
503 | | * |
504 | | * @returns |
505 | | * 0 : if not matching 1 : if matching |
506 | | * |
507 | | * @remarks |
508 | | * |
509 | | ******************************************************************************* |
510 | | */ |
511 | | WORD32 ihevce_compare_pu_mv_t( |
512 | | pu_mv_t *ps_pu_mv_1, pu_mv_t *ps_pu_mv_2, WORD32 i4_pred_mode_1, WORD32 i4_pred_mode_2) |
513 | 18.0M | { |
514 | 18.0M | WORD32 i4_l0_match, i4_l1_match; |
515 | 18.0M | WORD32 i4_pred_l0, i4_pred_l1; |
516 | | |
517 | 18.0M | i4_pred_l0 = (i4_pred_mode_1 != PRED_L1); |
518 | 18.0M | i4_pred_l1 = (i4_pred_mode_1 != PRED_L0); |
519 | | |
520 | 18.0M | if(i4_pred_mode_1 != i4_pred_mode_2) |
521 | 3.24M | return 0; |
522 | | |
523 | 14.8M | i4_l0_match = 0; |
524 | 14.8M | i4_l1_match = 0; |
525 | | |
526 | 14.8M | if(i4_pred_l0) |
527 | 14.4M | { |
528 | 14.4M | if(ps_pu_mv_1->i1_l0_ref_idx == ps_pu_mv_2->i1_l0_ref_idx) |
529 | 11.2M | { |
530 | 11.2M | if(0 == memcmp(&ps_pu_mv_1->s_l0_mv, &ps_pu_mv_2->s_l0_mv, sizeof(mv_t))) |
531 | 7.81M | i4_l0_match = 1; |
532 | 11.2M | } |
533 | 14.4M | } |
534 | 14.8M | if(i4_pred_l1) |
535 | 1.18M | { |
536 | 1.18M | if(ps_pu_mv_1->i1_l1_ref_idx == ps_pu_mv_2->i1_l1_ref_idx) |
537 | 1.15M | { |
538 | 1.15M | if(0 == memcmp(&ps_pu_mv_1->s_l1_mv, &ps_pu_mv_2->s_l1_mv, sizeof(mv_t))) |
539 | 1.00M | i4_l1_match = 1; |
540 | 1.15M | } |
541 | 1.18M | } |
542 | | |
543 | 14.8M | if(i4_pred_l0 && i4_pred_l1) |
544 | 838k | return (i4_l0_match & i4_l1_match); |
545 | 13.9M | else if(i4_pred_l0) |
546 | 13.6M | return i4_l0_match; |
547 | 345k | else |
548 | 345k | return i4_l1_match; |
549 | | |
550 | 14.8M | } /* End of ihevce_compare_pu_mv_t */ |
551 | | |
552 | | /*! |
553 | | ****************************************************************************** |
554 | | * \if Function name : ihevce_set_pred_buf_as_free \endif |
555 | | * |
556 | | * \brief |
557 | | * Mark buffer as free |
558 | | * |
559 | | ***************************************************************************** |
560 | | */ |
561 | | void ihevce_set_pred_buf_as_free(UWORD32 *pu4_idx_array, UWORD8 u1_buf_id) |
562 | 62.8M | { |
563 | 62.8M | (*pu4_idx_array) &= ~(1 << u1_buf_id); |
564 | 62.8M | } |
565 | | |
566 | | /*! |
567 | | ****************************************************************************** |
568 | | * \if Function name : ihevce_get_free_pred_buf_indices \endif |
569 | | * |
570 | | * \brief |
571 | | * get free buffer indices |
572 | | * |
573 | | ***************************************************************************** |
574 | | */ |
575 | | UWORD8 ihevce_get_free_pred_buf_indices( |
576 | | UWORD8 *pu1_idx_array, UWORD32 *pu4_bitfield, UWORD8 u1_num_bufs_requested) |
577 | 34.1M | { |
578 | 34.1M | UWORD8 i; |
579 | | |
580 | 34.1M | UWORD8 u1_num_free_bufs_found = 0; |
581 | 34.1M | UWORD32 u4_local_bitfield = *pu4_bitfield; |
582 | | |
583 | 34.1M | ASSERT(u1_num_bufs_requested <= (32 - ihevce_num_ones_generic(u4_local_bitfield))); |
584 | | |
585 | 115M | for(i = 0; u1_num_free_bufs_found < u1_num_bufs_requested; i++) |
586 | 81.4M | { |
587 | 81.4M | if(!(u4_local_bitfield & (1 << i))) |
588 | 43.7M | { |
589 | 43.7M | pu1_idx_array[u1_num_free_bufs_found++] = i; |
590 | 43.7M | u4_local_bitfield |= (1 << i); |
591 | 43.7M | } |
592 | 81.4M | } |
593 | | |
594 | 34.1M | (*pu4_bitfield) = u4_local_bitfield; |
595 | | |
596 | 34.1M | return u1_num_free_bufs_found; |
597 | 34.1M | } |
598 | | |
599 | | /*! |
600 | | ****************************************************************************** |
601 | | * \if Function name : ihevce_scale_mv \endif |
602 | | * |
603 | | * \brief |
604 | | * Scale mv basing on displacement of POC |
605 | | * |
606 | | ***************************************************************************** |
607 | | */ |
608 | | void ihevce_scale_mv(mv_t *ps_mv, WORD32 i4_poc_to, WORD32 i4_poc_from, WORD32 i4_curr_poc) |
609 | 573k | { |
610 | 573k | WORD32 td, tb, tx; |
611 | 573k | WORD32 dist_scale_factor; |
612 | 573k | WORD32 mvx, mvy; |
613 | | |
614 | 573k | td = CLIP_S8(i4_curr_poc - i4_poc_from); |
615 | 573k | tb = CLIP_S8(i4_curr_poc - i4_poc_to); |
616 | | |
617 | 573k | tx = (16384 + (abs(td) >> 1)) / td; |
618 | | |
619 | 573k | dist_scale_factor = (tb * tx + 32) >> 6; |
620 | 573k | dist_scale_factor = CLIP3(dist_scale_factor, -4096, 4095); |
621 | | |
622 | 573k | mvx = ps_mv->i2_mvx; |
623 | 573k | mvy = ps_mv->i2_mvy; |
624 | | |
625 | 573k | mvx = SIGN(dist_scale_factor * mvx) * ((abs(dist_scale_factor * mvx) + 127) >> 8); |
626 | 573k | mvy = SIGN(dist_scale_factor * mvy) * ((abs(dist_scale_factor * mvy) + 127) >> 8); |
627 | | |
628 | 573k | ps_mv->i2_mvx = CLIP_S16(mvx); |
629 | 573k | ps_mv->i2_mvy = CLIP_S16(mvy); |
630 | 573k | } |
631 | | |
632 | | /*! |
633 | | ****************************************************************************** |
634 | | * \if Function name : ihevce_osal_alloc \endif |
635 | | * |
636 | | * \brief |
637 | | * Memory allocate call back function passed to OSAL |
638 | | * |
639 | | * \param[in] pv_handle : handle to hle ctxt |
640 | | * \param[in] u4_size : size of memory required |
641 | | * |
642 | | * \return |
643 | | * Memory pointer |
644 | | * |
645 | | * \author |
646 | | * Ittiam |
647 | | * |
648 | | ***************************************************************************** |
649 | | */ |
650 | | void *ihevce_osal_alloc(void *pv_handle, UWORD32 u4_size) |
651 | 312k | { |
652 | 312k | ihevce_hle_ctxt_t *ps_hle_ctxt = (ihevce_hle_ctxt_t *)pv_handle; |
653 | 312k | iv_mem_rec_t s_mem_tab; |
654 | | |
655 | | /* def init of memtab */ |
656 | 312k | s_mem_tab.i4_size = sizeof(iv_mem_rec_t); |
657 | 312k | s_mem_tab.i4_mem_alignment = 8; |
658 | 312k | s_mem_tab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM; |
659 | | |
660 | | /* allocate memory for required size */ |
661 | 312k | s_mem_tab.i4_mem_size = u4_size; |
662 | | |
663 | 312k | ps_hle_ctxt->ihevce_mem_alloc( |
664 | 312k | ps_hle_ctxt->pv_mem_mgr_hdl, &ps_hle_ctxt->ps_static_cfg_prms->s_sys_api, &s_mem_tab); |
665 | | |
666 | 312k | return (s_mem_tab.pv_base); |
667 | 312k | } |
668 | | |
669 | | /*! |
670 | | ****************************************************************************** |
671 | | * \if Function name : ihevce_osal_free \endif |
672 | | * |
673 | | * \brief |
674 | | * Memory free call back function passed to OSAL |
675 | | * |
676 | | * \param[in] pv_handle : handle to hle ctxt |
677 | | * \param[in] pv_mem : memory to be freed |
678 | | * |
679 | | * \return |
680 | | * none |
681 | | * |
682 | | * \author |
683 | | * Ittiam |
684 | | * |
685 | | ***************************************************************************** |
686 | | */ |
687 | | void ihevce_osal_free(void *pv_handle, void *pv_mem) |
688 | 312k | { |
689 | 312k | ihevce_hle_ctxt_t *ps_hle_ctxt = (ihevce_hle_ctxt_t *)pv_handle; |
690 | 312k | iv_mem_rec_t s_mem_tab; |
691 | | |
692 | | /* def init of memtab */ |
693 | 312k | s_mem_tab.i4_size = sizeof(iv_mem_rec_t); |
694 | 312k | s_mem_tab.i4_mem_alignment = 8; |
695 | 312k | s_mem_tab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM; |
696 | | |
697 | | /* free memory */ |
698 | 312k | s_mem_tab.pv_base = pv_mem; |
699 | | |
700 | 312k | ps_hle_ctxt->ihevce_mem_free(ps_hle_ctxt->pv_mem_mgr_hdl, &s_mem_tab); |
701 | | |
702 | 312k | return; |
703 | 312k | } |
704 | | |
705 | | /*! |
706 | | ****************************************************************************** |
707 | | * \if Function name : ihevce_osal_init \endif |
708 | | * |
709 | | * \brief |
710 | | * Function to initialise OSAL handle |
711 | | * |
712 | | * \return |
713 | | * None |
714 | | * |
715 | | * \author |
716 | | * Ittiam |
717 | | * |
718 | | ***************************************************************************** |
719 | | */ |
720 | | WORD32 ihevce_osal_init(void *pv_hle_ctxt) |
721 | 8.96k | { |
722 | | /* local variables */ |
723 | 8.96k | ihevce_hle_ctxt_t *ps_hle_ctxt; |
724 | 8.96k | osal_cb_funcs_t s_cb_funcs; |
725 | 8.96k | WORD32 status = 0; |
726 | 8.96k | void *pv_osal_handle; |
727 | 8.96k | iv_mem_rec_t s_mem_tab; |
728 | | |
729 | 8.96k | ps_hle_ctxt = (ihevce_hle_ctxt_t *)pv_hle_ctxt; |
730 | | |
731 | | /* def init of memtab */ |
732 | 8.96k | s_mem_tab.i4_size = sizeof(iv_mem_rec_t); |
733 | 8.96k | s_mem_tab.i4_mem_alignment = 8; |
734 | 8.96k | s_mem_tab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM; |
735 | | |
736 | | /* --------------------------------------------------------------------- */ |
737 | | /* OSAL Hanndle create */ |
738 | | /* --------------------------------------------------------------------- */ |
739 | | |
740 | | /* Allocate memory for the handle */ |
741 | 8.96k | s_mem_tab.i4_mem_size = OSAL_HANDLE_SIZE; |
742 | | |
743 | 8.96k | ps_hle_ctxt->ihevce_mem_alloc( |
744 | 8.96k | ps_hle_ctxt->pv_mem_mgr_hdl, &ps_hle_ctxt->ps_static_cfg_prms->s_sys_api, &s_mem_tab); |
745 | 8.96k | if(NULL == s_mem_tab.pv_base) |
746 | 0 | { |
747 | 0 | ps_hle_ctxt->ps_static_cfg_prms->s_sys_api.ihevce_printf( |
748 | 0 | ps_hle_ctxt->ps_static_cfg_prms->s_sys_api.pv_cb_handle, |
749 | 0 | "IHEVCE ERROR: Error in OSAL initialization\n"); |
750 | 0 | return (-1); |
751 | 0 | } |
752 | | |
753 | 8.96k | pv_osal_handle = s_mem_tab.pv_base; |
754 | | |
755 | | /* Initialize OSAL call back functions */ |
756 | 8.96k | s_cb_funcs.mmr_handle = (void *)ps_hle_ctxt; |
757 | 8.96k | s_cb_funcs.osal_alloc = &ihevce_osal_alloc; |
758 | 8.96k | s_cb_funcs.osal_free = &ihevce_osal_free; |
759 | | |
760 | 8.96k | status = osal_init(pv_osal_handle); |
761 | 8.96k | if(OSAL_SUCCESS != status) |
762 | 0 | { |
763 | 0 | ps_hle_ctxt->ps_static_cfg_prms->s_sys_api.ihevce_printf( |
764 | 0 | ps_hle_ctxt->ps_static_cfg_prms->s_sys_api.pv_cb_handle, |
765 | 0 | "IHEVCE ERROR: Error in OSAL initialization\n"); |
766 | 0 | return (-1); |
767 | 0 | } |
768 | | |
769 | 8.96k | status = osal_register_callbacks(pv_osal_handle, &s_cb_funcs); |
770 | 8.96k | if(OSAL_SUCCESS != status) |
771 | 0 | { |
772 | 0 | ps_hle_ctxt->ps_static_cfg_prms->s_sys_api.ihevce_printf( |
773 | 0 | ps_hle_ctxt->ps_static_cfg_prms->s_sys_api.pv_cb_handle, |
774 | 0 | "IHEVCE ERROR: Error in OSAL initialization\n"); |
775 | 0 | return (-1); |
776 | 0 | } |
777 | 8.96k | ps_hle_ctxt->pv_osal_handle = pv_osal_handle; |
778 | | |
779 | 8.96k | return (0); |
780 | 8.96k | } |
781 | | |
782 | | /*! |
783 | | ****************************************************************************** |
784 | | * \if Function name : ihevce_osal_delete \endif |
785 | | * |
786 | | * \brief |
787 | | * Function to delete OSAL handle |
788 | | * |
789 | | * \return |
790 | | * None |
791 | | * |
792 | | * \author |
793 | | * Ittiam |
794 | | * |
795 | | ***************************************************************************** |
796 | | */ |
797 | | WORD32 ihevce_osal_delete(void *pv_hle_ctxt) |
798 | 8.96k | { |
799 | | /* local variables */ |
800 | 8.96k | ihevce_hle_ctxt_t *ps_hle_ctxt; |
801 | 8.96k | void *pv_osal_handle; |
802 | 8.96k | iv_mem_rec_t s_mem_tab; |
803 | | |
804 | 8.96k | ps_hle_ctxt = (ihevce_hle_ctxt_t *)pv_hle_ctxt; |
805 | 8.96k | pv_osal_handle = ps_hle_ctxt->pv_osal_handle; |
806 | | |
807 | | /* def init of memtab */ |
808 | 8.96k | s_mem_tab.i4_size = sizeof(iv_mem_rec_t); |
809 | 8.96k | s_mem_tab.i4_mem_alignment = 8; |
810 | 8.96k | s_mem_tab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM; |
811 | | |
812 | 8.96k | if(0 != osal_close(pv_osal_handle)) |
813 | 0 | { |
814 | 0 | ps_hle_ctxt->ps_static_cfg_prms->s_sys_api.ihevce_printf( |
815 | 0 | ps_hle_ctxt->ps_static_cfg_prms->s_sys_api.pv_cb_handle, |
816 | 0 | "IHEVCE ERROR>> Unable to close OSAL\n"); |
817 | 0 | return (-1); |
818 | 0 | } |
819 | | |
820 | | /* free osal handle */ |
821 | 8.96k | s_mem_tab.pv_base = pv_osal_handle; |
822 | | |
823 | 8.96k | ps_hle_ctxt->ihevce_mem_free(ps_hle_ctxt->pv_mem_mgr_hdl, &s_mem_tab); |
824 | | |
825 | 8.96k | return (0); |
826 | 8.96k | } |
827 | | |
828 | | /** |
829 | | ******************************************************************************* |
830 | | * |
831 | | * @brief |
832 | | * Compute SSD between two blocks (8 bit input) |
833 | | * |
834 | | * @par Description: |
835 | | * |
836 | | * @param[in] pu1_inp |
837 | | * UWORD8 pointer to the src block |
838 | | * |
839 | | * @param[in] pu1_ref |
840 | | * UWORD8 pointer to the ref block |
841 | | * |
842 | | * @param[in] inp_stride |
843 | | * UWORD32 Source stride |
844 | | * |
845 | | * @param[in] ref_stride |
846 | | * UWORD32 ref stride |
847 | | * |
848 | | * @param[in] wd |
849 | | * UWORD32 width of the block |
850 | | * |
851 | | * @param[in] ht |
852 | | * UWORD32 height of the block |
853 | | * |
854 | | * @returns SSD |
855 | | * |
856 | | * @remarks none |
857 | | * |
858 | | ******************************************************************************* |
859 | | */ |
860 | | LWORD64 ihevce_ssd_calculator( |
861 | | UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd, |
862 | | UWORD32 ht, CHROMA_PLANE_ID_T chroma_plane) |
863 | 17.9M | { |
864 | 17.9M | UWORD32 i, j; |
865 | 17.9M | LWORD64 ssd = 0; |
866 | 17.9M | UNUSED(chroma_plane); |
867 | 273M | for(i = 0; i < ht; i++) |
868 | 255M | { |
869 | 8.18G | for(j = 0; j < wd; j++) |
870 | 7.93G | { |
871 | 7.93G | ssd += (pu1_inp[j] - pu1_ref[j]) * (pu1_inp[j] - pu1_ref[j]); |
872 | 7.93G | } |
873 | | |
874 | 255M | pu1_inp += inp_stride; |
875 | 255M | pu1_ref += ref_stride; |
876 | 255M | } |
877 | | |
878 | 17.9M | return ssd; |
879 | 17.9M | } |
880 | | |
881 | | /** |
882 | | ******************************************************************************* |
883 | | * |
884 | | * @brief |
885 | | * Compute SSD between two blocks (8 bit input, chroma interleaved input) |
886 | | * |
887 | | * @par Description: |
888 | | * |
889 | | * @param[in] pu1_inp |
890 | | * UWORD8 pointer to the src block |
891 | | * |
892 | | * @param[in] pu1_ref |
893 | | * UWORD8 pointer to the ref block |
894 | | * |
895 | | * @param[in] inp_stride |
896 | | * UWORD32 Source stride |
897 | | * |
898 | | * @param[in] ref_stride |
899 | | * UWORD32 ref stride |
900 | | * |
901 | | * @param[in] wd |
902 | | * UWORD32 width of the block |
903 | | * |
904 | | * @param[in] ht |
905 | | * UWORD32 height of the block |
906 | | * |
907 | | * @returns SSD |
908 | | * |
909 | | * @remarks none |
910 | | * |
911 | | ******************************************************************************* |
912 | | */ |
913 | | LWORD64 ihevce_chroma_interleave_ssd_calculator( |
914 | | UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd, |
915 | | UWORD32 ht, CHROMA_PLANE_ID_T chroma_plane) |
916 | 22.6M | { |
917 | 22.6M | UWORD32 i, j; |
918 | 22.6M | LWORD64 ssd = 0; |
919 | 22.6M | pu1_inp += chroma_plane; |
920 | 22.6M | pu1_ref += chroma_plane; |
921 | | |
922 | | /* run a loop and find the ssd by doing diff followed by square */ |
923 | 185M | for(i = 0; i < ht; i++) |
924 | 162M | { |
925 | 1.68G | for(j = 0; j < wd; j++) |
926 | 1.52G | { |
927 | 1.52G | WORD32 val; |
928 | | |
929 | | /* note that chroma is interleaved */ |
930 | 1.52G | val = pu1_inp[j * 2] - pu1_ref[j * 2]; |
931 | 1.52G | ssd += val * val; |
932 | 1.52G | } |
933 | | /* row level update */ |
934 | 162M | pu1_inp += inp_stride; |
935 | 162M | pu1_ref += ref_stride; |
936 | 162M | } |
937 | | |
938 | 22.6M | return (ssd); |
939 | 22.6M | } |
940 | | |
941 | | /** |
942 | | ******************************************************************************* |
943 | | * |
944 | | * @brief |
945 | | * Compute SSD & SAD between two blocks (8 bit input) |
946 | | * |
947 | | * @par Description: |
948 | | * |
949 | | * @param[in] pu1_recon |
950 | | * UWORD8 pointer to the block 1 |
951 | | * |
952 | | * @param[in] recon_strd |
953 | | * UWORD32 stride of block 1 |
954 | | * |
955 | | * @param[in] pu1_src |
956 | | * UWORD8 pointer to the block 2 |
957 | | * |
958 | | * @param[in] src_strd |
959 | | * UWORD32 stride of block 2 |
960 | | * |
961 | | * @param[in] trans_size |
962 | | * UWORD32 block wd/ht |
963 | | * |
964 | | * @param[out] *pu4_blk_sad |
965 | | * UWORD32 block SAD |
966 | | * |
967 | | * @returns SSD |
968 | | * |
969 | | * @remarks none |
970 | | * |
971 | | ******************************************************************************* |
972 | | */ |
973 | | LWORD64 ihevce_ssd_and_sad_calculator( |
974 | | UWORD8 *pu1_recon, |
975 | | WORD32 recon_strd, |
976 | | UWORD8 *pu1_src, |
977 | | WORD32 src_strd, |
978 | | WORD32 trans_size, |
979 | | UWORD32 *pu4_blk_sad) |
980 | 2.08M | { |
981 | 2.08M | WORD32 i, j, sad = 0; |
982 | 2.08M | LWORD64 ssd = 0; |
983 | | |
984 | | /* run a loop and find the ssd by doing diff followed by square */ |
985 | 40.0M | for(i = 0; i < trans_size; i++) |
986 | 37.9M | { |
987 | 928M | for(j = 0; j < trans_size; j++) |
988 | 890M | { |
989 | 890M | WORD32 val; |
990 | | |
991 | 890M | val = *pu1_src++ - *pu1_recon++; |
992 | 890M | ssd += val * val; |
993 | 890M | sad += abs(val); |
994 | 890M | } |
995 | | /* row level update */ |
996 | 37.9M | pu1_src += src_strd - trans_size; |
997 | 37.9M | pu1_recon += recon_strd - trans_size; |
998 | 37.9M | } |
999 | 2.08M | *pu4_blk_sad = sad; |
1000 | | |
1001 | | /* The return value is of type WORD32 */ |
1002 | 2.08M | ssd = CLIP3(ssd, 0, 0x7fffffff); |
1003 | | |
1004 | 2.08M | return (ssd); |
1005 | 2.08M | } |
1006 | | |
1007 | | /*! |
1008 | | ****************************************************************************** |
1009 | | * \if Function name : ihevce_chroma_interleave_2d_copy \endif |
1010 | | * |
1011 | | * \brief |
1012 | | * This function copies one plane (u/v) of interleaved chroma buffer from |
1013 | | * source to destination |
1014 | | ****************************************************************************** |
1015 | | */ |
1016 | | void ihevce_chroma_interleave_2d_copy( |
1017 | | UWORD8 *pu1_uv_src_bp, |
1018 | | WORD32 src_strd, |
1019 | | UWORD8 *pu1_uv_dst_bp, |
1020 | | WORD32 dst_strd, |
1021 | | WORD32 w, |
1022 | | WORD32 h, |
1023 | | CHROMA_PLANE_ID_T e_chroma_plane) |
1024 | 29.3M | { |
1025 | 29.3M | WORD32 i, j; |
1026 | | |
1027 | 29.3M | UWORD8 *pu1_src = (U_PLANE == e_chroma_plane) ? pu1_uv_src_bp : pu1_uv_src_bp + 1; |
1028 | 29.3M | UWORD8 *pu1_dst = (U_PLANE == e_chroma_plane) ? pu1_uv_dst_bp : pu1_uv_dst_bp + 1; |
1029 | | |
1030 | 253M | for(i = 0; i < h; i++) |
1031 | 224M | { |
1032 | 2.46G | for(j = 0; j < w; j++) |
1033 | 2.23G | { |
1034 | | /* note that chroma is interleaved */ |
1035 | 2.23G | pu1_dst[j * 2] = pu1_src[j * 2]; |
1036 | 2.23G | } |
1037 | | |
1038 | | /* row level update */ |
1039 | 224M | pu1_src += src_strd; |
1040 | 224M | pu1_dst += dst_strd; |
1041 | 224M | } |
1042 | 29.3M | } |
1043 | | |
1044 | | /** |
1045 | | ******************************************************************************* |
1046 | | * |
1047 | | * @brief |
1048 | | * Gets edge offset params |
1049 | | * |
1050 | | * @par Description: |
1051 | | * Given the ctb and sao angle this function will calculate accumulated |
1052 | | * error between source and recon and the corresponding count for 4 edge |
1053 | | * indexes one each for peak,valley, half peak and half valley. |
1054 | | * |
1055 | | * @param[in] |
1056 | | * ps_sao_ctxt: Pointer to SAO context |
1057 | | * eo_sao_class: specifies edge offset class |
1058 | | * pi4_acc_error_category: pointer to an array to store accumulated error between source and recon |
1059 | | * pi4_category_count : pointer to an array to store number of peaks,valleys,half peaks and half valleys. |
1060 | | * @returns |
1061 | | * |
1062 | | * @remarks |
1063 | | * None |
1064 | | * |
1065 | | *******************************************************************************/ |
1066 | | void ihevce_get_chroma_eo_sao_params( |
1067 | | void *pv_sao_ctxt, |
1068 | | WORD32 eo_sao_class, |
1069 | | WORD32 *pi4_acc_error_category, |
1070 | | WORD32 *pi4_category_count) |
1071 | 158k | { |
1072 | 158k | WORD32 row_start, row_end, col_start, col_end, row, col; |
1073 | 158k | WORD32 row_offset = 0, col_offset = 0; |
1074 | 158k | WORD32 a, b, c, pel_error, edgeidx; |
1075 | 158k | sao_ctxt_t *ps_sao_ctxt = (sao_ctxt_t *)pv_sao_ctxt; |
1076 | | |
1077 | 158k | row_start = 0; |
1078 | 158k | row_end = ps_sao_ctxt->i4_sao_blk_ht >> 1; |
1079 | 158k | col_start = 0; |
1080 | 158k | col_end = ps_sao_ctxt->i4_sao_blk_wd; |
1081 | | |
1082 | 158k | if((ps_sao_ctxt->i4_ctb_x == 0) && (eo_sao_class != SAO_EDGE_90_DEG)) |
1083 | 71.1k | { |
1084 | 71.1k | col_start = 2; |
1085 | 71.1k | } |
1086 | | |
1087 | 158k | if(((ps_sao_ctxt->i4_ctb_x + 1) == ps_sao_ctxt->ps_sps->i2_pic_wd_in_ctb) && |
1088 | 158k | (eo_sao_class != SAO_EDGE_90_DEG)) |
1089 | 71.4k | { |
1090 | 71.4k | col_end = ps_sao_ctxt->i4_sao_blk_wd - 2; |
1091 | 71.4k | } |
1092 | | |
1093 | 158k | if((ps_sao_ctxt->i4_ctb_y == 0) && (eo_sao_class != SAO_EDGE_0_DEG)) |
1094 | 42.7k | { |
1095 | 42.7k | row_start = 1; |
1096 | 42.7k | } |
1097 | | |
1098 | 158k | if(((ps_sao_ctxt->i4_ctb_y + 1) == ps_sao_ctxt->ps_sps->i2_pic_ht_in_ctb) && |
1099 | 158k | (eo_sao_class != SAO_EDGE_0_DEG)) |
1100 | 41.2k | { |
1101 | 41.2k | row_end = row_end - 1; //ps_sao_ctxt->i4_sao_blk_ht - 1; |
1102 | 41.2k | } |
1103 | | |
1104 | 158k | if(eo_sao_class == SAO_EDGE_0_DEG) |
1105 | 109k | { |
1106 | 109k | row_offset = 0; |
1107 | 109k | col_offset = 2; |
1108 | 109k | } |
1109 | 48.8k | else if(eo_sao_class == SAO_EDGE_90_DEG) |
1110 | 23.5k | { |
1111 | 23.5k | row_offset = 1; |
1112 | 23.5k | col_offset = 0; |
1113 | 23.5k | } |
1114 | 25.2k | else if(eo_sao_class == SAO_EDGE_135_DEG) |
1115 | 13.0k | { |
1116 | 13.0k | row_offset = 1; |
1117 | 13.0k | col_offset = 2; |
1118 | 13.0k | } |
1119 | 12.2k | else if(eo_sao_class == SAO_EDGE_45_DEG) |
1120 | 12.2k | { |
1121 | 12.2k | row_offset = 1; |
1122 | 12.2k | col_offset = -2; |
1123 | 12.2k | } |
1124 | | |
1125 | 5.00M | for(row = row_start; row < row_end; row++) |
1126 | 4.84M | { |
1127 | 297M | for(col = col_start; col < col_end; col++) |
1128 | 292M | { |
1129 | 292M | c = ps_sao_ctxt |
1130 | 292M | ->pu1_cur_chroma_recon_buf[col + row * ps_sao_ctxt->i4_cur_chroma_recon_stride]; |
1131 | 292M | a = ps_sao_ctxt->pu1_cur_chroma_recon_buf |
1132 | 292M | [(col - col_offset) + |
1133 | 292M | (row - row_offset) * ps_sao_ctxt->i4_cur_chroma_recon_stride]; |
1134 | 292M | b = ps_sao_ctxt->pu1_cur_chroma_recon_buf |
1135 | 292M | [(col + col_offset) + |
1136 | 292M | (row + row_offset) * ps_sao_ctxt->i4_cur_chroma_recon_stride]; |
1137 | 292M | pel_error = |
1138 | 292M | ps_sao_ctxt |
1139 | 292M | ->pu1_cur_chroma_src_buf[col + row * ps_sao_ctxt->i4_cur_chroma_src_stride] - |
1140 | 292M | ps_sao_ctxt |
1141 | 292M | ->pu1_cur_chroma_recon_buf[col + row * ps_sao_ctxt->i4_cur_chroma_recon_stride]; |
1142 | 292M | edgeidx = 2 + SIGN(c - a) + SIGN(c - b); |
1143 | | |
1144 | 292M | if(pel_error != 0) |
1145 | 41.2M | { |
1146 | 41.2M | pi4_acc_error_category[edgeidx] += pel_error; |
1147 | 41.2M | pi4_category_count[edgeidx]++; |
1148 | 41.2M | } |
1149 | 292M | } |
1150 | 4.84M | } |
1151 | 158k | } |
1152 | | |
1153 | | /** |
1154 | | ******************************************************************************* |
1155 | | * |
1156 | | * @brief |
1157 | | * Gets edge offset params |
1158 | | * |
1159 | | * @par Description: |
1160 | | * Given the ctb and sao angle this function will calculate accumulated |
1161 | | * error between source and recon and the coresponding count for 4 edge |
1162 | | * indexes one each for peak,valley, half peak and half valley. |
1163 | | * |
1164 | | * @param[in] |
1165 | | * ps_sao_ctxt: Pointer to SAO context |
1166 | | * eo_sao_class: specifies edge offset class |
1167 | | * pi4_acc_error_category: pointer to an array to store accumulated error between source and recon |
1168 | | * pi4_category_count : pointer to an array to store number of peaks,valleys,half peaks and half valleys. |
1169 | | * @returns |
1170 | | * |
1171 | | * @remarks |
1172 | | * None |
1173 | | * |
1174 | | *******************************************************************************/ |
1175 | | void ihevce_get_luma_eo_sao_params( |
1176 | | void *pv_sao_ctxt, |
1177 | | WORD32 eo_sao_class, |
1178 | | WORD32 *pi4_acc_error_category, |
1179 | | WORD32 *pi4_category_count) |
1180 | 634k | { |
1181 | 634k | WORD32 row_start, row_end, col_start, col_end, row, col; |
1182 | 634k | WORD32 row_offset = 0, col_offset = 0; |
1183 | 634k | WORD32 a, b, c, pel_error, edgeidx; |
1184 | 634k | sao_ctxt_t *ps_sao_ctxt = (sao_ctxt_t *)pv_sao_ctxt; |
1185 | | |
1186 | 634k | row_start = 0; |
1187 | 634k | row_end = ps_sao_ctxt->i4_sao_blk_ht; |
1188 | 634k | col_start = 0; |
1189 | 634k | col_end = ps_sao_ctxt->i4_sao_blk_wd; |
1190 | | |
1191 | 634k | if((ps_sao_ctxt->i4_ctb_x == 0) && (eo_sao_class != SAO_EDGE_90_DEG)) |
1192 | 268k | { |
1193 | 268k | col_start = 1; |
1194 | 268k | } |
1195 | | |
1196 | 634k | if(((ps_sao_ctxt->i4_ctb_x + 1) == ps_sao_ctxt->ps_sps->i2_pic_wd_in_ctb) && |
1197 | 634k | (eo_sao_class != SAO_EDGE_90_DEG)) |
1198 | 268k | { |
1199 | 268k | col_end = ps_sao_ctxt->i4_sao_blk_wd - 1; |
1200 | 268k | } |
1201 | | |
1202 | 634k | if((ps_sao_ctxt->i4_ctb_y == 0) && (eo_sao_class != SAO_EDGE_0_DEG)) |
1203 | 302k | { |
1204 | 302k | row_start = 1; |
1205 | 302k | } |
1206 | | |
1207 | 634k | if(((ps_sao_ctxt->i4_ctb_y + 1) == ps_sao_ctxt->ps_sps->i2_pic_ht_in_ctb) && |
1208 | 634k | (eo_sao_class != SAO_EDGE_0_DEG)) |
1209 | 302k | { |
1210 | 302k | row_end = ps_sao_ctxt->i4_sao_blk_ht - 1; |
1211 | 302k | } |
1212 | | |
1213 | 634k | if(eo_sao_class == SAO_EDGE_0_DEG) |
1214 | 158k | { |
1215 | 158k | row_offset = 0; |
1216 | 158k | col_offset = 1; |
1217 | 158k | } |
1218 | 476k | else if(eo_sao_class == SAO_EDGE_90_DEG) |
1219 | 158k | { |
1220 | 158k | row_offset = 1; |
1221 | 158k | col_offset = 0; |
1222 | 158k | } |
1223 | 317k | else if(eo_sao_class == SAO_EDGE_135_DEG) |
1224 | 158k | { |
1225 | 158k | row_offset = 1; |
1226 | 158k | col_offset = 1; |
1227 | 158k | } |
1228 | 158k | else if(eo_sao_class == SAO_EDGE_45_DEG) |
1229 | 158k | { |
1230 | 158k | row_offset = 1; |
1231 | 158k | col_offset = -1; |
1232 | 158k | } |
1233 | | |
1234 | 39.4M | for(row = row_start; row < row_end; row++) |
1235 | 38.8M | { |
1236 | 2.42G | for(col = col_start; col < col_end; col++) |
1237 | 2.38G | { |
1238 | 2.38G | c = ps_sao_ctxt |
1239 | 2.38G | ->pu1_cur_luma_recon_buf[col + row * ps_sao_ctxt->i4_cur_luma_recon_stride]; |
1240 | 2.38G | a = ps_sao_ctxt->pu1_cur_luma_recon_buf |
1241 | 2.38G | [(col - col_offset) + |
1242 | 2.38G | (row - row_offset) * ps_sao_ctxt->i4_cur_luma_recon_stride]; |
1243 | 2.38G | b = ps_sao_ctxt->pu1_cur_luma_recon_buf |
1244 | 2.38G | [(col + col_offset) + |
1245 | 2.38G | (row + row_offset) * ps_sao_ctxt->i4_cur_luma_recon_stride]; |
1246 | 2.38G | pel_error = |
1247 | 2.38G | ps_sao_ctxt->pu1_cur_luma_src_buf[col + row * ps_sao_ctxt->i4_cur_luma_src_stride] - |
1248 | 2.38G | ps_sao_ctxt |
1249 | 2.38G | ->pu1_cur_luma_recon_buf[col + row * ps_sao_ctxt->i4_cur_luma_recon_stride]; |
1250 | 2.38G | edgeidx = 2 + SIGN(c - a) + SIGN(c - b); |
1251 | | |
1252 | 2.38G | if(pel_error != 0) |
1253 | 276M | { |
1254 | 276M | pi4_acc_error_category[edgeidx] += pel_error; |
1255 | 276M | pi4_category_count[edgeidx]++; |
1256 | 276M | } |
1257 | 2.38G | } |
1258 | 38.8M | } |
1259 | 634k | } |
1260 | | |
1261 | | /*! |
1262 | | ****************************************************************************** |
1263 | | * \if Function name : ihevce_compute_area_of_valid_cus_in_ctb \endif |
1264 | | * |
1265 | | * \brief |
1266 | | * |
1267 | | * |
1268 | | ***************************************************************************** |
1269 | | */ |
1270 | | WORD32 ihevce_compute_area_of_valid_cus_in_ctb(cur_ctb_cu_tree_t *ps_cu_tree) |
1271 | 3.37M | { |
1272 | 3.37M | WORD32 i4_area; |
1273 | | |
1274 | 3.37M | if(NULL == ps_cu_tree) |
1275 | 1.47M | { |
1276 | 1.47M | return 0; |
1277 | 1.47M | } |
1278 | | |
1279 | 1.89M | if(ps_cu_tree->is_node_valid) |
1280 | 1.09M | { |
1281 | 1.09M | i4_area = ps_cu_tree->u1_cu_size * ps_cu_tree->u1_cu_size; |
1282 | 1.09M | } |
1283 | 807k | else |
1284 | 807k | { |
1285 | 807k | i4_area = ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree->ps_child_node_tl) + |
1286 | 807k | ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree->ps_child_node_tr) + |
1287 | 807k | ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree->ps_child_node_bl) + |
1288 | 807k | ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree->ps_child_node_br); |
1289 | 807k | } |
1290 | | |
1291 | 1.89M | return i4_area; |
1292 | 3.37M | } |
1293 | | |
1294 | | /*! |
1295 | | ****************************************************************************** |
1296 | | * \if Function name : ihevce_create_cuNode_children \endif |
1297 | | * |
1298 | | * \brief |
1299 | | * |
1300 | | * |
1301 | | ***************************************************************************** |
1302 | | */ |
1303 | | static WORD32 ihevce_create_cuNode_children( |
1304 | | cur_ctb_cu_tree_t *ps_cu_tree_root, |
1305 | | cur_ctb_cu_tree_t *ps_cu_tree_cur_node, |
1306 | | WORD32 nodes_already_created) |
1307 | 3.08M | { |
1308 | 3.08M | cur_ctb_cu_tree_t *ps_tl; |
1309 | 3.08M | cur_ctb_cu_tree_t *ps_tr; |
1310 | 3.08M | cur_ctb_cu_tree_t *ps_bl; |
1311 | 3.08M | cur_ctb_cu_tree_t *ps_br; |
1312 | | |
1313 | 3.08M | ps_tl = ps_cu_tree_root + nodes_already_created; |
1314 | 3.08M | ps_tr = ps_tl + 1; |
1315 | 3.08M | ps_bl = ps_tr + 1; |
1316 | 3.08M | ps_br = ps_bl + 1; |
1317 | | /* |
1318 | | ps_tl = (ai4_child_node_enable[0]) ? ps_tl : NULL; |
1319 | | ps_tr = (ai4_child_node_enable[1]) ? ps_tr : NULL; |
1320 | | ps_bl = (ai4_child_node_enable[2]) ? ps_bl : NULL; |
1321 | | ps_br = (ai4_child_node_enable[3]) ? ps_br : NULL; |
1322 | | */ |
1323 | 3.08M | ps_cu_tree_cur_node->ps_child_node_tl = ps_tl; |
1324 | 3.08M | ps_cu_tree_cur_node->ps_child_node_tr = ps_tr; |
1325 | 3.08M | ps_cu_tree_cur_node->ps_child_node_bl = ps_bl; |
1326 | 3.08M | ps_cu_tree_cur_node->ps_child_node_br = ps_br; |
1327 | | |
1328 | 3.08M | return 4; |
1329 | 3.08M | } |
1330 | | |
1331 | | /*! |
1332 | | ****************************************************************************** |
1333 | | * \if Function name : ihevce_cu_tree_init \endif |
1334 | | * |
1335 | | * \brief |
1336 | | * |
1337 | | * |
1338 | | ***************************************************************************** |
1339 | | */ |
1340 | | void ihevce_cu_tree_init( |
1341 | | cur_ctb_cu_tree_t *ps_cu_tree, |
1342 | | cur_ctb_cu_tree_t *ps_cu_tree_root, |
1343 | | WORD32 *pi4_nodes_created_in_cu_tree, |
1344 | | WORD32 tree_depth, |
1345 | | CU_POS_T e_grandparent_blk_pos, |
1346 | | CU_POS_T e_parent_blk_pos, |
1347 | | CU_POS_T e_cur_blk_pos) |
1348 | 12.4M | { |
1349 | 12.4M | WORD32 cu_pos_x = 0; |
1350 | 12.4M | WORD32 cu_pos_y = 0; |
1351 | 12.4M | WORD32 cu_size = 0; |
1352 | | |
1353 | 12.4M | WORD32 children_nodes_required = 1; |
1354 | 12.4M | WORD32 node_validity = 0; |
1355 | | |
1356 | 12.4M | switch(tree_depth) |
1357 | 12.4M | { |
1358 | 146k | case 0: |
1359 | 146k | { |
1360 | | /* 64x64 block */ |
1361 | 146k | cu_size = 64; |
1362 | 146k | cu_pos_x = 0; |
1363 | 146k | cu_pos_y = 0; |
1364 | | |
1365 | 146k | break; |
1366 | 0 | } |
1367 | 587k | case 1: |
1368 | 587k | { |
1369 | | /* 32x32 block */ |
1370 | 587k | cu_size = 32; |
1371 | | |
1372 | | /* Explanation for logic below - */ |
1373 | | /* * pos_x and pos_y are in units of 8x8 CU's */ |
1374 | | /* * pos_x = 0 for TL and BL children */ |
1375 | | /* * pos_x = 4 for TR and BR children */ |
1376 | | /* * pos_y = 0 for TL and TR children */ |
1377 | | /* * pos_y = 4 for BL and BR children */ |
1378 | 587k | cu_pos_x = (e_cur_blk_pos & 1) << 2; |
1379 | 587k | cu_pos_y = (e_cur_blk_pos & 2) << 1; |
1380 | | |
1381 | 587k | break; |
1382 | 0 | } |
1383 | 2.34M | case 2: |
1384 | 2.34M | { |
1385 | | /* 16x16 block */ |
1386 | 2.34M | WORD32 cu_pos_x_parent; |
1387 | 2.34M | WORD32 cu_pos_y_parent; |
1388 | | |
1389 | 2.34M | cu_size = 16; |
1390 | | |
1391 | | /* Explanation for logic below - */ |
1392 | | /* See similar explanation above */ |
1393 | 2.34M | cu_pos_x_parent = (e_parent_blk_pos & 1) << 2; |
1394 | 2.34M | cu_pos_y_parent = (e_parent_blk_pos & 2) << 1; |
1395 | 2.34M | cu_pos_x = cu_pos_x_parent + ((e_cur_blk_pos & 1) << 1); |
1396 | 2.34M | cu_pos_y = cu_pos_y_parent + (e_cur_blk_pos & 2); |
1397 | | |
1398 | 2.34M | break; |
1399 | 0 | } |
1400 | 9.39M | case 3: |
1401 | 9.39M | { |
1402 | | /* 8x8 block */ |
1403 | 9.39M | WORD32 cu_pos_x_grandparent; |
1404 | 9.39M | WORD32 cu_pos_y_grandparent; |
1405 | | |
1406 | 9.39M | WORD32 cu_pos_x_parent; |
1407 | 9.39M | WORD32 cu_pos_y_parent; |
1408 | | |
1409 | 9.39M | cu_size = 8; |
1410 | | |
1411 | 9.39M | cu_pos_x_grandparent = (e_grandparent_blk_pos & 1) << 2; |
1412 | 9.39M | cu_pos_y_grandparent = (e_grandparent_blk_pos & 2) << 1; |
1413 | 9.39M | cu_pos_x_parent = cu_pos_x_grandparent + ((e_parent_blk_pos & 1) << 1); |
1414 | 9.39M | cu_pos_y_parent = cu_pos_y_grandparent + (e_parent_blk_pos & 2); |
1415 | 9.39M | cu_pos_x = cu_pos_x_parent + (e_cur_blk_pos & 1); |
1416 | 9.39M | cu_pos_y = cu_pos_y_parent + ((e_cur_blk_pos & 2) >> 1); |
1417 | | |
1418 | 9.39M | children_nodes_required = 0; |
1419 | | |
1420 | 9.39M | break; |
1421 | 0 | } |
1422 | 12.4M | } |
1423 | | |
1424 | | /* Fill the current cu_tree node */ |
1425 | 12.4M | CU_TREE_NODE_FILL(ps_cu_tree, node_validity, cu_pos_x, cu_pos_y, cu_size, 1); |
1426 | | |
1427 | 12.4M | if(children_nodes_required) |
1428 | 3.08M | { |
1429 | 3.08M | tree_depth++; |
1430 | | |
1431 | 3.08M | (*pi4_nodes_created_in_cu_tree) += ihevce_create_cuNode_children( |
1432 | 3.08M | ps_cu_tree_root, ps_cu_tree, (*pi4_nodes_created_in_cu_tree)); |
1433 | | |
1434 | 3.08M | ihevce_cu_tree_init( |
1435 | 3.08M | ps_cu_tree->ps_child_node_tl, |
1436 | 3.08M | ps_cu_tree_root, |
1437 | 3.08M | pi4_nodes_created_in_cu_tree, |
1438 | 3.08M | tree_depth, |
1439 | 3.08M | e_parent_blk_pos, |
1440 | 3.08M | e_cur_blk_pos, |
1441 | 3.08M | POS_TL); |
1442 | | |
1443 | 3.08M | ihevce_cu_tree_init( |
1444 | 3.08M | ps_cu_tree->ps_child_node_tr, |
1445 | 3.08M | ps_cu_tree_root, |
1446 | 3.08M | pi4_nodes_created_in_cu_tree, |
1447 | 3.08M | tree_depth, |
1448 | 3.08M | e_parent_blk_pos, |
1449 | 3.08M | e_cur_blk_pos, |
1450 | 3.08M | POS_TR); |
1451 | | |
1452 | 3.08M | ihevce_cu_tree_init( |
1453 | 3.08M | ps_cu_tree->ps_child_node_bl, |
1454 | 3.08M | ps_cu_tree_root, |
1455 | 3.08M | pi4_nodes_created_in_cu_tree, |
1456 | 3.08M | tree_depth, |
1457 | 3.08M | e_parent_blk_pos, |
1458 | 3.08M | e_cur_blk_pos, |
1459 | 3.08M | POS_BL); |
1460 | | |
1461 | 3.08M | ihevce_cu_tree_init( |
1462 | 3.08M | ps_cu_tree->ps_child_node_br, |
1463 | 3.08M | ps_cu_tree_root, |
1464 | 3.08M | pi4_nodes_created_in_cu_tree, |
1465 | 3.08M | tree_depth, |
1466 | 3.08M | e_parent_blk_pos, |
1467 | 3.08M | e_cur_blk_pos, |
1468 | 3.08M | POS_BR); |
1469 | 3.08M | } |
1470 | 9.39M | else |
1471 | 9.39M | { |
1472 | 9.39M | NULLIFY_THE_CHILDREN_NODES(ps_cu_tree); |
1473 | 9.39M | } |
1474 | 12.4M | } |