/src/libhevc/encoder/hme_utils.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2018 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | |
21 | | /*****************************************************************************/ |
22 | | /* File Includes */ |
23 | | /*****************************************************************************/ |
24 | | /* System include files */ |
25 | | #include <stdio.h> |
26 | | #include <string.h> |
27 | | #include <stdlib.h> |
28 | | #include <assert.h> |
29 | | #include <stdarg.h> |
30 | | #include <math.h> |
31 | | #include <limits.h> |
32 | | |
33 | | /* User include files */ |
34 | | #include "ihevc_typedefs.h" |
35 | | #include "itt_video_api.h" |
36 | | #include "ihevce_api.h" |
37 | | |
38 | | #include "rc_cntrl_param.h" |
39 | | #include "rc_frame_info_collector.h" |
40 | | #include "rc_look_ahead_params.h" |
41 | | |
42 | | #include "ihevc_defs.h" |
43 | | #include "ihevc_structs.h" |
44 | | #include "ihevc_platform_macros.h" |
45 | | #include "ihevc_deblk.h" |
46 | | #include "ihevc_itrans_recon.h" |
47 | | #include "ihevc_chroma_itrans_recon.h" |
48 | | #include "ihevc_chroma_intra_pred.h" |
49 | | #include "ihevc_intra_pred.h" |
50 | | #include "ihevc_inter_pred.h" |
51 | | #include "ihevc_mem_fns.h" |
52 | | #include "ihevc_padding.h" |
53 | | #include "ihevc_weighted_pred.h" |
54 | | #include "ihevc_sao.h" |
55 | | #include "ihevc_resi_trans.h" |
56 | | #include "ihevc_quant_iquant_ssd.h" |
57 | | #include "ihevc_cabac_tables.h" |
58 | | |
59 | | #include "ihevce_defs.h" |
60 | | #include "ihevce_lap_enc_structs.h" |
61 | | #include "ihevce_multi_thrd_structs.h" |
62 | | #include "ihevce_multi_thrd_funcs.h" |
63 | | #include "ihevce_me_common_defs.h" |
64 | | #include "ihevce_had_satd.h" |
65 | | #include "ihevce_error_codes.h" |
66 | | #include "ihevce_bitstream.h" |
67 | | #include "ihevce_cabac.h" |
68 | | #include "ihevce_rdoq_macros.h" |
69 | | #include "ihevce_function_selector.h" |
70 | | #include "ihevce_enc_structs.h" |
71 | | #include "ihevce_entropy_structs.h" |
72 | | #include "ihevce_cmn_utils_instr_set_router.h" |
73 | | #include "ihevce_enc_loop_structs.h" |
74 | | #include "ihevce_inter_pred.h" |
75 | | #include "ihevce_global_tables.h" |
76 | | #include "ihevce_dep_mngr_interface.h" |
77 | | #include "hme_datatype.h" |
78 | | #include "hme_interface.h" |
79 | | #include "hme_common_defs.h" |
80 | | #include "hme_defs.h" |
81 | | #include "ihevce_me_instr_set_router.h" |
82 | | #include "hme_globals.h" |
83 | | #include "hme_utils.h" |
84 | | #include "hme_coarse.h" |
85 | | #include "hme_fullpel.h" |
86 | | #include "hme_subpel.h" |
87 | | #include "hme_refine.h" |
88 | | #include "hme_err_compute.h" |
89 | | #include "hme_common_utils.h" |
90 | | #include "hme_search_algo.h" |
91 | | #include "ihevce_stasino_helpers.h" |
92 | | #include "ihevce_common_utils.h" |
93 | | |
94 | | /*****************************************************************************/ |
95 | | /* Macros */ |
96 | | /*****************************************************************************/ |
97 | | #define UNI_SATD_SCALE 1 |
98 | | |
99 | | /*****************************************************************************/ |
100 | | /* Function Definitions */ |
101 | | /*****************************************************************************/ |
102 | | void ihevce_open_loop_pred_data( |
103 | | me_frm_ctxt_t *ps_ctxt, |
104 | | inter_pu_results_t *ps_pu_results, |
105 | | U08 *pu1_src, |
106 | | U08 *pu1_temp_pred, |
107 | | S32 stride, |
108 | | S32 src_strd, |
109 | | UWORD8 e_part_id) |
110 | 0 | { |
111 | 0 | S32 best_sad_l0 = -1, best_sad_l1 = -1; |
112 | 0 | S32 sad_diff, status; |
113 | 0 | inter_pred_me_ctxt_t *ps_inter_pred_me_ctxt; |
114 | 0 | U08 enable_bi = 0; |
115 | 0 | pu_t s_pu; |
116 | |
|
117 | 0 | ps_inter_pred_me_ctxt = &ps_ctxt->s_mc_ctxt; |
118 | 0 | ps_ctxt->i4_count++; |
119 | | /* L0*/ |
120 | 0 | if(ps_pu_results->u1_num_results_per_part_l0[e_part_id]) |
121 | 0 | { |
122 | 0 | pu_result_t *ps_best_l0_pu; |
123 | 0 | ps_best_l0_pu = ps_pu_results->aps_pu_results[0][PRT_2Nx2N]; |
124 | 0 | best_sad_l0 = ps_best_l0_pu->i4_tot_cost - ps_best_l0_pu->i4_mv_cost; |
125 | 0 | s_pu.b2_pred_mode = PRED_L0; |
126 | 0 | s_pu.b4_ht = ps_best_l0_pu->pu.b4_ht; |
127 | 0 | s_pu.b4_wd = ps_best_l0_pu->pu.b4_wd; |
128 | 0 | s_pu.b4_pos_x = ps_best_l0_pu->pu.b4_pos_x; |
129 | 0 | s_pu.b4_pos_y = ps_best_l0_pu->pu.b4_pos_y; |
130 | 0 | s_pu.b1_intra_flag = 0; |
131 | 0 | s_pu.mv.s_l0_mv.i2_mvx = ps_best_l0_pu->pu.mv.s_l0_mv.i2_mvx; |
132 | 0 | s_pu.mv.s_l0_mv.i2_mvy = ps_best_l0_pu->pu.mv.s_l0_mv.i2_mvy; |
133 | 0 | s_pu.mv.i1_l0_ref_idx = ps_best_l0_pu->pu.mv.i1_l0_ref_idx; |
134 | 0 | } |
135 | | /*L1*/ |
136 | 0 | if(ps_pu_results->u1_num_results_per_part_l1[e_part_id]) |
137 | 0 | { |
138 | 0 | pu_result_t *ps_best_l1_pu; |
139 | 0 | ps_best_l1_pu = ps_pu_results->aps_pu_results[1][PRT_2Nx2N]; |
140 | 0 | best_sad_l1 = ps_best_l1_pu->i4_tot_cost - ps_best_l1_pu->i4_mv_cost; |
141 | 0 | s_pu.b2_pred_mode = PRED_L1; |
142 | 0 | s_pu.b4_ht = ps_best_l1_pu->pu.b4_ht; |
143 | 0 | s_pu.b4_wd = ps_best_l1_pu->pu.b4_wd; |
144 | 0 | s_pu.b4_pos_x = ps_best_l1_pu->pu.b4_pos_x; |
145 | 0 | s_pu.b4_pos_y = ps_best_l1_pu->pu.b4_pos_y; |
146 | 0 | s_pu.b1_intra_flag = 0; |
147 | 0 | s_pu.mv.s_l1_mv.i2_mvx = ps_best_l1_pu->pu.mv.s_l1_mv.i2_mvx; |
148 | 0 | s_pu.mv.s_l1_mv.i2_mvy = ps_best_l1_pu->pu.mv.s_l1_mv.i2_mvy; |
149 | 0 | s_pu.mv.i1_l1_ref_idx = ps_best_l1_pu->pu.mv.i1_l1_ref_idx; |
150 | 0 | } |
151 | 0 | ASSERT((best_sad_l0 != -1) || (best_sad_l1 != -1)); |
152 | | /*bi selection*/ |
153 | 0 | if((best_sad_l0 != -1) && (best_sad_l1 != -1)) |
154 | 0 | { |
155 | 0 | sad_diff = abs(best_sad_l0 - best_sad_l1); |
156 | 0 | if((sad_diff < (best_sad_l0 * 0.15)) && (sad_diff < (best_sad_l1 * 0.15))) |
157 | 0 | { |
158 | 0 | enable_bi = 1; |
159 | 0 | s_pu.b2_pred_mode = PRED_BI; |
160 | 0 | } |
161 | 0 | if(!enable_bi) |
162 | 0 | { |
163 | 0 | if(best_sad_l0 < best_sad_l1) |
164 | 0 | { |
165 | 0 | s_pu.b2_pred_mode = PRED_L0; |
166 | 0 | } |
167 | 0 | else |
168 | 0 | { |
169 | 0 | s_pu.b2_pred_mode = PRED_L1; |
170 | 0 | } |
171 | 0 | } |
172 | 0 | } |
173 | 0 | status = ihevce_luma_inter_pred_pu(ps_inter_pred_me_ctxt, &s_pu, pu1_temp_pred, stride, 1); |
174 | 0 | if(status == -1) |
175 | 0 | { |
176 | 0 | ASSERT(0); |
177 | 0 | } |
178 | 0 | } |
179 | | |
180 | | /** |
181 | | ******************************************************************************** |
182 | | * @fn void *hme_get_wkg_mem(buf_mgr_t *ps_buf_mgr, S32 i4_size) |
183 | | * |
184 | | * @brief Allocates a block of size = i4_size from working memory and returns |
185 | | * |
186 | | * @param[in,out] ps_buf_mgr: Buffer manager for wkg memory |
187 | | * |
188 | | * @param[in] i4_size : size required |
189 | | * |
190 | | * @return void pointer to allocated memory, NULL if failure |
191 | | ******************************************************************************** |
192 | | */ |
193 | | void *hme_get_wkg_mem(buf_mgr_t *ps_buf_mgr, S32 i4_size) |
194 | 4.81M | { |
195 | 4.81M | U08 *pu1_mem; |
196 | | |
197 | 4.81M | if(ps_buf_mgr->i4_used + i4_size > ps_buf_mgr->i4_total) |
198 | 0 | return NULL; |
199 | | |
200 | 4.81M | pu1_mem = ps_buf_mgr->pu1_wkg_mem + ps_buf_mgr->i4_used; |
201 | 4.81M | ps_buf_mgr->i4_used += i4_size; |
202 | | |
203 | 4.81M | return ((void *)pu1_mem); |
204 | 4.81M | } |
205 | | |
206 | | /** |
207 | | ******************************************************************************** |
208 | | * @fn hme_init_histogram( |
209 | | * |
210 | | * @brief Top level entry point for Coarse ME. Runs across blocks and does the |
211 | | * needful by calling other low level routines. |
212 | | * |
213 | | * @param[in,out] ps_hist : the histogram structure |
214 | | * |
215 | | * @param[in] i4_max_mv_x : Maximum mv allowed in x direction (fpel units) |
216 | | * |
217 | | * @param[in] i4_max_mv_y : Maximum mv allowed in y direction (fpel units) |
218 | | * |
219 | | * @return None |
220 | | ******************************************************************************** |
221 | | */ |
222 | | |
223 | | void hme_init_histogram(mv_hist_t *ps_hist, S32 i4_max_mv_x, S32 i4_max_mv_y) |
224 | 150k | { |
225 | 150k | S32 i4_num_bins, i4_num_cols, i4_num_rows; |
226 | 150k | S32 i4_shift_x, i4_shift_y, i, i4_range, i4_val; |
227 | | |
228 | | /*************************************************************************/ |
229 | | /* Evaluate the shift_x and shift_y. For this, we use the following logic*/ |
230 | | /* Assuming that we use up all MAX_NUM_BINS. Then the number of bins is */ |
231 | | /* given by formula ((max_mv_x * 2) >> shift_x)*((max_mv_y * 2)>>shift_y)*/ |
232 | | /* or shift_x + shift_y is log ((max_mv_x * max_mv_y * 4) / MAX_NUM_BINS)*/ |
233 | | /* if above quantity is negative, then we make it zero. */ |
234 | | /* If result is odd, then shift_y is result >> 1, shift_x is shift_y + 1 */ |
235 | | /*************************************************************************/ |
236 | 150k | i4_val = i4_max_mv_x * i4_max_mv_y * 4; |
237 | 150k | i4_range = (hme_get_range(i4_val - 1)) + 1; |
238 | 150k | if(i4_range > LOG_MAX_NUM_BINS) |
239 | 125k | { |
240 | 125k | i4_shift_y = (i4_range - LOG_MAX_NUM_BINS); |
241 | 125k | i4_shift_x = (i4_shift_y + 1) >> 1; |
242 | 125k | i4_shift_y >>= 1; |
243 | 125k | } |
244 | 25.6k | else |
245 | 25.6k | { |
246 | 25.6k | i4_shift_y = 0; |
247 | 25.6k | i4_shift_x = 0; |
248 | 25.6k | } |
249 | | |
250 | | /* we assume the mv range is -max_mv_x to +max_mv_x, ditto for y */ |
251 | | /* So number of columns is 2*max_mv_x >> i4_shift_x. Ditto for rows */ |
252 | | /* this helps us compute num bins that are active for this histo session */ |
253 | 150k | i4_num_cols = (i4_max_mv_x << 1) >> i4_shift_x; |
254 | 150k | i4_num_rows = (i4_max_mv_y << 1) >> i4_shift_y; |
255 | 150k | i4_num_bins = i4_num_rows * i4_num_cols; |
256 | | |
257 | 150k | ASSERT(i4_num_bins <= MAX_NUM_BINS); |
258 | | |
259 | 150k | ps_hist->i4_num_rows = i4_num_rows; |
260 | 150k | ps_hist->i4_num_cols = i4_num_cols; |
261 | 150k | ps_hist->i4_min_x = -i4_max_mv_x; |
262 | 150k | ps_hist->i4_min_y = -i4_max_mv_y; |
263 | 150k | ps_hist->i4_shift_x = i4_shift_x; |
264 | 150k | ps_hist->i4_shift_y = i4_shift_y; |
265 | 150k | ps_hist->i4_lobe1_size = 5; |
266 | 150k | ps_hist->i4_lobe2_size = 3; |
267 | | |
268 | 150k | ps_hist->i4_num_bins = i4_num_bins; |
269 | | |
270 | 136M | for(i = 0; i < i4_num_bins; i++) |
271 | 136M | { |
272 | 136M | ps_hist->ai4_bin_count[i] = 0; |
273 | 136M | } |
274 | 150k | } |
275 | | |
276 | | /** |
277 | | ******************************************************************************** |
278 | | * @fn hme_update_histogram( |
279 | | * |
280 | | * @brief Updates the histogram given an mv entry |
281 | | * |
282 | | * @param[in,out] ps_hist : the histogram structure |
283 | | * |
284 | | * @param[in] i4_mv_x : x component of the mv (fpel units) |
285 | | * |
286 | | * @param[in] i4_mv_y : y component of the mv (fpel units) |
287 | | * |
288 | | * @return None |
289 | | ******************************************************************************** |
290 | | */ |
291 | | void hme_update_histogram(mv_hist_t *ps_hist, S32 i4_mv_x, S32 i4_mv_y) |
292 | 5.33M | { |
293 | 5.33M | S32 i4_bin_index, i4_col, i4_row; |
294 | | |
295 | 5.33M | i4_col = (i4_mv_x - ps_hist->i4_min_x) >> ps_hist->i4_shift_x; |
296 | 5.33M | i4_row = (i4_mv_y - ps_hist->i4_min_y) >> ps_hist->i4_shift_y; |
297 | | |
298 | 5.33M | i4_bin_index = i4_col + (i4_row * ps_hist->i4_num_cols); |
299 | | /* Sanity Check */ |
300 | 5.33M | ASSERT(i4_bin_index < MAX_NUM_BINS); |
301 | | |
302 | 5.33M | ps_hist->ai4_bin_count[i4_bin_index]++; |
303 | 5.33M | } |
304 | | |
305 | | /** |
306 | | ******************************************************************************** |
307 | | * @fn hme_get_global_mv( |
308 | | * |
309 | | * @brief returns the global mv of a previous picture. Accounts for the fact |
310 | | * that the delta poc of the previous picture may have been different |
311 | | * from delta poc of current picture. Delta poc is POC difference |
312 | | * between a picture and its reference. |
313 | | * |
314 | | * @param[out] ps_mv: mv_t structure where the motion vector is returned |
315 | | * |
316 | | * @param[in] i4_delta_poc: the delta poc for the current pic w.r.t. reference |
317 | | * |
318 | | * @return None |
319 | | ******************************************************************************** |
320 | | */ |
321 | | void hme_get_global_mv(layer_ctxt_t *ps_prev_layer, hme_mv_t *ps_mv, S32 i4_delta_poc) |
322 | 150k | { |
323 | 150k | S16 i2_mv_x, i2_mv_y; |
324 | 150k | S32 i4_delta_poc_prev; |
325 | 150k | S32 i4_poc_prev = ps_prev_layer->i4_poc; |
326 | 150k | S32 i4_poc_prev_ref = ps_prev_layer->ai4_ref_id_to_poc_lc[0]; |
327 | | |
328 | 150k | i4_delta_poc_prev = i4_poc_prev - i4_poc_prev_ref; |
329 | 150k | i2_mv_x = ps_prev_layer->s_global_mv[0][GMV_THICK_LOBE].i2_mv_x; |
330 | 150k | i2_mv_y = ps_prev_layer->s_global_mv[0][GMV_THICK_LOBE].i2_mv_y; |
331 | | |
332 | 150k | i2_mv_x = (S16)((i2_mv_x * i4_delta_poc) / i4_delta_poc_prev); |
333 | 150k | i2_mv_y = (S16)((i2_mv_y * i4_delta_poc) / i4_delta_poc_prev); |
334 | | |
335 | 150k | ps_mv->i2_mv_x = i2_mv_x; |
336 | 150k | ps_mv->i2_mv_y = i2_mv_y; |
337 | 150k | } |
338 | | |
339 | | /** |
340 | | ******************************************************************************** |
341 | | * @fn hme_calculate_global_mv( |
342 | | * |
343 | | * @brief Calculates global mv for a given histogram |
344 | | * |
345 | | * @param[in] ps_hist : the histogram structure |
346 | | * |
347 | | * @param[in] ps_mv : used to return the global mv |
348 | | * |
349 | | * @param[in] e_lobe_type : refer to GMV_MVTYPE_T |
350 | | * |
351 | | * @return None |
352 | | ******************************************************************************** |
353 | | */ |
354 | | void hme_calculate_global_mv(mv_hist_t *ps_hist, hme_mv_t *ps_mv, GMV_MVTYPE_T e_lobe_type) |
355 | 141k | { |
356 | 141k | S32 i4_offset, i4_lobe_size, i4_y, i4_x, *pi4_bin_count; |
357 | 141k | S32 i4_max_sum = -1; |
358 | 141k | S32 i4_max_x = 0, i4_max_y = 0; |
359 | | |
360 | 141k | if(e_lobe_type == GMV_THICK_LOBE) |
361 | 141k | i4_lobe_size = ps_hist->i4_lobe1_size; |
362 | 0 | else |
363 | 0 | i4_lobe_size = ps_hist->i4_lobe2_size; |
364 | | |
365 | 141k | i4_offset = i4_lobe_size >> 1; |
366 | 2.99M | for(i4_y = i4_offset; i4_y < ps_hist->i4_num_rows - i4_offset; i4_y++) |
367 | 2.85M | { |
368 | 91.1M | for(i4_x = i4_offset; i4_x < ps_hist->i4_num_cols - i4_offset; i4_x++) |
369 | 88.2M | { |
370 | 88.2M | S32 i4_bin_id, i4_sum; |
371 | 88.2M | i4_bin_id = (i4_x - 2) + ((i4_y - 2) * ps_hist->i4_num_cols); |
372 | | |
373 | 88.2M | pi4_bin_count = &ps_hist->ai4_bin_count[i4_bin_id]; |
374 | 88.2M | i4_sum = hme_compute_2d_sum_unsigned( |
375 | 88.2M | (void *)pi4_bin_count, |
376 | 88.2M | i4_lobe_size, |
377 | 88.2M | i4_lobe_size, |
378 | 88.2M | ps_hist->i4_num_cols, |
379 | 88.2M | sizeof(U32)); |
380 | | |
381 | 88.2M | if(i4_sum > i4_max_sum) |
382 | 373k | { |
383 | 373k | i4_max_x = i4_x; |
384 | 373k | i4_max_y = i4_y; |
385 | 373k | i4_max_sum = i4_sum; |
386 | 373k | } |
387 | 88.2M | } |
388 | 2.85M | } |
389 | | |
390 | 141k | ps_mv->i2_mv_y = (S16)((i4_max_y << ps_hist->i4_shift_y) + ps_hist->i4_min_y); |
391 | 141k | ps_mv->i2_mv_x = (S16)((i4_max_x << ps_hist->i4_shift_x) + ps_hist->i4_min_x); |
392 | 141k | } |
393 | | |
394 | | /** |
395 | | ******************************************************************************** |
396 | | * @fn ctb_node_t *hme_get_ctb_node(ctb_mem_mgr_t *ps_mem_mgr) |
397 | | * |
398 | | * @brief returns a new ctb node usable for creating a new ctb candidate |
399 | | * |
400 | | * @param[in] ps_mem_mgr : memory manager holding all ctb nodes |
401 | | * |
402 | | * @return NULL if no free nodes, else ptr to the new ctb node |
403 | | ******************************************************************************** |
404 | | */ |
405 | | ctb_node_t *hme_get_ctb_node(ctb_mem_mgr_t *ps_mem_mgr) |
406 | 0 | { |
407 | 0 | U08 *pu1_ret; |
408 | 0 | if((ps_mem_mgr->i4_used + ps_mem_mgr->i4_size) > ps_mem_mgr->i4_tot) |
409 | 0 | return (NULL); |
410 | 0 | pu1_ret = ps_mem_mgr->pu1_mem + ps_mem_mgr->i4_used; |
411 | 0 | ps_mem_mgr->i4_used += ps_mem_mgr->i4_size; |
412 | 0 | return ((ctb_node_t *)pu1_ret); |
413 | 0 | } |
414 | | |
415 | | /** |
416 | | ******************************************************************************** |
417 | | * @fn hme_map_mvs_to_grid(mv_grid_t **pps_mv_grid, |
418 | | search_results_t *ps_search_results, S32 i4_num_ref) |
419 | | * |
420 | | * @brief For a given CU whose results are in ps_search_results, the 17x17 |
421 | | * mv grid is updated for future use within the CTB |
422 | | * |
423 | | * @param[in] ps_search_results : Search results data structure |
424 | | * |
425 | | * @param[out] pps_mv_grid: The mv grid (as many as num ref) |
426 | | * |
427 | | * @param[in] i4_num_ref: nuber of search iterations to update |
428 | | * |
429 | | * @return None |
430 | | ******************************************************************************** |
431 | | */ |
432 | | void hme_map_mvs_to_grid( |
433 | | mv_grid_t **pps_mv_grid, |
434 | | search_results_t *ps_search_results, |
435 | | U08 *pu1_pred_dir_searched, |
436 | | S32 i4_num_pred_dir) |
437 | 1.78M | { |
438 | 1.78M | S32 i4_cu_start_offset; |
439 | | /*************************************************************************/ |
440 | | /* Start x, y offset of CU relative to CTB. To update the mv grid which */ |
441 | | /* stores 1 mv per 4x4, we convert pixel offset to 4x4 blk offset */ |
442 | | /*************************************************************************/ |
443 | 1.78M | S32 i4_cu_offset_x = (S32)ps_search_results->u1_x_off >> 2; |
444 | 1.78M | S32 i4_cu_offset_y = (S32)ps_search_results->u1_y_off >> 2; |
445 | | |
446 | | /* Controls the attribute of a given partition within CU */ |
447 | | /* , i.e. start locn, size */ |
448 | 1.78M | part_attr_t *ps_part_attr; |
449 | | |
450 | 1.78M | S32 i4_part, i4_part_id, num_parts, i4_stride; |
451 | 1.78M | S16 i2_mv_x, i2_mv_y; |
452 | 1.78M | S08 i1_ref_idx; |
453 | | |
454 | | /* Per partition, attributes w.r.t. CU start */ |
455 | 1.78M | S32 x_start, y_start, x_end, y_end, i4_x, i4_y; |
456 | 1.78M | PART_TYPE_T e_part_type; |
457 | | |
458 | | /* Points to exact mv structures within the grid to be udpated */ |
459 | 1.78M | search_node_t *ps_grid_node, *ps_grid_node_tmp; |
460 | | |
461 | | /* points to exact mv grid (based on search iteration) to be updated */ |
462 | 1.78M | mv_grid_t *ps_mv_grid; |
463 | | |
464 | 1.78M | search_node_t *ps_search_node; |
465 | | |
466 | 1.78M | S32 shift, i, mv_shift = 2; |
467 | | /* Proportional to the size of CU, controls the number of 4x4 blks */ |
468 | | /* to be updated */ |
469 | 1.78M | shift = ps_search_results->e_cu_size; |
470 | 1.78M | ASSERT(i4_num_pred_dir <= 2); |
471 | | |
472 | 1.78M | e_part_type = (PART_TYPE_T)ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type; |
473 | | |
474 | 1.78M | if((ps_search_results->e_cu_size == CU_16x16) && (ps_search_results->u1_split_flag) && |
475 | 1.78M | (ps_search_results->i4_part_mask & ENABLE_NxN)) |
476 | 84.4k | { |
477 | 84.4k | e_part_type = PRT_NxN; |
478 | 84.4k | } |
479 | | |
480 | 3.91M | for(i = 0; i < i4_num_pred_dir; i++) |
481 | 2.13M | { |
482 | 2.13M | num_parts = gau1_num_parts_in_part_type[e_part_type]; |
483 | 2.13M | ps_mv_grid = pps_mv_grid[pu1_pred_dir_searched[i]]; |
484 | 2.13M | i4_stride = ps_mv_grid->i4_stride; |
485 | | |
486 | 2.13M | i4_cu_start_offset = |
487 | 2.13M | i4_cu_offset_x + i4_cu_offset_y * i4_stride + ps_mv_grid->i4_start_offset; |
488 | | |
489 | | /* Move to the appropriate 2d locn of CU start within Grid */ |
490 | 2.13M | ps_grid_node = &ps_mv_grid->as_node[i4_cu_start_offset]; |
491 | | |
492 | 4.59M | for(i4_part = 0; i4_part < num_parts; i4_part++) |
493 | 2.45M | { |
494 | 2.45M | i4_part_id = ge_part_type_to_part_id[e_part_type][i4_part]; |
495 | | |
496 | | /* Pick the mvx and y and ref id corresponding to this partition */ |
497 | 2.45M | ps_search_node = |
498 | 2.45M | ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id]; |
499 | | |
500 | 2.45M | i2_mv_x = ps_search_node->s_mv.i2_mvx; |
501 | 2.45M | i2_mv_y = ps_search_node->s_mv.i2_mvy; |
502 | 2.45M | i1_ref_idx = ps_search_node->i1_ref_idx; |
503 | | |
504 | | /* Move to the appropriate location within the CU */ |
505 | 2.45M | ps_part_attr = &gas_part_attr_in_cu[i4_part_id]; |
506 | 2.45M | x_start = ps_part_attr->u1_x_start; |
507 | 2.45M | x_end = x_start + ps_part_attr->u1_x_count; |
508 | 2.45M | y_start = ps_part_attr->u1_y_start; |
509 | 2.45M | y_end = y_start + ps_part_attr->u1_y_count; |
510 | | |
511 | | /* Convert attributes from 8x8 CU size to given CU size */ |
512 | 2.45M | x_start = (x_start << shift) >> mv_shift; |
513 | 2.45M | x_end = (x_end << shift) >> mv_shift; |
514 | 2.45M | y_start = (y_start << shift) >> mv_shift; |
515 | 2.45M | y_end = (y_end << shift) >> mv_shift; |
516 | | |
517 | 2.45M | ps_grid_node_tmp = ps_grid_node + y_start * i4_stride; |
518 | | |
519 | | /* Update all 4x4 blk mvs with the part mv */ |
520 | | /* For e.g. we update 4 units in case of NxN for 16x16 CU */ |
521 | 12.4M | for(i4_y = y_start; i4_y < y_end; i4_y++) |
522 | 9.95M | { |
523 | 55.4M | for(i4_x = x_start; i4_x < x_end; i4_x++) |
524 | 45.5M | { |
525 | 45.5M | ps_grid_node_tmp[i4_x].s_mv.i2_mvx = i2_mv_x; |
526 | 45.5M | ps_grid_node_tmp[i4_x].s_mv.i2_mvy = i2_mv_y; |
527 | 45.5M | ps_grid_node_tmp[i4_x].i1_ref_idx = i1_ref_idx; |
528 | 45.5M | ps_grid_node_tmp[i4_x].u1_subpel_done = 1; |
529 | 45.5M | } |
530 | 9.95M | ps_grid_node_tmp += i4_stride; |
531 | 9.95M | } |
532 | 2.45M | } |
533 | 2.13M | } |
534 | 1.78M | } |
535 | | |
536 | | void hme_set_ctb_pred_attr(ctb_node_t *ps_parent, U08 *pu1_pred0, U08 *pu1_pred1, S32 i4_stride) |
537 | 0 | { |
538 | 0 | ps_parent->apu1_pred[0] = pu1_pred0; |
539 | 0 | ps_parent->apu1_pred[1] = pu1_pred1; |
540 | 0 | ps_parent->i4_pred_stride = i4_stride; |
541 | 0 | if(ps_parent->ps_tl != NULL) |
542 | 0 | { |
543 | 0 | S32 blk_wd = (S32)ps_parent->ps_tr->u1_x_off; |
544 | 0 | blk_wd -= (S32)ps_parent->u1_x_off; |
545 | |
|
546 | 0 | hme_set_ctb_pred_attr(ps_parent->ps_tl, pu1_pred0, pu1_pred1, i4_stride >> 1); |
547 | |
|
548 | 0 | hme_set_ctb_pred_attr( |
549 | 0 | ps_parent->ps_tr, pu1_pred0 + blk_wd, pu1_pred1 + blk_wd, i4_stride >> 1); |
550 | |
|
551 | 0 | hme_set_ctb_pred_attr( |
552 | 0 | ps_parent->ps_bl, |
553 | 0 | pu1_pred0 + (blk_wd * i4_stride), |
554 | 0 | pu1_pred1 + (blk_wd * i4_stride), |
555 | 0 | i4_stride >> 1); |
556 | |
|
557 | 0 | hme_set_ctb_pred_attr( |
558 | 0 | ps_parent->ps_tr, |
559 | 0 | pu1_pred0 + (blk_wd * (1 + i4_stride)), |
560 | 0 | pu1_pred1 + (blk_wd * (1 + i4_stride)), |
561 | 0 | i4_stride >> 1); |
562 | 0 | } |
563 | 0 | } |
564 | | |
565 | | /** |
566 | | ******************************************************************************** |
567 | | * @fn hme_create_valid_part_ids(S32 i4_part_mask, S32 *pi4_valid_part_ids) |
568 | | * |
569 | | * @brief Expands the part mask to a list of valid part ids terminated by -1 |
570 | | * |
571 | | * @param[in] i4_part_mask : bit mask of active partitino ids |
572 | | * |
573 | | * @param[out] pi4_valid_part_ids : array, each entry has one valid part id |
574 | | * Terminated by -1 to signal end. |
575 | | * |
576 | | * @return number of partitions |
577 | | ******************************************************************************** |
578 | | */ |
579 | | S32 hme_create_valid_part_ids(S32 i4_part_mask, S32 *pi4_valid_part_ids) |
580 | 14.3M | { |
581 | 14.3M | S32 id = 0, i; |
582 | 258M | for(i = 0; i < TOT_NUM_PARTS; i++) |
583 | 244M | { |
584 | 244M | if(i4_part_mask & (1 << i)) |
585 | 49.3M | { |
586 | 49.3M | pi4_valid_part_ids[id] = i; |
587 | 49.3M | id++; |
588 | 49.3M | } |
589 | 244M | } |
590 | 14.3M | pi4_valid_part_ids[id] = -1; |
591 | | |
592 | 14.3M | return id; |
593 | 14.3M | } |
594 | | |
595 | | ctb_boundary_attrs_t * |
596 | | get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt) |
597 | 104k | { |
598 | 104k | S32 horz_crop, vert_crop; |
599 | 104k | ctb_boundary_attrs_t *ps_attrs; |
600 | | |
601 | 104k | horz_crop = ((ctb_start_x + 64) > pic_wd) ? 2 : 0; |
602 | 104k | vert_crop = ((ctb_start_y + 64) > pic_ht) ? 1 : 0; |
603 | 104k | switch(horz_crop + vert_crop) |
604 | 104k | { |
605 | 94.4k | case 0: |
606 | 94.4k | ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_CENTRE]; |
607 | 94.4k | break; |
608 | 4.60k | case 1: |
609 | 4.60k | ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_BOT_PIC_BOUNDARY]; |
610 | 4.60k | break; |
611 | 4.89k | case 2: |
612 | 4.89k | ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_RT_PIC_BOUNDARY]; |
613 | 4.89k | break; |
614 | 437 | case 3: |
615 | 437 | ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_BOT_RT_PIC_BOUNDARY]; |
616 | 437 | break; |
617 | 104k | } |
618 | 104k | return (ps_attrs); |
619 | 104k | } |
620 | | |
621 | | /** |
622 | | ******************************************************************************** |
623 | | * @fn hevc_avg_2d(U08 *pu1_src1, |
624 | | * U08 *pu1_src2, |
625 | | * S32 i4_src1_stride, |
626 | | * S32 i4_src2_stride, |
627 | | * S32 i4_blk_wd, |
628 | | * S32 i4_blk_ht, |
629 | | * U08 *pu1_dst, |
630 | | * S32 i4_dst_stride) |
631 | | * |
632 | | * |
633 | | * @brief point wise average of two buffers into a third buffer |
634 | | * |
635 | | * @param[in] pu1_src1 : first source buffer |
636 | | * |
637 | | * @param[in] pu1_src2 : 2nd source buffer |
638 | | * |
639 | | * @param[in] i4_src1_stride : stride of source 1 buffer |
640 | | * |
641 | | * @param[in] i4_src2_stride : stride of source 2 buffer |
642 | | * |
643 | | * @param[in] i4_blk_wd : block width |
644 | | * |
645 | | * @param[in] i4_blk_ht : block height |
646 | | * |
647 | | * @param[out] pu1_dst : destination buffer |
648 | | * |
649 | | * @param[in] i4_dst_stride : stride of the destination buffer |
650 | | * |
651 | | * @return void |
652 | | ******************************************************************************** |
653 | | */ |
654 | | void hevc_avg_2d( |
655 | | U08 *pu1_src1, |
656 | | U08 *pu1_src2, |
657 | | S32 i4_src1_stride, |
658 | | S32 i4_src2_stride, |
659 | | S32 i4_blk_wd, |
660 | | S32 i4_blk_ht, |
661 | | U08 *pu1_dst, |
662 | | S32 i4_dst_stride) |
663 | 12.9M | { |
664 | 12.9M | S32 i, j; |
665 | | |
666 | 222M | for(i = 0; i < i4_blk_ht; i++) |
667 | 209M | { |
668 | 3.67G | for(j = 0; j < i4_blk_wd; j++) |
669 | 3.46G | { |
670 | 3.46G | pu1_dst[j] = (pu1_src1[j] + pu1_src2[j] + 1) >> 1; |
671 | 3.46G | } |
672 | 209M | pu1_src1 += i4_src1_stride; |
673 | 209M | pu1_src2 += i4_src2_stride; |
674 | 209M | pu1_dst += i4_dst_stride; |
675 | 209M | } |
676 | 12.9M | } |
677 | | /** |
678 | | ******************************************************************************** |
679 | | * @fn hme_pick_back_search_node(search_results_t *ps_search_results, |
680 | | * search_node_t *ps_search_node_fwd, |
681 | | * S32 i4_part_idx, |
682 | | * layer_ctxt_t *ps_curr_layer) |
683 | | * |
684 | | * |
685 | | * @brief returns the search node corresponding to a ref idx in same or |
686 | | * opp direction. Preference is given to opp direction, but if that |
687 | | * does not yield results, same direction is attempted. |
688 | | * |
689 | | * @param[in] ps_search_results: search results overall |
690 | | * |
691 | | * @param[in] ps_search_node_fwd: search node corresponding to "fwd" direction |
692 | | * |
693 | | * @param[in] i4_part_idx : partition id |
694 | | * |
695 | | * @param[in] ps_curr_layer : layer context for current layer. |
696 | | * |
697 | | * @return search node corresponding to hte "other direction" |
698 | | ******************************************************************************** |
699 | | */ |
700 | | //#define PICK_L1_REF_SAME_DIR |
701 | | search_node_t *hme_pick_back_search_node( |
702 | | search_results_t *ps_search_results, |
703 | | search_node_t *ps_search_node_fwd, |
704 | | S32 i4_part_idx, |
705 | | layer_ctxt_t *ps_curr_layer) |
706 | 0 | { |
707 | 0 | S32 is_past_l0, is_past_l1, id, i, i4_poc; |
708 | 0 | S32 *pi4_ref_id_to_poc_lc = ps_curr_layer->ai4_ref_id_to_poc_lc; |
709 | | //ref_attr_t *ps_ref_attr_lc; |
710 | 0 | S08 i1_ref_idx_fwd; |
711 | 0 | S16 i2_mv_x, i2_mv_y; |
712 | 0 | search_node_t *ps_search_node; |
713 | |
|
714 | 0 | i1_ref_idx_fwd = ps_search_node_fwd->i1_ref_idx; |
715 | 0 | i2_mv_x = ps_search_node_fwd->s_mv.i2_mvx; |
716 | 0 | i2_mv_y = ps_search_node_fwd->s_mv.i2_mvy; |
717 | 0 | i4_poc = ps_curr_layer->i4_poc; |
718 | | |
719 | | //ps_ref_attr_lc = &ps_curr_layer->as_ref_attr_lc[0]; |
720 | | /* If the ref id already picked up maps to a past pic, then we pick */ |
721 | | /* a result corresponding to future pic. If such a result is not */ |
722 | | /* to be found, then we pick a result corresponding to a past pic */ |
723 | | //is_past = ps_ref_attr_lc[i1_ref_idx_fwd].u1_is_past; |
724 | 0 | is_past_l0 = (i4_poc > pi4_ref_id_to_poc_lc[i1_ref_idx_fwd]) ? 1 : 0; |
725 | |
|
726 | 0 | ASSERT(ps_search_results->u1_num_active_ref <= 2); |
727 | | |
728 | | /* pick the right iteration of search nodes to pick up */ |
729 | | #ifdef PICK_L1_REF_SAME_DIR |
730 | | if(ps_search_results->u1_num_active_ref == 2) |
731 | | id = !is_past_l0; |
732 | | #else |
733 | 0 | if(ps_search_results->u1_num_active_ref == 2) |
734 | 0 | id = is_past_l0; |
735 | 0 | #endif |
736 | 0 | else |
737 | 0 | id = 0; |
738 | |
|
739 | 0 | ps_search_node = ps_search_results->aps_part_results[id][i4_part_idx]; |
740 | |
|
741 | 0 | for(i = 0; i < ps_search_results->u1_num_results_per_part; i++) |
742 | 0 | { |
743 | 0 | S08 i1_ref_test = ps_search_node[i].i1_ref_idx; |
744 | 0 | is_past_l1 = (pi4_ref_id_to_poc_lc[i1_ref_test] < i4_poc) ? 1 : 0; |
745 | | //if (ps_ref_attr_lc[ps_search_node[i].i1_ref_idx].u1_is_past != is_past) |
746 | | #ifdef PICK_L1_REF_SAME_DIR |
747 | | if(is_past_l1 == is_past_l0) |
748 | | #else |
749 | 0 | if(is_past_l1 != is_past_l0) |
750 | 0 | #endif |
751 | 0 | { |
752 | | /* belongs to same direction as the ref idx passed, so continue */ |
753 | 0 | return (ps_search_node + i); |
754 | 0 | } |
755 | 0 | } |
756 | | |
757 | | /* Unable to find best result in opp direction, so try same direction */ |
758 | | /* However we need to ensure that we do not pick up same result */ |
759 | 0 | for(i = 0; i < ps_search_results->u1_num_results_per_part; i++) |
760 | 0 | { |
761 | 0 | if((ps_search_node->i1_ref_idx != i1_ref_idx_fwd) || |
762 | 0 | (ps_search_node->s_mv.i2_mvx != i2_mv_x) || (ps_search_node->s_mv.i2_mvy != i2_mv_y)) |
763 | 0 | { |
764 | 0 | return (ps_search_node); |
765 | 0 | } |
766 | 0 | ps_search_node++; |
767 | 0 | } |
768 | | |
769 | | //ASSERT(0); |
770 | 0 | return (ps_search_results->aps_part_results[id][i4_part_idx]); |
771 | | |
772 | | //return (NULL); |
773 | 0 | } |
774 | | |
775 | | /** |
776 | | ******************************************************************************** |
777 | | * @fn hme_study_input_segmentation(U08 *pu1_inp, S32 i4_inp_stride) |
778 | | * |
779 | | * |
780 | | * @brief Examines input 16x16 for possible edges and orientations of those, |
781 | | * and returns a bit mask of partitions that should be searched for |
782 | | * |
783 | | * @param[in] pu1_inp : input buffer |
784 | | * |
785 | | * @param[in] i4_inp_stride: input stride |
786 | | * |
787 | | * @return part mask (bit mask of active partitions to search) |
788 | | ******************************************************************************** |
789 | | */ |
790 | | |
791 | | S32 hme_study_input_segmentation(U08 *pu1_inp, S32 i4_inp_stride, S32 limit_active_partitions) |
792 | 1.49M | { |
793 | 1.49M | S32 i4_rsum[16], i4_csum[16]; |
794 | 1.49M | U08 *pu1_tmp, u1_tmp; |
795 | 1.49M | S32 i4_max_ridx, i4_max_cidx, i4_tmp; |
796 | 1.49M | S32 i, j, i4_ret; |
797 | 1.49M | S32 i4_max_rp[4], i4_max_cp[4]; |
798 | 1.49M | S32 i4_seg_lutc[4] = { 0, ENABLE_nLx2N, ENABLE_Nx2N, ENABLE_nRx2N }; |
799 | 1.49M | S32 i4_seg_lutr[4] = { 0, ENABLE_2NxnU, ENABLE_2NxN, ENABLE_2NxnD }; |
800 | 1.89M | #define EDGE_THR (15 * 16) |
801 | 17.0M | #define HI_PASS(ptr, i) (2 * (ptr[i] - ptr[i - 1]) + (ptr[i + 1] - ptr[i - 2])) |
802 | | |
803 | 1.49M | if(0 == limit_active_partitions) |
804 | 544k | { |
805 | | /*********************************************************************/ |
806 | | /* In this case, we do not optimize on active partitions and search */ |
807 | | /* brute force. This way, 17 partitinos would be enabled. */ |
808 | | /*********************************************************************/ |
809 | 544k | return (ENABLE_ALL_PARTS); |
810 | 544k | } |
811 | | |
812 | | /*************************************************************************/ |
813 | | /* Control passes below in case we wish to optimize on active partitions.*/ |
814 | | /* This is based on input characteristics, check how an edge passes along*/ |
815 | | /* an input 16x16 area, if at all, and decide active partitinos. */ |
816 | | /*************************************************************************/ |
817 | | |
818 | | /* Initialize row and col sums */ |
819 | 16.0M | for(i = 0; i < 16; i++) |
820 | 15.1M | { |
821 | 15.1M | i4_rsum[i] = 0; |
822 | 15.1M | i4_csum[i] = 0; |
823 | 15.1M | } |
824 | 947k | pu1_tmp = pu1_inp; |
825 | 16.0M | for(i = 0; i < 16; i++) |
826 | 15.1M | { |
827 | 257M | for(j = 0; j < 16; j++) |
828 | 242M | { |
829 | 242M | u1_tmp = *pu1_tmp++; |
830 | 242M | i4_rsum[i] += u1_tmp; |
831 | 242M | i4_csum[j] += u1_tmp; |
832 | 242M | } |
833 | 15.1M | pu1_tmp += (i4_inp_stride - 16); |
834 | 15.1M | } |
835 | | |
836 | | /* 0 is dummy; 1 is 4; 2 is 8; 3 is 12 */ |
837 | 947k | i4_max_rp[0] = 0; |
838 | 947k | i4_max_cp[0] = 0; |
839 | 947k | i4_max_rp[1] = 0; |
840 | 947k | i4_max_cp[1] = 0; |
841 | 947k | i4_max_rp[2] = 0; |
842 | 947k | i4_max_cp[2] = 0; |
843 | 947k | i4_max_rp[3] = 0; |
844 | 947k | i4_max_cp[3] = 0; |
845 | | |
846 | | /* Get Max edge strength across (2,3) (3,4) (4,5) */ |
847 | 3.78M | for(i = 3; i < 6; i++) |
848 | 2.84M | { |
849 | | /* Run [-1 -2 2 1] filter through rsum/csum */ |
850 | 2.84M | i4_tmp = HI_PASS(i4_rsum, i); |
851 | 2.84M | if(ABS(i4_tmp) > i4_max_rp[1]) |
852 | 226k | i4_max_rp[1] = i4_tmp; |
853 | | |
854 | 2.84M | i4_tmp = HI_PASS(i4_csum, i); |
855 | 2.84M | if(ABS(i4_tmp) > i4_max_cp[1]) |
856 | 245k | i4_max_cp[1] = i4_tmp; |
857 | 2.84M | } |
858 | | |
859 | | /* Get Max edge strength across (6,7) (7,8) (8,9) */ |
860 | 3.78M | for(i = 7; i < 10; i++) |
861 | 2.84M | { |
862 | | /* Run [-1 -2 2 1] filter through rsum/csum */ |
863 | 2.84M | i4_tmp = HI_PASS(i4_rsum, i); |
864 | 2.84M | if(ABS(i4_tmp) > i4_max_rp[2]) |
865 | 127k | i4_max_rp[2] = i4_tmp; |
866 | | |
867 | 2.84M | i4_tmp = HI_PASS(i4_csum, i); |
868 | 2.84M | if(ABS(i4_tmp) > i4_max_cp[2]) |
869 | 157k | i4_max_cp[2] = i4_tmp; |
870 | 2.84M | } |
871 | | |
872 | | /* Get Max edge strength across (10,11) (11,12) (12,13) */ |
873 | 3.78M | for(i = 11; i < 14; i++) |
874 | 2.84M | { |
875 | | /* Run [-1 -2 2 1] filter through rsum/csum */ |
876 | 2.84M | i4_tmp = HI_PASS(i4_rsum, i); |
877 | 2.84M | if(ABS(i4_tmp) > i4_max_rp[3]) |
878 | 113k | i4_max_rp[3] = i4_tmp; |
879 | | |
880 | 2.84M | i4_tmp = HI_PASS(i4_csum, i); |
881 | 2.84M | if(ABS(i4_tmp) > i4_max_cp[3]) |
882 | 106k | i4_max_cp[3] = i4_tmp; |
883 | 2.84M | } |
884 | | |
885 | | /* Find the maximum across the 3 and see whether the strength qualifies as edge */ |
886 | 947k | i4_max_ridx = 1; |
887 | 947k | i4_max_cidx = 1; |
888 | 2.84M | for(i = 2; i <= 3; i++) |
889 | 1.89M | { |
890 | 1.89M | if(i4_max_rp[i] > i4_max_rp[i4_max_ridx]) |
891 | 42.6k | i4_max_ridx = i; |
892 | | |
893 | 1.89M | if(i4_max_cp[i] > i4_max_cp[i4_max_cidx]) |
894 | 54.9k | i4_max_cidx = i; |
895 | 1.89M | } |
896 | | |
897 | 947k | if(EDGE_THR > i4_max_rp[i4_max_ridx]) |
898 | 902k | { |
899 | 902k | i4_max_ridx = 0; |
900 | 902k | } |
901 | | |
902 | 947k | if(EDGE_THR > i4_max_cp[i4_max_cidx]) |
903 | 907k | { |
904 | 907k | i4_max_cidx = 0; |
905 | 907k | } |
906 | | |
907 | 947k | i4_ret = ENABLE_2Nx2N; |
908 | | |
909 | | /* If only vertical discontinuity, go with one of 2Nx? */ |
910 | 947k | if(0 == (i4_max_ridx + i4_max_cidx)) |
911 | 884k | { |
912 | | //num_me_parts++; |
913 | 884k | return i4_ret; |
914 | 884k | } |
915 | | |
916 | 62.0k | if(i4_max_ridx && (i4_max_cidx == 0)) |
917 | 22.6k | { |
918 | | //num_me_parts += 3; |
919 | 22.6k | return ((i4_ret | i4_seg_lutr[i4_max_ridx])); |
920 | 22.6k | } |
921 | | |
922 | | /* If only horizontal discontinuity, go with one of ?x2N */ |
923 | 39.4k | if(i4_max_cidx && (i4_max_ridx == 0)) |
924 | 17.5k | { |
925 | | //num_me_parts += 3; |
926 | 17.5k | return ((i4_ret | i4_seg_lutc[i4_max_cidx])); |
927 | 17.5k | } |
928 | | |
929 | | /* If middle is dominant in both directions, go with NxN */ |
930 | 21.8k | if((2 == i4_max_cidx) && (2 == i4_max_ridx)) |
931 | 2.20k | { |
932 | | //num_me_parts += 5; |
933 | 2.20k | return ((i4_ret | ENABLE_NxN)); |
934 | 2.20k | } |
935 | | |
936 | | /* Otherwise, conservatively, enable NxN and the 2 AMPs */ |
937 | | //num_me_parts += 9; |
938 | 19.6k | return (i4_ret | ENABLE_NxN | i4_seg_lutr[i4_max_ridx] | i4_seg_lutc[i4_max_cidx]); |
939 | 21.8k | } |
940 | | |
941 | | /** |
942 | | ******************************************************************************** |
943 | | * @fn hme_init_search_results(search_results_t *ps_search_results, |
944 | | * S32 i4_num_ref, |
945 | | * S32 i4_num_best_results, |
946 | | * S32 i4_num_results_per_part, |
947 | | * BLK_SIZE_T e_blk_size, |
948 | | * S32 i4_x_off, |
949 | | * S32 i4_y_off) |
950 | | * |
951 | | * @brief Initializes the search results structure with some key attributes |
952 | | * |
953 | | * @param[out] ps_search_results : search results structure to initialise |
954 | | * |
955 | | * @param[in] i4_num_Ref: corresponds to the number of ref ids searched |
956 | | * |
957 | | * @param[in] i4_num_best_results: Number of best results for the CU to |
958 | | * be maintained in the result structure |
959 | | * |
960 | | * @param[in] i4_num_results_per_part: Per active partition the number of best |
961 | | * results to be maintained |
962 | | * |
963 | | * @param[in] e_blk_size: blk size of the CU for which this structure used |
964 | | * |
965 | | * @param[in] i4_x_off: x offset of the top left of CU from CTB top left |
966 | | * |
967 | | * @param[in] i4_y_off: y offset of the top left of CU from CTB top left |
968 | | * |
969 | | * @param[in] pu1_is_past : points ot an array that tells whether a given ref id |
970 | | * has prominence in L0 or in L1 list (past or future ) |
971 | | * |
972 | | * @return void |
973 | | ******************************************************************************** |
974 | | */ |
975 | | void hme_init_search_results( |
976 | | search_results_t *ps_search_results, |
977 | | S32 i4_num_ref, |
978 | | S32 i4_num_best_results, |
979 | | S32 i4_num_results_per_part, |
980 | | BLK_SIZE_T e_blk_size, |
981 | | S32 i4_x_off, |
982 | | S32 i4_y_off, |
983 | | U08 *pu1_is_past) |
984 | 2.05M | { |
985 | 2.05M | CU_SIZE_T e_cu_size = ge_blk_size_to_cu_size[e_blk_size]; |
986 | | |
987 | 2.05M | ASSERT(e_cu_size != -1); |
988 | 2.05M | ps_search_results->e_cu_size = e_cu_size; |
989 | 2.05M | ps_search_results->u1_x_off = (U08)i4_x_off; |
990 | 2.05M | ps_search_results->u1_y_off = (U08)i4_y_off; |
991 | 2.05M | ps_search_results->u1_num_active_ref = (U08)i4_num_ref; |
992 | 2.05M | ps_search_results->u1_num_best_results = (U08)i4_num_best_results; |
993 | 2.05M | ps_search_results->u1_num_results_per_part = (U08)i4_num_results_per_part; |
994 | 2.05M | ps_search_results->pu1_is_past = pu1_is_past; |
995 | 2.05M | ps_search_results->u1_split_flag = 0; |
996 | 2.05M | ps_search_results->best_cu_cost = MAX_32BIT_VAL; |
997 | 2.05M | } |
998 | | |
999 | | /** |
1000 | | ******************************************************************************** |
1001 | | * @fn hme_reset_search_results((search_results_t *ps_search_results, |
1002 | | * S32 i4_part_mask) |
1003 | | * |
1004 | | * |
1005 | | * @brief Resets the best results to maximum values, so as to allow search |
1006 | | * for the new CU's partitions. The existing results may be from an |
1007 | | * older CU using same structure. |
1008 | | * |
1009 | | * @param[in] ps_search_results: search results structure |
1010 | | * |
1011 | | * @param[in] i4_part_mask : bit mask of active partitions |
1012 | | * |
1013 | | * @return part mask (bit mask of active partitions to search) |
1014 | | ******************************************************************************** |
1015 | | */ |
1016 | | void hme_reset_search_results(search_results_t *ps_search_results, S32 i4_part_mask, S32 mv_res) |
1017 | 6.04M | { |
1018 | 6.04M | S32 i4_num_ref = (S32)ps_search_results->u1_num_active_ref; |
1019 | 6.04M | S08 i1_ref_idx; |
1020 | 6.04M | S32 i, j; |
1021 | 6.04M | search_node_t *ps_search_node; |
1022 | | |
1023 | | /* store this for future use */ |
1024 | 6.04M | ps_search_results->i4_part_mask = i4_part_mask; |
1025 | | |
1026 | | /* Reset the spli_flag to zero */ |
1027 | 6.04M | ps_search_results->u1_split_flag = 0; |
1028 | | |
1029 | 6.04M | HME_SET_MVPRED_RES((&ps_search_results->as_pred_ctxt[0]), mv_res); |
1030 | 6.04M | HME_SET_MVPRED_RES((&ps_search_results->as_pred_ctxt[1]), mv_res); |
1031 | | |
1032 | 16.8M | for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref; i1_ref_idx++) |
1033 | 10.7M | { |
1034 | | /* Reset the individual partitino results */ |
1035 | 193M | for(i = 0; i < TOT_NUM_PARTS; i++) |
1036 | 183M | { |
1037 | 183M | if(!(i4_part_mask & (1 << i))) |
1038 | 144M | continue; |
1039 | | |
1040 | 39.0M | ps_search_node = ps_search_results->aps_part_results[i1_ref_idx][i]; |
1041 | | |
1042 | 80.9M | for(j = 0; j < ps_search_results->u1_num_results_per_part; j++) |
1043 | 41.8M | { |
1044 | 41.8M | ps_search_node[j].s_mv.i2_mvx = 0; |
1045 | 41.8M | ps_search_node[j].s_mv.i2_mvy = 0; |
1046 | 41.8M | ps_search_node[j].i4_tot_cost = MAX_32BIT_VAL; |
1047 | 41.8M | ps_search_node[j].i4_sad = MAX_32BIT_VAL; |
1048 | 41.8M | ps_search_node[j].i4_sdi = 0; |
1049 | 41.8M | ps_search_node[j].i1_ref_idx = -1; |
1050 | 41.8M | ps_search_node[j].u1_subpel_done = 0; |
1051 | 41.8M | ps_search_node[j].u1_is_avail = 1; |
1052 | 41.8M | ps_search_node[j].i4_mv_cost = 0; |
1053 | 41.8M | } |
1054 | 39.0M | } |
1055 | 10.7M | } |
1056 | 6.04M | } |
1057 | | /** |
1058 | | ******************************************************************************** |
1059 | | * @fn hme_clamp_grid_by_mvrange(search_node_t *ps_search_node, |
1060 | | * S32 i4_step, |
1061 | | * range_prms_t *ps_mvrange) |
1062 | | * |
1063 | | * @brief Given a central pt within mv range, and a grid of points surrounding |
1064 | | * this pt, this function returns a grid mask of pts within search rng |
1065 | | * |
1066 | | * @param[in] ps_search_node: the centre pt of the grid |
1067 | | * |
1068 | | * @param[in] i4_step: step size of grid |
1069 | | * |
1070 | | * @param[in] ps_mvrange: structure containing the current mv range |
1071 | | * |
1072 | | * @return bitmask of the pts in grid within search range |
1073 | | ******************************************************************************** |
1074 | | */ |
1075 | | S32 hme_clamp_grid_by_mvrange(search_node_t *ps_search_node, S32 i4_step, range_prms_t *ps_mvrange) |
1076 | 40.5M | { |
1077 | 40.5M | S32 i4_mask = GRID_ALL_PTS_VALID; |
1078 | 40.5M | if(ps_search_node->s_mv.i2_mvx + i4_step >= ps_mvrange->i2_max_x) |
1079 | 8.11k | { |
1080 | 8.11k | i4_mask &= (GRID_RT_3_INVALID); |
1081 | 8.11k | } |
1082 | 40.5M | if(ps_search_node->s_mv.i2_mvx - i4_step < ps_mvrange->i2_min_x) |
1083 | 347k | { |
1084 | 347k | i4_mask &= (GRID_LT_3_INVALID); |
1085 | 347k | } |
1086 | 40.5M | if(ps_search_node->s_mv.i2_mvy + i4_step >= ps_mvrange->i2_max_y) |
1087 | 186k | { |
1088 | 186k | i4_mask &= (GRID_BOT_3_INVALID); |
1089 | 186k | } |
1090 | 40.5M | if(ps_search_node->s_mv.i2_mvy - i4_step < ps_mvrange->i2_min_y) |
1091 | 377k | { |
1092 | 377k | i4_mask &= (GRID_TOP_3_INVALID); |
1093 | 377k | } |
1094 | 40.5M | return i4_mask; |
1095 | 40.5M | } |
1096 | | |
1097 | | /** |
1098 | | ******************************************************************************** |
1099 | | * @fn layer_ctxt_t *hme_get_past_layer_ctxt(me_ctxt_t *ps_ctxt, |
1100 | | S32 i4_layer_id) |
1101 | | * |
1102 | | * @brief returns the layer ctxt of the layer with given id from the temporally |
1103 | | * previous frame |
1104 | | * |
1105 | | * @param[in] ps_ctxt : ME context |
1106 | | * |
1107 | | * @param[in] i4_layer_id : id of layer required |
1108 | | * |
1109 | | * @return layer ctxt of given layer id in temporally previous frame |
1110 | | ******************************************************************************** |
1111 | | */ |
1112 | | layer_ctxt_t *hme_get_past_layer_ctxt( |
1113 | | me_ctxt_t *ps_ctxt, me_frm_ctxt_t *ps_frm_ctxt, S32 i4_layer_id, S32 i4_num_me_frm_pllel) |
1114 | 85.3k | { |
1115 | 85.3k | S32 i4_poc = ps_frm_ctxt->ai4_ref_idx_to_poc_lc[0]; |
1116 | 85.3k | S32 i; |
1117 | 85.3k | layers_descr_t *ps_desc; |
1118 | | |
1119 | 238k | for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1; i++) |
1120 | 238k | { |
1121 | 238k | ps_desc = &ps_ctxt->as_ref_descr[i]; |
1122 | 238k | if(i4_poc == ps_desc->aps_layers[i4_layer_id]->i4_poc) |
1123 | 85.3k | return (ps_desc->aps_layers[i4_layer_id]); |
1124 | 238k | } |
1125 | 0 | return NULL; |
1126 | 85.3k | } |
1127 | | |
1128 | | /** |
1129 | | ******************************************************************************** |
1130 | | * @fn layer_ctxt_t *hme_coarse_get_past_layer_ctxt(me_ctxt_t *ps_ctxt, |
1131 | | S32 i4_layer_id) |
1132 | | * |
1133 | | * @brief returns the layer ctxt of the layer with given id from the temporally |
1134 | | * previous frame |
1135 | | * |
1136 | | * @param[in] ps_ctxt : ME context |
1137 | | * |
1138 | | * @param[in] i4_layer_id : id of layer required |
1139 | | * |
1140 | | * @return layer ctxt of given layer id in temporally previous frame |
1141 | | ******************************************************************************** |
1142 | | */ |
1143 | | layer_ctxt_t *hme_coarse_get_past_layer_ctxt(coarse_me_ctxt_t *ps_ctxt, S32 i4_layer_id) |
1144 | 128k | { |
1145 | 128k | S32 i4_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[0]; |
1146 | 128k | S32 i; |
1147 | 128k | layers_descr_t *ps_desc; |
1148 | | |
1149 | 415k | for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++) |
1150 | 400k | { |
1151 | 400k | ps_desc = &ps_ctxt->as_ref_descr[i]; |
1152 | 400k | if(i4_poc == ps_desc->aps_layers[i4_layer_id]->i4_poc) |
1153 | 113k | return (ps_desc->aps_layers[i4_layer_id]); |
1154 | 400k | } |
1155 | 14.6k | return NULL; |
1156 | 128k | } |
1157 | | |
1158 | | /** |
1159 | | ******************************************************************************** |
1160 | | * @fn void hme_init_mv_bank(layer_ctxt_t *ps_layer_ctxt, |
1161 | | BLK_SIZE_T e_blk_size, |
1162 | | S32 i4_num_ref, |
1163 | | S32 i4_num_results_per_part) |
1164 | | * |
1165 | | * @brief Given a blk size to be used for this layer, this function initialize |
1166 | | * the mv bank to make it ready to store and return results. |
1167 | | * |
1168 | | * @param[in, out] ps_layer_ctxt: pointer to layer ctxt |
1169 | | * |
1170 | | * @param[in] e_blk_size : resolution at which mvs are stored |
1171 | | * |
1172 | | * @param[in] i4_num_ref: number of reference frames corresponding to which |
1173 | | * results are stored. |
1174 | | * |
1175 | | * @param[in] e_blk_size : resolution at which mvs are stored |
1176 | | * |
1177 | | * @param[in] i4_num_results_per_part : Number of results to be stored per |
1178 | | * ref idx. So these many best results stored |
1179 | | * |
1180 | | * @return void |
1181 | | ******************************************************************************** |
1182 | | */ |
1183 | | void hme_init_mv_bank( |
1184 | | layer_ctxt_t *ps_layer_ctxt, |
1185 | | BLK_SIZE_T e_blk_size, |
1186 | | S32 i4_num_ref, |
1187 | | S32 i4_num_results_per_part, |
1188 | | U08 u1_enc) |
1189 | 385k | { |
1190 | 385k | layer_mv_t *ps_mv_bank; |
1191 | 385k | hme_mv_t *ps_mv1, *ps_mv2; |
1192 | 385k | S08 *pi1_ref_id1, *pi1_ref_id2; |
1193 | 385k | S32 blk_wd, mvs_in_blk, blks_in_row, mvs_in_row, blks_in_col; |
1194 | 385k | S32 i4_i, i4_j, blk_ht; |
1195 | | |
1196 | 385k | ps_mv_bank = ps_layer_ctxt->ps_layer_mvbank; |
1197 | 385k | ps_mv_bank->i4_num_mvs_per_ref = i4_num_results_per_part; |
1198 | 385k | ps_mv_bank->i4_num_ref = i4_num_ref; |
1199 | 385k | mvs_in_blk = i4_num_ref * i4_num_results_per_part; |
1200 | 385k | ps_mv_bank->i4_num_mvs_per_blk = mvs_in_blk; |
1201 | | |
1202 | | /*************************************************************************/ |
1203 | | /* Store blk size, from blk size derive blk width and use this to compute*/ |
1204 | | /* number of blocks every row. We also pad to left and top by 1, to */ |
1205 | | /* support the prediction mechanism. */ |
1206 | | /*************************************************************************/ |
1207 | 385k | ps_mv_bank->e_blk_size = e_blk_size; |
1208 | 385k | blk_wd = gau1_blk_size_to_wd[e_blk_size]; |
1209 | 385k | blk_ht = gau1_blk_size_to_ht[e_blk_size]; |
1210 | | |
1211 | 385k | blks_in_row = (ps_layer_ctxt->i4_wd + (blk_wd - 1)) / blk_wd; |
1212 | 385k | blks_in_col = (ps_layer_ctxt->i4_ht + (blk_ht - 1)) / blk_ht; |
1213 | | |
1214 | 385k | if(u1_enc) |
1215 | 128k | { |
1216 | | /* TODO: CTB64x64 is assumed. FIX according to actual CTB */ |
1217 | 128k | WORD32 num_ctb_cols = ((ps_layer_ctxt->i4_wd + 63) >> 6); |
1218 | 128k | WORD32 num_ctb_rows = ((ps_layer_ctxt->i4_ht + 63) >> 6); |
1219 | | |
1220 | 128k | blks_in_row = (num_ctb_cols << 3); |
1221 | 128k | blks_in_col = (num_ctb_rows << 3); |
1222 | 128k | } |
1223 | | |
1224 | 385k | blks_in_row += 2; |
1225 | 385k | mvs_in_row = blks_in_row * mvs_in_blk; |
1226 | | |
1227 | 385k | ps_mv_bank->i4_num_blks_per_row = blks_in_row; |
1228 | 385k | ps_mv_bank->i4_num_mvs_per_row = mvs_in_row; |
1229 | | |
1230 | | /* To ensure run time requirements fall within allocation time request */ |
1231 | 385k | ASSERT(ps_mv_bank->i4_num_mvs_per_row <= ps_mv_bank->max_num_mvs_per_row); |
1232 | | |
1233 | | /*************************************************************************/ |
1234 | | /* Increment by one full row at top for padding and one column in left */ |
1235 | | /* this gives us the actual start of mv for 0,0 blk */ |
1236 | | /*************************************************************************/ |
1237 | 385k | ps_mv_bank->ps_mv = ps_mv_bank->ps_mv_base + mvs_in_row + mvs_in_blk; |
1238 | 385k | ps_mv_bank->pi1_ref_idx = ps_mv_bank->pi1_ref_idx_base + mvs_in_row + mvs_in_blk; |
1239 | | |
1240 | 385k | memset(ps_mv_bank->ps_mv_base, 0, mvs_in_row * sizeof(hme_mv_t)); |
1241 | 385k | memset(ps_mv_bank->pi1_ref_idx_base, -1, mvs_in_row * sizeof(U08)); |
1242 | | |
1243 | | /*************************************************************************/ |
1244 | | /* Initialize top row, left col and right col with zeros since these are */ |
1245 | | /* used as candidates during searches. */ |
1246 | | /*************************************************************************/ |
1247 | 385k | ps_mv1 = ps_mv_bank->ps_mv_base + mvs_in_row; |
1248 | 385k | ps_mv2 = ps_mv1 + mvs_in_row - mvs_in_blk; |
1249 | 385k | pi1_ref_id1 = ps_mv_bank->pi1_ref_idx_base + mvs_in_row; |
1250 | 385k | pi1_ref_id2 = pi1_ref_id1 + mvs_in_row - mvs_in_blk; |
1251 | 3.01M | for(i4_i = 0; i4_i < blks_in_col; i4_i++) |
1252 | 2.62M | { |
1253 | 10.4M | for(i4_j = 0; i4_j < mvs_in_blk; i4_j++) |
1254 | 7.84M | { |
1255 | 7.84M | ps_mv1[i4_j].i2_mv_x = 0; |
1256 | 7.84M | ps_mv1[i4_j].i2_mv_y = 0; |
1257 | 7.84M | ps_mv2[i4_j].i2_mv_x = 0; |
1258 | 7.84M | ps_mv2[i4_j].i2_mv_y = 0; |
1259 | 7.84M | pi1_ref_id1[i4_j] = -1; |
1260 | 7.84M | pi1_ref_id2[i4_j] = -1; |
1261 | 7.84M | } |
1262 | 2.62M | ps_mv1 += mvs_in_row; |
1263 | 2.62M | ps_mv2 += mvs_in_row; |
1264 | 2.62M | pi1_ref_id1 += mvs_in_row; |
1265 | 2.62M | pi1_ref_id2 += mvs_in_row; |
1266 | 2.62M | } |
1267 | 385k | } |
1268 | | void hme_fill_mvbank_intra(layer_ctxt_t *ps_layer_ctxt) |
1269 | 128k | { |
1270 | 128k | layer_mv_t *ps_mv_bank; |
1271 | 128k | hme_mv_t *ps_mv; |
1272 | 128k | S08 *pi1_ref_id; |
1273 | 128k | S32 blk_wd, blks_in_row, mvs_in_row, blks_in_col; |
1274 | 128k | S32 i, j, blk_ht; |
1275 | 128k | BLK_SIZE_T e_blk_size; |
1276 | | |
1277 | 128k | ps_mv_bank = ps_layer_ctxt->ps_layer_mvbank; |
1278 | | |
1279 | | /*************************************************************************/ |
1280 | | /* Store blk size, from blk size derive blk width and use this to compute*/ |
1281 | | /* number of blocks every row. We also pad to left and top by 1, to */ |
1282 | | /* support the prediction mechanism. */ |
1283 | | /*************************************************************************/ |
1284 | 128k | e_blk_size = ps_mv_bank->e_blk_size; |
1285 | 128k | blk_wd = gau1_blk_size_to_wd[e_blk_size]; |
1286 | 128k | blk_ht = gau1_blk_size_to_wd[e_blk_size]; |
1287 | 128k | blks_in_row = ps_layer_ctxt->i4_wd / blk_wd; |
1288 | 128k | blks_in_col = ps_layer_ctxt->i4_ht / blk_ht; |
1289 | 128k | mvs_in_row = blks_in_row * ps_mv_bank->i4_num_mvs_per_blk; |
1290 | | |
1291 | | /*************************************************************************/ |
1292 | | /* Increment by one full row at top for padding and one column in left */ |
1293 | | /* this gives us the actual start of mv for 0,0 blk */ |
1294 | | /*************************************************************************/ |
1295 | 128k | ps_mv = ps_mv_bank->ps_mv; |
1296 | 128k | pi1_ref_id = ps_mv_bank->pi1_ref_idx; |
1297 | | |
1298 | 1.06M | for(i = 0; i < blks_in_col; i++) |
1299 | 934k | { |
1300 | 14.9M | for(j = 0; j < blks_in_row; j++) |
1301 | 13.9M | { |
1302 | 13.9M | ps_mv[j].i2_mv_x = INTRA_MV; |
1303 | 13.9M | ps_mv[j].i2_mv_y = INTRA_MV; |
1304 | 13.9M | pi1_ref_id[j] = -1; |
1305 | 13.9M | } |
1306 | 934k | ps_mv += ps_mv_bank->i4_num_mvs_per_row; |
1307 | 934k | pi1_ref_id += ps_mv_bank->i4_num_mvs_per_row; |
1308 | 934k | } |
1309 | 128k | } |
1310 | | |
1311 | | /** |
1312 | | ******************************************************************************** |
1313 | | * @fn void hme_derive_search_range(range_prms_t *ps_range, |
1314 | | * range_prms_t *ps_pic_limit, |
1315 | | * range_prms_t *ps_mv_limit, |
1316 | | * S32 i4_x, |
1317 | | * S32 i4_y, |
1318 | | * S32 blk_wd, |
1319 | | * S32 blk_ht) |
1320 | | * |
1321 | | * @brief given picture limits and blk dimensions and mv search limits, obtains |
1322 | | * teh valid search range such that the blk stays within pic boundaries, |
1323 | | * where picture boundaries include padded portions of picture |
1324 | | * |
1325 | | * @param[out] ps_range: updated with actual search range |
1326 | | * |
1327 | | * @param[in] ps_pic_limit : picture boundaries |
1328 | | * |
1329 | | * @param[in] ps_mv_limit: Search range limits for the mvs |
1330 | | * |
1331 | | * @param[in] i4_x : x coordinate of the blk |
1332 | | * |
1333 | | * @param[in] i4_y : y coordinate of the blk |
1334 | | * |
1335 | | * @param[in] blk_wd : blk width |
1336 | | * |
1337 | | * @param[in] blk_ht : blk height |
1338 | | * |
1339 | | * @return void |
1340 | | ******************************************************************************** |
1341 | | */ |
1342 | | void hme_derive_search_range( |
1343 | | range_prms_t *ps_range, |
1344 | | range_prms_t *ps_pic_limit, |
1345 | | range_prms_t *ps_mv_limit, |
1346 | | S32 i4_x, |
1347 | | S32 i4_y, |
1348 | | S32 blk_wd, |
1349 | | S32 blk_ht) |
1350 | 14.0M | { |
1351 | 14.0M | ps_range->i2_max_x = |
1352 | 14.0M | MIN((ps_pic_limit->i2_max_x - (S16)blk_wd - (S16)i4_x), ps_mv_limit->i2_max_x); |
1353 | 14.0M | ps_range->i2_min_x = MAX((ps_pic_limit->i2_min_x - (S16)i4_x), ps_mv_limit->i2_min_x); |
1354 | 14.0M | ps_range->i2_max_y = |
1355 | 14.0M | MIN((ps_pic_limit->i2_max_y - (S16)blk_ht - (S16)i4_y), ps_mv_limit->i2_max_y); |
1356 | 14.0M | ps_range->i2_min_y = MAX((ps_pic_limit->i2_min_y - (S16)i4_y), ps_mv_limit->i2_min_y); |
1357 | 14.0M | } |
1358 | | |
1359 | | /** |
1360 | | ******************************************************************************** |
1361 | | * @fn void hme_get_spatial_candt(search_node_t *ps_search_node, |
1362 | | * layer_ctxt_t *ps_curr_layer, |
1363 | | * S32 i4_blk_x, |
1364 | | * S32 i4_blk_y, |
1365 | | * S08 i1_ref_id, |
1366 | | * S32 i4_result_id) |
1367 | | * |
1368 | | * @brief obtains a candt from the same mv bank as the current one, its called |
1369 | | * spatial candt as it does not require scaling for temporal distances |
1370 | | * |
1371 | | * @param[out] ps_search_node: mv and ref id updated here of the candt |
1372 | | * |
1373 | | * @param[in] ps_curr_layer: layer ctxt, has the mv bank structure pointer |
1374 | | * |
1375 | | * @param[in] i4_blk_x : x coordinate of the block in mv bank |
1376 | | * |
1377 | | * @param[in] i4_blk_y : y coordinate of the block in mv bank |
1378 | | * |
1379 | | * @param[in] i1_ref_id : Corresponds to ref idx from which to pick up mv |
1380 | | * results, useful if multiple ref idx candts maintained separately. |
1381 | | * |
1382 | | * @param[in] i4_result_id : If multiple results stored per ref idx, this |
1383 | | * pts to the id of the result |
1384 | | * |
1385 | | * @param[in] tr_avail : top right availability of the block |
1386 | | * |
1387 | | * @param[in] bl_avail : bottom left availability of the block |
1388 | | * |
1389 | | * @return void |
1390 | | ******************************************************************************** |
1391 | | */ |
1392 | | void hme_get_spatial_candt( |
1393 | | layer_ctxt_t *ps_curr_layer, |
1394 | | BLK_SIZE_T e_search_blk_size, |
1395 | | S32 i4_blk_x, |
1396 | | S32 i4_blk_y, |
1397 | | S08 i1_ref_idx, |
1398 | | search_node_t *ps_top_neighbours, |
1399 | | search_node_t *ps_left_neighbours, |
1400 | | S32 i4_result_id, |
1401 | | S32 tr_avail, |
1402 | | S32 bl_avail, |
1403 | | S32 encode) |
1404 | | |
1405 | 3.33M | { |
1406 | 3.33M | layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank; |
1407 | 3.33M | S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size]; |
1408 | 3.33M | S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size]; |
1409 | 3.33M | search_node_t *ps_search_node; |
1410 | 3.33M | S32 i4_offset; |
1411 | 3.33M | hme_mv_t *ps_mv, *ps_mv_base; |
1412 | 3.33M | S08 *pi1_ref_idx, *pi1_ref_idx_base; |
1413 | 3.33M | S32 jump = 1, mvs_in_blk, mvs_in_row; |
1414 | 3.33M | S32 shift = (encode ? 2 : 0); |
1415 | | |
1416 | 3.33M | if(i4_blk_size1 != i4_blk_size2) |
1417 | 17.6k | { |
1418 | 17.6k | i4_blk_x <<= 1; |
1419 | 17.6k | i4_blk_y <<= 1; |
1420 | 17.6k | jump = 2; |
1421 | 17.6k | if((i4_blk_size1 << 2) == i4_blk_size2) |
1422 | 0 | { |
1423 | 0 | i4_blk_x <<= 1; |
1424 | 0 | i4_blk_y <<= 1; |
1425 | 0 | jump = 4; |
1426 | 0 | } |
1427 | 17.6k | } |
1428 | | |
1429 | 3.33M | mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk; |
1430 | 3.33M | mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row; |
1431 | | |
1432 | | /* Adjust teh blk coord to point to top left locn */ |
1433 | 3.33M | i4_blk_x -= 1; |
1434 | 3.33M | i4_blk_y -= 1; |
1435 | | /* Pick up the mvs from the location */ |
1436 | 3.33M | i4_offset = (i4_blk_x * ps_layer_mvbank->i4_num_mvs_per_blk); |
1437 | 3.33M | i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * i4_blk_y); |
1438 | | |
1439 | 3.33M | ps_mv = ps_layer_mvbank->ps_mv + i4_offset; |
1440 | 3.33M | pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset; |
1441 | | |
1442 | 3.33M | ps_mv += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref) + i4_result_id; |
1443 | 3.33M | pi1_ref_idx += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref) + i4_result_id; |
1444 | | |
1445 | 3.33M | ps_mv_base = ps_mv; |
1446 | 3.33M | pi1_ref_idx_base = pi1_ref_idx; |
1447 | | |
1448 | | /* ps_mv and pi1_ref_idx now point to the top left locn */ |
1449 | | /* Get 4 mvs as follows: */ |
1450 | 3.33M | ps_search_node = ps_top_neighbours; |
1451 | 3.33M | COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift); |
1452 | | |
1453 | | /* Move to top */ |
1454 | 3.33M | ps_search_node++; |
1455 | 3.33M | ps_mv += mvs_in_blk; |
1456 | 3.33M | pi1_ref_idx += mvs_in_blk; |
1457 | 3.33M | COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift); |
1458 | | |
1459 | | /* Move to t1 : relevant for 4x4 part searches or for partitions i 16x16 */ |
1460 | 3.33M | if(ps_layer_mvbank->i4_num_mvs_per_ref > 1) |
1461 | 3.33M | { |
1462 | 3.33M | ps_search_node++; |
1463 | 3.33M | ps_mv += (mvs_in_blk * (jump >> 1)); |
1464 | 3.33M | pi1_ref_idx += (mvs_in_blk * (jump >> 1)); |
1465 | 3.33M | COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift); |
1466 | 3.33M | } |
1467 | 0 | else |
1468 | 0 | { |
1469 | 0 | ps_search_node++; |
1470 | 0 | ps_search_node->s_mv.i2_mvx = 0; |
1471 | 0 | ps_search_node->s_mv.i2_mvy = 0; |
1472 | 0 | ps_search_node->i1_ref_idx = i1_ref_idx; |
1473 | 0 | ps_search_node->u1_is_avail = 0; |
1474 | 0 | ps_search_node->u1_subpel_done = 0; |
1475 | 0 | } |
1476 | | |
1477 | | /* Move to tr: this will be tr w.r.t. the blk being searched */ |
1478 | 3.33M | ps_search_node++; |
1479 | 3.33M | if(tr_avail == 0) |
1480 | 0 | { |
1481 | 0 | ps_search_node->s_mv.i2_mvx = 0; |
1482 | 0 | ps_search_node->s_mv.i2_mvy = 0; |
1483 | 0 | ps_search_node->i1_ref_idx = i1_ref_idx; |
1484 | 0 | ps_search_node->u1_is_avail = 0; |
1485 | 0 | ps_search_node->u1_subpel_done = 0; |
1486 | 0 | } |
1487 | 3.33M | else |
1488 | 3.33M | { |
1489 | 3.33M | ps_mv = ps_mv_base + (mvs_in_blk * (1 + jump)); |
1490 | 3.33M | pi1_ref_idx = pi1_ref_idx_base + (mvs_in_blk * (1 + jump)); |
1491 | 3.33M | COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift); |
1492 | 3.33M | } |
1493 | | |
1494 | | /* Move to left */ |
1495 | 3.33M | ps_search_node = ps_left_neighbours; |
1496 | 3.33M | ps_mv = ps_mv_base + mvs_in_row; |
1497 | 3.33M | pi1_ref_idx = pi1_ref_idx_base + mvs_in_row; |
1498 | 3.33M | COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift); |
1499 | | |
1500 | | /* Move to l1 */ |
1501 | 3.33M | if(ps_layer_mvbank->i4_num_mvs_per_ref > 1) |
1502 | 3.33M | { |
1503 | 3.33M | ps_search_node++; |
1504 | 3.33M | ps_mv += (mvs_in_row * (jump >> 1)); |
1505 | 3.33M | pi1_ref_idx += (mvs_in_row * (jump >> 1)); |
1506 | 3.33M | COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift); |
1507 | 3.33M | } |
1508 | 0 | else |
1509 | 0 | { |
1510 | 0 | ps_search_node++; |
1511 | 0 | ps_search_node->s_mv.i2_mvx = 0; |
1512 | 0 | ps_search_node->s_mv.i2_mvy = 0; |
1513 | 0 | ps_search_node->i1_ref_idx = i1_ref_idx; |
1514 | 0 | ps_search_node->u1_is_avail = 0; |
1515 | 0 | ps_search_node->u1_subpel_done = 0; |
1516 | 0 | } |
1517 | | |
1518 | | /* Move to bl */ |
1519 | 3.33M | ps_search_node++; |
1520 | 3.33M | if(bl_avail == 0) |
1521 | 3.33M | { |
1522 | 3.33M | ps_search_node->s_mv.i2_mvx = 0; |
1523 | 3.33M | ps_search_node->s_mv.i2_mvy = 0; |
1524 | 3.33M | ps_search_node->i1_ref_idx = i1_ref_idx; |
1525 | 3.33M | ps_search_node->u1_is_avail = 0; |
1526 | 3.33M | } |
1527 | 0 | else |
1528 | 0 | { |
1529 | 0 | ps_mv = ps_mv_base + (mvs_in_row * (1 + jump)); |
1530 | 0 | pi1_ref_idx = pi1_ref_idx_base + (mvs_in_row * (1 + jump)); |
1531 | 0 | COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift); |
1532 | 0 | } |
1533 | 3.33M | } |
1534 | | /* Gathers top (TL, T, T1, TR) and left (L, L1, BL) spatial MV candidates of a blk from the layer mv bank; each mv is located via hme_find_pos_of_implicitly_stored_ref_id */ |
1535 | | void hme_get_spatial_candt_in_l1_me( |
1536 | | layer_ctxt_t *ps_curr_layer, /* [in] layer ctxt; only ps_layer_mvbank is read here */ |
1537 | | BLK_SIZE_T e_search_blk_size, /* [in] blk size of the search; compared with the mv bank blk size */ |
1538 | | S32 i4_blk_x, /* [in] blk x coord; rescaled below if the mv bank blk size differs */ |
1539 | | S32 i4_blk_y, /* [in] blk y coord; rescaled below if the mv bank blk size differs */ |
1540 | | S08 i1_ref_idx, /* [in] ref idx to look for; also stored in nodes that end up unavailable */ |
1541 | | U08 u1_pred_dir, /* [in] 1 => offset advanced by i4_num_mvs_per_ref * i4_num_act_ref_l0 */ |
1542 | | search_node_t *ps_top_neighbours, /* [out] 4 nodes filled in the order TL, T, T1, TR */ |
1543 | | search_node_t *ps_left_neighbours, /* [out] 3 nodes filled in the order L, L1, BL */ |
1544 | | S32 i4_result_id, /* [in] forwarded to hme_find_pos_of_implicitly_stored_ref_id */ |
1545 | | S32 tr_avail, /* [in] 0 => TR node is marked unavailable without a lookup */ |
1546 | | S32 bl_avail, /* [in] 0 => BL node is marked unavailable without a lookup */ |
1547 | | S32 i4_num_act_ref_l0, /* [in] scales i4_num_mvs_per_ref for pred dir 0 and for the dir 1 offset */ |
1548 | | S32 i4_num_act_ref_l1) /* [in] scales i4_num_mvs_per_ref for pred dir 1 */ |
1549 | 1.63M | { |
1550 | 1.63M | search_node_t *ps_search_node; |
1551 | 1.63M | hme_mv_t *ps_mv, *ps_mv_base; |
1552 | | |
1553 | 1.63M | S32 i4_offset; |
1554 | 1.63M | S32 mvs_in_blk, mvs_in_row; |
1555 | 1.63M | S08 *pi1_ref_idx, *pi1_ref_idx_base; |
1556 | 1.63M | S32 i4_mv_pos_in_implicit_array; |
1557 | | |
1558 | 1.63M | layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank; |
1559 | | /* Blk widths of the mv bank (size1) and of the current search (size2) */ |
1560 | 1.63M | S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size]; |
1561 | 1.63M | S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size]; |
1562 | 1.63M | S32 jump = 1; /* 1 when the blk sizes match; set to 2 or 4 below otherwise */ |
1563 | 1.63M | S32 shift = 0; /* passed as the last COPY_MV_TO_SEARCH_NODE argument */ |
1564 | 1.63M | S32 i4_num_results_in_given_dir = |
1565 | 1.63M | ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref * i4_num_act_ref_l1) |
1566 | 1.63M | : (ps_layer_mvbank->i4_num_mvs_per_ref * i4_num_act_ref_l0)); |
1567 | | /* Differing widths: blk coords are doubled (or quadrupled when search width is 4x the bank width) */ |
1568 | 1.63M | if(i4_blk_size1 != i4_blk_size2) |
1569 | 1.58M | { |
1570 | 1.58M | i4_blk_x <<= 1; |
1571 | 1.58M | i4_blk_y <<= 1; |
1572 | 1.58M | jump = 2; |
1573 | 1.58M | if((i4_blk_size1 << 2) == i4_blk_size2) |
1574 | 0 | { |
1575 | 0 | i4_blk_x <<= 1; |
1576 | 0 | i4_blk_y <<= 1; |
1577 | 0 | jump = 4; |
1578 | 0 | } |
1579 | 1.58M | } |
1580 | | |
1581 | 1.63M | mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk; |
1582 | 1.63M | mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row; |
1583 | | |
1584 | | /* Adjust the blk coord to point to top left locn */ |
1585 | 1.63M | i4_blk_x -= 1; |
1586 | 1.63M | i4_blk_y -= 1; |
1587 | | /* Pick up the mvs from the location */ |
1588 | 1.63M | i4_offset = (i4_blk_x * ps_layer_mvbank->i4_num_mvs_per_blk); |
1589 | 1.63M | i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * i4_blk_y); |
1590 | | /* For pred dir 1, skip i4_num_mvs_per_ref * i4_num_act_ref_l0 entries within the blk */ |
1591 | 1.63M | i4_offset += |
1592 | 1.63M | ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref * i4_num_act_ref_l0) : 0); |
1593 | | |
1594 | 1.63M | ps_mv = ps_layer_mvbank->ps_mv + i4_offset; |
1595 | 1.63M | pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset; |
1596 | | /* Keep the top left position; TR, L and BL are addressed relative to it */ |
1597 | 1.63M | ps_mv_base = ps_mv; |
1598 | 1.63M | pi1_ref_idx_base = pi1_ref_idx; |
1599 | | |
1600 | | /* TL */ |
1601 | 1.63M | { |
1602 | | /* ps_mv and pi1_ref_idx now point to the top left locn */ |
1603 | 1.63M | ps_search_node = ps_top_neighbours; |
1604 | | |
1605 | 1.63M | i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id( |
1606 | 1.63M | pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir); |
1607 | | /* -1 means no matching entry: the node is then marked unavailable with a zero mv */ |
1608 | 1.63M | if(-1 != i4_mv_pos_in_implicit_array) |
1609 | 714k | { |
1610 | 714k | COPY_MV_TO_SEARCH_NODE( |
1611 | 714k | ps_search_node, |
1612 | 714k | &ps_mv[i4_mv_pos_in_implicit_array], |
1613 | 714k | &pi1_ref_idx[i4_mv_pos_in_implicit_array], |
1614 | 714k | i1_ref_idx, |
1615 | 714k | shift); |
1616 | 714k | } |
1617 | 921k | else |
1618 | 921k | { |
1619 | 921k | ps_search_node->u1_is_avail = 0; |
1620 | 921k | ps_search_node->s_mv.i2_mvx = 0; |
1621 | 921k | ps_search_node->s_mv.i2_mvy = 0; |
1622 | 921k | ps_search_node->i1_ref_idx = i1_ref_idx; |
1623 | 921k | } |
1624 | 1.63M | } |
1625 | | /* NOTE(review): lookup-miss branches leave u1_subpel_done untouched, unlike the tr_avail == 0 path below - confirm intended */ |
1626 | | /* Move to top */ |
1627 | 1.63M | { |
1628 | | /* Step one blk to the right of top left: ps_mv and pi1_ref_idx then point to the top locn */ |
1629 | 1.63M | ps_search_node++; |
1630 | 1.63M | ps_mv += mvs_in_blk; |
1631 | 1.63M | pi1_ref_idx += mvs_in_blk; |
1632 | | |
1633 | 1.63M | i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id( |
1634 | 1.63M | pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir); |
1635 | | |
1636 | 1.63M | if(-1 != i4_mv_pos_in_implicit_array) |
1637 | 1.00M | { |
1638 | 1.00M | COPY_MV_TO_SEARCH_NODE( |
1639 | 1.00M | ps_search_node, |
1640 | 1.00M | &ps_mv[i4_mv_pos_in_implicit_array], |
1641 | 1.00M | &pi1_ref_idx[i4_mv_pos_in_implicit_array], |
1642 | 1.00M | i1_ref_idx, |
1643 | 1.00M | shift); |
1644 | 1.00M | } |
1645 | 631k | else |
1646 | 631k | { |
1647 | 631k | ps_search_node->u1_is_avail = 0; |
1648 | 631k | ps_search_node->s_mv.i2_mvx = 0; |
1649 | 631k | ps_search_node->s_mv.i2_mvy = 0; |
1650 | 631k | ps_search_node->i1_ref_idx = i1_ref_idx; |
1651 | 631k | } |
1652 | 1.63M | } |
1653 | | |
1654 | | /* Move to t1 : relevant for 4x4 part searches or for partitions in 16x16. Only looked up when i4_num_mvs_per_ref > 1 */ |
1655 | 1.63M | if(ps_layer_mvbank->i4_num_mvs_per_ref > 1) |
1656 | 0 | { |
1657 | 0 | ps_search_node++; |
1658 | 0 | ps_mv += (mvs_in_blk * (jump >> 1)); |
1659 | 0 | pi1_ref_idx += (mvs_in_blk * (jump >> 1)); |
1660 |

1661 | 0 | i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id( |
1662 | 0 | pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir); |
1663 |

1664 | 0 | if(-1 != i4_mv_pos_in_implicit_array) |
1665 | 0 | { |
1666 | 0 | COPY_MV_TO_SEARCH_NODE( |
1667 | 0 | ps_search_node, |
1668 | 0 | &ps_mv[i4_mv_pos_in_implicit_array], |
1669 | 0 | &pi1_ref_idx[i4_mv_pos_in_implicit_array], |
1670 | 0 | i1_ref_idx, |
1671 | 0 | shift); |
1672 | 0 | } |
1673 | 0 | else |
1674 | 0 | { |
1675 | 0 | ps_search_node->u1_is_avail = 0; |
1676 | 0 | ps_search_node->s_mv.i2_mvx = 0; |
1677 | 0 | ps_search_node->s_mv.i2_mvy = 0; |
1678 | 0 | ps_search_node->i1_ref_idx = i1_ref_idx; |
1679 | 0 | } |
1680 | 0 | } |
1681 | 1.63M | else |
1682 | 1.63M | { |
1683 | 1.63M | ps_search_node++; |
1684 | 1.63M | ps_search_node->u1_is_avail = 0; |
1685 | 1.63M | ps_search_node->s_mv.i2_mvx = 0; |
1686 | 1.63M | ps_search_node->s_mv.i2_mvy = 0; |
1687 | 1.63M | ps_search_node->i1_ref_idx = i1_ref_idx; |
1688 | 1.63M | } |
1689 | | |
1690 | | /* Move to tr: this will be tr w.r.t. the blk being searched */ |
1691 | 1.63M | ps_search_node++; |
1692 | 1.63M | if(tr_avail == 0) |
1693 | 0 | { |
1694 | 0 | ps_search_node->s_mv.i2_mvx = 0; |
1695 | 0 | ps_search_node->s_mv.i2_mvy = 0; |
1696 | 0 | ps_search_node->i1_ref_idx = i1_ref_idx; |
1697 | 0 | ps_search_node->u1_is_avail = 0; |
1698 | 0 | ps_search_node->u1_subpel_done = 0; |
1699 | 0 | } |
1700 | 1.63M | else |
1701 | 1.63M | { |
1702 | | /* Point ps_mv and pi1_ref_idx at the top right locn: (1 + jump) blks right of top left */ |
1703 | 1.63M | ps_mv = ps_mv_base + (mvs_in_blk * (1 + jump)); |
1704 | 1.63M | pi1_ref_idx = pi1_ref_idx_base + (mvs_in_blk * (1 + jump)); |
1705 | | |
1706 | 1.63M | i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id( |
1707 | 1.63M | pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir); |
1708 | | |
1709 | 1.63M | if(-1 != i4_mv_pos_in_implicit_array) |
1710 | 769k | { |
1711 | 769k | COPY_MV_TO_SEARCH_NODE( |
1712 | 769k | ps_search_node, |
1713 | 769k | &ps_mv[i4_mv_pos_in_implicit_array], |
1714 | 769k | &pi1_ref_idx[i4_mv_pos_in_implicit_array], |
1715 | 769k | i1_ref_idx, |
1716 | 769k | shift); |
1717 | 769k | } |
1718 | 866k | else |
1719 | 866k | { |
1720 | 866k | ps_search_node->u1_is_avail = 0; |
1721 | 866k | ps_search_node->s_mv.i2_mvx = 0; |
1722 | 866k | ps_search_node->s_mv.i2_mvy = 0; |
1723 | 866k | ps_search_node->i1_ref_idx = i1_ref_idx; |
1724 | 866k | } |
1725 | 1.63M | } |
1726 | | |
1727 | | /* Move to left */ |
1728 | 1.63M | { |
1729 | | /* Point ps_mv and pi1_ref_idx at the left locn: one mv bank row below top left */ |
1730 | 1.63M | ps_search_node = ps_left_neighbours; |
1731 | 1.63M | ps_mv = ps_mv_base + mvs_in_row; |
1732 | 1.63M | pi1_ref_idx = pi1_ref_idx_base + mvs_in_row; |
1733 | | |
1734 | 1.63M | i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id( |
1735 | 1.63M | pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir); |
1736 | | |
1737 | 1.63M | if(-1 != i4_mv_pos_in_implicit_array) |
1738 | 1.04M | { |
1739 | 1.04M | COPY_MV_TO_SEARCH_NODE( |
1740 | 1.04M | ps_search_node, |
1741 | 1.04M | &ps_mv[i4_mv_pos_in_implicit_array], |
1742 | 1.04M | &pi1_ref_idx[i4_mv_pos_in_implicit_array], |
1743 | 1.04M | i1_ref_idx, |
1744 | 1.04M | shift); |
1745 | 1.04M | } |
1746 | 588k | else |
1747 | 588k | { |
1748 | 588k | ps_search_node->u1_is_avail = 0; |
1749 | 588k | ps_search_node->s_mv.i2_mvx = 0; |
1750 | 588k | ps_search_node->s_mv.i2_mvy = 0; |
1751 | 588k | ps_search_node->i1_ref_idx = i1_ref_idx; |
1752 | 588k | } |
1753 | 1.63M | } |
1754 | | |
1755 | | /* Move to l1. Only looked up when i4_num_mvs_per_ref > 1 */ |
1756 | 1.63M | if(ps_layer_mvbank->i4_num_mvs_per_ref > 1) |
1757 | 0 | { |
1758 | | /* Step (jump >> 1) mv bank rows down from the left locn */ |
1759 | 0 | ps_search_node++; |
1760 | 0 | ps_mv += (mvs_in_row * (jump >> 1)); |
1761 | 0 | pi1_ref_idx += (mvs_in_row * (jump >> 1)); |
1762 |

1763 | 0 | i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id( |
1764 | 0 | pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir); |
1765 |

1766 | 0 | if(-1 != i4_mv_pos_in_implicit_array) |
1767 | 0 | { |
1768 | 0 | COPY_MV_TO_SEARCH_NODE( |
1769 | 0 | ps_search_node, |
1770 | 0 | &ps_mv[i4_mv_pos_in_implicit_array], |
1771 | 0 | &pi1_ref_idx[i4_mv_pos_in_implicit_array], |
1772 | 0 | i1_ref_idx, |
1773 | 0 | shift); |
1774 | 0 | } |
1775 | 0 | else |
1776 | 0 | { |
1777 | 0 | ps_search_node->u1_is_avail = 0; |
1778 | 0 | ps_search_node->s_mv.i2_mvx = 0; |
1779 | 0 | ps_search_node->s_mv.i2_mvy = 0; |
1780 | 0 | ps_search_node->i1_ref_idx = i1_ref_idx; |
1781 | 0 | } |
1782 | 0 | } |
1783 | 1.63M | else |
1784 | 1.63M | { |
1785 | 1.63M | ps_search_node++; |
1786 | 1.63M | ps_search_node->u1_is_avail = 0; |
1787 | 1.63M | ps_search_node->s_mv.i2_mvx = 0; |
1788 | 1.63M | ps_search_node->s_mv.i2_mvy = 0; |
1789 | 1.63M | ps_search_node->i1_ref_idx = i1_ref_idx; |
1790 | 1.63M | } |
1791 | | |
1792 | | /* Move to bl */ |
1793 | 1.63M | ps_search_node++; |
1794 | 1.63M | if(bl_avail == 0) |
1795 | 1.63M | { |
1796 | 1.63M | ps_search_node->s_mv.i2_mvx = 0; |
1797 | 1.63M | ps_search_node->s_mv.i2_mvy = 0; |
1798 | 1.63M | ps_search_node->i1_ref_idx = i1_ref_idx; |
1799 | 1.63M | ps_search_node->u1_is_avail = 0; |
1800 | 1.63M | } |
1801 | 0 | else |
1802 | 0 | { |
1803 | | /* Point ps_mv and pi1_ref_idx at the bottom left locn: (1 + jump) rows below top left */ |
1804 | 0 | ps_mv = ps_mv_base + (mvs_in_row * (1 + jump)); |
1805 | 0 | pi1_ref_idx = pi1_ref_idx_base + (mvs_in_row * (1 + jump)); |
1806 |

1807 | 0 | i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id( |
1808 | 0 | pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir); |
1809 |

1810 | 0 | if(-1 != i4_mv_pos_in_implicit_array) |
1811 | 0 | { |
1812 | 0 | COPY_MV_TO_SEARCH_NODE( |
1813 | 0 | ps_search_node, |
1814 | 0 | &ps_mv[i4_mv_pos_in_implicit_array], |
1815 | 0 | &pi1_ref_idx[i4_mv_pos_in_implicit_array], |
1816 | 0 | i1_ref_idx, |
1817 | 0 | shift); |
1818 | 0 | } |
1819 | 0 | else |
1820 | 0 | { |
1821 | 0 | ps_search_node->u1_is_avail = 0; |
1822 | 0 | ps_search_node->s_mv.i2_mvx = 0; |
1823 | 0 | ps_search_node->s_mv.i2_mvy = 0; |
1824 | 0 | ps_search_node->i1_ref_idx = i1_ref_idx; |
1825 | 0 | } |
1826 | 0 | } |
1827 | 1.63M | } |
1828 | | |
1829 | | /** |
1830 | | ******************************************************************************** |
1831 | | * @fn void hme_fill_ctb_neighbour_mvs(layer_ctxt_t *ps_curr_layer, |
1832 | | * S32 blk_x, S32 blk_y, |
1833 | | * mv_grid_t *ps_mv_grid, |
1834 | | * U08 u1_pred_dir_ctr, U08 u1_default_ref_id, |
1835 | | * S32 i4_num_act_ref_l0) |
1836 | | * |
1837 | | * @brief The 18x18 MV grid for a ctb is filled in its first row (18 nodes) and |
1838 | | * first col (16 nodes); this corresponds to neighbours (TL, T, TR, L, BL) |
1839 | | * |
1840 | | * @param[in] ps_curr_layer: layer ctxt, has the mv bank structure pointer |
1841 | | * @param[in] blk_x : x coordinate of the blk, scaled here to mv bank blk units |
1842 | | * @param[in] blk_y : y coordinate of the blk, scaled here to mv bank blk units |
1843 | | * @param[in,out] ps_mv_grid : Grid (18x18 mvs at 4x4 level); i4_stride is read, |
1844 | | * the first row and first col nodes are written |
1845 | | * @param[in] u1_pred_dir_ctr : if 1, the mv bank offset is advanced by one entry |
1846 | | * @param[in] u1_default_ref_id : ref id argument given to COPY_MV_TO_SEARCH_NODE |
1847 | | * @param[in] i4_num_act_ref_l0 : not referenced in this function |
1848 | | * |
1849 | | * @note The node just below the 16 left nodes (bottom left) is marked unavailable |
1850 | | * |
1851 | | * @return void |
1852 | | ******************************************************************************** |
1853 | | */ |
1854 | | void hme_fill_ctb_neighbour_mvs( |
1855 | | layer_ctxt_t *ps_curr_layer, |
1856 | | S32 blk_x, |
1857 | | S32 blk_y, |
1858 | | mv_grid_t *ps_mv_grid, |
1859 | | U08 u1_pred_dir_ctr, |
1860 | | U08 u1_default_ref_id, |
1861 | | S32 i4_num_act_ref_l0) |
1862 | 124k | { |
1863 | 124k | search_node_t *ps_grid_node; |
1864 | 124k | layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank; |
1865 | 124k | S32 i4_offset; |
1866 | 124k | hme_mv_t *ps_mv, *ps_mv_base; |
1867 | 124k | S08 *pi1_ref_idx, *pi1_ref_idx_base; |
1868 | 124k | S32 jump = 0, inc, i, mvs_in_blk, mvs_in_row; /* jump stays 0 unless the mv bank is 4x4 */ |
1869 | | |
1870 | 124k | if(ps_layer_mvbank->e_blk_size == BLK_4x4) |
1871 | 0 | { |
1872 | | /* searching 16x16, mvs are for 4x4 */ |
1873 | 0 | jump = 1; |
1874 | 0 | blk_x <<= 2; |
1875 | 0 | blk_y <<= 2; |
1876 | 0 | } |
1877 | 124k | else |
1878 | 124k | { |
1879 | | /* Searching 16x16, mvs are for 8x8 */ |
1880 | 124k | blk_x <<= 1; |
1881 | 124k | blk_y <<= 1; |
1882 | 124k | } |
1883 | 124k | ASSERT(ps_layer_mvbank->e_blk_size != BLK_16x16); |
1884 | | |
1885 | 124k | mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk; |
1886 | 124k | mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row; |
1887 | | |
1888 | | /* Adjust the blk coord to point to top left locn */ |
1889 | 124k | blk_x -= 1; |
1890 | 124k | blk_y -= 1; |
1891 | | |
1892 | | /* Pick up the mvs from the location */ |
1893 | 124k | i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk); |
1894 | 124k | i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y); |
1895 | | |
1896 | 124k | i4_offset += (u1_pred_dir_ctr == 1); /* one extra entry when u1_pred_dir_ctr is 1 */ |
1897 | | |
1898 | 124k | ps_mv = ps_layer_mvbank->ps_mv + i4_offset; |
1899 | 124k | pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset; |
1900 | | /* Keep the top left position; the left col copy restarts from it */ |
1901 | 124k | ps_mv_base = ps_mv; |
1902 | 124k | pi1_ref_idx_base = pi1_ref_idx; |
1903 | | |
1904 | | /* the 0, 0 entry of the grid pts to top left for the ctb */ |
1905 | 124k | ps_grid_node = &ps_mv_grid->as_node[0]; |
1906 | | |
1907 | | /* Copy 18 mvs at 4x4 level including top left, 16 top mvs for ctb, 1 tr */ |
1908 | 2.37M | for(i = 0; i < 18; i++) |
1909 | 2.24M | { |
1910 | 2.24M | COPY_MV_TO_SEARCH_NODE(ps_grid_node, ps_mv, pi1_ref_idx, u1_default_ref_id, 0); |
1911 | 2.24M | ps_grid_node++; |
1912 | 2.24M | inc = 1; |
1913 | | /* With jump == 0 (non 4x4 bank) the src does not advance after odd i, so 2 grid nodes get the same mv */ |
1914 | 2.24M | if(i & 1) |
1915 | 1.12M | inc = jump; |
1916 | | |
1917 | 2.24M | ps_mv += (mvs_in_blk * inc); |
1918 | 2.24M | pi1_ref_idx += (mvs_in_blk * inc); |
1919 | 2.24M | } |
1920 | | /* Restart from the mv bank row just below the top left locn */ |
1921 | 124k | ps_mv = ps_mv_base + mvs_in_row; |
1922 | 124k | pi1_ref_idx = pi1_ref_idx_base + mvs_in_row; |
1923 | | |
1924 | | /* now copy the 16 left mvs, one per grid row starting at grid row 1 */ |
1925 | 124k | ps_grid_node = &ps_mv_grid->as_node[0]; |
1926 | 124k | ps_grid_node += (ps_mv_grid->i4_stride); |
1927 | 2.12M | for(i = 0; i < 16; i++) |
1928 | 1.99M | { |
1929 | 1.99M | COPY_MV_TO_SEARCH_NODE(ps_grid_node, ps_mv, pi1_ref_idx, u1_default_ref_id, 0); |
1930 | 1.99M | ps_grid_node += ps_mv_grid->i4_stride; |
1931 | 1.99M | inc = 1; |
1932 | | /* With jump == 0 (non 4x4 bank) the src does not advance after even i, so 2 grid nodes get the same mv */ |
1933 | 1.99M | if(!(i & 1)) |
1934 | 999k | inc = jump; |
1935 | | |
1936 | 1.99M | ps_mv += (mvs_in_row * inc); |
1937 | 1.99M | pi1_ref_idx += (mvs_in_row * inc); |
1938 | 1.99M | } |
1939 | | /* last one set to invalid as bottom left not yet encoded */ |
1940 | 124k | ps_grid_node->u1_is_avail = 0; |
1941 | 124k | } |
1942 | | /* Resets the buffer manager's usage counter (i4_used) to 0; the memory ptr and total size are left as is */ |
1943 | | void hme_reset_wkg_mem(buf_mgr_t *ps_buf_mgr) |
1944 | 4.82M | { |
1945 | 4.82M | ps_buf_mgr->i4_used = 0; |
1946 | 4.82M | } |
1947 | | void hme_init_wkg_mem(buf_mgr_t *ps_buf_mgr, U08 *pu1_mem, S32 size) /* Attaches pu1_mem of the given size to the buffer manager and resets its usage counter */ |
1948 | 7.59k | { |
1949 | 7.59k | ps_buf_mgr->pu1_wkg_mem = pu1_mem; /* memory is only referenced here, not copied */ |
1950 | 7.59k | ps_buf_mgr->i4_total = size; |
1951 | 7.59k | hme_reset_wkg_mem(ps_buf_mgr); /* sets i4_used to 0 */ |
1952 | 7.59k | } |
1953 | | /* Sets the stride and start offset of the ctb mv grid and marks the 16x16 center nodes as available */ |
1954 | | void hme_init_mv_grid(mv_grid_t *ps_mv_grid) |
1955 | 45.5k | { |
1956 | 45.5k | S32 i, j; |
1957 | 45.5k | search_node_t *ps_search_node; |
1958 | | /*************************************************************************/ |
1959 | | /* We have a 64x64 CTB in the worst case. For this, we have 16x16 4x4 MVs*/ |
1960 | | /* Additionally, we have 1 neighbour on each side. This makes it a 18x18 */ |
1961 | | /* MV Grid. The boundary of this Grid on all sides are neighbours and the*/ |
1962 | | /* left and top edges of this grid are filled at run time. The center    */ |
1963 | | /* portion represents the actual CTB MVs (16x16), also filled at run time*/ |
1964 | | /* Here (init time) only u1_is_avail of the 16x16 center nodes is set to 1*/ |
1965 | | /*************************************************************************/ |
1966 | 45.5k | ps_mv_grid->i4_stride = NUM_COLUMNS_IN_CTB_GRID; |
1967 | 45.5k | ps_mv_grid->i4_start_offset = ps_mv_grid->i4_stride + CTB_MV_GRID_PAD; /* one grid row plus the pad: first center node */ |
1968 | 45.5k | ps_search_node = &ps_mv_grid->as_node[ps_mv_grid->i4_start_offset]; |
1969 | 774k | for(i = 0; i < 16; i++) |
1970 | 728k | { |
1971 | 12.3M | for(j = 0; j < 16; j++) |
1972 | 11.6M | { |
1973 | 11.6M | ps_search_node[j].u1_is_avail = 1; |
1974 | 11.6M | } |
1975 | | /* Advance to the same column of the next grid row */ |
1976 | 728k | ps_search_node += ps_mv_grid->i4_stride; |
1977 | 728k | } |
1978 | 45.5k | } |
1979 | | /** |
1980 | | ******************************************************************************** |
1981 | | * @fn void hme_pad_left(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht) |
1982 | | * |
1983 | | * @brief Pads horizontally to the left side. On each line, pu1_dst[0] is replicated into the pad_wd bytes before it |
1984 | | * |
1985 | | * @param[in] pu1_dst : destination pointer. Points to the pixel to be repeated (first line) |
1986 | | * |
1987 | | * @param[in] stride : stride of destination buffer |
1988 | | * |
1989 | | * @param[in] pad_wd : Amt of horizontal padding to be done |
1990 | | * |
1991 | | * @param[in] pad_ht : Number of lines for which horizontal padding to be done |
1992 | | * |
1993 | | * @return void |
1994 | | ******************************************************************************** |
1995 | | */ |
1996 | | void hme_pad_left(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht) |
1997 | 0 | { |
1998 | 0 | S32 i, j; |
1999 | 0 | U08 u1_val; |
2000 | 0 | for(i = 0; i < pad_ht; i++) |
2001 | 0 | { |
2002 | 0 | u1_val = pu1_dst[0]; |
2003 | 0 | for(j = -pad_wd; j < 0; j++) /* negative indices: writes pu1_dst[-pad_wd] .. pu1_dst[-1] */ |
2004 | 0 | pu1_dst[j] = u1_val; |
2005 |

2006 | 0 | pu1_dst += stride; |
2007 | 0 | } |
2008 | 0 | } |
2009 | | /** |
2010 | | ******************************************************************************** |
2011 | | * @fn void hme_pad_right(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht) |
2012 | | * |
2013 | | * @brief Pads horizontally to the right side. On each line, pu1_dst[0] is replicated into the pad_wd bytes after it |
2014 | | * |
2015 | | * @param[in] pu1_dst : destination pointer. Points to the pixel to be repeated (first line) |
2016 | | * |
2017 | | * @param[in] stride : stride of destination buffer |
2018 | | * |
2019 | | * @param[in] pad_wd : Amt of horizontal padding to be done |
2020 | | * |
2021 | | * @param[in] pad_ht : Number of lines for which horizontal padding to be done |
2022 | | * |
2023 | | * @return void |
2024 | | ******************************************************************************** |
2025 | | */ |
2026 | | void hme_pad_right(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht) |
2027 | 2.84M | { |
2028 | 2.84M | S32 i, j; |
2029 | 2.84M | U08 u1_val; |
2030 | 24.2M | for(i = 0; i < pad_ht; i++) |
2031 | 21.4M | { |
2032 | 21.4M | u1_val = pu1_dst[0]; |
2033 | 179M | for(j = 1; j <= pad_wd; j++) /* writes pu1_dst[1] .. pu1_dst[pad_wd] */ |
2034 | 158M | pu1_dst[j] = u1_val; |
2035 | | |
2036 | 21.4M | pu1_dst += stride; |
2037 | 21.4M | } |
2038 | 2.84M | } |
2039 | | /** |
2040 | | ******************************************************************************** |
2041 | | * @fn void hme_pad_top(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd) |
2042 | | * |
2043 | | * @brief Pads vertically on the top. Copies the line at pu1_dst into each of the pad_ht lines above it |
2044 | | * |
2045 | | * @param[in] pu1_dst : destination pointer. Points to the line to be repeated |
2046 | | * |
2047 | | * @param[in] stride : stride of destination buffer |
2048 | | * |
2049 | | * @param[in] pad_ht : Amt of vertical padding to be done |
2050 | | * |
2051 | | * @param[in] pad_wd : Number of columns (bytes per line) for which vertical padding to be done |
2052 | | * |
2053 | | * @return void |
2054 | | ******************************************************************************** |
2055 | | */ |
2056 | | void hme_pad_top(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd) |
2057 | 0 | { |
2058 | 0 | S32 i; |
2059 | 0 | for(i = 1; i <= pad_ht; i++) /* i-th line above the source line */ |
2060 | 0 | memcpy(pu1_dst - (i * stride), pu1_dst, pad_wd); |
2061 | 0 | } |
2062 | | /** |
2063 | | ******************************************************************************** |
2064 | | * @fn void hme_pad_bot(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd) |
2065 | | * |
2066 | | * @brief Pads vertically on the bottom. Copies the line at pu1_dst into each of the pad_ht lines below it |
2067 | | * |
2068 | | * @param[in] pu1_dst : destination pointer. Points to the line to be repeated |
2069 | | * |
2070 | | * @param[in] stride : stride of destination buffer |
2071 | | * |
2072 | | * @param[in] pad_ht : Amt of vertical padding to be done |
2073 | | * |
2074 | | * @param[in] pad_wd : Number of columns (bytes per line) for which vertical padding to be done |
2075 | | * |
2076 | | * @return void |
2077 | | ******************************************************************************** |
2078 | | */ |
2079 | | void hme_pad_bot(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd) |
2080 | 1.41M | { |
2081 | 1.41M | S32 i; |
2082 | 7.58M | for(i = 1; i <= pad_ht; i++) /* i-th line below the source line */ |
2083 | 6.17M | memcpy(pu1_dst + (i * stride), pu1_dst, pad_wd); |
2084 | 1.41M | } |
2085 | | |
2086 | | /** |
2087 | | ******************************************************************************** |
2088 | | * @fn void hme_get_wt_inp(layer_ctxt_t *ps_curr_layer, wgt_pred_ctxt_t *ps_wt_inp_prms, S32 dst_stride, |
2089 | | * S32 pos_x, S32 pos_y, S32 size, S32 num_ref, U08 u1_is_wt_pred_on) |
2090 | | * |
2091 | | * @brief Writes size x size input blks for ref ids 0..num_ref: ids below num_ref are inverse |
2092 | | * weighted, id num_ref is a plain copy; blks cut by the picture edge are padded right/bottom |
2093 | | * |
2094 | | * @param[in] ps_curr_layer: layer ctxt (pu1_inp, i4_inp_stride, i4_wd and i4_ht are read) |
2095 | | * @param[in,out] ps_wt_inp_prms : inverse weights, offsets and log_wdc are read; apu1_wt_inp[] ptrs are set |
2096 | | * @param[in] dst_stride : stride used when writing the destination buffer |
2097 | | * @param[in] pos_x : x coordinate of the input blk in the picture (clamped to i4_wd - 1) |
2098 | | * @param[in] pos_y : y coordinate of the input blk in the picture (clamped to i4_ht - 1) |
2099 | | * |
2100 | | * @param[in] size : size of the input block |
2101 | | * |
2102 | | * @param[in] num_ref : Number of reference frames; u1_is_wt_pred_on is not referenced in the body |
2103 | | * |
2104 | | * @return void |
2105 | | ******************************************************************************** |
2106 | | */ |
2107 | | void hme_get_wt_inp( |
2108 | | layer_ctxt_t *ps_curr_layer, |
2109 | | wgt_pred_ctxt_t *ps_wt_inp_prms, |
2110 | | S32 dst_stride, |
2111 | | S32 pos_x, |
2112 | | S32 pos_y, |
2113 | | S32 size, |
2114 | | S32 num_ref, |
2115 | | U08 u1_is_wt_pred_on) |
2116 | 4.17M | { |
2117 | 4.17M | S32 ref, i, j; |
2118 | 4.17M | U08 *pu1_src, *pu1_dst, *pu1_src_tmp; |
2119 | 4.17M | S32 log_wdc = ps_wt_inp_prms->wpred_log_wdc; |
2120 | 4.17M | S32 x_count, y_count; |
2121 | | |
2122 | | /* Fixed source */ |
2123 | 4.17M | pu1_src = ps_curr_layer->pu1_inp; |
2124 | | |
2125 | | /* Make sure the start positions of block are inside frame limits */ |
2126 | 4.17M | pos_x = MIN(pos_x, ps_curr_layer->i4_wd - 1); |
2127 | 4.17M | pos_y = MIN(pos_y, ps_curr_layer->i4_ht - 1); |
2128 | | |
2129 | 4.17M | pu1_src += (pos_x + (pos_y * ps_curr_layer->i4_inp_stride)); |
2130 | | |
2131 | | /* In case we handle incomplete CTBs, we copy only as much as reqd */ |
2132 | | /* from input buffers to prevent out of bound accesses. In this */ |
2133 | | /* case, we do padding in x or y or both dirns */ |
2134 | 4.17M | x_count = MIN(size, (ps_curr_layer->i4_wd - pos_x)); |
2135 | 4.17M | y_count = MIN(size, (ps_curr_layer->i4_ht - pos_y)); |
2136 | | /* Every apu1_wt_inp[] entry is pointed at the same buffer: apu1_wt_inp_buf_array[num_ref] */ |
2137 | 16.9M | for(i = 0; i < num_ref + 1; i++) |
2138 | 12.7M | { |
2139 | 12.7M | ps_wt_inp_prms->apu1_wt_inp[i] = ps_wt_inp_prms->apu1_wt_inp_buf_array[num_ref]; |
2140 | 12.7M | } |
2141 | | |
2142 | | /* Run through all ref ids, plus one extra pass (ref == num_ref) for the unweighted input */ |
2143 | 16.9M | for(ref = 0; ref < num_ref + 1; ref++) |
2144 | 12.7M | { |
2145 | 12.7M | S32 wt, off; |
2146 | 12.7M | S32 inv_wt; |
2147 | | |
2148 | 12.7M | pu1_src_tmp = pu1_src; |
2149 | | |
2150 | | /* Each ref id may have different wt/offset. */ |
2151 | | /* NOTE(review): all apu1_wt_inp[] entries were set to one buffer above, so dst is shared across ref ids - confirm intended */ |
2152 | 12.7M | pu1_dst = ps_wt_inp_prms->apu1_wt_inp[ref]; |
2153 | | |
2154 | 12.7M | if(ref == num_ref) |
2155 | 4.17M | { |
2156 | | /* last ref will be non weighted input */ |
2157 | 41.4M | for(i = 0; i < y_count; i++) |
2158 | 37.2M | { |
2159 | 646M | for(j = 0; j < x_count; j++) |
2160 | 609M | { |
2161 | 609M | pu1_dst[j] = pu1_src_tmp[j]; |
2162 | 609M | } |
2163 | 37.2M | pu1_src_tmp += ps_curr_layer->i4_inp_stride; |
2164 | 37.2M | pu1_dst += dst_stride; |
2165 | 37.2M | } |
2166 | 4.17M | } |
2167 | 8.59M | else |
2168 | 8.59M | { |
2169 | | /* Wt and off specific to this ref id */ |
2170 | 8.59M | wt = ps_wt_inp_prms->a_wpred_wt[ref]; /* wt is not read afterwards; inv_wt is used instead */ |
2171 | 8.59M | inv_wt = ps_wt_inp_prms->a_inv_wpred_wt[ref]; |
2172 | 8.59M | off = ps_wt_inp_prms->a_wpred_off[ref]; |
2173 | | |
2174 | | /* Generate x_count * y_count modified input samples; the rest of size*size comes from padding */ |
2175 | 85.1M | for(i = 0; i < y_count; i++) |
2176 | 76.5M | { |
2177 | 1.32G | for(j = 0; j < x_count; j++) |
2178 | 1.25G | { |
2179 | 1.25G | S32 tmp; |
2180 | | |
2181 | | /* Since we scale input, we use inverse transform of wt pred */ |
2182 | | //tmp = HME_INV_WT_PRED(pu1_src_tmp[j], wt, off, log_wdc); |
2183 | 1.25G | tmp = HME_INV_WT_PRED1(pu1_src_tmp[j], inv_wt, off, log_wdc); |
2184 | 1.25G | pu1_dst[j] = (U08)(HME_CLIP(tmp, 0, 255)); |
2185 | 1.25G | } |
2186 | 76.5M | pu1_src_tmp += ps_curr_layer->i4_inp_stride; |
2187 | 76.5M | pu1_dst += dst_stride; |
2188 | 76.5M | } |
2189 | 8.59M | } |
2190 | | |
2191 | | /* Check and do padding in right direction if need be: last valid column is replicated */ |
2192 | 12.7M | pu1_dst = ps_wt_inp_prms->apu1_wt_inp[ref]; |
2193 | 12.7M | if(x_count != size) |
2194 | 2.84M | { |
2195 | 2.84M | hme_pad_right(pu1_dst + x_count - 1, dst_stride, size - x_count, y_count); |
2196 | 2.84M | } |
2197 | | |
2198 | | /* Check and do padding in bottom direction if need be: last valid row is replicated over size columns */ |
2199 | 12.7M | if(y_count != size) |
2200 | 1.41M | { |
2201 | 1.41M | hme_pad_bot(pu1_dst + (y_count - 1) * dst_stride, dst_stride, size - y_count, size); |
2202 | 1.41M | } |
2203 | 12.7M | } |
2204 | 4.17M | } |
2205 | | /** |
2206 | | **************************************************************************************** |
2207 | | * @fn hme_pick_best_pu_cand(pu_result_t *ps_pu_results_dst, |
2208 | | * pu_result_t *ps_pu_results_list0, pu_result_t *ps_pu_results_list1, |
2209 | | * UWORD8 u1_num_results_per_part_l0, UWORD8 u1_num_results_per_part_l1, |
2210 | | * UWORD8 u1_candidate_rank) |
2211 | | * |
2212 | | * @brief Pools the total costs of the list0 and list1 candidates, sorts the pool, and copies |
2213 | | * the candidate found at position u1_candidate_rank to ps_pu_results_dst[0] |
2214 | | * |
2215 | | * @param[in] - ps_pu_results_list0, ps_pu_results_list1 : Pointers to the input candidates of each list |
2216 | | * - u1_num_results_per_part_l0, u1_num_results_per_part_l1 : Number of candidates in each list |
2217 | | * - u1_candidate_rank : Index into the sorted pool (sort order is defined by the sort macro) |
2218 | | * |
2219 | | * @param[out] - ps_pu_results_dst : Receives the selected PU result in entry 0 |
2220 | | * |
2221 | | **************************************************************************************** |
2222 | | */ |
2223 | | void hme_pick_best_pu_cand( |
2224 | | pu_result_t *ps_pu_results_dst, |
2225 | | pu_result_t *ps_pu_results_list0, |
2226 | | pu_result_t *ps_pu_results_list1, |
2227 | | UWORD8 u1_num_results_per_part_l0, |
2228 | | UWORD8 u1_num_results_per_part_l1, |
2229 | | UWORD8 u1_candidate_rank) |
2230 | 14.5M | { |
2231 | 14.5M | struct cand_pos_data |
2232 | 14.5M | { |
2233 | 14.5M | U08 u1_cand_list_id; /* 0 for list0, 1 for list1 */ |
2234 | | |
2235 | 14.5M | U08 u1_cand_id_in_cand_list; /* index of the candidate within its own list */ |
2236 | 14.5M | } as_cand_pos_data[MAX_NUM_RESULTS_PER_PART_LIST << 1]; |
2237 | | /* NOTE(review): both arrays hold 2 * MAX_NUM_RESULTS_PER_PART_LIST entries; l0 + l1 counts are assumed to fit - confirm at callers */ |
2238 | 14.5M | S32 ai4_costs[MAX_NUM_RESULTS_PER_PART_LIST << 1]; |
2239 | 14.5M | U08 i, j; |
2240 | | /* Pool the list0 costs first, tagging each with list id 0 and its index */ |
2241 | 28.9M | for(i = 0; i < u1_num_results_per_part_l0; i++) |
2242 | 14.4M | { |
2243 | 14.4M | ai4_costs[i] = ps_pu_results_list0[i].i4_tot_cost; |
2244 | 14.4M | as_cand_pos_data[i].u1_cand_id_in_cand_list = i; |
2245 | 14.4M | as_cand_pos_data[i].u1_cand_list_id = 0; |
2246 | 14.4M | } |
2247 | | /* Append the list1 costs after the list0 entries (j continues where list0 ended) */ |
2248 | 18.4M | for(i = 0, j = u1_num_results_per_part_l0; i < u1_num_results_per_part_l1; i++, j++) |
2249 | 3.94M | { |
2250 | 3.94M | ai4_costs[j] = ps_pu_results_list1[i].i4_tot_cost; |
2251 | 3.94M | as_cand_pos_data[j].u1_cand_id_in_cand_list = i; |
2252 | 3.94M | as_cand_pos_data[j].u1_cand_list_id = 1; |
2253 | 3.94M | } |
2254 | | /* Sort the costs; going by the macro name the companion array is reordered alongside - presumably ascending, confirm in macro */ |
2255 | 14.5M | SORT_PRIMARY_INTTYPE_ARRAY_AND_REORDER_GENERIC_COMPANION_ARRAY( |
2256 | 14.5M | ai4_costs, |
2257 | 14.5M | as_cand_pos_data, |
2258 | 14.5M | u1_num_results_per_part_l0 + u1_num_results_per_part_l1, |
2259 | 14.5M | struct cand_pos_data); |
2260 | | /* Copy the candidate at the requested rank from whichever list it came from */ |
2261 | 14.5M | if(as_cand_pos_data[u1_candidate_rank].u1_cand_list_id) |
2262 | 933k | { |
2263 | 933k | ps_pu_results_dst[0] = |
2264 | 933k | ps_pu_results_list1[as_cand_pos_data[u1_candidate_rank].u1_cand_id_in_cand_list]; |
2265 | 933k | } |
2266 | 13.5M | else |
2267 | 13.5M | { |
2268 | 13.5M | ps_pu_results_dst[0] = |
2269 | 13.5M | ps_pu_results_list0[as_cand_pos_data[u1_candidate_rank].u1_cand_id_in_cand_list]; |
2270 | 13.5M | } |
2271 | 14.5M | } |
2271 | | |
2272 | | /* Returns the number of candidates */ |
2273 | | static S32 hme_tu_recur_cand_harvester( |
2274 | | part_type_results_t *ps_cand_container, |
2275 | | inter_pu_results_t *ps_pu_data, |
2276 | | inter_ctb_prms_t *ps_inter_ctb_prms, |
2277 | | S32 i4_part_mask) |
2278 | 4.30M | { |
2279 | 4.30M | part_type_results_t s_cand_data; |
2280 | | |
2281 | 4.30M | U08 i, j; |
2282 | 4.30M | PART_ID_T e_part_id; |
2283 | | |
2284 | 4.30M | S32 i4_num_cands = 0; |
2285 | | |
2286 | | /* 2Nx2N part_type decision part */ |
2287 | 4.30M | if(i4_part_mask & ENABLE_2Nx2N) |
2288 | 4.26M | { |
2289 | 4.26M | U08 u1_num_candt_to_pick; |
2290 | | |
2291 | 4.26M | e_part_id = ge_part_type_to_part_id[PRT_2Nx2N][0]; |
2292 | | |
2293 | 4.26M | ASSERT(ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands >= 1); |
2294 | | |
2295 | 4.26M | if(!ps_inter_ctb_prms->i4_bidir_enabled || (i4_part_mask == ENABLE_2Nx2N)) |
2296 | 4.04M | { |
2297 | 4.04M | u1_num_candt_to_pick = |
2298 | 4.04M | MIN(ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands, |
2299 | 4.04M | ps_pu_data->u1_num_results_per_part_l0[e_part_id] + |
2300 | 4.04M | ps_pu_data->u1_num_results_per_part_l1[e_part_id]); |
2301 | 4.04M | } |
2302 | 229k | else |
2303 | 229k | { |
2304 | 229k | u1_num_candt_to_pick = |
2305 | 229k | MIN(1, |
2306 | 229k | ps_pu_data->u1_num_results_per_part_l0[e_part_id] + |
2307 | 229k | ps_pu_data->u1_num_results_per_part_l1[e_part_id]); |
2308 | 229k | } |
2309 | | |
2310 | 4.26M | if(ME_XTREME_SPEED_25 == ps_inter_ctb_prms->i1_quality_preset) |
2311 | 614k | { |
2312 | 614k | u1_num_candt_to_pick = MIN(u1_num_candt_to_pick, MAX_NUM_TU_RECUR_CANDS_IN_XS25); |
2313 | 614k | } |
2314 | | |
2315 | 9.15M | for(i = 0; i < u1_num_candt_to_pick; i++) |
2316 | 4.88M | { |
2317 | | /* Picks the best two candidates of all the available ones */ |
2318 | 4.88M | hme_pick_best_pu_cand( |
2319 | 4.88M | ps_cand_container[i4_num_cands].as_pu_results, |
2320 | 4.88M | ps_pu_data->aps_pu_results[0][e_part_id], |
2321 | 4.88M | ps_pu_data->aps_pu_results[1][e_part_id], |
2322 | 4.88M | ps_pu_data->u1_num_results_per_part_l0[e_part_id], |
2323 | 4.88M | ps_pu_data->u1_num_results_per_part_l1[e_part_id], |
2324 | 4.88M | i); |
2325 | | |
2326 | | /* Update the other params part_type and total_cost in part_type_results */ |
2327 | 4.88M | ps_cand_container[i4_num_cands].u1_part_type = e_part_id; |
2328 | 4.88M | ps_cand_container[i4_num_cands].i4_tot_cost = |
2329 | 4.88M | ps_cand_container[i4_num_cands].as_pu_results->i4_tot_cost; |
2330 | | |
2331 | 4.88M | i4_num_cands++; |
2332 | 4.88M | } |
2333 | 4.26M | } |
2334 | | |
2335 | | /* SMP */ |
2336 | 4.30M | { |
2337 | 4.30M | S32 i4_total_cost; |
2338 | | |
2339 | 4.30M | S32 num_part_types = PRT_Nx2N - PRT_2NxN + 1; |
2340 | 4.30M | S32 start_part_type = PRT_2NxN; |
2341 | 4.30M | S32 best_cost = MAX_32BIT_VAL; |
2342 | 4.30M | S32 part_type_cnt = 0; |
2343 | | |
2344 | 12.9M | for(j = 0; j < num_part_types; j++) |
2345 | 8.60M | { |
2346 | 8.60M | if(!(i4_part_mask & gai4_part_type_to_part_mask[j + start_part_type])) |
2347 | 6.94M | { |
2348 | 6.94M | continue; |
2349 | 6.94M | } |
2350 | | |
2351 | 4.97M | for(i = 0; i < gau1_num_parts_in_part_type[j + start_part_type]; i++) |
2352 | 3.31M | { |
2353 | 3.31M | e_part_id = ge_part_type_to_part_id[j + start_part_type][i]; |
2354 | | |
2355 | | /* Pick the best candidate for the partition acroos lists */ |
2356 | 3.31M | hme_pick_best_pu_cand( |
2357 | 3.31M | &s_cand_data.as_pu_results[i], |
2358 | 3.31M | ps_pu_data->aps_pu_results[0][e_part_id], |
2359 | 3.31M | ps_pu_data->aps_pu_results[1][e_part_id], |
2360 | 3.31M | ps_pu_data->u1_num_results_per_part_l0[e_part_id], |
2361 | 3.31M | ps_pu_data->u1_num_results_per_part_l1[e_part_id], |
2362 | 3.31M | 0); |
2363 | 3.31M | } |
2364 | | |
2365 | 1.65M | i4_total_cost = |
2366 | 1.65M | s_cand_data.as_pu_results[0].i4_tot_cost + s_cand_data.as_pu_results[1].i4_tot_cost; |
2367 | | |
2368 | 1.65M | if(i4_total_cost < best_cost) |
2369 | 922k | { |
2370 | | /* Stores the index of the best part_type in the sub-catoegory */ |
2371 | 922k | best_cost = i4_total_cost; |
2372 | | |
2373 | 922k | ps_cand_container[i4_num_cands] = s_cand_data; |
2374 | | |
2375 | 922k | ps_cand_container[i4_num_cands].u1_part_type = j + start_part_type; |
2376 | 922k | ps_cand_container[i4_num_cands].i4_tot_cost = i4_total_cost; |
2377 | 922k | } |
2378 | | |
2379 | 1.65M | part_type_cnt++; |
2380 | 1.65M | } |
2381 | | |
2382 | 4.30M | i4_num_cands = (part_type_cnt) ? (i4_num_cands + 1) : i4_num_cands; |
2383 | 4.30M | } |
2384 | | |
2385 | | /* AMP */ |
2386 | 4.30M | { |
2387 | 4.30M | S32 i4_total_cost; |
2388 | | |
2389 | 4.30M | S32 num_part_types = PRT_nRx2N - PRT_2NxnU + 1; |
2390 | 4.30M | S32 start_part_type = PRT_2NxnU; |
2391 | 4.30M | S32 best_cost = MAX_32BIT_VAL; |
2392 | 4.30M | S32 part_type_cnt = 0; |
2393 | | |
2394 | 21.5M | for(j = 0; j < num_part_types; j++) |
2395 | 17.2M | { |
2396 | 17.2M | if(!(i4_part_mask & gai4_part_type_to_part_mask[j + start_part_type])) |
2397 | 14.0M | { |
2398 | 14.0M | continue; |
2399 | 14.0M | } |
2400 | | |
2401 | 9.44M | for(i = 0; i < gau1_num_parts_in_part_type[j + start_part_type]; i++) |
2402 | 6.29M | { |
2403 | 6.29M | e_part_id = ge_part_type_to_part_id[j + start_part_type][i]; |
2404 | | |
2405 | | /* Pick the best candidate for the partition acroos lists */ |
2406 | 6.29M | hme_pick_best_pu_cand( |
2407 | 6.29M | &s_cand_data.as_pu_results[i], |
2408 | 6.29M | ps_pu_data->aps_pu_results[0][e_part_id], |
2409 | 6.29M | ps_pu_data->aps_pu_results[1][e_part_id], |
2410 | 6.29M | ps_pu_data->u1_num_results_per_part_l0[e_part_id], |
2411 | 6.29M | ps_pu_data->u1_num_results_per_part_l1[e_part_id], |
2412 | 6.29M | 0); |
2413 | 6.29M | } |
2414 | | |
2415 | 3.14M | i4_total_cost = |
2416 | 3.14M | s_cand_data.as_pu_results[0].i4_tot_cost + s_cand_data.as_pu_results[1].i4_tot_cost; |
2417 | | |
2418 | 3.14M | if(i4_total_cost < best_cost) |
2419 | 932k | { |
2420 | | /* Stores the index of the best part_type in the sub-catoegory */ |
2421 | 932k | best_cost = i4_total_cost; |
2422 | | |
2423 | 932k | ps_cand_container[i4_num_cands] = s_cand_data; |
2424 | | |
2425 | 932k | ps_cand_container[i4_num_cands].u1_part_type = j + start_part_type; |
2426 | 932k | ps_cand_container[i4_num_cands].i4_tot_cost = i4_total_cost; |
2427 | 932k | } |
2428 | | |
2429 | 3.14M | part_type_cnt++; |
2430 | 3.14M | } |
2431 | | |
2432 | 4.30M | i4_num_cands = (part_type_cnt) ? (i4_num_cands + 1) : i4_num_cands; |
2433 | 4.30M | } |
2434 | | |
2435 | 4.30M | return i4_num_cands; |
2436 | 4.30M | } |
2437 | | |
2438 | | /** |
2439 | | ***************************************************************************** |
2440 | | * @fn hme_decide_part_types(ps_cu_results, ps_pu_results, ps_inter_ctb_prms, ...) |
2441 | | * |
2442 | | * @brief Does uni/bi evaluation across various partition types, |
2443 | | * decides best inter partition types for the CU, compares |
2444 | | * intra cost and decides the best K results for the CU |
2445 | | * |
2446 | | * This is called post subpel refinement for 16x16s, 8x8s and |
2447 | | * for post merge evaluation for 32x32,64x64 CUs |
2448 | | * |
2449 | | * @param[in,out] ps_cu_results : CU level results data structure |
2450 | | * - In : active partition mask, CU size, input offset |
2451 | | * - Out: Best results for final rdo evaluation of the cu |
2452 | | * |
2453 | | * @param[in] ps_pu_results : Per partition PU results (2 lists of upto 2mvs & refids) |
2454 | | * @param[in,out] ps_inter_ctb_prms : Inter CTB params (lambda, weights, pred buffer manager) |
2455 | | * @param[in] ps_ctxt, ps_cmn_utils_optimised_function_list, ps_me_optimised_function_list : ME frame context and function tables |
2456 | | * @par Description |
2457 | | * -------------------------------------------------------------------------------- |
2458 | | * Flow: |
2459 | | * for each category (SMP,AMP,2Nx2N based on part mask) |
2460 | | * { |
2461 | | * for each part_type |
2462 | | * { |
2463 | | * for each part |
2464 | | * pick best candidate from each list |
2465 | | * combine uni part type |
2466 | | * update best results for part type |
2467 | | * } |
2468 | | * pick the best part type for given category (for SMP & AMP) |
2469 | | * } |
2470 | | * || |
2471 | | * || |
2472 | | * \/ |
2473 | | * Bi-Pred evaluation: |
2474 | | * for upto 4 best part types |
2475 | | * { |
2476 | | * for each part |
2477 | | * { |
2478 | | * compute fixed size had for all uni and remember coeffs |
2479 | | * compute bisatd |
2480 | | * uni vs bi and gives upto two results |
2481 | | * also gives the pt level pred buffer |
2482 | | * } |
2483 | | * } |
2484 | | * || |
2485 | | * || |
2486 | | * \/ |
2487 | | * select X candidates for tu recursion as per the Note below |
2488 | | * tu_rec_on_part_type (reuse transform coeffs) |
2489 | | * || |
2490 | | * || |
2491 | | * \/ |
2492 | | * insert intra nodes at appropriate result id |
2493 | | * || |
2494 | | * || |
2495 | | * \/ |
2496 | | * populate y best results for rdo based on preset |
2497 | | * |
2498 | | * Note : |
2499 | | * number of TU rec for P pics : 2 2nx2n + 1 smp + 1 amp for ms or 9 for hq |
2500 | | * number of TU rec for B pics : 1 2nx2n + 1 smp + 1 amp for ms or 2 uni 2nx2n + 1 smp + 1 amp for ms or 9 for hq |
2501 | | * -------------------------------------------------------------------------------- |
2502 | | * |
2503 | | * @return None |
2504 | | ******************************************************************************** |
2505 | | */ |
2506 | | void hme_decide_part_types( |
2507 | | inter_cu_results_t *ps_cu_results, |
2508 | | inter_pu_results_t *ps_pu_results, |
2509 | | inter_ctb_prms_t *ps_inter_ctb_prms, |
2510 | | me_frm_ctxt_t *ps_ctxt, |
2511 | | ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list, |
2512 | | ihevce_me_optimised_function_list_t *ps_me_optimised_function_list |
2513 | |
2514 | | ) |
2515 | 4.30M | { /* Harvests candidate part types, runs hme_compute_pred_and_evaluate_bi() and the TU-recursion error function on each, and copies them to ps_cu_results->ps_best_results in increasing cost order */ |
2516 | 4.30M | S32 i, j; |
2517 | 4.30M | S32 i4_part_mask; |
2518 | 4.30M | ULWORD64 au8_pred_sigmaXSquare[NUM_BEST_ME_OUTPUTS][NUM_INTER_PU_PARTS]; |
2519 | 4.30M | ULWORD64 au8_pred_sigmaX[NUM_BEST_ME_OUTPUTS][NUM_INTER_PU_PARTS]; |
2520 | 4.30M | S32 i4_noise_term; |
2521 | 4.30M | WORD32 e_part_id; |
2522 | |
2523 | 4.30M | PF_SAD_FXN_TU_REC apf_err_compute[4]; |
2524 | |
2525 | 4.30M | part_type_results_t as_part_type_results[NUM_BEST_ME_OUTPUTS]; |
2526 | 4.30M | part_type_results_t *ps_part_type_results; |
2527 | |
2528 | 4.30M | S32 num_best_cand = 0; |
2529 | 4.30M | const S32 i4_default_src_wt = ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT; |
2530 | |
2531 | 4.30M | i4_part_mask = ps_cu_results->i4_part_mask; |
2532 | |
2533 | 4.30M | num_best_cand = hme_tu_recur_cand_harvester( |
2534 | 4.30M | as_part_type_results, ps_pu_results, ps_inter_ctb_prms, i4_part_mask); /* fills as_part_type_results with the 2Nx2N, SMP and AMP candidates and returns their count */ |
2535 | |
2536 | | /* Partition ID of the 2Nx2N part; only read by the noise-term path below for single-part candidates of non-8x8 CUs */ |
2537 | 4.30M | e_part_id = (UWORD8)ge_part_type_to_part_id[PRT_2Nx2N][0]; |
2538 | |
2539 | 4.30M | ps_part_type_results = as_part_type_results; |
2540 | 10.8M | for(i = 0; i < num_best_cand; i++) /* each candidate is handed over together with its own row of au8_pred_sigmaXSquare / au8_pred_sigmaX */ |
2541 | 6.52M | { |
2542 | 6.52M | hme_compute_pred_and_evaluate_bi( |
2543 | 6.52M | ps_cu_results, |
2544 | 6.52M | ps_pu_results, |
2545 | 6.52M | ps_inter_ctb_prms, |
2546 | 6.52M | &(ps_part_type_results[i]), |
2547 | 6.52M | au8_pred_sigmaXSquare[i], |
2548 | 6.52M | au8_pred_sigmaX[i], |
2549 | 6.52M | ps_cmn_utils_optimised_function_list, |
2550 | 6.52M | ps_me_optimised_function_list |
2551 | |
2552 | 6.52M | ); |
2553 | 6.52M | } |
2554 | | /* Perform TU_REC on the best candidates selected */ |
2555 | 4.30M | { |
2556 | 4.30M | WORD32 i4_sad_grid; |
2557 | 4.30M | WORD32 ai4_tu_split_flag[4]; |
2558 | 4.30M | WORD32 ai4_tu_early_cbf[4]; |
2559 | |
2560 | 4.30M | WORD32 best_cost[NUM_BEST_ME_OUTPUTS]; |
2561 | 4.30M | WORD32 ai4_final_idx[NUM_BEST_ME_OUTPUTS]; |
2562 | 4.30M | WORD16 i2_wght; |
2563 | 4.30M | WORD32 i4_satd; |
2564 | |
2565 | 4.30M | err_prms_t s_err_prms; |
2566 | 4.30M | err_prms_t *ps_err_prms = &s_err_prms; |
2567 | |
2568 | | /* Default cost and final idx initialization */ |
2569 | 10.8M | for(i = 0; i < num_best_cand; i++) |
2570 | 6.52M | { |
2571 | 6.52M | best_cost[i] = MAX_32BIT_VAL; |
2572 | 6.52M | ai4_final_idx[i] = -1; |
2573 | 6.52M | } |
2574 | |
2575 | | /* Assign the satd (TU recursion) function of every CU size to the err_compute |
2576 | | function pointer table; the table is indexed with ps_cu_results->u1_cu_size below */ |
2577 | 4.30M | apf_err_compute[CU_64x64] = hme_evalsatd_pt_pu_64x64_tu_rec; |
2578 | 4.30M | apf_err_compute[CU_32x32] = hme_evalsatd_pt_pu_32x32_tu_rec; |
2579 | 4.30M | apf_err_compute[CU_16x16] = hme_evalsatd_pt_pu_16x16_tu_rec; |
2580 | 4.30M | apf_err_compute[CU_8x8] = hme_evalsatd_pt_pu_8x8_tu_rec; |
2581 | |
2582 | 4.30M | ps_err_prms->pi4_sad_grid = &i4_sad_grid; |
2583 | 4.30M | ps_err_prms->pi4_tu_split_flags = ai4_tu_split_flag; |
2584 | 4.30M | ps_err_prms->u1_max_tr_depth = ps_inter_ctb_prms->u1_max_tr_depth; |
2585 | 4.30M | ps_err_prms->pi4_tu_early_cbf = ai4_tu_early_cbf; |
2586 | 4.30M | ps_err_prms->i4_grid_mask = 1; |
2587 | 4.30M | ps_err_prms->pu1_wkg_mem = ps_inter_ctb_prms->pu1_wkg_mem; |
2588 | 4.30M | ps_err_prms->u1_max_tr_size = 32; |
2589 | |
2590 | 4.30M | if(ps_inter_ctb_prms->u1_is_cu_noisy) |
2591 | 0 | { |
2592 | 0 | ps_err_prms->u1_max_tr_size = MAX_TU_SIZE_WHEN_NOISY; |
2593 | 0 | } |
2594 | |
2595 | | /* TU_REC for the best candidates, as mentioned in NOTE above (except candidates that |
2596 | | are disabled by Part_mask) */ |
2597 | 10.8M | for(i = 0; i < num_best_cand; i++) |
2598 | 6.52M | { |
2599 | 6.52M | part_type_results_t *ps_best_results; |
2600 | 6.52M | pu_result_t *ps_pu_result; |
2601 | 6.52M | WORD32 part_type_cost; |
2602 | 6.52M | WORD32 cand_idx; |
2603 | |
2604 | 6.52M | WORD32 pred_dir; |
2605 | 6.52M | S32 i4_inp_off; |
2606 | |
2607 | 6.52M | S32 lambda; |
2608 | 6.52M | U08 lambda_qshift; |
2609 | 6.52M | U08 *apu1_inp[MAX_NUM_INTER_PARTS]; |
2610 | 6.52M | S16 ai2_wt[MAX_NUM_INTER_PARTS]; |
2611 | 6.52M | S32 ai4_inv_wt[MAX_NUM_INTER_PARTS]; |
2612 | 6.52M | S32 ai4_inv_wt_shift_val[MAX_NUM_INTER_PARTS]; |
2613 | |
2614 | 6.52M | WORD32 part_type = ps_part_type_results[i].u1_part_type; |
2615 | 6.52M | WORD32 e_cu_size = ps_cu_results->u1_cu_size; |
2616 | 6.52M | WORD32 e_blk_size = ge_cu_size_to_blk_size[e_cu_size]; |
2617 | 6.52M | U08 u1_num_parts = gau1_num_parts_in_part_type[part_type]; |
2618 | 6.52M | U08 u1_inp_buf_idx = UCHAR_MAX; /* UCHAR_MAX means no temporary input buffer has been acquired */ |
2619 | |
2620 | 6.52M | ps_err_prms->i4_part_mask = i4_part_mask; |
2621 | 6.52M | ps_err_prms->i4_blk_wd = gau1_blk_size_to_wd[e_blk_size]; |
2622 | 6.52M | ps_err_prms->i4_blk_ht = gau1_blk_size_to_ht[e_blk_size]; |
2623 | 6.52M | ps_err_prms->pu1_ref = ps_part_type_results[i].pu1_pred; |
2624 | 6.52M | ps_err_prms->i4_ref_stride = ps_part_type_results[i].i4_pred_stride; |
2625 | |
2626 | | /* Current offset for the present part type */ |
2627 | 6.52M | i4_inp_off = ps_cu_results->i4_inp_offset; |
2628 | |
2629 | 6.52M | ps_best_results = &(ps_part_type_results[i]); |
2630 | |
2631 | 6.52M | part_type_cost = 0; |
2632 | 6.52M | lambda = ps_inter_ctb_prms->i4_lamda; |
2633 | 6.52M | lambda_qshift = ps_inter_ctb_prms->u1_lamda_qshift; |
2634 | |
2635 | 14.6M | for(j = 0; j < u1_num_parts; j++) /* per part: select input buffer, weight and inverse weight from the prediction direction, and accumulate the mv cost */ |
2636 | 8.17M | { |
2637 | 8.17M | ps_pu_result = &(ps_best_results->as_pu_results[j]); |
2638 | |
2639 | 8.17M | pred_dir = ps_pu_result->pu.b2_pred_mode; |
2640 | |
2641 | 8.17M | if(PRED_L0 == pred_dir) |
2642 | 7.36M | { |
2643 | 7.36M | apu1_inp[j] = |
2644 | 7.36M | ps_inter_ctb_prms->apu1_wt_inp[PRED_L0][ps_pu_result->pu.mv.i1_l0_ref_idx] + |
2645 | 7.36M | i4_inp_off; |
2646 | 7.36M | ai2_wt[j] = |
2647 | 7.36M | ps_inter_ctb_prms->pps_rec_list_l0[ps_pu_result->pu.mv.i1_l0_ref_idx] |
2648 | 7.36M | ->s_weight_offset.i2_luma_weight; |
2649 | 7.36M | ai4_inv_wt[j] = |
2650 | 7.36M | ps_inter_ctb_prms->pi4_inv_wt |
2651 | 7.36M | [ps_inter_ctb_prms->pi1_past_list[ps_pu_result->pu.mv.i1_l0_ref_idx]]; |
2652 | 7.36M | ai4_inv_wt_shift_val[j] = |
2653 | 7.36M | ps_inter_ctb_prms->pi4_inv_wt_shift_val |
2654 | 7.36M | [ps_inter_ctb_prms->pi1_past_list[ps_pu_result->pu.mv.i1_l0_ref_idx]]; |
2655 | 7.36M | } |
2656 | 810k | else if(PRED_L1 == pred_dir) |
2657 | 744k | { |
2658 | 744k | apu1_inp[j] = |
2659 | 744k | ps_inter_ctb_prms->apu1_wt_inp[PRED_L1][ps_pu_result->pu.mv.i1_l1_ref_idx] + |
2660 | 744k | i4_inp_off; |
2661 | 744k | ai2_wt[j] = |
2662 | 744k | ps_inter_ctb_prms->pps_rec_list_l1[ps_pu_result->pu.mv.i1_l1_ref_idx] |
2663 | 744k | ->s_weight_offset.i2_luma_weight; |
2664 | 744k | ai4_inv_wt[j] = |
2665 | 744k | ps_inter_ctb_prms->pi4_inv_wt |
2666 | 744k | [ps_inter_ctb_prms->pi1_future_list[ps_pu_result->pu.mv.i1_l1_ref_idx]]; |
2667 | 744k | ai4_inv_wt_shift_val[j] = |
2668 | 744k | ps_inter_ctb_prms->pi4_inv_wt_shift_val |
2669 | 744k | [ps_inter_ctb_prms->pi1_future_list[ps_pu_result->pu.mv.i1_l1_ref_idx]]; |
2670 | 744k | } |
2671 | 66.1k | else if(PRED_BI == pred_dir) /* bi-pred: non-weighted input with the default weight */ |
2672 | 66.1k | { |
2673 | 66.1k | apu1_inp[j] = ps_inter_ctb_prms->pu1_non_wt_inp + i4_inp_off; |
2674 | 66.1k | ai2_wt[j] = 1 << ps_inter_ctb_prms->wpred_log_wdc; |
2675 | 66.1k | ai4_inv_wt[j] = i4_default_src_wt; |
2676 | 66.1k | ai4_inv_wt_shift_val[j] = 0; |
2677 | 66.1k | } |
2678 | 0 | else |
2679 | 0 | { |
2680 | 0 | ASSERT(0); |
2681 | 0 | } |
2682 | |
2683 | 8.17M | part_type_cost += ps_pu_result->i4_mv_cost; |
2684 | 8.17M | } |
2685 | |
2686 | 6.52M | if((u1_num_parts == 1) || (ai2_wt[0] == ai2_wt[1])) /* one part, or both parts carry the same weight: part 0's input buffer is used as is */ |
2687 | 6.52M | { |
2688 | 6.52M | ps_err_prms->pu1_inp = apu1_inp[0]; |
2689 | 6.52M | ps_err_prms->i4_inp_stride = ps_inter_ctb_prms->i4_inp_stride; |
2690 | 6.52M | i2_wght = ai2_wt[0]; |
2691 | 6.52M | } |
2692 | 0 | else /* weights differ: both parts' inputs are copied side by side into a free pred buffer */ |
2693 | 0 | { |
2694 | 0 | if(1 != ihevce_get_free_pred_buf_indices( |
2695 | 0 | &u1_inp_buf_idx, |
2696 | 0 | &ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator, |
2697 | 0 | 1)) |
2698 | 0 | { |
2699 | 0 | ASSERT(0); |
2700 | 0 | } |
2701 | 0 | else |
2702 | 0 | { |
2703 | 0 | U08 *pu1_dst = |
2704 | 0 | ps_inter_ctb_prms->s_pred_buf_mngr.apu1_pred_bufs[u1_inp_buf_idx]; |
2705 | 0 | U08 *pu1_src = apu1_inp[0]; |
2706 | 0 | U08 u1_pu1_wd = (ps_part_type_results[i].as_pu_results[0].pu.b4_wd + 1) << 2; |
2707 | 0 | U08 u1_pu1_ht = (ps_part_type_results[i].as_pu_results[0].pu.b4_ht + 1) << 2; |
2708 | 0 | U08 u1_pu2_wd = (ps_part_type_results[i].as_pu_results[1].pu.b4_wd + 1) << 2; |
2709 | 0 | U08 u1_pu2_ht = (ps_part_type_results[i].as_pu_results[1].pu.b4_ht + 1) << 2; |
2710 | |
|
2711 | 0 | ps_cmn_utils_optimised_function_list->pf_copy_2d( |
2712 | 0 | pu1_dst, |
2713 | 0 | MAX_CU_SIZE, |
2714 | 0 | pu1_src, |
2715 | 0 | ps_inter_ctb_prms->i4_inp_stride, |
2716 | 0 | u1_pu1_wd, |
2717 | 0 | u1_pu1_ht); |
2718 | |
|
2719 | 0 | pu1_dst += |
2720 | 0 | (gai1_is_part_vertical[ge_part_type_to_part_id[part_type][0]] |
2721 | 0 | ? u1_pu1_ht * MAX_CU_SIZE |
2722 | 0 | : u1_pu1_wd); |
2723 | 0 | pu1_src = |
2724 | 0 | apu1_inp[1] + (gai1_is_part_vertical[ge_part_type_to_part_id[part_type][0]] |
2725 | 0 | ? u1_pu1_ht * ps_inter_ctb_prms->i4_inp_stride |
2726 | 0 | : u1_pu1_wd); |
2727 | |
|
2728 | 0 | ps_cmn_utils_optimised_function_list->pf_copy_2d( |
2729 | 0 | pu1_dst, |
2730 | 0 | MAX_CU_SIZE, |
2731 | 0 | pu1_src, |
2732 | 0 | ps_inter_ctb_prms->i4_inp_stride, |
2733 | 0 | u1_pu2_wd, |
2734 | 0 | u1_pu2_ht); |
2735 | |
|
2736 | 0 | ps_err_prms->pu1_inp = |
2737 | 0 | ps_inter_ctb_prms->s_pred_buf_mngr.apu1_pred_bufs[u1_inp_buf_idx]; |
2738 | 0 | ps_err_prms->i4_inp_stride = MAX_CU_SIZE; |
2739 | 0 | i2_wght = ai2_wt[1]; |
2740 | 0 | } |
2741 | 0 | } |
2742 | |
2743 | 6.52M | #if !DISABLE_TU_RECURSION |
2744 | 6.52M | i4_satd = apf_err_compute[e_cu_size]( |
2745 | 6.52M | ps_err_prms, |
2746 | 6.52M | lambda, |
2747 | 6.52M | lambda_qshift, |
2748 | 6.52M | ps_inter_ctb_prms->i4_qstep_ls8, |
2749 | 6.52M | ps_ctxt->ps_func_selector); |
2750 | | #else /* NOTE(review): this branch calls pf_err_compute, which is not declared in this function, and both arms of its if/else are identical */ |
2751 | | ps_err_prms->pi4_sad_grid = &i4_satd; |
2752 | |
2753 | | pf_err_compute(ps_err_prms); |
2754 | |
2755 | | if((part_type == PRT_2Nx2N) || (e_cu_size != CU_64x64)) |
2756 | | { |
2757 | | ai4_tu_split_flag[0] = 1; |
2758 | | ai4_tu_split_flag[1] = 1; |
2759 | | ai4_tu_split_flag[2] = 1; |
2760 | | ai4_tu_split_flag[3] = 1; |
2761 | |
2762 | | ps_err_prms->i4_tu_split_cost = 0; |
2763 | | } |
2764 | | else |
2765 | | { |
2766 | | ai4_tu_split_flag[0] = 1; |
2767 | | ai4_tu_split_flag[1] = 1; |
2768 | | ai4_tu_split_flag[2] = 1; |
2769 | | ai4_tu_split_flag[3] = 1; |
2770 | |
2771 | | ps_err_prms->i4_tu_split_cost = 0; |
2772 | | } |
2773 | | #endif |
2774 | |
2775 | 6.52M | #if UNI_SATD_SCALE |
2776 | 6.52M | i4_satd = (i4_satd * i2_wght) >> ps_inter_ctb_prms->wpred_log_wdc; |
2777 | 6.52M | #endif |
2778 | |
2779 | 6.52M | if(ps_inter_ctb_prms->u1_is_cu_noisy && ps_inter_ctb_prms->i4_alpha_stim_multiplier) /* noisy CU with a non-zero multiplier: scale i4_satd by a term derived from source and pred variances */ |
2780 | 0 | { |
2781 | 0 | ULWORD64 u8_temp_var, u8_temp_var1, u8_pred_sigmaSquaredX; |
2782 | 0 | ULWORD64 u8_src_variance, u8_pred_variance; |
2783 | 0 | unsigned long u4_shift_val; |
2784 | 0 | S32 i4_bits_req; |
2785 | 0 | S32 i4_q_level = STIM_Q_FORMAT + ALPHA_Q_FORMAT; |
2786 | |
|
2787 | 0 | if(1 == u1_num_parts) |
2788 | 0 | { |
2789 | 0 | u8_pred_sigmaSquaredX = au8_pred_sigmaX[i][0] * au8_pred_sigmaX[i][0]; |
2790 | 0 | u8_pred_variance = au8_pred_sigmaXSquare[i][0] - u8_pred_sigmaSquaredX; |
2791 | |
|
2792 | 0 | if(e_cu_size == CU_8x8) |
2793 | 0 | { |
2794 | 0 | PART_ID_T e_part_id = (PART_ID_T)( |
2795 | 0 | (PART_ID_NxN_TL) + (ps_cu_results->u1_x_off & 1) + |
2796 | 0 | ((ps_cu_results->u1_y_off & 1) << 1)); |
2797 | |
|
2798 | 0 | u4_shift_val = ihevce_calc_stim_injected_variance( |
2799 | 0 | ps_inter_ctb_prms->pu8_part_src_sigmaX, |
2800 | 0 | ps_inter_ctb_prms->pu8_part_src_sigmaXSquared, |
2801 | 0 | &u8_src_variance, |
2802 | 0 | ai4_inv_wt[0], |
2803 | 0 | ai4_inv_wt_shift_val[0], |
2804 | 0 | ps_inter_ctb_prms->wpred_log_wdc, |
2805 | 0 | e_part_id); |
2806 | 0 | } |
2807 | 0 | else /* same call as above, but with the function-level e_part_id (2Nx2N) instead of the shadowing NxN id */ |
2808 | 0 | { |
2809 | 0 | u4_shift_val = ihevce_calc_stim_injected_variance( |
2810 | 0 | ps_inter_ctb_prms->pu8_part_src_sigmaX, |
2811 | 0 | ps_inter_ctb_prms->pu8_part_src_sigmaXSquared, |
2812 | 0 | &u8_src_variance, |
2813 | 0 | ai4_inv_wt[0], |
2814 | 0 | ai4_inv_wt_shift_val[0], |
2815 | 0 | ps_inter_ctb_prms->wpred_log_wdc, |
2816 | 0 | e_part_id); |
2817 | 0 | } |
2818 | |
|
2819 | 0 | u8_pred_variance = u8_pred_variance >> u4_shift_val; |
2820 | |
|
2821 | 0 | GETRANGE64(i4_bits_req, u8_pred_variance); |
2822 | |
|
2823 | 0 | if(i4_bits_req > 27) |
2824 | 0 | { |
2825 | 0 | u8_pred_variance = u8_pred_variance >> (i4_bits_req - 27); |
2826 | 0 | u8_src_variance = u8_src_variance >> (i4_bits_req - 27); |
2827 | 0 | } |
2828 | |
|
2829 | 0 | if(u8_src_variance == u8_pred_variance) |
2830 | 0 | { |
2831 | 0 | u8_temp_var = (1 << STIM_Q_FORMAT); |
2832 | 0 | } |
2833 | 0 | else |
2834 | 0 | { |
2835 | 0 | u8_temp_var = (2 * u8_src_variance * u8_pred_variance); |
2836 | 0 | u8_temp_var = (u8_temp_var * (1 << STIM_Q_FORMAT)); |
2837 | 0 | u8_temp_var1 = (u8_src_variance * u8_src_variance) + |
2838 | 0 | (u8_pred_variance * u8_pred_variance); |
2839 | 0 | u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2)); |
2840 | 0 | u8_temp_var = (u8_temp_var / u8_temp_var1); |
2841 | 0 | } |
2842 | |
|
2843 | 0 | i4_noise_term = (UWORD32)u8_temp_var; |
2844 | |
|
2845 | 0 | ASSERT(i4_noise_term >= 0); |
2846 | |
2847 | 0 | i4_noise_term *= ps_inter_ctb_prms->i4_alpha_stim_multiplier; |
2848 | |
|
2849 | 0 | u8_temp_var = i4_satd; |
2850 | 0 | u8_temp_var *= ((1 << (i4_q_level)) - (i4_noise_term)); |
2851 | 0 | u8_temp_var += (1 << ((i4_q_level)-1)); |
2852 | 0 | i4_satd = (UWORD32)(u8_temp_var >> (i4_q_level)); |
2853 | 0 | } |
2854 | 0 | else /*if(e_cu_size <= CU_16x16)*/ |
2855 | 0 | { |
2856 | 0 | unsigned long temp_shift_val; |
2857 | 0 | PART_ID_T ae_part_id[MAX_NUM_INTER_PARTS] = { |
2858 | 0 | ge_part_type_to_part_id[part_type][0], ge_part_type_to_part_id[part_type][1] |
2859 | 0 | }; |
2860 | |
|
2861 | 0 | u4_shift_val = ihevce_calc_variance_for_diff_weights( |
2862 | 0 | ps_inter_ctb_prms->pu8_part_src_sigmaX, |
2863 | 0 | ps_inter_ctb_prms->pu8_part_src_sigmaXSquared, |
2864 | 0 | &u8_src_variance, |
2865 | 0 | ai4_inv_wt, |
2866 | 0 | ai4_inv_wt_shift_val, |
2867 | 0 | ps_best_results->as_pu_results, |
2868 | 0 | ps_inter_ctb_prms->wpred_log_wdc, |
2869 | 0 | ae_part_id, |
2870 | 0 | gau1_blk_size_to_wd[e_blk_size], |
2871 | 0 | u1_num_parts, |
2872 | 0 | 1); |
2873 | |
|
2874 | 0 | temp_shift_val = u4_shift_val; |
2875 | |
|
2876 | 0 | u4_shift_val = ihevce_calc_variance_for_diff_weights( |
2877 | 0 | au8_pred_sigmaX[i], |
2878 | 0 | au8_pred_sigmaXSquare[i], |
2879 | 0 | &u8_pred_variance, |
2880 | 0 | ai4_inv_wt, |
2881 | 0 | ai4_inv_wt_shift_val, |
2882 | 0 | ps_best_results->as_pu_results, |
2883 | 0 | 0, |
2884 | 0 | ae_part_id, |
2885 | 0 | gau1_blk_size_to_wd[e_blk_size], |
2886 | 0 | u1_num_parts, |
2887 | 0 | 0); |
2888 | |
|
2889 | 0 | u8_pred_variance = u8_pred_variance >> temp_shift_val; |
2890 | |
|
2891 | 0 | GETRANGE64(i4_bits_req, u8_pred_variance); |
2892 | |
|
2893 | 0 | if(i4_bits_req > 27) |
2894 | 0 | { |
2895 | 0 | u8_pred_variance = u8_pred_variance >> (i4_bits_req - 27); |
2896 | 0 | u8_src_variance = u8_src_variance >> (i4_bits_req - 27); |
2897 | 0 | } |
2898 | |
|
2899 | 0 | if(u8_src_variance == u8_pred_variance) |
2900 | 0 | { |
2901 | 0 | u8_temp_var = (1 << STIM_Q_FORMAT); |
2902 | 0 | } |
2903 | 0 | else |
2904 | 0 | { |
2905 | 0 | u8_temp_var = (2 * u8_src_variance * u8_pred_variance); |
2906 | 0 | u8_temp_var = (u8_temp_var * (1 << STIM_Q_FORMAT)); |
2907 | 0 | u8_temp_var1 = (u8_src_variance * u8_src_variance) + |
2908 | 0 | (u8_pred_variance * u8_pred_variance); |
2909 | 0 | u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2)); |
2910 | 0 | u8_temp_var = (u8_temp_var / u8_temp_var1); |
2911 | 0 | } |
2912 | |
|
2913 | 0 | i4_noise_term = (UWORD32)u8_temp_var; |
2914 | |
|
2915 | 0 | ASSERT(i4_noise_term >= 0); |
2916 | 0 | ASSERT(i4_noise_term <= (1 << (STIM_Q_FORMAT + ALPHA_Q_FORMAT))); |
2917 | |
2918 | 0 | i4_noise_term *= ps_inter_ctb_prms->i4_alpha_stim_multiplier; |
2919 | |
|
2920 | 0 | u8_temp_var = i4_satd; |
2921 | 0 | u8_temp_var *= ((1 << (i4_q_level)) - (i4_noise_term)); |
2922 | 0 | u8_temp_var += (1 << ((i4_q_level)-1)); |
2923 | 0 | i4_satd = (UWORD32)(u8_temp_var >> (i4_q_level)); |
2924 | |
|
2925 | 0 | ASSERT(i4_satd >= 0); |
2926 | 0 | } |
2927 | 0 | } |
2928 | |
2929 | 6.52M | if(u1_inp_buf_idx != UCHAR_MAX) /* release the temporary input buffer taken for differently weighted parts */ |
2930 | 0 | { |
2931 | 0 | ihevce_set_pred_buf_as_free( |
2932 | 0 | &ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator, |
2933 | 0 | u1_inp_buf_idx); |
2934 | 0 | } |
2935 | |
2936 | 6.52M | part_type_cost += i4_satd; |
2937 | |
2938 | | /*Update the best results with the new results */ |
2939 | 6.52M | ps_best_results->i4_tot_cost = part_type_cost; |
2940 | |
2941 | 6.52M | ps_best_results->i4_tu_split_cost = ps_err_prms->i4_tu_split_cost; |
2942 | |
2943 | 6.52M | ASSERT(ai4_tu_split_flag[0] >= 0); |
2944 | 6.52M | if(e_cu_size == CU_64x64) /* 64x64 copies all four split / early cbf flags, other sizes only entry 0 */ |
2945 | 72.5k | { |
2946 | 72.5k | ps_best_results->ai4_tu_split_flag[0] = ai4_tu_split_flag[0]; |
2947 | 72.5k | ps_best_results->ai4_tu_split_flag[1] = ai4_tu_split_flag[1]; |
2948 | 72.5k | ps_best_results->ai4_tu_split_flag[2] = ai4_tu_split_flag[2]; |
2949 | 72.5k | ps_best_results->ai4_tu_split_flag[3] = ai4_tu_split_flag[3]; |
2950 | |
2951 | | /* Update the TU early cbf flags into the best results structure */ |
2952 | 72.5k | ps_best_results->ai4_tu_early_cbf[0] = ai4_tu_early_cbf[0]; |
2953 | 72.5k | ps_best_results->ai4_tu_early_cbf[1] = ai4_tu_early_cbf[1]; |
2954 | 72.5k | ps_best_results->ai4_tu_early_cbf[2] = ai4_tu_early_cbf[2]; |
2955 | 72.5k | ps_best_results->ai4_tu_early_cbf[3] = ai4_tu_early_cbf[3]; |
2956 | 72.5k | } |
2957 | 6.45M | else |
2958 | 6.45M | { |
2959 | 6.45M | ps_best_results->ai4_tu_split_flag[0] = ai4_tu_split_flag[0]; |
2960 | 6.45M | ps_best_results->ai4_tu_early_cbf[0] = ai4_tu_early_cbf[0]; |
2961 | 6.45M | } |
2962 | |
2963 | 6.52M | if(part_type_cost < best_cost[num_best_cand - 1]) /* NOTE(review): a cost that is not below MAX_32BIT_VAL is never inserted, which leaves one ai4_final_idx entry at -1 */ |
2964 | 6.52M | { |
2965 | | /* Insert the current part type in sorted position: shift the costlier entries down by one so that best_cost[] and ai4_final_idx[] stay in increasing cost order */ |
2966 | 8.17M | for(cand_idx = 0; cand_idx < i; cand_idx++) |
2967 | 3.01M | { |
2968 | 3.01M | if(part_type_cost <= best_cost[cand_idx]) |
2969 | 1.37M | { |
2970 | 1.37M | memmove( |
2971 | 1.37M | &ai4_final_idx[cand_idx + 1], |
2972 | 1.37M | &ai4_final_idx[cand_idx], |
2973 | 1.37M | sizeof(WORD32) * (i - cand_idx)); |
2974 | 1.37M | memmove( |
2975 | 1.37M | &best_cost[cand_idx + 1], |
2976 | 1.37M | &best_cost[cand_idx], |
2977 | 1.37M | sizeof(WORD32) * (i - cand_idx)); |
2978 | 1.37M | break; |
2979 | 1.37M | } |
2980 | 3.01M | } |
2981 | |
2982 | 6.52M | ai4_final_idx[cand_idx] = i; |
2983 | 6.52M | best_cost[cand_idx] = part_type_cost; |
2984 | 6.52M | } |
2985 | 6.52M | } |
2986 | |
2987 | 4.30M | ps_cu_results->u1_num_best_results = num_best_cand; |
2988 | |
2989 | 10.8M | for(i = 0; i < num_best_cand; i++) /* copy the candidates out in sorted order; entries whose index is still -1 are skipped */ |
2990 | 6.52M | { |
2991 | 6.52M | ASSERT(ai4_final_idx[i] < num_best_cand); |
2992 | |
2993 | 6.52M | if(ai4_final_idx[i] != -1) |
2994 | 6.52M | { |
2995 | 6.52M | memcpy( |
2996 | 6.52M | &(ps_cu_results->ps_best_results[i]), |
2997 | 6.52M | &(ps_part_type_results[ai4_final_idx[i]]), |
2998 | 6.52M | sizeof(part_type_results_t)); |
2999 | 6.52M | } |
3000 | 6.52M | } |
3001 | 4.30M | } |
3002 | |
3003 | 30.1M | for(i = 0; i < (MAX_NUM_PRED_BUFS_USED_FOR_PARTTYPE_DECISIONS)-2; i++) /* mark the first MAX_NUM_PRED_BUFS_USED_FOR_PARTTYPE_DECISIONS - 2 pred buffers as free again */ |
3004 | 25.8M | { |
3005 | 25.8M | ihevce_set_pred_buf_as_free( |
3006 | 25.8M | &ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator, i); |
3007 | 25.8M | } |
3008 | 4.30M | } |
3009 | | |
3010 | | /** |
3011 | | ************************************************************************************************** |
3012 | | * @fn hme_populate_pus(search_results_t *ps_search_results, inter_cu_results_t *ps_cu_results) |
3013 | | * |
3014 | | * @brief Does the population of the inter_cu_results structure with the results after the |
3015 | | * subpel refinement |
3016 | | * |
3017 | | * This is called post subpel refinement for 16x16s, 8x8s and |
3018 | | * for post merge evaluation for 32x32,64x64 CUs |
3019 | | * |
3020 | | * @param[in,out] ps_search_results : Search results data structure |
3021 | | * - ps_cu_results : cu_results data structure |
3022 | | * ps_pu_result : Pointer to the memory for storing PU's |
3023 | | * |
3024 | | **************************************************************************************************** |
3025 | | */ |
3026 | | void hme_populate_pus( |
3027 | | me_ctxt_t *ps_thrd_ctxt, |
3028 | | me_frm_ctxt_t *ps_ctxt, |
3029 | | hme_subpel_prms_t *ps_subpel_prms, |
3030 | | search_results_t *ps_search_results, |
3031 | | inter_cu_results_t *ps_cu_results, |
3032 | | inter_pu_results_t *ps_pu_results, |
3033 | | pu_result_t *ps_pu_result, |
3034 | | inter_ctb_prms_t *ps_inter_ctb_prms, |
3035 | | wgt_pred_ctxt_t *ps_wt_prms, |
3036 | | layer_ctxt_t *ps_curr_layer, |
3037 | | U08 *pu1_pred_dir_searched, |
3038 | | WORD32 i4_num_active_ref) |
3039 | 1.97M | { |
3040 | 1.97M | WORD32 i, j, k; |
3041 | 1.97M | WORD32 i4_part_mask; |
3042 | 1.97M | WORD32 i4_ref; |
3043 | 1.97M | UWORD8 e_part_id; |
3044 | 1.97M | pu_result_t *ps_curr_pu; |
3045 | 1.97M | search_node_t *ps_search_node; |
3046 | 1.97M | part_attr_t *ps_part_attr; |
3047 | 1.97M | UWORD8 e_cu_size = ps_search_results->e_cu_size; |
3048 | 1.97M | WORD32 num_results_per_part_l0 = 0; |
3049 | 1.97M | WORD32 num_results_per_part_l1 = 0; |
3050 | 1.97M | WORD32 i4_ref_id; |
3051 | 1.97M | WORD32 i4_total_act_ref; |
3052 | | |
3053 | 1.97M | i4_part_mask = ps_search_results->i4_part_mask; |
3054 | | |
3055 | | /* pred_buf_mngr init */ |
3056 | 1.97M | { |
3057 | 1.97M | hme_get_wkg_mem(&ps_ctxt->s_buf_mgr, MAX_WKG_MEM_SIZE_PER_THREAD); |
3058 | | |
3059 | 1.97M | ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator = UINT_MAX; |
3060 | | |
3061 | 13.7M | for(i = 0; i < MAX_NUM_PRED_BUFS_USED_FOR_PARTTYPE_DECISIONS - 2; i++) |
3062 | 11.8M | { |
3063 | 11.8M | ps_inter_ctb_prms->s_pred_buf_mngr.apu1_pred_bufs[i] = |
3064 | 11.8M | ps_ctxt->s_buf_mgr.pu1_wkg_mem + i * INTERP_OUT_BUF_SIZE; |
3065 | 11.8M | ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator &= ~(1 << i); |
3066 | 11.8M | } |
3067 | | |
3068 | 1.97M | ps_inter_ctb_prms->pu1_wkg_mem = ps_ctxt->s_buf_mgr.pu1_wkg_mem + i * INTERP_OUT_BUF_SIZE; |
3069 | 1.97M | } |
3070 | | |
3071 | 1.97M | ps_inter_ctb_prms->i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME; |
3072 | 1.97M | ps_inter_ctb_prms->u1_is_cu_noisy = ps_subpel_prms->u1_is_cu_noisy; |
3073 | 1.97M | ps_inter_ctb_prms->i4_lamda = ps_search_results->as_pred_ctxt[0].lambda; |
3074 | | |
3075 | | /* Populate the CU level parameters */ |
3076 | 1.97M | ps_cu_results->u1_cu_size = ps_search_results->e_cu_size; |
3077 | 1.97M | ps_cu_results->u1_num_best_results = ps_search_results->u1_num_best_results; |
3078 | 1.97M | ps_cu_results->i4_part_mask = ps_search_results->i4_part_mask; |
3079 | 1.97M | ps_cu_results->u1_x_off = ps_search_results->u1_x_off; |
3080 | 1.97M | ps_cu_results->u1_y_off = ps_search_results->u1_y_off; |
3081 | | |
3082 | 1.97M | i4_total_act_ref = |
3083 | 1.97M | ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1; |
3084 | | /*Populate the partition results |
3085 | | Loop across all the active references that are enabled right now */ |
3086 | 17.7M | for(i = 0; i < MAX_PART_TYPES; i++) |
3087 | 15.7M | { |
3088 | 15.7M | if(!(i4_part_mask & gai4_part_type_to_part_mask[i])) |
3089 | 8.42M | { |
3090 | 8.42M | continue; |
3091 | 8.42M | } |
3092 | | |
3093 | 21.2M | for(j = 0; j < gau1_num_parts_in_part_type[i]; j++) |
3094 | 13.9M | { |
3095 | | /* Partition ID for the current PU */ |
3096 | 13.9M | e_part_id = (UWORD8)ge_part_type_to_part_id[i][j]; |
3097 | 13.9M | ps_part_attr = &gas_part_attr_in_cu[e_part_id]; |
3098 | | |
3099 | 13.9M | num_results_per_part_l0 = 0; |
3100 | 13.9M | num_results_per_part_l1 = 0; |
3101 | | |
3102 | 13.9M | ps_pu_results->aps_pu_results[0][e_part_id] = |
3103 | 13.9M | ps_pu_result + (e_part_id * MAX_NUM_RESULTS_PER_PART_LIST); |
3104 | 13.9M | ps_pu_results->aps_pu_results[1][e_part_id] = |
3105 | 13.9M | ps_pu_result + ((e_part_id + TOT_NUM_PARTS) * MAX_NUM_RESULTS_PER_PART_LIST); |
3106 | | |
3107 | 31.1M | for(i4_ref = 0; i4_ref < i4_num_active_ref; i4_ref++) |
3108 | 17.1M | { |
3109 | 17.1M | U08 u1_pred_dir = pu1_pred_dir_searched[i4_ref]; |
3110 | | |
3111 | 34.3M | for(k = 0; k < ps_search_results->u1_num_results_per_part; k++) |
3112 | 17.1M | { |
3113 | 17.1M | ps_search_node = |
3114 | 17.1M | &ps_search_results->aps_part_results[u1_pred_dir][e_part_id][k]; |
3115 | | |
3116 | | /* If subpel is done then the node is a valid candidate else break the loop */ |
3117 | 17.1M | if(ps_search_node->u1_subpel_done) |
3118 | 17.1M | { |
3119 | 17.1M | i4_ref_id = ps_search_node->i1_ref_idx; |
3120 | | |
3121 | 17.1M | ASSERT(i4_ref_id >= 0); |
3122 | | |
3123 | | /* Check whether current ref_id is past or future and assign the pointers to L0 or L1 list accordingly */ |
3124 | 17.1M | if(!u1_pred_dir) |
3125 | 13.8M | { |
3126 | 13.8M | ps_curr_pu = ps_pu_results->aps_pu_results[0][e_part_id] + |
3127 | 13.8M | num_results_per_part_l0; |
3128 | | |
3129 | 13.8M | ASSERT( |
3130 | 13.8M | ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_id] < |
3131 | 13.8M | ps_inter_ctb_prms->u1_num_active_ref_l0); |
3132 | | |
3133 | | /* Always populate the ref_idx value in l0_ref_idx */ |
3134 | 13.8M | ps_curr_pu->pu.mv.i1_l0_ref_idx = |
3135 | 13.8M | ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_id]; |
3136 | 13.8M | ps_curr_pu->pu.mv.s_l0_mv = ps_search_node->s_mv; |
3137 | 13.8M | ps_curr_pu->pu.mv.i1_l1_ref_idx = -1; |
3138 | 13.8M | ps_curr_pu->pu.b2_pred_mode = PRED_L0; |
3139 | | |
3140 | 13.8M | ps_inter_ctb_prms->apu1_wt_inp[0][ps_curr_pu->pu.mv.i1_l0_ref_idx] = |
3141 | 13.8M | ps_wt_prms->apu1_wt_inp[i4_ref_id]; |
3142 | | |
3143 | 13.8M | num_results_per_part_l0++; |
3144 | 13.8M | } |
3145 | 3.34M | else |
3146 | 3.34M | { |
3147 | 3.34M | ps_curr_pu = ps_pu_results->aps_pu_results[1][e_part_id] + |
3148 | 3.34M | num_results_per_part_l1; |
3149 | | |
3150 | 3.34M | ASSERT( |
3151 | 3.34M | ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_id] < |
3152 | 3.34M | ps_inter_ctb_prms->u1_num_active_ref_l1); |
3153 | | |
3154 | | /* populate the ref_idx value in l1_ref_idx */ |
3155 | 3.34M | ps_curr_pu->pu.mv.i1_l1_ref_idx = |
3156 | 3.34M | ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_id]; |
3157 | 3.34M | ps_curr_pu->pu.mv.s_l1_mv = ps_search_node->s_mv; |
3158 | 3.34M | ps_curr_pu->pu.mv.i1_l0_ref_idx = -1; |
3159 | 3.34M | ps_curr_pu->pu.b2_pred_mode = PRED_L1; |
3160 | | |
3161 | | /* Copy the values from weighted params to common_frm_aprams */ |
3162 | 3.34M | ps_inter_ctb_prms->apu1_wt_inp[1][ps_curr_pu->pu.mv.i1_l1_ref_idx] = |
3163 | 3.34M | ps_wt_prms->apu1_wt_inp[i4_ref_id]; |
3164 | | |
3165 | 3.34M | num_results_per_part_l1++; |
3166 | 3.34M | } |
3167 | 17.1M | ps_curr_pu->i4_mv_cost = ps_search_node->i4_mv_cost; |
3168 | 17.1M | ps_curr_pu->i4_sdi = ps_search_node->i4_sdi; |
3169 | | |
3170 | 17.1M | #if UNI_SATD_SCALE |
3171 | | /*SATD is scaled by weight. Hence rescale the SATD */ |
3172 | 17.1M | ps_curr_pu->i4_tot_cost = |
3173 | 17.1M | ((ps_search_node->i4_sad * |
3174 | 17.1M | ps_ctxt->s_wt_pred.a_wpred_wt[ps_search_node->i1_ref_idx] + |
3175 | 17.1M | (1 << (ps_inter_ctb_prms->wpred_log_wdc - 1))) >> |
3176 | 17.1M | ps_inter_ctb_prms->wpred_log_wdc) + |
3177 | 17.1M | ps_search_node->i4_mv_cost; |
3178 | 17.1M | #endif |
3179 | | |
3180 | | /* Packed format of the width and height */ |
3181 | 17.1M | ps_curr_pu->pu.b4_wd = ((ps_part_attr->u1_x_count << e_cu_size) >> 2) - 1; |
3182 | 17.1M | ps_curr_pu->pu.b4_ht = ((ps_part_attr->u1_y_count << e_cu_size) >> 2) - 1; |
3183 | | |
3184 | 17.1M | ps_curr_pu->pu.b4_pos_x = |
3185 | 17.1M | (((ps_part_attr->u1_x_start << e_cu_size) + ps_cu_results->u1_x_off) >> |
3186 | 17.1M | 2); |
3187 | 17.1M | ps_curr_pu->pu.b4_pos_y = |
3188 | 17.1M | (((ps_part_attr->u1_y_start << e_cu_size) + ps_cu_results->u1_y_off) >> |
3189 | 17.1M | 2); |
3190 | | |
3191 | 17.1M | ps_curr_pu->pu.b1_intra_flag = 0; |
3192 | | |
3193 | | /* Unweighted input */ |
3194 | 17.1M | ps_inter_ctb_prms->pu1_non_wt_inp = |
3195 | 17.1M | ps_wt_prms->apu1_wt_inp[i4_total_act_ref]; |
3196 | | |
3197 | 17.1M | ps_search_node++; |
3198 | 17.1M | } |
3199 | 0 | else |
3200 | 0 | { |
3201 | 0 | break; |
3202 | 0 | } |
3203 | 17.1M | } |
3204 | 17.1M | } |
3205 | | |
3206 | 13.9M | ps_pu_results->u1_num_results_per_part_l0[e_part_id] = num_results_per_part_l0; |
3207 | 13.9M | ps_pu_results->u1_num_results_per_part_l1[e_part_id] = num_results_per_part_l1; |
3208 | 13.9M | } |
3209 | 7.34M | } |
3210 | 1.97M | } |
3211 | | |
3212 | | /**
3213 | | *********************************************************************************************************
3214 | | * @fn hme_populate_pus_8x8_cu
3215 | | *
3216 | | * @brief Populates inter_cu_results / inter_pu_results for the (up to) four 8x8 CUs
3217 | | * selected by u1_blk_8x8_mask, using the NxN partition search results
3218 | | *
3219 | | * Each 8x8 CU is given a 2Nx2N part mask; when ENABLE_NxN is absent from the
3220 | | * search results' part mask the CU is marked as having no inter results
3221 | | *
3222 | | * @param[in] ps_search_results : search results (NxN nodes, offsets, part mask)
3223 | | * @param[in,out] ps_cu_results : array of CU results, advanced once per 8x8 index
3224 | | * @param[in,out] ps_pu_results : array of PU result tables, advanced with ps_cu_results
3225 | | * @param[in] ps_pu_result : backing memory the PU result pointers are set into
3226 | | * ps_thrd_ctxt and ps_subpel_prms are not referenced in the body
3227 | | *********************************************************************************************************
3228 | | */
3229 | | void hme_populate_pus_8x8_cu(
3230 | | me_ctxt_t *ps_thrd_ctxt,
3231 | | me_frm_ctxt_t *ps_ctxt,
3232 | | hme_subpel_prms_t *ps_subpel_prms,
3233 | | search_results_t *ps_search_results,
3234 | | inter_cu_results_t *ps_cu_results,
3235 | | inter_pu_results_t *ps_pu_results,
3236 | | pu_result_t *ps_pu_result,
3237 | | inter_ctb_prms_t *ps_inter_ctb_prms,
3238 | | U08 *pu1_pred_dir_searched,
3239 | | WORD32 i4_num_active_ref,
3240 | | U08 u1_blk_8x8_mask)
3241 | 621k | {
3242 | 621k | WORD32 i, k;
3243 | 621k | WORD32 i4_part_mask;
3244 | 621k | WORD32 i4_ref;
3245 | 621k | pu_result_t *ps_curr_pu;
3246 | 621k | search_node_t *ps_search_node;
3247 | 621k | WORD32 i4_ref_id;
3248 | 621k | WORD32 x_off, y_off;
3249 | |
3250 | | /* Make part mask available as only 2Nx2N
3251 | | Later support for 4x8 and 8x4 needs to be added */
3252 | 621k | i4_part_mask = ENABLE_2Nx2N;
3253 | |
3254 | 621k | x_off = ps_search_results->u1_x_off;
3255 | 621k | y_off = ps_search_results->u1_y_off;
3256 | |
3257 | 3.10M | for(i = 0; i < 4; i++)
3258 | 2.48M | {
3259 | 2.48M | if(u1_blk_8x8_mask & (1 << i)) /* bit i set => populate 8x8 CU number i */
3260 | 2.42M | {
3261 | 2.42M | UWORD8 u1_x_pos, u1_y_pos;
3262 | |
3263 | 2.42M | WORD32 num_results_per_part_l0 = 0;
3264 | 2.42M | WORD32 num_results_per_part_l1 = 0;
3265 | |
3266 | 2.42M | ps_cu_results->u1_cu_size = CU_8x8;
3267 | 2.42M | ps_cu_results->u1_num_best_results = ps_search_results->u1_num_best_results;
3268 | 2.42M | ps_cu_results->i4_part_mask = i4_part_mask;
3269 | 2.42M | ps_cu_results->u1_x_off = x_off + (i & 1) * 8; /* odd i => right half (+8 in x) */
3270 | 2.42M | ps_cu_results->u1_y_off = y_off + (i >> 1) * 8; /* i >= 2 => bottom half (+8 in y) */
3271 | 2.42M | ps_cu_results->i4_inp_offset = ps_cu_results->u1_x_off + (ps_cu_results->u1_y_off * 64); /* x + y * 64, i.e. a row stride of 64 */
3272 | |
3273 | 2.42M | ps_cu_results->ps_best_results[0].i4_tot_cost = MAX_32BIT_VAL;
3274 | 2.42M | ps_cu_results->ps_best_results[0].i4_tu_split_cost = 0;
3275 | |
3276 | 2.42M | u1_x_pos = ps_cu_results->u1_x_off >> 2; /* CU offsets in units of 4 */
3277 | 2.42M | u1_y_pos = ps_cu_results->u1_y_off >> 2;
3278 | |
3279 | 2.42M | if(!(ps_search_results->i4_part_mask & ENABLE_NxN)) /* NxN not enabled in the search results: no inter candidates for this 8x8 CU */
3280 | 91.9k | {
3281 | 91.9k | ps_curr_pu = &ps_cu_results->ps_best_results[0].as_pu_results[0];
3282 | |
3283 | 91.9k | ps_cu_results->i4_part_mask = 0;
3284 | 91.9k | ps_cu_results->u1_num_best_results = 0;
3285 | |
3286 | 91.9k | ps_curr_pu->i4_tot_cost = MAX_32BIT_VAL;
3287 | |
3288 | 91.9k | ps_curr_pu->pu.b4_wd = 1; /* presumably (8 >> 2) - 1, i.e. an 8-wide PU in packed form - confirm against the pu_t definition */
3289 | 91.9k | ps_curr_pu->pu.b4_ht = 1;
3290 | 91.9k | ps_curr_pu->pu.b4_pos_x = u1_x_pos;
3291 | 91.9k | ps_curr_pu->pu.b4_pos_y = u1_y_pos;
3292 | 91.9k | ps_cu_results->ps_best_results[0].i4_tu_split_cost = 0;
3293 | |
3294 | 91.9k | ps_cu_results++; /* advance here because the continue below skips the increment at the loop end */
3295 | 91.9k | ps_pu_results++;
3296 | |
3297 | 91.9k | continue;
3298 | 91.9k | }
3299 | |
3300 | 2.33M | ps_pu_results->aps_pu_results[0][0] =
3301 | 2.33M | ps_pu_result + (i * MAX_NUM_RESULTS_PER_PART_LIST);
3302 | 2.33M | ps_pu_results->aps_pu_results[1][0] =
3303 | 2.33M | ps_pu_result + ((i + TOT_NUM_PARTS) * MAX_NUM_RESULTS_PER_PART_LIST);
3304 | |
3305 | 5.29M | for(i4_ref = 0; i4_ref < i4_num_active_ref; i4_ref++)
3306 | 2.96M | {
3307 | 2.96M | U08 u1_pred_dir = pu1_pred_dir_searched[i4_ref];
3308 | |
3309 | | /* Select the NxN partition node for the current ref_idx in the search results*/
3310 | 2.96M | ps_search_node =
3311 | 2.96M | ps_search_results->aps_part_results[u1_pred_dir][PART_ID_NxN_TL + i];
3312 | |
3313 | 5.92M | for(k = 0; k < ps_search_results->u1_num_results_per_part; k++)
3314 | 2.96M | {
3315 | | /* A node is a valid candidate if it is marked available or subpel was done on it, else break the loop */
3316 | 2.96M | if((ps_search_node->u1_is_avail) || (ps_search_node->u1_subpel_done))
3317 | 2.96M | {
3318 | 2.96M | i4_ref_id = ps_search_node->i1_ref_idx;
3319 | |
3320 | 2.96M | ASSERT(i4_ref_id >= 0);
3321 | |
3322 | 2.96M | if(!u1_pred_dir) /* pred dir 0 => store the result in the L0 list, otherwise in L1 */
3323 | 2.31M | {
3324 | 2.31M | ps_curr_pu =
3325 | 2.31M | ps_pu_results->aps_pu_results[0][0] + num_results_per_part_l0;
3326 | |
3327 | 2.31M | ASSERT(
3328 | 2.31M | ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_id] <
3329 | 2.31M | ps_inter_ctb_prms->u1_num_active_ref_l0);
3330 | |
3331 | 2.31M | ps_curr_pu->pu.mv.i1_l0_ref_idx =
3332 | 2.31M | ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_id];
3333 | 2.31M | ps_curr_pu->pu.mv.s_l0_mv = ps_search_node->s_mv;
3334 | 2.31M | ps_curr_pu->pu.mv.i1_l1_ref_idx = -1;
3335 | 2.31M | ps_curr_pu->pu.b2_pred_mode = PRED_L0;
3336 | |
3337 | 2.31M | num_results_per_part_l0++;
3338 | 2.31M | }
3339 | 646k | else
3340 | 646k | {
3341 | 646k | ps_curr_pu =
3342 | 646k | ps_pu_results->aps_pu_results[1][0] + num_results_per_part_l1;
3343 | |
3344 | 646k | ASSERT(
3345 | 646k | ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_id] <
3346 | 646k | ps_inter_ctb_prms->u1_num_active_ref_l1);
3347 | |
3348 | 646k | ps_curr_pu->pu.mv.i1_l1_ref_idx =
3349 | 646k | ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_id];
3350 | 646k | ps_curr_pu->pu.mv.s_l1_mv = ps_search_node->s_mv;
3351 | 646k | ps_curr_pu->pu.mv.i1_l0_ref_idx = -1;
3352 | 646k | ps_curr_pu->pu.b2_pred_mode = PRED_L1;
3353 | |
3354 | 646k | num_results_per_part_l1++;
3355 | 646k | }
3356 | 2.96M | ps_curr_pu->i4_mv_cost = ps_search_node->i4_mv_cost;
3357 | 2.96M | ps_curr_pu->i4_sdi = ps_search_node->i4_sdi;
3358 | |
3359 | 2.96M | #if UNI_SATD_SCALE
3360 | | /* SATD is scaled by weight. Hence rescale the SATD.
3361 | | Note: i4_tot_cost is written here only when UNI_SATD_SCALE is enabled */
3361 | 2.96M | ps_curr_pu->i4_tot_cost =
3362 | 2.96M | ((ps_search_node->i4_sad *
3363 | 2.96M | ps_ctxt->s_wt_pred.a_wpred_wt[ps_search_node->i1_ref_idx] +
3364 | 2.96M | (1 << (ps_inter_ctb_prms->wpred_log_wdc - 1))) >>
3365 | 2.96M | ps_inter_ctb_prms->wpred_log_wdc) +
3366 | 2.96M | ps_search_node->i4_mv_cost;
3367 | 2.96M | #endif
3368 | |
3369 | 2.96M | ps_curr_pu->pu.b4_wd = 1;
3370 | 2.96M | ps_curr_pu->pu.b4_ht = 1;
3371 | 2.96M | ps_curr_pu->pu.b4_pos_x = u1_x_pos;
3372 | 2.96M | ps_curr_pu->pu.b4_pos_y = u1_y_pos;
3373 | 2.96M | ps_curr_pu->pu.b1_intra_flag = 0;
3374 | |
3375 | 2.96M | ps_search_node++;
3376 | 2.96M | }
3377 | 0 | else
3378 | 0 | {
3379 | | /* if NxN was not evaluated at 16x16 level, assign max cost to 8x8 CU
3380 | | to remove 8x8's as possible candidates during evaluation */
3381 |

3382 | 0 | ps_curr_pu = ps_pu_results->aps_pu_results[0][0] + num_results_per_part_l0;
3383 |

3384 | 0 | ps_curr_pu->i4_tot_cost = MAX_32BIT_VAL;
3385 |

3386 | 0 | ps_curr_pu = ps_pu_results->aps_pu_results[1][0] + num_results_per_part_l1;
3387 |

3388 | 0 | ps_curr_pu->i4_tot_cost = MAX_32BIT_VAL;
3389 |

3390 | 0 | break;
3391 | 0 | }
3392 | 2.96M | }
3393 | 2.96M | }
3394 | |
3395 | | /* Update the num_results per_part across lists L0 and L1 */
3396 | 2.33M | ps_pu_results->u1_num_results_per_part_l0[0] = num_results_per_part_l0;
3397 | 2.33M | ps_pu_results->u1_num_results_per_part_l1[0] = num_results_per_part_l1;
3398 | 2.33M | }
3399 | 2.39M | ps_cu_results++; /* move to the next 8x8 CU's result slots (also when bit i of the mask is clear) */
3400 | 2.39M | ps_pu_results++;
3401 | 2.39M | }
3402 | 621k | }
3403 | | |
3404 | | /**
3405 | | ********************************************************************************
3406 | | * @fn hme_insert_intra_nodes_post_bipred
3407 | | *
3408 | | * @brief Compares the 2Nx2N intra cost (populated by IPE) with the best inter
3409 | | * costs (populated after evaluating bi-pred) and inserts an intra result
3410 | | * ahead of the first inter result it beats; at most one insertion is made
3411 | | *
3412 | | * @param[in,out] ps_cu_results : Best results structure of the CU
3413 | | * @param[in] ps_cur_ipe_ctb : intra results for the current CTB
3414 | | * @param[in] i4_frm_qstep : current frame quantizer (qscale)
3415 | | *
3416 | | * @return None
3417 | | ********************************************************************************
3418 | | */
3419 | | void hme_insert_intra_nodes_post_bipred(
3420 | | inter_cu_results_t *ps_cu_results,
3421 | | ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
3422 | | WORD32 i4_frm_qstep)
3423 | 4.29M | {
3424 | 4.29M | WORD32 i;
3425 | 4.29M | WORD32 num_results;
3426 | 4.29M | WORD32 cu_size = ps_cu_results->u1_cu_size;
3427 | 4.29M | UWORD8 u1_x_off = ps_cu_results->u1_x_off;
3428 | 4.29M | UWORD8 u1_y_off = ps_cu_results->u1_y_off;
3429 | |
3430 | | /* Id of the 32x32 block, 16x16 block in a CTB */
3431 | 4.29M | WORD32 i4_32x32_id = (u1_y_off >> 5) * 2 + (u1_x_off >> 5);
3432 | 4.29M | WORD32 i4_16x16_id = ((u1_y_off >> 4) & 0x1) * 2 + ((u1_x_off >> 4) & 0x1);
3433 | |
3434 | | /* Flags to indicate if intra64/intra32/intra16 cusize are invalid as per IPE decision */
3435 | 4.29M | WORD32 disable_intra64 = 0;
3436 | 4.29M | WORD32 disable_intra32 = 0;
3437 | 4.29M | WORD32 disable_intra16 = 0;
3438 | |
3439 | 4.29M | S32 i4_intra_2nx2n_cost;
3440 | |
3441 | | /* ME final results for this CU (post seeding of best uni/bi pred results) */
3442 | 4.29M | part_type_results_t *ps_best_result;
3443 | |
3444 | 4.29M | i4_frm_qstep *= !L0ME_IN_OPENLOOP_MODE; /* zeroes the qstep (and so the noise terms below) when L0ME_IN_OPENLOOP_MODE is nonzero */
3445 | |
3446 | | /* An 8x8 CU that carries no inter results still gets one result slot, so that the insertion loop below examines ps_best_results[0] */
3447 | |
3448 | 4.29M | if((ps_cu_results->u1_num_best_results == 0) && (CU_8x8 == ps_cu_results->u1_cu_size))
3449 | 91.9k | {
3450 | 91.9k | ps_cu_results->u1_num_best_results = 1;
3451 | 91.9k | }
3452 | |
3453 | 4.29M | num_results = ps_cu_results->u1_num_best_results;
3454 | |
3455 | 4.29M | ps_best_result = &ps_cu_results->ps_best_results[0];
3456 | |
3457 | | /* Disable intra16/32/64 flags based on split flags recommended by IPE */
3458 | 4.29M | if(ps_cur_ipe_ctb->u1_split_flag)
3459 | 4.20M | {
3460 | 4.20M | disable_intra64 = 1;
3461 | 4.20M | if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
3462 | 1.84M | {
3463 | 1.84M | disable_intra32 = 1;
3464 | |
3465 | 1.84M | if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
3466 | 1.84M | .as_intra16_analyse[i4_16x16_id]
3467 | 1.84M | .b1_split_flag)
3468 | 581k | {
3469 | 581k | disable_intra16 = 1;
3470 | 581k | }
3471 | 1.84M | }
3472 | 4.20M | }
3473 | |
3474 | | /* Derive the intra cost based on current cu size and offset */
3475 | 4.29M | switch(cu_size)
3476 | 4.29M | {
3477 | 2.42M | case CU_8x8:
3478 | 2.42M | {
3479 | 2.42M | i4_intra_2nx2n_cost = ps_cur_ipe_ctb->ai4_best8x8_intra_cost[u1_y_off + (u1_x_off >> 3)]; /* NOTE(review): u1_y_off is used unshifted as the row term; this equals (y >> 3) * 8 only when u1_y_off is a multiple of 8 - confirm */
3480 | |
3481 | | /* Accounting for coding noise in the open loop IPE cost */
3482 | 2.42M | i4_intra_2nx2n_cost +=
3483 | 2.42M | ((i4_frm_qstep * 16) >> 2) /*+ ((i4_frm_qstep*i4_intra_2nx2n_cost)/256) */;
3484 | |
3485 | 2.42M | break;
3486 | 0 | }
3487 | |
3488 | 1.49M | case CU_16x16:
3489 | 1.49M | {
3490 | 1.49M | i4_intra_2nx2n_cost =
3491 | 1.49M | ps_cur_ipe_ctb->ai4_best16x16_intra_cost[(u1_y_off >> 4) * 4 + (u1_x_off >> 4)];
3492 | |
3493 | | /* Accounting for coding noise in the open loop IPE cost */
3494 | 1.49M | i4_intra_2nx2n_cost +=
3495 | 1.49M | ((i4_frm_qstep * 16)); /* + ((i4_frm_qstep*i4_intra_2nx2n_cost)/256) */
3496 | |
3497 | 1.49M | if(disable_intra16)
3498 | 139k | {
3499 | | /* Disable intra 2Nx2N (intra 16) as IPE suggested best mode as 8x8 */
3500 | 139k | i4_intra_2nx2n_cost = MAX_32BIT_VAL;
3501 | 139k | }
3502 | 1.49M | break;
3503 | 0 | }
3504 | |
3505 | 348k | case CU_32x32:
3506 | 348k | {
3507 | 348k | i4_intra_2nx2n_cost =
3508 | 348k | ps_cur_ipe_ctb->ai4_best32x32_intra_cost[(u1_y_off >> 5) * 2 + (u1_x_off >> 5)];
3509 | |
3510 | | /* Accounting for coding noise in the open loop IPE cost */
3511 | 348k | i4_intra_2nx2n_cost +=
3512 | 348k | (i4_frm_qstep * 16 * 4) /* + ((i4_frm_qstep*i4_intra_2nx2n_cost)/256) */;
3513 | |
3514 | 348k | if(disable_intra32)
3515 | 160k | {
3516 | | /* Disable intra 2Nx2N (intra 32) as IPE suggested best mode as 16x16 or 8x8 */
3517 | 160k | i4_intra_2nx2n_cost = MAX_32BIT_VAL;
3518 | 160k | }
3519 | 348k | break;
3520 | 0 | }
3521 | |
3522 | 25.4k | case CU_64x64:
3523 | 25.4k | {
3524 | 25.4k | i4_intra_2nx2n_cost = ps_cur_ipe_ctb->i4_best64x64_intra_cost;
3525 | |
3526 | | /* Accounting for coding noise in the open loop IPE cost */
3527 | 25.4k | i4_intra_2nx2n_cost +=
3528 | 25.4k | (i4_frm_qstep * 16 * 16) /* + ((i4_frm_qstep*i4_intra_2nx2n_cost)/256) */;
3529 | |
3530 | 25.4k | if(disable_intra64)
3531 | 24.5k | {
3532 | | /* Disable intra 2Nx2N (intra 64) as IPE suggested best mode as 32x32 /16x16 / 8x8 */
3533 | 24.5k | i4_intra_2nx2n_cost = MAX_32BIT_VAL;
3534 | 24.5k | }
3535 | 25.4k | break;
3536 | 0 | }
3537 | |
3538 | 0 | default:
3539 | 0 | ASSERT(0); /* NOTE(review): if ASSERT compiles to nothing in some build, i4_intra_2nx2n_cost is read below without having been assigned - confirm */
3540 | 4.29M | }
3541 | |
3542 | 4.29M | {
3543 | | /*****************************************************************/
3544 | | /* Intra / Inter cost comparison for 2Nx2N : cu size 8/16/32/64 */
3545 | | /* Identify where the current result is to be placed. Basically */
3546 | | /* find the first node whose cost is higher than the node under */
3546 | | /* test */
3547 | | /*****************************************************************/
3548 | 10.5M | for(i = 0; i < num_results; i++)
3549 | 6.50M | {
3550 | | /* Subtract the tu_split_cost from the total inter cost for a fair comparison */
3551 | 6.50M | WORD32 inter_cost = ps_best_result[i].i4_tot_cost - ps_best_result[i].i4_tu_split_cost;
3552 | |
3553 | 6.50M | if(i4_intra_2nx2n_cost < inter_cost)
3554 | 216k | {
3555 | 216k | if(i < (num_results - 1)) /* shift entries i .. num_results-2 down by one; the former last entry is dropped */
3556 | 12.4k | {
3557 | 12.4k | memmove(
3558 | 12.4k | ps_best_result + i + 1,
3559 | 12.4k | ps_best_result + i,
3560 | 12.4k | sizeof(ps_best_result[0]) * (num_results - 1 - i));
3561 | 12.4k | }
3562 | |
3563 | | /* Insert the intra node result */
3564 | 216k | ps_best_result[i].u1_part_type = PRT_2Nx2N;
3565 | 216k | ps_best_result[i].i4_tot_cost = i4_intra_2nx2n_cost;
3566 | 216k | ps_best_result[i].ai4_tu_split_flag[0] = 0;
3567 | 216k | ps_best_result[i].ai4_tu_split_flag[1] = 0;
3568 | 216k | ps_best_result[i].ai4_tu_split_flag[2] = 0;
3569 | 216k | ps_best_result[i].ai4_tu_split_flag[3] = 0;
3570 | |
3571 | | /* Populate intra flag, cost and default mvs, refidx for intra pu */
3572 | 216k | ps_best_result[i].as_pu_results[0].i4_tot_cost = i4_intra_2nx2n_cost;
3573 | | //ps_best_result[i].as_pu_results[0].i4_sad = i4_intra_2nx2n_cost;
3574 | 216k | ps_best_result[i].as_pu_results[0].i4_mv_cost = 0;
3575 | 216k | ps_best_result[i].as_pu_results[0].pu.b1_intra_flag = 1;
3576 | 216k | ps_best_result[i].as_pu_results[0].pu.mv.i1_l0_ref_idx = -1;
3577 | 216k | ps_best_result[i].as_pu_results[0].pu.mv.i1_l1_ref_idx = -1;
3578 | 216k | ps_best_result[i].as_pu_results[0].pu.mv.s_l0_mv.i2_mvx = INTRA_MV;
3579 | 216k | ps_best_result[i].as_pu_results[0].pu.mv.s_l0_mv.i2_mvy = INTRA_MV;
3580 | 216k | ps_best_result[i].as_pu_results[0].pu.mv.s_l1_mv.i2_mvx = INTRA_MV;
3581 | 216k | ps_best_result[i].as_pu_results[0].pu.mv.s_l1_mv.i2_mvy = INTRA_MV;
3582 | |
3583 | 216k | break; /* only one intra node is ever inserted */
3584 | 216k | }
3585 | 6.50M | }
3586 | 4.29M | }
3587 | 4.29M | }
3588 | | |
/**
********************************************************************************
*  @fn     hme_recompute_lambda_from_min_8x8_act_in_ctb
*
*  @brief  Recomputes the lambda for a CTB from the frame qp and, when
*          ps_ctxt->i4_l0me_qp_mod is set, an activity factor read from
*          ps_cur_ipe_ctb
*
*          Steps: frame qp (+ i1_qp_offset) -> qscale -> optional activity
*          scaling -> clamp to [i2_min_qscale, i2_max_qscale] -> back to qp ->
*          clamp to [i2_min_qp, i2_max_qp] -> lambda = 2^((qp - 12) / 3) times
*          a lambda modifier
*
*  @param[in]  ps_ctxt        : ME frame context (frame params, rc quant
*                               tables, lambda context)
*  @param[in]  ps_cur_ipe_ctb : IPE analysis of the current CTB (activity factors)
*
*  @return sqrt(lambda) scaled by (1 << LAMBDA_Q_SHIFT), truncated to WORD32
********************************************************************************
*/
3589 | | S32 hme_recompute_lambda_from_min_8x8_act_in_ctb(
3590 | | me_frm_ctxt_t *ps_ctxt, ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb)
3591 | 33.6k | {
3592 | 33.6k | double lambda;
3593 | 33.6k | double lambda_modifier;
3594 | 33.6k | WORD32 i4_cu_qp;
3595 | 33.6k | frm_lambda_ctxt_t *ps_frm_lambda_ctxt;
3596 | | //ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
3597 | 33.6k | WORD32 i4_frame_qp;
3598 | 33.6k | rc_quant_t *ps_rc_quant_ctxt;
3599 | 33.6k | WORD32 i4_is_bpic;
3600 | |
3601 | 33.6k | ps_frm_lambda_ctxt = &ps_ctxt->s_frm_lambda_ctxt;
3602 | | //ps_cur_ipe_ctb = ps_ctxt->ps_ipe_l0_ctb_frm_base;
3603 | 33.6k | i4_frame_qp = ps_ctxt->s_frm_prms.i4_frame_qp;
3604 | 33.6k | ps_rc_quant_ctxt = ps_ctxt->ps_rc_quant_ctxt;
3605 | 33.6k | i4_is_bpic = ps_ctxt->s_frm_prms.bidir_enabled;
3606 | |
3607 | 33.6k | i4_cu_qp = ps_rc_quant_ctxt->pi4_qp_to_qscale[i4_frame_qp + ps_rc_quant_ctxt->i1_qp_offset]; /* despite its name, i4_cu_qp holds a qscale from here until the qscale_to_qp lookup below */
3608 | |
3609 | 33.6k | {
3610 | 33.6k | if(ps_ctxt->i4_l0me_qp_mod)
3611 | 33.6k | {
3612 | 33.6k | #if MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON
3613 | | #if LAMDA_BASED_ON_QUANT
3614 | | WORD32 i4_activity = ps_cur_ipe_ctb->i4_64x64_act_factor[2][0];
3615 | | #else
3616 | 33.6k | WORD32 i4_activity = ps_cur_ipe_ctb->i4_64x64_act_factor[3][0];
3617 | 33.6k | #endif
3618 | 33.6k | i4_cu_qp = (((i4_cu_qp)*i4_activity) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >> /* scale by the activity factor, rounding to nearest */
3619 | 33.6k | QP_LEVEL_MOD_ACT_FACTOR;
3620 | |
3621 | 33.6k | #endif
3622 | 33.6k | }
3623 | 33.6k | if(i4_cu_qp > ps_rc_quant_ctxt->i2_max_qscale)
3624 | 1.50k | i4_cu_qp = ps_rc_quant_ctxt->i2_max_qscale;
3625 | 32.1k | else if(i4_cu_qp < ps_rc_quant_ctxt->i2_min_qscale)
3626 | 0 | i4_cu_qp = ps_rc_quant_ctxt->i2_min_qscale;
3627 | |
3628 | 33.6k | i4_cu_qp = ps_rc_quant_ctxt->pi4_qscale_to_qp[i4_cu_qp]; /* map the clamped qscale back to a qp */
3629 | 33.6k | }
3630 | |
3631 | 33.6k | if(i4_cu_qp > ps_rc_quant_ctxt->i2_max_qp)
3632 | 0 | i4_cu_qp = ps_rc_quant_ctxt->i2_max_qp;
3633 | 33.6k | else if(i4_cu_qp < ps_rc_quant_ctxt->i2_min_qp)
3634 | 149 | i4_cu_qp = ps_rc_quant_ctxt->i2_min_qp;
3635 | |
3636 | 33.6k | lambda = pow(2.0, (((double)(i4_cu_qp - 12)) / 3));
3637 | |
3638 | 33.6k | lambda_modifier = ps_frm_lambda_ctxt->lambda_modifier;
3639 | |
3640 | 33.6k | if(i4_is_bpic) /* when bidir is enabled the modifier is scaled by (qp - 12) / 6, limited to [2, 4] */
3641 | 8.52k | {
3642 | 8.52k | lambda_modifier = lambda_modifier * CLIP3((((double)(i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
3643 | 8.52k | }
3644 | 33.6k | if(ps_ctxt->i4_use_const_lamda_modifier) /* constant-modifier mode replaces whatever was computed above */
3645 | 0 | {
3646 | 0 | if(ps_ctxt->s_frm_prms.is_i_pic)
3647 | 0 | {
3648 | 0 | lambda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
3649 | 0 | }
3650 | 0 | else
3651 | 0 | {
3652 | 0 | lambda_modifier = CONST_LAMDA_MOD_VAL;
3653 | 0 | }
3654 | 0 | }
3655 | 33.6k | lambda *= lambda_modifier;
3656 | |
3657 | 33.6k | return ((WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)));
3658 | 33.6k | }
3659 | | |
3660 | | /**
3661 | | ********************************************************************************
3662 | | * @fn hme_update_dynamic_search_params
3663 | | *
3664 | | * @brief Widens the tracked vertical MV range [i2_dyn_min_y, i2_dyn_max_y]
3665 | | * so that it includes the given MV y component
3666 | | * @param[in,out] ps_dyn_range_prms : Dyn. Range Param str.
3667 | | * @param[in] i2_mvy : current MV y comp.
3668 | | *
3669 | | * @return None
3670 | | ********************************************************************************
3671 | | */
3672 | | void hme_update_dynamic_search_params(dyn_range_prms_t *ps_dyn_range_prms, WORD16 i2_mvy)
3673 | 15.8M | {
3674 | | /* If the MV y component exceeds the current maximum, update i2_dyn_max_y */
3675 | 15.8M | if(i2_mvy > ps_dyn_range_prms->i2_dyn_max_y)
3676 | 120k | ps_dyn_range_prms->i2_dyn_max_y = i2_mvy;
3677 | | /* If the MV y component is below the current minimum, update i2_dyn_min_y */
3678 | 15.8M | if(i2_mvy < ps_dyn_range_prms->i2_dyn_min_y)
3679 | 126k | ps_dyn_range_prms->i2_dyn_min_y = i2_mvy;
3680 | 15.8M | }
3681 | | |
/**
********************************************************************************
*  @fn     hme_add_new_node_to_a_sorted_array
*
*  @brief  Inserts a node pointer into an array of node pointers, placing it
*          just before the first existing entry whose cost is strictly larger
*          (an equal-cost node therefore goes after the existing ones)
*
*          With pu1_shifts == NULL the raw i4_tot_cost values are compared.
*          Otherwise each cost is first right-shifted, with rounding, by its
*          own shift (u1_shift for the new node, pu1_shifts[i] for entry i) and
*          pu1_shifts is kept in step with pps_sorted_array
*
*  @param[in]     ps_result_node         : node to insert
*  @param[in,out] pps_sorted_array       : array of node pointers; must have room
*                                          for u4_num_results_updated + 1 entries,
*                                          since index u4_num_results_updated may
*                                          be written
*  @param[in,out] pu1_shifts             : per-entry shifts, or NULL
*  @param[in]     u4_num_results_updated : number of entries currently in the array
*  @param[in]     u1_shift               : shift associated with the new node
*
*  @return None
********************************************************************************
*/
3682 | | void hme_add_new_node_to_a_sorted_array(
3683 | | search_node_t *ps_result_node,
3684 | | search_node_t **pps_sorted_array,
3685 | | U08 *pu1_shifts,
3686 | | U32 u4_num_results_updated,
3687 | | U08 u1_shift)
3688 | 21.1M | {
3689 | 21.1M | U32 i;
3690 | |
3691 | 21.1M | if(NULL == pu1_shifts)
3692 | 1.92M | {
3693 | 1.92M | S32 i4_cur_node_cost = ps_result_node->i4_tot_cost;
3694 | |
3695 | 3.25M | for(i = 0; i < u4_num_results_updated; i++)
3696 | 1.54M | {
3697 | 1.54M | if(i4_cur_node_cost < pps_sorted_array[i]->i4_tot_cost)
3698 | 215k | {
3699 | 215k | memmove( /* open a gap at index i by moving entries i .. end one slot to the right */
3700 | 215k | &pps_sorted_array[i + 1],
3701 | 215k | &pps_sorted_array[i],
3702 | 215k | (u4_num_results_updated - i) * sizeof(search_node_t *));
3703 | |
3704 | 215k | break;
3705 | 215k | }
3706 | 1.54M | }
3707 | 1.92M | }
3708 | 19.2M | else
3709 | 19.2M | {
3710 | 19.2M | S32 i4_cur_node_cost =
3711 | 19.2M | (u1_shift == 0) ? ps_result_node->i4_tot_cost
3712 | 19.2M | : (ps_result_node->i4_tot_cost + (1 << (u1_shift - 1))) >> u1_shift; /* the zero-shift case is split out so that (u1_shift - 1) is never negative */
3713 | |
3714 | 91.3M | for(i = 0; i < u4_num_results_updated; i++)
3715 | 82.5M | {
3716 | 82.5M | S32 i4_prev_node_cost = (pu1_shifts[i] == 0) ? pps_sorted_array[i]->i4_tot_cost
3717 | 82.5M | : (pps_sorted_array[i]->i4_tot_cost +
3718 | 60.2M | (1 << (pu1_shifts[i] - 1))) >>
3719 | 60.2M | pu1_shifts[i];
3720 | |
3721 | 82.5M | if(i4_cur_node_cost < i4_prev_node_cost)
3722 | 10.3M | {
3723 | 10.3M | memmove(
3724 | 10.3M | &pps_sorted_array[i + 1],
3725 | 10.3M | &pps_sorted_array[i],
3726 | 10.3M | (u4_num_results_updated - i) * sizeof(search_node_t *));
3727 | 10.3M | memmove(
3728 | 10.3M | &pu1_shifts[i + 1], &pu1_shifts[i], (u4_num_results_updated - i) * sizeof(U08));
3729 | |
3730 | 10.3M | break;
3731 | 10.3M | }
3732 | 82.5M | }
3733 | |
3734 | 19.2M | pu1_shifts[i] = u1_shift;
3735 | 19.2M | }
3736 | |
3737 | 21.1M | pps_sorted_array[i] = ps_result_node; /* i is the insertion slot, or u4_num_results_updated (append) when no larger entry was found */
3738 | 21.1M | }
3739 | | |
/**
********************************************************************************
*  @fn     hme_find_pos_of_implicitly_stored_ref_id
*
*  @brief  Scans pi1_ref_idx[0 .. i4_num_results - 1] for entries equal to
*          i1_ref_idx and returns the index of the match numbered i4_result_id
*          (0 selects the first match, 1 the second, and so on)
*
*  @param[in] pi1_ref_idx    : array of reference indices to scan
*  @param[in] i1_ref_idx     : reference index to look for
*  @param[in] i4_result_id   : number of matches to skip before returning
*  @param[in] i4_num_results : number of entries in pi1_ref_idx
*
*  @return index of the selected match, or -1 if there are too few matches
********************************************************************************
*/
3740 | | S32 hme_find_pos_of_implicitly_stored_ref_id(
3741 | | S08 *pi1_ref_idx, S08 i1_ref_idx, S32 i4_result_id, S32 i4_num_results)
3742 | 8.21M | {
3743 | 8.21M | S32 i;
3744 | |
3745 | 21.5M | for(i = 0; i < i4_num_results; i++)
3746 | 18.1M | {
3747 | 18.1M | if(i1_ref_idx == pi1_ref_idx[i])
3748 | 4.81M | {
3749 | 4.81M | if(0 == i4_result_id)
3750 | 4.81M | {
3751 | 4.81M | return i;
3752 | 4.81M | }
3753 | 0 | else
3754 | 0 | {
3755 | 0 | i4_result_id--; /* skip this match and wait for a later one */
3756 | 0 | }
3757 | 4.81M | }
3758 | 18.1M | }
3759 | |
3760 | 3.39M | return -1;
3761 | 8.21M | }
3762 | | |
/**
********************************************************************************
*  @fn     hme_search_node_populator
*
*  @brief  Fills a search node from an MV and a reference index: writes the
*          MV components through ps_search_node->ps_mv after applying SHL_NEG
*          with i1_mv_magnitude_shift, stores the reference index, marks the
*          node available and clears its subpel-done flag
*
*          NOTE(review): SHL_NEG is defined outside this view; presumably a
*          left shift that turns into a right shift for a negative shift
*          amount - confirm against the macro
*
*  @param[in,out] ps_search_node        : node to fill; its ps_mv pointer is
*                                         written through, so it must already
*                                         point to valid storage
*  @param[in]     ps_mv                 : source MV (i2_mv_x, i2_mv_y)
*  @param[in]     i1_ref_idx            : reference index to store
*  @param[in]     i1_mv_magnitude_shift : shift passed to SHL_NEG
*
*  @return None
********************************************************************************
*/
3763 | | static __inline void hme_search_node_populator(
3764 | | search_node_t *ps_search_node, hme_mv_t *ps_mv, S08 i1_ref_idx, S08 i1_mv_magnitude_shift)
3765 | 39.7M | {
3766 | 39.7M | ps_search_node->ps_mv->i2_mvx = SHL_NEG((WORD16)ps_mv->i2_mv_x, i1_mv_magnitude_shift);
3767 | 39.7M | ps_search_node->ps_mv->i2_mvy = SHL_NEG((WORD16)ps_mv->i2_mv_y, i1_mv_magnitude_shift);
3768 | 39.7M | ps_search_node->i1_ref_idx = i1_ref_idx;
3769 | 39.7M | ps_search_node->u1_is_avail = 1;
3770 | 39.7M | ps_search_node->u1_subpel_done = 0;
3771 | 39.7M | }
3772 | | |
3773 | | S32 hme_populate_search_candidates(fpel_srch_cand_init_data_t *ps_ctxt) |
3774 | 1.89M | { |
3775 | 1.89M | hme_mv_t *ps_mv; |
3776 | | |
3777 | 1.89M | S32 wd_c, ht_c, wd_p, ht_p; |
3778 | 1.89M | S32 blksize_p, blksize_c; |
3779 | 1.89M | S32 i; |
3780 | 1.89M | S08 *pi1_ref_idx; |
3781 | | /* Cache for storing offsets */ |
3782 | 1.89M | S32 ai4_cand_offsets[NUM_SEARCH_CAND_LOCATIONS]; |
3783 | | |
3784 | 1.89M | layer_ctxt_t *ps_curr_layer = ps_ctxt->ps_curr_layer; |
3785 | 1.89M | layer_ctxt_t *ps_coarse_layer = ps_ctxt->ps_coarse_layer; |
3786 | 1.89M | layer_mv_t *ps_coarse_layer_mvbank = ps_coarse_layer->ps_layer_mvbank; |
3787 | 1.89M | layer_mv_t *ps_curr_layer_mvbank = ps_curr_layer->ps_layer_mvbank; |
3788 | 1.89M | search_candt_t *ps_search_cands = ps_ctxt->ps_search_cands; |
3789 | 1.89M | hme_mv_t s_zero_mv = { 0 }; |
3790 | | |
3791 | 1.89M | S32 i4_pos_x = ps_ctxt->i4_pos_x; |
3792 | 1.89M | S32 i4_pos_y = ps_ctxt->i4_pos_y; |
3793 | 1.89M | S32 i4_num_act_ref_l0 = ps_ctxt->i4_num_act_ref_l0; |
3794 | 1.89M | S32 i4_num_act_ref_l1 = ps_ctxt->i4_num_act_ref_l1; |
3795 | 1.89M | U08 u1_pred_dir = ps_ctxt->u1_pred_dir; |
3796 | 1.89M | U08 u1_pred_dir_ctr = ps_ctxt->u1_pred_dir_ctr; |
3797 | 1.89M | U08 u1_num_results_in_curr_mvbank = ps_ctxt->u1_num_results_in_mvbank; |
3798 | 1.89M | U08 u1_num_results_in_coarse_mvbank = |
3799 | 1.89M | (u1_pred_dir == 0) ? (i4_num_act_ref_l0 * ps_coarse_layer_mvbank->i4_num_mvs_per_ref) |
3800 | 1.89M | : (i4_num_act_ref_l1 * ps_coarse_layer_mvbank->i4_num_mvs_per_ref); |
3801 | 1.89M | S32 i4_init_offset_projected = |
3802 | 1.89M | (u1_pred_dir == 1) ? (i4_num_act_ref_l0 * ps_coarse_layer_mvbank->i4_num_mvs_per_ref) : 0; |
3803 | 1.89M | S32 i4_init_offset_spatial = |
3804 | 1.89M | (u1_pred_dir_ctr == 1) |
3805 | 1.89M | ? (ps_curr_layer_mvbank->i4_num_mvs_per_ref * u1_num_results_in_curr_mvbank) |
3806 | 1.89M | : 0; |
3807 | 1.89M | U08 u1_search_candidate_list_index = ps_ctxt->u1_search_candidate_list_index; |
3808 | 1.89M | U08 u1_max_num_search_cands = |
3809 | 1.89M | gau1_max_num_search_cands_in_l0_me[u1_search_candidate_list_index]; |
3810 | 1.89M | S32 i4_num_srch_cands = MIN(u1_max_num_search_cands, ps_ctxt->i4_max_num_init_cands << 1); |
3811 | 1.89M | U16 u2_is_offset_available = 0; |
3812 | 1.89M | U08 u1_search_blk_to_spatial_mvbank_blk_size_factor = 1; |
3813 | | |
3814 | | /* Width and ht of current and prev layers */ |
3815 | 1.89M | wd_c = ps_curr_layer->i4_wd; |
3816 | 1.89M | ht_c = ps_curr_layer->i4_ht; |
3817 | 1.89M | wd_p = ps_coarse_layer->i4_wd; |
3818 | 1.89M | ht_p = ps_coarse_layer->i4_ht; |
3819 | | |
3820 | 1.89M | blksize_p = gau1_blk_size_to_wd_shift[ps_coarse_layer_mvbank->e_blk_size]; |
3821 | 1.89M | blksize_c = gau1_blk_size_to_wd_shift[ps_curr_layer_mvbank->e_blk_size]; |
3822 | | |
3823 | | /* ASSERT for valid sizes */ |
3824 | 1.89M | ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5)); |
3825 | | |
3826 | 1.89M | { |
3827 | 1.89M | S32 x = i4_pos_x >> 4; |
3828 | 1.89M | S32 y = i4_pos_y >> 4; |
3829 | | |
3830 | 1.89M | if(blksize_c != gau1_blk_size_to_wd_shift[ps_ctxt->e_search_blk_size]) |
3831 | 1.89M | { |
3832 | 1.89M | x *= 2; |
3833 | 1.89M | y *= 2; |
3834 | | |
3835 | 1.89M | u1_search_blk_to_spatial_mvbank_blk_size_factor = 2; |
3836 | 1.89M | } |
3837 | | |
3838 | 1.89M | i4_init_offset_spatial += (x + y * ps_curr_layer_mvbank->i4_num_blks_per_row) * |
3839 | 1.89M | ps_curr_layer_mvbank->i4_num_mvs_per_blk; |
3840 | 1.89M | } |
3841 | | |
3842 | 41.6M | for(i = 0; i < i4_num_srch_cands; i++) |
3843 | 39.7M | { |
3844 | 39.7M | SEARCH_CANDIDATE_TYPE_T e_search_cand_type = |
3845 | 39.7M | gae_search_cand_priority_to_search_cand_type_map_in_l0_me[u1_search_candidate_list_index] |
3846 | 39.7M | [i]; |
3847 | 39.7M | SEARCH_CAND_LOCATIONS_T e_search_cand_loc = |
3848 | 39.7M | gae_search_cand_type_to_location_map[e_search_cand_type]; |
3849 | 39.7M | S08 i1_result_id = MIN( |
3850 | 39.7M | gai1_search_cand_type_to_result_id_map[e_search_cand_type], |
3851 | 39.7M | (e_search_cand_loc < 0 ? 0 |
3852 | 39.7M | : ps_ctxt->pu1_num_fpel_search_cands[e_search_cand_loc] - 1)); |
3853 | 39.7M | U08 u1_is_spatial_cand = (1 == gau1_search_cand_type_to_spatiality_map[e_search_cand_type]); |
3854 | 39.7M | U08 u1_is_proj_cand = (0 == gau1_search_cand_type_to_spatiality_map[e_search_cand_type]); |
3855 | 39.7M | U08 u1_is_zeroMV_cand = (ZERO_MV == e_search_cand_type) || |
3856 | 39.7M | (ZERO_MV_ALTREF == e_search_cand_type); |
3857 | | |
3858 | | /* When spatial candidates are available, use them, else use the projected candidates */ |
3859 | | /* This is required since some blocks will never have certain spatial candidates, and in order */ |
3860 | | /* to accommodate such instances in 'gae_search_cand_priority_to_search_cand_type_map_in_l0_me' list, */ |
3861 | | /* all candidates apart from the 'LEFT' have been marked as projected */ |
3862 | 39.7M | if(((e_search_cand_loc == TOPLEFT) || (e_search_cand_loc == TOP) || |
3863 | 39.7M | (e_search_cand_loc == TOPRIGHT)) && |
3864 | 39.7M | (i1_result_id < u1_num_results_in_curr_mvbank) && u1_is_proj_cand) |
3865 | 7.58M | { |
3866 | 7.58M | if(e_search_cand_loc == TOPLEFT) |
3867 | 2.52M | { |
3868 | 2.52M | u1_is_spatial_cand = ps_ctxt->u1_is_topLeft_available || |
3869 | 2.52M | !ps_ctxt->u1_is_left_available; |
3870 | 2.52M | } |
3871 | 5.05M | else if(e_search_cand_loc == TOPRIGHT) |
3872 | 2.52M | { |
3873 | 2.52M | u1_is_spatial_cand = ps_ctxt->u1_is_topRight_available; |
3874 | 2.52M | } |
3875 | 2.52M | else |
3876 | 2.52M | { |
3877 | 2.52M | u1_is_spatial_cand = ps_ctxt->u1_is_top_available; |
3878 | 2.52M | } |
3879 | | |
3880 | 7.58M | u1_is_proj_cand = !u1_is_spatial_cand; |
3881 | 7.58M | } |
3882 | | |
3883 | 39.7M | switch(u1_is_zeroMV_cand + (u1_is_spatial_cand << 1) + (u1_is_proj_cand << 2)) |
3884 | 39.7M | { |
3885 | 1.98M | case 1: |
3886 | 1.98M | { |
3887 | 1.98M | hme_search_node_populator( |
3888 | 1.98M | ps_search_cands[i].ps_search_node, |
3889 | 1.98M | &s_zero_mv, |
3890 | 1.98M | (ZERO_MV == e_search_cand_type) ? ps_ctxt->i1_default_ref_id |
3891 | 1.98M | : ps_ctxt->i1_alt_default_ref_id, |
3892 | 1.98M | 0); |
3893 | | |
3894 | 1.98M | break; |
3895 | 0 | } |
3896 | 7.62M | case 2: |
3897 | 7.62M | { |
3898 | 7.62M | S08 i1_mv_magnitude_shift = 0; |
3899 | | |
3900 | 7.62M | S32 i4_offset = i4_init_offset_spatial; |
3901 | | |
3902 | 7.62M | i1_result_id = MIN(i1_result_id, u1_num_results_in_curr_mvbank - 1); |
3903 | 7.62M | i4_offset += i1_result_id; |
3904 | | |
3905 | 7.62M | switch(e_search_cand_loc) |
3906 | 7.62M | { |
3907 | 2.62M | case LEFT: |
3908 | 2.62M | { |
3909 | 2.62M | if(ps_ctxt->u1_is_left_available) |
3910 | 2.01M | { |
3911 | 2.01M | i1_mv_magnitude_shift = -2; |
3912 | | |
3913 | 2.01M | i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_blk; |
3914 | | |
3915 | 2.01M | ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset; |
3916 | 2.01M | pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset; |
3917 | 2.01M | } |
3918 | 615k | else |
3919 | 615k | { |
3920 | 615k | i1_mv_magnitude_shift = 0; |
3921 | | |
3922 | 615k | ps_mv = &s_zero_mv; |
3923 | 615k | pi1_ref_idx = &ps_ctxt->i1_default_ref_id; |
3924 | 615k | } |
3925 | | |
3926 | 2.62M | break; |
3927 | 0 | } |
3928 | 2.03M | case TOPLEFT: |
3929 | 2.03M | { |
3930 | 2.03M | if(ps_ctxt->u1_is_topLeft_available) |
3931 | 1.44M | { |
3932 | 1.44M | i1_mv_magnitude_shift = -2; |
3933 | | |
3934 | 1.44M | i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_blk; |
3935 | 1.44M | i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_row; |
3936 | | |
3937 | 1.44M | ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset; |
3938 | 1.44M | pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset; |
3939 | 1.44M | } |
3940 | 592k | else |
3941 | 592k | { |
3942 | 592k | i1_mv_magnitude_shift = 0; |
3943 | | |
3944 | 592k | ps_mv = &s_zero_mv; |
3945 | 592k | pi1_ref_idx = &ps_ctxt->i1_default_ref_id; |
3946 | 592k | } |
3947 | | |
3948 | 2.03M | break; |
3949 | 0 | } |
3950 | 1.88M | case TOP: |
3951 | 1.88M | { |
3952 | 1.88M | if(ps_ctxt->u1_is_top_available) |
3953 | 1.88M | { |
3954 | 1.88M | i1_mv_magnitude_shift = -2; |
3955 | | |
3956 | 1.88M | i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_row; |
3957 | | |
3958 | 1.88M | ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset; |
3959 | 1.88M | pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset; |
3960 | 1.88M | } |
3961 | 0 | else |
3962 | 0 | { |
3963 | 0 | i1_mv_magnitude_shift = 0; |
3964 | |
|
3965 | 0 | ps_mv = &s_zero_mv; |
3966 | 0 | pi1_ref_idx = &ps_ctxt->i1_default_ref_id; |
3967 | 0 | } |
3968 | | |
3969 | 1.88M | break; |
3970 | 0 | } |
3971 | 1.08M | case TOPRIGHT: |
3972 | 1.08M | { |
3973 | 1.08M | if(ps_ctxt->u1_is_topRight_available) |
3974 | 1.08M | { |
3975 | 1.08M | i1_mv_magnitude_shift = -2; |
3976 | | |
3977 | 1.08M | i4_offset += ps_curr_layer_mvbank->i4_num_mvs_per_blk * |
3978 | 1.08M | u1_search_blk_to_spatial_mvbank_blk_size_factor; |
3979 | 1.08M | i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_row; |
3980 | | |
3981 | 1.08M | ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset; |
3982 | 1.08M | pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset; |
3983 | 1.08M | } |
3984 | 0 | else |
3985 | 0 | { |
3986 | 0 | i1_mv_magnitude_shift = 0; |
3987 | 0 | ps_mv = &s_zero_mv; |
3988 | 0 | pi1_ref_idx = &ps_ctxt->i1_default_ref_id; |
3989 | 0 | } |
3990 | | |
3991 | 1.08M | break; |
3992 | 0 | } |
3993 | 0 | default: |
3994 | 0 | { |
3995 | | /* AiyAiyYo!! */ |
3996 | 0 | ASSERT(0); |
3997 | 0 | } |
3998 | 7.62M | } |
3999 | | |
4000 | 7.62M | hme_search_node_populator( |
4001 | 7.62M | ps_search_cands[i].ps_search_node, ps_mv, pi1_ref_idx[0], i1_mv_magnitude_shift); |
4002 | | |
4003 | 7.62M | break; |
4004 | 7.62M | } |
4005 | 30.1M | case 4: |
4006 | 30.1M | { |
4007 | 30.1M | ASSERT(ILLUSORY_CANDIDATE != e_search_cand_type); |
4008 | 30.1M | ASSERT(ILLUSORY_LOCATION != e_search_cand_loc); |
4009 | | |
4010 | 30.1M | i1_result_id = MIN(i1_result_id, u1_num_results_in_coarse_mvbank - 1); |
4011 | | |
4012 | 30.1M | if(!(u2_is_offset_available & (1 << e_search_cand_loc))) |
4013 | 15.4M | { |
4014 | 15.4M | S32 x, y; |
4015 | | |
4016 | 15.4M | x = i4_pos_x + gai4_search_cand_location_to_x_offset_map[e_search_cand_loc]; |
4017 | 15.4M | y = i4_pos_y + gai4_search_cand_location_to_y_offset_map[e_search_cand_loc]; |
4018 | | |
4019 | | /* Safety check to avoid uninitialized access across temporal layers */ |
4020 | 15.4M | x = CLIP3(x, 0, (wd_c - blksize_p)); |
4021 | 15.4M | y = CLIP3(y, 0, (ht_c - blksize_p)); |
4022 | | |
4023 | | /* Project the positions to prev layer */ |
4024 | 15.4M | x = x >> blksize_p; |
4025 | 15.4M | y = y >> blksize_p; |
4026 | | |
4027 | 15.4M | ai4_cand_offsets[e_search_cand_loc] = |
4028 | 15.4M | (x * ps_coarse_layer_mvbank->i4_num_mvs_per_blk); |
4029 | 15.4M | ai4_cand_offsets[e_search_cand_loc] += |
4030 | 15.4M | (y * ps_coarse_layer_mvbank->i4_num_mvs_per_row); |
4031 | 15.4M | ai4_cand_offsets[e_search_cand_loc] += i4_init_offset_projected; |
4032 | | |
4033 | 15.4M | u2_is_offset_available |= (1 << e_search_cand_loc); |
4034 | 15.4M | } |
4035 | | |
4036 | 30.1M | ps_mv = |
4037 | 30.1M | ps_coarse_layer_mvbank->ps_mv + ai4_cand_offsets[e_search_cand_loc] + i1_result_id; |
4038 | 30.1M | pi1_ref_idx = ps_coarse_layer_mvbank->pi1_ref_idx + |
4039 | 30.1M | ai4_cand_offsets[e_search_cand_loc] + i1_result_id; |
4040 | | |
4041 | 30.1M | hme_search_node_populator(ps_search_cands[i].ps_search_node, ps_mv, pi1_ref_idx[0], 1); |
4042 | | |
4043 | 30.1M | break; |
4044 | 30.1M | } |
4045 | 0 | default: |
4046 | 0 | { |
4047 | | /* NoNoNoNoNooooooooNO! */ |
4048 | 0 | ASSERT(0); |
4049 | 0 | } |
4050 | 39.7M | } |
4051 | | |
4052 | 39.7M | ASSERT(ps_search_cands[i].ps_search_node->i1_ref_idx >= 0); |
4053 | 39.7M | ASSERT( |
4054 | 39.7M | !u1_pred_dir |
4055 | 39.7M | ? (ps_ctxt->pi4_ref_id_lc_to_l0_map[ps_search_cands[i].ps_search_node->i1_ref_idx] < |
4056 | 39.7M | i4_num_act_ref_l0) |
4057 | 39.7M | : (ps_ctxt->pi4_ref_id_lc_to_l1_map[ps_search_cands[i].ps_search_node->i1_ref_idx] < |
4058 | 39.7M | ps_ctxt->i4_num_act_ref_l1)); |
4059 | 39.7M | } |
4060 | | |
4061 | 1.89M | return i4_num_srch_cands; |
4062 | 1.89M | } |
4063 | | |
4064 | | void hme_mv_clipper( |
4065 | | hme_search_prms_t *ps_search_prms_blk, |
4066 | | S32 i4_num_srch_cands, |
4067 | | S08 i1_check_for_mult_refs, |
4068 | | U08 u1_fpel_refine_extent, |
4069 | | U08 u1_hpel_refine_extent, |
4070 | | U08 u1_qpel_refine_extent) |
4071 | 1.89M | { |
4072 | 1.89M | S32 candt; |
4073 | 1.89M | range_prms_t *ps_range_prms; |
4074 | | |
4075 | 41.6M | for(candt = 0; candt < i4_num_srch_cands; candt++) |
4076 | 39.7M | { |
4077 | 39.7M | search_node_t *ps_search_node; |
4078 | | |
4079 | 39.7M | ps_search_node = ps_search_prms_blk->ps_search_candts[candt].ps_search_node; |
4080 | 39.7M | ps_range_prms = ps_search_prms_blk->aps_mv_range[ps_search_node->i1_ref_idx]; |
4081 | | |
4082 | | /* Clip the motion vectors as well here since after clipping |
4083 | | two candidates can become same and they will be removed during deduplication */ |
4084 | 39.7M | CLIP_MV_WITHIN_RANGE( |
4085 | 39.7M | ps_search_node->ps_mv->i2_mvx, |
4086 | 39.7M | ps_search_node->ps_mv->i2_mvy, |
4087 | 39.7M | ps_range_prms, |
4088 | 39.7M | u1_fpel_refine_extent, |
4089 | 39.7M | u1_hpel_refine_extent, |
4090 | 39.7M | u1_qpel_refine_extent); |
4091 | 39.7M | } |
4092 | 1.89M | } |
4093 | | |
4094 | | void hme_init_pred_buf_info( |
4095 | | hme_pred_buf_info_t (*ps_info)[MAX_NUM_INTER_PARTS], |
4096 | | hme_pred_buf_mngr_t *ps_buf_mngr, |
4097 | | U08 u1_pu1_wd, |
4098 | | U08 u1_pu1_ht, |
4099 | | PART_TYPE_T e_part_type) |
4100 | 19.5M | { |
4101 | 19.5M | U08 u1_pred_buf_array_id; |
4102 | | |
4103 | 19.5M | if(1 != ihevce_get_free_pred_buf_indices( |
4104 | 19.5M | &u1_pred_buf_array_id, &ps_buf_mngr->u4_pred_buf_usage_indicator, 1)) |
4105 | 0 | { |
4106 | 0 | ASSERT(0); |
4107 | 0 | } |
4108 | 19.5M | else |
4109 | 19.5M | { |
4110 | 19.5M | ps_info[0][0].i4_pred_stride = MAX_CU_SIZE; |
4111 | 19.5M | ps_info[0][0].pu1_pred = ps_buf_mngr->apu1_pred_bufs[u1_pred_buf_array_id]; |
4112 | 19.5M | ps_info[0][0].u1_pred_buf_array_id = u1_pred_buf_array_id; |
4113 | | |
4114 | 19.5M | if(PRT_2Nx2N != e_part_type) |
4115 | 4.92M | { |
4116 | 4.92M | ps_info[0][1].i4_pred_stride = MAX_CU_SIZE; |
4117 | 4.92M | ps_info[0][1].pu1_pred = ps_buf_mngr->apu1_pred_bufs[u1_pred_buf_array_id] + |
4118 | 4.92M | (gai1_is_part_vertical[ge_part_type_to_part_id[e_part_type][0]] |
4119 | 4.92M | ? u1_pu1_ht * ps_info[0][1].i4_pred_stride |
4120 | 4.92M | : u1_pu1_wd); |
4121 | 4.92M | ps_info[0][1].u1_pred_buf_array_id = u1_pred_buf_array_id; |
4122 | 4.92M | } |
4123 | 19.5M | } |
4124 | 19.5M | } |
4125 | | |
4126 | | void hme_debrief_bipred_eval( |
4127 | | part_type_results_t *ps_part_type_result, |
4128 | | hme_pred_buf_info_t (*ps_pred_buf_info)[MAX_NUM_INTER_PARTS], |
4129 | | hme_pred_buf_mngr_t *ps_pred_buf_mngr, |
4130 | | U08 *pu1_allocated_pred_buf_array_indixes, |
4131 | | ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list |
4132 | | |
4133 | | ) |
4134 | 6.52M | { |
4135 | 6.52M | PART_TYPE_T e_part_type = (PART_TYPE_T)ps_part_type_result->u1_part_type; |
4136 | | |
4137 | 6.52M | U32 *pu4_pred_buf_usage_indicator = &ps_pred_buf_mngr->u4_pred_buf_usage_indicator; |
4138 | 6.52M | U08 u1_is_part_vertical = gai1_is_part_vertical[ge_part_type_to_part_id[e_part_type][0]]; |
4139 | | |
4140 | 6.52M | if(0 == ps_part_type_result->u1_part_type) |
4141 | 4.88M | { |
4142 | 4.88M | if(ps_part_type_result->as_pu_results->pu.b2_pred_mode == PRED_BI) |
4143 | 43.5k | { |
4144 | 43.5k | ASSERT(UCHAR_MAX != ps_pred_buf_info[2][0].u1_pred_buf_array_id); |
4145 | | |
4146 | 43.5k | ps_part_type_result->pu1_pred = ps_pred_buf_info[2][0].pu1_pred; |
4147 | 43.5k | ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride; |
4148 | | |
4149 | 43.5k | ihevce_set_pred_buf_as_free( |
4150 | 43.5k | pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]); |
4151 | | |
4152 | 43.5k | ihevce_set_pred_buf_as_free( |
4153 | 43.5k | pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]); |
4154 | 43.5k | } |
4155 | 4.84M | else |
4156 | 4.84M | { |
4157 | 4.84M | ps_part_type_result->pu1_pred = ps_pred_buf_info[0][0].pu1_pred; |
4158 | 4.84M | ps_part_type_result->i4_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride; |
4159 | | |
4160 | 4.84M | ihevce_set_pred_buf_as_free( |
4161 | 4.84M | pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]); |
4162 | | |
4163 | 4.84M | ihevce_set_pred_buf_as_free( |
4164 | 4.84M | pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]); |
4165 | | |
4166 | 4.84M | if(UCHAR_MAX == ps_pred_buf_info[0][0].u1_pred_buf_array_id) |
4167 | 4.66M | { |
4168 | 4.66M | ihevce_set_pred_buf_as_free( |
4169 | 4.66M | pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]); |
4170 | 4.66M | } |
4171 | 4.84M | } |
4172 | 4.88M | } |
4173 | 1.64M | else |
4174 | 1.64M | { |
4175 | 1.64M | U08 *pu1_src_pred; |
4176 | 1.64M | U08 *pu1_dst_pred; |
4177 | 1.64M | S32 i4_src_pred_stride; |
4178 | 1.64M | S32 i4_dst_pred_stride; |
4179 | | |
4180 | 1.64M | U08 u1_pu1_wd = (ps_part_type_result->as_pu_results[0].pu.b4_wd + 1) << 2; |
4181 | 1.64M | U08 u1_pu1_ht = (ps_part_type_result->as_pu_results[0].pu.b4_ht + 1) << 2; |
4182 | 1.64M | U08 u1_pu2_wd = (ps_part_type_result->as_pu_results[1].pu.b4_wd + 1) << 2; |
4183 | 1.64M | U08 u1_pu2_ht = (ps_part_type_result->as_pu_results[1].pu.b4_ht + 1) << 2; |
4184 | | |
4185 | 1.64M | U08 u1_condition_for_switch = |
4186 | 1.64M | (ps_part_type_result->as_pu_results[0].pu.b2_pred_mode == PRED_BI) | |
4187 | 1.64M | ((ps_part_type_result->as_pu_results[1].pu.b2_pred_mode == PRED_BI) << 1); |
4188 | | |
4189 | 1.64M | switch(u1_condition_for_switch) |
4190 | 1.64M | { |
4191 | 1.62M | case 0: |
4192 | 1.62M | { |
4193 | 1.62M | ps_part_type_result->pu1_pred = |
4194 | 1.62M | ps_pred_buf_mngr->apu1_pred_bufs[pu1_allocated_pred_buf_array_indixes[0]]; |
4195 | 1.62M | ps_part_type_result->i4_pred_stride = MAX_CU_SIZE; |
4196 | | |
4197 | 1.62M | ihevce_set_pred_buf_as_free( |
4198 | 1.62M | pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]); |
4199 | | |
4200 | 1.62M | ihevce_set_pred_buf_as_free( |
4201 | 1.62M | pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]); |
4202 | | |
4203 | 1.62M | if(UCHAR_MAX == ps_pred_buf_info[0][0].u1_pred_buf_array_id) |
4204 | 1.52M | { |
4205 | 1.52M | pu1_src_pred = ps_pred_buf_info[0][0].pu1_pred; |
4206 | 1.52M | pu1_dst_pred = ps_part_type_result->pu1_pred; |
4207 | 1.52M | i4_src_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride; |
4208 | 1.52M | i4_dst_pred_stride = ps_part_type_result->i4_pred_stride; |
4209 | | |
4210 | 1.52M | ps_cmn_utils_optimised_function_list->pf_copy_2d( |
4211 | 1.52M | pu1_dst_pred, |
4212 | 1.52M | i4_dst_pred_stride, |
4213 | 1.52M | pu1_src_pred, |
4214 | 1.52M | i4_src_pred_stride, |
4215 | 1.52M | u1_pu1_wd, |
4216 | 1.52M | u1_pu1_ht); |
4217 | 1.52M | } |
4218 | | |
4219 | 1.62M | if(UCHAR_MAX == ps_pred_buf_info[0][1].u1_pred_buf_array_id) |
4220 | 1.55M | { |
4221 | 1.55M | pu1_src_pred = ps_pred_buf_info[0][1].pu1_pred; |
4222 | 1.55M | pu1_dst_pred = ps_part_type_result->pu1_pred + |
4223 | 1.55M | (u1_is_part_vertical |
4224 | 1.55M | ? u1_pu1_ht * ps_part_type_result->i4_pred_stride |
4225 | 1.55M | : u1_pu1_wd); |
4226 | 1.55M | i4_src_pred_stride = ps_pred_buf_info[0][1].i4_pred_stride; |
4227 | 1.55M | i4_dst_pred_stride = ps_part_type_result->i4_pred_stride; |
4228 | | |
4229 | 1.55M | ps_cmn_utils_optimised_function_list->pf_copy_2d( |
4230 | 1.55M | pu1_dst_pred, |
4231 | 1.55M | i4_dst_pred_stride, |
4232 | 1.55M | pu1_src_pred, |
4233 | 1.55M | i4_src_pred_stride, |
4234 | 1.55M | u1_pu2_wd, |
4235 | 1.55M | u1_pu2_ht); |
4236 | 1.55M | } |
4237 | | |
4238 | 1.62M | break; |
4239 | 0 | } |
4240 | 9.54k | case 1: |
4241 | 9.54k | { |
4242 | 9.54k | ASSERT(UCHAR_MAX != ps_pred_buf_info[2][0].u1_pred_buf_array_id); |
4243 | | |
4244 | 9.54k | ihevce_set_pred_buf_as_free( |
4245 | 9.54k | pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]); |
4246 | | |
4247 | | /* Copy PU1 pred into PU2's pred buf */ |
4248 | 9.54k | if(((u1_pu1_ht < u1_pu2_ht) || (u1_pu1_wd < u1_pu2_wd)) && |
4249 | 9.54k | (UCHAR_MAX != ps_pred_buf_info[0][1].u1_pred_buf_array_id)) |
4250 | 1.31k | { |
4251 | 1.31k | ps_part_type_result->pu1_pred = |
4252 | 1.31k | ps_pred_buf_info[0][1].pu1_pred - |
4253 | 1.31k | (u1_is_part_vertical ? u1_pu1_ht * ps_pred_buf_info[0][1].i4_pred_stride |
4254 | 1.31k | : u1_pu1_wd); |
4255 | 1.31k | ps_part_type_result->i4_pred_stride = ps_pred_buf_info[0][1].i4_pred_stride; |
4256 | | |
4257 | 1.31k | ihevce_set_pred_buf_as_free( |
4258 | 1.31k | pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]); |
4259 | | |
4260 | 1.31k | pu1_src_pred = ps_pred_buf_info[2][0].pu1_pred; |
4261 | 1.31k | pu1_dst_pred = ps_part_type_result->pu1_pred; |
4262 | 1.31k | i4_src_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride; |
4263 | 1.31k | i4_dst_pred_stride = ps_part_type_result->i4_pred_stride; |
4264 | | |
4265 | 1.31k | ps_cmn_utils_optimised_function_list->pf_copy_2d( |
4266 | 1.31k | pu1_dst_pred, |
4267 | 1.31k | i4_dst_pred_stride, |
4268 | 1.31k | pu1_src_pred, |
4269 | 1.31k | i4_src_pred_stride, |
4270 | 1.31k | u1_pu1_wd, |
4271 | 1.31k | u1_pu1_ht); |
4272 | 1.31k | } |
4273 | 8.23k | else |
4274 | 8.23k | { |
4275 | 8.23k | ps_part_type_result->pu1_pred = ps_pred_buf_info[2][0].pu1_pred; |
4276 | 8.23k | ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride; |
4277 | | |
4278 | 8.23k | ihevce_set_pred_buf_as_free( |
4279 | 8.23k | pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]); |
4280 | | |
4281 | 8.23k | pu1_src_pred = ps_pred_buf_info[0][1].pu1_pred; |
4282 | 8.23k | pu1_dst_pred = ps_part_type_result->pu1_pred; |
4283 | 8.23k | i4_src_pred_stride = ps_pred_buf_info[0][1].i4_pred_stride; |
4284 | 8.23k | i4_dst_pred_stride = ps_part_type_result->i4_pred_stride; |
4285 | | |
4286 | 8.23k | ps_cmn_utils_optimised_function_list->pf_copy_2d( |
4287 | 8.23k | pu1_dst_pred, |
4288 | 8.23k | i4_dst_pred_stride, |
4289 | 8.23k | pu1_src_pred, |
4290 | 8.23k | i4_src_pred_stride, |
4291 | 8.23k | u1_pu2_wd, |
4292 | 8.23k | u1_pu2_ht); |
4293 | 8.23k | } |
4294 | | |
4295 | 9.54k | break; |
4296 | 9.54k | } |
4297 | 3.12k | case 2: |
4298 | 3.12k | { |
4299 | 3.12k | ASSERT(UCHAR_MAX != ps_pred_buf_info[2][1].u1_pred_buf_array_id); |
4300 | | |
4301 | 3.12k | ihevce_set_pred_buf_as_free( |
4302 | 3.12k | pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]); |
4303 | | |
4304 | | /* Copy PU2 pred into PU1's pred buf */ |
4305 | 3.12k | if(((u1_pu1_ht > u1_pu2_ht) || (u1_pu1_wd > u1_pu2_wd)) && |
4306 | 3.12k | (UCHAR_MAX != ps_pred_buf_info[0][0].u1_pred_buf_array_id)) |
4307 | 151 | { |
4308 | 151 | ps_part_type_result->pu1_pred = ps_pred_buf_info[0][0].pu1_pred; |
4309 | 151 | ps_part_type_result->i4_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride; |
4310 | | |
4311 | 151 | ihevce_set_pred_buf_as_free( |
4312 | 151 | pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]); |
4313 | | |
4314 | 151 | pu1_src_pred = ps_pred_buf_info[2][1].pu1_pred; |
4315 | 151 | pu1_dst_pred = ps_part_type_result->pu1_pred + |
4316 | 151 | (u1_is_part_vertical |
4317 | 151 | ? u1_pu1_ht * ps_part_type_result->i4_pred_stride |
4318 | 151 | : u1_pu1_wd); |
4319 | 151 | i4_src_pred_stride = ps_pred_buf_info[2][1].i4_pred_stride; |
4320 | 151 | i4_dst_pred_stride = ps_part_type_result->i4_pred_stride; |
4321 | | |
4322 | 151 | ps_cmn_utils_optimised_function_list->pf_copy_2d( |
4323 | 151 | pu1_dst_pred, |
4324 | 151 | i4_dst_pred_stride, |
4325 | 151 | pu1_src_pred, |
4326 | 151 | i4_src_pred_stride, |
4327 | 151 | u1_pu2_wd, |
4328 | 151 | u1_pu2_ht); |
4329 | 151 | } |
4330 | 2.97k | else |
4331 | 2.97k | { |
4332 | 2.97k | ps_part_type_result->pu1_pred = |
4333 | 2.97k | ps_pred_buf_info[2][1].pu1_pred - |
4334 | 2.97k | (u1_is_part_vertical ? u1_pu1_ht * ps_pred_buf_info[2][1].i4_pred_stride |
4335 | 2.97k | : u1_pu1_wd); |
4336 | 2.97k | ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][1].i4_pred_stride; |
4337 | | |
4338 | 2.97k | ihevce_set_pred_buf_as_free( |
4339 | 2.97k | pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]); |
4340 | | |
4341 | 2.97k | pu1_src_pred = ps_pred_buf_info[0][0].pu1_pred; |
4342 | 2.97k | pu1_dst_pred = ps_part_type_result->pu1_pred; |
4343 | 2.97k | i4_src_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride; |
4344 | 2.97k | i4_dst_pred_stride = ps_part_type_result->i4_pred_stride; |
4345 | | |
4346 | 2.97k | ps_cmn_utils_optimised_function_list->pf_copy_2d( |
4347 | 2.97k | pu1_dst_pred, |
4348 | 2.97k | i4_dst_pred_stride, |
4349 | 2.97k | pu1_src_pred, |
4350 | 2.97k | i4_src_pred_stride, |
4351 | 2.97k | u1_pu1_wd, |
4352 | 2.97k | u1_pu1_ht); |
4353 | 2.97k | } |
4354 | | |
4355 | 3.12k | break; |
4356 | 3.12k | } |
4357 | 4.97k | case 3: |
4358 | 4.97k | { |
4359 | 4.97k | ASSERT(UCHAR_MAX != ps_pred_buf_info[2][0].u1_pred_buf_array_id); |
4360 | 4.97k | ASSERT(UCHAR_MAX != ps_pred_buf_info[2][1].u1_pred_buf_array_id); |
4361 | 4.97k | ASSERT( |
4362 | 4.97k | ps_pred_buf_info[2][1].u1_pred_buf_array_id == |
4363 | 4.97k | ps_pred_buf_info[2][0].u1_pred_buf_array_id); |
4364 | | |
4365 | 4.97k | ps_part_type_result->pu1_pred = ps_pred_buf_info[2][0].pu1_pred; |
4366 | 4.97k | ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride; |
4367 | | |
4368 | 4.97k | ihevce_set_pred_buf_as_free( |
4369 | 4.97k | pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]); |
4370 | | |
4371 | 4.97k | break; |
4372 | 4.97k | } |
4373 | 1.64M | } |
4374 | 1.64M | } |
4375 | 6.52M | } |
4376 | | |
4377 | | U08 hme_decide_search_candidate_priority_in_l1_and_l2_me( |
4378 | | SEARCH_CANDIDATE_TYPE_T e_cand_type, ME_QUALITY_PRESETS_T e_quality_preset) |
4379 | 2.32M | { |
4380 | 2.32M | U08 u1_priority_val = |
4381 | 2.32M | gau1_search_cand_priority_in_l1_and_l2_me[e_quality_preset >= ME_MEDIUM_SPEED][e_cand_type]; |
4382 | | |
4383 | 2.32M | if(UCHAR_MAX == u1_priority_val) |
4384 | 0 | { |
4385 | 0 | ASSERT(0); |
4386 | 0 | } |
4387 | | |
4388 | 2.32M | ASSERT(u1_priority_val <= MAX_INIT_CANDTS); |
4389 | | |
4390 | 2.32M | return u1_priority_val; |
4391 | 2.32M | } |
4392 | | |
4393 | | U08 hme_decide_search_candidate_priority_in_l0_me(SEARCH_CANDIDATE_TYPE_T e_cand_type, U08 u1_index) |
4394 | 633k | { |
4395 | 633k | U08 u1_priority_val = gau1_search_cand_priority_in_l0_me[u1_index][e_cand_type]; |
4396 | | |
4397 | 633k | if(UCHAR_MAX == u1_priority_val) |
4398 | 0 | { |
4399 | 0 | ASSERT(0); |
4400 | 0 | } |
4401 | | |
4402 | 633k | ASSERT(u1_priority_val <= MAX_INIT_CANDTS); |
4403 | | |
4404 | 633k | return u1_priority_val; |
4405 | 633k | } |
4406 | | |
4407 | | void hme_search_cand_data_init( |
4408 | | S32 *pi4_id_Z, |
4409 | | S32 *pi4_id_coloc, |
4410 | | S32 *pi4_num_coloc_cands, |
4411 | | U08 *pu1_search_candidate_list_index, |
4412 | | S32 i4_num_act_ref_l0, |
4413 | | S32 i4_num_act_ref_l1, |
4414 | | U08 u1_is_bidir_enabled, |
4415 | | U08 u1_4x4_blk_in_l1me) |
4416 | 85.3k | { |
4417 | 85.3k | S32 i, j; |
4418 | 85.3k | S32 i4_num_coloc_cands; |
4419 | | |
4420 | 85.3k | U08 u1_search_candidate_list_index; |
4421 | | |
4422 | 85.3k | if(!u1_is_bidir_enabled && !u1_4x4_blk_in_l1me) |
4423 | 35.3k | { |
4424 | 35.3k | S32 i; |
4425 | | |
4426 | 35.3k | u1_search_candidate_list_index = (i4_num_act_ref_l0 - 1) * 2; |
4427 | 35.3k | i4_num_coloc_cands = i4_num_act_ref_l0 * 2; |
4428 | | |
4429 | 35.3k | switch(i4_num_act_ref_l0) |
4430 | 35.3k | { |
4431 | 19.3k | case 1: |
4432 | 19.3k | { |
4433 | 58.0k | for(i = 0; i < 2; i++) |
4434 | 38.7k | { |
4435 | 38.7k | pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( |
4436 | 38.7k | (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i), |
4437 | 38.7k | u1_search_candidate_list_index); |
4438 | 38.7k | } |
4439 | | |
4440 | 19.3k | break; |
4441 | 0 | } |
4442 | 15.8k | case 2: |
4443 | 15.8k | { |
4444 | 79.4k | for(i = 0; i < 4; i++) |
4445 | 63.5k | { |
4446 | 63.5k | pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( |
4447 | 63.5k | (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i), |
4448 | 63.5k | u1_search_candidate_list_index); |
4449 | 63.5k | } |
4450 | | |
4451 | 15.8k | break; |
4452 | 0 | } |
4453 | 35 | case 3: |
4454 | 35 | { |
4455 | 245 | for(i = 0; i < 6; i++) |
4456 | 210 | { |
4457 | 210 | pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( |
4458 | 210 | (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i), |
4459 | 210 | u1_search_candidate_list_index); |
4460 | 210 | } |
4461 | | |
4462 | 35 | break; |
4463 | 0 | } |
4464 | 113 | case 4: |
4465 | 113 | { |
4466 | 1.01k | for(i = 0; i < 8; i++) |
4467 | 904 | { |
4468 | 904 | pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( |
4469 | 904 | (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i), |
4470 | 904 | u1_search_candidate_list_index); |
4471 | 904 | } |
4472 | | |
4473 | 113 | break; |
4474 | 0 | } |
4475 | 0 | default: |
4476 | 0 | { |
4477 | 0 | ASSERT(0); |
4478 | 0 | } |
4479 | 35.3k | } |
4480 | | |
4481 | 35.3k | *pi4_num_coloc_cands = i4_num_coloc_cands; |
4482 | 35.3k | *pu1_search_candidate_list_index = u1_search_candidate_list_index; |
4483 | 35.3k | } |
4484 | 49.9k | else if(!u1_is_bidir_enabled && u1_4x4_blk_in_l1me) |
4485 | 31.4k | { |
4486 | 31.4k | S32 i; |
4487 | | |
4488 | 31.4k | i4_num_coloc_cands = i4_num_act_ref_l0 * 2; |
4489 | 31.4k | u1_search_candidate_list_index = (i4_num_act_ref_l0 - 1) * 2 + 1; |
4490 | | |
4491 | 31.4k | switch(i4_num_act_ref_l0) |
4492 | 31.4k | { |
4493 | 7.65k | case 1: |
4494 | 7.65k | { |
4495 | 22.9k | for(i = 0; i < 2; i++) |
4496 | 15.3k | { |
4497 | 15.3k | pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( |
4498 | 15.3k | (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i), |
4499 | 15.3k | u1_search_candidate_list_index); |
4500 | 15.3k | } |
4501 | | |
4502 | 7.65k | pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( |
4503 | 7.65k | PROJECTED_COLOC_TR0, u1_search_candidate_list_index); |
4504 | | |
4505 | 7.65k | pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me( |
4506 | 7.65k | PROJECTED_COLOC_BL0, u1_search_candidate_list_index); |
4507 | | |
4508 | 7.65k | pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me( |
4509 | 7.65k | PROJECTED_COLOC_BR0, u1_search_candidate_list_index); |
4510 | | |
4511 | 7.65k | i4_num_coloc_cands += 3; |
4512 | | |
4513 | 7.65k | break; |
4514 | 0 | } |
4515 | 10.7k | case 2: |
4516 | 10.7k | { |
4517 | 53.8k | for(i = 0; i < 4; i++) |
4518 | 43.1k | { |
4519 | 43.1k | pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( |
4520 | 43.1k | (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i), |
4521 | 43.1k | u1_search_candidate_list_index); |
4522 | 43.1k | } |
4523 | | |
4524 | 10.7k | pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( |
4525 | 10.7k | PROJECTED_COLOC_TR0, u1_search_candidate_list_index); |
4526 | | |
4527 | 10.7k | pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me( |
4528 | 10.7k | PROJECTED_COLOC_BL0, u1_search_candidate_list_index); |
4529 | | |
4530 | 10.7k | pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me( |
4531 | 10.7k | PROJECTED_COLOC_BR0, u1_search_candidate_list_index); |
4532 | | |
4533 | 10.7k | pi4_id_coloc[i + 3] = hme_decide_search_candidate_priority_in_l0_me( |
4534 | 10.7k | PROJECTED_COLOC_TR1, u1_search_candidate_list_index); |
4535 | | |
4536 | 10.7k | pi4_id_coloc[i + 4] = hme_decide_search_candidate_priority_in_l0_me( |
4537 | 10.7k | PROJECTED_COLOC_BL1, u1_search_candidate_list_index); |
4538 | | |
4539 | 10.7k | pi4_id_coloc[i + 5] = hme_decide_search_candidate_priority_in_l0_me( |
4540 | 10.7k | PROJECTED_COLOC_BR1, u1_search_candidate_list_index); |
4541 | | |
4542 | 10.7k | i4_num_coloc_cands += 6; |
4543 | | |
4544 | 10.7k | break; |
4545 | 0 | } |
4546 | 3.05k | case 3: |
4547 | 3.05k | { |
4548 | 21.3k | for(i = 0; i < 6; i++) |
4549 | 18.3k | { |
4550 | 18.3k | pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( |
4551 | 18.3k | (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i), |
4552 | 18.3k | u1_search_candidate_list_index); |
4553 | 18.3k | } |
4554 | | |
4555 | 3.05k | pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( |
4556 | 3.05k | PROJECTED_COLOC_TR0, u1_search_candidate_list_index); |
4557 | | |
4558 | 3.05k | pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me( |
4559 | 3.05k | PROJECTED_COLOC_BL0, u1_search_candidate_list_index); |
4560 | | |
4561 | 3.05k | pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me( |
4562 | 3.05k | PROJECTED_COLOC_BR0, u1_search_candidate_list_index); |
4563 | | |
4564 | 3.05k | pi4_id_coloc[i + 3] = hme_decide_search_candidate_priority_in_l0_me( |
4565 | 3.05k | PROJECTED_COLOC_TR1, u1_search_candidate_list_index); |
4566 | | |
4567 | 3.05k | pi4_id_coloc[i + 4] = hme_decide_search_candidate_priority_in_l0_me( |
4568 | 3.05k | PROJECTED_COLOC_BL1, u1_search_candidate_list_index); |
4569 | | |
4570 | 3.05k | pi4_id_coloc[i + 5] = hme_decide_search_candidate_priority_in_l0_me( |
4571 | 3.05k | PROJECTED_COLOC_BR1, u1_search_candidate_list_index); |
4572 | | |
4573 | 3.05k | i4_num_coloc_cands += 6; |
4574 | | |
4575 | 3.05k | break; |
4576 | 0 | } |
4577 | 9.93k | case 4: |
4578 | 9.93k | { |
4579 | 89.4k | for(i = 0; i < 8; i++) |
4580 | 79.4k | { |
4581 | 79.4k | pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( |
4582 | 79.4k | (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i), |
4583 | 79.4k | u1_search_candidate_list_index); |
4584 | 79.4k | } |
4585 | | |
4586 | 9.93k | pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( |
4587 | 9.93k | PROJECTED_COLOC_TR0, u1_search_candidate_list_index); |
4588 | | |
4589 | 9.93k | pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me( |
4590 | 9.93k | PROJECTED_COLOC_BL0, u1_search_candidate_list_index); |
4591 | | |
4592 | 9.93k | pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me( |
4593 | 9.93k | PROJECTED_COLOC_BR0, u1_search_candidate_list_index); |
4594 | | |
4595 | 9.93k | pi4_id_coloc[i + 3] = hme_decide_search_candidate_priority_in_l0_me( |
4596 | 9.93k | PROJECTED_COLOC_TR1, u1_search_candidate_list_index); |
4597 | | |
4598 | 9.93k | pi4_id_coloc[i + 4] = hme_decide_search_candidate_priority_in_l0_me( |
4599 | 9.93k | PROJECTED_COLOC_BL1, u1_search_candidate_list_index); |
4600 | | |
4601 | 9.93k | pi4_id_coloc[i + 5] = hme_decide_search_candidate_priority_in_l0_me( |
4602 | 9.93k | PROJECTED_COLOC_BR1, u1_search_candidate_list_index); |
4603 | | |
4604 | 9.93k | i4_num_coloc_cands += 6; |
4605 | | |
4606 | 9.93k | break; |
4607 | 0 | } |
4608 | 0 | default: |
4609 | 0 | { |
4610 | 0 | ASSERT(0); |
4611 | 0 | } |
4612 | 31.4k | } |
4613 | | |
4614 | 31.4k | *pi4_num_coloc_cands = i4_num_coloc_cands; |
4615 | 31.4k | *pu1_search_candidate_list_index = u1_search_candidate_list_index; |
4616 | 31.4k | } |
4617 | 18.5k | else |
4618 | 18.5k | { |
4619 | | /* The variable 'u1_search_candidate_list_index' is hardcoded */ |
4620 | | /* to 10 and 11 respectively. But, these values are not returned */ |
4621 | | /* by this function since the actual values are dependent on */ |
4622 | | /* the number of refs in L0 and L1 respectively */ |
4623 | | /* Hence, the actual return values are being recomputed */ |
4624 | | /* in the latter part of this block */ |
4625 | | |
4626 | 18.5k | if(!u1_4x4_blk_in_l1me) |
4627 | 8.17k | { |
4628 | 8.17k | u1_search_candidate_list_index = 10; |
4629 | | |
4630 | 8.17k | i4_num_coloc_cands = 2 + (2 * ((i4_num_act_ref_l0 > 1) || (i4_num_act_ref_l1 > 1))); |
4631 | | |
4632 | 24.5k | for(i = 0; i < i4_num_coloc_cands; i++) |
4633 | 16.3k | { |
4634 | 16.3k | pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( |
4635 | 16.3k | (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i), |
4636 | 16.3k | u1_search_candidate_list_index); |
4637 | 16.3k | } |
4638 | 8.17k | } |
4639 | 10.3k | else |
4640 | 10.3k | { |
4641 | 10.3k | u1_search_candidate_list_index = 11; |
4642 | | |
4643 | 10.3k | i4_num_coloc_cands = 2 + (2 * ((i4_num_act_ref_l0 > 1) || (i4_num_act_ref_l1 > 1))); |
4644 | | |
4645 | 45.7k | for(i = 0; i < i4_num_coloc_cands; i++) |
4646 | 35.3k | { |
4647 | 35.3k | pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( |
4648 | 35.3k | (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i), |
4649 | 35.3k | u1_search_candidate_list_index); |
4650 | 35.3k | } |
4651 | | |
4652 | 10.3k | pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me( |
4653 | 10.3k | PROJECTED_COLOC_TR0, u1_search_candidate_list_index); |
4654 | | |
4655 | 10.3k | pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me( |
4656 | 10.3k | PROJECTED_COLOC_BL0, u1_search_candidate_list_index); |
4657 | | |
4658 | 10.3k | pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me( |
4659 | 10.3k | PROJECTED_COLOC_BR0, u1_search_candidate_list_index); |
4660 | 10.3k | } |
4661 | | |
4662 | 55.6k | for(j = 0; j < 2; j++) |
4663 | 37.1k | { |
4664 | 37.1k | if(0 == j) |
4665 | 18.5k | { |
4666 | 18.5k | pu1_search_candidate_list_index[j] = |
4667 | 18.5k | 8 + ((i4_num_act_ref_l0 > 1) * 2) + u1_4x4_blk_in_l1me; |
4668 | 18.5k | pi4_num_coloc_cands[j] = |
4669 | 18.5k | (u1_4x4_blk_in_l1me * 3) + 2 + ((i4_num_act_ref_l0 > 1) * 2); |
4670 | 18.5k | } |
4671 | 18.5k | else |
4672 | 18.5k | { |
4673 | 18.5k | pu1_search_candidate_list_index[j] = |
4674 | 18.5k | 8 + ((i4_num_act_ref_l1 > 1) * 2) + u1_4x4_blk_in_l1me; |
4675 | 18.5k | pi4_num_coloc_cands[j] = |
4676 | 18.5k | (u1_4x4_blk_in_l1me * 3) + 2 + ((i4_num_act_ref_l1 > 1) * 2); |
4677 | 18.5k | } |
4678 | 37.1k | } |
4679 | 18.5k | } |
4680 | | |
4681 | 85.3k | if(i4_num_act_ref_l0 || i4_num_act_ref_l1) |
4682 | 85.3k | { |
4683 | 85.3k | pi4_id_Z[0] = hme_decide_search_candidate_priority_in_l0_me( |
4684 | 85.3k | (SEARCH_CANDIDATE_TYPE_T)ZERO_MV, pu1_search_candidate_list_index[0]); |
4685 | 85.3k | } |
4686 | | |
4687 | 85.3k | if((i4_num_act_ref_l0 > 1) && !u1_is_bidir_enabled) |
4688 | 39.8k | { |
4689 | 39.8k | pi4_id_Z[1] = hme_decide_search_candidate_priority_in_l0_me( |
4690 | 39.8k | (SEARCH_CANDIDATE_TYPE_T)ZERO_MV_ALTREF, pu1_search_candidate_list_index[0]); |
4691 | 39.8k | } |
4692 | 85.3k | } |
4693 | | |
4694 | | static U08 |
4695 | | hme_determine_base_block_size(S32 *pi4_valid_part_array, S32 i4_num_valid_parts, U08 u1_cu_size) |
4696 | 0 | { |
4697 | 0 | ASSERT(i4_num_valid_parts > 0); |
4698 | | |
4699 | 0 | if(1 == i4_num_valid_parts) |
4700 | 0 | { |
4701 | 0 | ASSERT(pi4_valid_part_array[i4_num_valid_parts - 1] == PART_ID_2Nx2N); |
4702 | | |
4703 | 0 | return u1_cu_size; |
4704 | 0 | } |
4705 | 0 | else |
4706 | 0 | { |
4707 | 0 | if(pi4_valid_part_array[i4_num_valid_parts - 1] <= PART_ID_NxN_BR) |
4708 | 0 | { |
4709 | 0 | return u1_cu_size / 2; |
4710 | 0 | } |
4711 | 0 | else if(pi4_valid_part_array[i4_num_valid_parts - 1] <= PART_ID_nRx2N_R) |
4712 | 0 | { |
4713 | 0 | return u1_cu_size / 4; |
4714 | 0 | } |
4715 | 0 | } |
4716 | | |
4717 | 0 | return u1_cu_size / 4; |
4718 | 0 | } |
4719 | | |
4720 | | static U32 hme_compute_variance_of_pu_from_base_blocks( |
4721 | | ULWORD64 *pu8_SigmaX, |
4722 | | ULWORD64 *pu8_SigmaXSquared, |
4723 | | U08 u1_cu_size, |
4724 | | U08 u1_base_block_size, |
4725 | | S32 i4_part_id) |
4726 | 0 | { |
4727 | 0 | U08 i, j; |
4728 | 0 | ULWORD64 u8_final_variance; |
4729 | |
|
4730 | 0 | U08 u1_part_dimension_multiplier = (u1_cu_size >> 4); |
4731 | 0 | S32 i4_part_wd = gai1_part_wd_and_ht[i4_part_id][0] * u1_part_dimension_multiplier; |
4732 | 0 | S32 i4_part_ht = gai1_part_wd_and_ht[i4_part_id][1] * u1_part_dimension_multiplier; |
4733 | 0 | U08 u1_num_base_blocks_in_pu_row = i4_part_wd / u1_base_block_size; |
4734 | 0 | U08 u1_num_base_blocks_in_pu_column = i4_part_ht / u1_base_block_size; |
4735 | 0 | U08 u1_num_base_blocks_in_cu_row = u1_cu_size / u1_base_block_size; |
4736 | 0 | U08 u1_num_base_blocks = (u1_num_base_blocks_in_pu_row * u1_num_base_blocks_in_pu_column); |
4737 | 0 | U32 u4_num_pixels_in_base_block = u1_base_block_size * u1_base_block_size; |
4738 | 0 | ULWORD64 u8_final_SigmaXSquared = 0; |
4739 | 0 | ULWORD64 u8_final_SigmaX = 0; |
4740 | |
|
4741 | 0 | if(ge_part_id_to_part_type[i4_part_id] != PRT_NxN) |
4742 | 0 | { |
4743 | 0 | U08 u1_column_start_index = gau1_part_id_to_part_num[i4_part_id] |
4744 | 0 | ? (gai1_is_part_vertical[i4_part_id] |
4745 | 0 | ? 0 |
4746 | 0 | : (u1_cu_size - i4_part_wd) / u1_base_block_size) |
4747 | 0 | : 0; |
4748 | 0 | U08 u1_row_start_index = gau1_part_id_to_part_num[i4_part_id] |
4749 | 0 | ? (gai1_is_part_vertical[i4_part_id] |
4750 | 0 | ? (u1_cu_size - i4_part_ht) / u1_base_block_size |
4751 | 0 | : 0) |
4752 | 0 | : 0; |
4753 | 0 | U08 u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row; |
4754 | 0 | U08 u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column; |
4755 | |
|
4756 | 0 | for(i = u1_row_start_index; i < u1_row_end_index; i++) |
4757 | 0 | { |
4758 | 0 | for(j = u1_column_start_index; j < u1_column_end_index; j++) |
4759 | 0 | { |
4760 | 0 | u8_final_SigmaXSquared += pu8_SigmaXSquared[j + i * u1_num_base_blocks_in_cu_row]; |
4761 | 0 | u8_final_SigmaX += pu8_SigmaX[j + i * u1_num_base_blocks_in_cu_row]; |
4762 | 0 | } |
4763 | 0 | } |
4764 | |
|
4765 | 0 | u8_final_variance = |
4766 | 0 | u1_num_base_blocks * u4_num_pixels_in_base_block * u8_final_SigmaXSquared; |
4767 | 0 | u8_final_variance -= u8_final_SigmaX * u8_final_SigmaX; |
4768 | 0 | u8_final_variance += |
4769 | 0 | ((u1_num_base_blocks * u4_num_pixels_in_base_block) * |
4770 | 0 | (u1_num_base_blocks * u4_num_pixels_in_base_block) / 2); |
4771 | 0 | u8_final_variance /= (u1_num_base_blocks * u4_num_pixels_in_base_block) * |
4772 | 0 | (u1_num_base_blocks * u4_num_pixels_in_base_block); |
4773 | |
|
4774 | 0 | ASSERT(u8_final_variance <= UINT_MAX); |
4775 | 0 | } |
4776 | 0 | else |
4777 | 0 | { |
4778 | 0 | U08 u1_row_start_index; |
4779 | 0 | U08 u1_column_start_index; |
4780 | 0 | U08 u1_row_end_index; |
4781 | 0 | U08 u1_column_end_index; |
4782 | |
|
4783 | 0 | switch(gau1_part_id_to_part_num[i4_part_id]) |
4784 | 0 | { |
4785 | 0 | case 0: |
4786 | 0 | { |
4787 | 0 | u1_row_start_index = 0; |
4788 | 0 | u1_column_start_index = 0; |
4789 | |
|
4790 | 0 | break; |
4791 | 0 | } |
4792 | 0 | case 1: |
4793 | 0 | { |
4794 | 0 | u1_row_start_index = 0; |
4795 | 0 | u1_column_start_index = u1_num_base_blocks_in_pu_row; |
4796 | |
|
4797 | 0 | break; |
4798 | 0 | } |
4799 | 0 | case 2: |
4800 | 0 | { |
4801 | 0 | u1_row_start_index = u1_num_base_blocks_in_pu_column; |
4802 | 0 | u1_column_start_index = 0; |
4803 | |
|
4804 | 0 | break; |
4805 | 0 | } |
4806 | 0 | case 3: |
4807 | 0 | { |
4808 | 0 | u1_row_start_index = u1_num_base_blocks_in_pu_column; |
4809 | 0 | u1_column_start_index = u1_num_base_blocks_in_pu_row; |
4810 | |
|
4811 | 0 | break; |
4812 | 0 | } |
4813 | 0 | } |
4814 | | |
4815 | 0 | u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row; |
4816 | 0 | u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column; |
4817 | |
|
4818 | 0 | for(i = u1_row_start_index; i < u1_row_end_index; i++) |
4819 | 0 | { |
4820 | 0 | for(j = u1_column_start_index; j < u1_column_end_index; j++) |
4821 | 0 | { |
4822 | 0 | u8_final_SigmaXSquared += pu8_SigmaXSquared[j + i * u1_num_base_blocks_in_cu_row]; |
4823 | 0 | u8_final_SigmaX += pu8_SigmaX[j + i * u1_num_base_blocks_in_cu_row]; |
4824 | 0 | } |
4825 | 0 | } |
4826 | |
|
4827 | 0 | u8_final_variance = |
4828 | 0 | u1_num_base_blocks * u4_num_pixels_in_base_block * u8_final_SigmaXSquared; |
4829 | 0 | u8_final_variance -= u8_final_SigmaX * u8_final_SigmaX; |
4830 | 0 | u8_final_variance += |
4831 | 0 | ((u1_num_base_blocks * u4_num_pixels_in_base_block) * |
4832 | 0 | (u1_num_base_blocks * u4_num_pixels_in_base_block) / 2); |
4833 | 0 | u8_final_variance /= (u1_num_base_blocks * u4_num_pixels_in_base_block) * |
4834 | 0 | (u1_num_base_blocks * u4_num_pixels_in_base_block); |
4835 | |
|
4836 | 0 | ASSERT(u8_final_variance <= UINT_MAX); |
4837 | 0 | } |
4838 | | |
4839 | 0 | return u8_final_variance; |
4840 | 0 | } |
4841 | | |
4842 | | void hme_compute_variance_for_all_parts( |
4843 | | U08 *pu1_data, |
4844 | | S32 i4_data_stride, |
4845 | | S32 *pi4_valid_part_array, |
4846 | | U32 *pu4_variance, |
4847 | | S32 i4_num_valid_parts, |
4848 | | U08 u1_cu_size) |
4849 | 0 | { |
4850 | 0 | ULWORD64 au8_SigmaX[16]; |
4851 | 0 | ULWORD64 au8_SigmaXSquared[16]; |
4852 | 0 | U08 i, j, k, l; |
4853 | 0 | U08 u1_base_block_size; |
4854 | 0 | U08 u1_num_base_blocks_in_cu_row; |
4855 | 0 | U08 u1_num_base_blocks_in_cu_column; |
4856 | |
|
4857 | 0 | u1_base_block_size = |
4858 | 0 | hme_determine_base_block_size(pi4_valid_part_array, i4_num_valid_parts, u1_cu_size); |
4859 | |
|
4860 | 0 | u1_num_base_blocks_in_cu_row = u1_num_base_blocks_in_cu_column = |
4861 | 0 | u1_cu_size / u1_base_block_size; |
4862 | |
|
4863 | 0 | ASSERT(u1_num_base_blocks_in_cu_row <= 4); |
4864 | | |
4865 | 0 | for(i = 0; i < u1_num_base_blocks_in_cu_column; i++) |
4866 | 0 | { |
4867 | 0 | for(j = 0; j < u1_num_base_blocks_in_cu_row; j++) |
4868 | 0 | { |
4869 | 0 | U08 *pu1_buf = |
4870 | 0 | pu1_data + (u1_base_block_size * j) + (u1_base_block_size * i * i4_data_stride); |
4871 | |
|
4872 | 0 | au8_SigmaX[j + i * u1_num_base_blocks_in_cu_row] = 0; |
4873 | 0 | au8_SigmaXSquared[j + i * u1_num_base_blocks_in_cu_row] = 0; |
4874 | |
|
4875 | 0 | for(k = 0; k < u1_base_block_size; k++) |
4876 | 0 | { |
4877 | 0 | for(l = 0; l < u1_base_block_size; l++) |
4878 | 0 | { |
4879 | 0 | au8_SigmaX[j + i * u1_num_base_blocks_in_cu_row] += |
4880 | 0 | pu1_buf[l + k * i4_data_stride]; |
4881 | 0 | au8_SigmaXSquared[j + i * u1_num_base_blocks_in_cu_row] += |
4882 | 0 | pu1_buf[l + k * i4_data_stride] * pu1_buf[l + k * i4_data_stride]; |
4883 | 0 | } |
4884 | 0 | } |
4885 | 0 | } |
4886 | 0 | } |
4887 | |
|
4888 | 0 | for(i = 0; i < i4_num_valid_parts; i++) |
4889 | 0 | { |
4890 | 0 | pu4_variance[pi4_valid_part_array[i]] = hme_compute_variance_of_pu_from_base_blocks( |
4891 | 0 | au8_SigmaX, au8_SigmaXSquared, u1_cu_size, u1_base_block_size, pi4_valid_part_array[i]); |
4892 | 0 | } |
4893 | 0 | } |
4894 | | |
4895 | | void hme_compute_final_sigma_of_pu_from_base_blocks( |
4896 | | U32 *pu4_SigmaX, |
4897 | | U32 *pu4_SigmaXSquared, |
4898 | | ULWORD64 *pu8_final_sigmaX, |
4899 | | ULWORD64 *pu8_final_sigmaX_Squared, |
4900 | | U08 u1_cu_size, |
4901 | | U08 u1_base_block_size, |
4902 | | S32 i4_part_id, |
4903 | | U08 u1_base_blk_array_stride) |
4904 | 0 | { |
4905 | 0 | U08 i, j; |
4906 | | //U08 u1_num_base_blocks_in_cu_row; |
4907 | |
|
4908 | 0 | U08 u1_part_dimension_multiplier = (u1_cu_size >> 4); |
4909 | 0 | S32 i4_part_wd = gai1_part_wd_and_ht[i4_part_id][0] * u1_part_dimension_multiplier; |
4910 | 0 | S32 i4_part_ht = gai1_part_wd_and_ht[i4_part_id][1] * u1_part_dimension_multiplier; |
4911 | 0 | U08 u1_num_base_blocks_in_pu_row = i4_part_wd / u1_base_block_size; |
4912 | 0 | U08 u1_num_base_blocks_in_pu_column = i4_part_ht / u1_base_block_size; |
4913 | 0 | U16 u2_num_base_blocks = (u1_num_base_blocks_in_pu_row * u1_num_base_blocks_in_pu_column); |
4914 | 0 | U32 u4_num_pixels_in_base_block = u1_base_block_size * u1_base_block_size; |
4915 | 0 | U32 u4_N = (u2_num_base_blocks * u4_num_pixels_in_base_block); |
4916 | | |
4917 | | /*if (u1_is_for_src) |
4918 | | { |
4919 | | u1_num_base_blocks_in_cu_row = 16; |
4920 | | } |
4921 | | else |
4922 | | { |
4923 | | u1_num_base_blocks_in_cu_row = u1_cu_size / u1_base_block_size; |
4924 | | }*/ |
4925 | |
|
4926 | 0 | pu8_final_sigmaX[i4_part_id] = 0; |
4927 | 0 | pu8_final_sigmaX_Squared[i4_part_id] = 0; |
4928 | |
|
4929 | 0 | if(ge_part_id_to_part_type[i4_part_id] != PRT_NxN) |
4930 | 0 | { |
4931 | 0 | U08 u1_column_start_index = gau1_part_id_to_part_num[i4_part_id] |
4932 | 0 | ? (gai1_is_part_vertical[i4_part_id] |
4933 | 0 | ? 0 |
4934 | 0 | : (u1_cu_size - i4_part_wd) / u1_base_block_size) |
4935 | 0 | : 0; |
4936 | 0 | U08 u1_row_start_index = gau1_part_id_to_part_num[i4_part_id] |
4937 | 0 | ? (gai1_is_part_vertical[i4_part_id] |
4938 | 0 | ? (u1_cu_size - i4_part_ht) / u1_base_block_size |
4939 | 0 | : 0) |
4940 | 0 | : 0; |
4941 | 0 | U08 u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row; |
4942 | 0 | U08 u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column; |
4943 | |
|
4944 | 0 | for(i = u1_row_start_index; i < u1_row_end_index; i++) |
4945 | 0 | { |
4946 | 0 | for(j = u1_column_start_index; j < u1_column_end_index; j++) |
4947 | 0 | { |
4948 | 0 | pu8_final_sigmaX_Squared[i4_part_id] += |
4949 | 0 | pu4_SigmaXSquared[j + i * u1_base_blk_array_stride]; |
4950 | 0 | pu8_final_sigmaX[i4_part_id] += pu4_SigmaX[j + i * u1_base_blk_array_stride]; |
4951 | 0 | } |
4952 | 0 | } |
4953 | 0 | } |
4954 | 0 | else |
4955 | 0 | { |
4956 | 0 | U08 u1_row_start_index; |
4957 | 0 | U08 u1_column_start_index; |
4958 | 0 | U08 u1_row_end_index; |
4959 | 0 | U08 u1_column_end_index; |
4960 | |
|
4961 | 0 | switch(gau1_part_id_to_part_num[i4_part_id]) |
4962 | 0 | { |
4963 | 0 | case 0: |
4964 | 0 | { |
4965 | 0 | u1_row_start_index = 0; |
4966 | 0 | u1_column_start_index = 0; |
4967 | |
|
4968 | 0 | break; |
4969 | 0 | } |
4970 | 0 | case 1: |
4971 | 0 | { |
4972 | 0 | u1_row_start_index = 0; |
4973 | 0 | u1_column_start_index = u1_num_base_blocks_in_pu_row; |
4974 | |
|
4975 | 0 | break; |
4976 | 0 | } |
4977 | 0 | case 2: |
4978 | 0 | { |
4979 | 0 | u1_row_start_index = u1_num_base_blocks_in_pu_column; |
4980 | 0 | u1_column_start_index = 0; |
4981 | |
|
4982 | 0 | break; |
4983 | 0 | } |
4984 | 0 | case 3: |
4985 | 0 | { |
4986 | 0 | u1_row_start_index = u1_num_base_blocks_in_pu_column; |
4987 | 0 | u1_column_start_index = u1_num_base_blocks_in_pu_row; |
4988 | |
|
4989 | 0 | break; |
4990 | 0 | } |
4991 | 0 | } |
4992 | | |
4993 | 0 | u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row; |
4994 | 0 | u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column; |
4995 | |
|
4996 | 0 | for(i = u1_row_start_index; i < u1_row_end_index; i++) |
4997 | 0 | { |
4998 | 0 | for(j = u1_column_start_index; j < u1_column_end_index; j++) |
4999 | 0 | { |
5000 | 0 | pu8_final_sigmaX_Squared[i4_part_id] += |
5001 | 0 | pu4_SigmaXSquared[j + i * u1_base_blk_array_stride]; |
5002 | 0 | pu8_final_sigmaX[i4_part_id] += pu4_SigmaX[j + i * u1_base_blk_array_stride]; |
5003 | 0 | } |
5004 | 0 | } |
5005 | 0 | } |
5006 | | |
5007 | 0 | pu8_final_sigmaX_Squared[i4_part_id] *= u4_N; |
5008 | 0 | } |
5009 | | |
5010 | | void hme_compute_stim_injected_distortion_for_all_parts( |
5011 | | U08 *pu1_pred, |
5012 | | S32 i4_pred_stride, |
5013 | | S32 *pi4_valid_part_array, |
5014 | | ULWORD64 *pu8_src_sigmaX, |
5015 | | ULWORD64 *pu8_src_sigmaXSquared, |
5016 | | S32 *pi4_sad_array, |
5017 | | S32 i4_alpha_stim_multiplier, |
5018 | | S32 i4_inv_wt, |
5019 | | S32 i4_inv_wt_shift_val, |
5020 | | S32 i4_num_valid_parts, |
5021 | | S32 i4_wpred_log_wdc, |
5022 | | U08 u1_cu_size) |
5023 | 0 | { |
5024 | 0 | U32 au4_sigmaX[16], au4_sigmaXSquared[16]; |
5025 | 0 | ULWORD64 au8_final_ref_sigmaX[17], au8_final_ref_sigmaXSquared[17]; |
5026 | 0 | S32 i4_noise_term; |
5027 | 0 | U16 i2_count; |
5028 | |
|
5029 | 0 | ULWORD64 u8_temp_var, u8_temp_var1, u8_pure_dist; |
5030 | 0 | ULWORD64 u8_ref_X_Square, u8_src_var, u8_ref_var; |
5031 | |
|
5032 | 0 | U08 u1_base_block_size; |
5033 | |
|
5034 | 0 | WORD32 i4_q_level = STIM_Q_FORMAT + ALPHA_Q_FORMAT; |
5035 | |
|
5036 | 0 | u1_base_block_size = |
5037 | 0 | hme_determine_base_block_size(pi4_valid_part_array, i4_num_valid_parts, u1_cu_size); |
5038 | |
|
5039 | 0 | ASSERT(u1_cu_size >= 16); |
5040 | | |
5041 | 0 | hme_compute_sigmaX_and_sigmaXSquared( |
5042 | 0 | pu1_pred, |
5043 | 0 | i4_pred_stride, |
5044 | 0 | au4_sigmaX, |
5045 | 0 | au4_sigmaXSquared, |
5046 | 0 | u1_base_block_size, |
5047 | 0 | u1_base_block_size, |
5048 | 0 | u1_cu_size, |
5049 | 0 | u1_cu_size, |
5050 | 0 | 1, |
5051 | 0 | u1_cu_size / u1_base_block_size); |
5052 | | |
5053 | | /* Noise Term Computation */ |
5054 | 0 | for(i2_count = 0; i2_count < i4_num_valid_parts; i2_count++) |
5055 | 0 | { |
5056 | 0 | unsigned long u4_shift_val; |
5057 | 0 | S32 i4_bits_req; |
5058 | 0 | S32 part_id = pi4_valid_part_array[i2_count]; |
5059 | |
|
5060 | 0 | if(i4_alpha_stim_multiplier) |
5061 | 0 | { |
5062 | | /* Final SigmaX and SigmaX-Squared Calculation */ |
5063 | 0 | hme_compute_final_sigma_of_pu_from_base_blocks( |
5064 | 0 | au4_sigmaX, |
5065 | 0 | au4_sigmaXSquared, |
5066 | 0 | au8_final_ref_sigmaX, |
5067 | 0 | au8_final_ref_sigmaXSquared, |
5068 | 0 | u1_cu_size, |
5069 | 0 | u1_base_block_size, |
5070 | 0 | part_id, |
5071 | 0 | (u1_cu_size / u1_base_block_size)); |
5072 | |
|
5073 | 0 | u8_ref_X_Square = (au8_final_ref_sigmaX[part_id] * au8_final_ref_sigmaX[part_id]); |
5074 | 0 | u8_ref_var = (au8_final_ref_sigmaXSquared[part_id] - u8_ref_X_Square); |
5075 | |
|
5076 | 0 | u4_shift_val = ihevce_calc_stim_injected_variance( |
5077 | 0 | pu8_src_sigmaX, |
5078 | 0 | pu8_src_sigmaXSquared, |
5079 | 0 | &u8_src_var, |
5080 | 0 | i4_inv_wt, |
5081 | 0 | i4_inv_wt_shift_val, |
5082 | 0 | i4_wpred_log_wdc, |
5083 | 0 | part_id); |
5084 | |
|
5085 | 0 | u8_ref_var = u8_ref_var >> u4_shift_val; |
5086 | |
|
5087 | 0 | GETRANGE64(i4_bits_req, u8_ref_var); |
5088 | |
|
5089 | 0 | if(i4_bits_req > 27) |
5090 | 0 | { |
5091 | 0 | u8_ref_var = u8_ref_var >> (i4_bits_req - 27); |
5092 | 0 | u8_src_var = u8_src_var >> (i4_bits_req - 27); |
5093 | 0 | } |
5094 | |
|
5095 | 0 | if(u8_src_var == u8_ref_var) |
5096 | 0 | { |
5097 | 0 | u8_temp_var = (1 << STIM_Q_FORMAT); |
5098 | 0 | } |
5099 | 0 | else |
5100 | 0 | { |
5101 | 0 | u8_temp_var = (u8_src_var * u8_ref_var * (1 << STIM_Q_FORMAT)); |
5102 | 0 | u8_temp_var1 = (u8_src_var * u8_src_var) + (u8_ref_var * u8_ref_var); |
5103 | 0 | u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2)); |
5104 | 0 | u8_temp_var = (u8_temp_var / u8_temp_var1); |
5105 | 0 | u8_temp_var = (2 * u8_temp_var); |
5106 | 0 | } |
5107 | |
|
5108 | 0 | i4_noise_term = (UWORD32)u8_temp_var; |
5109 | |
|
5110 | 0 | ASSERT(i4_noise_term >= 0); |
5111 | | |
5112 | 0 | i4_noise_term *= i4_alpha_stim_multiplier; |
5113 | 0 | } |
5114 | 0 | else |
5115 | 0 | { |
5116 | 0 | i4_noise_term = 0; |
5117 | 0 | } |
5118 | | |
5119 | 0 | u8_pure_dist = pi4_sad_array[part_id]; |
5120 | 0 | u8_pure_dist *= ((1 << (i4_q_level)) - (i4_noise_term)); |
5121 | 0 | u8_pure_dist += (1 << ((i4_q_level)-1)); |
5122 | 0 | pi4_sad_array[part_id] = (UWORD32)(u8_pure_dist >> (i4_q_level)); |
5123 | 0 | } |
5124 | 0 | } |
5125 | | |
5126 | | void hme_compute_sigmaX_and_sigmaXSquared( |
5127 | | U08 *pu1_data, |
5128 | | S32 i4_buf_stride, |
5129 | | void *pv_sigmaX, |
5130 | | void *pv_sigmaXSquared, |
5131 | | U08 u1_base_blk_wd, |
5132 | | U08 u1_base_blk_ht, |
5133 | | U08 u1_blk_wd, |
5134 | | U08 u1_blk_ht, |
5135 | | U08 u1_is_sigma_pointer_size_32_bit, |
5136 | | U08 u1_array_stride) |
5137 | 0 | { |
5138 | 0 | U08 i, j, k, l; |
5139 | 0 | U08 u1_num_base_blks_in_row; |
5140 | 0 | U08 u1_num_base_blks_in_column; |
5141 | |
|
5142 | 0 | u1_num_base_blks_in_row = u1_blk_wd / u1_base_blk_wd; |
5143 | 0 | u1_num_base_blks_in_column = u1_blk_ht / u1_base_blk_ht; |
5144 | |
|
5145 | 0 | if(u1_is_sigma_pointer_size_32_bit) |
5146 | 0 | { |
5147 | 0 | U32 *sigmaX, *sigmaXSquared; |
5148 | |
|
5149 | 0 | sigmaX = (U32 *)pv_sigmaX; |
5150 | 0 | sigmaXSquared = (U32 *)pv_sigmaXSquared; |
5151 | | |
5152 | | /* Loop to compute the sigma_X and sigma_X_Squared */ |
5153 | 0 | for(i = 0; i < u1_num_base_blks_in_column; i++) |
5154 | 0 | { |
5155 | 0 | for(j = 0; j < u1_num_base_blks_in_row; j++) |
5156 | 0 | { |
5157 | 0 | U32 u4_sigmaX = 0, u4_sigmaXSquared = 0; |
5158 | 0 | U08 *pu1_buf = |
5159 | 0 | pu1_data + (u1_base_blk_wd * j) + (u1_base_blk_ht * i * i4_buf_stride); |
5160 | |
|
5161 | 0 | for(k = 0; k < u1_base_blk_ht; k++) |
5162 | 0 | { |
5163 | 0 | for(l = 0; l < u1_base_blk_wd; l++) |
5164 | 0 | { |
5165 | 0 | u4_sigmaX += pu1_buf[l + k * i4_buf_stride]; |
5166 | 0 | u4_sigmaXSquared += |
5167 | 0 | (pu1_buf[l + k * i4_buf_stride] * pu1_buf[l + k * i4_buf_stride]); |
5168 | 0 | } |
5169 | 0 | } |
5170 | |
|
5171 | 0 | sigmaX[j + i * u1_array_stride] = u4_sigmaX; |
5172 | 0 | sigmaXSquared[j + i * u1_array_stride] = u4_sigmaXSquared; |
5173 | 0 | } |
5174 | 0 | } |
5175 | 0 | } |
5176 | 0 | else |
5177 | 0 | { |
5178 | 0 | ULWORD64 *sigmaX, *sigmaXSquared; |
5179 | |
|
5180 | 0 | sigmaX = (ULWORD64 *)pv_sigmaX; |
5181 | 0 | sigmaXSquared = (ULWORD64 *)pv_sigmaXSquared; |
5182 | | |
5183 | | /* Loop to compute the sigma_X and sigma_X_Squared */ |
5184 | 0 | for(i = 0; i < u1_num_base_blks_in_column; i++) |
5185 | 0 | { |
5186 | 0 | for(j = 0; j < u1_num_base_blks_in_row; j++) |
5187 | 0 | { |
5188 | 0 | ULWORD64 u8_sigmaX = 0, u8_sigmaXSquared = 0; |
5189 | 0 | U08 *pu1_buf = |
5190 | 0 | pu1_data + (u1_base_blk_wd * j) + (u1_base_blk_ht * i * i4_buf_stride); |
5191 | |
|
5192 | 0 | for(k = 0; k < u1_base_blk_ht; k++) |
5193 | 0 | { |
5194 | 0 | for(l = 0; l < u1_base_blk_wd; l++) |
5195 | 0 | { |
5196 | 0 | u8_sigmaX += pu1_buf[l + k * i4_buf_stride]; |
5197 | 0 | u8_sigmaXSquared += |
5198 | 0 | (pu1_buf[l + k * i4_buf_stride] * pu1_buf[l + k * i4_buf_stride]); |
5199 | 0 | } |
5200 | 0 | } |
5201 | |
|
5202 | 0 | u8_sigmaXSquared = u8_sigmaXSquared * u1_blk_wd * u1_blk_ht; |
5203 | |
|
5204 | 0 | sigmaX[j + i * u1_array_stride] = u8_sigmaX; |
5205 | 0 | sigmaXSquared[j + i * u1_array_stride] = u8_sigmaXSquared; |
5206 | 0 | } |
5207 | 0 | } |
5208 | 0 | } |
5209 | 0 | } |
5210 | | |
5211 | | #if TEMPORAL_NOISE_DETECT |
5212 | | WORD32 ihevce_16x16block_temporal_noise_detect( |
5213 | | WORD32 had_block_size, |
5214 | | WORD32 ctb_width, |
5215 | | WORD32 ctb_height, |
5216 | | ihevce_ctb_noise_params *ps_ctb_noise_params, |
5217 | | fpel_srch_cand_init_data_t *s_proj_srch_cand_init_data, |
5218 | | hme_search_prms_t *s_search_prms_blk, |
5219 | | me_frm_ctxt_t *ps_ctxt, |
5220 | | WORD32 num_pred_dir, |
5221 | | WORD32 i4_num_act_ref_l0, |
5222 | | WORD32 i4_num_act_ref_l1, |
5223 | | WORD32 i4_cu_x_off, |
5224 | | WORD32 i4_cu_y_off, |
5225 | | wgt_pred_ctxt_t *ps_wt_inp_prms, |
5226 | | WORD32 input_stride, |
5227 | | WORD32 index_8x8_block, |
5228 | | WORD32 num_horz_blocks, |
5229 | | WORD32 num_8x8_in_ctb_row, |
5230 | | WORD32 i4_16x16_index) |
5231 | 0 | { |
5232 | 0 | WORD32 i; |
5233 | 0 | WORD32 noise_detected; |
5234 | |
|
5235 | 0 | UWORD8 *pu1_l0_block; |
5236 | 0 | UWORD8 *pu1_l1_block; |
5237 | |
|
5238 | 0 | WORD32 mean; |
5239 | 0 | UWORD32 variance_8x8; |
5240 | | |
5241 | | /* to store the mean and variance of each 8*8 block and find the variance of any higher block sizes later on. block */ |
5242 | 0 | WORD16 pi2_residue_16x16[256]; |
5243 | 0 | WORD32 mean_16x16; |
5244 | 0 | UWORD32 variance_16x16[2]; |
5245 | | |
5246 | | /* throw errors in case of un- supported arguments */ |
5247 | | /* assumptions size is 8 or 16 or 32 */ |
5248 | 0 | assert( |
5249 | 0 | (had_block_size == 8) || (had_block_size == 16) || (had_block_size == 32)); //ihevc_assert |
5250 | | |
5251 | | /* initialize the variables */ |
5252 | 0 | noise_detected = 0; |
5253 | 0 | variance_8x8 = 0; |
5254 | |
|
5255 | 0 | mean = 0; |
5256 | |
|
5257 | 0 | { |
5258 | 0 | i = 0; |
5259 | | /* get the ref/pred and source using the MV of both directions */ |
5260 | | /* pick the best candidates in each direction */ |
5261 | | /* Colocated cands */ |
5262 | 0 | { |
5263 | | // steps to be done |
5264 | | /* pick the candidates */ |
5265 | | /* do motion compoensation using the candidates got from prev step : pick from the offset */ |
5266 | | /* get the ref or the pred from the offset*/ |
5267 | | /* get the source data */ |
5268 | | /* send the pred - source to noise detect */ |
5269 | | /* do noise detect on the residue of source and pred */ |
5270 | |
|
5271 | 0 | layer_mv_t *ps_layer_mvbank; |
5272 | 0 | hme_mv_t *ps_mv; |
5273 | | |
5274 | | //S32 i; |
5275 | 0 | S32 wd_c, ht_c, wd_p, ht_p; |
5276 | 0 | S32 blksize_p, blk_x, blk_y, i4_offset; |
5277 | 0 | S08 *pi1_ref_idx; |
5278 | 0 | fpel_srch_cand_init_data_t *ps_ctxt_2 = s_proj_srch_cand_init_data; |
5279 | 0 | layer_ctxt_t *ps_curr_layer = ps_ctxt_2->ps_curr_layer; |
5280 | 0 | layer_ctxt_t *ps_coarse_layer = ps_ctxt_2->ps_coarse_layer; |
5281 | 0 | err_prms_t s_err_prms; |
5282 | 0 | S32 i4_blk_wd; |
5283 | 0 | S32 i4_blk_ht; |
5284 | 0 | BLK_SIZE_T e_blk_size; |
5285 | 0 | hme_search_prms_t *ps_search_prms; |
5286 | 0 | S32 i4_part_mask; |
5287 | 0 | S32 *pi4_valid_part_ids; |
5288 | | |
5289 | | /* has list of valid partition to search terminated by -1 */ |
5290 | 0 | S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1]; |
5291 | | |
5292 | | /*SEARCH_COMPLEXITY_T e_search_complexity = ps_ctxt->e_search_complexity;*/ |
5293 | |
|
5294 | 0 | S32 i4_pos_x; |
5295 | 0 | S32 i4_pos_y; |
5296 | 0 | U08 u1_pred_dir; // = ps_ctxt_2->u1_pred_dir; |
5297 | 0 | U08 u1_default_ref_id = 0; //ps_ctxt_2->u1_default_ref_id; |
5298 | 0 | S32 i4_inp_off, i4_ref_offset, i4_ref_stride; |
5299 | | |
5300 | | /* The reference is actually an array of ptrs since there are several */ |
5301 | | /* reference id. So an array gets passed form calling function */ |
5302 | 0 | U08 **ppu1_ref; |
5303 | | |
5304 | | /* Atributes of input candidates */ |
5305 | 0 | search_node_t as_search_node[2]; |
5306 | 0 | wgt_pred_ctxt_t *ps_wt_inp_prms; |
5307 | |
|
5308 | 0 | S32 posx; |
5309 | 0 | S32 posy; |
5310 | 0 | S32 i4_num_results_to_proj; |
5311 | 0 | S32 ai4_sad_grid[9 * TOT_NUM_PARTS]; |
5312 | 0 | S32 i4_inp_stride; |
5313 | | |
5314 | | /* intialize variables */ |
5315 | | /* Width and ht of current and prev layers */ |
5316 | 0 | wd_c = ps_curr_layer->i4_wd; |
5317 | 0 | ht_c = ps_curr_layer->i4_ht; |
5318 | 0 | wd_p = ps_coarse_layer->i4_wd; |
5319 | 0 | ht_p = ps_coarse_layer->i4_ht; |
5320 | |
|
5321 | 0 | ps_search_prms = s_search_prms_blk; |
5322 | |
|
5323 | 0 | ps_wt_inp_prms = &ps_ctxt->s_wt_pred; |
5324 | 0 | e_blk_size = ps_search_prms->e_blk_size; |
5325 | 0 | i4_part_mask = ps_search_prms->i4_part_mask; |
5326 | |
|
5327 | 0 | i4_blk_wd = gau1_blk_size_to_wd[e_blk_size]; |
5328 | 0 | i4_blk_ht = gau1_blk_size_to_ht[e_blk_size]; |
5329 | |
|
5330 | 0 | ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank; |
5331 | 0 | blksize_p = gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size]; |
5332 | | |
5333 | | /* ASSERT for valid sizes */ |
5334 | 0 | ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5)); |
5335 | | |
5336 | 0 | i4_pos_x = i4_cu_x_off; |
5337 | 0 | i4_pos_y = i4_cu_y_off; |
5338 | 0 | posx = i4_pos_x + 2; |
5339 | 0 | posy = i4_pos_y + 2; |
5340 | |
|
5341 | 0 | i4_inp_stride = ps_search_prms->i4_inp_stride; |
5342 | | /* Move to the location of the search blk in inp buffer */ |
5343 | | //i4_inp_off = i4_cu_x_off; |
5344 | | //i4_inp_off += i4_cu_y_off * i4_inp_stride; |
5345 | 0 | i4_inp_off = (i4_16x16_index % 4) * 16; |
5346 | 0 | i4_inp_off += (i4_16x16_index / 4) * 16 * i4_inp_stride; |
5347 | | |
5348 | | /***********pick the candidates**************************************/ |
5349 | 0 | for(u1_pred_dir = 0; u1_pred_dir < num_pred_dir; u1_pred_dir++) |
5350 | 0 | { |
5351 | 0 | WORD32 actual_pred_dir = 0; |
5352 | |
|
5353 | 0 | if(u1_pred_dir == 0 && i4_num_act_ref_l0 == 0) |
5354 | 0 | { |
5355 | 0 | actual_pred_dir = 1; |
5356 | 0 | } |
5357 | 0 | else if(u1_pred_dir == 0 && i4_num_act_ref_l0 != 0) |
5358 | 0 | { |
5359 | 0 | actual_pred_dir = 0; |
5360 | 0 | } |
5361 | 0 | else if(u1_pred_dir == 1) |
5362 | 0 | { |
5363 | 0 | actual_pred_dir = 1; |
5364 | 0 | } |
5365 | |
|
5366 | 0 | i4_num_results_to_proj = 1; // only the best proj |
5367 | | |
5368 | | /* Safety check to avoid uninitialized access across temporal layers */ |
5369 | 0 | posx = CLIP3(posx, 0, (wd_c - blksize_p)); /* block position withing frAME */ |
5370 | 0 | posy = CLIP3(posy, 0, (ht_c - blksize_p)); |
5371 | | |
5372 | | /* Project the positions to prev layer */ |
5373 | 0 | blk_x = posx >> blksize_p; |
5374 | 0 | blk_y = posy >> blksize_p; |
5375 | | |
5376 | | /* Pick up the mvs from the location */ |
5377 | 0 | i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk); |
5378 | 0 | i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y); |
5379 | |
|
5380 | 0 | ps_mv = ps_layer_mvbank->ps_mv + i4_offset; |
5381 | 0 | pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset; |
5382 | |
|
5383 | 0 | if(actual_pred_dir == 1) |
5384 | 0 | { |
5385 | 0 | ps_mv += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref); |
5386 | 0 | pi1_ref_idx += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref); |
5387 | 0 | } |
5388 | |
|
5389 | 0 | { |
5390 | 0 | as_search_node[actual_pred_dir].s_mv.i2_mvx = ps_mv[0].i2_mv_x << 1; |
5391 | 0 | as_search_node[actual_pred_dir].s_mv.i2_mvy = ps_mv[0].i2_mv_y << 1; |
5392 | 0 | as_search_node[actual_pred_dir].i1_ref_idx = pi1_ref_idx[0]; |
5393 | |
|
5394 | 0 | if((as_search_node[actual_pred_dir].i1_ref_idx < 0) || |
5395 | 0 | (as_search_node[actual_pred_dir].s_mv.i2_mvx == INTRA_MV)) |
5396 | 0 | { |
5397 | 0 | as_search_node[actual_pred_dir].i1_ref_idx = u1_default_ref_id; |
5398 | 0 | as_search_node[actual_pred_dir].s_mv.i2_mvx = 0; |
5399 | 0 | as_search_node[actual_pred_dir].s_mv.i2_mvy = 0; |
5400 | 0 | } |
5401 | 0 | } |
5402 | | |
5403 | | /********************************************************************************************/ |
5404 | 0 | { |
5405 | | /* declare the variables */ |
5406 | | //ps_fullpel_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt; |
5407 | |
|
5408 | 0 | pi4_valid_part_ids = ai4_valid_part_ids; |
5409 | 0 | i4_ref_stride = ps_curr_layer->i4_rec_stride; |
5410 | 0 | s_err_prms.i4_inp_stride = i4_inp_stride; |
5411 | 0 | s_err_prms.i4_ref_stride = i4_ref_stride; |
5412 | 0 | s_err_prms.i4_part_mask = i4_part_mask; |
5413 | 0 | s_err_prms.pi4_sad_grid = &ai4_sad_grid[0]; |
5414 | 0 | s_err_prms.i4_blk_wd = i4_blk_wd; |
5415 | 0 | s_err_prms.i4_blk_ht = i4_blk_ht; |
5416 | 0 | s_err_prms.i4_step = 1; |
5417 | 0 | s_err_prms.pi4_valid_part_ids = pi4_valid_part_ids; |
5418 | | //s_err_prms.i4_num_partitions = ps_fullpel_refine_ctxt->i4_num_valid_parts; |
5419 | | |
5420 | | /*************************************************************************/ |
5421 | | /* Depending on flag i4_use_rec, we use either input of previously */ |
5422 | | /* encoded pictures or we use recon of previously encoded pictures. */ |
5423 | 0 | i4_ref_stride = ps_curr_layer->i4_rec_stride; |
5424 | 0 | ppu1_ref = ps_curr_layer->ppu1_list_rec_fxfy; // pointer to the pred |
5425 | |
|
5426 | 0 | i4_ref_offset = (i4_ref_stride * i4_cu_y_off) + i4_cu_x_off; //i4_x_off; |
5427 | |
|
5428 | 0 | s_err_prms.pu1_ref = |
5429 | 0 | ppu1_ref[as_search_node[actual_pred_dir].i1_ref_idx] + i4_ref_offset; |
5430 | 0 | s_err_prms.pu1_ref += as_search_node[actual_pred_dir].s_mv.i2_mvx; |
5431 | 0 | s_err_prms.pu1_ref += |
5432 | 0 | as_search_node[actual_pred_dir].s_mv.i2_mvy * i4_ref_stride; |
5433 | | |
5434 | | /*get the source */ |
5435 | 0 | s_err_prms.pu1_inp = |
5436 | 0 | ps_wt_inp_prms->apu1_wt_inp[as_search_node[actual_pred_dir].i1_ref_idx] + |
5437 | 0 | i4_inp_off; //pu1_src_input + i4_inp_off;//ps_wt_inp_prms->apu1_wt_inp[as_search_node[actual_pred_dir].i1_ref_idx] + i4_inp_off; |
5438 | | |
5439 | | /* send the pred - source to noise detect */ |
5440 | | // noise_detect_hme(noise_structure, s_err_prms.pu1_inp, s_err_prms.pu1_ref); |
5441 | 0 | } |
5442 | | /* change the l0/l1 blcok pointer names accrodingle */ |
5443 | | |
5444 | | /* get memory pointers the input and the reference */ |
5445 | 0 | pu1_l0_block = s_err_prms.pu1_inp; |
5446 | 0 | pu1_l1_block = s_err_prms.pu1_ref; |
5447 | |
|
5448 | 0 | { |
5449 | 0 | WORD32 i2, j2; |
5450 | 0 | WORD32 dim = 16; |
5451 | 0 | UWORD8 *buf1; |
5452 | 0 | UWORD8 *buf2; |
5453 | 0 | for(i2 = 0; i2 < dim; i2++) |
5454 | 0 | { |
5455 | 0 | buf1 = pu1_l0_block + i2 * i4_inp_stride; |
5456 | 0 | buf2 = pu1_l1_block + i2 * i4_ref_stride; |
5457 | |
|
5458 | 0 | for(j2 = 0; j2 < dim; j2++) |
5459 | 0 | { |
5460 | 0 | pi2_residue_16x16[i2 * dim + j2] = (WORD16)(buf1[j2] - buf2[j2]); |
5461 | 0 | } |
5462 | 0 | } |
5463 | |
|
5464 | 0 | ihevce_calc_variance_signed( |
5465 | 0 | pi2_residue_16x16, 16, &mean_16x16, &variance_16x16[u1_pred_dir], 16, 16); |
5466 | | |
5467 | | /* compare the source and residue variance for this block ps_ctb_noise_params->i4_variance_src_16x16 */ |
5468 | 0 | if(variance_16x16[u1_pred_dir] > |
5469 | 0 | ((TEMPORAL_VARIANCE_FACTOR * |
5470 | 0 | ps_ctb_noise_params->au4_variance_src_16x16[i4_16x16_index]) >> |
5471 | 0 | Q_TEMPORAL_VARIANCE_FACTOR)) |
5472 | 0 | { |
5473 | | /* update noisy block count only if all best MV in diff directions indicates noise */ |
5474 | 0 | if(u1_pred_dir == num_pred_dir - 1) |
5475 | 0 | { |
5476 | 0 | ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block] = 1; |
5477 | 0 | ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block + 1] = 1; |
5478 | 0 | ps_ctb_noise_params |
5479 | 0 | ->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row] = 1; |
5480 | 0 | ps_ctb_noise_params |
5481 | 0 | ->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row + 1] = 1; |
5482 | 0 | noise_detected = 1; |
5483 | 0 | } |
5484 | 0 | } |
5485 | 0 | else /* if any one of the direction mv says it as non noise then dont check for the other directions MV , move for next block*/ |
5486 | 0 | { |
5487 | 0 | noise_detected = 0; |
5488 | 0 | ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block] = 0; |
5489 | 0 | ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block + 1] = 0; |
5490 | 0 | ps_ctb_noise_params |
5491 | 0 | ->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row] = 0; |
5492 | 0 | ps_ctb_noise_params |
5493 | 0 | ->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row + 1] = 0; |
5494 | 0 | break; |
5495 | 0 | } |
5496 | 0 | } // variance analysis and calculation |
5497 | 0 | } // for each direction |
5498 | 0 | } // HME code |
5499 | |
|
5500 | 0 | } // for each 16x16 block |
5501 | | |
5502 | 0 | return (noise_detected); |
5503 | 0 | } |
5504 | | #endif |
5505 | | |
5506 | | void hme_qpel_interp_avg_1pt( |
5507 | | interp_prms_t *ps_prms, |
5508 | | S32 i4_mv_x, |
5509 | | S32 i4_mv_y, |
5510 | | S32 i4_buf_id, |
5511 | | U08 **ppu1_final, |
5512 | | S32 *pi4_final_stride) |
5513 | 12.3M | { |
5514 | 12.3M | U08 *pu1_src1, *pu1_src2, *pu1_dst; |
5515 | 12.3M | qpel_input_buf_cfg_t *ps_inp_cfg; |
5516 | 12.3M | S32 i4_mv_x_frac, i4_mv_y_frac, i4_offset; |
5517 | | |
5518 | | /*************************************************************************/ |
5519 | | /* For a given QPEL pt, we need to determine the 2 source pts that are */ |
5520 | | /* needed to do the QPEL averaging. The logic to do this is as follows */ |
5521 | | /* i4_mv_x and i4_mv_y are the motion vectors in QPEL units that are */ |
5522 | | /* pointing to the pt of interest. Obviously, they are w.r.t. the 0,0 */ |
5523 | | /* pt of th reference blk that is colocated to the inp blk. */ |
5524 | | /* A j E k B */ |
5525 | | /* l m n o p */ |
5526 | | /* F q G r H */ |
5527 | | /* s t u v w */ |
5528 | | /* C x I y D */ |
5529 | | /* In above diagram, A. B, C, D are full pts at offsets (0,0),(1,0),(0,1)*/ |
5530 | | /* and (1,1) respectively in the fpel buffer (id = 0) */ |
5531 | | /* E and I are hxfy pts in offsets (0,0),(0,1) respectively in hxfy buf */ |
5532 | | /* F and H are fxhy pts in offsets (0,0),(1,0) respectively in fxhy buf */ |
5533 | | /* G is hxhy pt in offset 0,0 in hxhy buf */ |
5534 | | /* All above offsets are computed w.r.t. motion displaced pt in */ |
5535 | | /* respective bufs. This means that A corresponds to (i4_mv_x >> 2) and */ |
5536 | | /* (i4_mv_y >> 2) in fxfy buf. Ditto with E, F and G */ |
5537 | | /* fxfy buf is buf id 0, hxfy is buf id 1, fxhy is buf id 2, hxhy is 3 */ |
5538 | | /* If we consider pt v to be derived. v has a fractional comp of 3, 3 */ |
5539 | | /* v is avg of H and I. So the table look up of v should give following */ |
5540 | | /* buf 1 (H) : offset = (1, 0) buf id = 2. */ |
5541 | | /* buf 2 (I) : offset = 0 , 1) buf id = 1. */ |
5542 | | /* NOTE: For pts that are fxfy/hxfy/fxhy/hxhy, bufid 1 will be -1. */ |
5543 | | /*************************************************************************/ |
5544 | 12.3M | i4_mv_x_frac = i4_mv_x & 3; |
5545 | 12.3M | i4_mv_y_frac = i4_mv_y & 3; |
5546 | | |
5547 | 12.3M | i4_offset = (i4_mv_x >> 2) + (i4_mv_y >> 2) * ps_prms->i4_ref_stride; |
5548 | | |
5549 | | /* Derive the descriptor that has all offset and size info */ |
5550 | 12.3M | ps_inp_cfg = &gas_qpel_inp_buf_cfg[i4_mv_y_frac][i4_mv_x_frac]; |
5551 | | |
5552 | 12.3M | pu1_src1 = ps_prms->ppu1_ref[ps_inp_cfg->i1_buf_id1]; |
5553 | 12.3M | pu1_src1 += ps_inp_cfg->i1_buf_xoff1 + i4_offset; |
5554 | 12.3M | pu1_src1 += (ps_inp_cfg->i1_buf_yoff1 * ps_prms->i4_ref_stride); |
5555 | | |
5556 | 12.3M | pu1_src2 = ps_prms->ppu1_ref[ps_inp_cfg->i1_buf_id2]; |
5557 | 12.3M | pu1_src2 += ps_inp_cfg->i1_buf_xoff2 + i4_offset; |
5558 | 12.3M | pu1_src2 += (ps_inp_cfg->i1_buf_yoff2 * ps_prms->i4_ref_stride); |
5559 | | |
5560 | 12.3M | pu1_dst = ps_prms->apu1_interp_out[i4_buf_id]; |
5561 | 12.3M | hevc_avg_2d( |
5562 | 12.3M | pu1_src1, |
5563 | 12.3M | pu1_src2, |
5564 | 12.3M | ps_prms->i4_ref_stride, |
5565 | 12.3M | ps_prms->i4_ref_stride, |
5566 | 12.3M | ps_prms->i4_blk_wd, |
5567 | 12.3M | ps_prms->i4_blk_ht, |
5568 | 12.3M | pu1_dst, |
5569 | 12.3M | ps_prms->i4_out_stride); |
5570 | 12.3M | ppu1_final[i4_buf_id] = pu1_dst; |
5571 | 12.3M | pi4_final_stride[i4_buf_id] = ps_prms->i4_out_stride; |
5572 | 12.3M | } |
5573 | | |
5574 | | void hme_qpel_interp_avg_2pt_vert_with_reuse( |
5575 | | interp_prms_t *ps_prms, S32 i4_mv_x, S32 i4_mv_y, U08 **ppu1_final, S32 *pi4_final_stride) |
5576 | 3.03M | { |
5577 | 3.03M | hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x, i4_mv_y + 1, 3, ppu1_final, pi4_final_stride); |
5578 | | |
5579 | 3.03M | hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x, i4_mv_y - 1, 1, ppu1_final, pi4_final_stride); |
5580 | 3.03M | } |
5581 | | |
5582 | | void hme_qpel_interp_avg_2pt_horz_with_reuse( |
5583 | | interp_prms_t *ps_prms, S32 i4_mv_x, S32 i4_mv_y, U08 **ppu1_final, S32 *pi4_final_stride) |
5584 | 3.04M | { |
5585 | 3.04M | hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x + 1, i4_mv_y, 2, ppu1_final, pi4_final_stride); |
5586 | | |
5587 | 3.04M | hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x - 1, i4_mv_y, 0, ppu1_final, pi4_final_stride); |
5588 | 3.04M | } |
5589 | | |
5590 | | void hme_set_mv_limit_using_dvsr_data( |
5591 | | me_frm_ctxt_t *ps_ctxt, |
5592 | | layer_ctxt_t *ps_curr_layer, |
5593 | | range_prms_t *ps_mv_limit, |
5594 | | S16 *pi2_prev_enc_frm_max_mv_y, |
5595 | | U08 u1_num_act_ref_pics) |
5596 | 85.3k | { |
5597 | 85.3k | WORD32 ref_ctr; |
5598 | | |
5599 | | /* Only for B/b pic. */ |
5600 | 85.3k | if(1 == ps_ctxt->s_frm_prms.bidir_enabled) |
5601 | 18.5k | { |
5602 | 18.5k | WORD16 i2_mv_y_per_poc, i2_max_mv_y; |
5603 | 18.5k | WORD32 cur_poc, prev_poc, ref_poc, abs_poc_diff; |
5604 | 18.5k | WORD32 prev_poc_count = 0; |
5605 | 18.5k | WORD32 i4_p_idx; |
5606 | | |
5607 | 18.5k | pi2_prev_enc_frm_max_mv_y[0] = 0; |
5608 | | |
5609 | 18.5k | cur_poc = ps_ctxt->i4_curr_poc; |
5610 | | |
5611 | 18.5k | i4_p_idx = 0; |
5612 | | |
5613 | | /* Get abs MAX for symmetric search */ |
5614 | 18.5k | i2_mv_y_per_poc = ps_curr_layer->i2_max_mv_y; |
5615 | | /* Assuming P to P distance as 4 */ |
5616 | 18.5k | i2_mv_y_per_poc = (i2_mv_y_per_poc + 2) >> 2; |
5617 | | |
5618 | 66.7k | for(ref_ctr = 0; ref_ctr < u1_num_act_ref_pics; ref_ctr++) |
5619 | 48.2k | { |
5620 | | /* Get the prev. encoded frame POC */ |
5621 | 48.2k | prev_poc = ps_ctxt->i4_prev_poc; |
5622 | | |
5623 | 48.2k | ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]; |
5624 | 48.2k | abs_poc_diff = ABS((cur_poc - ref_poc)); |
5625 | | /* Get the cur. max MV based on POC distance */ |
5626 | 48.2k | i2_max_mv_y = i2_mv_y_per_poc * abs_poc_diff; |
5627 | 48.2k | i2_max_mv_y = MIN(i2_max_mv_y, ps_curr_layer->i2_max_mv_y); |
5628 | | |
5629 | 48.2k | ps_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x; |
5630 | 48.2k | ps_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y; |
5631 | 48.2k | ps_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x; |
5632 | 48.2k | ps_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y; |
5633 | | |
5634 | | /* Find the MAX MV for the prev. encoded frame to optimize */ |
5635 | | /* the reverse dependency of ME on Enc.Loop */ |
5636 | 48.2k | if(ref_poc == prev_poc) |
5637 | 12.7k | { |
5638 | | /* TO DO : Same thing for horz. search also */ |
5639 | 12.7k | pi2_prev_enc_frm_max_mv_y[0] = i2_max_mv_y; |
5640 | 12.7k | prev_poc_count++; |
5641 | 12.7k | } |
5642 | 48.2k | } |
5643 | 18.5k | } |
5644 | 66.8k | else |
5645 | 66.8k | { |
5646 | 66.8k | ASSERT(0 == ps_ctxt->s_frm_prms.u1_num_active_ref_l1); |
5647 | | |
5648 | | /* Set the Config. File Params for P pic. */ |
5649 | 196k | for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++) |
5650 | 129k | { |
5651 | 129k | ps_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x; |
5652 | 129k | ps_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y; |
5653 | 129k | ps_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x; |
5654 | 129k | ps_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y; |
5655 | 129k | } |
5656 | | |
5657 | | /* For P PIC., go with Config. File Params */ |
5658 | 66.8k | pi2_prev_enc_frm_max_mv_y[0] = ps_curr_layer->i2_max_mv_y; |
5659 | 66.8k | } |
5660 | 85.3k | } |
5661 | | |
5662 | | S32 hme_part_mask_populator( |
5663 | | U08 *pu1_inp, |
5664 | | S32 i4_inp_stride, |
5665 | | U08 u1_limit_active_partitions, |
5666 | | U08 u1_is_bPic, |
5667 | | U08 u1_is_refPic, |
5668 | | U08 u1_blk_8x8_mask, |
5669 | | ME_QUALITY_PRESETS_T e_me_quality_preset) |
5670 | 1.57M | { |
5671 | 1.57M | if(15 != u1_blk_8x8_mask) |
5672 | 32.2k | { |
5673 | 32.2k | return ENABLE_NxN; |
5674 | 32.2k | } |
5675 | 1.54M | else |
5676 | 1.54M | { |
5677 | 1.54M | U08 u1_call_inp_segmentation_based_part_mask_populator = |
5678 | 1.54M | (ME_XTREME_SPEED_25 != e_me_quality_preset) || |
5679 | 1.54M | (!u1_is_bPic && !DISABLE_8X8CUS_IN_PPICS_IN_P6) || |
5680 | 1.54M | (u1_is_bPic && u1_is_refPic && !DISABLE_8X8CUS_IN_REFBPICS_IN_P6) || |
5681 | 1.54M | (u1_is_bPic && !u1_is_refPic && !DISABLE_8X8CUS_IN_NREFBPICS_IN_P6); |
5682 | | |
5683 | 1.54M | if(u1_call_inp_segmentation_based_part_mask_populator) |
5684 | 1.49M | { |
5685 | 1.49M | S32 i4_part_mask = |
5686 | 1.49M | hme_study_input_segmentation(pu1_inp, i4_inp_stride, u1_limit_active_partitions); |
5687 | | |
5688 | 1.49M | if(e_me_quality_preset == ME_XTREME_SPEED) |
5689 | 163k | { |
5690 | 163k | i4_part_mask &= ~ENABLE_AMP; |
5691 | 163k | } |
5692 | | |
5693 | 1.49M | if(e_me_quality_preset == ME_XTREME_SPEED_25) |
5694 | 409k | { |
5695 | 409k | i4_part_mask &= ~ENABLE_AMP; |
5696 | | |
5697 | 409k | i4_part_mask &= ~ENABLE_SMP; |
5698 | 409k | } |
5699 | | |
5700 | 1.49M | return i4_part_mask; |
5701 | 1.49M | } |
5702 | 53.0k | else |
5703 | 53.0k | { |
5704 | 53.0k | return ENABLE_2Nx2N; |
5705 | 53.0k | } |
5706 | 1.54M | } |
5707 | 1.57M | } |