/src/libhevc/encoder/hme_coarse.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2018 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | |
21 | | /** |
22 | | ****************************************************************************** |
23 | | * @file hme_coarse.c |
24 | | * |
25 | | * @brief |
26 | | * Contains ME algorithm for the coarse layer. |
27 | | * |
28 | | * @author |
29 | | * Ittiam |
30 | | * |
31 | | * |
32 | | * List of Functions |
33 | | * hme_update_mv_bank_coarse() |
34 | | * hme_coarse() |
35 | | ****************************************************************************** |
36 | | */ |
37 | | |
38 | | /*****************************************************************************/ |
39 | | /* File Includes */ |
40 | | /*****************************************************************************/ |
41 | | /* System include files */ |
42 | | #include <stdio.h> |
43 | | #include <string.h> |
44 | | #include <stdlib.h> |
45 | | #include <assert.h> |
46 | | #include <stdarg.h> |
47 | | #include <math.h> |
48 | | #include <limits.h> |
49 | | |
50 | | /* User include files */ |
51 | | #include "ihevc_typedefs.h" |
52 | | #include "itt_video_api.h" |
53 | | #include "ihevce_api.h" |
54 | | |
55 | | #include "rc_cntrl_param.h" |
56 | | #include "rc_frame_info_collector.h" |
57 | | #include "rc_look_ahead_params.h" |
58 | | |
59 | | #include "ihevc_defs.h" |
60 | | #include "ihevc_structs.h" |
61 | | #include "ihevc_platform_macros.h" |
62 | | #include "ihevc_deblk.h" |
63 | | #include "ihevc_itrans_recon.h" |
64 | | #include "ihevc_chroma_itrans_recon.h" |
65 | | #include "ihevc_chroma_intra_pred.h" |
66 | | #include "ihevc_intra_pred.h" |
67 | | #include "ihevc_inter_pred.h" |
68 | | #include "ihevc_mem_fns.h" |
69 | | #include "ihevc_padding.h" |
70 | | #include "ihevc_weighted_pred.h" |
71 | | #include "ihevc_sao.h" |
72 | | #include "ihevc_resi_trans.h" |
73 | | #include "ihevc_quant_iquant_ssd.h" |
74 | | #include "ihevc_cabac_tables.h" |
75 | | |
76 | | #include "ihevce_defs.h" |
77 | | #include "ihevce_lap_enc_structs.h" |
78 | | #include "ihevce_multi_thrd_structs.h" |
79 | | #include "ihevce_multi_thrd_funcs.h" |
80 | | #include "ihevce_me_common_defs.h" |
81 | | #include "ihevce_had_satd.h" |
82 | | #include "ihevce_error_codes.h" |
83 | | #include "ihevce_bitstream.h" |
84 | | #include "ihevce_cabac.h" |
85 | | #include "ihevce_rdoq_macros.h" |
86 | | #include "ihevce_function_selector.h" |
87 | | #include "ihevce_enc_structs.h" |
88 | | #include "ihevce_entropy_structs.h" |
89 | | #include "ihevce_cmn_utils_instr_set_router.h" |
90 | | #include "ihevce_enc_loop_structs.h" |
91 | | #include "ihevce_bs_compute_ctb.h" |
92 | | #include "ihevce_global_tables.h" |
93 | | #include "ihevce_dep_mngr_interface.h" |
94 | | #include "hme_datatype.h" |
95 | | #include "hme_interface.h" |
96 | | #include "hme_common_defs.h" |
97 | | #include "hme_defs.h" |
98 | | #include "ihevce_me_instr_set_router.h" |
99 | | #include "hme_globals.h" |
100 | | #include "hme_utils.h" |
101 | | #include "hme_coarse.h" |
102 | | #include "hme_refine.h" |
103 | | #include "hme_err_compute.h" |
104 | | #include "hme_common_utils.h" |
105 | | #include "hme_search_algo.h" |
106 | | |
107 | | /******************************************************************************* |
108 | | * MACROS |
109 | | *******************************************************************************/ |
110 | | #define COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, shift) /* Copies the node's MV (each component >> shift) and its ref idx into *ps_mv and *pi1_ref_idx; the pointer arguments are expanded unparenthesized, so pass plain identifiers or parenthesized expressions */ \ |
111 | 15.3M | { \ |
112 | 15.3M | ps_mv->i2_mv_x = ps_search_node->s_mv.i2_mvx >> (shift); \ |
113 | 15.3M | ps_mv->i2_mv_y = ps_search_node->s_mv.i2_mvy >> (shift); \ |
114 | 15.3M | *pi1_ref_idx = ps_search_node->i1_ref_idx; \ |
115 | 15.3M | } |
116 | | |
117 | | /*****************************************************************************/ |
118 | | /* Function Definitions */ |
119 | | /*****************************************************************************/ |
120 | | |
121 | | /** |
122 | | ******************************************************************************** |
123 | | * @fn void hme_update_mv_bank_coarse(search_results_t *ps_search_results, |
124 | | * layer_mv_t *ps_layer_mv, |
125 | | * S32 i4_search_blk_x, |
126 | | * S32 i4_search_blk_y, |
127 | | * search_node_t *ps_search_node_4x8_t, |
128 | | * search_node_t *ps_search_node_8x4_l, |
129 | | * S08 i1_ref_idx, |
130 | | * mvbank_update_prms_t *ps_prms) |
131 | | * |
132 | | * @brief Updates the coarse layer MV Bank for a given ref id and blk pos |
133 | | * |
134 | | * @param[in] ps_search_results: Search results data structure |
135 | | * |
136 | | * @param[in, out] ps_layer_mv : MV Bank for this layer |
137 | | * |
138 | | * @param[in] i4_search_blk_x: column number of the 4x4 blk searched |
139 | | * |
140 | | * @param[in] i4_search_blk_y: row number of the 4x4 blk searched |
141 | | * |
142 | | * @param[in] ps_search_node_4x8_t: Best MV of the 4x8T blk |
143 | | * |
144 | | * @param[in] ps_search_node_8x4_l: Best MV of the 8x4L blk |
145 | | * |
146 | | * @param[in] i1_ref_idx : Reference ID that has been searched |
147 | | * |
148 | | * @param[in] ps_prms : Parameters pertaining to the MV Bank update |
149 | | * |
150 | | * @return None |
151 | | ******************************************************************************** |
152 | | */ |
153 | | void hme_update_mv_bank_coarse( |
154 | | search_results_t *ps_search_results, |
155 | | layer_mv_t *ps_layer_mv, |
156 | | S32 i4_search_blk_x, |
157 | | S32 i4_search_blk_y, |
158 | | search_node_t *ps_search_node_4x8_t, |
159 | | search_node_t *ps_search_node_8x4_l, |
160 | | S08 i1_ref_idx, |
161 | | mvbank_update_prms_t *ps_prms) |
162 | 3.84M | { |
163 | | /* Stores the search results of one blk / ref idx into the layer MV bank. These point to the MV and ref idx position to be updated */ |
164 | 3.84M | hme_mv_t *ps_mv; |
165 | 3.84M | S08 *pi1_ref_idx; |
166 | | |
167 | | /* Offset within the bank */ |
168 | 3.84M | S32 i4_offset; |
169 | | |
170 | 3.84M | S32 i, j, i4_blk_x, i4_blk_y; |
171 | | |
172 | | /* Best results for 8x4R and 4x8B blocks (read below from this ref idx's PART_ID_2NxN_B and PART_ID_Nx2N_R partition results) */ |
173 | 3.84M | search_node_t *ps_search_node_8x4_r, *ps_search_node_4x8_b; |
174 | | |
175 | | /* Number of MVs stored per reference for a block (i4_num_mvs_per_ref) */ |
176 | 3.84M | S32 num_mvs = ps_layer_mv->i4_num_mvs_per_ref; |
177 | | |
178 | 3.84M | search_node_t *aps_search_nodes[4]; |
179 | | |
180 | | /* The search blk may be different in size from the blk used to hold MV, so the search blk coordinates are scaled by ps_prms->i4_shift */ |
181 | 3.84M | i4_blk_x = i4_search_blk_x << ps_prms->i4_shift; |
182 | 3.84M | i4_blk_y = i4_search_blk_y << ps_prms->i4_shift; |
183 | | |
184 | | /* Compute the offset in the MV bank: raster blk index times the number of MVs per blk */ |
185 | 3.84M | i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row; |
186 | 3.84M | i4_offset *= ps_layer_mv->i4_num_mvs_per_blk; |
187 | | |
188 | | /* Identify the correct offset in the mvbank and the reference id buf (each ref idx owns num_mvs consecutive entries within the blk) */ |
189 | 3.84M | ps_mv = ps_layer_mv->ps_mv + (i4_offset + (num_mvs * i1_ref_idx)); |
190 | 3.84M | pi1_ref_idx = ps_layer_mv->pi1_ref_idx + (i4_offset + (num_mvs * i1_ref_idx)); |
191 | | |
192 | | /*************************************************************************/ |
193 | | /* We have at least 4 distinct results: the 4x8 top (coming from top blk) */ |
194 | | /* 8x4 left (coming from left blk), 8x4 and 4x8 right and bot resp. */ |
195 | | /* If number of results to be stored is 4, then we store all these 4 */ |
196 | | /* results, else we pick best ones */ |
197 | | /*************************************************************************/ |
198 | 3.84M | ps_search_node_8x4_r = ps_search_results->aps_part_results[i1_ref_idx][PART_ID_2NxN_B]; |
199 | 3.84M | ps_search_node_4x8_b = ps_search_results->aps_part_results[i1_ref_idx][PART_ID_Nx2N_R]; |
200 | | |
201 | 3.84M | ASSERT(num_mvs <= 4); |
202 | | |
203 | | /* Gather the four candidates in one array; when num_mvs == 4 they are stored in exactly this order without sorting */ |
204 | 3.84M | aps_search_nodes[0] = ps_search_node_8x4_r; |
205 | 3.84M | aps_search_nodes[1] = ps_search_node_4x8_b; |
206 | 3.84M | aps_search_nodes[2] = ps_search_node_8x4_l; |
207 | 3.84M | aps_search_nodes[3] = ps_search_node_4x8_t; |
208 | 3.84M | if(num_mvs == 4) |
209 | 3.84M | { |
210 | 3.84M | COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[0], 0); |
211 | 3.84M | ps_mv++; |
212 | 3.84M | pi1_ref_idx++; |
213 | 3.84M | COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[1], 0); |
214 | 3.84M | ps_mv++; |
215 | 3.84M | pi1_ref_idx++; |
216 | 3.84M | COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[2], 0); |
217 | 3.84M | ps_mv++; |
218 | 3.84M | pi1_ref_idx++; |
219 | 3.84M | COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[3], 0); |
220 | 3.84M | ps_mv++; |
221 | 3.84M | pi1_ref_idx++; |
222 | 3.84M | return; |
223 | 3.84M | } |
224 | | |
225 | | /* Fewer than 4 results: partial selection sort on i4_tot_cost, so only the num_mvs cheapest of the 4 candidates are placed and stored, best first */ |
226 | 0 | for(i = 0; i < num_mvs; i++) |
227 | 0 | { |
228 | 0 | for(j = i + 1; j < 4; j++) |
229 | 0 | { |
230 | 0 | if(aps_search_nodes[j]->i4_tot_cost < aps_search_nodes[i]->i4_tot_cost) |
231 | 0 | { |
232 | 0 | SWAP_HME(aps_search_nodes[j], aps_search_nodes[i], search_node_t *); |
233 | 0 | } |
234 | 0 | } |
235 | 0 | COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[i], 0); |
236 | 0 | ps_mv++; |
237 | 0 | pi1_ref_idx++; |
238 | 0 | } |
239 | 0 | } |
240 | | |
241 | | /** |
242 | | ******************************************************************************** |
243 | | * @fn void hme_coarse_frm_init(coarse_me_ctxt_t *ps_ctxt, coarse_prms_t *ps_coarse_prms) |
244 | | * |
245 | | * @brief Frame init entry point Coarse ME. |
246 | | * |
247 | | * @param[in,out] ps_ctxt: ME Handle |
248 | | * |
249 | | * @param[in] ps_coarse_prms : Coarse layer config params |
250 | | * |
251 | | * @return None |
252 | | ******************************************************************************** |
253 | | */ |
254 | | void hme_coarse_frm_init(coarse_me_ctxt_t *ps_ctxt, coarse_prms_t *ps_coarse_prms) |
255 | 105k | { |
256 | 105k | layer_ctxt_t *ps_curr_layer; |
257 | | |
258 | 105k | S32 i4_pic_wd, i4_pic_ht; |
259 | | |
260 | 105k | S32 num_blks_in_pic, num_blks_in_row; |
261 | | |
262 | 105k | BLK_SIZE_T e_search_blk_size = BLK_4x4; |
263 | | |
264 | 105k | S32 blk_size_shift = 2, blk_wd = 4, blk_ht = 4; |
265 | | |
266 | | /* Number of references to search (taken from ps_coarse_prms->i4_num_ref) */ |
267 | 105k | S32 i4_num_ref; |
268 | | |
269 | 105k | ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_coarse_prms->i4_layer_id]; |
270 | 105k | i4_num_ref = ps_coarse_prms->i4_num_ref; |
271 | | |
272 | 105k | i4_pic_wd = ps_curr_layer->i4_wd; |
273 | 105k | i4_pic_ht = ps_curr_layer->i4_ht; |
274 | | /* Macro updates num_blks_in_pic and num_blks_in_row. NOTE(review): neither value is read afterwards in this function, and e_search_blk_size, blk_wd and blk_ht are never read either */ |
275 | 105k | GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic); |
276 | | |
277 | | /************************************************************************/ |
278 | | /* Initialize the mv bank that holds results of this layer (4x4 blks). */ |
279 | | /************************************************************************/ |
280 | 105k | hme_init_mv_bank( |
281 | 105k | ps_curr_layer, |
282 | 105k | BLK_4x4, |
283 | 105k | i4_num_ref, |
284 | 105k | ps_coarse_prms->num_results, |
285 | 105k | ps_ctxt->u1_encode[ps_coarse_prms->i4_layer_id]); |
286 | | |
287 | 105k | return; |
288 | 105k | } |
289 | | |
290 | | /** |
291 | | ******************************************************************************** |
292 | | * @fn void hme_derive_worst_case_search_range(range_prms_t *ps_range, |
293 | | * range_prms_t *ps_pic_limit, |
294 | | * range_prms_t *ps_mv_limit, |
295 | | * S32 i4_x, |
296 | | * S32 i4_y, |
297 | | * S32 blk_wd, |
298 | | * S32 blk_ht) |
299 | | * |
300 | | * @brief given picture limits and blk dimensions and mv search limits, obtains |
301 | | * the valid search range such that the blk stays within pic boundaries, |
302 | | * where picture boundaries include padded portions of picture |
303 | | * |
304 | | * @param[out] ps_range: updated with actual search range |
305 | | * |
306 | | * @param[in] ps_pic_limit : picture boundaries |
307 | | * |
308 | | * @param[in] ps_mv_limit: Search range limits for the mvs |
309 | | * |
310 | | * @param[in] i4_x : x coordinate of the blk |
311 | | * |
312 | | * @param[in] i4_y : y coordinate of the blk |
313 | | * |
314 | | * @param[in] blk_wd : blk width |
315 | | * |
316 | | * @param[in] blk_ht : blk height |
317 | | * |
318 | | * @return void |
319 | | ******************************************************************************** |
320 | | */ |
321 | | void hme_derive_worst_case_search_range( |
322 | | range_prms_t *ps_range, |
323 | | range_prms_t *ps_pic_limit, |
324 | | range_prms_t *ps_mv_limit, |
325 | | S32 i4_x, |
326 | | S32 i4_y, |
327 | | S32 blk_wd, |
328 | | S32 blk_ht) |
329 | 5.84M | { |
330 | | /* Max x is computed for the block 4 pixels to the left (i4_x - 4) and min x for the current block, i.e. the larger max and the smaller min of the two, each clipped to ps_mv_limit. NOTE(review): the 4 is hard-coded rather than blk_wd - confirm */ |
331 | 5.84M | ps_range->i2_max_x = |
332 | 5.84M | MIN((ps_pic_limit->i2_max_x - (S16)blk_wd - (S16)(i4_x - 4)), ps_mv_limit->i2_max_x); |
333 | 5.84M | ps_range->i2_min_x = MAX((ps_pic_limit->i2_min_x - (S16)i4_x), ps_mv_limit->i2_min_x); |
334 | | /* Same in y: max y is computed for the block 4 pixels above (i4_y - 4), min y for the current block */ |
335 | 5.84M | ps_range->i2_max_y = |
336 | 5.84M | MIN((ps_pic_limit->i2_max_y - (S16)blk_ht - (S16)(i4_y - 4)), ps_mv_limit->i2_max_y); |
337 | 5.84M | ps_range->i2_min_y = MAX((ps_pic_limit->i2_min_y - (S16)i4_y), ps_mv_limit->i2_min_y); |
338 | 5.84M | } |
339 | | |
340 | | /** |
341 | | ******************************************************************************** |
342 | | * @fn void hme_combine_4x4_sads_and_compute_cost_high_quality( |
343 | | * S08 i1_ref_idx, |
344 | | * range_prms_t *ps_mv_range, |
345 | | * range_prms_t *ps_mv_limit, |
346 | | * hme_mv_t *ps_best_mv_4x8, |
347 | | * hme_mv_t *ps_best_mv_8x4, |
348 | | * pred_ctxt_t *ps_pred_ctxt, |
349 | | * PF_MV_COST_FXN pf_mv_cost_compute, |
350 | | * S16 *pi2_sads_4x4_current, |
351 | | * S16 *pi2_sads_4x4_east, |
352 | | * S16 *pi2_sads_4x4_south) |
353 | | * (hme_combine_4x4_sads_and_compute_cost_high_speed takes the same arguments) |
354 | | * |
355 | | * @brief Sweeps the search range at the coarse step size, combines the stored 4x4 SADs into 4x8 and 8x4 SADs, adds the MV cost and returns the best MV for each shape |
356 | | * |
357 | | * @param[in] i1_ref_idx : Cur ref idx |
358 | | * |
359 | | * @param[in] ps_mv_range, ps_mv_limit : MV range swept by the search; MV limits used to index the stored SADs |
360 | | * |
361 | | * @param[out] ps_best_mv_4x8, ps_best_mv_8x4 : type hme_mv_t, contain best mv x and y for the 4x8 and 8x4 shapes |
362 | | * |
363 | | * @param[in] ps_pred_ctxt : Prediction ctxt for cost computation |
364 | | * |
365 | | * @param[in] pf_mv_cost_compute : mv cost computation function |
366 | | * @param[in] pi2_sads_4x4_current, pi2_sads_4x4_east, pi2_sads_4x4_south : stored 4x4 SADs that are summed (current + south for 4x8, current + east for 8x4) |
367 | | * @return void |
368 | | ******************************************************************************** |
369 | | */ |
370 | | void hme_combine_4x4_sads_and_compute_cost_high_quality( |
371 | | S08 i1_ref_idx, |
372 | | range_prms_t *ps_mv_range, |
373 | | range_prms_t *ps_mv_limit, |
374 | | hme_mv_t *ps_best_mv_4x8, |
375 | | hme_mv_t *ps_best_mv_8x4, |
376 | | pred_ctxt_t *ps_pred_ctxt, |
377 | | PF_MV_COST_FXN pf_mv_cost_compute, |
378 | | S16 *pi2_sads_4x4_current, |
379 | | S16 *pi2_sads_4x4_east, |
380 | | S16 *pi2_sads_4x4_south) |
381 | 1.47M | { |
382 | | /* Sweep steps, best MV per shape and SAD-buffer indexing terms. NOTE(review): best_mv_* are assigned only inside the loops below, so they are read uninitialized at the end of the function if the range is empty */ |
383 | 1.47M | S32 stepy, stepx, best_mv_y_4x8, best_mv_x_4x8, best_mv_y_8x4, best_mv_x_8x4; |
384 | 1.47M | S32 step_shift_x, step_shift_y; |
385 | 1.47M | S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range; |
386 | | |
387 | 1.47M | S32 min_cost_4x8 = MAX_32BIT_VAL; |
388 | 1.47M | S32 min_cost_8x4 = MAX_32BIT_VAL; |
389 | | |
390 | 1.47M | search_node_t s_search_node; |
391 | 1.47M | s_search_node.i1_ref_idx = i1_ref_idx; |
392 | | |
393 | 1.47M | stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_QUALITY; |
394 | | /* TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_QUALITY. NOTE(review): the offsets below negate before shifting, while hme_store_4x4_sads_high_quality() shifts before negating; the two can differ when the limit is not a multiple of the step - confirm */ |
395 | 1.47M | step_shift_x = step_shift_y = 1; |
396 | | |
397 | 1.47M | mv_x_offset = (-ps_mv_limit->i2_min_x >> step_shift_x); |
398 | 1.47M | mv_y_offset = (-ps_mv_limit->i2_min_y >> step_shift_y); |
399 | 1.47M | mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x; |
400 | 1.47M | mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y; |
401 | | |
402 | | /* Run 2 loops to sweep over the reference area; sad_pos is the row-major index (row width mv_x_range) of this MV in the stored SAD buffers */ |
403 | 26.3M | for(mvy = ps_mv_range->i2_min_y; mvy < ps_mv_range->i2_max_y; mvy += stepy) |
404 | 24.9M | { |
405 | 595M | for(mvx = ps_mv_range->i2_min_x; mvx < ps_mv_range->i2_max_x; mvx += stepx) |
406 | 570M | { |
407 | 570M | S32 sad_4x8, cost_4x8, sad_8x4, cost_8x4; |
408 | 570M | S32 sad_pos = ((mvx >> step_shift_x) + mv_x_offset) + |
409 | 570M | ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range; |
410 | | |
411 | | /* 4x8 SAD = current + south 4x4 SAD; 8x4 SAD = current + east 4x4 SAD */ |
412 | 570M | sad_4x8 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_south[sad_pos]; |
413 | 570M | sad_8x4 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_east[sad_pos]; |
414 | | |
415 | | // The MV cost is evaluated once for this candidate MV and shared by both shapes |
416 | 570M | s_search_node.s_mv.i2_mvx = mvx; |
417 | 570M | s_search_node.s_mv.i2_mvy = mvy; |
418 | | |
419 | 570M | cost_4x8 = cost_8x4 = |
420 | 570M | pf_mv_cost_compute(&s_search_node, ps_pred_ctxt, PART_ID_2Nx2N, MV_RES_FPEL); |
421 | | |
422 | 570M | cost_4x8 += sad_4x8; |
423 | 570M | cost_8x4 += sad_8x4; |
424 | | |
425 | 570M | if(cost_4x8 < min_cost_4x8) |
426 | 14.7M | { |
427 | 14.7M | best_mv_x_4x8 = mvx; |
428 | 14.7M | best_mv_y_4x8 = mvy; |
429 | 14.7M | min_cost_4x8 = cost_4x8; |
430 | 14.7M | } |
431 | 570M | if(cost_8x4 < min_cost_8x4) |
432 | 15.7M | { |
433 | 15.7M | best_mv_x_8x4 = mvx; |
434 | 15.7M | best_mv_y_8x4 = mvy; |
435 | 15.7M | min_cost_8x4 = cost_8x4; |
436 | 15.7M | } |
437 | 570M | } |
438 | 24.9M | } |
439 | | |
440 | 1.47M | ps_best_mv_4x8->i2_mv_x = best_mv_x_4x8; |
441 | 1.47M | ps_best_mv_4x8->i2_mv_y = best_mv_y_4x8; |
442 | | |
443 | 1.47M | ps_best_mv_8x4->i2_mv_x = best_mv_x_8x4; |
444 | 1.47M | ps_best_mv_8x4->i2_mv_y = best_mv_y_8x4; |
445 | 1.47M | } |
446 | | |
447 | | void hme_combine_4x4_sads_and_compute_cost_high_speed( |
448 | | S08 i1_ref_idx, |
449 | | range_prms_t *ps_mv_range, |
450 | | range_prms_t *ps_mv_limit, |
451 | | hme_mv_t *ps_best_mv_4x8, |
452 | | hme_mv_t *ps_best_mv_8x4, |
453 | | pred_ctxt_t *ps_pred_ctxt, |
454 | | PF_MV_COST_FXN pf_mv_cost_compute, |
455 | | S16 *pi2_sads_4x4_current, |
456 | | S16 *pi2_sads_4x4_east, |
457 | | S16 *pi2_sads_4x4_south) |
458 | 2.37M | { |
459 | | /* Sweep steps, best MV per shape and SAD-buffer indexing terms. NOTE(review): best_mv_* are assigned only inside the loops below, so they are read uninitialized at the end of the function if the range is empty */ |
460 | 2.37M | S32 stepy, stepx, best_mv_y_4x8, best_mv_x_4x8, best_mv_y_8x4, best_mv_x_8x4; |
461 | 2.37M | S32 step_shift_x, step_shift_y; |
462 | 2.37M | S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range; |
463 | | |
464 | 2.37M | S32 rnd, lambda, lambda_q_shift; |
465 | | |
466 | 2.37M | S32 min_cost_4x8 = MAX_32BIT_VAL; |
467 | 2.37M | S32 min_cost_8x4 = MAX_32BIT_VAL; |
468 | | |
469 | 2.37M | (void)pf_mv_cost_compute; |
470 | 2.37M | stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_SPEED; |
471 | | /* TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_SPEED. NOTE(review): the offsets below negate before shifting, while hme_store_4x4_sads_high_speed() shifts before negating; the two can differ when the limit is not a multiple of the step - confirm */ |
472 | 2.37M | step_shift_x = step_shift_y = 2; |
473 | | |
474 | 2.37M | mv_x_offset = (-ps_mv_limit->i2_min_x >> step_shift_x); |
475 | 2.37M | mv_y_offset = (-ps_mv_limit->i2_min_y >> step_shift_y); |
476 | 2.37M | mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x; |
477 | 2.37M | mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y; |
478 | | |
479 | 2.37M | lambda = ps_pred_ctxt->lambda; |
480 | 2.37M | lambda_q_shift = ps_pred_ctxt->lambda_q_shift; |
481 | 2.37M | rnd = 1 << (lambda_q_shift - 1); |
482 | | |
483 | 2.37M | ASSERT(MAX_MVX_SUPPORTED_IN_COARSE_LAYER >= ABS(ps_mv_range->i2_max_x)); |
484 | 2.37M | ASSERT(MAX_MVY_SUPPORTED_IN_COARSE_LAYER >= ABS(ps_mv_range->i2_max_y)); |
485 | | |
486 | | /* Run 2 loops to sweep over the reference area; sad_pos is the row-major index (row width mv_x_range) of this MV in the stored SAD buffers */ |
487 | 23.0M | for(mvy = ps_mv_range->i2_min_y; mvy < ps_mv_range->i2_max_y; mvy += stepy) |
488 | 20.7M | { |
489 | 258M | for(mvx = ps_mv_range->i2_min_x; mvx < ps_mv_range->i2_max_x; mvx += stepx) |
490 | 238M | { |
491 | 238M | S32 sad_4x8, cost_4x8, sad_8x4, cost_8x4; |
492 | | |
493 | 238M | S32 sad_pos = ((mvx >> step_shift_x) + mv_x_offset) + |
494 | 238M | ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range; |
495 | | |
496 | | /* 4x8 SAD = current + south 4x4 SAD; 8x4 SAD = current + east 4x4 SAD */ |
497 | 238M | sad_4x8 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_south[sad_pos]; |
498 | 238M | sad_8x4 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_east[sad_pos]; |
499 | | |
500 | | // MV cost is estimated inline (pf_mv_cost_compute is unused here) from hme_get_range() of |mvx| and |mvy| plus the ref idx, then scaled by lambda with rounding; rnd = 1 << (lambda_q_shift - 1) presumes lambda_q_shift >= 1 - confirm |
501 | | |
502 | 238M | cost_4x8 = cost_8x4 = |
503 | 238M | (2 * hme_get_range(ABS(mvx)) - 1) + (2 * hme_get_range(ABS(mvy)) - 1) + i1_ref_idx; |
504 | | |
505 | 238M | cost_4x8 += (mvx != 0) ? 1 : 0; |
506 | 238M | cost_4x8 += (mvy != 0) ? 1 : 0; |
507 | 238M | cost_4x8 = (cost_4x8 * lambda + rnd) >> lambda_q_shift; |
508 | | |
509 | 238M | cost_8x4 += (mvx != 0) ? 1 : 0; |
510 | 238M | cost_8x4 += (mvy != 0) ? 1 : 0; |
511 | 238M | cost_8x4 = (cost_8x4 * lambda + rnd) >> lambda_q_shift; |
512 | | |
513 | 238M | cost_4x8 += sad_4x8; |
514 | 238M | cost_8x4 += sad_8x4; |
515 | | |
516 | 238M | if(cost_4x8 < min_cost_4x8) |
517 | 16.6M | { |
518 | 16.6M | best_mv_x_4x8 = mvx; |
519 | 16.6M | best_mv_y_4x8 = mvy; |
520 | 16.6M | min_cost_4x8 = cost_4x8; |
521 | 16.6M | } |
522 | 238M | if(cost_8x4 < min_cost_8x4) |
523 | 17.1M | { |
524 | 17.1M | best_mv_x_8x4 = mvx; |
525 | 17.1M | best_mv_y_8x4 = mvy; |
526 | 17.1M | min_cost_8x4 = cost_8x4; |
527 | 17.1M | } |
528 | 238M | } |
529 | 20.7M | } |
530 | | |
531 | 2.37M | ps_best_mv_4x8->i2_mv_x = best_mv_x_4x8; |
532 | 2.37M | ps_best_mv_4x8->i2_mv_y = best_mv_y_4x8; |
533 | | |
534 | 2.37M | ps_best_mv_8x4->i2_mv_x = best_mv_x_8x4; |
535 | 2.37M | ps_best_mv_8x4->i2_mv_y = best_mv_y_8x4; |
536 | 2.37M | } |
537 | | |
538 | | /** |
539 | | ******************************************************************************** |
540 | | * @fn hme_store_4x4_sads_high_quality(hme_search_prms_t *ps_search_prms, |
541 | | * layer_ctxt_t *ps_layer_ctxt, range_prms_t *ps_mv_limit, wgt_pred_ctxt_t *ps_wt_inp_prms, S16 *pi2_sads_4x4) |
542 | | * |
543 | | * @brief Does a 4x4 sad computation on a given range and stores it in memory |
544 | | * |
545 | | * @param[in] ps_search_prms : Search prms structure containing info like |
546 | | * blk dimensions, search range etc |
547 | | * |
548 | | * @param[in] ps_layer_ctxt: All info about this layer |
549 | | * |
550 | | * @param[in] ps_wt_inp_prms: All info about weighted input |
551 | | * |
552 | | * @param[in] ps_mv_limit: MV limits used to compute each SAD's index in pi2_sads_4x4 |
553 | | * |
554 | | * @param[out] pi2_sads_4x4: Memory to store all 4x4 SADs for given range |
555 | | * |
556 | | * @return void |
557 | | ******************************************************************************** |
558 | | */ |
559 | | |
560 | | void hme_store_4x4_sads_high_quality( |
561 | | hme_search_prms_t *ps_search_prms, |
562 | | layer_ctxt_t *ps_layer_ctxt, |
563 | | range_prms_t *ps_mv_limit, |
564 | | wgt_pred_ctxt_t *ps_wt_inp_prms, |
565 | | S16 *pi2_sads_4x4) |
566 | 2.24M | { |
567 | 2.24M | S32 sad, i, j; |
568 | | |
569 | | /* Input and reference attributes */ |
570 | 2.24M | U08 *pu1_inp, *pu1_inp_orig, *pu1_ref; |
571 | 2.24M | S32 i4_inp_stride, i4_ref_stride, i4_ref_offset; |
572 | | |
573 | | /* The reference is actually an array of ptrs since there are several */ |
574 | | /* reference ids. So an array gets passed from the calling function */ |
575 | 2.24M | U08 **ppu1_ref, *pu1_ref_coloc; |
576 | | |
577 | 2.24M | S32 stepy, stepx, step_shift_x, step_shift_y; |
578 | 2.24M | S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range; |
579 | | |
580 | | /* Points to the range limits for mv */ |
581 | 2.24M | range_prms_t *ps_range_prms; |
582 | | |
583 | | /* Reference index to be searched */ |
584 | 2.24M | S32 i4_search_idx = ps_search_prms->i1_ref_idx; |
585 | | /* aps_mv_range[0] is used as the search range for every ref idx */ |
586 | 2.24M | ps_range_prms = ps_search_prms->aps_mv_range[0]; |
587 | 2.24M | pu1_inp_orig = ps_wt_inp_prms->apu1_wt_inp[i4_search_idx]; |
588 | 2.24M | i4_inp_stride = ps_search_prms->i4_inp_stride; |
589 | | |
590 | | /* Move to the location of the search blk in inp buffer */ |
591 | 2.24M | pu1_inp_orig += ps_search_prms->i4_cu_x_off; |
592 | 2.24M | pu1_inp_orig += ps_search_prms->i4_cu_y_off * i4_inp_stride; |
593 | | |
594 | | /*************************************************************************/ |
595 | | /* The reference is read from the layer's input picture list */ |
596 | | /* (ppu1_list_inp) using the layer's input stride */ |
597 | | /*************************************************************************/ |
598 | 2.24M | i4_ref_stride = ps_layer_ctxt->i4_inp_stride; |
599 | 2.24M | ppu1_ref = ps_layer_ctxt->ppu1_list_inp; |
600 | | |
601 | | /* colocated position in reference picture */ |
602 | 2.24M | i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off; |
603 | 2.24M | pu1_ref_coloc = ppu1_ref[i4_search_idx] + i4_ref_offset; |
604 | | |
605 | 2.24M | stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_QUALITY; |
606 | | /* TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_QUALITY; the offsets and ranges below define the row-major layout (row width mv_x_range) of pi2_sads_4x4 */ |
607 | 2.24M | step_shift_x = step_shift_y = 1; |
608 | | |
609 | 2.24M | mv_x_offset = -(ps_mv_limit->i2_min_x >> step_shift_x); |
610 | 2.24M | mv_y_offset = -(ps_mv_limit->i2_min_y >> step_shift_y); |
611 | 2.24M | mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x; |
612 | 2.24M | mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y; |
613 | | |
614 | | /* Run 2 loops to sweep over the reference area (mv_y_range is computed above but not read in this function) */ |
615 | 42.7M | for(mvy = ps_range_prms->i2_min_y; mvy < ps_range_prms->i2_max_y; mvy += stepy) |
616 | 40.4M | { |
617 | 1.04G | for(mvx = ps_range_prms->i2_min_x; mvx < ps_range_prms->i2_max_x; mvx += stepx) |
618 | 1.00G | { |
619 | | /* Set up the reference ptr (colocated blk displaced by the MV) and the inp ptr */ |
620 | 1.00G | pu1_ref = pu1_ref_coloc + mvx + (mvy * i4_ref_stride); |
621 | 1.00G | pu1_inp = pu1_inp_orig; |
622 | | /* 4x4 SAD computation between the input blk and the displaced reference blk */ |
623 | 1.00G | { |
624 | 1.00G | sad = 0; |
625 | 5.00G | for(i = 0; i < 4; i++) |
626 | 4.00G | { |
627 | 20.0G | for(j = 0; j < 4; j++) |
628 | 16.0G | { |
629 | 16.0G | sad += (ABS(((S32)pu1_inp[j] - (S32)pu1_ref[j]))); |
630 | 16.0G | } |
631 | 4.00G | pu1_inp += i4_inp_stride; |
632 | 4.00G | pu1_ref += i4_ref_stride; |
633 | 4.00G | } |
634 | 1.00G | } |
635 | | |
636 | 1.00G | pi2_sads_4x4 |
637 | 1.00G | [((mvx >> step_shift_x) + mv_x_offset) + |
638 | 1.00G | ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range] = sad; |
639 | 1.00G | } |
640 | 40.4M | } |
641 | 2.24M | } |
642 | | |
643 | | void hme_store_4x4_sads_high_speed( |
644 | | hme_search_prms_t *ps_search_prms, |
645 | | layer_ctxt_t *ps_layer_ctxt, |
646 | | range_prms_t *ps_mv_limit, |
647 | | wgt_pred_ctxt_t *ps_wt_inp_prms, |
648 | | S16 *pi2_sads_4x4) |
649 | 3.60M | { |
650 | 3.60M | S32 sad, i, j; |
651 | | |
652 | | /* Input and reference attributes */ |
653 | 3.60M | U08 *pu1_inp, *pu1_inp_orig, *pu1_ref; |
654 | 3.60M | S32 i4_inp_stride, i4_ref_stride, i4_ref_offset; |
655 | | |
656 | | /* The reference is actually an array of ptrs since there are several */ |
657 | | /* reference ids. So an array gets passed from the calling function */ |
658 | 3.60M | U08 **ppu1_ref, *pu1_ref_coloc; |
659 | | |
660 | 3.60M | S32 stepy, stepx, step_shift_x, step_shift_y; |
661 | 3.60M | S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range; |
662 | | |
663 | | /* Points to the range limits for mv */ |
664 | 3.60M | range_prms_t *ps_range_prms; |
665 | | |
666 | | /* Reference index to be searched */ |
667 | 3.60M | S32 i4_search_idx = ps_search_prms->i1_ref_idx; |
668 | | |
669 | | /* aps_mv_range[0] is used as the search range for every ref idx */ |
670 | 3.60M | ps_range_prms = ps_search_prms->aps_mv_range[0]; |
671 | 3.60M | pu1_inp_orig = ps_wt_inp_prms->apu1_wt_inp[i4_search_idx]; |
672 | 3.60M | i4_inp_stride = ps_search_prms->i4_inp_stride; |
673 | | |
674 | | /* Move to the location of the search blk in inp buffer */ |
675 | 3.60M | pu1_inp_orig += ps_search_prms->i4_cu_x_off; |
676 | 3.60M | pu1_inp_orig += ps_search_prms->i4_cu_y_off * i4_inp_stride; |
677 | | |
678 | | /*************************************************************************/ |
679 | | /* The reference is read from the layer's input picture list */ |
680 | | /* (ppu1_list_inp) using the layer's input stride */ |
681 | | /*************************************************************************/ |
682 | 3.60M | i4_ref_stride = ps_layer_ctxt->i4_inp_stride; |
683 | 3.60M | ppu1_ref = ps_layer_ctxt->ppu1_list_inp; |
684 | | |
685 | | /* colocated position in reference picture */ |
686 | 3.60M | i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off; |
687 | 3.60M | pu1_ref_coloc = ppu1_ref[i4_search_idx] + i4_ref_offset; |
688 | | |
689 | 3.60M | stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_SPEED; |
690 | | /* TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_SPEED; the offsets and ranges below define the row-major layout (row width mv_x_range) of pi2_sads_4x4 */ |
691 | 3.60M | step_shift_x = step_shift_y = 2; |
692 | | |
693 | 3.60M | mv_x_offset = -(ps_mv_limit->i2_min_x >> step_shift_x); |
694 | 3.60M | mv_y_offset = -(ps_mv_limit->i2_min_y >> step_shift_y); |
695 | 3.60M | mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x; |
696 | 3.60M | mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y; |
697 | | |
698 | | /* Run 2 loops to sweep over the reference area (mv_y_range is computed above but not read in this function) */ |
699 | 36.0M | for(mvy = ps_range_prms->i2_min_y; mvy < ps_range_prms->i2_max_y; mvy += stepy) |
700 | 32.4M | { |
701 | 434M | for(mvx = ps_range_prms->i2_min_x; mvx < ps_range_prms->i2_max_x; mvx += stepx) |
702 | 402M | { |
703 | | /* Set up the reference ptr (colocated blk displaced by the MV) and the inp ptr */ |
704 | 402M | pu1_ref = pu1_ref_coloc + mvx + (mvy * i4_ref_stride); |
705 | 402M | pu1_inp = pu1_inp_orig; |
706 | | /* 4x4 SAD computation between the input blk and the displaced reference blk */ |
707 | 402M | { |
708 | 402M | sad = 0; |
709 | 2.01G | for(i = 0; i < 4; i++) |
710 | 1.60G | { |
711 | 8.04G | for(j = 0; j < 4; j++) |
712 | 6.43G | { |
713 | 6.43G | sad += (ABS(((S32)pu1_inp[j] - (S32)pu1_ref[j]))); |
714 | 6.43G | } |
715 | 1.60G | pu1_inp += i4_inp_stride; |
716 | 1.60G | pu1_ref += i4_ref_stride; |
717 | 1.60G | } |
718 | 402M | } |
719 | | |
720 | 402M | pi2_sads_4x4 |
721 | 402M | [((mvx >> step_shift_x) + mv_x_offset) + |
722 | 402M | ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range] = sad; |
723 | 402M | } |
724 | 32.4M | } |
725 | 3.60M | } |
726 | | /** |
727 | | ******************************************************************************** |
728 | | * @fn void hme_coarsest(coarse_me_ctxt_t *ps_ctxt, coarse_prms_t *ps_coarse_prms, multi_thrd_ctxt_t *ps_multi_thrd_ctxt, WORD32 i4_ping_pong, void **ppv_dep_mngr_hme_sync) |
729 | | * |
730 | | * @brief Top level entry point for Coarse ME. Runs across blks and searches |
731 | | * at a 4x4 blk granularity by using 4x8 and 8x4 patterns. |
732 | | * |
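733 | | * @param[in,out] ps_ctxt: ME Handle |
734 | | * @param[in] ps_coarse_prms : Coarse layer config params |
735 | | * @param[in] ps_multi_thrd_ctxt : Multi thread context |
736 | | * @param[in] i4_ping_pong : Ping-pong value passed through to the pre-enc job queue calls |
737 | | * @param[in] ppv_dep_mngr_hme_sync : HME dependency manager handles, indexed by (layer id - 1) |
738 | | * |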
739 | | * @return None |
740 | | ******************************************************************************** |
741 | | */ |
742 | | void hme_coarsest( |
743 | | coarse_me_ctxt_t *ps_ctxt, |
744 | | coarse_prms_t *ps_coarse_prms, |
745 | | multi_thrd_ctxt_t *ps_multi_thrd_ctxt, |
746 | | WORD32 i4_ping_pong, |
747 | | void **ppv_dep_mngr_hme_sync) |
748 | 156k | { |
749 | 156k | S16 *pi2_cur_ref_sads_4x4; |
750 | 156k | S32 ai4_sad_4x4_block_size[MAX_NUM_REF], ai4_sad_4x4_block_stride[MAX_NUM_REF]; |
751 | 156k | S32 num_rows_coarse; |
752 | 156k | S32 sad_top_offset, sad_current_offset; |
753 | 156k | S32 search_node_top_offset, search_node_left_offset; |
754 | | |
755 | 156k | ME_QUALITY_PRESETS_T e_me_quality_preset = |
756 | 156k | ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets; |
757 | | |
758 | 156k | search_results_t *ps_search_results; |
759 | 156k | mvbank_update_prms_t s_mv_update_prms; |
760 | 156k | BLK_SIZE_T e_search_blk_size = BLK_4x4; |
761 | 156k | hme_search_prms_t s_search_prms_4x8, s_search_prms_8x4, s_search_prms_4x4; |
762 | | |
763 | 156k | S32 global_id_8x4, global_id_4x8; |
764 | | |
765 | | /*************************************************************************/ |
766 | | /* These directly point to the best search result nodes that will be */ |
767 | | /* updated by the search algorithm, rather than have to go through an */ |
768 | | /* elaborate structure */ |
769 | | /*************************************************************************/ |
770 | 156k | search_node_t *aps_best_search_node_8x4[MAX_NUM_REF]; |
771 | 156k | search_node_t *aps_best_search_node_4x8[MAX_NUM_REF]; |
772 | | |
773 | | /* These point to various spatial candts */ |
774 | 156k | search_node_t *ps_candt_8x4_l, *ps_candt_8x4_t, *ps_candt_8x4_tl; |
775 | 156k | search_node_t *ps_candt_4x8_l, *ps_candt_4x8_t, *ps_candt_4x8_tl; |
776 | 156k | search_node_t *ps_candt_zeromv_8x4, *ps_candt_zeromv_4x8; |
777 | 156k | search_node_t *ps_candt_fs_8x4, *ps_candt_fs_4x8; |
778 | 156k | search_node_t as_top_neighbours[4], as_left_neighbours[3]; |
779 | | |
780 | | /* Holds the global mv for a given ref index */ |
781 | 156k | search_node_t s_candt_global[MAX_NUM_REF]; |
782 | | |
783 | | /* All the search candidates */ |
784 | 156k | search_candt_t as_search_candts_8x4[MAX_INIT_CANDTS]; |
785 | 156k | search_candt_t as_search_candts_4x8[MAX_INIT_CANDTS]; |
786 | 156k | search_candt_t *ps_search_candts_8x4, *ps_search_candts_4x8; |
787 | | |
788 | | /* Actual range per blk and the pic level boundaries */ |
789 | 156k | range_prms_t s_range_prms, s_pic_limit, as_mv_limit[MAX_NUM_REF]; |
790 | | |
791 | | /* Current and prev pic layer ctxt at the coarsest layer */ |
792 | 156k | layer_ctxt_t *ps_curr_layer, *ps_prev_layer; |
793 | | |
794 | | /* best mv of full search */ |
795 | 156k | hme_mv_t best_mv_4x8, best_mv_8x4; |
796 | | |
797 | | /* Book keeping at blk level */ |
798 | 156k | S32 blk_x, num_blks_in_pic, num_blks_in_row, num_4x4_blks_in_row; |
799 | | |
800 | 156k | S32 blk_y; |
801 | | |
802 | | /* Block dimensions */ |
803 | 156k | S32 blk_size_shift = 2, blk_wd = 4, blk_ht = 4; |
804 | | |
805 | 156k | S32 lambda = ps_coarse_prms->lambda; |
806 | | |
807 | | /* Number of references to search */ |
808 | 156k | S32 i4_num_ref; |
809 | | |
810 | 156k | S32 i4_i, id, i; |
811 | 156k | S08 i1_ref_idx; |
812 | | |
813 | 156k | S32 i4_pic_wd, i4_pic_ht; |
814 | 156k | S32 i4_layer_id; |
815 | | |
816 | 156k | S32 end_of_frame; |
817 | | |
818 | 156k | pf_get_wt_inp fp_get_wt_inp; |
819 | | |
820 | | /* Maximum search iterations around any candidate */ |
821 | 156k | S32 i4_max_iters = ps_coarse_prms->i4_max_iters; |
822 | | |
823 | 156k | ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_coarse_prms->i4_layer_id]; |
824 | 156k | ps_prev_layer = hme_coarse_get_past_layer_ctxt(ps_ctxt, ps_coarse_prms->i4_layer_id); |
825 | | |
826 | | /* We need only one instance of search results structure */ |
827 | 156k | ps_search_results = &ps_ctxt->s_search_results_8x8; |
828 | | |
829 | 156k | ps_search_candts_8x4 = &as_search_candts_8x4[0]; |
830 | 156k | ps_search_candts_4x8 = &as_search_candts_4x8[0]; |
831 | | |
832 | 156k | end_of_frame = 0; |
833 | | |
834 | 156k | i4_pic_wd = ps_curr_layer->i4_wd; |
835 | 156k | i4_pic_ht = ps_curr_layer->i4_ht; |
836 | | |
837 | 156k | fp_get_wt_inp = ((ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list) |
838 | 156k | ->pf_get_wt_inp_8x8; |
839 | | |
840 | 156k | num_rows_coarse = ps_ctxt->i4_num_row_bufs; |
841 | | |
842 | | /*************************************************************************/ |
843 | | /* Coarse Layer always does explicit search. Number of reference frames */ |
844 | | /* to search is a configurable parameter supplied by the application */ |
845 | | /*************************************************************************/ |
846 | 156k | i4_num_ref = ps_coarse_prms->i4_num_ref; |
847 | 156k | i4_layer_id = ps_coarse_prms->i4_layer_id; |
848 | | |
849 | | /*************************************************************************/ |
850 | | /* The search algorithm goes as follows: */ |
851 | | /* */ |
852 | | /* ___ */ |
853 | | /* | e | */ |
854 | | /* ___|___|___ */ |
855 | | /* | c | a | b | */ |
856 | | /* |___|___|___| */ |
857 | | /* | d | */ |
858 | | /* |___| */ |
859 | | /* */ |
860 | | /* For the target block a, we collect best results from 2 8x4 blks */ |
861 | | /* These are c-a and a-b. The 4x8 blks are e-a and a-d */ |
862 | | /* c-a result is already available from results of blk c. a-b is */ |
863 | | /* evaluated in this blk. Likewise e-a result is stored in a row buffer */ |
864 | | /* a-d is evaluated this blk */ |
865 | | /* So we store a row buffer which stores best 4x8 results of all top blk */ |
866 | | /*************************************************************************/ |
867 | | |
868 | | /************************************************************************/ |
869 | | /* Initialize the pointers to the best node. */ |
870 | | /************************************************************************/ |
871 | 383k | for(i4_i = 0; i4_i < i4_num_ref; i4_i++) |
872 | 227k | { |
873 | 227k | aps_best_search_node_8x4[i4_i] = ps_search_results->aps_part_results[i4_i][PART_ID_2NxN_B]; |
874 | 227k | aps_best_search_node_4x8[i4_i] = ps_search_results->aps_part_results[i4_i][PART_ID_Nx2N_R]; |
875 | 227k | } |
876 | | |
877 | | /************************************************************************/ |
878 | | /* Initialize the "searchresults" structure. This will set up the number*/ |
879 | | /* of search types, result updates etc */ |
880 | | /************************************************************************/ |
881 | 156k | { |
882 | 156k | S32 num_results_per_part; |
883 | | /* We evaluate 4 types of results per 4x4 blk. 8x4L and 8x4R and */ |
884 | | /* 4x8 T and 4x8B. So if we are to give 4 results, then we need to */ |
885 | | /* only evaluate 1 result per part. In the coarse layer, we are */ |
886 | | /* limited to 2 results max per part, and max of 8 results. */ |
887 | 156k | num_results_per_part = (ps_coarse_prms->num_results + 3) >> 2; |
888 | 156k | hme_init_search_results( |
889 | 156k | ps_search_results, |
890 | 156k | i4_num_ref, |
891 | 156k | ps_coarse_prms->num_results, |
892 | 156k | num_results_per_part, |
893 | 156k | BLK_8x8, |
894 | 156k | 0, |
895 | 156k | 0, |
896 | 156k | ps_ctxt->au1_is_past); |
897 | 156k | } |
898 | | |
899 | | /* Macro updates num_blks_in_pic and num_blks_in_row*/ |
900 | 156k | GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic); |
901 | | |
902 | 156k | num_4x4_blks_in_row = num_blks_in_row + 1; |
903 | | |
904 | 156k | s_mv_update_prms.e_search_blk_size = e_search_blk_size; |
905 | 156k | s_mv_update_prms.i4_num_ref = i4_num_ref; |
906 | 156k | s_mv_update_prms.i4_shift = 0; |
907 | | |
908 | | /* For full search, support 2 or 4 step size */ |
909 | 156k | if(ps_coarse_prms->do_full_search) |
910 | 156k | { |
911 | 156k | ASSERT((ps_coarse_prms->full_search_step == 2) || (ps_coarse_prms->full_search_step == 4)); |
912 | 156k | } |
913 | | |
914 | 383k | for(i4_i = 0; i4_i < i4_num_ref; i4_i++) |
915 | 227k | { |
916 | 227k | S32 blk, delta_poc; |
917 | 227k | S32 mv_x_clip, mv_y_clip; |
918 | | /* Initialize only the first row */ |
919 | 1.19M | for(blk = 0; blk < num_blks_in_row; blk++) |
920 | 965k | { |
921 | 965k | INIT_SEARCH_NODE(&ps_ctxt->aps_best_search_nodes_4x8_n_rows[i4_i][blk], i4_i); |
922 | 965k | } |
923 | | |
924 | 227k | delta_poc = ABS(ps_curr_layer->i4_poc - ps_curr_layer->ai4_ref_id_to_poc_lc[i4_i]); |
925 | | |
926 | | /* Setting search range for different references based on the delta poc */ |
927 | | /*************************************************************************/ |
928 | | /* set the MV limit per ref. pic. */ |
929 | | /* - P pic. : Based on the config params. */ |
930 | | /* - B/b pic: Based on the Max/Min MV from prev. P and config. param. */ |
931 | | /*************************************************************************/ |
932 | 227k | { |
933 | | /* TO DO : Remove hard coding of P-P dist. of 4 */ |
934 | 227k | mv_x_clip = (ps_curr_layer->i2_max_mv_x * delta_poc) / 4; |
935 | | |
936 | | /* Only for B/b pic. */ |
937 | 227k | if(1 == ps_ctxt->s_frm_prms.bidir_enabled) |
938 | 31.9k | { |
939 | 31.9k | WORD16 i2_mv_y_per_poc; |
940 | | |
941 | | /* Get abs MAX for symmetric search */ |
942 | 31.9k | i2_mv_y_per_poc = |
943 | 31.9k | MAX(ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id], |
944 | 31.9k | (ABS(ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id]))); |
945 | | |
946 | 31.9k | mv_y_clip = i2_mv_y_per_poc * delta_poc; |
947 | 31.9k | } |
948 | | /* Set the Config. File Params for P pic. */ |
949 | 195k | else |
950 | 195k | { |
951 | | /* TO DO : Remove hard coding of P-P dist. of 4 */ |
952 | 195k | mv_y_clip = (ps_curr_layer->i2_max_mv_y * delta_poc) / 4; |
953 | 195k | } |
954 | | |
955 | | /* Making mv_x and mv_y range multiple of 4 */ |
956 | 227k | mv_x_clip = (((mv_x_clip + 3) >> 2) << 2); |
957 | 227k | mv_y_clip = (((mv_y_clip + 3) >> 2) << 2); |
958 | | /* Clipping the range of mv_x and mv_y */ |
959 | 227k | mv_x_clip = CLIP3(mv_x_clip, 4, MAX_MVX_SUPPORTED_IN_COARSE_LAYER); |
960 | 227k | mv_y_clip = CLIP3(mv_y_clip, 4, MAX_MVY_SUPPORTED_IN_COARSE_LAYER); |
961 | | |
962 | 227k | as_mv_limit[i4_i].i2_min_x = -mv_x_clip; |
963 | 227k | as_mv_limit[i4_i].i2_min_y = -mv_y_clip; |
964 | 227k | as_mv_limit[i4_i].i2_max_x = mv_x_clip; |
965 | 227k | as_mv_limit[i4_i].i2_max_y = mv_y_clip; |
966 | 227k | } |
967 | | /*Populating SAD block size based on search range */ |
968 | 227k | ai4_sad_4x4_block_size[i4_i] = ((2 * mv_x_clip) / ps_coarse_prms->full_search_step) * |
969 | 227k | ((2 * mv_y_clip) / ps_coarse_prms->full_search_step); |
970 | 227k | ai4_sad_4x4_block_stride[i4_i] = (num_blks_in_row + 1) * ai4_sad_4x4_block_size[i4_i]; |
971 | 227k | } |
972 | | |
973 | 18.9M | for(i = 0; i < 2 * MAX_INIT_CANDTS; i++) |
974 | 18.7M | { |
975 | 18.7M | search_node_t *ps_search_node; |
976 | 18.7M | ps_search_node = &ps_ctxt->s_init_search_node[i]; |
977 | 18.7M | INIT_SEARCH_NODE(ps_search_node, 0); |
978 | 18.7M | } |
979 | 625k | for(i = 0; i < 3; i++) |
980 | 468k | { |
981 | 468k | search_node_t *ps_search_node; |
982 | 468k | ps_search_node = &as_left_neighbours[i]; |
983 | 468k | INIT_SEARCH_NODE(ps_search_node, 0); |
984 | 468k | ps_search_node = &as_top_neighbours[i]; |
985 | 468k | INIT_SEARCH_NODE(ps_search_node, 0); |
986 | 468k | } |
987 | 156k | INIT_SEARCH_NODE(&as_top_neighbours[3], 0); |
988 | | /* Set up place holders to hold the search nodes of each initial candt */ |
989 | 9.53M | for(i = 0; i < MAX_INIT_CANDTS; i++) |
990 | 9.37M | { |
991 | 9.37M | ps_search_candts_8x4[i].ps_search_node = &ps_ctxt->s_init_search_node[i]; |
992 | | |
993 | 9.37M | ps_search_candts_4x8[i].ps_search_node = &ps_ctxt->s_init_search_node[MAX_INIT_CANDTS + i]; |
994 | | |
995 | 9.37M | ps_search_candts_8x4[i].u1_num_steps_refine = (U08)i4_max_iters; |
996 | 9.37M | ps_search_candts_4x8[i].u1_num_steps_refine = (U08)i4_max_iters; |
997 | 9.37M | } |
998 | | |
999 | | /* For Top,TopLeft and Left cand., no need for refinement */ |
1000 | 156k | id = 0; |
1001 | 156k | if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 == e_me_quality_preset)) |
1002 | 48.5k | { |
1003 | | /* This search candt has the full search result */ |
1004 | 48.5k | ps_candt_fs_8x4 = ps_search_candts_8x4[id].ps_search_node; |
1005 | 48.5k | id++; |
1006 | 48.5k | } |
1007 | | |
1008 | 156k | ps_candt_8x4_l = ps_search_candts_8x4[id].ps_search_node; |
1009 | 156k | ps_search_candts_8x4[id].u1_num_steps_refine = 0; |
1010 | 156k | id++; |
1011 | 156k | ps_candt_8x4_t = ps_search_candts_8x4[id].ps_search_node; |
1012 | 156k | ps_search_candts_8x4[id].u1_num_steps_refine = 0; |
1013 | 156k | id++; |
1014 | 156k | ps_candt_8x4_tl = ps_search_candts_8x4[id].ps_search_node; |
1015 | 156k | ps_search_candts_8x4[id].u1_num_steps_refine = 0; |
1016 | 156k | id++; |
1017 | | /* This search candt stores the global candt */ |
1018 | 156k | global_id_8x4 = id; |
1019 | 156k | id++; |
1020 | | |
1021 | 156k | if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 != e_me_quality_preset)) |
1022 | 107k | { |
1023 | | /* This search candt has the full search result */ |
1024 | 107k | ps_candt_fs_8x4 = ps_search_candts_8x4[id].ps_search_node; |
1025 | 107k | id++; |
1026 | 107k | } |
1027 | | /* Don't increment id as (0,0) is removed from cand. list. Initializing */ |
1028 | | /* the pointer for hme_init_pred_ctxt_no_encode() */ |
1029 | 156k | ps_candt_zeromv_8x4 = ps_search_candts_8x4[id].ps_search_node; |
1030 | | |
1031 | | /* For Top,TopLeft and Left cand., no need for refinement */ |
1032 | 156k | id = 0; |
1033 | 156k | if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 == e_me_quality_preset)) |
1034 | 48.5k | { |
1035 | | /* This search candt has the full search result */ |
1036 | 48.5k | ps_candt_fs_4x8 = ps_search_candts_4x8[id].ps_search_node; |
1037 | 48.5k | id++; |
1038 | 48.5k | } |
1039 | | |
1040 | 156k | ps_candt_4x8_l = ps_search_candts_4x8[id].ps_search_node; |
1041 | 156k | ps_search_candts_4x8[id].u1_num_steps_refine = 0; |
1042 | 156k | id++; |
1043 | 156k | ps_candt_4x8_t = ps_search_candts_4x8[id].ps_search_node; |
1044 | 156k | ps_search_candts_4x8[id].u1_num_steps_refine = 0; |
1045 | 156k | id++; |
1046 | 156k | ps_candt_4x8_tl = ps_search_candts_4x8[id].ps_search_node; |
1047 | 156k | ps_search_candts_4x8[id].u1_num_steps_refine = 0; |
1048 | 156k | id++; |
1049 | | /* This search candt stores the global candt */ |
1050 | 156k | global_id_4x8 = id; |
1051 | 156k | id++; |
1052 | 156k | if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 != e_me_quality_preset)) |
1053 | 107k | { |
1054 | | /* This search candt has the full search result */ |
1055 | 107k | ps_candt_fs_4x8 = ps_search_candts_4x8[id].ps_search_node; |
1056 | 107k | id++; |
1057 | 107k | } |
1058 | | /* Don't increment id as (0,0) is removed from cand. list. Initializing */ |
1059 | | /* the pointer for hme_init_pred_ctxt_no_encode() */ |
1060 | 156k | ps_candt_zeromv_4x8 = ps_search_candts_4x8[id].ps_search_node; |
1061 | | |
1062 | | /* Zero mv always has 0 mvx and y component, ref idx initialized inside */ |
1063 | 156k | ps_candt_zeromv_8x4->s_mv.i2_mvx = 0; |
1064 | 156k | ps_candt_zeromv_8x4->s_mv.i2_mvy = 0; |
1065 | 156k | ps_candt_zeromv_4x8->s_mv.i2_mvx = 0; |
1066 | 156k | ps_candt_zeromv_4x8->s_mv.i2_mvy = 0; |
1067 | | |
1068 | | /* SET UP THE PRED CTXT FOR L0 AND L1 */ |
1069 | 156k | { |
1070 | 156k | S32 pred_lx; |
1071 | | |
1072 | | /* Bottom left always not available */ |
1073 | 156k | as_left_neighbours[2].u1_is_avail = 0; |
1074 | | |
1075 | 468k | for(pred_lx = 0; pred_lx < 2; pred_lx++) |
1076 | 312k | { |
1077 | 312k | pred_ctxt_t *ps_pred_ctxt; |
1078 | | |
1079 | 312k | ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx]; |
1080 | 312k | hme_init_pred_ctxt_no_encode( |
1081 | 312k | ps_pred_ctxt, |
1082 | 312k | ps_search_results, |
1083 | 312k | as_top_neighbours, |
1084 | 312k | as_left_neighbours, |
1085 | 312k | NULL, |
1086 | 312k | ps_candt_zeromv_8x4, |
1087 | 312k | ps_candt_zeromv_8x4, |
1088 | 312k | pred_lx, |
1089 | 312k | lambda, |
1090 | 312k | ps_coarse_prms->lambda_q_shift, |
1091 | 312k | ps_ctxt->apu1_ref_bits_tlu_lc, |
1092 | 312k | ps_ctxt->ai2_ref_scf); |
1093 | 312k | } |
1094 | 156k | } |
1095 | | |
1096 | | /*************************************************************************/ |
1097 | | /* Initialize the search parameters for search algo with the following */ |
1098 | | /* parameters: No SATD, calculated number of initial candidates, */ |
1099 | | /* No post refinement, initial step size and number of iterations as */ |
1100 | | /* passed by the calling function. */ |
1101 | | /* Also, we use input for this layer search, and not recon. */ |
1102 | | /*************************************************************************/ |
1103 | 156k | if(e_me_quality_preset == ME_XTREME_SPEED_25) |
1104 | 48.5k | s_search_prms_8x4.i4_num_init_candts = 1; |
1105 | 107k | else |
1106 | 107k | s_search_prms_8x4.i4_num_init_candts = id; |
1107 | 156k | s_search_prms_8x4.i4_use_satd = 0; |
1108 | 156k | s_search_prms_8x4.i4_start_step = ps_coarse_prms->i4_start_step; |
1109 | 156k | s_search_prms_8x4.i4_num_steps_post_refine = 0; |
1110 | 156k | s_search_prms_8x4.i4_use_rec = 0; |
1111 | 156k | s_search_prms_8x4.ps_search_candts = ps_search_candts_8x4; |
1112 | 156k | s_search_prms_8x4.e_blk_size = BLK_8x4; |
1113 | 156k | s_search_prms_8x4.i4_max_iters = ps_coarse_prms->i4_max_iters; |
1114 | | /* Coarse layer is always explicit */ |
1115 | 156k | if(ME_MEDIUM_SPEED > e_me_quality_preset) |
1116 | 45.2k | { |
1117 | 45.2k | s_search_prms_8x4.pf_mv_cost_compute = compute_mv_cost_coarse; |
1118 | 45.2k | } |
1119 | 111k | else |
1120 | 111k | { |
1121 | 111k | s_search_prms_8x4.pf_mv_cost_compute = compute_mv_cost_coarse_high_speed; |
1122 | 111k | } |
1123 | | |
1124 | 156k | s_search_prms_8x4.i4_inp_stride = 8; |
1125 | 156k | s_search_prms_8x4.i4_cu_x_off = s_search_prms_8x4.i4_cu_y_off = 0; |
1126 | 156k | if(ps_coarse_prms->do_full_search) |
1127 | 156k | s_search_prms_8x4.i4_max_iters = 1; |
1128 | 156k | s_search_prms_8x4.i4_part_mask = (1 << PART_ID_2NxN_B); |
1129 | | /* Using the member 0 to store for all ref. idx. */ |
1130 | 156k | s_search_prms_8x4.aps_mv_range[0] = &s_range_prms; |
1131 | 156k | s_search_prms_8x4.ps_search_results = ps_search_results; |
1132 | 156k | s_search_prms_8x4.full_search_step = ps_coarse_prms->full_search_step; |
1133 | | |
1134 | 156k | s_search_prms_4x8 = s_search_prms_8x4; |
1135 | 156k | s_search_prms_4x8.ps_search_candts = ps_search_candts_4x8; |
1136 | 156k | s_search_prms_4x8.e_blk_size = BLK_4x8; |
1137 | 156k | s_search_prms_4x8.i4_part_mask = (1 << PART_ID_Nx2N_R); |
1138 | | |
1139 | 156k | s_search_prms_4x4 = s_search_prms_8x4; |
1140 | | /* Since s_search_prms_4x4 is used only to compute SAD at 4x4 level, search candidate is not used */ |
1141 | 156k | s_search_prms_4x4.ps_search_candts = ps_search_candts_4x8; |
1142 | 156k | s_search_prms_4x4.e_blk_size = BLK_4x4; |
1143 | 156k | s_search_prms_4x4.i4_part_mask = (1 << PART_ID_2Nx2N); |
1144 | | /*************************************************************************/ |
1145 | | /* Picture limit on all 4 sides. This will be used to set mv limits for */ |
1146 | | /* every block given its coordinate. */ |
1147 | | /*************************************************************************/ |
1148 | 156k | SET_PIC_LIMIT( |
1149 | 156k | s_pic_limit, |
1150 | 156k | ps_curr_layer->i4_pad_x_inp, |
1151 | 156k | ps_curr_layer->i4_pad_y_inp, |
1152 | 156k | ps_curr_layer->i4_wd, |
1153 | 156k | ps_curr_layer->i4_ht, |
1154 | 156k | s_search_prms_4x4.i4_num_steps_post_refine); |
1155 | | |
1156 | | /* Pick the global mv from previous reference */ |
1157 | 383k | for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref; i1_ref_idx++) |
1158 | 227k | { |
1159 | 227k | if(ME_XTREME_SPEED_25 != e_me_quality_preset) |
1160 | 185k | { |
1161 | | /* Distance of current pic from reference */ |
1162 | 185k | S32 i4_delta_poc; |
1163 | | |
1164 | 185k | hme_mv_t s_mv; |
1165 | 185k | i4_delta_poc = ps_curr_layer->i4_poc - ps_curr_layer->ai4_ref_id_to_poc_lc[i1_ref_idx]; |
1166 | | |
1167 | 185k | hme_get_global_mv(ps_prev_layer, &s_mv, i4_delta_poc); |
1168 | | |
1169 | 185k | s_candt_global[i1_ref_idx].s_mv.i2_mvx = s_mv.i2_mv_x; |
1170 | 185k | s_candt_global[i1_ref_idx].s_mv.i2_mvy = s_mv.i2_mv_y; |
1171 | 185k | s_candt_global[i1_ref_idx].i1_ref_idx = i1_ref_idx; |
1172 | | |
1173 | | /*********************************************************************/ |
1174 | | /* Initialize the histogram for each reference index in current */ |
1175 | | /* layer ctxt */ |
1176 | | /*********************************************************************/ |
1177 | 185k | hme_init_histogram( |
1178 | 185k | ps_ctxt->aps_mv_hist[i1_ref_idx], |
1179 | 185k | (S32)as_mv_limit[i1_ref_idx].i2_max_x, |
1180 | 185k | (S32)as_mv_limit[i1_ref_idx].i2_max_y); |
1181 | 185k | } |
1182 | | |
1183 | | /*********************************************************************/ |
1184 | | /* Initialize the dyn. search range params. for each reference index */ |
1185 | | /* in current layer ctxt */ |
1186 | | /*********************************************************************/ |
1187 | | /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */ |
1188 | 227k | if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled) |
1189 | 176k | { |
1190 | 176k | INIT_DYN_SEARCH_PRMS( |
1191 | 176k | &ps_ctxt->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][i1_ref_idx], |
1192 | 176k | ps_curr_layer->ai4_ref_id_to_poc_lc[i1_ref_idx]); |
1193 | 176k | } |
1194 | 227k | } |
1195 | | |
1196 | | /*************************************************************************/ |
1197 | | /* if exhaustive algorithm then we use only 1 candt 0, 0 */ |
1198 | | /* else we use a lot of causal and non causal candts */ |
1199 | | /* finally set number to the configured number of candts */ |
1200 | | /*************************************************************************/ |
1201 | | |
1202 | | /* Loop in raster order over each 4x4 blk in a given row till end of frame */ |
1203 | 1.16M | while(0 == end_of_frame) |
1204 | 1.01M | { |
1205 | 1.01M | job_queue_t *ps_job; |
1206 | 1.01M | void *pv_hme_dep_mngr; |
1207 | 1.01M | WORD32 offset_val, check_dep_pos, set_dep_pos; |
1208 | | |
1209 | | /* Get the current layer HME Dep Mngr */ |
1210 | | /* Note : Use layer_id - 1 in HME layers */ |
1211 | 1.01M | pv_hme_dep_mngr = ppv_dep_mngr_hme_sync[ps_coarse_prms->i4_layer_id - 1]; |
1212 | | |
1213 | | /* Get the current row from the job queue */ |
1214 | 1.01M | ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job( |
1215 | 1.01M | ps_multi_thrd_ctxt, ps_multi_thrd_ctxt->i4_me_coarsest_lyr_type, 1, i4_ping_pong); |
1216 | | |
1217 | | /* If all rows are done, set the end of process flag to 1, */ |
1218 | | /* and the current row to -1 */ |
1219 | 1.01M | if(NULL == ps_job) |
1220 | 156k | { |
1221 | 156k | blk_y = -1; |
1222 | 156k | end_of_frame = 1; |
1223 | 156k | } |
1224 | 854k | else |
1225 | 854k | { |
1226 | 854k | ASSERT(ps_multi_thrd_ctxt->i4_me_coarsest_lyr_type == ps_job->i4_pre_enc_task_type); |
1227 | | |
1228 | | /* Obtain the current row's details from the job */ |
1229 | 854k | blk_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no; |
1230 | | |
1231 | 854k | if(1 == ps_ctxt->s_frm_prms.is_i_pic) |
1232 | 289k | { |
1233 | | /* set the output dependency of current row */ |
1234 | 289k | ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong); |
1235 | 289k | continue; |
1236 | 289k | } |
1237 | | |
1238 | | /* Set Variables for Dep. Checking and Setting */ |
1239 | 564k | set_dep_pos = blk_y + 1; |
1240 | 564k | if(blk_y > 0) |
1241 | 458k | { |
1242 | 458k | offset_val = 2; |
1243 | 458k | check_dep_pos = blk_y - 1; |
1244 | 458k | } |
1245 | 105k | else |
1246 | 105k | { |
1247 | | /* First row should run without waiting */ |
1248 | 105k | offset_val = -1; |
1249 | 105k | check_dep_pos = 0; |
1250 | 105k | } |
1251 | | |
1252 | | /* Loop over all the blocks in current row */ |
1253 | | /* One block extra, since the last block in a row needs East block */ |
1254 | 3.52M | for(blk_x = 0; blk_x < (num_blks_in_row + 1); blk_x++) |
1255 | 2.96M | { |
1256 | | /* Wait till top row block is processed */ |
1257 | | /* Currently checking till top right block*/ |
1258 | 2.96M | if(blk_x < (num_blks_in_row)) |
1259 | 2.39M | { |
1260 | 2.39M | ihevce_dmgr_chk_row_row_sync( |
1261 | 2.39M | pv_hme_dep_mngr, |
1262 | 2.39M | blk_x, |
1263 | 2.39M | offset_val, |
1264 | 2.39M | check_dep_pos, |
1265 | 2.39M | 0, /* Col Tile No. : Not supported in PreEnc*/ |
1266 | 2.39M | ps_ctxt->thrd_id); |
1267 | 2.39M | } |
1268 | | |
1269 | | /***************************************************************/ |
1270 | | /* Get Weighted input for all references */ |
1271 | | /***************************************************************/ |
1272 | 2.96M | fp_get_wt_inp( |
1273 | 2.96M | ps_curr_layer, |
1274 | 2.96M | &ps_ctxt->s_wt_pred, |
1275 | 2.96M | 1 << (blk_size_shift + 1), |
1276 | 2.96M | blk_x << blk_size_shift, |
1277 | 2.96M | (blk_y - 1) << blk_size_shift, |
1278 | 2.96M | 1 << (blk_size_shift + 1), |
1279 | 2.96M | i4_num_ref, |
1280 | 2.96M | ps_ctxt->i4_wt_pred_enable_flag); |
1281 | | |
1282 | | /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */ |
1283 | 2.96M | hme_reset_search_results( |
1284 | 2.96M | ps_search_results, |
1285 | 2.96M | s_search_prms_8x4.i4_part_mask | s_search_prms_4x8.i4_part_mask, |
1286 | 2.96M | MV_RES_FPEL); |
1287 | | |
1288 | | /* Compute the search node offsets */ |
1289 | | /* MAX is used to clip when left and top neighbours are not available at coarse boundaries */ |
1290 | 2.96M | search_node_top_offset = |
1291 | 2.96M | blk_x + ps_ctxt->ai4_row_index[MAX((blk_y - 2), 0)] * num_blks_in_row; |
1292 | 2.96M | search_node_left_offset = |
1293 | 2.96M | MAX((blk_x - 1), 0) + |
1294 | 2.96M | ps_ctxt->ai4_row_index[MAX((blk_y - 1), 0)] * num_blks_in_row; |
1295 | | |
1296 | | /* Input offset: wrt CU start. Offset for South block */ |
1297 | 2.96M | s_search_prms_4x4.i4_cu_x_off = 0; |
1298 | 2.96M | s_search_prms_4x4.i4_cu_y_off = 4; |
1299 | 2.96M | s_search_prms_4x4.i4_inp_stride = 8; |
1300 | 2.96M | s_search_prms_4x4.i4_x_off = blk_x << blk_size_shift; |
1301 | 2.96M | s_search_prms_4x4.i4_y_off = blk_y << blk_size_shift; |
1302 | | |
1303 | 2.96M | s_search_prms_4x8.i4_x_off = s_search_prms_8x4.i4_x_off = blk_x << blk_size_shift; |
1304 | 2.96M | s_search_prms_4x8.i4_y_off = s_search_prms_8x4.i4_y_off = (blk_y - 1) |
1305 | 2.96M | << blk_size_shift; |
1306 | | |
1307 | | /* This layer will always use explicit ME */ |
1308 | | /* Loop across different Ref IDx */ |
1309 | 8.80M | for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref; i1_ref_idx++) |
1310 | 5.84M | { |
1311 | 5.84M | sad_top_offset = (blk_x * ai4_sad_4x4_block_size[i1_ref_idx]) + |
1312 | 5.84M | ps_ctxt->ai4_row_index[MAX((blk_y - 1), 0)] * |
1313 | 5.84M | ai4_sad_4x4_block_stride[i1_ref_idx]; |
1314 | 5.84M | sad_current_offset = |
1315 | 5.84M | (blk_x * ai4_sad_4x4_block_size[i1_ref_idx]) + |
1316 | 5.84M | ps_ctxt->ai4_row_index[blk_y] * ai4_sad_4x4_block_stride[i1_ref_idx]; |
1317 | | |
1318 | | /* Initialize search node if blk_x == 0, as it doesn't have left neighbours */ |
1319 | 5.84M | if(0 == blk_x) |
1320 | 1.11M | INIT_SEARCH_NODE( |
1321 | 5.84M | &ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx][blk_x], |
1322 | 5.84M | i1_ref_idx); |
1323 | | |
1324 | 5.84M | pi2_cur_ref_sads_4x4 = ps_ctxt->api2_sads_4x4_n_rows[i1_ref_idx]; |
1325 | | |
1326 | | /* Initialize changing params here */ |
1327 | 5.84M | s_search_prms_8x4.i1_ref_idx = i1_ref_idx; |
1328 | 5.84M | s_search_prms_4x8.i1_ref_idx = i1_ref_idx; |
1329 | 5.84M | s_search_prms_4x4.i1_ref_idx = i1_ref_idx; |
1330 | | |
1331 | 5.84M | if(num_blks_in_row == blk_x) |
1332 | 1.11M | { |
1333 | 1.11M | S16 *pi2_sads_4x4_current; |
1334 | | /* Since the current 4x4 block will be a padded region, which may not match with any of the reference */ |
1335 | 1.11M | pi2_sads_4x4_current = pi2_cur_ref_sads_4x4 + sad_current_offset; |
1336 | | |
1337 | 1.11M | memset(pi2_sads_4x4_current, 0, ai4_sad_4x4_block_size[i1_ref_idx]); |
1338 | 1.11M | } |
1339 | | |
1340 | | /* SAD to be computed and stored for the 4x4 block in 1st row and the last block of all rows*/ |
1341 | 5.84M | if((0 == blk_y) || (num_blks_in_row == blk_x)) |
1342 | 1.99M | { |
1343 | 1.99M | S16 *pi2_sads_4x4_current; |
1344 | | /* Compute 4x4 SADs for current block */ |
1345 | | /* Pointer to store SADs */ |
1346 | 1.99M | pi2_sads_4x4_current = pi2_cur_ref_sads_4x4 + sad_current_offset; |
1347 | | |
1348 | 1.99M | hme_derive_worst_case_search_range( |
1349 | 1.99M | &s_range_prms, |
1350 | 1.99M | &s_pic_limit, |
1351 | 1.99M | &as_mv_limit[i1_ref_idx], |
1352 | 1.99M | blk_x << blk_size_shift, |
1353 | 1.99M | blk_y << blk_size_shift, |
1354 | 1.99M | blk_wd, |
1355 | 1.99M | blk_ht); |
1356 | | |
1357 | 1.99M | if(ME_PRISTINE_QUALITY >= e_me_quality_preset) |
1358 | 775k | { |
1359 | 775k | ((ihevce_me_optimised_function_list_t *) |
1360 | 775k | ps_ctxt->pv_me_optimised_function_list) |
1361 | 775k | ->pf_store_4x4_sads_high_quality( |
1362 | 775k | &s_search_prms_4x4, |
1363 | 775k | ps_curr_layer, |
1364 | 775k | &as_mv_limit[i1_ref_idx], |
1365 | 775k | &ps_ctxt->s_wt_pred, |
1366 | 775k | pi2_sads_4x4_current); |
1367 | 775k | } |
1368 | 1.22M | else |
1369 | 1.22M | { |
1370 | 1.22M | ((ihevce_me_optimised_function_list_t *) |
1371 | 1.22M | ps_ctxt->pv_me_optimised_function_list) |
1372 | 1.22M | ->pf_store_4x4_sads_high_speed( |
1373 | 1.22M | &s_search_prms_4x4, |
1374 | 1.22M | ps_curr_layer, |
1375 | 1.22M | &as_mv_limit[i1_ref_idx], |
1376 | 1.22M | &ps_ctxt->s_wt_pred, |
1377 | 1.22M | pi2_sads_4x4_current); |
1378 | 1.22M | } |
1379 | 1.99M | } |
1380 | 3.84M | else |
1381 | 3.84M | { |
1382 | | /* For the zero mv candt, the ref idx to be modified */ |
1383 | 3.84M | ps_candt_zeromv_8x4->i1_ref_idx = i1_ref_idx; |
1384 | 3.84M | ps_candt_zeromv_4x8->i1_ref_idx = i1_ref_idx; |
1385 | | |
1386 | 3.84M | if(ME_XTREME_SPEED_25 != e_me_quality_preset) |
1387 | 3.25M | { |
1388 | | /* For the global mvs alone, the search node points to a local variable */ |
1389 | 3.25M | ps_search_candts_8x4[global_id_8x4].ps_search_node = |
1390 | 3.25M | &s_candt_global[i1_ref_idx]; |
1391 | 3.25M | ps_search_candts_4x8[global_id_4x8].ps_search_node = |
1392 | 3.25M | &s_candt_global[i1_ref_idx]; |
1393 | 3.25M | } |
1394 | | |
1395 | 3.84M | hme_get_spatial_candt( |
1396 | 3.84M | ps_curr_layer, |
1397 | 3.84M | BLK_4x4, |
1398 | 3.84M | blk_x, |
1399 | 3.84M | blk_y - 1, |
1400 | 3.84M | i1_ref_idx, |
1401 | 3.84M | as_top_neighbours, |
1402 | 3.84M | as_left_neighbours, |
1403 | 3.84M | 0, |
1404 | 3.84M | 1, |
1405 | 3.84M | 0, |
1406 | 3.84M | 0); |
1407 | | /* set up the various candts */ |
1408 | 3.84M | *ps_candt_4x8_l = as_left_neighbours[0]; |
1409 | 3.84M | *ps_candt_4x8_t = as_top_neighbours[1]; |
1410 | 3.84M | *ps_candt_4x8_tl = as_top_neighbours[0]; |
1411 | 3.84M | *ps_candt_8x4_l = *ps_candt_4x8_l; |
1412 | 3.84M | *ps_candt_8x4_tl = *ps_candt_4x8_tl; |
1413 | 3.84M | *ps_candt_8x4_t = *ps_candt_4x8_t; |
1414 | | |
1415 | 3.84M | { |
1416 | 3.84M | S32 pred_lx; |
1417 | 3.84M | S16 *pi2_sads_4x4_current, *pi2_sads_4x4_top; |
1418 | 3.84M | pred_ctxt_t *ps_pred_ctxt; |
1419 | 3.84M | PF_MV_COST_FXN pf_mv_cost_compute; |
1420 | | |
1421 | | /* Compute 4x4 SADs for current block */ |
1422 | | /* Pointer to store SADs */ |
1423 | 3.84M | pi2_sads_4x4_current = pi2_cur_ref_sads_4x4 + sad_current_offset; |
1424 | | |
1425 | 3.84M | hme_derive_worst_case_search_range( |
1426 | 3.84M | &s_range_prms, |
1427 | 3.84M | &s_pic_limit, |
1428 | 3.84M | &as_mv_limit[i1_ref_idx], |
1429 | 3.84M | blk_x << blk_size_shift, |
1430 | 3.84M | blk_y << blk_size_shift, |
1431 | 3.84M | blk_wd, |
1432 | 3.84M | blk_ht); |
1433 | 3.84M | if(i4_pic_ht == blk_y) |
1434 | 0 | { |
1435 | 0 | memset(pi2_sads_4x4_current, 0, ai4_sad_4x4_block_size[i1_ref_idx]); |
1436 | 0 | } |
1437 | 3.84M | else |
1438 | 3.84M | { |
1439 | 3.84M | if(ME_PRISTINE_QUALITY >= e_me_quality_preset) |
1440 | 1.47M | { |
1441 | 1.47M | ((ihevce_me_optimised_function_list_t *) |
1442 | 1.47M | ps_ctxt->pv_me_optimised_function_list) |
1443 | 1.47M | ->pf_store_4x4_sads_high_quality( |
1444 | 1.47M | &s_search_prms_4x4, |
1445 | 1.47M | ps_curr_layer, |
1446 | 1.47M | &as_mv_limit[i1_ref_idx], |
1447 | 1.47M | &ps_ctxt->s_wt_pred, |
1448 | 1.47M | pi2_sads_4x4_current); |
1449 | 1.47M | } |
1450 | 2.37M | else |
1451 | 2.37M | { |
1452 | 2.37M | ((ihevce_me_optimised_function_list_t *) |
1453 | 2.37M | ps_ctxt->pv_me_optimised_function_list) |
1454 | 2.37M | ->pf_store_4x4_sads_high_speed( |
1455 | 2.37M | &s_search_prms_4x4, |
1456 | 2.37M | ps_curr_layer, |
1457 | 2.37M | &as_mv_limit[i1_ref_idx], |
1458 | 2.37M | &ps_ctxt->s_wt_pred, |
1459 | 2.37M | pi2_sads_4x4_current); |
1460 | 2.37M | } |
1461 | 3.84M | } |
1462 | | /* Set pred direction to L0 or L1 */ |
1463 | 3.84M | pred_lx = 1 - ps_search_results->pu1_is_past[i1_ref_idx]; |
1464 | | |
1465 | | /* Suitable context (L0 or L1) */ |
1466 | 3.84M | ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx]; |
1467 | | |
1468 | | /* Coarse layer is always explicit */ |
1469 | 3.84M | if(ME_PRISTINE_QUALITY > e_me_quality_preset) |
1470 | 0 | { |
1471 | 0 | pf_mv_cost_compute = compute_mv_cost_coarse; |
1472 | 0 | } |
1473 | 3.84M | else |
1474 | 3.84M | { |
1475 | | /* Cost function is not called in high speed case. Below one is just a dummy function */ |
1476 | 3.84M | pf_mv_cost_compute = compute_mv_cost_coarse_high_speed; |
1477 | 3.84M | } |
1478 | | |
1479 | | /*********************************************************************/ |
1480 | | /* Now, compute the mv for the top block */ |
1481 | | /*********************************************************************/ |
1482 | 3.84M | pi2_sads_4x4_top = pi2_cur_ref_sads_4x4 + sad_top_offset; |
1483 | | |
1484 | | /*********************************************************************/ |
1485 | | /* For every blk in the picture, the search range needs to be derived*/ |
1486 | | /* Any blk can have any mv, but practical search constraints are */ |
1487 | | /* imposed by the picture boundary and amt of padding. */ |
1488 | | /*********************************************************************/ |
1489 | 3.84M | hme_derive_search_range( |
1490 | 3.84M | &s_range_prms, |
1491 | 3.84M | &s_pic_limit, |
1492 | 3.84M | &as_mv_limit[i1_ref_idx], |
1493 | 3.84M | blk_x << blk_size_shift, |
1494 | 3.84M | (blk_y - 1) << blk_size_shift, |
1495 | 3.84M | blk_wd, |
1496 | 3.84M | blk_ht); |
1497 | | |
1498 | | /* Compute the mv for the top block */ |
1499 | 3.84M | if(ME_PRISTINE_QUALITY >= e_me_quality_preset) |
1500 | 1.47M | { |
1501 | 1.47M | ((ihevce_me_optimised_function_list_t *) |
1502 | 1.47M | ps_ctxt->pv_me_optimised_function_list) |
1503 | 1.47M | ->pf_combine_4x4_sads_and_compute_cost_high_quality( |
1504 | 1.47M | i1_ref_idx, |
1505 | 1.47M | &s_range_prms, /* Both 4x8 and 8x4 has same search range */ |
1506 | 1.47M | &as_mv_limit[i1_ref_idx], |
1507 | 1.47M | &best_mv_4x8, |
1508 | 1.47M | &best_mv_8x4, |
1509 | 1.47M | ps_pred_ctxt, |
1510 | 1.47M | pf_mv_cost_compute, |
1511 | 1.47M | pi2_sads_4x4_top, /* Current SAD block */ |
1512 | 1.47M | (pi2_sads_4x4_top + |
1513 | 1.47M | ai4_sad_4x4_block_size[i1_ref_idx]), /* East SAD block */ |
1514 | 1.47M | pi2_sads_4x4_current); /* South SAD block */ |
1515 | 1.47M | } |
1516 | 2.37M | else |
1517 | 2.37M | { |
1518 | 2.37M | ((ihevce_me_optimised_function_list_t *) |
1519 | 2.37M | ps_ctxt->pv_me_optimised_function_list) |
1520 | 2.37M | ->pf_combine_4x4_sads_and_compute_cost_high_speed( |
1521 | 2.37M | i1_ref_idx, |
1522 | 2.37M | &s_range_prms, /* Both 4x8 and 8x4 has same search range */ |
1523 | 2.37M | &as_mv_limit[i1_ref_idx], |
1524 | 2.37M | &best_mv_4x8, |
1525 | 2.37M | &best_mv_8x4, |
1526 | 2.37M | ps_pred_ctxt, |
1527 | 2.37M | pf_mv_cost_compute, |
1528 | 2.37M | pi2_sads_4x4_top, /* Current SAD block */ |
1529 | 2.37M | (pi2_sads_4x4_top + |
1530 | 2.37M | ai4_sad_4x4_block_size[i1_ref_idx]), /* East SAD block */ |
1531 | 2.37M | pi2_sads_4x4_current); /* South SAD block */ |
1532 | 2.37M | } |
1533 | | |
1534 | 3.84M | ps_candt_fs_4x8->s_mv.i2_mvx = best_mv_4x8.i2_mv_x; |
1535 | 3.84M | ps_candt_fs_4x8->s_mv.i2_mvy = best_mv_4x8.i2_mv_y; |
1536 | 3.84M | ps_candt_fs_4x8->i1_ref_idx = i1_ref_idx; |
1537 | | |
1538 | 3.84M | ps_candt_fs_8x4->s_mv.i2_mvx = best_mv_8x4.i2_mv_x; |
1539 | 3.84M | ps_candt_fs_8x4->s_mv.i2_mvy = best_mv_8x4.i2_mv_y; |
1540 | 3.84M | ps_candt_fs_8x4->i1_ref_idx = i1_ref_idx; |
1541 | 3.84M | } |
1542 | | |
1543 | | /* call the appropriate Search Algo for 4x8S. The 4x8N would */ |
1544 | | /* have already been called by top block */ |
1545 | 3.84M | hme_pred_search_square_stepn( |
1546 | 3.84M | &s_search_prms_8x4, |
1547 | 3.84M | ps_curr_layer, |
1548 | 3.84M | &ps_ctxt->s_wt_pred, |
1549 | 3.84M | e_me_quality_preset, |
1550 | 3.84M | (ihevce_me_optimised_function_list_t *) |
1551 | 3.84M | ps_ctxt->pv_me_optimised_function_list |
1552 | | |
1553 | 3.84M | ); |
1554 | | |
1555 | | /* Call the appropriate search algo for 8x4E */ |
1556 | 3.84M | hme_pred_search_square_stepn( |
1557 | 3.84M | &s_search_prms_4x8, |
1558 | 3.84M | ps_curr_layer, |
1559 | 3.84M | &ps_ctxt->s_wt_pred, |
1560 | 3.84M | e_me_quality_preset, |
1561 | 3.84M | (ihevce_me_optimised_function_list_t *) |
1562 | 3.84M | ps_ctxt->pv_me_optimised_function_list); |
1563 | | |
1564 | 3.84M | if(ME_XTREME_SPEED_25 != e_me_quality_preset) |
1565 | 3.25M | { |
1566 | | /* Histogram updates across different Ref ID for global MV */ |
1567 | 3.25M | hme_update_histogram( |
1568 | 3.25M | ps_ctxt->aps_mv_hist[i1_ref_idx], |
1569 | 3.25M | aps_best_search_node_8x4[i1_ref_idx]->s_mv.i2_mvx, |
1570 | 3.25M | aps_best_search_node_8x4[i1_ref_idx]->s_mv.i2_mvy); |
1571 | 3.25M | hme_update_histogram( |
1572 | 3.25M | ps_ctxt->aps_mv_hist[i1_ref_idx], |
1573 | 3.25M | aps_best_search_node_4x8[i1_ref_idx]->s_mv.i2_mvx, |
1574 | 3.25M | aps_best_search_node_4x8[i1_ref_idx]->s_mv.i2_mvy); |
1575 | 3.25M | } |
1576 | | |
1577 | | /* update the best results to the mv bank */ |
1578 | 3.84M | hme_update_mv_bank_coarse( |
1579 | 3.84M | ps_search_results, |
1580 | 3.84M | ps_curr_layer->ps_layer_mvbank, |
1581 | 3.84M | blk_x, |
1582 | 3.84M | (blk_y - 1), |
1583 | 3.84M | ps_ctxt->aps_best_search_nodes_4x8_n_rows[i1_ref_idx] + |
1584 | 3.84M | search_node_top_offset, /* Top Candidate */ |
1585 | 3.84M | ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx] + |
1586 | 3.84M | search_node_left_offset, /* Left candidate */ |
1587 | 3.84M | i1_ref_idx, |
1588 | 3.84M | &s_mv_update_prms); |
1589 | | |
1590 | | /* Copy the best search result to 5 row array for future use */ |
1591 | 3.84M | *(ps_ctxt->aps_best_search_nodes_4x8_n_rows[i1_ref_idx] + blk_x + |
1592 | 3.84M | ps_ctxt->ai4_row_index[blk_y - 1] * num_blks_in_row) = |
1593 | 3.84M | *(aps_best_search_node_4x8[i1_ref_idx]); |
1594 | | |
1595 | 3.84M | *(ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx] + blk_x + |
1596 | 3.84M | ps_ctxt->ai4_row_index[blk_y - 1] * num_blks_in_row) = |
1597 | 3.84M | *(aps_best_search_node_8x4[i1_ref_idx]); |
1598 | | |
1599 | | /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */ |
1600 | | /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */ |
1601 | 3.84M | if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled) |
1602 | 3.15M | { |
1603 | 3.15M | WORD32 num_mvs, i, j; |
1604 | 3.15M | search_node_t *aps_search_nodes[4]; |
1605 | | /* Best results for 8x4R and 4x8B blocks */ |
1606 | 3.15M | search_node_t *ps_search_node_8x4_r, *ps_search_node_4x8_b; |
1607 | | |
1608 | 3.15M | num_mvs = ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref; |
1609 | | |
1610 | | /*************************************************************************/ |
1611 | | /* We have at least 4 distinct results: the 4x8 top (coming from top blk) */ |
1612 | | /* 8x4 left (coming from left blk), 8x4 and 4x8 right and bot resp. */ |
1613 | | /* If number of results to be stored is 4, then we store all these 4 */ |
1614 | | /* results, else we pick best ones */ |
1615 | | /*************************************************************************/ |
1616 | 3.15M | ps_search_node_8x4_r = |
1617 | 3.15M | ps_search_results->aps_part_results[i1_ref_idx][PART_ID_2NxN_B]; |
1618 | 3.15M | ps_search_node_4x8_b = |
1619 | 3.15M | ps_search_results->aps_part_results[i1_ref_idx][PART_ID_Nx2N_R]; |
1620 | | |
1621 | 3.15M | ASSERT(num_mvs <= 4); |
1622 | | |
1623 | | /* Doing this to sort best results */ |
1624 | 3.15M | aps_search_nodes[0] = ps_search_node_8x4_r; |
1625 | 3.15M | aps_search_nodes[1] = ps_search_node_4x8_b; |
1626 | 3.15M | aps_search_nodes[2] = |
1627 | 3.15M | ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx] + |
1628 | 3.15M | search_node_left_offset; /* Left candidate */ |
1629 | 3.15M | aps_search_nodes[3] = |
1630 | 3.15M | ps_ctxt->aps_best_search_nodes_4x8_n_rows[i1_ref_idx] + |
1631 | 3.15M | search_node_top_offset; /* Top Candidate */ |
1632 | | |
1633 | | /* Note : Need to be resolved!!! */ |
1634 | | /* Added this to match with "hme_update_mv_bank_coarse" */ |
1635 | 3.15M | if(num_mvs != 4) |
1636 | 0 | { |
1637 | | /* Run through the results, store them in best to worst order */ |
1638 | 0 | for(i = 0; i < num_mvs; i++) |
1639 | 0 | { |
1640 | 0 | for(j = i + 1; j < 4; j++) |
1641 | 0 | { |
1642 | 0 | if(aps_search_nodes[j]->i4_tot_cost < |
1643 | 0 | aps_search_nodes[i]->i4_tot_cost) |
1644 | 0 | { |
1645 | 0 | SWAP_HME( |
1646 | 0 | aps_search_nodes[j], |
1647 | 0 | aps_search_nodes[i], |
1648 | 0 | search_node_t *); |
1649 | 0 | } |
1650 | 0 | } |
1651 | 0 | } |
1652 | 0 | } |
1653 | | |
1654 | | /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */ |
1655 | 15.7M | for(i = 0; i < num_mvs; i++) |
1656 | 12.6M | { |
1657 | 12.6M | hme_update_dynamic_search_params( |
1658 | 12.6M | &ps_ctxt->s_coarse_dyn_range_prms |
1659 | 12.6M | .as_dyn_range_prms[i4_layer_id][i1_ref_idx], |
1660 | 12.6M | aps_search_nodes[i]->s_mv.i2_mvy); |
1661 | 12.6M | } |
1662 | 3.15M | } |
1663 | 3.84M | } |
1664 | 5.84M | } |
1665 | | |
1666 | | /* Update the number of blocks processed in the current row */ |
1667 | 2.96M | ihevce_dmgr_set_row_row_sync( |
1668 | 2.96M | pv_hme_dep_mngr, |
1669 | 2.96M | (blk_x + 1), |
1670 | 2.96M | blk_y, |
1671 | 2.96M | 0 /* Col Tile No. : Not supported in PreEnc*/); |
1672 | 2.96M | } |
1673 | | |
1674 | | /* set the output dependency after completion of row */ |
1675 | 564k | ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong); |
1676 | 564k | } |
1677 | 1.01M | } |
1678 | | |
1679 | 156k | return; |
1680 | 156k | } |