/src/libavc/encoder/ime.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2015 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | /** |
21 | | ******************************************************************************* |
22 | | * @file |
23 | | * ime.c |
24 | | * |
25 | | * @brief |
26 | | * This file contains functions needed for computing motion vectors of a |
27 | | * 16x16 block |
28 | | * |
29 | | * @author |
30 | | * Ittiam |
31 | | * |
32 | | * @par List of Functions: |
33 | | * - ime_diamond_search_16x16 |
34 | | * - ime_evaluate_init_srchposn_16x16 |
35 | | * - ime_full_pel_motion_estimation_16x16 |
36 | | * - ime_sub_pel_motion_estimation_16x16 |
37 | | * - ime_compute_skip_cost |
38 | | * |
39 | | * @remarks |
40 | | * None |
41 | | * |
42 | | ******************************************************************************* |
43 | | */ |
44 | | |
45 | | /*****************************************************************************/ |
46 | | /* File Includes */ |
47 | | /*****************************************************************************/ |
48 | | |
49 | | /* System include files */ |
50 | | #include <stdio.h> |
51 | | #include <assert.h> |
52 | | #include <limits.h> |
53 | | #include <string.h> |
54 | | |
55 | | /* User include files */ |
56 | | #include "ime_typedefs.h" |
57 | | #include "ime_distortion_metrics.h" |
58 | | #include "ime_defs.h" |
59 | | #include "ime_structs.h" |
60 | | #include "ime.h" |
61 | | #include "ime_macros.h" |
62 | | #include "ime_statistics.h" |
63 | | |
64 | | /** |
65 | | ******************************************************************************* |
66 | | * |
67 | | * @brief Diamond Search |
68 | | * |
69 | | * @par Description: |
70 | | * This function computes the sad at vertices of several layers of diamond grid |
71 | | * at a time. The number of layers of diamond grid that would be evaluated is |
72 | | * configurable.The function computes the sad at vertices of a diamond grid. If |
73 | | * the sad at the center of the diamond grid is lesser than the sad at any other |
74 | | * point of the diamond grid, the function marks the candidate Mb partition as |
75 | | * mv. |
76 | | * |
77 | | * @param[in] ps_me_ctxt |
78 | | * pointer to me context |
79 | | * |
80 | | * @param[in] i4_reflist |
81 | | * ref list |
82 | | * |
83 | | * @returns mv pair & corresponding distortion and cost |
84 | | * |
85 | | * @remarks Diamond Srch, radius is 1 |
86 | | * |
87 | | ******************************************************************************* |
88 | | */ |
89 | | void ime_diamond_search_16x16(me_ctxt_t *ps_me_ctxt, WORD32 i4_reflist) |
90 | 259k | { |
91 | | /* MB partition info */ |
92 | 259k | mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist]; |
93 | | |
94 | | /* lagrange parameter */ |
95 | 259k | UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion; |
96 | | |
97 | | /* srch range*/ |
98 | 259k | WORD32 i4_srch_range_n = ps_me_ctxt->i4_srch_range_n; |
99 | 259k | WORD32 i4_srch_range_s = ps_me_ctxt->i4_srch_range_s; |
100 | 259k | WORD32 i4_srch_range_e = ps_me_ctxt->i4_srch_range_e; |
101 | 259k | WORD32 i4_srch_range_w = ps_me_ctxt->i4_srch_range_w; |
102 | | |
103 | | /* enabled fast sad computation */ |
104 | | // UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad; |
105 | | |
106 | | /* pointer to src macro block */ |
107 | 259k | UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma; |
108 | 259k | UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist]; |
109 | | |
110 | | /* strides */ |
111 | 259k | WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd; |
112 | 259k | WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd; |
113 | | |
114 | | /* least cost */ |
115 | 259k | WORD32 i4_cost_least = ps_mb_part->i4_mb_cost; |
116 | | |
117 | | /* least sad */ |
118 | 259k | WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion; |
119 | | |
120 | | /* mv pair */ |
121 | 259k | WORD16 i2_mvx, i2_mvy; |
122 | | |
123 | | /* mv bits */ |
124 | 259k | UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits; |
125 | | |
126 | | /* temp var */ |
127 | 259k | WORD32 i4_cost[4]; |
128 | 259k | WORD32 i4_sad[4]; |
129 | 259k | UWORD8 *pu1_ref; |
130 | 259k | WORD16 i2_mv_u_x, i2_mv_u_y; |
131 | | |
132 | | /* Diamond search Iteration Max Cnt */ |
133 | 259k | UWORD32 u4_num_layers = ps_me_ctxt->u4_num_layers; |
134 | | |
135 | | /* temp var */ |
136 | | // UWORD8 u1_prev_jump = NONE; |
137 | | // UWORD8 u1_curr_jump = NONE; |
138 | | // UWORD8 u1_next_jump; |
139 | | // WORD32 mask_arr[5] = {15, 13, 14, 7, 11}; |
140 | | // WORD32 mask; |
141 | | // UWORD8 *apu1_ref[4]; |
142 | | // WORD32 i, cnt; |
143 | | // WORD32 dia[4][2] = {{-1, 0}, {1, 0}, {0, -1}, {0, 1}}; |
144 | | |
145 | | /* mv with best sad during initial evaluation */ |
146 | 259k | i2_mvx = ps_mb_part->s_mv_curr.i2_mvx; |
147 | 259k | i2_mvy = ps_mb_part->s_mv_curr.i2_mvy; |
148 | | |
149 | 259k | i2_mv_u_x = i2_mvx; |
150 | 259k | i2_mv_u_y = i2_mvy; |
151 | | |
152 | 905k | while (u4_num_layers) |
153 | 892k | { |
154 | | /* FIXME : is this the write way to check for out of bounds ? */ |
155 | 892k | if ( (i2_mvx - 1 < i4_srch_range_w) || |
156 | 892k | (i2_mvx + 1 > i4_srch_range_e) || |
157 | 892k | (i2_mvy - 1 < i4_srch_range_n) || |
158 | 892k | (i2_mvy + 1 > i4_srch_range_s) ) |
159 | 40.1k | { |
160 | 40.1k | break; |
161 | 40.1k | } |
162 | | |
163 | 852k | pu1_ref = pu1_ref_mb + i2_mvx + (i2_mvy * i4_ref_strd); |
164 | | |
165 | 852k | ps_me_ctxt->pf_ime_compute_sad4_diamond(pu1_ref, |
166 | 852k | pu1_curr_mb, |
167 | 852k | i4_ref_strd, |
168 | 852k | i4_src_strd, |
169 | 852k | i4_sad); |
170 | | |
171 | 852k | DEBUG_SAD_HISTOGRAM_ADD(i4_sad[0], 2); |
172 | 852k | DEBUG_SAD_HISTOGRAM_ADD(i4_sad[1], 2); |
173 | 852k | DEBUG_SAD_HISTOGRAM_ADD(i4_sad[2], 2); |
174 | 852k | DEBUG_SAD_HISTOGRAM_ADD(i4_sad[3], 2); |
175 | | |
176 | | /* compute cost */ |
177 | 852k | i4_cost[0] = i4_sad[0] + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ ((i2_mvx - 1) << 2) - ps_mb_part->s_mv_pred.i2_mvx] |
178 | 852k | + pu1_mv_bits[(i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] )); |
179 | 852k | i4_cost[1] = i4_sad[1] + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ ((i2_mvx + 1) << 2) - ps_mb_part->s_mv_pred.i2_mvx] |
180 | 852k | + pu1_mv_bits[(i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] )); |
181 | 852k | i4_cost[2] = i4_sad[2] + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ (i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx] |
182 | 852k | + pu1_mv_bits[((i2_mvy - 1) << 2) - ps_mb_part->s_mv_pred.i2_mvy] )); |
183 | 852k | i4_cost[3] = i4_sad[3] + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ (i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx] |
184 | 852k | + pu1_mv_bits[((i2_mvy + 1) << 2) - ps_mb_part->s_mv_pred.i2_mvy] )); |
185 | | |
186 | | |
187 | 852k | if (i4_cost_least > i4_cost[0]) |
188 | 273k | { |
189 | 273k | i4_cost_least = i4_cost[0]; |
190 | 273k | i4_distortion_least = i4_sad[0]; |
191 | | |
192 | 273k | i2_mv_u_x = (i2_mvx - 1); |
193 | 273k | i2_mv_u_y = i2_mvy; |
194 | 273k | } |
195 | | |
196 | 852k | if (i4_cost_least > i4_cost[1]) |
197 | 213k | { |
198 | 213k | i4_cost_least = i4_cost[1]; |
199 | 213k | i4_distortion_least = i4_sad[1]; |
200 | | |
201 | 213k | i2_mv_u_x = (i2_mvx + 1); |
202 | 213k | i2_mv_u_y = i2_mvy; |
203 | 213k | } |
204 | | |
205 | 852k | if (i4_cost_least > i4_cost[2]) |
206 | 196k | { |
207 | 196k | i4_cost_least = i4_cost[2]; |
208 | 196k | i4_distortion_least = i4_sad[2]; |
209 | | |
210 | 196k | i2_mv_u_x = i2_mvx; |
211 | 196k | i2_mv_u_y = i2_mvy - 1; |
212 | 196k | } |
213 | | |
214 | 852k | if (i4_cost_least > i4_cost[3]) |
215 | 120k | { |
216 | 120k | i4_cost_least = i4_cost[3]; |
217 | 120k | i4_distortion_least = i4_sad[3]; |
218 | | |
219 | 120k | i2_mv_u_x = i2_mvx; |
220 | 120k | i2_mv_u_y = i2_mvy + 1; |
221 | 120k | } |
222 | | |
223 | 852k | if( (i2_mv_u_x == i2_mvx) && (i2_mv_u_y == i2_mvy)) |
224 | 206k | { |
225 | 206k | ps_mb_part->u4_exit = 1; |
226 | 206k | break; |
227 | 206k | } |
228 | 645k | else |
229 | 645k | { |
230 | 645k | i2_mvx = i2_mv_u_x; |
231 | 645k | i2_mvy = i2_mv_u_y; |
232 | 645k | } |
233 | 645k | u4_num_layers--; |
234 | 645k | } |
235 | | |
236 | 259k | if (i4_cost_least < ps_mb_part->i4_mb_cost) |
237 | 126k | { |
238 | 126k | ps_mb_part->i4_mb_cost = i4_cost_least; |
239 | 126k | ps_mb_part->i4_mb_distortion = i4_distortion_least; |
240 | 126k | ps_mb_part->s_mv_curr.i2_mvx = i2_mvx; |
241 | 126k | ps_mb_part->s_mv_curr.i2_mvy = i2_mvy; |
242 | 126k | } |
243 | | |
244 | 259k | } |
245 | | |
246 | | |
247 | | /** |
248 | | ******************************************************************************* |
249 | | * |
250 | | * @brief This function computes the best motion vector among the tentative mv |
251 | | * candidates chosen. |
252 | | * |
253 | | * @par Description: |
254 | | * This function determines the position in the search window at which the motion |
255 | | * estimation should begin in order to minimise the number of search iterations. |
256 | | * |
257 | | * @param[in] ps_me_ctxt |
258 | | * pointer to me context |
259 | | * |
260 | | * @param[in] i4_reflist |
261 | | * ref list |
262 | | * |
263 | | * @returns mv pair & corresponding distortion and cost |
264 | | * |
265 | | * @remarks none |
266 | | * |
267 | | ******************************************************************************* |
268 | | */ |
269 | | |
270 | | void ime_evaluate_init_srchposn_16x16 |
271 | | ( |
272 | | me_ctxt_t *ps_me_ctxt, |
273 | | WORD32 i4_reflist |
274 | | ) |
275 | 259k | { |
276 | 259k | UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion; |
277 | | |
278 | | /* candidate mv cnt */ |
279 | 259k | UWORD32 u4_num_candidates = ps_me_ctxt->u4_num_candidates[i4_reflist]; |
280 | | |
281 | | /* list of candidate mvs */ |
282 | 259k | ime_mv_t *ps_mv_list = ps_me_ctxt->as_mv_init_search[i4_reflist]; |
283 | | |
284 | | /* pointer to src macro block */ |
285 | 259k | UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma; |
286 | 259k | UWORD8 *pu1_ref_mb = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist]; |
287 | | |
288 | | /* strides */ |
289 | 259k | WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd; |
290 | 259k | WORD32 i4_ref_strd = ps_me_ctxt->i4_rec_strd; |
291 | | |
292 | | /* enabled fast sad computation */ |
293 | 259k | UWORD32 u4_enable_fast_sad = ps_me_ctxt->u4_enable_fast_sad; |
294 | | |
295 | | /* SAD(distortion metric) of an 8x8 block */ |
296 | 259k | WORD32 i4_mb_distortion; |
297 | | |
298 | | /* cost = distortion + u4_lambda_motion * rate */ |
299 | 259k | WORD32 i4_mb_cost, i4_mb_cost_least = INT_MAX, i4_distortion_least = INT_MAX; |
300 | | |
301 | | /* mb partitions info */ |
302 | 259k | mb_part_ctxt *ps_mb_part = &(ps_me_ctxt->as_mb_part[i4_reflist]); |
303 | | |
304 | | /* mv bits */ |
305 | 259k | UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits; |
306 | | |
307 | | /* temp var */ |
308 | 259k | UWORD32 i, j; |
309 | 259k | WORD32 i4_srch_pos_idx = 0; |
310 | 259k | UWORD8 *pu1_ref = NULL; |
311 | | |
312 | | /* Carry out a search using each of the motion vector pairs identified above as predictors. */ |
313 | | /* TODO : Just like Skip, Do we need to add any bias to zero mv as well */ |
314 | 1.27M | for(i = 0; i < u4_num_candidates; i++) |
315 | 1.01M | { |
316 | | /* compute sad */ |
317 | 1.01M | WORD32 c_sad = 1; |
318 | | |
319 | 1.63M | for(j = 0; j < i; j++ ) |
320 | 1.10M | { |
321 | 1.10M | if ( (ps_mv_list[i].i2_mvx == ps_mv_list[j].i2_mvx) && |
322 | 1.10M | (ps_mv_list[i].i2_mvy == ps_mv_list[j].i2_mvy) ) |
323 | 481k | { |
324 | 481k | c_sad = 0; |
325 | 481k | break; |
326 | 481k | } |
327 | 1.10M | } |
328 | 1.01M | if(c_sad) |
329 | 529k | { |
330 | | /* adjust ref pointer */ |
331 | 529k | pu1_ref = pu1_ref_mb + ps_mv_list[i].i2_mvx + (ps_mv_list[i].i2_mvy * i4_ref_strd); |
332 | | |
333 | | /* compute distortion */ |
334 | 529k | ps_me_ctxt->pf_ime_compute_sad_16x16[u4_enable_fast_sad](pu1_curr_mb, pu1_ref, i4_src_strd, i4_ref_strd, i4_mb_cost_least, &i4_mb_distortion); |
335 | | |
336 | 529k | DEBUG_SAD_HISTOGRAM_ADD(i4_mb_distortion, 3); |
337 | | |
338 | | /* compute cost */ |
339 | 529k | i4_mb_cost = i4_mb_distortion + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ (ps_mv_list[i].i2_mvx << 2) - ps_mb_part->s_mv_pred.i2_mvx] |
340 | 529k | + pu1_mv_bits[(ps_mv_list[i].i2_mvy << 2) - ps_mb_part->s_mv_pred.i2_mvy] )); |
341 | | |
342 | 529k | if (i4_mb_cost < i4_mb_cost_least) |
343 | 390k | { |
344 | 390k | i4_mb_cost_least = i4_mb_cost; |
345 | | |
346 | 390k | i4_distortion_least = i4_mb_distortion; |
347 | | |
348 | 390k | i4_srch_pos_idx = i; |
349 | 390k | } |
350 | 529k | } |
351 | 1.01M | } |
352 | | |
353 | 259k | if (i4_mb_cost_least < ps_mb_part->i4_mb_cost) |
354 | 259k | { |
355 | 259k | ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx; |
356 | 259k | ps_mb_part->i4_mb_cost = i4_mb_cost_least; |
357 | 259k | ps_mb_part->i4_mb_distortion = i4_distortion_least; |
358 | 259k | ps_mb_part->s_mv_curr.i2_mvx = ps_mv_list[i4_srch_pos_idx].i2_mvx; |
359 | 259k | ps_mb_part->s_mv_curr.i2_mvy = ps_mv_list[i4_srch_pos_idx].i2_mvy; |
360 | 259k | } |
361 | 259k | } |
362 | | |
363 | | /** |
364 | | ******************************************************************************* |
365 | | * |
366 | | * @brief Searches for the best matching full pixel predictor within the search |
367 | | * range |
368 | | * |
369 | | * @par Description: |
370 | | * For a given algorithm (diamond, Hex, nStep, ...) chosen, it searches for the |
371 | | * best matching full pixel predictor within the search range |
372 | | * |
373 | | * @param[in] ps_me_ctxt |
374 | | * pointer to me context |
375 | | * |
376 | | * @param[in] i4_reflist |
377 | | * ref list |
378 | | * |
379 | | * @returns mv pair & corresponding distortion and cost |
380 | | * |
381 | | * @remarks none |
382 | | * |
383 | | ******************************************************************************* |
384 | | */ |
385 | | void ime_full_pel_motion_estimation_16x16 |
386 | | ( |
387 | | me_ctxt_t *ps_me_ctxt, |
388 | | WORD32 i4_ref_list |
389 | | ) |
390 | 259k | { |
391 | | /* mb part info */ |
392 | 259k | mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_ref_list]; |
393 | | |
394 | | /******************************************************************/ |
395 | | /* Modify Search range about initial candidate instead of zero mv */ |
396 | | /******************************************************************/ |
397 | | /* |
398 | | * FIXME: The motion vectors in a way can become unbounded. It may so happen that |
399 | | * MV might exceed the limit of the profile configured. |
400 | | */ |
401 | 259k | ps_me_ctxt->i4_srch_range_w = MAX(ps_me_ctxt->i4_srch_range_w, |
402 | 259k | -ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx); |
403 | 259k | ps_me_ctxt->i4_srch_range_e = MIN(ps_me_ctxt->i4_srch_range_e, |
404 | 259k | ps_me_ctxt->ai2_srch_boundaries[0] + ps_mb_part->s_mv_curr.i2_mvx); |
405 | 259k | ps_me_ctxt->i4_srch_range_n = MAX(ps_me_ctxt->i4_srch_range_n, |
406 | 259k | -ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy); |
407 | 259k | ps_me_ctxt->i4_srch_range_s = MIN(ps_me_ctxt->i4_srch_range_s, |
408 | 259k | ps_me_ctxt->ai2_srch_boundaries[1] + ps_mb_part->s_mv_curr.i2_mvy); |
409 | | |
410 | | /************************************************************/ |
411 | | /* Traverse about best initial candidate for mv */ |
412 | | /************************************************************/ |
413 | | |
414 | 259k | switch (ps_me_ctxt->u4_me_speed_preset) |
415 | 259k | { |
416 | 259k | case DMND_SRCH: |
417 | 259k | ime_diamond_search_16x16(ps_me_ctxt, i4_ref_list); |
418 | 259k | break; |
419 | 0 | default: |
420 | 0 | assert(0); |
421 | 0 | break; |
422 | 259k | } |
423 | 259k | } |
424 | | |
425 | | /** |
426 | | ******************************************************************************* |
427 | | * |
428 | | * @brief Searches for the best matching sub pixel predictor within the search |
429 | | * range |
430 | | * |
431 | | * @par Description: |
432 | | * This function begins by searching across all sub pixel sample points |
433 | | * around the full pel motion vector. The vector with least cost is chosen as |
434 | | * the mv for the current mb. |
435 | | * |
436 | | * @param[in] ps_me_ctxt |
437 | | * pointer to me context |
438 | | * |
439 | | * @param[in] i4_reflist |
440 | | * ref list |
441 | | * |
442 | | * @returns mv pair & corresponding distortion and cost |
443 | | * |
444 | | * @remarks none |
445 | | * |
446 | | ******************************************************************************* |
447 | | */ |
448 | | void ime_sub_pel_motion_estimation_16x16 |
449 | | ( |
450 | | me_ctxt_t *ps_me_ctxt, |
451 | | WORD32 i4_reflist |
452 | | ) |
453 | 179k | { |
454 | | /* pointers to src & ref macro block */ |
455 | 179k | UWORD8 *pu1_curr_mb = ps_me_ctxt->pu1_src_buf_luma; |
456 | | |
457 | | /* pointers to ref. half pel planes */ |
458 | 179k | UWORD8 *pu1_ref_mb_half_x; |
459 | 179k | UWORD8 *pu1_ref_mb_half_y; |
460 | 179k | UWORD8 *pu1_ref_mb_half_xy; |
461 | | |
462 | | /* pointers to ref. half pel planes */ |
463 | 179k | UWORD8 *pu1_ref_mb_half_x_temp; |
464 | 179k | UWORD8 *pu1_ref_mb_half_y_temp; |
465 | 179k | UWORD8 *pu1_ref_mb_half_xy_temp; |
466 | | |
467 | | /* strides */ |
468 | 179k | WORD32 i4_src_strd = ps_me_ctxt->i4_src_strd; |
469 | | |
470 | 179k | WORD32 i4_ref_strd = ps_me_ctxt->u4_subpel_buf_strd; |
471 | | |
472 | | /* mb partitions info */ |
473 | 179k | mb_part_ctxt *ps_mb_part = &ps_me_ctxt->as_mb_part[i4_reflist]; |
474 | | |
475 | | /* SAD(distortion metric) of an mb */ |
476 | 179k | WORD32 i4_mb_distortion; |
477 | 179k | WORD32 i4_distortion_least = ps_mb_part->i4_mb_distortion; |
478 | | |
479 | | /* cost = distortion + u4_lambda_motion * rate */ |
480 | 179k | WORD32 i4_mb_cost; |
481 | 179k | WORD32 i4_mb_cost_least = ps_mb_part->i4_mb_cost; |
482 | | |
483 | | /*Best half pel buffer*/ |
484 | 179k | UWORD8 *pu1_best_hpel_buf = NULL; |
485 | | |
486 | | /* mv bits */ |
487 | 179k | UWORD8 *pu1_mv_bits = ps_me_ctxt->pu1_mv_bits; |
488 | | |
489 | | /* Motion vectors in full-pel units */ |
490 | 179k | WORD16 mv_x, mv_y; |
491 | | |
492 | | /* lambda - lagrange constant */ |
493 | 179k | UWORD32 u4_lambda_motion = ps_me_ctxt->u4_lambda_motion; |
494 | | |
495 | | /* Flags to check if half pel points needs to be evaluated */ |
496 | | /**************************************/ |
497 | | /* 1 bit for each half pel candidate */ |
498 | | /* bit 0 - half x = 1, half y = 0 */ |
499 | | /* bit 1 - half x = -1, half y = 0 */ |
500 | | /* bit 2 - half x = 0, half y = 1 */ |
501 | | /* bit 3 - half x = 0, half y = -1 */ |
502 | | /* bit 4 - half x = 1, half y = 1 */ |
503 | | /* bit 5 - half x = -1, half y = 1 */ |
504 | | /* bit 6 - half x = 1, half y = -1 */ |
505 | | /* bit 7 - half x = -1, half y = -1 */ |
506 | | /**************************************/ |
507 | | /* temp var */ |
508 | 179k | WORD16 i2_mv_u_x, i2_mv_u_y; |
509 | 179k | WORD32 i, j; |
510 | 179k | WORD32 ai4_sad[8]; |
511 | | |
512 | 179k | WORD32 i4_srch_pos_idx = ps_mb_part->i4_srch_pos_idx; |
513 | | |
514 | 179k | i2_mv_u_x = ps_mb_part->s_mv_curr.i2_mvx; |
515 | 179k | i2_mv_u_y = ps_mb_part->s_mv_curr.i2_mvy; |
516 | | |
517 | | /************************************************************/ |
518 | | /* Evaluate half pel */ |
519 | | /************************************************************/ |
520 | 179k | mv_x = ps_mb_part->s_mv_curr.i2_mvx >> 2; |
521 | 179k | mv_y = ps_mb_part->s_mv_curr.i2_mvy >> 2; |
522 | | |
523 | | |
524 | | /**************************************************************/ |
525 | | /* ps_me_ctxt->pu1_half_x points to the half pel pixel on the */ |
526 | | /* left side of full pel */ |
527 | | /* ps_me_ctxt->pu1_half_y points to the half pel pixel on the */ |
528 | | /* top side of full pel */ |
529 | | /* ps_me_ctxt->pu1_half_xy points to the half pel pixel */ |
530 | | /* on the top left side of full pel */ |
531 | | /* for the function pf_ime_sub_pel_compute_sad_16x16 the */ |
532 | | /* default postions are */ |
533 | | /* ps_me_ctxt->pu1_half_x = right halp_pel */ |
534 | | /* ps_me_ctxt->pu1_half_y = bottom halp_pel */ |
535 | | /* ps_me_ctxt->pu1_half_xy = bottom right halp_pel */ |
536 | | /* Hence corresponding adjustments made here */ |
537 | | /**************************************************************/ |
538 | | |
539 | 179k | pu1_ref_mb_half_x_temp = pu1_ref_mb_half_x = ps_me_ctxt->apu1_subpel_buffs[0] + 1; |
540 | 179k | pu1_ref_mb_half_y_temp = pu1_ref_mb_half_y = ps_me_ctxt->apu1_subpel_buffs[1] + 1 + i4_ref_strd; |
541 | 179k | pu1_ref_mb_half_xy_temp = pu1_ref_mb_half_xy = ps_me_ctxt->apu1_subpel_buffs[2] + 1 + i4_ref_strd; |
542 | | |
543 | 179k | ps_me_ctxt->pf_ime_sub_pel_compute_sad_16x16(pu1_curr_mb, pu1_ref_mb_half_x, |
544 | 179k | pu1_ref_mb_half_y, |
545 | 179k | pu1_ref_mb_half_xy, |
546 | 179k | i4_src_strd, i4_ref_strd, |
547 | 179k | ai4_sad); |
548 | | |
549 | | /* Half x plane */ |
550 | 538k | for(i = 0; i < 2; i++) |
551 | 358k | { |
552 | 358k | WORD32 mv_x_tmp = (mv_x << 2) + 2; |
553 | 358k | WORD32 mv_y_tmp = (mv_y << 2); |
554 | | |
555 | 358k | mv_x_tmp -= (i * 4); |
556 | | |
557 | 358k | i4_mb_distortion = ai4_sad[i]; |
558 | | |
559 | | /* compute cost */ |
560 | 358k | i4_mb_cost = i4_mb_distortion + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx] |
561 | 358k | + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] )); |
562 | | |
563 | 358k | if (i4_mb_cost < i4_mb_cost_least) |
564 | 69.9k | { |
565 | 69.9k | i4_mb_cost_least = i4_mb_cost; |
566 | | |
567 | 69.9k | i4_distortion_least = i4_mb_distortion; |
568 | | |
569 | 69.9k | i2_mv_u_x = mv_x_tmp; |
570 | | |
571 | 69.9k | i2_mv_u_y = mv_y_tmp; |
572 | | |
573 | 69.9k | #ifndef HP_PL /*choosing whether left or right half_x*/ |
574 | 69.9k | ps_me_ctxt->apu1_subpel_buffs[0] = pu1_ref_mb_half_x_temp - i; |
575 | 69.9k | pu1_best_hpel_buf = pu1_ref_mb_half_x_temp - i; |
576 | | |
577 | 69.9k | i4_srch_pos_idx = 0; |
578 | 69.9k | #endif |
579 | 69.9k | } |
580 | | |
581 | 358k | } |
582 | | |
583 | | /* Half y plane */ |
584 | 538k | for(i = 0; i < 2; i++) |
585 | 358k | { |
586 | 358k | WORD32 mv_x_tmp = (mv_x << 2); |
587 | 358k | WORD32 mv_y_tmp = (mv_y << 2) + 2; |
588 | | |
589 | 358k | mv_y_tmp -= (i * 4); |
590 | | |
591 | 358k | i4_mb_distortion = ai4_sad[2 + i]; |
592 | | |
593 | | /* compute cost */ |
594 | 358k | i4_mb_cost = i4_mb_distortion + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx] |
595 | 358k | + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] )); |
596 | | |
597 | 358k | if (i4_mb_cost < i4_mb_cost_least) |
598 | 49.0k | { |
599 | 49.0k | i4_mb_cost_least = i4_mb_cost; |
600 | | |
601 | 49.0k | i4_distortion_least = i4_mb_distortion; |
602 | | |
603 | 49.0k | i2_mv_u_x = mv_x_tmp; |
604 | | |
605 | 49.0k | i2_mv_u_y = mv_y_tmp; |
606 | | |
607 | 49.0k | #ifndef HP_PL/*choosing whether top or bottom half_y*/ |
608 | 49.0k | ps_me_ctxt->apu1_subpel_buffs[1] = pu1_ref_mb_half_y_temp - i*(i4_ref_strd); |
609 | 49.0k | pu1_best_hpel_buf = pu1_ref_mb_half_y_temp - i*(i4_ref_strd); |
610 | | |
611 | 49.0k | i4_srch_pos_idx = 1; |
612 | 49.0k | #endif |
613 | 49.0k | } |
614 | | |
615 | 358k | } |
616 | | |
617 | | /* Half xy plane */ |
618 | 538k | for(j = 0; j < 2; j++) |
619 | 358k | { |
620 | 1.07M | for(i = 0; i < 2; i++) |
621 | 717k | { |
622 | 717k | WORD32 mv_x_tmp = (mv_x << 2) + 2; |
623 | 717k | WORD32 mv_y_tmp = (mv_y << 2) + 2; |
624 | | |
625 | 717k | mv_x_tmp -= (i * 4); |
626 | 717k | mv_y_tmp -= (j * 4); |
627 | | |
628 | 717k | i4_mb_distortion = ai4_sad[4 + i + 2 * j]; |
629 | | |
630 | | /* compute cost */ |
631 | 717k | i4_mb_cost = i4_mb_distortion + (WORD32)(u4_lambda_motion * ( pu1_mv_bits[ mv_x_tmp - ps_mb_part->s_mv_pred.i2_mvx] |
632 | 717k | + pu1_mv_bits[mv_y_tmp - ps_mb_part->s_mv_pred.i2_mvy] )); |
633 | | |
634 | 717k | if (i4_mb_cost < i4_mb_cost_least) |
635 | 39.3k | { |
636 | 39.3k | i4_mb_cost_least = i4_mb_cost; |
637 | | |
638 | 39.3k | i4_distortion_least = i4_mb_distortion; |
639 | | |
640 | 39.3k | i2_mv_u_x = mv_x_tmp; |
641 | | |
642 | 39.3k | i2_mv_u_y = mv_y_tmp; |
643 | | |
644 | 39.3k | #ifndef HP_PL /*choosing between four half_xy */ |
645 | 39.3k | ps_me_ctxt->apu1_subpel_buffs[2] = pu1_ref_mb_half_xy_temp - j*(i4_ref_strd) - i; |
646 | 39.3k | pu1_best_hpel_buf = pu1_ref_mb_half_xy_temp - j*(i4_ref_strd) - i; |
647 | | |
648 | 39.3k | i4_srch_pos_idx = 2; |
649 | 39.3k | #endif |
650 | 39.3k | } |
651 | | |
652 | 717k | } |
653 | 358k | } |
654 | | |
655 | 179k | if (i4_mb_cost_least < ps_mb_part->i4_mb_cost) |
656 | 87.7k | { |
657 | 87.7k | ps_mb_part->i4_mb_cost = i4_mb_cost_least; |
658 | 87.7k | ps_mb_part->i4_mb_distortion = i4_distortion_least; |
659 | 87.7k | ps_mb_part->s_mv_curr.i2_mvx = i2_mv_u_x; |
660 | 87.7k | ps_mb_part->s_mv_curr.i2_mvy = i2_mv_u_y; |
661 | 87.7k | ps_mb_part->pu1_best_hpel_buf = pu1_best_hpel_buf; |
662 | 87.7k | ps_mb_part->i4_srch_pos_idx = i4_srch_pos_idx; |
663 | 87.7k | } |
664 | 179k | } |
665 | | |
666 | | /** |
667 | | ******************************************************************************* |
668 | | * |
669 | | * @brief This function computes cost of skip macroblocks |
670 | | * |
671 | | * @par Description: |
672 | | * |
673 | | * @param[in] ps_me_ctxt |
674 | | * pointer to me ctxt |
675 | | * |
676 | | * |
677 | | * @returns none |
678 | | * |
679 | | * @remarks |
680 | | * NOTE: while computing the skip cost, do not enable early exit from compute |
681 | | * sad function because, a negative bias gets added later |
682 | | * Note that the last ME candidate in me ctxt is taken as skip motion vector |
683 | | * |
684 | | ******************************************************************************* |
685 | | */ |
686 | | void ime_compute_skip_cost |
687 | | ( |
688 | | me_ctxt_t *ps_me_ctxt, |
689 | | ime_mv_t *ps_skip_mv, |
690 | | mb_part_ctxt *ps_smb_part_info, |
691 | | UWORD32 u4_use_stat_sad, |
692 | | WORD32 i4_reflist, |
693 | | WORD32 i4_is_slice_type_b |
694 | | ) |
695 | 151k | { |
696 | | |
697 | | /* SAD(distortion metric) of an mb */ |
698 | 151k | WORD32 i4_mb_distortion; |
699 | | |
700 | | /* cost = distortion + u4_lambda_motion * rate */ |
701 | 151k | WORD32 i4_mb_cost; |
702 | | |
703 | | /* temp var */ |
704 | 151k | UWORD8 *pu1_ref = NULL; |
705 | | |
706 | 151k | ime_mv_t s_skip_mv; |
707 | | |
708 | 151k | s_skip_mv.i2_mvx = (ps_skip_mv->i2_mvx +2)>>2; |
709 | 151k | s_skip_mv.i2_mvy = (ps_skip_mv->i2_mvy +2)>>2; |
710 | | |
711 | | /* Check if the skip mv is out of bounds or subpel */ |
712 | 151k | { |
713 | | /* skip mv */ |
714 | 151k | ime_mv_t s_clip_skip_mv; |
715 | | |
716 | 151k | s_clip_skip_mv.i2_mvx = CLIP3(ps_me_ctxt->i4_srch_range_w, ps_me_ctxt->i4_srch_range_e, s_skip_mv.i2_mvx); |
717 | 151k | s_clip_skip_mv.i2_mvy = CLIP3(ps_me_ctxt->i4_srch_range_n, ps_me_ctxt->i4_srch_range_s, s_skip_mv.i2_mvy); |
718 | | |
719 | 151k | if ((s_clip_skip_mv.i2_mvx != s_skip_mv.i2_mvx) || |
720 | 151k | (s_clip_skip_mv.i2_mvy != s_skip_mv.i2_mvy) || |
721 | 151k | (ps_skip_mv->i2_mvx & 0x3) || |
722 | 151k | (ps_skip_mv->i2_mvy & 0x3)) |
723 | 17.4k | { |
724 | 17.4k | return ; |
725 | 17.4k | } |
726 | 151k | } |
727 | | |
728 | | |
729 | | /* adjust ref pointer */ |
730 | 134k | pu1_ref = ps_me_ctxt->apu1_ref_buf_luma[i4_reflist] + s_skip_mv.i2_mvx |
731 | 134k | + (s_skip_mv.i2_mvy * ps_me_ctxt->i4_rec_strd); |
732 | | |
733 | 134k | if(u4_use_stat_sad == 1) |
734 | 134k | { |
735 | 134k | UWORD32 u4_is_nonzero; |
736 | | |
737 | 134k | ps_me_ctxt->pf_ime_compute_sad_stat_luma_16x16( |
738 | 134k | ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd, |
739 | 134k | ps_me_ctxt->i4_rec_strd, ps_me_ctxt->pu2_sad_thrsh, |
740 | 134k | &i4_mb_distortion, &u4_is_nonzero); |
741 | | |
742 | 134k | if (u4_is_nonzero == 0 || i4_mb_distortion <= ps_me_ctxt->i4_min_sad) |
743 | 23.7k | { |
744 | 23.7k | ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */ |
745 | 23.7k | ps_me_ctxt->i4_min_sad = (u4_is_nonzero == 0) ? 0 : i4_mb_distortion; |
746 | 23.7k | } |
747 | 134k | } |
748 | 18.4E | else |
749 | 18.4E | { |
750 | 18.4E | ps_me_ctxt->pf_ime_compute_sad_16x16[ps_me_ctxt->u4_enable_fast_sad]( |
751 | 18.4E | ps_me_ctxt->pu1_src_buf_luma, pu1_ref, ps_me_ctxt->i4_src_strd, |
752 | 18.4E | ps_me_ctxt->i4_rec_strd, INT_MAX, &i4_mb_distortion); |
753 | | |
754 | 18.4E | if(i4_mb_distortion <= ps_me_ctxt->i4_min_sad) |
755 | 0 | { |
756 | 0 | ps_me_ctxt->i4_min_sad = i4_mb_distortion; |
757 | 0 | ps_me_ctxt->u4_min_sad_reached = 1; /* found min sad */ |
758 | 0 | } |
759 | 18.4E | } |
760 | | |
761 | | |
762 | | /* for skip mode cost & distortion are identical |
763 | | * But we shall add a bias to favor skip mode. |
764 | | * Doc. JVT B118 Suggests SKIP_BIAS as 16. |
765 | | * TODO : Empirical analysis of SKIP_BIAS is necessary */ |
766 | | |
767 | 134k | i4_mb_cost = i4_mb_distortion - (ps_me_ctxt->u4_lambda_motion * (ps_me_ctxt->i4_skip_bias[0] + ps_me_ctxt->i4_skip_bias[1] * i4_is_slice_type_b)); |
768 | | |
769 | 134k | if (i4_mb_cost <= ps_smb_part_info->i4_mb_cost) |
770 | 134k | { |
771 | 134k | ps_smb_part_info->i4_mb_cost = i4_mb_cost; |
772 | 134k | ps_smb_part_info->i4_mb_distortion = i4_mb_distortion; |
773 | 134k | ps_smb_part_info->s_mv_curr.i2_mvx = s_skip_mv.i2_mvx; |
774 | 134k | ps_smb_part_info->s_mv_curr.i2_mvy = s_skip_mv.i2_mvy; |
775 | 134k | } |
776 | 134k | } |
777 | | |