/src/libvpx/vp9/encoder/vp9_mcomp.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #include <assert.h> |
12 | | #include <limits.h> |
13 | | #include <math.h> |
14 | | #include <stdio.h> |
15 | | |
16 | | #include "./vpx_config.h" |
17 | | #include "./vpx_dsp_rtcd.h" |
18 | | |
19 | | #include "vpx_dsp/vpx_dsp_common.h" |
20 | | #include "vpx_mem/vpx_mem.h" |
21 | | #include "vpx_ports/mem.h" |
22 | | |
23 | | #include "vp9/common/vp9_common.h" |
24 | | #include "vp9/common/vp9_mvref_common.h" |
25 | | #include "vp9/common/vp9_reconinter.h" |
26 | | |
27 | | #include "vp9/encoder/vp9_encoder.h" |
28 | | #include "vp9/encoder/vp9_mcomp.h" |
29 | | |
30 | | // #define NEW_DIAMOND_SEARCH |
31 | | |
32 | 23.3M | void vp9_set_mv_search_range(MvLimits *mv_limits, const MV *mv) { |
33 | 23.3M | int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); |
34 | 23.3M | int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); |
35 | 23.3M | int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL; |
36 | 23.3M | int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL; |
37 | | |
38 | 23.3M | col_min = VPXMAX(col_min, (MV_LOW >> 3) + 1); |
39 | 23.3M | row_min = VPXMAX(row_min, (MV_LOW >> 3) + 1); |
40 | 23.3M | col_max = VPXMIN(col_max, (MV_UPP >> 3) - 1); |
41 | 23.3M | row_max = VPXMIN(row_max, (MV_UPP >> 3) - 1); |
42 | | |
43 | | // Get intersection of UMV window and valid MV window to reduce # of checks |
44 | | // in diamond search. |
45 | 23.3M | if (mv_limits->col_min < col_min) mv_limits->col_min = col_min; |
46 | 23.3M | if (mv_limits->col_max > col_max) mv_limits->col_max = col_max; |
47 | 23.3M | if (mv_limits->row_min < row_min) mv_limits->row_min = row_min; |
48 | 23.3M | if (mv_limits->row_max > row_max) mv_limits->row_max = row_max; |
49 | 23.3M | } |
50 | | |
51 | | void vp9_set_subpel_mv_search_range(MvLimits *subpel_mv_limits, |
52 | | const MvLimits *umv_window_limits, |
53 | 22.2M | const MV *ref_mv) { |
54 | 22.2M | subpel_mv_limits->col_min = VPXMAX(umv_window_limits->col_min * 8, |
55 | 22.2M | ref_mv->col - MAX_FULL_PEL_VAL * 8); |
56 | 22.2M | subpel_mv_limits->col_max = VPXMIN(umv_window_limits->col_max * 8, |
57 | 22.2M | ref_mv->col + MAX_FULL_PEL_VAL * 8); |
58 | 22.2M | subpel_mv_limits->row_min = VPXMAX(umv_window_limits->row_min * 8, |
59 | 22.2M | ref_mv->row - MAX_FULL_PEL_VAL * 8); |
60 | 22.2M | subpel_mv_limits->row_max = VPXMIN(umv_window_limits->row_max * 8, |
61 | 22.2M | ref_mv->row + MAX_FULL_PEL_VAL * 8); |
62 | | |
63 | 22.2M | subpel_mv_limits->col_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->col_min); |
64 | 22.2M | subpel_mv_limits->col_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->col_max); |
65 | 22.2M | subpel_mv_limits->row_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->row_min); |
66 | 22.2M | subpel_mv_limits->row_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->row_max); |
67 | 22.2M | } |
68 | | |
69 | 23.0M | int vp9_init_search_range(int size) { |
70 | 23.0M | int sr = 0; |
71 | | // Minimum search size no matter what the passed in value. |
72 | 23.0M | size = VPXMAX(16, size); |
73 | | |
74 | 152M | while ((size << sr) < MAX_FULL_PEL_VAL) sr++; |
75 | | |
76 | 23.0M | sr = VPXMIN(sr, MAX_MVSEARCH_STEPS - 2); |
77 | 23.0M | return sr; |
78 | 23.0M | } |
79 | | |
80 | | int vp9_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost, |
81 | 28.6M | int *mvcost[2], int weight) { |
82 | 28.6M | const MV diff = { mv->row - ref->row, mv->col - ref->col }; |
83 | 28.6M | return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7); |
84 | 28.6M | } |
85 | | |
86 | | #define PIXEL_TRANSFORM_ERROR_SCALE 4 |
87 | | static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost, |
88 | 392M | int *mvcost[2], int error_per_bit) { |
89 | 392M | if (mvcost) { |
90 | 392M | const MV diff = { mv->row - ref->row, mv->col - ref->col }; |
91 | 392M | return (int)ROUND64_POWER_OF_TWO( |
92 | 392M | (int64_t)mv_cost(&diff, mvjcost, mvcost) * error_per_bit, |
93 | 392M | RDDIV_BITS + VP9_PROB_COST_SHIFT - RD_EPB_SHIFT + |
94 | 392M | PIXEL_TRANSFORM_ERROR_SCALE); |
95 | 392M | } |
96 | 0 | return 0; |
97 | 392M | } |
98 | 0 | void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) { |
99 | 0 | int len; |
100 | 0 | int ss_count = 0; |
101 | |
|
102 | 0 | for (len = MAX_FIRST_STEP; len > 0; len /= 2) { |
103 | | // Generate offsets for 4 search sites per step. |
104 | 0 | const MV ss_mvs[] = { { -len, 0 }, { len, 0 }, { 0, -len }, { 0, len } }; |
105 | 0 | int i; |
106 | 0 | for (i = 0; i < 4; ++i, ++ss_count) { |
107 | 0 | cfg->ss_mv[ss_count] = ss_mvs[i]; |
108 | 0 | cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col; |
109 | 0 | } |
110 | 0 | } |
111 | |
|
112 | 0 | cfg->searches_per_step = 4; |
113 | 0 | cfg->total_steps = ss_count / cfg->searches_per_step; |
114 | 0 | } |
115 | | |
116 | 84.5k | void vp9_init3smotion_compensation(search_site_config *cfg, int stride) { |
117 | 84.5k | int len; |
118 | 84.5k | int ss_count = 0; |
119 | | |
120 | 1.01M | for (len = MAX_FIRST_STEP; len > 0; len /= 2) { |
121 | | // Generate offsets for 8 search sites per step. |
122 | 930k | const MV ss_mvs[8] = { { -len, 0 }, { len, 0 }, { 0, -len }, |
123 | 930k | { 0, len }, { -len, -len }, { -len, len }, |
124 | 930k | { len, -len }, { len, len } }; |
125 | 930k | int i; |
126 | 8.37M | for (i = 0; i < 8; ++i, ++ss_count) { |
127 | 7.44M | cfg->ss_mv[ss_count] = ss_mvs[i]; |
128 | 7.44M | cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col; |
129 | 7.44M | } |
130 | 930k | } |
131 | | |
132 | 84.5k | cfg->searches_per_step = 8; |
133 | 84.5k | cfg->total_steps = ss_count / cfg->searches_per_step; |
134 | 84.5k | } |
135 | | |
// Extracts the low three bits of a motion vector component; the result is
// passed as the sub-pixel offset argument of the svf/svaf calls.
static INLINE int sp(int x) {
  const int frac_mask = 7;
  return x & frac_mask;
}
138 | | |
// Returns the address inside |buf| of the sample at row (r >> 3) and column
// (c >> 3), where consecutive rows are |stride| bytes apart.
static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
  const int full_row = r >> 3;
  const int full_col = c >> 3;
  return buf + (full_row * stride + full_col);
}
142 | | |
/* CHECK_BETTER(v, r, c): evaluates the candidate position (r, c) and records
 * it as the new best if its cost is lower than besterr.
 *
 * The macro is not self-contained. It reads these names from the enclosing
 * scope: minc, maxc, minr, maxr, rr, rc, second_pred, vfp, y, y_stride, z,
 * src_stride, mvjcost, mvcost and error_per_bit. It writes sse, thismse,
 * besterr, br, bc, *distortion and *sse1.
 *
 * v receives the combined cost (sub-pixel variance plus mv_err_cost), or
 * INT_MAX when (r, c) lies outside [minr, maxr] x [minc, maxc].
 *
 * The arguments r and c are expanded several times without parentheses, so
 * callers must pass side-effect-free expressions.
 */
#if CONFIG_VP9_HIGHBITDEPTH
/* High-bitdepth variant: the cost is accumulated in 64 bits and v saturates
 * to INT_MAX instead of overflowing. */
#define CHECK_BETTER(v, r, c) \
  do { \
    if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
      int64_t tmpmse; \
      const MV cb_mv = { r, c }; \
      const MV cb_ref_mv = { rr, rc }; \
      if (second_pred == NULL) { \
        thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
                           src_stride, &sse); \
      } else { \
        thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
                            src_stride, &sse, second_pred); \
      } \
      tmpmse = thismse; \
      tmpmse += \
          mv_err_cost(&cb_mv, &cb_ref_mv, mvjcost, mvcost, error_per_bit); \
      if (tmpmse >= INT_MAX) { \
        v = INT_MAX; \
      } else if ((v = (uint32_t)tmpmse) < besterr) { \
        besterr = v; \
        br = r; \
        bc = c; \
        *distortion = thismse; \
        *sse1 = sse; \
      } \
    } else { \
      v = INT_MAX; \
    } \
  } while (0)
#else
/* Low-bitdepth variant: the cost is summed directly in int arithmetic. */
#define CHECK_BETTER(v, r, c) \
  do { \
    if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
      const MV cb_mv = { r, c }; \
      const MV cb_ref_mv = { rr, rc }; \
      if (second_pred == NULL) \
        thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
                           src_stride, &sse); \
      else \
        thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
                            src_stride, &sse, second_pred); \
      if ((v = mv_err_cost(&cb_mv, &cb_ref_mv, mvjcost, mvcost, \
                           error_per_bit) + \
               thismse) < besterr) { \
        besterr = v; \
        br = r; \
        bc = c; \
        *distortion = thismse; \
        *sse1 = sse; \
      } \
    } else { \
      v = INT_MAX; \
    } \
  } while (0)

#endif
/* FIRST_LEVEL_CHECKS: evaluates the four axis-aligned neighbours of (tr, tc)
 * at distance hstep with CHECK_BETTER, then one diagonal neighbour.
 *
 * whichdir encodes the cheaper side per axis: bit 0 is 0 when left is
 * strictly cheaper than right (else 1), and bit 1 is 0 when up is strictly
 * cheaper than down (else 2). The diagonal checked is the one in that
 * quadrant. Requires tr, tc, hstep and whichdir, plus everything CHECK_BETTER
 * needs, in the enclosing scope.
 */
#define FIRST_LEVEL_CHECKS \
  do { \
    unsigned int left, right, up, down, diag; \
    CHECK_BETTER(left, tr, tc - hstep); \
    CHECK_BETTER(right, tr, tc + hstep); \
    CHECK_BETTER(up, tr - hstep, tc); \
    CHECK_BETTER(down, tr + hstep, tc); \
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); \
    switch (whichdir) { \
      case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; \
      case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; \
      case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; \
      case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; \
    } \
  } while (0)
217 | | |
/* SECOND_LEVEL_CHECKS: follow-up probes chosen from how the best position
 * (br, bc) moved away from the search centre (tr, tc).
 *
 * - Both row and column moved (diagonal): two points further along that
 *   diagonal direction are checked.
 * - Only the column moved: two points at twice the column displacement and
 *   +/- hstep rows are checked, plus one point picked by whichdir.
 * - Only the row moved: the symmetric case with rows and columns swapped.
 * - Neither moved: nothing is checked.
 *
 * kr and kc are captured before the CHECK_BETTER calls, which may update br
 * and bc. Requires tr, tc, br, bc, hstep and whichdir, plus everything
 * CHECK_BETTER needs, in the enclosing scope.
 */
#define SECOND_LEVEL_CHECKS \
  do { \
    int kr, kc; \
    unsigned int second; \
    if (tr != br && tc != bc) { \
      kr = br - tr; \
      kc = bc - tc; \
      CHECK_BETTER(second, tr + kr, tc + 2 * kc); \
      CHECK_BETTER(second, tr + 2 * kr, tc + kc); \
    } else if (tr == br && tc != bc) { \
      kc = bc - tc; \
      CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \
      CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \
      switch (whichdir) { \
        case 0: \
        case 1: CHECK_BETTER(second, tr + hstep, tc + kc); break; \
        case 2: \
        case 3: CHECK_BETTER(second, tr - hstep, tc + kc); break; \
      } \
    } else if (tr != br && tc == bc) { \
      kr = br - tr; \
      CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \
      CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \
      switch (whichdir) { \
        case 0: \
        case 2: CHECK_BETTER(second, tr + kr, tc + hstep); break; \
        case 1: \
        case 3: CHECK_BETTER(second, tr + kr, tc - hstep); break; \
      } \
    } \
  } while (0)
249 | | |
/* SETUP_SUBPEL_SEARCH: declares and initialises the locals shared by the
 * sub-pixel search functions. It must be the first thing in the function body
 * because it expands to declarations.
 *
 * Expects x, bestmv, ref_mv and iters_per_step in the enclosing scope.
 * On entry bestmv holds a full-pel position: offset is computed from it
 * before scaling, and br/bc/tr/tc start at bestmv * 8. The sub-pel search
 * limits are loaded into minc/maxc/minr/maxr, and as a side effect
 * bestmv->row and bestmv->col are multiplied by 8. hstep starts at 4.
 */
#define SETUP_SUBPEL_SEARCH \
  const uint8_t *const z = x->plane[0].src.buf; \
  const int src_stride = x->plane[0].src.stride; \
  const MACROBLOCKD *xd = &x->e_mbd; \
  unsigned int besterr = UINT_MAX; \
  unsigned int sse; \
  unsigned int whichdir; \
  int thismse; \
  const unsigned int halfiters = iters_per_step; \
  const unsigned int quarteriters = iters_per_step; \
  const unsigned int eighthiters = iters_per_step; \
  const int y_stride = xd->plane[0].pre[0].stride; \
  const int offset = bestmv->row * y_stride + bestmv->col; \
  const uint8_t *const y = xd->plane[0].pre[0].buf; \
  \
  int rr = ref_mv->row; \
  int rc = ref_mv->col; \
  int br = bestmv->row * 8; \
  int bc = bestmv->col * 8; \
  int hstep = 4; \
  int minc, maxc, minr, maxr; \
  int tr = br; \
  int tc = bc; \
  MvLimits subpel_mv_limits; \
  \
  vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); \
  minc = subpel_mv_limits.col_min; \
  maxc = subpel_mv_limits.col_max; \
  minr = subpel_mv_limits.row_min; \
  maxr = subpel_mv_limits.row_max; \
  \
  bestmv->row *= 8; \
  bestmv->col *= 8
283 | | |
284 | | static unsigned int setup_center_error( |
285 | | const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv, |
286 | | int error_per_bit, const vp9_variance_fn_ptr_t *vfp, |
287 | | const uint8_t *const src, const int src_stride, const uint8_t *const y, |
288 | | int y_stride, const uint8_t *second_pred, int w, int h, int offset, |
289 | 22.2M | int *mvjcost, int *mvcost[2], uint32_t *sse1, uint32_t *distortion) { |
290 | 22.2M | #if CONFIG_VP9_HIGHBITDEPTH |
291 | 22.2M | uint64_t besterr; |
292 | 22.2M | if (second_pred != NULL) { |
293 | 0 | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
294 | 0 | DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]); |
295 | 0 | vpx_highbd_comp_avg_pred(comp_pred16, CONVERT_TO_SHORTPTR(second_pred), w, |
296 | 0 | h, CONVERT_TO_SHORTPTR(y + offset), y_stride); |
297 | 0 | besterr = |
298 | 0 | vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1); |
299 | 0 | } else { |
300 | 0 | DECLARE_ALIGNED(32, uint8_t, comp_pred[64 * 64]); |
301 | 0 | vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); |
302 | 0 | besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); |
303 | 0 | } |
304 | 22.2M | } else { |
305 | 22.2M | besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1); |
306 | 22.2M | } |
307 | 22.2M | *distortion = (uint32_t)besterr; |
308 | 22.2M | besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); |
309 | 22.2M | if (besterr >= UINT_MAX) return UINT_MAX; |
310 | 22.2M | return (uint32_t)besterr; |
311 | | #else |
312 | | uint32_t besterr; |
313 | | (void)xd; |
314 | | if (second_pred != NULL) { |
315 | | DECLARE_ALIGNED(32, uint8_t, comp_pred[64 * 64]); |
316 | | vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); |
317 | | besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); |
318 | | } else { |
319 | | besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1); |
320 | | } |
321 | | *distortion = besterr; |
322 | | besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); |
323 | | return besterr; |
324 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
325 | 22.2M | } |
326 | | |
// Returns n / d rounded to the nearest integer, with ties rounded away from
// zero. |d| must be non-zero.
static INLINE int64_t divide_and_round(const int64_t n, const int64_t d) {
  const int64_t half = d / 2;
  const int signs_differ = (n < 0) != (d < 0);
  // Bias towards the sign of the quotient before the truncating division.
  if (signs_differ) return (n - half) / d;
  return (n + half) / d;
}
330 | | |
// Returns 1 when the centre cost cost_list[0] is strictly smaller than each of
// the four neighbour costs cost_list[1..4], and 0 otherwise.
static INLINE int is_cost_list_wellbehaved(int *cost_list) {
  int i;
  for (i = 1; i <= 4; ++i) {
    if (cost_list[0] >= cost_list[i]) return 0;
  }
  return 1;
}
335 | | |
// Estimates where the cost surface has its minimum, at a precision of
// 1/2^bits. The surface is modelled as
//   S = A(x - x0)^2 + B(y - y0)^2 + C
// and sampled at five points: S0 at the centre (y, x) = (0, 0), S1 at (0, -1),
// S2 at (1, 0), S3 at (0, 1) and S4 at (-1, 0). Solving for the minimum gives
//   x0 = 1/2 (S1 - S3) / (S1 + S3 - 2*S0),
//   y0 = 1/2 (S4 - S2) / (S4 + S2 - 2*S0).
// This function is the integer form of that solution; the row estimate is
// written to |*ir| and the column estimate to |*ic|. The denominators must be
// non-zero.
static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) {
  // Widen every sample once so all sums and products are done in 64 bits.
  const int64_t s0 = cost_list[0];
  const int64_t s1 = cost_list[1];
  const int64_t s2 = cost_list[2];
  const int64_t s3 = cost_list[3];
  const int64_t s4 = cost_list[4];
  const int scale = 1 << (bits - 1);

  const int64_t col_num = s1 - s3;
  const int64_t col_den = s1 - 2 * s0 + s3;
  const int64_t row_num = s4 - s2;
  const int64_t row_den = s4 - 2 * s0 + s2;

  *ic = (int)divide_and_round(col_num * scale, col_den);
  *ir = (int)divide_and_round(row_num * scale, row_den);
}
353 | | |
// Sub-pixel "search" that performs no refinement: it only evaluates the error
// at the incoming full-pel position and returns it.
//
// Side effects: SETUP_SUBPEL_SEARCH multiplies bestmv->row/col by 8, and
// setup_center_error() fills *distortion and *sse1. The remaining parameters
// and the locals declared by the setup macro are unused here and are cast to
// void to keep the common function signature without compiler warnings.
uint32_t vp9_skip_sub_pixel_tree(
    const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
    uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
    int h, int use_accurate_subpel_search) {
  SETUP_SUBPEL_SEARCH;
  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
                               src_stride, y, y_stride, second_pred, w, h,
                               offset, mvjcost, mvcost, sse1, distortion);
  (void)halfiters;
  (void)quarteriters;
  (void)eighthiters;
  (void)whichdir;
  (void)allow_hp;
  (void)forced_stop;
  (void)hstep;
  (void)rr;
  (void)rc;
  (void)minr;
  (void)minc;
  (void)maxr;
  (void)maxc;
  (void)tr;
  (void)tc;
  (void)sse;
  (void)thismse;
  (void)cost_list;
  (void)use_accurate_subpel_search;

  return besterr;
}
386 | | |
// Most aggressively pruned sub-pixel search.
//
// Starts from the full-pel position in |bestmv|. When |cost_list| holds five
// valid costs and the centre is strictly the cheapest, a single candidate is
// probed at the minimum of the fitted cost surface. Otherwise the regular
// first/second level checks run at the initial step size and, unless
// forced_stop == 2, again at half that step. A final pass at a further halved
// step runs only when allow_hp is set, use_mv_hp(ref_mv) holds and
// forced_stop == 0.
//
// On return |bestmv| holds the best position found (the same units as br/bc,
// i.e. the full-pel input times 8 plus refinements), *distortion and *sse1
// describe it, and the return value is its total cost.
// |use_accurate_subpel_search| is ignored.
uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore(
    const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
    uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
    int h, int use_accurate_subpel_search) {
  SETUP_SUBPEL_SEARCH;
  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
                               src_stride, y, y_stride, second_pred, w, h,
                               offset, mvjcost, mvcost, sse1, distortion);
  // Silence unused-variable warnings for names declared by the setup macro.
  (void)halfiters;
  (void)quarteriters;
  (void)eighthiters;
  (void)whichdir;
  (void)allow_hp;
  (void)forced_stop;
  (void)hstep;
  (void)use_accurate_subpel_search;

  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
      cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
    int ir, ic;
    unsigned int minpt = INT_MAX;
    // Surface minimum at 1/2^2 precision; doubled below to match tr/tc units.
    get_cost_surf_min(cost_list, &ir, &ic, 2);
    if (ir != 0 || ic != 0) {
      CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic);
    }
  } else {
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }

    // Re-centre the next pass on the best position found so far.
    tr = br;
    tc = bc;

    // Each subsequent iteration checks at least one point in common with
    // the last iteration could be 2 ( if diag selected) 1/4 pel
    // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
    if (forced_stop != 2) {
      hstep >>= 1;
      FIRST_LEVEL_CHECKS;
      if (quarteriters > 1) {
        SECOND_LEVEL_CHECKS;
      }
    }
  }

  tr = br;
  tc = bc;

  if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}
452 | | |
// Pruned sub-pixel search that replaces the first (largest-step) pass with a
// single probe whenever the cost list allows it.
//
// Starts from the full-pel position in |bestmv|. When |cost_list| holds five
// valid costs and the centre is strictly the cheapest, one candidate at the
// fitted cost-surface minimum is probed; otherwise the first/second level
// checks run at the initial step. Unless forced_stop == 2 a pass at half the
// step always follows, and a further halved pass runs only when allow_hp is
// set, use_mv_hp(ref_mv) holds and forced_stop == 0.
//
// On return |bestmv| holds the best position found, *distortion and *sse1
// describe it, and the return value is its total cost.
// |use_accurate_subpel_search| is ignored.
uint32_t vp9_find_best_sub_pixel_tree_pruned_more(
    const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
    uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
    int h, int use_accurate_subpel_search) {
  SETUP_SUBPEL_SEARCH;
  (void)use_accurate_subpel_search;

  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
                               src_stride, y, y_stride, second_pred, w, h,
                               offset, mvjcost, mvcost, sse1, distortion);
  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
      cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
    unsigned int minpt;
    int ir, ic;
    // Surface minimum at 1/2^1 precision; scaled by hstep to tr/tc units.
    get_cost_surf_min(cost_list, &ir, &ic, 1);
    if (ir != 0 || ic != 0) {
      CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
    }
  } else {
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  // Each subsequent iteration checks at least one point in common with
  // the last iteration could be 2 ( if diag selected) 1/4 pel

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    // Re-centre on the best position found so far, then halve the step.
    tr = br;
    tc = bc;
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
    tr = br;
    tc = bc;
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }
  // These lines ensure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
  (void)tr;
  (void)tc;

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}
514 | | |
// Pruned sub-pixel search that uses the full-pel cost list to pick a quadrant.
//
// Starts from the full-pel position in |bestmv|. When |cost_list| holds five
// valid costs, the cheaper horizontal neighbour (cost_list[1] vs cost_list[3])
// and the cheaper vertical neighbour (cost_list[2] vs cost_list[4]) select one
// quadrant, and only the three points of that quadrant are checked at the
// initial step. Otherwise the regular first/second level checks run. Unless
// forced_stop == 2 a pass at half the step follows, and a further halved pass
// runs only when allow_hp is set, use_mv_hp(ref_mv) holds and
// forced_stop == 0.
//
// On return |bestmv| holds the best position found, *distortion and *sse1
// describe it, and the return value is its total cost.
// |use_accurate_subpel_search| is ignored.
uint32_t vp9_find_best_sub_pixel_tree_pruned(
    const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
    uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
    int h, int use_accurate_subpel_search) {
  SETUP_SUBPEL_SEARCH;
  (void)use_accurate_subpel_search;

  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
                               src_stride, y, y_stride, second_pred, w, h,
                               offset, mvjcost, mvcost, sse1, distortion);
  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
      cost_list[4] != INT_MAX) {
    unsigned int left, right, up, down, diag;
    // Bit 0: 0 = left side cheaper, 1 = right side. Bit 1: 0 = cost_list[2]
    // (row + 1) cheaper, 2 = cost_list[4] (row - 1). This differs from the
    // encoding produced by FIRST_LEVEL_CHECKS, which overwrites whichdir
    // before any SECOND_LEVEL_CHECKS uses it.
    whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) +
               (cost_list[2] < cost_list[4] ? 0 : 2);
    switch (whichdir) {
      case 0:
        CHECK_BETTER(left, tr, tc - hstep);
        CHECK_BETTER(down, tr + hstep, tc);
        CHECK_BETTER(diag, tr + hstep, tc - hstep);
        break;
      case 1:
        CHECK_BETTER(right, tr, tc + hstep);
        CHECK_BETTER(down, tr + hstep, tc);
        CHECK_BETTER(diag, tr + hstep, tc + hstep);
        break;
      case 2:
        CHECK_BETTER(left, tr, tc - hstep);
        CHECK_BETTER(up, tr - hstep, tc);
        CHECK_BETTER(diag, tr - hstep, tc - hstep);
        break;
      case 3:
        CHECK_BETTER(right, tr, tc + hstep);
        CHECK_BETTER(up, tr - hstep, tc);
        CHECK_BETTER(diag, tr - hstep, tc + hstep);
        break;
    }
  } else {
    FIRST_LEVEL_CHECKS;
    if (halfiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
  }

  // Re-centre the next pass on the best position found so far.
  tr = br;
  tc = bc;

  // Each subsequent iteration checks at least one point in common with
  // the last iteration could be 2 ( if diag selected) 1/4 pel

  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
  if (forced_stop != 2) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (quarteriters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }

  if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
    hstep >>= 1;
    FIRST_LEVEL_CHECKS;
    if (eighthiters > 1) {
      SECOND_LEVEL_CHECKS;
    }
    tr = br;
    tc = bc;
  }
  // These lines ensure static analysis doesn't warn that
  // tr and tc aren't used after the above point.
  (void)tr;
  (void)tc;

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}
598 | | |
/* clang-format off */
// Axis-aligned { row, col } offsets used by the sub-pixel search. Each row of
// the table holds the four directions for one step size: 4, then 2, then 1.
static const MV search_step_table[12] = {
  // left, right, up, down
  { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 },
  { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 },
  { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 }
};
/* clang-format on */
607 | | |
608 | | static int accurate_sub_pel_search( |
609 | | const MACROBLOCKD *xd, const MV *this_mv, const struct scale_factors *sf, |
610 | | const InterpKernel *kernel, const vp9_variance_fn_ptr_t *vfp, |
611 | | const uint8_t *const src_address, const int src_stride, |
612 | | const uint8_t *const pre_address, int y_stride, const uint8_t *second_pred, |
613 | 112M | int w, int h, uint32_t *sse) { |
614 | 112M | #if CONFIG_VP9_HIGHBITDEPTH |
615 | 112M | uint64_t besterr; |
616 | 112M | assert(sf->x_step_q4 == 16 && sf->y_step_q4 == 16); |
617 | 112M | assert(w != 0 && h != 0); |
618 | 112M | if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { |
619 | 0 | DECLARE_ALIGNED(16, uint16_t, pred16[64 * 64]); |
620 | 0 | vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(pre_address), y_stride, |
621 | 0 | pred16, w, this_mv, sf, w, h, 0, kernel, |
622 | 0 | MV_PRECISION_Q3, 0, 0, xd->bd); |
623 | 0 | if (second_pred != NULL) { |
624 | 0 | DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]); |
625 | 0 | vpx_highbd_comp_avg_pred(comp_pred16, CONVERT_TO_SHORTPTR(second_pred), w, |
626 | 0 | h, pred16, w); |
627 | 0 | besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src_address, |
628 | 0 | src_stride, sse); |
629 | 0 | } else { |
630 | 0 | besterr = |
631 | 0 | vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src_address, src_stride, sse); |
632 | 0 | } |
633 | 112M | } else { |
634 | 112M | DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]); |
635 | 112M | vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h, |
636 | 112M | 0, kernel, MV_PRECISION_Q3, 0, 0); |
637 | 112M | if (second_pred != NULL) { |
638 | 0 | DECLARE_ALIGNED(32, uint8_t, comp_pred[64 * 64]); |
639 | 0 | vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w); |
640 | 0 | besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse); |
641 | 112M | } else { |
642 | 112M | besterr = vfp->vf(pred, w, src_address, src_stride, sse); |
643 | 112M | } |
644 | 112M | } |
645 | 112M | if (besterr >= UINT_MAX) return UINT_MAX; |
646 | 112M | return (int)besterr; |
647 | | #else |
648 | | int besterr; |
649 | | DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]); |
650 | | assert(sf->x_step_q4 == 16 && sf->y_step_q4 == 16); |
651 | | assert(w != 0 && h != 0); |
652 | | (void)xd; |
653 | | |
654 | | vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h, |
655 | | 0, kernel, MV_PRECISION_Q3, 0, 0); |
656 | | if (second_pred != NULL) { |
657 | | DECLARE_ALIGNED(32, uint8_t, comp_pred[64 * 64]); |
658 | | vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w); |
659 | | besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse); |
660 | | } else { |
661 | | besterr = vfp->vf(pred, w, src_address, src_stride, sse); |
662 | | } |
663 | | return besterr; |
664 | | #endif // CONFIG_VP9_HIGHBITDEPTH |
665 | 112M | } |
666 | | |
// TODO(yunqing): this part can be further refactored.
/* CHECK_BETTER1(v, r, c): same contract as CHECK_BETTER, but the prediction
 * error comes from accurate_sub_pel_search() instead of the svf/svaf variance
 * functions.
 *
 * It reads these names from the enclosing scope: minc, maxc, minr, maxr, rr,
 * rc, xd, x, kernel, vfp, z, src_stride, y, y_stride, second_pred, w, h,
 * mvjcost, mvcost and error_per_bit. It writes sse, thismse, besterr, br, bc,
 * *distortion and *sse1.
 *
 * v receives the combined cost, or INT_MAX when (r, c) is outside the search
 * limits. r and c are expanded several times, so they must be free of side
 * effects.
 */
#if CONFIG_VP9_HIGHBITDEPTH
/* High-bitdepth variant: the cost is accumulated in 64 bits and v saturates
 * to INT_MAX instead of overflowing. */
#define CHECK_BETTER1(v, r, c) \
  do { \
    if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
      int64_t tmpmse; \
      const MV cb_mv = { r, c }; \
      const MV cb_ref_mv = { rr, rc }; \
      thismse = accurate_sub_pel_search(xd, &cb_mv, x->me_sf, kernel, vfp, z, \
                                        src_stride, y, y_stride, second_pred, \
                                        w, h, &sse); \
      tmpmse = thismse; \
      tmpmse += \
          mv_err_cost(&cb_mv, &cb_ref_mv, mvjcost, mvcost, error_per_bit); \
      if (tmpmse >= INT_MAX) { \
        v = INT_MAX; \
      } else if ((v = (uint32_t)tmpmse) < besterr) { \
        besterr = v; \
        br = r; \
        bc = c; \
        *distortion = thismse; \
        *sse1 = sse; \
      } \
    } else { \
      v = INT_MAX; \
    } \
  } while (0)
#else
/* Low-bitdepth variant: the cost is summed directly in int arithmetic. */
#define CHECK_BETTER1(v, r, c) \
  do { \
    if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
      const MV cb_mv = { r, c }; \
      const MV cb_ref_mv = { rr, rc }; \
      thismse = accurate_sub_pel_search(xd, &cb_mv, x->me_sf, kernel, vfp, z, \
                                        src_stride, y, y_stride, second_pred, \
                                        w, h, &sse); \
      if ((v = mv_err_cost(&cb_mv, &cb_ref_mv, mvjcost, mvcost, \
                           error_per_bit) + \
               thismse) < besterr) { \
        besterr = v; \
        br = r; \
        bc = c; \
        *distortion = thismse; \
        *sse1 = sse; \
      } \
    } else { \
      v = INT_MAX; \
    } \
  } while (0)

#endif
720 | | |
| | // Refines a full-pixel motion vector to sub-pixel precision. |
| | // |
| | // On entry *bestmv is in full-pixel units; it is scaled by 8 here and on |
| | // return holds the best position found, in 1/8-pixel units. Each round |
| | // evaluates the four search_step offsets around the current best, then one |
| | // diagonal chosen from those four costs, and optionally (iters_per_step > 0 |
| | // and an improvement was found) a second level of checks through the |
| | // CHECK_BETTER / CHECK_BETTER1 macros. hstep starts at 4 and is halved after |
| | // every round. |
| | // |
| | // The number of rounds is 3 - forced_stop, reduced from 3 to 2 unless |
| | // allow_hp is set and use_mv_hp(ref_mv) is true. |
| | // |
| | // use_accurate_subpel_search selects the interpolation kernel and, when |
| | // non-zero, routes candidates through accurate_sub_pel_search() instead of |
| | // the vfp->svf / vfp->svaf sub-pixel variance functions. |
| | // |
| | // *distortion and *sse1 are overwritten whenever a candidate improves on |
| | // the current best. cost_list is accepted but not used. Returns besterr, |
| | // the lowest cost (error plus mv_err_cost) seen. |
721 | | uint32_t vp9_find_best_sub_pixel_tree( |
722 | | const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, |
723 | | int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, |
724 | | int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], |
725 | | uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, |
726 | 22.2M | int h, int use_accurate_subpel_search) { |
727 | 22.2M | const uint8_t *const z = x->plane[0].src.buf; |
728 | 22.2M | const uint8_t *const src_address = z; |
729 | 22.2M | const int src_stride = x->plane[0].src.stride; |
730 | 22.2M | const MACROBLOCKD *xd = &x->e_mbd; |
731 | 22.2M | unsigned int besterr = UINT_MAX; |
732 | 22.2M | unsigned int sse; |
733 | 22.2M | int thismse; |
734 | 22.2M | const int y_stride = xd->plane[0].pre[0].stride; |
| | // bestmv is still in full-pixel units here, so it indexes the reference |
| | // buffer directly. |
735 | 22.2M | const int offset = bestmv->row * y_stride + bestmv->col; |
736 | 22.2M | const uint8_t *const y = xd->plane[0].pre[0].buf; |
737 | |
738 | 22.2M | int rr = ref_mv->row; |
739 | 22.2M | int rc = ref_mv->col; |
| | // br/bc track the best position in 1/8-pixel units. |
740 | 22.2M | int br = bestmv->row * 8; |
741 | 22.2M | int bc = bestmv->col * 8; |
742 | 22.2M | int hstep = 4; |
743 | 22.2M | int iter, round = 3 - forced_stop; |
744 | |
745 | 22.2M | int minc, maxc, minr, maxr; |
746 | 22.2M | int tr = br; |
747 | 22.2M | int tc = bc; |
748 | 22.2M | const MV *search_step = search_step_table; |
749 | 22.2M | int idx, best_idx = -1; |
| | // Slots 0..3 hold the costs of the four search_step candidates, slot 4 the |
| | // diagonal candidate. |
750 | 22.2M | unsigned int cost_array[5]; |
751 | 22.2M | int kr, kc; |
752 | 22.2M | MvLimits subpel_mv_limits; |
753 | |
754 | | // TODO(yunqing): need to add 4-tap filter optimization to speed up the |
755 | | // encoder. |
756 | 22.2M | const InterpKernel *kernel = |
757 | 22.2M | (use_accurate_subpel_search > 0) |
758 | 22.2M | ? ((use_accurate_subpel_search == USE_4_TAPS) |
759 | 8.81M | ? vp9_filter_kernels[FOURTAP] |
760 | 8.81M | : ((use_accurate_subpel_search == USE_8_TAPS) |
761 | 0 | ? vp9_filter_kernels[EIGHTTAP] |
762 | 0 | : vp9_filter_kernels[EIGHTTAP_SHARP])) |
763 | 22.2M | : vp9_filter_kernels[BILINEAR]; |
764 | |
765 | 22.2M | vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); |
766 | 22.2M | minc = subpel_mv_limits.col_min; |
767 | 22.2M | maxc = subpel_mv_limits.col_max; |
768 | 22.2M | minr = subpel_mv_limits.row_min; |
769 | 22.2M | maxr = subpel_mv_limits.row_max; |
770 | |
| | // Without high-precision MVs the third round (hstep == 1) is skipped. |
771 | 22.2M | if (!(allow_hp && use_mv_hp(ref_mv))) |
772 | 13.7M | if (round == 3) round = 2; |
773 | |
774 | 22.2M | bestmv->row *= 8; |
775 | 22.2M | bestmv->col *= 8; |
776 | |
777 | 22.2M | besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, |
778 | 22.2M | src_stride, y, y_stride, second_pred, w, h, |
779 | 22.2M | offset, mvjcost, mvcost, sse1, distortion); |
780 | |
781 | 22.2M | (void)cost_list; // to silence compiler warning |
782 | |
783 | 75.2M | for (iter = 0; iter < round; ++iter) { |
784 | | // Check vertical and horizontal sub-pixel positions. |
785 | 265M | for (idx = 0; idx < 4; ++idx) { |
786 | 212M | tr = br + search_step[idx].row; |
787 | 212M | tc = bc + search_step[idx].col; |
788 | 212M | if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { |
789 | 208M | MV this_mv; |
790 | 208M | this_mv.row = tr; |
791 | 208M | this_mv.col = tc; |
792 | |
793 | 208M | if (use_accurate_subpel_search) { |
794 | 83.7M | thismse = accurate_sub_pel_search(xd, &this_mv, x->me_sf, kernel, vfp, |
795 | 83.7M | src_address, src_stride, y, |
796 | 83.7M | y_stride, second_pred, w, h, &sse); |
797 | 124M | } else { |
| | // tr >> 3 / tc >> 3 give the full-pixel part of the position; sp() is |
| | // applied to tc / tr for the sub-pixel arguments. |
798 | 124M | const uint8_t *const pre_address = |
799 | 124M | y + (tr >> 3) * y_stride + (tc >> 3); |
800 | 124M | if (second_pred == NULL) |
801 | 124M | thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), |
802 | 124M | src_address, src_stride, &sse); |
803 | 0 | else |
804 | 0 | thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), |
805 | 0 | src_address, src_stride, &sse, second_pred); |
806 | 124M | } |
807 | |
808 | 208M | cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, |
809 | 208M | mvcost, error_per_bit); |
810 | |
811 | 208M | if (cost_array[idx] < besterr) { |
812 | 24.9M | best_idx = idx; |
813 | 24.9M | besterr = cost_array[idx]; |
814 | 24.9M | *distortion = thismse; |
815 | 24.9M | *sse1 = sse; |
816 | 24.9M | } |
817 | 208M | } else { |
| | // Out-of-range candidates can never win the comparisons below. |
818 | 3.34M | cost_array[idx] = UINT_MAX; |
819 | 3.34M | } |
820 | 212M | } |
821 | |
822 | | // Check diagonal sub-pixel position |
| | // The column direction comes from comparing cost_array[0] with [1], the row |
| | // direction from [2] with [3]. Presumably search_step is ordered left, |
| | // right, up, down -- confirm against search_step_table. |
823 | 53.0M | kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep); |
824 | 53.0M | kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep); |
825 | |
826 | 53.0M | tc = bc + kc; |
827 | 53.0M | tr = br + kr; |
828 | 53.0M | if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { |
829 | 53.0M | MV this_mv = { tr, tc }; |
830 | 53.0M | if (use_accurate_subpel_search) { |
831 | 21.2M | thismse = accurate_sub_pel_search(xd, &this_mv, x->me_sf, kernel, vfp, |
832 | 21.2M | src_address, src_stride, y, y_stride, |
833 | 21.2M | second_pred, w, h, &sse); |
834 | 31.7M | } else { |
835 | 31.7M | const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); |
836 | 31.7M | if (second_pred == NULL) |
837 | 31.7M | thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address, |
838 | 31.7M | src_stride, &sse); |
839 | 0 | else |
840 | 0 | thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), |
841 | 0 | src_address, src_stride, &sse, second_pred); |
842 | 31.7M | } |
843 | |
844 | 53.0M | cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, |
845 | 53.0M | error_per_bit); |
846 | |
847 | 53.0M | if (cost_array[4] < besterr) { |
848 | 4.09M | best_idx = 4; |
849 | 4.09M | besterr = cost_array[4]; |
850 | 4.09M | *distortion = thismse; |
851 | 4.09M | *sse1 = sse; |
852 | 4.09M | } |
853 | 53.0M | } else { |
| | // NOTE(review): idx is 4 here only because the for loop above has run to |
| | // completion, so this writes cost_array[4]; an explicit index would be |
| | // less fragile. |
854 | 0 | cost_array[idx] = UINT_MAX; |
855 | 0 | } |
856 | |
| | // Move the centre to the winning candidate, if any (best_idx == -1 means |
| | // the centre is still the best). |
857 | 53.0M | if (best_idx < 4 && best_idx >= 0) { |
858 | 15.9M | br += search_step[best_idx].row; |
859 | 15.9M | bc += search_step[best_idx].col; |
860 | 37.0M | } else if (best_idx == 4) { |
861 | 4.09M | br = tr; |
862 | 4.09M | bc = tc; |
863 | 4.09M | } |
864 | |
| | // Second-level search around the new centre. br0/bc0 snapshot the centre |
| | // because CHECK_BETTER / CHECK_BETTER1 may update br/bc. |
865 | 53.0M | if (iters_per_step > 0 && best_idx != -1) { |
866 | 20.0M | unsigned int second; |
867 | 20.0M | const int br0 = br; |
868 | 20.0M | const int bc0 = bc; |
869 | 20.0M | assert(tr == br || tc == bc); |
870 | |
871 | 20.0M | if (tr == br && tc != bc) { |
872 | 7.82M | kc = bc - tc; |
873 | 7.82M | if (iters_per_step == 1) { |
874 | 3.68M | if (use_accurate_subpel_search) { |
875 | 3.68M | CHECK_BETTER1(second, br0, bc0 + kc); |
876 | 3.68M | } else { |
877 | 0 | CHECK_BETTER(second, br0, bc0 + kc); |
878 | 0 | } |
879 | 3.68M | } |
880 | 12.2M | } else if (tr != br && tc == bc) { |
881 | 8.16M | kr = br - tr; |
882 | 8.16M | if (iters_per_step == 1) { |
883 | 3.73M | if (use_accurate_subpel_search) { |
884 | 3.73M | CHECK_BETTER1(second, br0 + kr, bc0); |
885 | 3.73M | } else { |
886 | 0 | CHECK_BETTER(second, br0 + kr, bc0); |
887 | 0 | } |
888 | 3.73M | } |
889 | 8.16M | } |
890 | |
891 | 20.0M | if (iters_per_step > 1) { |
892 | 10.4M | if (use_accurate_subpel_search) { |
893 | 0 | CHECK_BETTER1(second, br0 + kr, bc0); |
894 | 0 | CHECK_BETTER1(second, br0, bc0 + kc); |
895 | 0 | if (br0 != br || bc0 != bc) { |
896 | 0 | CHECK_BETTER1(second, br0 + kr, bc0 + kc); |
897 | 0 | } |
898 | 10.4M | } else { |
899 | 10.4M | CHECK_BETTER(second, br0 + kr, bc0); |
900 | 10.4M | CHECK_BETTER(second, br0, bc0 + kc); |
901 | 10.4M | if (br0 != br || bc0 != bc) { |
902 | 1.20M | CHECK_BETTER(second, br0 + kr, bc0 + kc); |
903 | 1.20M | } |
904 | 10.4M | } |
905 | 10.4M | } |
906 | 20.0M | } |
907 | |
| | // Advance to the next group of four offsets and halve the step. |
908 | 53.0M | search_step += 4; |
909 | 53.0M | hstep >>= 1; |
910 | 53.0M | best_idx = -1; |
911 | 53.0M | } |
912 | |
913 | | // Each subsequent iteration checks at least one point in common with |
914 | | // the last iteration could be 2 ( if diag selected) 1/4 pel |
915 | |
916 | | // These lines ensure static analysis doesn't warn that |
917 | | // tr and tc aren't used after the above point. |
918 | 22.2M | (void)tr; |
919 | 22.2M | (void)tc; |
920 | |
921 | 22.2M | bestmv->row = br; |
922 | 22.2M | bestmv->col = bc; |
923 | |
924 | 22.2M | return besterr; |
925 | 22.2M | } |
926 | | |
| | // The sub-pixel search macros end here. CHECK_BETTER is defined again |
| | // below with a different, argument-less form for the integer pattern |
| | // searches. |
927 | | #undef CHECK_BETTER |
928 | | #undef CHECK_BETTER1 |
929 | | |
| | // Returns non-zero when (row, col) can be moved by up to `range` in every |
| | // direction and still lie within mv_limits (bounds inclusive). The four |
| | // comparisons are combined with bitwise &, so all of them are evaluated. |
930 | | static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col, |
931 | 0 | int range) { |
932 | 0 | return ((row - range) >= mv_limits->row_min) & |
933 | 0 | ((row + range) <= mv_limits->row_max) & |
934 | 0 | ((col - range) >= mv_limits->col_min) & |
935 | 0 | ((col + range) <= mv_limits->col_max); |
936 | 0 | } |
937 | | |
| | // Returns 1 when both components of *mv lie within mv_limits (bounds |
| | // inclusive), 0 otherwise. |
938 | 288M | static INLINE int is_mv_in(const MvLimits *mv_limits, const MV *mv) { |
939 | 288M | return (mv->col >= mv_limits->col_min) && (mv->col <= mv_limits->col_max) && |
940 | 288M | (mv->row >= mv_limits->row_min) && (mv->row <= mv_limits->row_max); |
941 | 288M | } |
942 | | |
| | // Candidate test used by the integer pattern searches. It takes no |
| | // arguments and relies on these names being in scope at the point of use: |
| | // thissad, bestsad, use_mvcost, x, this_mv, fcenter_mv, sad_per_bit, |
| | // best_site and i. |
| | // |
| | // If thissad beats bestsad, the MV cost is added to thissad (when |
| | // use_mvcost is set) and the comparison is repeated; on success bestsad |
| | // takes the new value and best_site records the loop index i. Note that |
| | // thissad itself is modified. The expansion is a bare brace block, so it is |
| | // used without a trailing semicolon. |
943 | | #define CHECK_BETTER \ |
944 | 0 | { \ |
945 | 0 | if (thissad < bestsad) { \ |
946 | 0 | if (use_mvcost) \ |
947 | 0 | thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \ |
948 | 0 | if (thissad < bestsad) { \ |
949 | 0 | bestsad = thissad; \ |
950 | 0 | best_site = i; \ |
951 | 0 | } \ |
952 | 0 | } \ |
953 | 0 | } |
954 | | |
| | // Dimensions of the num_candidates / candidates tables taken by the |
| | // pattern search functions below. |
955 | | #define MAX_PATTERN_SCALES 11 |
956 | | #define MAX_PATTERN_CANDIDATES 8 // max number of candidates per scale |
957 | 0 | #define PATTERN_CANDIDATES_REF 3 // number of refinement candidates |
958 | | |
959 | | // Calculate and return a sad+mvcost list around an integer best pel. |
| | // |
| | // cost_list[0] receives the cost at best_mv; cost_list[1..4] receive the |
| | // costs at the four neighbours in the order {0,-1}, {1,0}, {0,1}, {-1,0} |
| | // (row, col). A neighbour outside x->mv_limits gets INT_MAX. The error term |
| | // comes from fn_ptr->vf in every case. ref_mv is shifted right by 3 to form |
| | // the centre used for the MV cost. |
| | // |
| | // NOTE(review): the centre entry adds mvsad_err_cost(..., sadpb) while the |
| | // neighbours add mv_err_cost(..., x->errorperbit) -- confirm that this |
| | // difference is intended. |
960 | | static INLINE void calc_int_cost_list(const MACROBLOCK *x, const MV *ref_mv, |
961 | | int sadpb, |
962 | | const vp9_variance_fn_ptr_t *fn_ptr, |
963 | 0 | const MV *best_mv, int *cost_list) { |
964 | 0 | static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } }; |
965 | 0 | const struct buf_2d *const what = &x->plane[0].src; |
966 | 0 | const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0]; |
967 | 0 | const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 }; |
968 | 0 | int br = best_mv->row; |
969 | 0 | int bc = best_mv->col; |
970 | 0 | const MV mv = { br, bc }; |
971 | 0 | int i; |
972 | 0 | unsigned int sse; |
973 |

974 | 0 | cost_list[0] = |
975 | 0 | fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), |
976 | 0 | in_what->stride, &sse) + |
977 | 0 | mvsad_err_cost(x, &mv, &fcenter_mv, sadpb); |
| | // When all four neighbours are known to be in range, the per-neighbour |
| | // limit test is skipped. |
978 | 0 | if (check_bounds(&x->mv_limits, br, bc, 1)) { |
979 | 0 | for (i = 0; i < 4; i++) { |
980 | 0 | const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; |
981 | 0 | cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride, |
982 | 0 | get_buf_from_mv(in_what, &this_mv), |
983 | 0 | in_what->stride, &sse) + |
984 | 0 | mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, |
985 | 0 | x->mvcost, x->errorperbit); |
986 | 0 | } |
987 | 0 | } else { |
988 | 0 | for (i = 0; i < 4; i++) { |
989 | 0 | const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; |
990 | 0 | if (!is_mv_in(&x->mv_limits, &this_mv)) |
991 | 0 | cost_list[i + 1] = INT_MAX; |
992 | 0 | else |
993 | 0 | cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride, |
994 | 0 | get_buf_from_mv(in_what, &this_mv), |
995 | 0 | in_what->stride, &sse) + |
996 | 0 | mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost, |
997 | 0 | x->mvcost, x->errorperbit); |
998 | 0 | } |
999 | 0 | } |
1000 | 0 | } |
1001 | | |
1002 | | // Generic pattern search function that searches over multiple scales. |
1003 | | // Each scale can have a different number of candidates and shape of |
1004 | | // candidates as indicated in the num_candidates and candidates arrays |
1005 | | // passed into this function |
1006 | | // |
| | // ref_mv is clamped in place to x->mv_limits and used as the start point. |
| | // search_param is mapped through search_param_to_steps to the largest scale |
| | // to use. With do_init_search set, every scale from 0 up to that one is |
| | // probed around the start point first, and the scale at which bestsad last |
| | // improved becomes the starting scale. The main loop then walks the scales |
| | // downwards to 0; at each scale it tests the full candidate ring (skipped on |
| | // the first pass after an initial search) and then repeatedly tests the |
| | // three candidates centred on the last winning index until nothing |
| | // improves. |
| | // |
| | // The final position is stored in *best_mv. If cost_list is non-NULL it is |
| | // filled by calc_int_cost_list(). Returns bestsad (SAD plus |
| | // mvsad_err_cost for the start point; candidates add the MV cost only when |
| | // use_mvcost is set). |
1007 | | static int vp9_pattern_search( |
1008 | | const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit, |
1009 | | int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp, |
1010 | | int use_mvcost, const MV *center_mv, MV *best_mv, |
1011 | | const int num_candidates[MAX_PATTERN_SCALES], |
1012 | 0 | const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) { |
1013 | 0 | const MACROBLOCKD *const xd = &x->e_mbd; |
1014 | 0 | static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { |
1015 | 0 | 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
1016 | 0 | }; |
1017 | 0 | int i, s, t; |
1018 | 0 | const struct buf_2d *const what = &x->plane[0].src; |
1019 | 0 | const struct buf_2d *const in_what = &xd->plane[0].pre[0]; |
1020 | 0 | int br, bc; |
1021 | 0 | int bestsad = INT_MAX; |
1022 | 0 | int thissad; |
| | // k is the index, within the current scale's candidate list, of the last |
| | // winning candidate. |
1023 | 0 | int k = -1; |
1024 | 0 | const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; |
1025 | 0 | int best_init_s = search_param_to_steps[search_param]; |
1026 | | // adjust ref_mv to make sure it is within MV range |
1027 | 0 | clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max, |
1028 | 0 | x->mv_limits.row_min, x->mv_limits.row_max); |
1029 | 0 | br = ref_mv->row; |
1030 | 0 | bc = ref_mv->col; |
1031 | |
1032 | | // Work out the start point for the search |
1033 | 0 | bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), |
1034 | 0 | in_what->stride) + |
1035 | 0 | mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); |
1036 | |
1037 | | // Search all possible scales up to the search param around the center point |
1038 | | // pick the scale of the point that is best as the starting scale of |
1039 | | // further steps around it. |
1040 | 0 | if (do_init_search) { |
1041 | 0 | s = best_init_s; |
1042 | 0 | best_init_s = -1; |
1043 | 0 | for (t = 0; t <= s; ++t) { |
1044 | 0 | int best_site = -1; |
| | // If the whole ring at this scale is inside the limits, the per-candidate |
| | // range test is skipped. |
1045 | 0 | if (check_bounds(&x->mv_limits, br, bc, 1 << t)) { |
1046 | 0 | for (i = 0; i < num_candidates[t]; i++) { |
1047 | 0 | const MV this_mv = { br + candidates[t][i].row, |
1048 | 0 | bc + candidates[t][i].col }; |
1049 | 0 | thissad = |
1050 | 0 | vfp->sdf(what->buf, what->stride, |
1051 | 0 | get_buf_from_mv(in_what, &this_mv), in_what->stride); |
1052 | 0 | CHECK_BETTER |
1053 | 0 | } |
1054 | 0 | } else { |
1055 | 0 | for (i = 0; i < num_candidates[t]; i++) { |
1056 | 0 | const MV this_mv = { br + candidates[t][i].row, |
1057 | 0 | bc + candidates[t][i].col }; |
1058 | 0 | if (!is_mv_in(&x->mv_limits, &this_mv)) continue; |
1059 | 0 | thissad = |
1060 | 0 | vfp->sdf(what->buf, what->stride, |
1061 | 0 | get_buf_from_mv(in_what, &this_mv), in_what->stride); |
1062 | 0 | CHECK_BETTER |
1063 | 0 | } |
1064 | 0 | } |
| | // bestsad carries over between scales, so a scale is recorded only when it |
| | // beats everything found at the smaller scales. |
1065 | 0 | if (best_site == -1) { |
1066 | 0 | continue; |
1067 | 0 | } else { |
1068 | 0 | best_init_s = t; |
1069 | 0 | k = best_site; |
1070 | 0 | } |
1071 | 0 | } |
1072 | 0 | if (best_init_s != -1) { |
1073 | 0 | br += candidates[best_init_s][k].row; |
1074 | 0 | bc += candidates[best_init_s][k].col; |
1075 | 0 | } |
1076 | 0 | } |
1077 | |
1078 | | // If the center point is still the best, just skip this and move to |
1079 | | // the refinement step. |
1080 | 0 | if (best_init_s != -1) { |
1081 | 0 | int best_site = -1; |
1082 | 0 | s = best_init_s; |
1083 | |

1084 | 0 | do { |
1085 | | // No need to search all 6 points the 1st time if initial search was used |
1086 | 0 | if (!do_init_search || s != best_init_s) { |
1087 | 0 | if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { |
1088 | 0 | for (i = 0; i < num_candidates[s]; i++) { |
1089 | 0 | const MV this_mv = { br + candidates[s][i].row, |
1090 | 0 | bc + candidates[s][i].col }; |
1091 | 0 | thissad = |
1092 | 0 | vfp->sdf(what->buf, what->stride, |
1093 | 0 | get_buf_from_mv(in_what, &this_mv), in_what->stride); |
1094 | 0 | CHECK_BETTER |
1095 | 0 | } |
1096 | 0 | } else { |
1097 | 0 | for (i = 0; i < num_candidates[s]; i++) { |
1098 | 0 | const MV this_mv = { br + candidates[s][i].row, |
1099 | 0 | bc + candidates[s][i].col }; |
1100 | 0 | if (!is_mv_in(&x->mv_limits, &this_mv)) continue; |
1101 | 0 | thissad = |
1102 | 0 | vfp->sdf(what->buf, what->stride, |
1103 | 0 | get_buf_from_mv(in_what, &this_mv), in_what->stride); |
1104 | 0 | CHECK_BETTER |
1105 | 0 | } |
1106 | 0 | } |
1107 | |

| | // Inside a do-while, `continue` jumps to the `while (s--)` test, so a scale |
| | // with no improvement goes straight to the next smaller scale. |
1108 | 0 | if (best_site == -1) { |
1109 | 0 | continue; |
1110 | 0 | } else { |
1111 | 0 | br += candidates[s][best_site].row; |
1112 | 0 | bc += candidates[s][best_site].col; |
1113 | 0 | k = best_site; |
1114 | 0 | } |
1115 | 0 | } |
1116 | |
| | // Refinement at this scale: test candidate k and its two neighbours in the |
| | // list (indices wrap around), and repeat while one of them improves. |
1117 | 0 | do { |
1118 | 0 | int next_chkpts_indices[PATTERN_CANDIDATES_REF]; |
1119 | 0 | best_site = -1; |
1120 | 0 | next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; |
1121 | 0 | next_chkpts_indices[1] = k; |
1122 | 0 | next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; |
1123 | |

1124 | 0 | if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { |
1125 | 0 | for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { |
1126 | 0 | const MV this_mv = { |
1127 | 0 | br + candidates[s][next_chkpts_indices[i]].row, |
1128 | 0 | bc + candidates[s][next_chkpts_indices[i]].col |
1129 | 0 | }; |
1130 | 0 | thissad = |
1131 | 0 | vfp->sdf(what->buf, what->stride, |
1132 | 0 | get_buf_from_mv(in_what, &this_mv), in_what->stride); |
1133 | 0 | CHECK_BETTER |
1134 | 0 | } |
1135 | 0 | } else { |
1136 | 0 | for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { |
1137 | 0 | const MV this_mv = { |
1138 | 0 | br + candidates[s][next_chkpts_indices[i]].row, |
1139 | 0 | bc + candidates[s][next_chkpts_indices[i]].col |
1140 | 0 | }; |
1141 | 0 | if (!is_mv_in(&x->mv_limits, &this_mv)) continue; |
1142 | 0 | thissad = |
1143 | 0 | vfp->sdf(what->buf, what->stride, |
1144 | 0 | get_buf_from_mv(in_what, &this_mv), in_what->stride); |
1145 | 0 | CHECK_BETTER |
1146 | 0 | } |
1147 | 0 | } |
1148 | |

| | // best_site indexes next_chkpts_indices here, not the candidate list. |
1149 | 0 | if (best_site != -1) { |
1150 | 0 | k = next_chkpts_indices[best_site]; |
1151 | 0 | br += candidates[s][k].row; |
1152 | 0 | bc += candidates[s][k].col; |
1153 | 0 | } |
1154 | 0 | } while (best_site != -1); |
1155 | 0 | } while (s--); |
1156 | 0 | } |
1157 | |
1158 | 0 | best_mv->row = br; |
1159 | 0 | best_mv->col = bc; |
1160 | |
1161 | | // Returns the one-away integer pel sad values around the best as follows: |
1162 | | // cost_list[0]: cost at the best integer pel |
1163 | | // cost_list[1]: cost at delta {0, -1} (left) from the best integer pel |
1164 | | // cost_list[2]: cost at delta { 1, 0} (bottom) from the best integer pel |
1165 | | // cost_list[3]: cost at delta { 0, 1} (right) from the best integer pel |
1166 | | // cost_list[4]: cost at delta {-1, 0} (top) from the best integer pel |
| | // NOTE(review): fcenter_mv is already center_mv >> 3, and |
| | // calc_int_cost_list() shifts its ref_mv argument right by 3 again -- |
| | // confirm whether center_mv was meant to be passed here. |
1167 | 0 | if (cost_list) { |
1168 | 0 | calc_int_cost_list(x, &fcenter_mv, sad_per_bit, vfp, best_mv, cost_list); |
1169 | 0 | } |
1170 | 0 | return bestsad; |
1171 | 0 | } |
1172 | | |
1173 | | // A specialized function where the smallest scale search candidates |
1174 | | // are 4 1-away neighbors, and cost_list is non-null |
1175 | | // TODO(debargha): Merge this function with the one above. Also remove |
1176 | | // use_mvcost option since it is always 1, to save unnecessary branches. |
| | // |
| | // The search follows the same scheme as vp9_pattern_search(), with one |
| | // difference: when num_candidates[0] == 4 and cost_list is non-NULL |
| | // (do_sad), scale 0 is handled in a separate pass that stores the SAD of |
| | // each probed neighbour in cost_list as it goes, so those values need not |
| | // be recomputed at the end. |
| | // |
| | // ref_mv is clamped in place; the final position is stored in *best_mv. |
| | // Returns bestsad. |
1177 | | static int vp9_pattern_search_sad( |
1178 | | const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit, |
1179 | | int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp, |
1180 | | int use_mvcost, const MV *center_mv, MV *best_mv, |
1181 | | const int num_candidates[MAX_PATTERN_SCALES], |
1182 | 0 | const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) { |
1183 | 0 | const MACROBLOCKD *const xd = &x->e_mbd; |
1184 | 0 | static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { |
1185 | 0 | 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, |
1186 | 0 | }; |
1187 | 0 | int i, s, t; |
1188 | 0 | const struct buf_2d *const what = &x->plane[0].src; |
1189 | 0 | const struct buf_2d *const in_what = &xd->plane[0].pre[0]; |
1190 | 0 | int br, bc; |
1191 | 0 | int bestsad = INT_MAX; |
1192 | 0 | int thissad; |
1193 | 0 | int k = -1; |
1194 | 0 | const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; |
1195 | 0 | int best_init_s = search_param_to_steps[search_param]; |
1196 | | // adjust ref_mv to make sure it is within MV range |
1197 | 0 | clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max, |
1198 | 0 | x->mv_limits.row_min, x->mv_limits.row_max); |
1199 | 0 | br = ref_mv->row; |
1200 | 0 | bc = ref_mv->col; |
| | // INT_MAX in cost_list[0] doubles as the "not filled in during the search" |
| | // marker tested at the end of the function. |
1201 | 0 | if (cost_list != NULL) { |
1202 | 0 | cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = |
1203 | 0 | INT_MAX; |
1204 | 0 | } |
1205 | |
1206 | | // Work out the start point for the search |
1207 | 0 | bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), |
1208 | 0 | in_what->stride) + |
1209 | 0 | mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); |
1210 | |
1211 | | // Search all possible scales up to the search param around the center point |
1212 | | // pick the scale of the point that is best as the starting scale of |
1213 | | // further steps around it. |
1214 | 0 | if (do_init_search) { |
1215 | 0 | s = best_init_s; |
1216 | 0 | best_init_s = -1; |
1217 | 0 | for (t = 0; t <= s; ++t) { |
1218 | 0 | int best_site = -1; |
1219 | 0 | if (check_bounds(&x->mv_limits, br, bc, 1 << t)) { |
1220 | 0 | for (i = 0; i < num_candidates[t]; i++) { |
1221 | 0 | const MV this_mv = { br + candidates[t][i].row, |
1222 | 0 | bc + candidates[t][i].col }; |
1223 | 0 | thissad = |
1224 | 0 | vfp->sdf(what->buf, what->stride, |
1225 | 0 | get_buf_from_mv(in_what, &this_mv), in_what->stride); |
1226 | 0 | CHECK_BETTER |
1227 | 0 | } |
1228 | 0 | } else { |
1229 | 0 | for (i = 0; i < num_candidates[t]; i++) { |
1230 | 0 | const MV this_mv = { br + candidates[t][i].row, |
1231 | 0 | bc + candidates[t][i].col }; |
1232 | 0 | if (!is_mv_in(&x->mv_limits, &this_mv)) continue; |
1233 | 0 | thissad = |
1234 | 0 | vfp->sdf(what->buf, what->stride, |
1235 | 0 | get_buf_from_mv(in_what, &this_mv), in_what->stride); |
1236 | 0 | CHECK_BETTER |
1237 | 0 | } |
1238 | 0 | } |
1239 | 0 | if (best_site == -1) { |
1240 | 0 | continue; |
1241 | 0 | } else { |
1242 | 0 | best_init_s = t; |
1243 | 0 | k = best_site; |
1244 | 0 | } |
1245 | 0 | } |
1246 | 0 | if (best_init_s != -1) { |
1247 | 0 | br += candidates[best_init_s][k].row; |
1248 | 0 | bc += candidates[best_init_s][k].col; |
1249 | 0 | } |
1250 | 0 | } |
1251 | |
1252 | | // If the center point is still the best, just skip this and move to |
1253 | | // the refinement step. |
1254 | 0 | if (best_init_s != -1) { |
| | // With do_sad set the loop below stops after scale 1 and scale 0 is handled |
| | // by the cost_list-recording pass that follows it. |
1255 | 0 | int do_sad = (num_candidates[0] == 4 && cost_list != NULL); |
1256 | 0 | int best_site = -1; |
1257 | 0 | s = best_init_s; |
1258 | |

1259 | 0 | for (; s >= do_sad; s--) { |
1260 | 0 | if (!do_init_search || s != best_init_s) { |
1261 | 0 | if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { |
1262 | 0 | for (i = 0; i < num_candidates[s]; i++) { |
1263 | 0 | const MV this_mv = { br + candidates[s][i].row, |
1264 | 0 | bc + candidates[s][i].col }; |
1265 | 0 | thissad = |
1266 | 0 | vfp->sdf(what->buf, what->stride, |
1267 | 0 | get_buf_from_mv(in_what, &this_mv), in_what->stride); |
1268 | 0 | CHECK_BETTER |
1269 | 0 | } |
1270 | 0 | } else { |
1271 | 0 | for (i = 0; i < num_candidates[s]; i++) { |
1272 | 0 | const MV this_mv = { br + candidates[s][i].row, |
1273 | 0 | bc + candidates[s][i].col }; |
1274 | 0 | if (!is_mv_in(&x->mv_limits, &this_mv)) continue; |
1275 | 0 | thissad = |
1276 | 0 | vfp->sdf(what->buf, what->stride, |
1277 | 0 | get_buf_from_mv(in_what, &this_mv), in_what->stride); |
1278 | 0 | CHECK_BETTER |
1279 | 0 | } |
1280 | 0 | } |
1281 | |

1282 | 0 | if (best_site == -1) { |
1283 | 0 | continue; |
1284 | 0 | } else { |
1285 | 0 | br += candidates[s][best_site].row; |
1286 | 0 | bc += candidates[s][best_site].col; |
1287 | 0 | k = best_site; |
1288 | 0 | } |
1289 | 0 | } |
1290 | |
| | // Refinement at this scale: test candidate k and its two neighbours in the |
| | // list (indices wrap around), and repeat while one of them improves. |
1291 | 0 | do { |
1292 | 0 | int next_chkpts_indices[PATTERN_CANDIDATES_REF]; |
1293 | 0 | best_site = -1; |
1294 | 0 | next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; |
1295 | 0 | next_chkpts_indices[1] = k; |
1296 | 0 | next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; |
1297 | |

1298 | 0 | if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { |
1299 | 0 | for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { |
1300 | 0 | const MV this_mv = { |
1301 | 0 | br + candidates[s][next_chkpts_indices[i]].row, |
1302 | 0 | bc + candidates[s][next_chkpts_indices[i]].col |
1303 | 0 | }; |
1304 | 0 | thissad = |
1305 | 0 | vfp->sdf(what->buf, what->stride, |
1306 | 0 | get_buf_from_mv(in_what, &this_mv), in_what->stride); |
1307 | 0 | CHECK_BETTER |
1308 | 0 | } |
1309 | 0 | } else { |
1310 | 0 | for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { |
1311 | 0 | const MV this_mv = { |
1312 | 0 | br + candidates[s][next_chkpts_indices[i]].row, |
1313 | 0 | bc + candidates[s][next_chkpts_indices[i]].col |
1314 | 0 | }; |
1315 | 0 | if (!is_mv_in(&x->mv_limits, &this_mv)) continue; |
1316 | 0 | thissad = |
1317 | 0 | vfp->sdf(what->buf, what->stride, |
1318 | 0 | get_buf_from_mv(in_what, &this_mv), in_what->stride); |
1319 | 0 | CHECK_BETTER |
1320 | 0 | } |
1321 | 0 | } |
1322 | |

1323 | 0 | if (best_site != -1) { |
1324 | 0 | k = next_chkpts_indices[best_site]; |
1325 | 0 | br += candidates[s][k].row; |
1326 | 0 | bc += candidates[s][k].col; |
1327 | 0 | } |
1328 | 0 | } while (best_site != -1); |
1329 | 0 | } |
1330 | |
1331 | | // Note: If we enter the if below, then cost_list must be non-NULL. |
| | // The loop above leaves s at do_sad - 1, so s == 0 only when do_sad is 1. |
1332 | 0 | if (s == 0) { |
1333 | 0 | cost_list[0] = bestsad; |
1334 | 0 | if (!do_init_search || s != best_init_s) { |
| | // cost_list[i + 1] keeps the raw SAD; CHECK_BETTER may add the MV cost to |
| | // thissad afterwards, but not to the stored value. |
1335 | 0 | if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { |
1336 | 0 | for (i = 0; i < num_candidates[s]; i++) { |
1337 | 0 | const MV this_mv = { br + candidates[s][i].row, |
1338 | 0 | bc + candidates[s][i].col }; |
1339 | 0 | cost_list[i + 1] = thissad = |
1340 | 0 | vfp->sdf(what->buf, what->stride, |
1341 | 0 | get_buf_from_mv(in_what, &this_mv), in_what->stride); |
1342 | 0 | CHECK_BETTER |
1343 | 0 | } |
1344 | 0 | } else { |
1345 | 0 | for (i = 0; i < num_candidates[s]; i++) { |
1346 | 0 | const MV this_mv = { br + candidates[s][i].row, |
1347 | 0 | bc + candidates[s][i].col }; |
1348 | 0 | if (!is_mv_in(&x->mv_limits, &this_mv)) continue; |
1349 | 0 | cost_list[i + 1] = thissad = |
1350 | 0 | vfp->sdf(what->buf, what->stride, |
1351 | 0 | get_buf_from_mv(in_what, &this_mv), in_what->stride); |
1352 | 0 | CHECK_BETTER |
1353 | 0 | } |
1354 | 0 | } |
1355 | |

1356 | 0 | if (best_site != -1) { |
1357 | 0 | br += candidates[s][best_site].row; |
1358 | 0 | bc += candidates[s][best_site].col; |
1359 | 0 | k = best_site; |
1360 | 0 | } |
1361 | 0 | } |
1362 | 0 | while (best_site != -1) { |
1363 | 0 | int next_chkpts_indices[PATTERN_CANDIDATES_REF]; |
1364 | 0 | best_site = -1; |
1365 | 0 | next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; |
1366 | 0 | next_chkpts_indices[1] = k; |
1367 | 0 | next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; |
| | // The centre has just moved in direction k. The old centre's value goes |
| | // into the slot for the opposite direction, ((k + 2) % 4) + 1, and the |
| | // other neighbour slots are reset before being refilled below. |
1368 | 0 | cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX; |
1369 | 0 | cost_list[((k + 2) % 4) + 1] = cost_list[0]; |
1370 | 0 | cost_list[0] = bestsad; |
1371 | |

1372 | 0 | if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { |
1373 | 0 | for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { |
1374 | 0 | const MV this_mv = { |
1375 | 0 | br + candidates[s][next_chkpts_indices[i]].row, |
1376 | 0 | bc + candidates[s][next_chkpts_indices[i]].col |
1377 | 0 | }; |
1378 | 0 | cost_list[next_chkpts_indices[i] + 1] = thissad = |
1379 | 0 | vfp->sdf(what->buf, what->stride, |
1380 | 0 | get_buf_from_mv(in_what, &this_mv), in_what->stride); |
1381 | 0 | CHECK_BETTER |
1382 | 0 | } |
1383 | 0 | } else { |
1384 | 0 | for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { |
1385 | 0 | const MV this_mv = { |
1386 | 0 | br + candidates[s][next_chkpts_indices[i]].row, |
1387 | 0 | bc + candidates[s][next_chkpts_indices[i]].col |
1388 | 0 | }; |
1389 | 0 | if (!is_mv_in(&x->mv_limits, &this_mv)) { |
1390 | 0 | cost_list[next_chkpts_indices[i] + 1] = INT_MAX; |
1391 | 0 | continue; |
1392 | 0 | } |
1393 | 0 | cost_list[next_chkpts_indices[i] + 1] = thissad = |
1394 | 0 | vfp->sdf(what->buf, what->stride, |
1395 | 0 | get_buf_from_mv(in_what, &this_mv), in_what->stride); |
1396 | 0 | CHECK_BETTER |
1397 | 0 | } |
1398 | 0 | } |
1399 | |

1400 | 0 | if (best_site != -1) { |
1401 | 0 | k = next_chkpts_indices[best_site]; |
1402 | 0 | br += candidates[s][k].row; |
1403 | 0 | bc += candidates[s][k].col; |
1404 | 0 | } |
1405 | 0 | } |
1406 | 0 | } |
1407 | 0 | } |
1408 | |
1409 | | // Returns the one-away integer pel sad values around the best as follows: |
1410 | | // cost_list[0]: sad at the best integer pel |
1411 | | // cost_list[1]: sad at delta {0, -1} (left) from the best integer pel |
1412 | | // cost_list[2]: sad at delta { 1, 0} (bottom) from the best integer pel |
1413 | | // cost_list[3]: sad at delta { 0, 1} (right) from the best integer pel |
1414 | | // cost_list[4]: sad at delta {-1, 0} (top) from the best integer pel |
1415 | 0 | if (cost_list) { |
1416 | 0 | static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } }; |
| | // cost_list[0] still INT_MAX means the scale-0 pass above did not run, so |
| | // the four neighbour SADs are computed here (no MV cost is added on this |
| | // path). |
1417 | 0 | if (cost_list[0] == INT_MAX) { |
1418 | 0 | cost_list[0] = bestsad; |
1419 | 0 | if (check_bounds(&x->mv_limits, br, bc, 1)) { |
1420 | 0 | for (i = 0; i < 4; i++) { |
1421 | 0 | const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; |
1422 | 0 | cost_list[i + 1] = |
1423 | 0 | vfp->sdf(what->buf, what->stride, |
1424 | 0 | get_buf_from_mv(in_what, &this_mv), in_what->stride); |
1425 | 0 | } |
1426 | 0 | } else { |
1427 | 0 | for (i = 0; i < 4; i++) { |
1428 | 0 | const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; |
1429 | 0 | if (!is_mv_in(&x->mv_limits, &this_mv)) |
1430 | 0 | cost_list[i + 1] = INT_MAX; |
1431 | 0 | else |
1432 | 0 | cost_list[i + 1] = |
1433 | 0 | vfp->sdf(what->buf, what->stride, |
1434 | 0 | get_buf_from_mv(in_what, &this_mv), in_what->stride); |
1435 | 0 | } |
1436 | 0 | } |
1437 | 0 | } else { |
| | // The neighbour entries were recorded as raw SADs during the search; add |
| | // the MV cost to every entry that was actually evaluated. |
1438 | 0 | if (use_mvcost) { |
1439 | 0 | for (i = 0; i < 4; i++) { |
1440 | 0 | const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; |
1441 | 0 | if (cost_list[i + 1] != INT_MAX) { |
1442 | 0 | cost_list[i + 1] += |
1443 | 0 | mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); |
1444 | 0 | } |
1445 | 0 | } |
1446 | 0 | } |
1447 | 0 | } |
1448 | 0 | } |
1449 | 0 | best_mv->row = br; |
1450 | 0 | best_mv->col = bc; |
1451 | 0 | return bestsad; |
1452 | 0 | } |
1453 | | |
| | // Returns the vfp->vf result between the source block and the reference |
| | // block addressed by best_mv, plus mv_err_cost() relative to center_mv when |
| | // use_mvcost is non-zero. best_mv is used as-is to address the reference |
| | // buffer and multiplied by 8 for the cost term. When |
| | // CONFIG_VP9_HIGHBITDEPTH is set the sum is formed in 64 bits and clamped |
| | // to INT_MAX; otherwise it is returned without a clamp. |
1454 | | int vp9_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv, |
1455 | | const MV *center_mv, const vp9_variance_fn_ptr_t *vfp, |
1456 | 79.7M | int use_mvcost) { |
1457 | 79.7M | const MACROBLOCKD *const xd = &x->e_mbd; |
1458 | 79.7M | const struct buf_2d *const what = &x->plane[0].src; |
1459 | 79.7M | const struct buf_2d *const in_what = &xd->plane[0].pre[0]; |
1460 | 79.7M | const MV mv = { best_mv->row * 8, best_mv->col * 8 }; |
| | // Receives the SSE output of vfp->vf; the value is not used. |
1461 | 79.7M | uint32_t unused; |
1462 | 79.7M | #if CONFIG_VP9_HIGHBITDEPTH |
1463 | 79.7M | uint64_t err = |
1464 | 79.7M | vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv), |
1465 | 79.7M | in_what->stride, &unused); |
1466 | 79.7M | err += (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, |
1467 | 79.7M | x->errorperbit) |
1468 | 79.7M | : 0); |
1469 | 79.7M | if (err >= INT_MAX) return INT_MAX; |
1470 | 79.7M | return (int)err; |
1471 | | #else |
1472 | | return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv), |
1473 | | in_what->stride, &unused) + |
1474 | | (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, |
1475 | | x->errorperbit) |
1476 | | : 0); |
1477 | | #endif |
1478 | 79.7M | } |
1479 | | |
| | // Counterpart of vp9_get_mvpred_var() that also takes second_pred: returns |
| | // the vfp->svaf result, called with zero sub-pixel offsets, for the |
| | // reference block addressed by best_mv, plus mv_err_cost() relative to |
| | // center_mv when use_mvcost is non-zero. |
| | // NOTE(review): unlike the CONFIG_VP9_HIGHBITDEPTH path of |
| | // vp9_get_mvpred_var(), the sum here is not clamped to INT_MAX. |
1480 | | int vp9_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv, |
1481 | | const MV *center_mv, const uint8_t *second_pred, |
1482 | 0 | const vp9_variance_fn_ptr_t *vfp, int use_mvcost) { |
1483 | 0 | const MACROBLOCKD *const xd = &x->e_mbd; |
1484 | 0 | const struct buf_2d *const what = &x->plane[0].src; |
1485 | 0 | const struct buf_2d *const in_what = &xd->plane[0].pre[0]; |
1486 | 0 | const MV mv = { best_mv->row * 8, best_mv->col * 8 }; |
1487 | 0 | unsigned int unused; |
1488 |

1489 | 0 | return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0, |
1490 | 0 | what->buf, what->stride, &unused, second_pred) + |
1491 | 0 | (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, |
1492 | 0 | x->errorperbit) |
1493 | 0 | : 0); |
1494 | 0 | } |
1495 | | |
| | // Hexagon-pattern integer search: supplies the hex candidate tables to |
| | // vp9_pattern_search() and forwards every other argument unchanged. |
| | // Returns the value returned by vp9_pattern_search(). |
1496 | | static int hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param, |
1497 | | int sad_per_bit, int do_init_search, int *cost_list, |
1498 | | const vp9_variance_fn_ptr_t *vfp, int use_mvcost, |
1499 | 0 | const MV *center_mv, MV *best_mv) { |
1500 | | // First scale has 8-closest points, the rest have 6 points in hex shape |
1501 | | // at increasing scales |
1502 | 0 | static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 8, 6, 6, 6, 6, 6, |
1503 | 0 | 6, 6, 6, 6, 6 }; |
1504 | | // Note that the largest candidate step at each scale is 2^scale |
| | // Each entry is a { row, col } offset from the current centre. Rows for |
| | // scales 1 and up list six offsets, so their last two slots are |
| | // zero-initialised and lie beyond hex_num_candidates for that scale. |
1505 | | /* clang-format off */ |
1506 | 0 | static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { |
1507 | 0 | { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, { -1, 1 }, |
1508 | 0 | { -1, 0 } }, |
1509 | 0 | { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } }, |
1510 | 0 | { { -2, -4 }, { 2, -4 }, { 4, 0 }, { 2, 4 }, { -2, 4 }, { -4, 0 } }, |
1511 | 0 | { { -4, -8 }, { 4, -8 }, { 8, 0 }, { 4, 8 }, { -4, 8 }, { -8, 0 } }, |
1512 | 0 | { { -8, -16 }, { 8, -16 }, { 16, 0 }, { 8, 16 }, { -8, 16 }, { -16, 0 } }, |
1513 | 0 | { { -16, -32 }, { 16, -32 }, { 32, 0 }, { 16, 32 }, { -16, 32 }, |
1514 | 0 | { -32, 0 } }, |
1515 | 0 | { { -32, -64 }, { 32, -64 }, { 64, 0 }, { 32, 64 }, { -32, 64 }, |
1516 | 0 | { -64, 0 } }, |
1517 | 0 | { { -64, -128 }, { 64, -128 }, { 128, 0 }, { 64, 128 }, { -64, 128 }, |
1518 | 0 | { -128, 0 } }, |
1519 | 0 | { { -128, -256 }, { 128, -256 }, { 256, 0 }, { 128, 256 }, { -128, 256 }, |
1520 | 0 | { -256, 0 } }, |
1521 | 0 | { { -256, -512 }, { 256, -512 }, { 512, 0 }, { 256, 512 }, { -256, 512 }, |
1522 | 0 | { -512, 0 } }, |
1523 | 0 | { { -512, -1024 }, { 512, -1024 }, { 1024, 0 }, { 512, 1024 }, |
1524 | 0 | { -512, 1024 }, { -1024, 0 } } |
1525 | 0 | }; |
1526 | | /* clang-format on */ |
1527 | 0 | return vp9_pattern_search( |
1528 | 0 | x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp, |
1529 | 0 | use_mvcost, center_mv, best_mv, hex_num_candidates, hex_candidates); |
1530 | 0 | } |
1531 | | |
1532 | | static int bigdia_search(const MACROBLOCK *x, MV *ref_mv, int search_param, |
1533 | | int sad_per_bit, int do_init_search, int *cost_list, |
1534 | | const vp9_variance_fn_ptr_t *vfp, int use_mvcost, |
1535 | 0 | const MV *center_mv, MV *best_mv) { |
1536 | | // First scale has 4-closest points, the rest have 8 points in diamond |
1537 | | // shape at increasing scales |
1538 | 0 | static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = { |
1539 | 0 | 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, |
1540 | 0 | }; |
1541 | | // Note that the largest candidate step at each scale is 2^scale |
1542 | | /* clang-format off */ |
1543 | 0 | static const MV |
1544 | 0 | bigdia_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { |
1545 | 0 | { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } }, |
1546 | 0 | { { -1, -1 }, { 0, -2 }, { 1, -1 }, { 2, 0 }, { 1, 1 }, { 0, 2 }, |
1547 | 0 | { -1, 1 }, { -2, 0 } }, |
1548 | 0 | { { -2, -2 }, { 0, -4 }, { 2, -2 }, { 4, 0 }, { 2, 2 }, { 0, 4 }, |
1549 | 0 | { -2, 2 }, { -4, 0 } }, |
1550 | 0 | { { -4, -4 }, { 0, -8 }, { 4, -4 }, { 8, 0 }, { 4, 4 }, { 0, 8 }, |
1551 | 0 | { -4, 4 }, { -8, 0 } }, |
1552 | 0 | { { -8, -8 }, { 0, -16 }, { 8, -8 }, { 16, 0 }, { 8, 8 }, { 0, 16 }, |
1553 | 0 | { -8, 8 }, { -16, 0 } }, |
1554 | 0 | { { -16, -16 }, { 0, -32 }, { 16, -16 }, { 32, 0 }, { 16, 16 }, |
1555 | 0 | { 0, 32 }, { -16, 16 }, { -32, 0 } }, |
1556 | 0 | { { -32, -32 }, { 0, -64 }, { 32, -32 }, { 64, 0 }, { 32, 32 }, |
1557 | 0 | { 0, 64 }, { -32, 32 }, { -64, 0 } }, |
1558 | 0 | { { -64, -64 }, { 0, -128 }, { 64, -64 }, { 128, 0 }, { 64, 64 }, |
1559 | 0 | { 0, 128 }, { -64, 64 }, { -128, 0 } }, |
1560 | 0 | { { -128, -128 }, { 0, -256 }, { 128, -128 }, { 256, 0 }, { 128, 128 }, |
1561 | 0 | { 0, 256 }, { -128, 128 }, { -256, 0 } }, |
1562 | 0 | { { -256, -256 }, { 0, -512 }, { 256, -256 }, { 512, 0 }, { 256, 256 }, |
1563 | 0 | { 0, 512 }, { -256, 256 }, { -512, 0 } }, |
1564 | 0 | { { -512, -512 }, { 0, -1024 }, { 512, -512 }, { 1024, 0 }, |
1565 | 0 | { 512, 512 }, { 0, 1024 }, { -512, 512 }, { -1024, 0 } } |
1566 | 0 | }; |
1567 | | /* clang-format on */ |
1568 | 0 | return vp9_pattern_search_sad( |
1569 | 0 | x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp, |
1570 | 0 | use_mvcost, center_mv, best_mv, bigdia_num_candidates, bigdia_candidates); |
1571 | 0 | } |
1572 | | |
1573 | | static int square_search(const MACROBLOCK *x, MV *ref_mv, int search_param, |
1574 | | int sad_per_bit, int do_init_search, int *cost_list, |
1575 | | const vp9_variance_fn_ptr_t *vfp, int use_mvcost, |
1576 | 0 | const MV *center_mv, MV *best_mv) { |
1577 | | // All scales have 8 closest points in square shape |
1578 | 0 | static const int square_num_candidates[MAX_PATTERN_SCALES] = { |
1579 | 0 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, |
1580 | 0 | }; |
1581 | | // Note that the largest candidate step at each scale is 2^scale |
1582 | | /* clang-format off */ |
1583 | 0 | static const MV |
1584 | 0 | square_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { |
1585 | 0 | { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, |
1586 | 0 | { -1, 1 }, { -1, 0 } }, |
1587 | 0 | { { -2, -2 }, { 0, -2 }, { 2, -2 }, { 2, 0 }, { 2, 2 }, { 0, 2 }, |
1588 | 0 | { -2, 2 }, { -2, 0 } }, |
1589 | 0 | { { -4, -4 }, { 0, -4 }, { 4, -4 }, { 4, 0 }, { 4, 4 }, { 0, 4 }, |
1590 | 0 | { -4, 4 }, { -4, 0 } }, |
1591 | 0 | { { -8, -8 }, { 0, -8 }, { 8, -8 }, { 8, 0 }, { 8, 8 }, { 0, 8 }, |
1592 | 0 | { -8, 8 }, { -8, 0 } }, |
1593 | 0 | { { -16, -16 }, { 0, -16 }, { 16, -16 }, { 16, 0 }, { 16, 16 }, |
1594 | 0 | { 0, 16 }, { -16, 16 }, { -16, 0 } }, |
1595 | 0 | { { -32, -32 }, { 0, -32 }, { 32, -32 }, { 32, 0 }, { 32, 32 }, |
1596 | 0 | { 0, 32 }, { -32, 32 }, { -32, 0 } }, |
1597 | 0 | { { -64, -64 }, { 0, -64 }, { 64, -64 }, { 64, 0 }, { 64, 64 }, |
1598 | 0 | { 0, 64 }, { -64, 64 }, { -64, 0 } }, |
1599 | 0 | { { -128, -128 }, { 0, -128 }, { 128, -128 }, { 128, 0 }, { 128, 128 }, |
1600 | 0 | { 0, 128 }, { -128, 128 }, { -128, 0 } }, |
1601 | 0 | { { -256, -256 }, { 0, -256 }, { 256, -256 }, { 256, 0 }, { 256, 256 }, |
1602 | 0 | { 0, 256 }, { -256, 256 }, { -256, 0 } }, |
1603 | 0 | { { -512, -512 }, { 0, -512 }, { 512, -512 }, { 512, 0 }, { 512, 512 }, |
1604 | 0 | { 0, 512 }, { -512, 512 }, { -512, 0 } }, |
1605 | 0 | { { -1024, -1024 }, { 0, -1024 }, { 1024, -1024 }, { 1024, 0 }, |
1606 | 0 | { 1024, 1024 }, { 0, 1024 }, { -1024, 1024 }, { -1024, 0 } } |
1607 | 0 | }; |
1608 | | /* clang-format on */ |
1609 | 0 | return vp9_pattern_search( |
1610 | 0 | x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp, |
1611 | 0 | use_mvcost, center_mv, best_mv, square_num_candidates, square_candidates); |
1612 | 0 | } |
1613 | | |
1614 | | static int fast_hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param, |
1615 | | int sad_per_bit, |
1616 | | int do_init_search, // must be zero for fast_hex |
1617 | | int *cost_list, const vp9_variance_fn_ptr_t *vfp, |
1618 | 0 | int use_mvcost, const MV *center_mv, MV *best_mv) { |
1619 | 0 | return hex_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param), |
1620 | 0 | sad_per_bit, do_init_search, cost_list, vfp, use_mvcost, |
1621 | 0 | center_mv, best_mv); |
1622 | 0 | } |
1623 | | |
1624 | | static int fast_dia_search(const MACROBLOCK *x, MV *ref_mv, int search_param, |
1625 | | int sad_per_bit, int do_init_search, int *cost_list, |
1626 | | const vp9_variance_fn_ptr_t *vfp, int use_mvcost, |
1627 | 0 | const MV *center_mv, MV *best_mv) { |
1628 | 0 | return bigdia_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param), |
1629 | 0 | sad_per_bit, do_init_search, cost_list, vfp, use_mvcost, |
1630 | 0 | center_mv, best_mv); |
1631 | 0 | } |
1632 | | |
1633 | | #undef CHECK_BETTER |
1634 | | |
1635 | | // Exhuastive motion search around a given centre position with a given |
1636 | | // step size. |
1637 | | static int exhaustive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, |
1638 | | int range, int step, int sad_per_bit, |
1639 | | const vp9_variance_fn_ptr_t *fn_ptr, |
1640 | 0 | const MV *center_mv) { |
1641 | 0 | const MACROBLOCKD *const xd = &x->e_mbd; |
1642 | 0 | const struct buf_2d *const what = &x->plane[0].src; |
1643 | 0 | const struct buf_2d *const in_what = &xd->plane[0].pre[0]; |
1644 | 0 | MV fcenter_mv = { center_mv->row, center_mv->col }; |
1645 | 0 | unsigned int best_sad = INT_MAX; |
1646 | 0 | int r, c, i; |
1647 | 0 | int start_col, end_col, start_row, end_row; |
1648 | 0 | int col_step = (step > 1) ? step : 4; |
1649 | |
|
1650 | 0 | assert(step >= 1); |
1651 | |
|
1652 | 0 | clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max, |
1653 | 0 | x->mv_limits.row_min, x->mv_limits.row_max); |
1654 | 0 | *best_mv = fcenter_mv; |
1655 | 0 | best_sad = |
1656 | 0 | fn_ptr->sdf(what->buf, what->stride, |
1657 | 0 | get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) + |
1658 | 0 | mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit); |
1659 | 0 | start_row = VPXMAX(-range, x->mv_limits.row_min - fcenter_mv.row); |
1660 | 0 | start_col = VPXMAX(-range, x->mv_limits.col_min - fcenter_mv.col); |
1661 | 0 | end_row = VPXMIN(range, x->mv_limits.row_max - fcenter_mv.row); |
1662 | 0 | end_col = VPXMIN(range, x->mv_limits.col_max - fcenter_mv.col); |
1663 | |
|
1664 | 0 | for (r = start_row; r <= end_row; r += step) { |
1665 | 0 | for (c = start_col; c <= end_col; c += col_step) { |
1666 | | // Step > 1 means we are not checking every location in this pass. |
1667 | 0 | if (step > 1) { |
1668 | 0 | const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c }; |
1669 | 0 | unsigned int sad = |
1670 | 0 | fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), |
1671 | 0 | in_what->stride); |
1672 | 0 | if (sad < best_sad) { |
1673 | 0 | sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); |
1674 | 0 | if (sad < best_sad) { |
1675 | 0 | best_sad = sad; |
1676 | 0 | *best_mv = mv; |
1677 | 0 | } |
1678 | 0 | } |
1679 | 0 | } else { |
1680 | | // 4 sads in a single call if we are checking every location |
1681 | 0 | if (c + 3 <= end_col) { |
1682 | 0 | unsigned int sads[4]; |
1683 | 0 | const uint8_t *addrs[4]; |
1684 | 0 | for (i = 0; i < 4; ++i) { |
1685 | 0 | const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; |
1686 | 0 | addrs[i] = get_buf_from_mv(in_what, &mv); |
1687 | 0 | } |
1688 | 0 | fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads); |
1689 | |
|
1690 | 0 | for (i = 0; i < 4; ++i) { |
1691 | 0 | if (sads[i] < best_sad) { |
1692 | 0 | const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; |
1693 | 0 | const unsigned int sad = |
1694 | 0 | sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); |
1695 | 0 | if (sad < best_sad) { |
1696 | 0 | best_sad = sad; |
1697 | 0 | *best_mv = mv; |
1698 | 0 | } |
1699 | 0 | } |
1700 | 0 | } |
1701 | 0 | } else { |
1702 | 0 | for (i = 0; i < end_col - c; ++i) { |
1703 | 0 | const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; |
1704 | 0 | unsigned int sad = |
1705 | 0 | fn_ptr->sdf(what->buf, what->stride, |
1706 | 0 | get_buf_from_mv(in_what, &mv), in_what->stride); |
1707 | 0 | if (sad < best_sad) { |
1708 | 0 | sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); |
1709 | 0 | if (sad < best_sad) { |
1710 | 0 | best_sad = sad; |
1711 | 0 | *best_mv = mv; |
1712 | 0 | } |
1713 | 0 | } |
1714 | 0 | } |
1715 | 0 | } |
1716 | 0 | } |
1717 | 0 | } |
1718 | 0 | } |
1719 | |
|
1720 | 0 | return best_sad; |
1721 | 0 | } |
1722 | | |
1723 | 0 | #define MIN_RANGE 7 |
1724 | 0 | #define MAX_RANGE 256 |
1725 | 0 | #define MIN_INTERVAL 1 |
1726 | | #if CONFIG_NON_GREEDY_MV |
1727 | | static int64_t exhaustive_mesh_search_multi_step( |
1728 | | MV *best_mv, const MV *center_mv, int range, int step, |
1729 | | const struct buf_2d *src, const struct buf_2d *pre, int lambda, |
1730 | | const int_mv *nb_full_mvs, int full_mv_num, const MvLimits *mv_limits, |
1731 | | const vp9_variance_fn_ptr_t *fn_ptr) { |
1732 | | int64_t best_sad; |
1733 | | int r, c; |
1734 | | int start_col, end_col, start_row, end_row; |
1735 | | *best_mv = *center_mv; |
1736 | | best_sad = |
1737 | | ((int64_t)fn_ptr->sdf(src->buf, src->stride, |
1738 | | get_buf_from_mv(pre, center_mv), pre->stride) |
1739 | | << LOG2_PRECISION) + |
1740 | | lambda * vp9_nb_mvs_inconsistency(best_mv, nb_full_mvs, full_mv_num); |
1741 | | start_row = VPXMAX(center_mv->row - range, mv_limits->row_min); |
1742 | | start_col = VPXMAX(center_mv->col - range, mv_limits->col_min); |
1743 | | end_row = VPXMIN(center_mv->row + range, mv_limits->row_max); |
1744 | | end_col = VPXMIN(center_mv->col + range, mv_limits->col_max); |
1745 | | for (r = start_row; r <= end_row; r += step) { |
1746 | | for (c = start_col; c <= end_col; c += step) { |
1747 | | const MV mv = { r, c }; |
1748 | | int64_t sad = (int64_t)fn_ptr->sdf(src->buf, src->stride, |
1749 | | get_buf_from_mv(pre, &mv), pre->stride) |
1750 | | << LOG2_PRECISION; |
1751 | | if (sad < best_sad) { |
1752 | | sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); |
1753 | | if (sad < best_sad) { |
1754 | | best_sad = sad; |
1755 | | *best_mv = mv; |
1756 | | } |
1757 | | } |
1758 | | } |
1759 | | } |
1760 | | return best_sad; |
1761 | | } |
1762 | | |
1763 | | static int64_t exhaustive_mesh_search_single_step( |
1764 | | MV *best_mv, const MV *center_mv, int range, const struct buf_2d *src, |
1765 | | const struct buf_2d *pre, int lambda, const int_mv *nb_full_mvs, |
1766 | | int full_mv_num, const MvLimits *mv_limits, |
1767 | | const vp9_variance_fn_ptr_t *fn_ptr) { |
1768 | | int64_t best_sad; |
1769 | | int r, c, i; |
1770 | | int start_col, end_col, start_row, end_row; |
1771 | | |
1772 | | *best_mv = *center_mv; |
1773 | | best_sad = |
1774 | | ((int64_t)fn_ptr->sdf(src->buf, src->stride, |
1775 | | get_buf_from_mv(pre, center_mv), pre->stride) |
1776 | | << LOG2_PRECISION) + |
1777 | | lambda * vp9_nb_mvs_inconsistency(best_mv, nb_full_mvs, full_mv_num); |
1778 | | start_row = VPXMAX(center_mv->row - range, mv_limits->row_min); |
1779 | | start_col = VPXMAX(center_mv->col - range, mv_limits->col_min); |
1780 | | end_row = VPXMIN(center_mv->row + range, mv_limits->row_max); |
1781 | | end_col = VPXMIN(center_mv->col + range, mv_limits->col_max); |
1782 | | for (r = start_row; r <= end_row; r += 1) { |
1783 | | c = start_col; |
1784 | | while (c + 3 <= end_col) { |
1785 | | unsigned int sads[4]; |
1786 | | const uint8_t *addrs[4]; |
1787 | | for (i = 0; i < 4; ++i) { |
1788 | | const MV mv = { r, c + i }; |
1789 | | addrs[i] = get_buf_from_mv(pre, &mv); |
1790 | | } |
1791 | | fn_ptr->sdx4df(src->buf, src->stride, addrs, pre->stride, sads); |
1792 | | |
1793 | | for (i = 0; i < 4; ++i) { |
1794 | | int64_t sad = (int64_t)sads[i] << LOG2_PRECISION; |
1795 | | if (sad < best_sad) { |
1796 | | const MV mv = { r, c + i }; |
1797 | | sad += |
1798 | | lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); |
1799 | | if (sad < best_sad) { |
1800 | | best_sad = sad; |
1801 | | *best_mv = mv; |
1802 | | } |
1803 | | } |
1804 | | } |
1805 | | c += 4; |
1806 | | } |
1807 | | while (c <= end_col) { |
1808 | | const MV mv = { r, c }; |
1809 | | int64_t sad = (int64_t)fn_ptr->sdf(src->buf, src->stride, |
1810 | | get_buf_from_mv(pre, &mv), pre->stride) |
1811 | | << LOG2_PRECISION; |
1812 | | if (sad < best_sad) { |
1813 | | sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); |
1814 | | if (sad < best_sad) { |
1815 | | best_sad = sad; |
1816 | | *best_mv = mv; |
1817 | | } |
1818 | | } |
1819 | | c += 1; |
1820 | | } |
1821 | | } |
1822 | | return best_sad; |
1823 | | } |
1824 | | |
1825 | | static int64_t exhaustive_mesh_search_new(const MACROBLOCK *x, MV *best_mv, |
1826 | | int range, int step, |
1827 | | const vp9_variance_fn_ptr_t *fn_ptr, |
1828 | | const MV *center_mv, int lambda, |
1829 | | const int_mv *nb_full_mvs, |
1830 | | int full_mv_num) { |
1831 | | const MACROBLOCKD *const xd = &x->e_mbd; |
1832 | | const struct buf_2d *src = &x->plane[0].src; |
1833 | | const struct buf_2d *pre = &xd->plane[0].pre[0]; |
1834 | | assert(step >= 1); |
1835 | | assert(is_mv_in(&x->mv_limits, center_mv)); |
1836 | | if (step == 1) { |
1837 | | return exhaustive_mesh_search_single_step( |
1838 | | best_mv, center_mv, range, src, pre, lambda, nb_full_mvs, full_mv_num, |
1839 | | &x->mv_limits, fn_ptr); |
1840 | | } |
1841 | | return exhaustive_mesh_search_multi_step(best_mv, center_mv, range, step, src, |
1842 | | pre, lambda, nb_full_mvs, |
1843 | | full_mv_num, &x->mv_limits, fn_ptr); |
1844 | | } |
1845 | | |
1846 | | static int64_t full_pixel_exhaustive_new(const VP9_COMP *cpi, MACROBLOCK *x, |
1847 | | MV *centre_mv_full, |
1848 | | const vp9_variance_fn_ptr_t *fn_ptr, |
1849 | | MV *dst_mv, int lambda, |
1850 | | const int_mv *nb_full_mvs, |
1851 | | int full_mv_num) { |
1852 | | const SPEED_FEATURES *const sf = &cpi->sf; |
1853 | | MV temp_mv = { centre_mv_full->row, centre_mv_full->col }; |
1854 | | int64_t bestsme; |
1855 | | int i; |
1856 | | int interval = sf->mesh_patterns[0].interval; |
1857 | | int range = sf->mesh_patterns[0].range; |
1858 | | int baseline_interval_divisor; |
1859 | | |
1860 | | // Trap illegal values for interval and range for this function. |
1861 | | if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) || |
1862 | | (interval > range)) { |
1863 | | printf("ERROR: invalid range\n"); |
1864 | | assert(0); |
1865 | | } |
1866 | | |
1867 | | baseline_interval_divisor = range / interval; |
1868 | | |
1869 | | // Check size of proposed first range against magnitude of the centre |
1870 | | // value used as a starting point. |
1871 | | range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4); |
1872 | | range = VPXMIN(range, MAX_RANGE); |
1873 | | interval = VPXMAX(interval, range / baseline_interval_divisor); |
1874 | | |
1875 | | // initial search |
1876 | | bestsme = |
1877 | | exhaustive_mesh_search_new(x, &temp_mv, range, interval, fn_ptr, &temp_mv, |
1878 | | lambda, nb_full_mvs, full_mv_num); |
1879 | | |
1880 | | if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) { |
1881 | | // Progressive searches with range and step size decreasing each time |
1882 | | // till we reach a step size of 1. Then break out. |
1883 | | for (i = 1; i < MAX_MESH_STEP; ++i) { |
1884 | | // First pass with coarser step and longer range |
1885 | | bestsme = exhaustive_mesh_search_new( |
1886 | | x, &temp_mv, sf->mesh_patterns[i].range, |
1887 | | sf->mesh_patterns[i].interval, fn_ptr, &temp_mv, lambda, nb_full_mvs, |
1888 | | full_mv_num); |
1889 | | |
1890 | | if (sf->mesh_patterns[i].interval == 1) break; |
1891 | | } |
1892 | | } |
1893 | | |
1894 | | *dst_mv = temp_mv; |
1895 | | |
1896 | | return bestsme; |
1897 | | } |
1898 | | |
1899 | | static int64_t diamond_search_sad_new(const MACROBLOCK *x, |
1900 | | const search_site_config *cfg, |
1901 | | const MV *init_full_mv, MV *best_full_mv, |
1902 | | int search_param, int lambda, int *num00, |
1903 | | const vp9_variance_fn_ptr_t *fn_ptr, |
1904 | | const int_mv *nb_full_mvs, |
1905 | | int full_mv_num) { |
1906 | | int i, j, step; |
1907 | | |
1908 | | const MACROBLOCKD *const xd = &x->e_mbd; |
1909 | | uint8_t *what = x->plane[0].src.buf; |
1910 | | const int what_stride = x->plane[0].src.stride; |
1911 | | const uint8_t *in_what; |
1912 | | const int in_what_stride = xd->plane[0].pre[0].stride; |
1913 | | const uint8_t *best_address; |
1914 | | |
1915 | | int64_t bestsad; |
1916 | | int best_site = -1; |
1917 | | int last_site = -1; |
1918 | | |
1919 | | // search_param determines the length of the initial step and hence the number |
1920 | | // of iterations. |
1921 | | // 0 = initial step (MAX_FIRST_STEP) pel |
1922 | | // 1 = (MAX_FIRST_STEP/2) pel, |
1923 | | // 2 = (MAX_FIRST_STEP/4) pel... |
1924 | | // const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step]; |
1925 | | const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step]; |
1926 | | const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step]; |
1927 | | const int tot_steps = cfg->total_steps - search_param; |
1928 | | vpx_clear_system_state(); |
1929 | | |
1930 | | *best_full_mv = *init_full_mv; |
1931 | | clamp_mv(best_full_mv, x->mv_limits.col_min, x->mv_limits.col_max, |
1932 | | x->mv_limits.row_min, x->mv_limits.row_max); |
1933 | | *num00 = 0; |
1934 | | |
1935 | | // Work out the start point for the search |
1936 | | in_what = xd->plane[0].pre[0].buf + best_full_mv->row * in_what_stride + |
1937 | | best_full_mv->col; |
1938 | | best_address = in_what; |
1939 | | |
1940 | | // Check the starting position |
1941 | | { |
1942 | | const int64_t mv_dist = |
1943 | | (int64_t)fn_ptr->sdf(what, what_stride, in_what, in_what_stride) |
1944 | | << LOG2_PRECISION; |
1945 | | const int64_t mv_cost = |
1946 | | vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num); |
1947 | | bestsad = mv_dist + lambda * mv_cost; |
1948 | | } |
1949 | | |
1950 | | i = 0; |
1951 | | |
1952 | | for (step = 0; step < tot_steps; step++) { |
1953 | | int all_in = 1, t; |
1954 | | |
1955 | | // All_in is true if every one of the points we are checking are within |
1956 | | // the bounds of the image. |
1957 | | all_in &= ((best_full_mv->row + ss_mv[i].row) > x->mv_limits.row_min); |
1958 | | all_in &= ((best_full_mv->row + ss_mv[i + 1].row) < x->mv_limits.row_max); |
1959 | | all_in &= ((best_full_mv->col + ss_mv[i + 2].col) > x->mv_limits.col_min); |
1960 | | all_in &= ((best_full_mv->col + ss_mv[i + 3].col) < x->mv_limits.col_max); |
1961 | | |
1962 | | // If all the pixels are within the bounds we don't check whether the |
1963 | | // search point is valid in this loop, otherwise we check each point |
1964 | | // for validity.. |
1965 | | if (all_in) { |
1966 | | unsigned int sad_array[4]; |
1967 | | |
1968 | | for (j = 0; j < cfg->searches_per_step; j += 4) { |
1969 | | unsigned char const *block_offset[4]; |
1970 | | |
1971 | | for (t = 0; t < 4; t++) block_offset[t] = ss_os[i + t] + best_address; |
1972 | | |
1973 | | fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, |
1974 | | sad_array); |
1975 | | |
1976 | | for (t = 0; t < 4; t++, i++) { |
1977 | | const int64_t mv_dist = (int64_t)sad_array[t] << LOG2_PRECISION; |
1978 | | if (mv_dist < bestsad) { |
1979 | | const MV this_mv = { best_full_mv->row + ss_mv[i].row, |
1980 | | best_full_mv->col + ss_mv[i].col }; |
1981 | | const int64_t mv_cost = |
1982 | | vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num); |
1983 | | const int64_t thissad = mv_dist + lambda * mv_cost; |
1984 | | if (thissad < bestsad) { |
1985 | | bestsad = thissad; |
1986 | | best_site = i; |
1987 | | } |
1988 | | } |
1989 | | } |
1990 | | } |
1991 | | } else { |
1992 | | for (j = 0; j < cfg->searches_per_step; j++) { |
1993 | | // Trap illegal vectors |
1994 | | const MV this_mv = { best_full_mv->row + ss_mv[i].row, |
1995 | | best_full_mv->col + ss_mv[i].col }; |
1996 | | |
1997 | | if (is_mv_in(&x->mv_limits, &this_mv)) { |
1998 | | const uint8_t *const check_here = ss_os[i] + best_address; |
1999 | | const int64_t mv_dist = |
2000 | | (int64_t)fn_ptr->sdf(what, what_stride, check_here, |
2001 | | in_what_stride) |
2002 | | << LOG2_PRECISION; |
2003 | | if (mv_dist < bestsad) { |
2004 | | const int64_t mv_cost = |
2005 | | vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num); |
2006 | | const int64_t thissad = mv_dist + lambda * mv_cost; |
2007 | | if (thissad < bestsad) { |
2008 | | bestsad = thissad; |
2009 | | best_site = i; |
2010 | | } |
2011 | | } |
2012 | | } |
2013 | | i++; |
2014 | | } |
2015 | | } |
2016 | | if (best_site != last_site) { |
2017 | | best_full_mv->row += ss_mv[best_site].row; |
2018 | | best_full_mv->col += ss_mv[best_site].col; |
2019 | | best_address += ss_os[best_site]; |
2020 | | last_site = best_site; |
2021 | | } else if (best_address == in_what) { |
2022 | | (*num00)++; |
2023 | | } |
2024 | | } |
2025 | | return bestsad; |
2026 | | } |
2027 | | |
2028 | | int vp9_prepare_nb_full_mvs(const MotionField *motion_field, int mi_row, |
2029 | | int mi_col, int_mv *nb_full_mvs) { |
2030 | | const int mi_width = num_8x8_blocks_wide_lookup[motion_field->bsize]; |
2031 | | const int mi_height = num_8x8_blocks_high_lookup[motion_field->bsize]; |
2032 | | const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } }; |
2033 | | int nb_full_mv_num = 0; |
2034 | | int i; |
2035 | | assert(mi_row % mi_height == 0); |
2036 | | assert(mi_col % mi_width == 0); |
2037 | | for (i = 0; i < NB_MVS_NUM; ++i) { |
2038 | | int r = dirs[i][0]; |
2039 | | int c = dirs[i][1]; |
2040 | | int brow = mi_row / mi_height + r; |
2041 | | int bcol = mi_col / mi_width + c; |
2042 | | if (brow >= 0 && brow < motion_field->block_rows && bcol >= 0 && |
2043 | | bcol < motion_field->block_cols) { |
2044 | | if (vp9_motion_field_is_mv_set(motion_field, brow, bcol)) { |
2045 | | int_mv mv = vp9_motion_field_get_mv(motion_field, brow, bcol); |
2046 | | nb_full_mvs[nb_full_mv_num].as_mv = get_full_mv(&mv.as_mv); |
2047 | | ++nb_full_mv_num; |
2048 | | } |
2049 | | } |
2050 | | } |
2051 | | return nb_full_mv_num; |
2052 | | } |
2053 | | #endif // CONFIG_NON_GREEDY_MV |
2054 | | |
2055 | | int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site_config *cfg, |
2056 | | MV *ref_mv, uint32_t start_mv_sad, MV *best_mv, |
2057 | | int search_param, int sad_per_bit, int *num00, |
2058 | | const vp9_sad_fn_ptr_t *sad_fn_ptr, |
2059 | 64.6M | const MV *center_mv) { |
2060 | 64.6M | int i, j, step; |
2061 | | |
2062 | 64.6M | const MACROBLOCKD *const xd = &x->e_mbd; |
2063 | 64.6M | uint8_t *what = x->plane[0].src.buf; |
2064 | 64.6M | const int what_stride = x->plane[0].src.stride; |
2065 | 64.6M | const uint8_t *in_what; |
2066 | 64.6M | const int in_what_stride = xd->plane[0].pre[0].stride; |
2067 | 64.6M | const uint8_t *best_address; |
2068 | | |
2069 | 64.6M | unsigned int bestsad = start_mv_sad; |
2070 | 64.6M | int best_site = -1; |
2071 | 64.6M | int last_site = -1; |
2072 | | |
2073 | 64.6M | int ref_row; |
2074 | 64.6M | int ref_col; |
2075 | | |
2076 | | // search_param determines the length of the initial step and hence the number |
2077 | | // of iterations. |
2078 | | // 0 = initial step (MAX_FIRST_STEP) pel |
2079 | | // 1 = (MAX_FIRST_STEP/2) pel, |
2080 | | // 2 = (MAX_FIRST_STEP/4) pel... |
2081 | | // const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step]; |
2082 | 64.6M | const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step]; |
2083 | 64.6M | const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step]; |
2084 | 64.6M | const int tot_steps = cfg->total_steps - search_param; |
2085 | | |
2086 | 64.6M | const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; |
2087 | 64.6M | ref_row = ref_mv->row; |
2088 | 64.6M | ref_col = ref_mv->col; |
2089 | 64.6M | *num00 = 0; |
2090 | 64.6M | best_mv->row = ref_row; |
2091 | 64.6M | best_mv->col = ref_col; |
2092 | | |
2093 | | // Work out the start point for the search |
2094 | 64.6M | in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; |
2095 | 64.6M | best_address = in_what; |
2096 | | |
2097 | 64.6M | i = 0; |
2098 | | |
2099 | 241M | for (step = 0; step < tot_steps; step++) { |
2100 | 176M | int all_in = 1, t; |
2101 | | |
2102 | | // All_in is true if every one of the points we are checking are within |
2103 | | // the bounds of the image. |
2104 | 176M | all_in &= ((best_mv->row + ss_mv[i].row) > x->mv_limits.row_min); |
2105 | 176M | all_in &= ((best_mv->row + ss_mv[i + 1].row) < x->mv_limits.row_max); |
2106 | 176M | all_in &= ((best_mv->col + ss_mv[i + 2].col) > x->mv_limits.col_min); |
2107 | 176M | all_in &= ((best_mv->col + ss_mv[i + 3].col) < x->mv_limits.col_max); |
2108 | | |
2109 | | // If all the pixels are within the bounds we don't check whether the |
2110 | | // search point is valid in this loop, otherwise we check each point |
2111 | | // for validity.. |
2112 | 176M | if (all_in) { |
2113 | 141M | unsigned int sad_array[4]; |
2114 | | |
2115 | 423M | for (j = 0; j < cfg->searches_per_step; j += 4) { |
2116 | 282M | unsigned char const *block_offset[4]; |
2117 | | |
2118 | 1.41G | for (t = 0; t < 4; t++) block_offset[t] = ss_os[i + t] + best_address; |
2119 | | |
2120 | 282M | sad_fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, |
2121 | 282M | sad_array); |
2122 | | |
2123 | 1.41G | for (t = 0; t < 4; t++, i++) { |
2124 | 1.12G | if (sad_array[t] < bestsad) { |
2125 | 407M | const MV this_mv = { best_mv->row + ss_mv[i].row, |
2126 | 407M | best_mv->col + ss_mv[i].col }; |
2127 | 407M | sad_array[t] += |
2128 | 407M | mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); |
2129 | 407M | if (sad_array[t] < bestsad) { |
2130 | 123M | bestsad = sad_array[t]; |
2131 | 123M | best_site = i; |
2132 | 123M | } |
2133 | 407M | } |
2134 | 1.12G | } |
2135 | 282M | } |
2136 | 141M | } else { |
2137 | 320M | for (j = 0; j < cfg->searches_per_step; j++) { |
2138 | | // Trap illegal vectors |
2139 | 284M | const MV this_mv = { best_mv->row + ss_mv[i].row, |
2140 | 284M | best_mv->col + ss_mv[i].col }; |
2141 | | |
2142 | 284M | if (is_mv_in(&x->mv_limits, &this_mv)) { |
2143 | 148M | const uint8_t *const check_here = ss_os[i] + best_address; |
2144 | 148M | unsigned int thissad = |
2145 | 148M | sad_fn_ptr->sdf(what, what_stride, check_here, in_what_stride); |
2146 | | |
2147 | 148M | if (thissad < bestsad) { |
2148 | 70.1M | thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); |
2149 | 70.1M | if (thissad < bestsad) { |
2150 | 17.4M | bestsad = thissad; |
2151 | 17.4M | best_site = i; |
2152 | 17.4M | } |
2153 | 70.1M | } |
2154 | 148M | } |
2155 | 284M | i++; |
2156 | 284M | } |
2157 | 35.6M | } |
2158 | 176M | if (best_site != last_site) { |
2159 | 91.7M | best_mv->row += ss_mv[best_site].row; |
2160 | 91.7M | best_mv->col += ss_mv[best_site].col; |
2161 | 91.7M | best_address += ss_os[best_site]; |
2162 | 91.7M | last_site = best_site; |
2163 | | #if defined(NEW_DIAMOND_SEARCH) |
2164 | | while (1) { |
2165 | | const MV this_mv = { best_mv->row + ss_mv[best_site].row, |
2166 | | best_mv->col + ss_mv[best_site].col }; |
2167 | | if (is_mv_in(&x->mv_limits, &this_mv)) { |
2168 | | const uint8_t *const check_here = ss_os[best_site] + best_address; |
2169 | | unsigned int thissad = |
2170 | | fn_ptr->sdf(what, what_stride, check_here, in_what_stride); |
2171 | | if (thissad < bestsad) { |
2172 | | thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); |
2173 | | if (thissad < bestsad) { |
2174 | | bestsad = thissad; |
2175 | | best_mv->row += ss_mv[best_site].row; |
2176 | | best_mv->col += ss_mv[best_site].col; |
2177 | | best_address += ss_os[best_site]; |
2178 | | continue; |
2179 | | } |
2180 | | } |
2181 | | } |
2182 | | break; |
2183 | | } |
2184 | | #endif |
2185 | 91.7M | } else if (best_address == in_what) { |
2186 | 50.1M | (*num00)++; |
2187 | 50.1M | } |
2188 | 176M | } |
2189 | 64.6M | return bestsad; |
2190 | 64.6M | } |
2191 | | |
// Finds the offset into ref, in the range [0, bw] with bw = 4 << bwl, at
// which vpx_vector_var(&ref[offset], src, bwl) is smallest, using a coarse
// scan followed by successive refinement. Returns that offset minus bw / 2.
static int vector_match(int16_t *ref, int16_t *src, int bwl) {
  const int bw = 4 << bwl;  // redundant variable, to be changed in the
                            // experiments.
  int best_sad = INT_MAX;
  int best_pos = 0;
  int anchor;
  int radius;
  int d;

  // Coarse pass: every 16th offset in [0, bw].
  for (d = 0; d <= bw; d += 16) {
    const int sad = vpx_vector_var(&ref[d], src, bwl);
    if (sad < best_sad) {
      best_sad = sad;
      best_pos = d;
    }
  }
  anchor = best_pos;

  // Refinement: probe anchor - radius and anchor + radius for radius
  // 8, 4, 2, 1, re-anchoring on the best position after each radius.
  for (radius = 8; radius >= 1; radius >>= 1) {
    for (d = -radius; d <= radius; d += 2 * radius) {
      const int pos = anchor + d;
      int sad;
      // check limit
      if (pos < 0 || pos > bw) continue;
      sad = vpx_vector_var(&ref[pos], src, bwl);
      if (sad < best_sad) {
        best_sad = sad;
        best_pos = pos;
      }
    }
    anchor = best_pos;
  }

  return (best_pos - (bw >> 1));
}
2256 | | |
2257 | | static const MV search_pos[4] = { |
2258 | | { -1, 0 }, |
2259 | | { 0, -1 }, |
2260 | | { 0, 1 }, |
2261 | | { 1, 0 }, |
2262 | | }; |
2263 | | |
2264 | | unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, |
2265 | | BLOCK_SIZE bsize, int mi_row, |
2266 | 0 | int mi_col, const MV *ref_mv) { |
2267 | 0 | MACROBLOCKD *xd = &x->e_mbd; |
2268 | 0 | MODE_INFO *mi = xd->mi[0]; |
2269 | 0 | struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } }; |
2270 | 0 | DECLARE_ALIGNED(16, int16_t, hbuf[128]); |
2271 | 0 | DECLARE_ALIGNED(16, int16_t, vbuf[128]); |
2272 | 0 | DECLARE_ALIGNED(16, int16_t, src_hbuf[64]); |
2273 | 0 | DECLARE_ALIGNED(16, int16_t, src_vbuf[64]); |
2274 | 0 | int idx; |
2275 | 0 | const int bw = 4 << b_width_log2_lookup[bsize]; |
2276 | 0 | const int bh = 4 << b_height_log2_lookup[bsize]; |
2277 | 0 | const int search_width = bw << 1; |
2278 | 0 | const int search_height = bh << 1; |
2279 | 0 | const int src_stride = x->plane[0].src.stride; |
2280 | 0 | const int ref_stride = xd->plane[0].pre[0].stride; |
2281 | 0 | uint8_t const *ref_buf, *src_buf; |
2282 | 0 | MV *tmp_mv = &xd->mi[0]->mv[0].as_mv; |
2283 | 0 | unsigned int best_sad, tmp_sad, this_sad[4]; |
2284 | 0 | MV this_mv; |
2285 | 0 | const int norm_factor = 3 + (bw >> 5); |
2286 | 0 | const YV12_BUFFER_CONFIG *scaled_ref_frame = |
2287 | 0 | vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]); |
2288 | 0 | MvLimits subpel_mv_limits; |
2289 | |
|
2290 | 0 | if (scaled_ref_frame) { |
2291 | 0 | int i; |
2292 | | // Swap out the reference frame for a version that's been scaled to |
2293 | | // match the resolution of the current frame, allowing the existing |
2294 | | // motion search code to be used without additional modifications. |
2295 | 0 | for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0]; |
2296 | 0 | vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); |
2297 | 0 | } |
2298 | |
|
2299 | 0 | #if CONFIG_VP9_HIGHBITDEPTH |
2300 | | // TODO(jingning): Implement integral projection functions for high bit-depth |
2301 | | // setting and remove this part of code. |
2302 | 0 | if (xd->bd != 8) { |
2303 | 0 | const unsigned int sad = cpi->fn_ptr[bsize].sdf( |
2304 | 0 | x->plane[0].src.buf, src_stride, xd->plane[0].pre[0].buf, ref_stride); |
2305 | 0 | tmp_mv->row = 0; |
2306 | 0 | tmp_mv->col = 0; |
2307 | |
|
2308 | 0 | if (scaled_ref_frame) { |
2309 | 0 | int i; |
2310 | 0 | for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; |
2311 | 0 | } |
2312 | 0 | return sad; |
2313 | 0 | } |
2314 | 0 | #endif |
2315 | | |
2316 | | // Set up prediction 1-D reference set |
2317 | 0 | ref_buf = xd->plane[0].pre[0].buf - (bw >> 1); |
2318 | 0 | for (idx = 0; idx < search_width; idx += 16) { |
2319 | 0 | vpx_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh); |
2320 | 0 | ref_buf += 16; |
2321 | 0 | } |
2322 | |
|
2323 | 0 | ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride; |
2324 | 0 | for (idx = 0; idx < search_height; ++idx) { |
2325 | 0 | vbuf[idx] = vpx_int_pro_col(ref_buf, bw) >> norm_factor; |
2326 | 0 | ref_buf += ref_stride; |
2327 | 0 | } |
2328 | | |
2329 | | // Set up src 1-D reference set |
2330 | 0 | for (idx = 0; idx < bw; idx += 16) { |
2331 | 0 | src_buf = x->plane[0].src.buf + idx; |
2332 | 0 | vpx_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh); |
2333 | 0 | } |
2334 | |
|
2335 | 0 | src_buf = x->plane[0].src.buf; |
2336 | 0 | for (idx = 0; idx < bh; ++idx) { |
2337 | 0 | src_vbuf[idx] = vpx_int_pro_col(src_buf, bw) >> norm_factor; |
2338 | 0 | src_buf += src_stride; |
2339 | 0 | } |
2340 | | |
2341 | | // Find the best match per 1-D search |
2342 | 0 | tmp_mv->col = vector_match(hbuf, src_hbuf, b_width_log2_lookup[bsize]); |
2343 | 0 | tmp_mv->row = vector_match(vbuf, src_vbuf, b_height_log2_lookup[bsize]); |
2344 | |
|
2345 | 0 | this_mv = *tmp_mv; |
2346 | 0 | src_buf = x->plane[0].src.buf; |
2347 | 0 | ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col; |
2348 | 0 | best_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride); |
2349 | |
|
2350 | 0 | { |
2351 | 0 | const uint8_t *const pos[4] = { |
2352 | 0 | ref_buf - ref_stride, |
2353 | 0 | ref_buf - 1, |
2354 | 0 | ref_buf + 1, |
2355 | 0 | ref_buf + ref_stride, |
2356 | 0 | }; |
2357 | |
|
2358 | 0 | cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad); |
2359 | 0 | } |
2360 | |
|
2361 | 0 | for (idx = 0; idx < 4; ++idx) { |
2362 | 0 | if (this_sad[idx] < best_sad) { |
2363 | 0 | best_sad = this_sad[idx]; |
2364 | 0 | tmp_mv->row = search_pos[idx].row + this_mv.row; |
2365 | 0 | tmp_mv->col = search_pos[idx].col + this_mv.col; |
2366 | 0 | } |
2367 | 0 | } |
2368 | |
|
2369 | 0 | if (this_sad[0] < this_sad[3]) |
2370 | 0 | this_mv.row -= 1; |
2371 | 0 | else |
2372 | 0 | this_mv.row += 1; |
2373 | |
|
2374 | 0 | if (this_sad[1] < this_sad[2]) |
2375 | 0 | this_mv.col -= 1; |
2376 | 0 | else |
2377 | 0 | this_mv.col += 1; |
2378 | |
|
2379 | 0 | ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col; |
2380 | |
|
2381 | 0 | tmp_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride); |
2382 | 0 | if (best_sad > tmp_sad) { |
2383 | 0 | *tmp_mv = this_mv; |
2384 | 0 | best_sad = tmp_sad; |
2385 | 0 | } |
2386 | |
|
2387 | 0 | tmp_mv->row *= 8; |
2388 | 0 | tmp_mv->col *= 8; |
2389 | |
|
2390 | 0 | vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); |
2391 | 0 | clamp_mv(tmp_mv, subpel_mv_limits.col_min, subpel_mv_limits.col_max, |
2392 | 0 | subpel_mv_limits.row_min, subpel_mv_limits.row_max); |
2393 | |
|
2394 | 0 | if (scaled_ref_frame) { |
2395 | 0 | int i; |
2396 | 0 | for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; |
2397 | 0 | } |
2398 | |
|
2399 | 0 | return best_sad; |
2400 | 0 | } |
2401 | | |
2402 | | static int get_exhaustive_threshold(int exhaustive_searches_thresh, |
2403 | 0 | BLOCK_SIZE bsize) { |
2404 | 0 | return exhaustive_searches_thresh >> |
2405 | 0 | (8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize])); |
2406 | 0 | } |
2407 | | |
2408 | | #if CONFIG_NON_GREEDY_MV |
2409 | | // Runs sequence of diamond searches in smaller steps for RD. |
2410 | | /* do_refine: If last step (1-away) of n-step search doesn't pick the center |
2411 | | point as the best match, we will do a final 1-away diamond |
2412 | | refining search */ |
2413 | | int vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, |
2414 | | BLOCK_SIZE bsize, MV *mvp_full, int step_param, |
2415 | | int lambda, int do_refine, |
2416 | | const int_mv *nb_full_mvs, int full_mv_num, |
2417 | | MV *best_mv) { |
2418 | | const vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; |
2419 | | const SPEED_FEATURES *const sf = &cpi->sf; |
2420 | | int n, num00 = 0; |
2421 | | int thissme; |
2422 | | int bestsme; |
2423 | | const int further_steps = MAX_MVSEARCH_STEPS - 1 - step_param; |
2424 | | const MV center_mv = { 0, 0 }; |
2425 | | vpx_clear_system_state(); |
2426 | | diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, best_mv, step_param, lambda, |
2427 | | &n, fn_ptr, nb_full_mvs, full_mv_num); |
2428 | | |
2429 | | bestsme = vp9_get_mvpred_var(x, best_mv, ¢er_mv, fn_ptr, 0); |
2430 | | |
2431 | | // If there won't be more n-step search, check to see if refining search is |
2432 | | // needed. |
2433 | | if (n > further_steps) do_refine = 0; |
2434 | | |
2435 | | while (n < further_steps) { |
2436 | | ++n; |
2437 | | if (num00) { |
2438 | | num00--; |
2439 | | } else { |
2440 | | MV temp_mv; |
2441 | | diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, &temp_mv, |
2442 | | step_param + n, lambda, &num00, fn_ptr, |
2443 | | nb_full_mvs, full_mv_num); |
2444 | | thissme = vp9_get_mvpred_var(x, &temp_mv, ¢er_mv, fn_ptr, 0); |
2445 | | // check to see if refining search is needed. |
2446 | | if (num00 > further_steps - n) do_refine = 0; |
2447 | | |
2448 | | if (thissme < bestsme) { |
2449 | | bestsme = thissme; |
2450 | | *best_mv = temp_mv; |
2451 | | } |
2452 | | } |
2453 | | } |
2454 | | |
2455 | | // final 1-away diamond refining search |
2456 | | if (do_refine) { |
2457 | | const int search_range = 8; |
2458 | | MV temp_mv = *best_mv; |
2459 | | vp9_refining_search_sad_new(x, &temp_mv, lambda, search_range, fn_ptr, |
2460 | | nb_full_mvs, full_mv_num); |
2461 | | thissme = vp9_get_mvpred_var(x, &temp_mv, ¢er_mv, fn_ptr, 0); |
2462 | | if (thissme < bestsme) { |
2463 | | bestsme = thissme; |
2464 | | *best_mv = temp_mv; |
2465 | | } |
2466 | | } |
2467 | | |
2468 | | if (sf->exhaustive_searches_thresh < INT_MAX && |
2469 | | !cpi->rc.is_src_frame_alt_ref) { |
2470 | | const int64_t exhaustive_thr = |
2471 | | get_exhaustive_threshold(sf->exhaustive_searches_thresh, bsize); |
2472 | | if (bestsme > exhaustive_thr) { |
2473 | | full_pixel_exhaustive_new(cpi, x, best_mv, fn_ptr, best_mv, lambda, |
2474 | | nb_full_mvs, full_mv_num); |
2475 | | bestsme = vp9_get_mvpred_var(x, best_mv, ¢er_mv, fn_ptr, 0); |
2476 | | } |
2477 | | } |
2478 | | return bestsme; |
2479 | | } |
2480 | | #endif // CONFIG_NON_GREEDY_MV |
2481 | | |
2482 | | // Runs sequence of diamond searches in smaller steps for RD. |
2483 | | /* do_refine: If last step (1-away) of n-step search doesn't pick the center |
2484 | | point as the best match, we will do a final 1-away diamond |
2485 | | refining search */ |
2486 | | static int full_pixel_diamond(const VP9_COMP *const cpi, |
2487 | | const MACROBLOCK *const x, BLOCK_SIZE bsize, |
2488 | | MV *mvp_full, int step_param, int sadpb, |
2489 | | int further_steps, int do_refine, |
2490 | | int use_downsampled_sad, int *cost_list, |
2491 | | const vp9_variance_fn_ptr_t *fn_ptr, |
2492 | 27.1M | const MV *ref_mv, MV *dst_mv) { |
2493 | 27.1M | MV temp_mv; |
2494 | 27.1M | int thissme, n, num00 = 0; |
2495 | 27.1M | int bestsme; |
2496 | 27.1M | const int src_buf_stride = x->plane[0].src.stride; |
2497 | 27.1M | const uint8_t *const src_buf = x->plane[0].src.buf; |
2498 | 27.1M | const MACROBLOCKD *const xd = &x->e_mbd; |
2499 | 27.1M | const int pred_buf_stride = xd->plane[0].pre[0].stride; |
2500 | 27.1M | uint8_t *pred_buf; |
2501 | 27.1M | vp9_sad_fn_ptr_t sad_fn_ptr; |
2502 | 27.1M | unsigned int start_mv_sad, start_mv_sad_even_rows, start_mv_sad_odd_rows; |
2503 | 27.1M | const MV ref_mv_full = { ref_mv->row >> 3, ref_mv->col >> 3 }; |
2504 | 27.1M | clamp_mv(mvp_full, x->mv_limits.col_min, x->mv_limits.col_max, |
2505 | 27.1M | x->mv_limits.row_min, x->mv_limits.row_max); |
2506 | | |
2507 | 27.1M | pred_buf = |
2508 | 27.1M | xd->plane[0].pre[0].buf + mvp_full->row * pred_buf_stride + mvp_full->col; |
2509 | 27.1M | start_mv_sad_even_rows = |
2510 | 27.1M | fn_ptr->sdsf(src_buf, src_buf_stride, pred_buf, pred_buf_stride); |
2511 | 27.1M | start_mv_sad_odd_rows = |
2512 | 27.1M | fn_ptr->sdsf(src_buf + src_buf_stride, src_buf_stride, |
2513 | 27.1M | pred_buf + pred_buf_stride, pred_buf_stride); |
2514 | 27.1M | start_mv_sad = (start_mv_sad_even_rows + start_mv_sad_odd_rows) >> 1; |
2515 | 27.1M | start_mv_sad += mvsad_err_cost(x, mvp_full, &ref_mv_full, sadpb); |
2516 | | |
2517 | 27.1M | sad_fn_ptr.sdf = fn_ptr->sdf; |
2518 | 27.1M | sad_fn_ptr.sdx4df = fn_ptr->sdx4df; |
2519 | 27.1M | if (use_downsampled_sad && num_4x4_blocks_high_lookup[bsize] >= 2) { |
2520 | | // If the absolute difference between the pred-to-src SAD of even rows and |
2521 | | // the pred-to-src SAD of odd rows is small, skip every other row in sad |
2522 | | // computation. |
2523 | 7.32M | const int odd_to_even_diff_sad = |
2524 | 7.32M | abs((int)start_mv_sad_even_rows - (int)start_mv_sad_odd_rows); |
2525 | 7.32M | const int mult_thresh = 10; |
2526 | 7.32M | if (odd_to_even_diff_sad * mult_thresh < (int)start_mv_sad_even_rows) { |
2527 | 2.00M | sad_fn_ptr.sdf = fn_ptr->sdsf; |
2528 | 2.00M | sad_fn_ptr.sdx4df = fn_ptr->sdsx4df; |
2529 | 2.00M | } |
2530 | 7.32M | } |
2531 | | |
2532 | 27.1M | bestsme = |
2533 | 27.1M | cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, start_mv_sad, &temp_mv, |
2534 | 27.1M | step_param, sadpb, &n, &sad_fn_ptr, ref_mv); |
2535 | 27.1M | if (bestsme < INT_MAX) |
2536 | 27.1M | bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); |
2537 | 27.1M | *dst_mv = temp_mv; |
2538 | | |
2539 | | // If there won't be more n-step search, check to see if refining search is |
2540 | | // needed. |
2541 | 27.1M | if (n > further_steps) do_refine = 0; |
2542 | | |
2543 | 68.9M | while (n < further_steps) { |
2544 | 41.7M | ++n; |
2545 | | |
2546 | 41.7M | if (num00) { |
2547 | 4.30M | num00--; |
2548 | 37.4M | } else { |
2549 | 37.4M | thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, start_mv_sad, |
2550 | 37.4M | &temp_mv, step_param + n, sadpb, &num00, |
2551 | 37.4M | &sad_fn_ptr, ref_mv); |
2552 | 37.4M | if (thissme < INT_MAX) |
2553 | 37.4M | thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); |
2554 | | |
2555 | | // check to see if refining search is needed. |
2556 | 37.4M | if (num00 > further_steps - n) do_refine = 0; |
2557 | | |
2558 | 37.4M | if (thissme < bestsme) { |
2559 | 8.27M | bestsme = thissme; |
2560 | 8.27M | *dst_mv = temp_mv; |
2561 | 8.27M | } |
2562 | 37.4M | } |
2563 | 41.7M | } |
2564 | | |
2565 | | // final 1-away diamond refining search |
2566 | 27.1M | if (do_refine) { |
2567 | 15.1M | const int search_range = 8; |
2568 | 15.1M | MV best_mv = *dst_mv; |
2569 | 15.1M | thissme = vp9_refining_search_sad(x, &best_mv, sadpb, search_range, |
2570 | 15.1M | &sad_fn_ptr, ref_mv); |
2571 | 15.1M | if (thissme < INT_MAX) |
2572 | 15.1M | thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1); |
2573 | 15.1M | if (thissme < bestsme) { |
2574 | 1.01M | bestsme = thissme; |
2575 | 1.01M | *dst_mv = best_mv; |
2576 | 1.01M | } |
2577 | 15.1M | } |
2578 | | |
2579 | 27.1M | if (sad_fn_ptr.sdf != fn_ptr->sdf) { |
2580 | | // If we are skipping rows when we perform the motion search, we need to |
2581 | | // check the quality of skipping. If it's bad, then we run search with |
2582 | | // skip row features off. |
2583 | 2.00M | const uint8_t *best_address = get_buf_from_mv(&xd->plane[0].pre[0], dst_mv); |
2584 | 2.00M | const int sad = |
2585 | 2.00M | fn_ptr->sdf(src_buf, src_buf_stride, best_address, pred_buf_stride); |
2586 | 2.00M | const int skip_sad = |
2587 | 2.00M | fn_ptr->sdsf(src_buf, src_buf_stride, best_address, pred_buf_stride); |
2588 | | // We will keep the result of skipping rows if it's good enough. |
2589 | 2.00M | const int kSADThresh = |
2590 | 2.00M | 1 << (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); |
2591 | 2.00M | if (sad > kSADThresh && abs(skip_sad - sad) * 10 >= VPXMAX(sad, 1) * 9) { |
2592 | | // There is a large discrepancy between skipping and not skipping, so we |
2593 | | // need to redo the motion search. |
2594 | 11.5k | return full_pixel_diamond(cpi, x, bsize, mvp_full, step_param, sadpb, |
2595 | 11.5k | further_steps, do_refine, 0, cost_list, fn_ptr, |
2596 | 11.5k | ref_mv, dst_mv); |
2597 | 11.5k | } |
2598 | 2.00M | } |
2599 | | |
2600 | | // Return cost list. |
2601 | 27.1M | if (cost_list) { |
2602 | 0 | calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list); |
2603 | 0 | } |
2604 | 27.1M | return bestsme; |
2605 | 27.1M | } |
2606 | | |
2607 | | // Runs an limited range exhaustive mesh search using a pattern set |
2608 | | // according to the encode speed profile. |
2609 | | static int full_pixel_exhaustive(const VP9_COMP *const cpi, |
2610 | | const MACROBLOCK *const x, MV *centre_mv_full, |
2611 | | int sadpb, int *cost_list, |
2612 | | const vp9_variance_fn_ptr_t *fn_ptr, |
2613 | 0 | const MV *ref_mv, MV *dst_mv) { |
2614 | 0 | const SPEED_FEATURES *const sf = &cpi->sf; |
2615 | 0 | MV temp_mv = { centre_mv_full->row, centre_mv_full->col }; |
2616 | 0 | MV f_ref_mv = { ref_mv->row >> 3, ref_mv->col >> 3 }; |
2617 | 0 | int bestsme; |
2618 | 0 | int i; |
2619 | 0 | int interval = sf->mesh_patterns[0].interval; |
2620 | 0 | int range = sf->mesh_patterns[0].range; |
2621 | 0 | int baseline_interval_divisor; |
2622 | | |
2623 | | // Trap illegal values for interval and range for this function. |
2624 | 0 | if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) || |
2625 | 0 | (interval > range)) |
2626 | 0 | return INT_MAX; |
2627 | | |
2628 | 0 | baseline_interval_divisor = range / interval; |
2629 | | |
2630 | | // Check size of proposed first range against magnitude of the centre |
2631 | | // value used as a starting point. |
2632 | 0 | range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4); |
2633 | 0 | range = VPXMIN(range, MAX_RANGE); |
2634 | 0 | interval = VPXMAX(interval, range / baseline_interval_divisor); |
2635 | | |
2636 | | // initial search |
2637 | 0 | bestsme = exhaustive_mesh_search(x, &f_ref_mv, &temp_mv, range, interval, |
2638 | 0 | sadpb, fn_ptr, &temp_mv); |
2639 | |
|
2640 | 0 | if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) { |
2641 | | // Progressive searches with range and step size decreasing each time |
2642 | | // till we reach a step size of 1. Then break out. |
2643 | 0 | for (i = 1; i < MAX_MESH_STEP; ++i) { |
2644 | | // First pass with coarser step and longer range |
2645 | 0 | bestsme = exhaustive_mesh_search( |
2646 | 0 | x, &f_ref_mv, &temp_mv, sf->mesh_patterns[i].range, |
2647 | 0 | sf->mesh_patterns[i].interval, sadpb, fn_ptr, &temp_mv); |
2648 | |
|
2649 | 0 | if (sf->mesh_patterns[i].interval == 1) break; |
2650 | 0 | } |
2651 | 0 | } |
2652 | |
|
2653 | 0 | if (bestsme < INT_MAX) |
2654 | 0 | bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); |
2655 | 0 | *dst_mv = temp_mv; |
2656 | | |
2657 | | // Return cost list. |
2658 | 0 | if (cost_list) { |
2659 | 0 | calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list); |
2660 | 0 | } |
2661 | 0 | return bestsme; |
2662 | 0 | } |
2663 | | |
2664 | | #if CONFIG_NON_GREEDY_MV |
2665 | | int64_t vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, |
2666 | | int lambda, int search_range, |
2667 | | const vp9_variance_fn_ptr_t *fn_ptr, |
2668 | | const int_mv *nb_full_mvs, |
2669 | | int full_mv_num) { |
2670 | | const MACROBLOCKD *const xd = &x->e_mbd; |
2671 | | const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } }; |
2672 | | const struct buf_2d *const what = &x->plane[0].src; |
2673 | | const struct buf_2d *const in_what = &xd->plane[0].pre[0]; |
2674 | | const uint8_t *best_address = get_buf_from_mv(in_what, best_full_mv); |
2675 | | int64_t best_sad; |
2676 | | int i, j; |
2677 | | vpx_clear_system_state(); |
2678 | | { |
2679 | | const int64_t mv_dist = (int64_t)fn_ptr->sdf(what->buf, what->stride, |
2680 | | best_address, in_what->stride) |
2681 | | << LOG2_PRECISION; |
2682 | | const int64_t mv_cost = |
2683 | | vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num); |
2684 | | best_sad = mv_dist + lambda * mv_cost; |
2685 | | } |
2686 | | |
2687 | | for (i = 0; i < search_range; i++) { |
2688 | | int best_site = -1; |
2689 | | const int all_in = ((best_full_mv->row - 1) > x->mv_limits.row_min) & |
2690 | | ((best_full_mv->row + 1) < x->mv_limits.row_max) & |
2691 | | ((best_full_mv->col - 1) > x->mv_limits.col_min) & |
2692 | | ((best_full_mv->col + 1) < x->mv_limits.col_max); |
2693 | | |
2694 | | if (all_in) { |
2695 | | unsigned int sads[4]; |
2696 | | const uint8_t *const positions[4] = { best_address - in_what->stride, |
2697 | | best_address - 1, best_address + 1, |
2698 | | best_address + in_what->stride }; |
2699 | | |
2700 | | fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads); |
2701 | | |
2702 | | for (j = 0; j < 4; ++j) { |
2703 | | const MV mv = { best_full_mv->row + neighbors[j].row, |
2704 | | best_full_mv->col + neighbors[j].col }; |
2705 | | const int64_t mv_dist = (int64_t)sads[j] << LOG2_PRECISION; |
2706 | | const int64_t mv_cost = |
2707 | | vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); |
2708 | | const int64_t thissad = mv_dist + lambda * mv_cost; |
2709 | | if (thissad < best_sad) { |
2710 | | best_sad = thissad; |
2711 | | best_site = j; |
2712 | | } |
2713 | | } |
2714 | | } else { |
2715 | | for (j = 0; j < 4; ++j) { |
2716 | | const MV mv = { best_full_mv->row + neighbors[j].row, |
2717 | | best_full_mv->col + neighbors[j].col }; |
2718 | | |
2719 | | if (is_mv_in(&x->mv_limits, &mv)) { |
2720 | | const int64_t mv_dist = |
2721 | | (int64_t)fn_ptr->sdf(what->buf, what->stride, |
2722 | | get_buf_from_mv(in_what, &mv), |
2723 | | in_what->stride) |
2724 | | << LOG2_PRECISION; |
2725 | | const int64_t mv_cost = |
2726 | | vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); |
2727 | | const int64_t thissad = mv_dist + lambda * mv_cost; |
2728 | | if (thissad < best_sad) { |
2729 | | best_sad = thissad; |
2730 | | best_site = j; |
2731 | | } |
2732 | | } |
2733 | | } |
2734 | | } |
2735 | | |
2736 | | if (best_site == -1) { |
2737 | | break; |
2738 | | } else { |
2739 | | best_full_mv->row += neighbors[best_site].row; |
2740 | | best_full_mv->col += neighbors[best_site].col; |
2741 | | best_address = get_buf_from_mv(in_what, best_full_mv); |
2742 | | } |
2743 | | } |
2744 | | |
2745 | | return best_sad; |
2746 | | } |
2747 | | #endif // CONFIG_NON_GREEDY_MV |
2748 | | |
2749 | | int vp9_refining_search_sad(const MACROBLOCK *x, MV *ref_mv, int error_per_bit, |
2750 | | int search_range, |
2751 | | const vp9_sad_fn_ptr_t *sad_fn_ptr, |
2752 | 15.1M | const MV *center_mv) { |
2753 | 15.1M | const MACROBLOCKD *const xd = &x->e_mbd; |
2754 | 15.1M | const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } }; |
2755 | 15.1M | const struct buf_2d *const what = &x->plane[0].src; |
2756 | 15.1M | const struct buf_2d *const in_what = &xd->plane[0].pre[0]; |
2757 | 15.1M | const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; |
2758 | 15.1M | const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv); |
2759 | 15.1M | unsigned int best_sad = |
2760 | 15.1M | sad_fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) + |
2761 | 15.1M | mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); |
2762 | 15.1M | int i, j; |
2763 | | |
2764 | 19.5M | for (i = 0; i < search_range; i++) { |
2765 | 19.4M | int best_site = -1; |
2766 | 19.4M | const int all_in = ((ref_mv->row - 1) > x->mv_limits.row_min) & |
2767 | 19.4M | ((ref_mv->row + 1) < x->mv_limits.row_max) & |
2768 | 19.4M | ((ref_mv->col - 1) > x->mv_limits.col_min) & |
2769 | 19.4M | ((ref_mv->col + 1) < x->mv_limits.col_max); |
2770 | | |
2771 | 19.4M | if (all_in) { |
2772 | 18.4M | unsigned int sads[4]; |
2773 | 18.4M | const uint8_t *const positions[4] = { best_address - in_what->stride, |
2774 | 18.4M | best_address - 1, best_address + 1, |
2775 | 18.4M | best_address + in_what->stride }; |
2776 | | |
2777 | 18.4M | sad_fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, |
2778 | 18.4M | sads); |
2779 | | |
2780 | 92.3M | for (j = 0; j < 4; ++j) { |
2781 | 73.8M | if (sads[j] < best_sad) { |
2782 | 31.7M | const MV mv = { ref_mv->row + neighbors[j].row, |
2783 | 31.7M | ref_mv->col + neighbors[j].col }; |
2784 | 31.7M | sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); |
2785 | 31.7M | if (sads[j] < best_sad) { |
2786 | 4.36M | best_sad = sads[j]; |
2787 | 4.36M | best_site = j; |
2788 | 4.36M | } |
2789 | 31.7M | } |
2790 | 73.8M | } |
2791 | 18.4M | } else { |
2792 | 4.97M | for (j = 0; j < 4; ++j) { |
2793 | 3.98M | const MV mv = { ref_mv->row + neighbors[j].row, |
2794 | 3.98M | ref_mv->col + neighbors[j].col }; |
2795 | | |
2796 | 3.98M | if (is_mv_in(&x->mv_limits, &mv)) { |
2797 | 3.14M | unsigned int sad = |
2798 | 3.14M | sad_fn_ptr->sdf(what->buf, what->stride, |
2799 | 3.14M | get_buf_from_mv(in_what, &mv), in_what->stride); |
2800 | 3.14M | if (sad < best_sad) { |
2801 | 2.38M | sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); |
2802 | 2.38M | if (sad < best_sad) { |
2803 | 278k | best_sad = sad; |
2804 | 278k | best_site = j; |
2805 | 278k | } |
2806 | 2.38M | } |
2807 | 3.14M | } |
2808 | 3.98M | } |
2809 | 995k | } |
2810 | | |
2811 | 19.4M | if (best_site == -1) { |
2812 | 15.0M | break; |
2813 | 15.0M | } else { |
2814 | 4.43M | ref_mv->row += neighbors[best_site].row; |
2815 | 4.43M | ref_mv->col += neighbors[best_site].col; |
2816 | 4.43M | best_address = get_buf_from_mv(in_what, ref_mv); |
2817 | 4.43M | } |
2818 | 19.4M | } |
2819 | | |
2820 | 15.1M | return best_sad; |
2821 | 15.1M | } |
2822 | | |
2823 | | // This function is called when we do joint motion search in comp_inter_inter |
2824 | | // mode. |
2825 | | int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit, |
2826 | | int search_range, |
2827 | | const vp9_variance_fn_ptr_t *fn_ptr, |
2828 | 0 | const MV *center_mv, const uint8_t *second_pred) { |
2829 | 0 | const MV neighbors[8] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 }, |
2830 | 0 | { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } }; |
2831 | 0 | const MACROBLOCKD *const xd = &x->e_mbd; |
2832 | 0 | const struct buf_2d *const what = &x->plane[0].src; |
2833 | 0 | const struct buf_2d *const in_what = &xd->plane[0].pre[0]; |
2834 | 0 | const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; |
2835 | 0 | unsigned int best_sad = INT_MAX; |
2836 | 0 | int i, j; |
2837 | 0 | clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max, |
2838 | 0 | x->mv_limits.row_min, x->mv_limits.row_max); |
2839 | 0 | best_sad = |
2840 | 0 | fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), |
2841 | 0 | in_what->stride, second_pred) + |
2842 | 0 | mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); |
2843 | |
|
2844 | 0 | for (i = 0; i < search_range; ++i) { |
2845 | 0 | int best_site = -1; |
2846 | |
|
2847 | 0 | for (j = 0; j < 8; ++j) { |
2848 | 0 | const MV mv = { ref_mv->row + neighbors[j].row, |
2849 | 0 | ref_mv->col + neighbors[j].col }; |
2850 | |
|
2851 | 0 | if (is_mv_in(&x->mv_limits, &mv)) { |
2852 | 0 | unsigned int sad = |
2853 | 0 | fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), |
2854 | 0 | in_what->stride, second_pred); |
2855 | 0 | if (sad < best_sad) { |
2856 | 0 | sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); |
2857 | 0 | if (sad < best_sad) { |
2858 | 0 | best_sad = sad; |
2859 | 0 | best_site = j; |
2860 | 0 | } |
2861 | 0 | } |
2862 | 0 | } |
2863 | 0 | } |
2864 | |
|
2865 | 0 | if (best_site == -1) { |
2866 | 0 | break; |
2867 | 0 | } else { |
2868 | 0 | ref_mv->row += neighbors[best_site].row; |
2869 | 0 | ref_mv->col += neighbors[best_site].col; |
2870 | 0 | } |
2871 | 0 | } |
2872 | 0 | return best_sad; |
2873 | 0 | } |
2874 | | |
2875 | | int vp9_full_pixel_search(const VP9_COMP *const cpi, const MACROBLOCK *const x, |
2876 | | BLOCK_SIZE bsize, MV *mvp_full, int step_param, |
2877 | | int search_method, int error_per_bit, int *cost_list, |
2878 | 27.1M | const MV *ref_mv, MV *tmp_mv, int var_max, int rd) { |
2879 | 27.1M | const SPEED_FEATURES *const sf = &cpi->sf; |
2880 | 27.1M | const SEARCH_METHODS method = (SEARCH_METHODS)search_method; |
2881 | 27.1M | const vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; |
2882 | 27.1M | int var = 0; |
2883 | 27.1M | int run_exhaustive_search = 0; |
2884 | | |
2885 | 27.1M | if (cost_list) { |
2886 | 0 | cost_list[0] = INT_MAX; |
2887 | 0 | cost_list[1] = INT_MAX; |
2888 | 0 | cost_list[2] = INT_MAX; |
2889 | 0 | cost_list[3] = INT_MAX; |
2890 | 0 | cost_list[4] = INT_MAX; |
2891 | 0 | } |
2892 | | |
2893 | 27.1M | switch (method) { |
2894 | 0 | case FAST_DIAMOND: |
2895 | 0 | var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0, |
2896 | 0 | cost_list, fn_ptr, 1, ref_mv, tmp_mv); |
2897 | 0 | break; |
2898 | 0 | case FAST_HEX: |
2899 | 0 | var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0, |
2900 | 0 | cost_list, fn_ptr, 1, ref_mv, tmp_mv); |
2901 | 0 | break; |
2902 | 0 | case HEX: |
2903 | 0 | var = hex_search(x, mvp_full, step_param, error_per_bit, 1, cost_list, |
2904 | 0 | fn_ptr, 1, ref_mv, tmp_mv); |
2905 | 0 | break; |
2906 | 0 | case SQUARE: |
2907 | 0 | var = square_search(x, mvp_full, step_param, error_per_bit, 1, cost_list, |
2908 | 0 | fn_ptr, 1, ref_mv, tmp_mv); |
2909 | 0 | break; |
2910 | 0 | case BIGDIA: |
2911 | 0 | var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, cost_list, |
2912 | 0 | fn_ptr, 1, ref_mv, tmp_mv); |
2913 | 0 | break; |
2914 | 27.1M | case NSTEP: |
2915 | 27.1M | case MESH: |
2916 | 27.1M | var = full_pixel_diamond( |
2917 | 27.1M | cpi, x, bsize, mvp_full, step_param, error_per_bit, |
2918 | 27.1M | MAX_MVSEARCH_STEPS - 1 - step_param, 1, |
2919 | 27.1M | cpi->sf.mv.use_downsampled_sad, cost_list, fn_ptr, ref_mv, tmp_mv); |
2920 | 27.1M | break; |
2921 | 0 | default: assert(0 && "Unknown search method"); |
2922 | 27.1M | } |
2923 | | |
2924 | 27.1M | if (method == NSTEP) { |
2925 | 27.1M | if (sf->exhaustive_searches_thresh < INT_MAX && |
2926 | 27.1M | !cpi->rc.is_src_frame_alt_ref) { |
2927 | 0 | const int64_t exhaustive_thr = |
2928 | 0 | get_exhaustive_threshold(sf->exhaustive_searches_thresh, bsize); |
2929 | 0 | if (var > exhaustive_thr) { |
2930 | 0 | run_exhaustive_search = 1; |
2931 | 0 | } |
2932 | 0 | } |
2933 | 27.1M | } else if (method == MESH) { |
2934 | 0 | run_exhaustive_search = 1; |
2935 | 0 | } |
2936 | | |
2937 | 27.1M | if (run_exhaustive_search) { |
2938 | 0 | int var_ex; |
2939 | 0 | MV tmp_mv_ex; |
2940 | 0 | var_ex = full_pixel_exhaustive(cpi, x, tmp_mv, error_per_bit, cost_list, |
2941 | 0 | fn_ptr, ref_mv, &tmp_mv_ex); |
2942 | 0 | if (var_ex < var) { |
2943 | 0 | var = var_ex; |
2944 | 0 | *tmp_mv = tmp_mv_ex; |
2945 | 0 | } |
2946 | 0 | } |
2947 | | |
2948 | 27.1M | if (method != NSTEP && method != MESH && rd && var < var_max) |
2949 | 0 | var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1); |
2950 | | |
2951 | 27.1M | return var; |
2952 | 27.1M | } |
2953 | | |
// Note(yunqingwang): The two functions below are only used by the motion
// vector unit test; they return the extreme motion vectors allowed by the MV
// limits. COMMON_MV_TEST runs SETUP_SUBPEL_SEARCH and then casts to void the
// names listed here, none of which these stub searches use.
#define COMMON_MV_TEST                                                    \
  SETUP_SUBPEL_SEARCH;                                                    \
                                                                          \
  (void)error_per_bit, (void)vfp, (void)z, (void)src_stride;              \
  (void)y, (void)y_stride, (void)second_pred, (void)w, (void)h;           \
  (void)offset, (void)mvjcost, (void)mvcost, (void)sse1;                  \
  (void)distortion;                                                       \
                                                                          \
  (void)halfiters, (void)quarteriters, (void)eighthiters;                 \
  (void)whichdir, (void)allow_hp, (void)forced_stop, (void)hstep;         \
  (void)rr, (void)rc;                                                     \
                                                                          \
  (void)tr, (void)tc, (void)sse, (void)thismse, (void)cost_list;          \
  (void)use_accurate_subpel_search
2991 | | |
2992 | | // Return the maximum MV. |
2993 | | uint32_t vp9_return_max_sub_pixel_mv( |
2994 | | const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, |
2995 | | int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, |
2996 | | int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], |
2997 | | uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, |
2998 | 0 | int h, int use_accurate_subpel_search) { |
2999 | 0 | COMMON_MV_TEST; |
3000 | |
|
3001 | 0 | (void)minr; |
3002 | 0 | (void)minc; |
3003 | |
|
3004 | 0 | bestmv->row = maxr; |
3005 | 0 | bestmv->col = maxc; |
3006 | 0 | besterr = 0; |
3007 | | |
3008 | | // In the sub-pel motion search, if hp is not used, then the last bit of mv |
3009 | | // has to be 0. |
3010 | 0 | lower_mv_precision(bestmv, allow_hp && use_mv_hp(ref_mv)); |
3011 | |
|
3012 | 0 | return besterr; |
3013 | 0 | } |
3014 | | // Return the minimum MV. |
3015 | | uint32_t vp9_return_min_sub_pixel_mv( |
3016 | | const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, |
3017 | | int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, |
3018 | | int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], |
3019 | | uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, |
3020 | 0 | int h, int use_accurate_subpel_search) { |
3021 | 0 | COMMON_MV_TEST; |
3022 | |
|
3023 | 0 | (void)maxr; |
3024 | 0 | (void)maxc; |
3025 | |
|
3026 | 0 | bestmv->row = minr; |
3027 | 0 | bestmv->col = minc; |
3028 | 0 | besterr = 0; |
3029 | | |
3030 | | // In the sub-pel motion search, if hp is not used, then the last bit of mv |
3031 | | // has to be 0. |
3032 | 0 | lower_mv_precision(bestmv, allow_hp && use_mv_hp(ref_mv)); |
3033 | |
|
3034 | 0 | return besterr; |
3035 | 0 | } |