/src/libvpx/vp8/encoder/mcomp.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #include "./vp8_rtcd.h" |
12 | | #include "./vpx_dsp_rtcd.h" |
13 | | #include "onyx_int.h" |
14 | | #include "mcomp.h" |
15 | | #include "vpx_mem/vpx_mem.h" |
16 | | #include "vpx_config.h" |
17 | | #include <stdio.h> |
18 | | #include <limits.h> |
19 | | #include <math.h> |
20 | | #include "vp8/common/findnearmv.h" |
21 | | #include "vp8/common/common.h" |
22 | | #include "vpx_dsp/vpx_dsp_common.h" |
23 | | |
24 | 7.03M | int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) { |
25 | | /* Weighted bit cost of coding mv relative to ref. Each component |
26 | | * difference is halved, clamped to [0, MVvals] and used to index |
27 | | * mvcost[0] (row) and mvcost[1] (col); the sum is scaled by Weight and |
28 | | * shifted right by 7. MV costing is based on the distribution of vectors |
29 | | * in the previous frame and so tends to overstate the cost of vectors. Coding a new |
30 | | * vector can also have a knock-on effect on the cost of subsequent vectors and on the quality of prediction from NEAR and NEAREST for subsequent blocks; "Weight" allows, to a limited extent, for some account to be taken of these factors. |
31 | | */ |
32 | 7.03M | const int mv_idx_row = |
33 | 7.03M | clamp((mv->as_mv.row - ref->as_mv.row) >> 1, 0, MVvals); |
34 | 7.03M | const int mv_idx_col = |
35 | 7.03M | clamp((mv->as_mv.col - ref->as_mv.col) >> 1, 0, MVvals); |
36 | 7.03M | return ((mvcost[0][mv_idx_row] + mvcost[1][mv_idx_col]) * Weight) >> 7; |
37 | 7.03M | } |
38 | | |
39 | | static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], |
40 | 24.8M | int error_per_bit) { |
41 | | /* Cost of mv relative to ref (same halved, clamped table lookup as vp8_mv_bit_cost), scaled by error_per_bit and rounded via (+128) >> 8. Ignore mv costing (return 0) if mvcost is NULL. */ |
42 | 24.8M | if (mvcost) { |
43 | 24.8M | const int mv_idx_row = |
44 | 24.8M | clamp((mv->as_mv.row - ref->as_mv.row) >> 1, 0, MVvals); |
45 | 24.8M | const int mv_idx_col = |
46 | 24.8M | clamp((mv->as_mv.col - ref->as_mv.col) >> 1, 0, MVvals); |
47 | 24.8M | return ((mvcost[0][mv_idx_row] + mvcost[1][mv_idx_col]) * error_per_bit + |
48 | 24.8M | 128) >> |
49 | 24.8M | 8; |
50 | 24.8M | } |
51 | 0 | return 0; |
52 | 24.8M | } |
53 | | |
54 | | static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], |
55 | 187M | int error_per_bit) { |
56 | | /* Calculate sad error cost on full pixel basis: mvsadcost[0] / mvsadcost[1] are indexed directly by the row / col difference (no halving or clamping here), and the sum is scaled by error_per_bit and rounded via (+128) >> 8. */ |
57 | | /* Ignore mv costing (return 0) if mvsadcost is NULL */ |
58 | 187M | if (mvsadcost) { |
59 | 187M | return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] + |
60 | 187M | mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)]) * |
61 | 187M | error_per_bit + |
62 | 187M | 128) >> |
63 | 187M | 8; |
64 | 187M | } |
65 | 0 | return 0; |
66 | 187M | } |
67 | | |
68 | 0 | void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) { |
69 | 0 | int Len; |
70 | 0 | int search_site_count = 0; |
71 | | |
72 | | /* Fill x->ss with a centre site (zero mv, zero offset) followed by 4 search sites per step (up, down, left, right); the step length starts at MAX_FIRST_STEP and is halved until it reaches 0. */ |
73 | 0 | Len = MAX_FIRST_STEP; |
74 | 0 | x->ss[search_site_count].mv.col = 0; |
75 | 0 | x->ss[search_site_count].mv.row = 0; |
76 | 0 | x->ss[search_site_count].offset = 0; |
77 | 0 | search_site_count++; |
78 |

79 | 0 | while (Len > 0) { |
80 | | /* Up: row -Len, buffer offset -Len * stride. */ |
81 | 0 | x->ss[search_site_count].mv.col = 0; |
82 | 0 | x->ss[search_site_count].mv.row = -Len; |
83 | 0 | x->ss[search_site_count].offset = -Len * stride; |
84 | 0 | search_site_count++; |
85 | | |
86 | | /* Down: row +Len, buffer offset +Len * stride. */ |
87 | 0 | x->ss[search_site_count].mv.col = 0; |
88 | 0 | x->ss[search_site_count].mv.row = Len; |
89 | 0 | x->ss[search_site_count].offset = Len * stride; |
90 | 0 | search_site_count++; |
91 | | |
92 | | /* Left: col -Len, buffer offset -Len. */ |
93 | 0 | x->ss[search_site_count].mv.col = -Len; |
94 | 0 | x->ss[search_site_count].mv.row = 0; |
95 | 0 | x->ss[search_site_count].offset = -Len; |
96 | 0 | search_site_count++; |
97 | | |
98 | | /* Right: col +Len, buffer offset +Len. */ |
99 | 0 | x->ss[search_site_count].mv.col = Len; |
100 | 0 | x->ss[search_site_count].mv.row = 0; |
101 | 0 | x->ss[search_site_count].offset = Len; |
102 | 0 | search_site_count++; |
103 | | |
104 | | /* Contract: halve the step length for the next set of sites. */ |
105 | 0 | Len /= 2; |
106 | 0 | } |
107 |

108 | 0 | x->ss_count = search_site_count; |
109 | 0 | x->searches_per_step = 4; |
110 | 0 | } |
111 | | |
112 | 92.9k | void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) { |
113 | 92.9k | int Len; |
114 | 92.9k | int search_site_count = 0; |
115 | | |
116 | | /* Fill x->ss with a centre site (zero mv, zero offset) followed by 8 search sites per step (the 4 axis neighbours, then the 4 diagonals); the step length starts at MAX_FIRST_STEP and is halved until it reaches 0. */ |
117 | 92.9k | Len = MAX_FIRST_STEP; |
118 | 92.9k | x->ss[search_site_count].mv.col = 0; |
119 | 92.9k | x->ss[search_site_count].mv.row = 0; |
120 | 92.9k | x->ss[search_site_count].offset = 0; |
121 | 92.9k | search_site_count++; |
122 | | |
123 | 836k | while (Len > 0) { |
124 | | /* Up: row -Len, buffer offset -Len * stride. */ |
125 | 743k | x->ss[search_site_count].mv.col = 0; |
126 | 743k | x->ss[search_site_count].mv.row = -Len; |
127 | 743k | x->ss[search_site_count].offset = -Len * stride; |
128 | 743k | search_site_count++; |
129 | | |
130 | | /* Down: row +Len, buffer offset +Len * stride. */ |
131 | 743k | x->ss[search_site_count].mv.col = 0; |
132 | 743k | x->ss[search_site_count].mv.row = Len; |
133 | 743k | x->ss[search_site_count].offset = Len * stride; |
134 | 743k | search_site_count++; |
135 | | |
136 | | /* Left: col -Len, buffer offset -Len. */ |
137 | 743k | x->ss[search_site_count].mv.col = -Len; |
138 | 743k | x->ss[search_site_count].mv.row = 0; |
139 | 743k | x->ss[search_site_count].offset = -Len; |
140 | 743k | search_site_count++; |
141 | | |
142 | | /* Right: col +Len, buffer offset +Len. */ |
143 | 743k | x->ss[search_site_count].mv.col = Len; |
144 | 743k | x->ss[search_site_count].mv.row = 0; |
145 | 743k | x->ss[search_site_count].offset = Len; |
146 | 743k | search_site_count++; |
147 | | |
148 | | /* Up-left diagonal: row -Len, col -Len. */ |
149 | 743k | x->ss[search_site_count].mv.col = -Len; |
150 | 743k | x->ss[search_site_count].mv.row = -Len; |
151 | 743k | x->ss[search_site_count].offset = -Len * stride - Len; |
152 | 743k | search_site_count++; |
153 | | |
154 | | /* Up-right diagonal: row -Len, col +Len. */ |
155 | 743k | x->ss[search_site_count].mv.col = Len; |
156 | 743k | x->ss[search_site_count].mv.row = -Len; |
157 | 743k | x->ss[search_site_count].offset = -Len * stride + Len; |
158 | 743k | search_site_count++; |
159 | | |
160 | | /* Down-left diagonal: row +Len, col -Len. */ |
161 | 743k | x->ss[search_site_count].mv.col = -Len; |
162 | 743k | x->ss[search_site_count].mv.row = Len; |
163 | 743k | x->ss[search_site_count].offset = Len * stride - Len; |
164 | 743k | search_site_count++; |
165 | | |
166 | | /* Down-right diagonal: row +Len, col +Len. */ |
167 | 743k | x->ss[search_site_count].mv.col = Len; |
168 | 743k | x->ss[search_site_count].mv.row = Len; |
169 | 743k | x->ss[search_site_count].offset = Len * stride + Len; |
170 | 743k | search_site_count++; |
171 | | |
172 | | /* Contract: halve the step length for the next set of sites. */ |
173 | 743k | Len /= 2; |
174 | 743k | } |
175 | | |
176 | 92.9k | x->ss_count = search_site_count; |
177 | 92.9k | x->searches_per_step = 8; |
178 | 92.9k | } |
179 | | |
180 | | /* |
181 | | * To avoid the penalty of reads that cross a cache line, preload the reference |
182 | | * area into a small aligned buffer, so that reads from this buffer never cross |
183 | | * a cache line. This reduces the CPU cycles spent reading ref data in the |
184 | | * sub-pixel filter functions. |
185 | | * TODO: Currently, since the sub-pixel search range here is -3 ~ 3, a 22 rows x |
186 | | * 32 cols area is copied, which is enough for a 16x16 macroblock. Later, for |
187 | | * SPLITMV, we could reduce the area. |
188 | | */ |
189 | | |
190 | | /* estimated cost of a motion vector (r,c) relative to (rr,rc), scaled by error_per_bit and rounded; 0 if mvcost is NULL */ |
191 | | #define MVC(r, c) \ |
192 | | (mvcost \ |
193 | | ? ((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128) >> 8 \ |
194 | | : 0) |
195 | | /* pointer to the predictor base of a motion vector: y plus the whole-pel part (>> 2) of (r,c), minus offset */ |
196 | | #define PRE(r, c) (y + (((r) >> 2) * y_stride + ((c) >> 2) - (offset))) |
197 | | /* convert motion vector component to offset for svf calc: the low 2 (fractional) bits, doubled */ |
198 | | #define SP(x) (((x)&3) << 1) |
199 | | /* returns the subpixel variance error from vfp->svf at (r,c); the call also writes sse */ |
200 | | #define DIST(r, c) \ |
201 | | vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse) |
202 | | #define IFMVCV(r, c, s, e) \ |
203 | 315M | if (c >= minc && c <= maxc && r >= minr && r <= maxr) s else e; |
204 | | /* returns distortion + motion vector cost */ |
205 | | #define ERR(r, c) (MVC(r, c) + DIST(r, c)) |
206 | | /* checks if (r,c) has a better score than the previous best and, if so, records it in besterr, br, bc, *distortion and *sse1; v receives the score, or UINT_MAX when (r,c) is outside [minr,maxr] x [minc,maxc] */ |
207 | | #define CHECK_BETTER(v, r, c) \ |
208 | 77.0M | do { \ |
209 | 77.0M | IFMVCV( \ |
210 | 77.0M | r, c, \ |
211 | 77.0M | { \ |
212 | 77.0M | thismse = DIST(r, c); \ |
213 | 77.0M | if ((v = (MVC(r, c) + thismse)) < besterr) { \ |
214 | 77.0M | besterr = v; \ |
215 | 77.0M | br = r; \ |
216 | 77.0M | bc = c; \ |
217 | 77.0M | *distortion = thismse; \ |
218 | 77.0M | *sse1 = sse; \ |
219 | 77.0M | } \ |
220 | 77.0M | }, \ |
221 | 77.0M | v = UINT_MAX;) \ |
222 | 77.0M | } while (0) |
223 | | /* Iteratively refines the full-pixel *bestmv to sub-pixel precision: up to 3 half-pel passes, then up to 3 quarter-pel passes, each testing left/right/up/down plus one diagonal. Updates *bestmv, *distortion and *sse1; returns the best error, or INT_MAX if the result is more than MAX_FULL_PEL_VAL << 3 away from ref_mv. */ |
224 | | int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, |
225 | | int_mv *bestmv, int_mv *ref_mv, |
226 | | int error_per_bit, |
227 | | const vp8_variance_fn_ptr_t *vfp, |
228 | | int *mvcost[2], int *distortion, |
229 | 5.25M | unsigned int *sse1) { |
230 | 5.25M | unsigned char *z = (*(b->base_src) + b->src); |
231 | | /* rr/rc: ref_mv halved; br/bc: best position so far and tr/tc: centre of the current pass, all as the full-pixel bestmv scaled by 4 (quarter-pel units) */ |
232 | 5.25M | int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1; |
233 | 5.25M | int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4; |
234 | 5.25M | int tr = br, tc = bc; |
235 | 5.25M | unsigned int besterr; |
236 | 5.25M | unsigned int left, right, up, down, diag; |
237 | 5.25M | unsigned int sse; |
238 | 5.25M | unsigned int whichdir; |
239 | 5.25M | unsigned int halfiters = 4; |
240 | 5.25M | unsigned int quarteriters = 4; |
241 | 5.25M | int thismse; |
242 | | /* Search window, in the same units as br/bc: the x->mv_* limits scaled by 4, further restricted to within (1 << mvlong_width) - 1 of the halved ref_mv */ |
243 | 5.25M | int minc = VPXMAX(x->mv_col_min * 4, |
244 | 5.25M | (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1)); |
245 | 5.25M | int maxc = VPXMIN(x->mv_col_max * 4, |
246 | 5.25M | (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1)); |
247 | 5.25M | int minr = VPXMAX(x->mv_row_min * 4, |
248 | 5.25M | (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1)); |
249 | 5.25M | int maxr = VPXMIN(x->mv_row_max * 4, |
250 | 5.25M | (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1)); |
251 | | |
252 | 5.25M | int y_stride; |
253 | 5.25M | int offset; |
254 | 5.25M | int pre_stride = x->e_mbd.pre.y_stride; |
255 | 5.25M | unsigned char *base_pre = x->e_mbd.pre.y_buffer; |
256 | | |
257 | 5.25M | #if VPX_ARCH_X86 || VPX_ARCH_X86_64 |
258 | 5.25M | MACROBLOCKD *xd = &x->e_mbd; |
259 | 5.25M | unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + |
260 | 5.25M | bestmv->as_mv.col; |
261 | 5.25M | unsigned char *y; |
262 | 5.25M | int buf_r1, buf_r2, buf_c1; |
263 | | |
264 | | /* Clamping to avoid out-of-range data access: the copied border is 3 rows/cols, reduced where bestmv is within 3 of the x->mv_* limits */ |
265 | 5.25M | buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min) |
266 | 5.25M | ? (bestmv->as_mv.row - x->mv_row_min) |
267 | 5.25M | : 3; |
268 | 5.25M | buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max) |
269 | 5.25M | ? (x->mv_row_max - bestmv->as_mv.row) |
270 | 5.25M | : 3; |
271 | 5.25M | buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min) |
272 | 5.25M | ? (bestmv->as_mv.col - x->mv_col_min) |
273 | 5.25M | : 3; |
274 | 5.25M | y_stride = 32; |
275 | | |
276 | | /* Copy 16 + buf_r1 + buf_r2 rows to the intermediate buffer xd->y_buf (stride 32) before searching. */ |
277 | 5.25M | vfp->copymem(y_0 - buf_c1 - pre_stride * buf_r1, pre_stride, xd->y_buf, |
278 | 5.25M | y_stride, 16 + buf_r1 + buf_r2); |
279 | 5.25M | y = xd->y_buf + y_stride * buf_r1 + buf_c1; |
280 | | #else |
281 | | unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + |
282 | | bestmv->as_mv.col; |
283 | | y_stride = pre_stride; |
284 | | #endif |
285 | | |
286 | 5.25M | offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; |
287 | | |
288 | | /* central mv: the full-pixel value scaled by 8 and clamped to the short range */ |
289 | 5.25M | bestmv->as_mv.row = clamp(bestmv->as_mv.row * 8, SHRT_MIN, SHRT_MAX); |
290 | 5.25M | bestmv->as_mv.col = clamp(bestmv->as_mv.col * 8, SHRT_MIN, SHRT_MAX); |
291 | | |
292 | | /* calculate central point error */ |
293 | 5.25M | besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1); |
294 | 5.25M | *distortion = besterr; |
295 | 5.25M | besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); |
296 | | |
297 | | /* TODO: Each subsequent iteration checks at least one point in common |
298 | | * with the last iteration (it could be 2 if the diagonal was selected). |
299 | | */ |
300 | 8.17M | while (--halfiters) { |
301 | | /* 1/2 pel: steps of 2 in the quarter-pel units of tr/tc */ |
302 | 8.04M | CHECK_BETTER(left, tr, tc - 2); |
303 | 8.04M | CHECK_BETTER(right, tr, tc + 2); |
304 | 8.04M | CHECK_BETTER(up, tr - 2, tc); |
305 | 8.04M | CHECK_BETTER(down, tr + 2, tc); |
306 | | |
307 | 8.04M | whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); |
308 | | |
309 | 8.04M | switch (whichdir) { |
310 | 1.73M | case 0: CHECK_BETTER(diag, tr - 2, tc - 2); break; |
311 | 1.88M | case 1: CHECK_BETTER(diag, tr - 2, tc + 2); break; |
312 | 2.02M | case 2: CHECK_BETTER(diag, tr + 2, tc - 2); break; |
313 | 2.39M | case 3: CHECK_BETTER(diag, tr + 2, tc + 2); break; |
314 | 8.04M | } |
315 | | |
316 | | /* the best point did not move in this pass: no reason to check the same one again. */ |
317 | 8.04M | if (tr == br && tc == bc) break; |
318 | | |
319 | 2.92M | tr = br; |
320 | 2.92M | tc = bc; |
321 | 2.92M | } |
322 | | |
323 | | /* TODO: Each subsequent iteration checks at least one point in common |
324 | | * with the last iteration (it could be 2 if the diagonal was selected). |
325 | | */ |
326 | | |
327 | | /* 1/4 pel: steps of 1 in the quarter-pel units of tr/tc */ |
328 | 7.40M | while (--quarteriters) { |
329 | 7.36M | CHECK_BETTER(left, tr, tc - 1); |
330 | 7.36M | CHECK_BETTER(right, tr, tc + 1); |
331 | 7.36M | CHECK_BETTER(up, tr - 1, tc); |
332 | 7.36M | CHECK_BETTER(down, tr + 1, tc); |
333 | | |
334 | 7.36M | whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); |
335 | | |
336 | 7.36M | switch (whichdir) { |
337 | 1.58M | case 0: CHECK_BETTER(diag, tr - 1, tc - 1); break; |
338 | 1.72M | case 1: CHECK_BETTER(diag, tr - 1, tc + 1); break; |
339 | 1.79M | case 2: CHECK_BETTER(diag, tr + 1, tc - 1); break; |
340 | 2.25M | case 3: CHECK_BETTER(diag, tr + 1, tc + 1); break; |
341 | 7.36M | } |
342 | | |
343 | | /* the best point did not move in this pass: no reason to check the same one again. */ |
344 | 7.36M | if (tr == br && tc == bc) break; |
345 | | |
346 | 2.15M | tr = br; |
347 | 2.15M | tc = bc; |
348 | 2.15M | } |
349 | | /* write the best position back, doubled to match the x8 scaling applied to the central mv above */ |
350 | 5.25M | bestmv->as_mv.row = clamp(br * 2, SHRT_MIN, SHRT_MAX); |
351 | 5.25M | bestmv->as_mv.col = clamp(bc * 2, SHRT_MIN, SHRT_MAX); |
352 | | /* reject results that lie more than MAX_FULL_PEL_VAL << 3 from ref_mv in either component */ |
353 | 5.25M | if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) || |
354 | 5.25M | (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) { |
355 | 1.10k | return INT_MAX; |
356 | 1.10k | } |
357 | | |
358 | 5.25M | return besterr; |
359 | 5.25M | } |
360 | | #undef MVC |
361 | | #undef PRE |
362 | | #undef SP |
363 | | #undef DIST |
364 | | #undef IFMVCV |
365 | | #undef ERR |
366 | | #undef CHECK_BETTER |
367 | | /* Single-pass sub-pixel search around the full-pixel *bestmv: tests the four half-pel neighbours and one diagonal, then the four quarter-pel neighbours and one diagonal around the best half-pel result. Updates *bestmv (scaled by 8), *distortion and *sse1; returns the best variance + mv_err_cost found. */ |
368 | | int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, |
369 | | int_mv *bestmv, int_mv *ref_mv, |
370 | | int error_per_bit, |
371 | | const vp8_variance_fn_ptr_t *vfp, |
372 | | int *mvcost[2], int *distortion, |
373 | 12.5k | unsigned int *sse1) { |
374 | 12.5k | int bestmse = INT_MAX; |
375 | 12.5k | int_mv startmv; |
376 | 12.5k | int_mv this_mv; |
377 | 12.5k | unsigned char *z = (*(b->base_src) + b->src); |
378 | 12.5k | int left, right, up, down, diag; |
379 | 12.5k | unsigned int sse; |
380 | 12.5k | int whichdir; |
381 | 12.5k | int thismse; |
382 | 12.5k | int y_stride; |
383 | 12.5k | int pre_stride = x->e_mbd.pre.y_stride; |
384 | 12.5k | unsigned char *base_pre = x->e_mbd.pre.y_buffer; |
385 | | |
386 | 12.5k | #if VPX_ARCH_X86 || VPX_ARCH_X86_64 |
387 | 12.5k | MACROBLOCKD *xd = &x->e_mbd; |
388 | 12.5k | unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + |
389 | 12.5k | bestmv->as_mv.col; |
390 | 12.5k | unsigned char *y; |
391 | | |
392 | 12.5k | y_stride = 32; |
393 | | /* Copy 18 rows x 32 cols area to intermediate buffer before searching, starting one row and one column before y_0. */ |
394 | 12.5k | vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18); |
395 | 12.5k | y = xd->y_buf + y_stride + 1; |
396 | | #else |
397 | | unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + |
398 | | bestmv->as_mv.col; |
399 | | y_stride = pre_stride; |
400 | | #endif |
401 | | |
402 | | /* central mv: the full-pixel value scaled by 8 and clamped to the short range */ |
403 | 12.5k | bestmv->as_mv.row = clamp(bestmv->as_mv.row * 8, SHRT_MIN, SHRT_MAX); |
404 | 12.5k | bestmv->as_mv.col = clamp(bestmv->as_mv.col * 8, SHRT_MIN, SHRT_MAX); |
405 | 12.5k | startmv = *bestmv; |
406 | | |
407 | | /* calculate central point error */ |
408 | 12.5k | bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); |
409 | 12.5k | *distortion = bestmse; |
410 | 12.5k | bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); |
411 | | |
412 | | /* go left then right and check error */ |
413 | 12.5k | this_mv.as_mv.row = startmv.as_mv.row; |
414 | 12.5k | this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); |
415 | | /* "halfpix" horizontal variance */ |
416 | 12.5k | thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse); |
417 | 12.5k | left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
418 | | |
419 | 12.5k | if (left < bestmse) { |
420 | 11.4k | *bestmv = this_mv; |
421 | 11.4k | bestmse = left; |
422 | 11.4k | *distortion = thismse; |
423 | 11.4k | *sse1 = sse; |
424 | 11.4k | } |
425 | | |
426 | 12.5k | this_mv.as_mv.col += 8; |
427 | | /* "halfpix" horizontal variance */ |
428 | 12.5k | thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse); |
429 | 12.5k | right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
430 | | |
431 | 12.5k | if (right < bestmse) { |
432 | 5.86k | *bestmv = this_mv; |
433 | 5.86k | bestmse = right; |
434 | 5.86k | *distortion = thismse; |
435 | 5.86k | *sse1 = sse; |
436 | 5.86k | } |
437 | | |
438 | | /* go up then down and check error */ |
439 | 12.5k | this_mv.as_mv.col = startmv.as_mv.col; |
440 | 12.5k | this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); |
441 | | /* "halfpix" vertical variance */ |
442 | 12.5k | thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse); |
443 | 12.5k | up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
444 | | |
445 | 12.5k | if (up < bestmse) { |
446 | 7.46k | *bestmv = this_mv; |
447 | 7.46k | bestmse = up; |
448 | 7.46k | *distortion = thismse; |
449 | 7.46k | *sse1 = sse; |
450 | 7.46k | } |
451 | | |
452 | 12.5k | this_mv.as_mv.row += 8; |
453 | | /* "halfpix" vertical variance */ |
454 | 12.5k | thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse); |
455 | 12.5k | down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
456 | | |
457 | 12.5k | if (down < bestmse) { |
458 | 4.25k | *bestmv = this_mv; |
459 | 4.25k | bestmse = down; |
460 | 4.25k | *distortion = thismse; |
461 | 4.25k | *sse1 = sse; |
462 | 4.25k | } |
463 | | |
464 | | /* now check 1 more diagonal: the one between the cheaper horizontal and the cheaper vertical neighbour */ |
465 | 12.5k | whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); |
466 | 12.5k | this_mv = startmv; |
467 | | |
468 | 12.5k | switch (whichdir) { |
469 | 3.53k | case 0: |
470 | 3.53k | this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; |
471 | 3.53k | this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; |
472 | | /* "halfpix" horizontal/vertical variance */ |
473 | 3.53k | thismse = |
474 | 3.53k | vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse); |
475 | 3.53k | break; |
476 | 3.32k | case 1: |
477 | 3.32k | this_mv.as_mv.col += 4; |
478 | 3.32k | this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; |
479 | | /* "halfpix" horizontal/vertical variance */ |
480 | 3.32k | thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse); |
481 | 3.32k | break; |
482 | 2.75k | case 2: |
483 | 2.75k | this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; |
484 | 2.75k | this_mv.as_mv.row += 4; |
485 | | /* "halfpix" horizontal/vertical variance */ |
486 | 2.75k | thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse); |
487 | 2.75k | break; |
488 | 2.92k | case 3: |
489 | 2.92k | default: |
490 | 2.92k | this_mv.as_mv.col += 4; |
491 | 2.92k | this_mv.as_mv.row += 4; |
492 | | /* "halfpix" horizontal/vertical variance */ |
493 | 2.92k | thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse); |
494 | 2.92k | break; |
495 | 12.5k | } |
496 | | |
497 | 12.5k | diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
498 | | |
499 | 12.5k | if (diag < bestmse) { |
500 | 9.92k | *bestmv = this_mv; |
501 | 9.92k | bestmse = diag; |
502 | 9.92k | *distortion = thismse; |
503 | 9.92k | *sse1 = sse; |
504 | 9.92k | } |
505 | | |
506 | | /* time to check quarter pels: first step y back one row / one column when the best half-pel point lies above / left of the start */ |
507 | 12.5k | if (bestmv->as_mv.row < startmv.as_mv.row) y -= y_stride; |
508 | | |
509 | 12.5k | if (bestmv->as_mv.col < startmv.as_mv.col) y--; |
510 | | |
511 | 12.5k | startmv = *bestmv; |
512 | | |
513 | | /* go left then right and check error */ |
514 | 12.5k | this_mv.as_mv.row = startmv.as_mv.row; |
515 | | /* col has a fractional part: step left by 2 from the same base; otherwise use offset 6 from the column to the left (y - 1) */ |
516 | 12.5k | if (startmv.as_mv.col & 7) { |
517 | 11.5k | this_mv.as_mv.col = startmv.as_mv.col - 2; |
518 | 11.5k | thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, |
519 | 11.5k | this_mv.as_mv.row & 7, z, b->src_stride, &sse); |
520 | 11.5k | } else { |
521 | 964 | this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; |
522 | 964 | thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, |
523 | 964 | b->src_stride, &sse); |
524 | 964 | } |
525 | | |
526 | 12.5k | left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
527 | | |
528 | 12.5k | if (left < bestmse) { |
529 | 1.20k | *bestmv = this_mv; |
530 | 1.20k | bestmse = left; |
531 | 1.20k | *distortion = thismse; |
532 | 1.20k | *sse1 = sse; |
533 | 1.20k | } |
534 | | |
535 | 12.5k | this_mv.as_mv.col += 4; |
536 | 12.5k | thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, |
537 | 12.5k | z, b->src_stride, &sse); |
538 | 12.5k | right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
539 | | |
540 | 12.5k | if (right < bestmse) { |
541 | 944 | *bestmv = this_mv; |
542 | 944 | bestmse = right; |
543 | 944 | *distortion = thismse; |
544 | 944 | *sse1 = sse; |
545 | 944 | } |
546 | | |
547 | | /* go up then down and check error */ |
548 | 12.5k | this_mv.as_mv.col = startmv.as_mv.col; |
549 | | /* row has a fractional part: step up by 2 from the same base; otherwise use offset 6 from the row above (y - y_stride) */ |
550 | 12.5k | if (startmv.as_mv.row & 7) { |
551 | 10.1k | this_mv.as_mv.row = startmv.as_mv.row - 2; |
552 | 10.1k | thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, |
553 | 10.1k | this_mv.as_mv.row & 7, z, b->src_stride, &sse); |
554 | 10.1k | } else { |
555 | 2.40k | this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; |
556 | 2.40k | thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, |
557 | 2.40k | b->src_stride, &sse); |
558 | 2.40k | } |
559 | | |
560 | 12.5k | up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
561 | | |
562 | 12.5k | if (up < bestmse) { |
563 | 1.81k | *bestmv = this_mv; |
564 | 1.81k | bestmse = up; |
565 | 1.81k | *distortion = thismse; |
566 | 1.81k | *sse1 = sse; |
567 | 1.81k | } |
568 | | |
569 | 12.5k | this_mv.as_mv.row += 4; |
570 | 12.5k | thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, |
571 | 12.5k | z, b->src_stride, &sse); |
572 | 12.5k | down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
573 | | |
574 | 12.5k | if (down < bestmse) { |
575 | 1.61k | *bestmv = this_mv; |
576 | 1.61k | bestmse = down; |
577 | 1.61k | *distortion = thismse; |
578 | 1.61k | *sse1 = sse; |
579 | 1.61k | } |
580 | | |
581 | | /* now check 1 more diagonal: the one between the cheaper horizontal and the cheaper vertical quarter-pel neighbour */ |
582 | 12.5k | whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); |
583 | | |
584 | 12.5k | this_mv = startmv; |
585 | | |
586 | 12.5k | switch (whichdir) { |
587 | 3.45k | case 0: |
588 | | |
589 | 3.45k | if (startmv.as_mv.row & 7) { |
590 | 2.49k | this_mv.as_mv.row -= 2; |
591 | | |
592 | 2.49k | if (startmv.as_mv.col & 7) { |
593 | 2.43k | this_mv.as_mv.col -= 2; |
594 | 2.43k | thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, |
595 | 2.43k | this_mv.as_mv.row & 7, z, b->src_stride, &sse); |
596 | 2.43k | } else { |
597 | 62 | this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; |
598 | 62 | thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, |
599 | 62 | b->src_stride, &sse); |
600 | 62 | } |
601 | 2.49k | } else { |
602 | 957 | this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; |
603 | | |
604 | 957 | if (startmv.as_mv.col & 7) { |
605 | 616 | this_mv.as_mv.col -= 2; |
606 | 616 | thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, |
607 | 616 | z, b->src_stride, &sse); |
608 | 616 | } else { |
609 | 341 | this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; |
610 | 341 | thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride, |
611 | 341 | &sse); |
612 | 341 | } |
613 | 957 | } |
614 | | |
615 | 3.45k | break; |
616 | 3.28k | case 1: |
617 | 3.28k | this_mv.as_mv.col += 2; |
618 | | |
619 | 3.28k | if (startmv.as_mv.row & 7) { |
620 | 2.50k | this_mv.as_mv.row -= 2; |
621 | 2.50k | thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, |
622 | 2.50k | this_mv.as_mv.row & 7, z, b->src_stride, &sse); |
623 | 2.50k | } else { |
624 | 776 | this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; |
625 | 776 | thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, |
626 | 776 | b->src_stride, &sse); |
627 | 776 | } |
628 | | |
629 | 3.28k | break; |
630 | 2.88k | case 2: |
631 | 2.88k | this_mv.as_mv.row += 2; |
632 | | |
633 | 2.88k | if (startmv.as_mv.col & 7) { |
634 | 2.75k | this_mv.as_mv.col -= 2; |
635 | 2.75k | thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, |
636 | 2.75k | this_mv.as_mv.row & 7, z, b->src_stride, &sse); |
637 | 2.75k | } else { |
638 | 131 | this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; |
639 | 131 | thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, |
640 | 131 | b->src_stride, &sse); |
641 | 131 | } |
642 | | |
643 | 2.88k | break; |
644 | 2.92k | case 3: |
645 | 2.92k | this_mv.as_mv.col += 2; |
646 | 2.92k | this_mv.as_mv.row += 2; |
647 | 2.92k | thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, |
648 | 2.92k | this_mv.as_mv.row & 7, z, b->src_stride, &sse); |
649 | 2.92k | break; |
650 | 12.5k | } |
651 | | |
652 | 12.5k | diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
653 | | |
654 | 12.5k | if (diag < bestmse) { |
655 | 512 | *bestmv = this_mv; |
656 | 512 | bestmse = diag; |
657 | 512 | *distortion = thismse; |
658 | 512 | *sse1 = sse; |
659 | 512 | } |
660 | | |
661 | 12.5k | return bestmse; |
662 | 12.5k | } |
663 | | /* Half-pel-only search around the full-pixel *bestmv: tests the four half-pel neighbours and one diagonal. Updates *bestmv (scaled by 8), *distortion and *sse1; returns the best variance + mv_err_cost found. */ |
664 | | int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, |
665 | | int_mv *bestmv, int_mv *ref_mv, |
666 | | int error_per_bit, |
667 | | const vp8_variance_fn_ptr_t *vfp, |
668 | | int *mvcost[2], int *distortion, |
669 | 0 | unsigned int *sse1) { |
670 | 0 | int bestmse = INT_MAX; |
671 | 0 | int_mv startmv; |
672 | 0 | int_mv this_mv; |
673 | 0 | unsigned char *z = (*(b->base_src) + b->src); |
674 | 0 | int left, right, up, down, diag; |
675 | 0 | unsigned int sse; |
676 | 0 | int whichdir; |
677 | 0 | int thismse; |
678 | 0 | int y_stride; |
679 | 0 | int pre_stride = x->e_mbd.pre.y_stride; |
680 | 0 | unsigned char *base_pre = x->e_mbd.pre.y_buffer; |
681 |

682 | 0 | #if VPX_ARCH_X86 || VPX_ARCH_X86_64 |
683 | 0 | MACROBLOCKD *xd = &x->e_mbd; |
684 | 0 | unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + |
685 | 0 | bestmv->as_mv.col; |
686 | 0 | unsigned char *y; |
687 |

688 | 0 | y_stride = 32; |
689 | | /* Copy 18 rows x 32 cols area to intermediate buffer before searching, starting one row and one column before y_0. */ |
690 | 0 | vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18); |
691 | 0 | y = xd->y_buf + y_stride + 1; |
692 | | #else |
693 | | unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + |
694 | | bestmv->as_mv.col; |
695 | | y_stride = pre_stride; |
696 | | #endif |
697 | | |
698 | | /* central mv: the full-pixel value scaled by 8 and clamped to the short range */ |
699 | 0 | bestmv->as_mv.row = clamp(bestmv->as_mv.row * 8, SHRT_MIN, SHRT_MAX); |
700 | 0 | bestmv->as_mv.col = clamp(bestmv->as_mv.col * 8, SHRT_MIN, SHRT_MAX); |
701 | 0 | startmv = *bestmv; |
702 | | |
703 | | /* calculate central point error */ |
704 | 0 | bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); |
705 | 0 | *distortion = bestmse; |
706 | 0 | bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); |
707 | | |
708 | | /* go left then right and check error */ |
709 | 0 | this_mv.as_mv.row = startmv.as_mv.row; |
710 | 0 | this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); |
711 | | /* "halfpix" horizontal variance */ |
712 | 0 | thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse); |
713 | 0 | left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
714 |

715 | 0 | if (left < bestmse) { |
716 | 0 | *bestmv = this_mv; |
717 | 0 | bestmse = left; |
718 | 0 | *distortion = thismse; |
719 | 0 | *sse1 = sse; |
720 | 0 | } |
721 |

722 | 0 | this_mv.as_mv.col += 8; |
723 | | /* "halfpix" horizontal variance */ |
724 | 0 | thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse); |
725 | 0 | right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
726 |

727 | 0 | if (right < bestmse) { |
728 | 0 | *bestmv = this_mv; |
729 | 0 | bestmse = right; |
730 | 0 | *distortion = thismse; |
731 | 0 | *sse1 = sse; |
732 | 0 | } |
733 | | |
734 | | /* go up then down and check error */ |
735 | 0 | this_mv.as_mv.col = startmv.as_mv.col; |
736 | 0 | this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); |
737 | | /* "halfpix" vertical variance */ |
738 | 0 | thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse); |
739 | 0 | up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
740 |

741 | 0 | if (up < bestmse) { |
742 | 0 | *bestmv = this_mv; |
743 | 0 | bestmse = up; |
744 | 0 | *distortion = thismse; |
745 | 0 | *sse1 = sse; |
746 | 0 | } |
747 |

748 | 0 | this_mv.as_mv.row += 8; |
749 | | /* "halfpix" vertical variance */ |
750 | 0 | thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse); |
751 | 0 | down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
752 |

753 | 0 | if (down < bestmse) { |
754 | 0 | *bestmv = this_mv; |
755 | 0 | bestmse = down; |
756 | 0 | *distortion = thismse; |
757 | 0 | *sse1 = sse; |
758 | 0 | } |
759 | | |
760 | | /* now check 1 more diagonal: the one between the cheaper horizontal and the cheaper vertical neighbour */ |
761 | 0 | whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); |
762 | 0 | this_mv = startmv; |
763 |

764 | 0 | switch (whichdir) { |
765 | 0 | case 0: |
766 | 0 | this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; |
767 | 0 | this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; |
768 | | /* "halfpix" horizontal/vertical variance */ |
769 | 0 | thismse = |
770 | 0 | vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse); |
771 | 0 | break; |
772 | 0 | case 1: |
773 | 0 | this_mv.as_mv.col += 4; |
774 | 0 | this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; |
775 | | /* "halfpix" horizontal/vertical variance */ |
776 | 0 | thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse); |
777 | 0 | break; |
778 | 0 | case 2: |
779 | 0 | this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; |
780 | 0 | this_mv.as_mv.row += 4; |
781 | | /* "halfpix" horizontal/vertical variance */ |
782 | 0 | thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse); |
783 | 0 | break; |
784 | 0 | case 3: |
785 | 0 | default: |
786 | 0 | this_mv.as_mv.col += 4; |
787 | 0 | this_mv.as_mv.row += 4; |
788 | | /* "halfpix" horizontal/vertical variance */ |
789 | 0 | thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse); |
790 | 0 | break; |
791 | 0 | } |
792 | | |
793 | 0 | diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
794 |

795 | 0 | if (diag < bestmse) { |
796 | 0 | *bestmv = this_mv; |
797 | 0 | bestmse = diag; |
798 | 0 | *distortion = thismse; |
799 | 0 | *sse1 = sse; |
800 | 0 | } |
801 |

802 | 0 | return bestmse; |
803 | 0 | } |
804 | | /* CHECK_BOUNDS(range): helper for vp8_hex_search(); not self-contained. |
 | | * It uses the caller's locals all_in, br, bc and x by name. all_in ends up 1 |
 | | * when br - range, br + range, bc - range and bc + range all lie inside |
 | | * x->mv_row_min..x->mv_row_max and x->mv_col_min..x->mv_col_max (a value |
 | | * exactly on a limit counts as inside), and 0 otherwise. |
 | | */ |
805 | | #define CHECK_BOUNDS(range) \ |
806 | 53.5k | do { \ |
807 | 53.5k | all_in = 1; \ |
808 | 53.5k | all_in &= ((br - range) >= x->mv_row_min); \ |
809 | 53.5k | all_in &= ((br + range) <= x->mv_row_max); \ |
810 | 53.5k | all_in &= ((bc - range) >= x->mv_col_min); \ |
811 | 53.5k | all_in &= ((bc + range) <= x->mv_col_max); \ |
812 | 53.5k | } while (0) |
813 | | /* CHECK_POINT: skips the current iteration of the caller's loop when this_mv |
 | | * lies outside x->mv_col_min..x->mv_col_max or x->mv_row_min..x->mv_row_max |
 | | * (a vector exactly on a limit is kept). It is a bare block rather than |
 | | * do { } while (0) so that `continue` binds to the loop at the call site. |
 | | */ |
814 | | #define CHECK_POINT \ |
815 | 1.03k | { \ |
816 | 1.03k | if (this_mv.as_mv.col < x->mv_col_min) continue; \ |
817 | 1.03k | if (this_mv.as_mv.col > x->mv_col_max) continue; \ |
818 | 980 | if (this_mv.as_mv.row < x->mv_row_min) continue; \ |
819 | 970 | if (this_mv.as_mv.row > x->mv_row_max) continue; \ |
820 | 644 | } |
821 | | /* CHECK_BETTER: uses the caller's thissad, bestsad, this_mv, fcenter_mv, |
 | | * mvsadcost, sad_per_bit, best_site and i by name. mvsad_err_cost() is only |
 | | * called, and its result only added to thissad, when the raw SAD already |
 | | * beats bestsad; if the sum still beats it, bestsad is updated and |
 | | * best_site is set to i. |
 | | */ |
822 | | #define CHECK_BETTER \ |
823 | 221k | do { \ |
824 | 221k | if (thissad < bestsad) { \ |
825 | 55.8k | thissad += \ |
826 | 55.8k | mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); \ |
827 | 55.8k | if (thissad < bestsad) { \ |
828 | 40.6k | bestsad = thissad; \ |
829 | 40.6k | best_site = i; \ |
830 | 40.6k | } \ |
831 | 55.8k | } \ |
832 | 221k | } while (0) |
833 | | /* Offsets tested after a hexagon move in vp8_hex_search(). Row k holds the |
 | | * same offsets as entries (k + 5) % 6, k and (k + 1) % 6 of that function's |
 | | * local hex[] table, i.e. direction k and its two neighbours on the hexagon. |
 | | */ |
834 | | static const MV next_chkpts[6][3] = { |
835 | | { { -2, 0 }, { -1, -2 }, { 1, -2 } }, { { -1, -2 }, { 1, -2 }, { 2, 0 } }, |
836 | | { { 1, -2 }, { 2, 0 }, { 1, 2 } }, { { 2, 0 }, { 1, 2 }, { -1, 2 } }, |
837 | | { { 1, 2 }, { -1, 2 }, { -2, 0 } }, { { -1, 2 }, { -2, 0 }, { -1, -2 } } |
838 | | }; |
839 | | /* Hexagon-pattern SAD motion search around ref_mv. |
 | | * |
 | | * ref_mv is first clamped in place (vp8_clamp_mv) against the x->mv_col_min, |
 | | * x->mv_col_max, x->mv_row_min and x->mv_row_max limits; its SAD plus |
 | | * mvsad_err_cost() becomes the initial bestsad. The search then runs in up |
 | | * to three stages: |
 | | * 1. all six hex[] points around the start; |
 | | * 2. up to hex_range - 1 further steps, each testing only the three |
 | | * next_chkpts[k][] points for the direction k of the previous move; |
 | | * 3. up to dia_range steps over the four 1-away neighbors[] (label |
 | | * cal_neighbors). |
 | | * If stage 1 finds no improvement, stage 2 is skipped; stages 2 and 3 stop |
 | | * at the first step in which no tested point lowers bestsad. |
 | | * |
 | | * x: supplies the MV limits and the prediction buffer (e_mbd.pre). |
 | | * b: source block; base_src, src and src_stride locate the pixels. |
 | | * d: d->offset is added to the prediction buffer base. |
 | | * ref_mv: start vector; modified by the clamp. |
 | | * best_mv: receives the row/col of the best position found. |
 | | * search_param: only read when CONFIG_MULTI_RES_ENCODING is set, where it |
 | | * shrinks hex_range or jumps straight to stage 3. |
 | | * sad_per_bit, mvsadcost: passed to mvsad_err_cost() for every candidate. |
 | | * vfp: only vfp->sdf is called. |
 | | * center_mv: shifted right by 3 to form the centre used for MV costing. |
 | | * |
 | | * Returns bestsad, i.e. the SAD plus mvsad_err_cost() of the chosen position. |
 | | */ |
840 | | int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, |
841 | | int_mv *best_mv, int search_param, int sad_per_bit, |
842 | | const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2], |
843 | 12.5k | int_mv *center_mv) { |
844 | 12.5k | MV hex[6] = { |
845 | 12.5k | { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } |
846 | 12.5k | }; |
847 | 12.5k | MV neighbors[4] = { { 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 } }; |
848 | 12.5k | int i, j; |
849 | | |
850 | 12.5k | unsigned char *what = (*(b->base_src) + b->src); |
851 | 12.5k | int what_stride = b->src_stride; |
852 | 12.5k | int pre_stride = x->e_mbd.pre.y_stride; |
853 | 12.5k | unsigned char *base_pre = x->e_mbd.pre.y_buffer; |
854 | | |
855 | 12.5k | int in_what_stride = pre_stride; |
856 | 12.5k | int br, bc; |
857 | 12.5k | int_mv this_mv; |
858 | 12.5k | unsigned int bestsad; |
859 | 12.5k | unsigned int thissad; |
860 | 12.5k | unsigned char *base_offset; |
861 | 12.5k | unsigned char *this_offset; |
862 | 12.5k | int k = -1; /* hex[] index of the direction of the latest hexagon move */ |
863 | 12.5k | int all_in; |
864 | 12.5k | int best_site = -1; /* -1 means no tested point improved on bestsad */ |
865 | 12.5k | int hex_range = 127; |
866 | 12.5k | int dia_range = 8; |
867 | | |
868 | 12.5k | int_mv fcenter_mv; |
869 | 12.5k | fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
870 | 12.5k | fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
871 | | |
872 | | /* adjust ref_mv to make sure it is within MV range */ |
873 | 12.5k | vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, |
874 | 12.5k | x->mv_row_max); |
875 | 12.5k | br = ref_mv->as_mv.row; |
876 | 12.5k | bc = ref_mv->as_mv.col; |
877 | | |
878 | | /* Work out the start point for the search */ |
879 | 12.5k | base_offset = (unsigned char *)(base_pre + d->offset); |
880 | 12.5k | this_offset = base_offset + (br * (pre_stride)) + bc; |
881 | 12.5k | this_mv.as_mv.row = br; |
882 | 12.5k | this_mv.as_mv.col = bc; |
883 | 12.5k | bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride) + |
884 | 12.5k | mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); |
885 | | |
886 | | #if CONFIG_MULTI_RES_ENCODING |
887 | | /* Lower search range based on prediction info */ |
888 | | if (search_param >= 6) |
889 | | goto cal_neighbors; |
890 | | else if (search_param >= 5) |
891 | | hex_range = 4; |
892 | | else if (search_param >= 4) |
893 | | hex_range = 6; |
894 | | else if (search_param >= 3) |
895 | | hex_range = 15; |
896 | | else if (search_param >= 2) |
897 | | hex_range = 31; |
898 | | else if (search_param >= 1) |
899 | | hex_range = 63; |
900 | | |
901 | | dia_range = 8; |
902 | | #else |
903 | 12.5k | (void)search_param; |
904 | 12.5k | #endif |
905 | | |
906 | | /* hex search: when the whole +/-2 window is inside the limits the six |
 | | * points are tested without per-point checks, otherwise CHECK_POINT |
 | | * filters each candidate */ |
907 | 12.5k | CHECK_BOUNDS(2); |
908 | | |
909 | 12.5k | if (all_in) { |
910 | 87.6k | for (i = 0; i < 6; ++i) { |
911 | 75.1k | this_mv.as_mv.row = br + hex[i].row; |
912 | 75.1k | this_mv.as_mv.col = bc + hex[i].col; |
913 | 75.1k | this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + |
914 | 75.1k | this_mv.as_mv.col; |
915 | 75.1k | thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride); |
916 | 75.1k | CHECK_BETTER; |
917 | 75.1k | } |
918 | 12.5k | } else { |
919 | 168 | for (i = 0; i < 6; ++i) { |
920 | 144 | this_mv.as_mv.row = br + hex[i].row; |
921 | 144 | this_mv.as_mv.col = bc + hex[i].col; |
922 | 144 | CHECK_POINT |
923 | 104 | this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + |
924 | 104 | this_mv.as_mv.col; |
925 | 104 | thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride); |
926 | 104 | CHECK_BETTER; |
927 | 104 | } |
928 | 24 | } |
929 | | |
930 | 12.5k | if (best_site == -1) { |
931 | 3.51k | goto cal_neighbors; |
932 | 9.03k | } else { |
933 | 9.03k | br += hex[best_site].row; |
934 | 9.03k | bc += hex[best_site].col; |
935 | 9.03k | k = best_site; |
936 | 9.03k | } |
937 | | |
938 | 17.7k | for (j = 1; j < hex_range; ++j) { |
939 | 17.7k | best_site = -1; |
940 | 17.7k | CHECK_BOUNDS(2); |
941 | | |
942 | 17.7k | if (all_in) { |
943 | 70.1k | for (i = 0; i < 3; ++i) { |
944 | 52.6k | this_mv.as_mv.row = br + next_chkpts[k][i].row; |
945 | 52.6k | this_mv.as_mv.col = bc + next_chkpts[k][i].col; |
946 | 52.6k | this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + |
947 | 52.6k | this_mv.as_mv.col; |
948 | 52.6k | thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride); |
949 | 52.6k | CHECK_BETTER; |
950 | 52.6k | } |
951 | 17.5k | } else { |
952 | 896 | for (i = 0; i < 3; ++i) { |
953 | 672 | this_mv.as_mv.row = br + next_chkpts[k][i].row; |
954 | 672 | this_mv.as_mv.col = bc + next_chkpts[k][i].col; |
955 | 672 | CHECK_POINT |
956 | 378 | this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + |
957 | 378 | this_mv.as_mv.col; |
958 | 378 | thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride); |
959 | 378 | CHECK_BETTER; |
960 | 378 | } |
961 | 224 | } |
962 | | |
963 | 17.7k | if (best_site == -1) { |
964 | 9.03k | break; |
965 | 9.03k | } else { |
966 | 8.73k | br += next_chkpts[k][best_site].row; |
967 | 8.73k | bc += next_chkpts[k][best_site].col; |
968 | 8.73k | k += 5 + best_site; /* together with the wrap below: k = (k + 5 + best_site) % 6 */ |
969 | 8.73k | if (k >= 12) { |
970 | 592 | k -= 12; |
971 | 8.14k | } else if (k >= 6) { |
972 | 7.60k | k -= 6; |
973 | 7.60k | } |
974 | 8.73k | } |
975 | 17.7k | } |
976 | | |
977 | | /* check 4 1-away neighbors */ |
978 | 12.5k | cal_neighbors: |
979 | 23.2k | for (j = 0; j < dia_range; ++j) { |
980 | 23.2k | best_site = -1; |
981 | 23.2k | CHECK_BOUNDS(1); |
982 | | |
983 | 23.2k | if (all_in) { |
984 | 115k | for (i = 0; i < 4; ++i) { |
985 | 92.7k | this_mv.as_mv.row = br + neighbors[i].row; |
986 | 92.7k | this_mv.as_mv.col = bc + neighbors[i].col; |
987 | 92.7k | this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + |
988 | 92.7k | this_mv.as_mv.col; |
989 | 92.7k | thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride); |
990 | 92.7k | CHECK_BETTER; |
991 | 92.7k | } |
992 | 23.1k | } else { |
993 | 270 | for (i = 0; i < 4; ++i) { |
994 | 216 | this_mv.as_mv.row = br + neighbors[i].row; |
995 | 216 | this_mv.as_mv.col = bc + neighbors[i].col; |
996 | 216 | CHECK_POINT |
997 | 162 | this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + |
998 | 162 | this_mv.as_mv.col; |
999 | 162 | thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride); |
1000 | 162 | CHECK_BETTER; |
1001 | 162 | } |
1002 | 54 | } |
1003 | | |
1004 | 23.2k | if (best_site == -1) { |
1005 | 12.4k | break; |
1006 | 12.4k | } else { |
1007 | 10.7k | br += neighbors[best_site].row; |
1008 | 10.7k | bc += neighbors[best_site].col; |
1009 | 10.7k | } |
1010 | 23.2k | } |
1011 | | |
1012 | 12.5k | best_mv->as_mv.row = br; |
1013 | 12.5k | best_mv->as_mv.col = bc; |
1014 | | |
1015 | 12.5k | return bestsad; |
1016 | 9.03k | } |
1017 | | #undef CHECK_BOUNDS |
1018 | | #undef CHECK_POINT |
1019 | | #undef CHECK_BETTER |
1020 | | /* Step search over the x->ss[] site table using one SAD call per site. |
 | | * |
 | | * ref_mv is clamped in place (vp8_clamp_mv) and used as the start point. |
 | | * Each of the tot_steps steps walks x->searches_per_step consecutive ss[] |
 | | * entries, treating them as offsets from the current best position; only |
 | | * candidates strictly inside the x->mv_col_min, x->mv_col_max, |
 | | * x->mv_row_min and x->mv_row_max limits are evaluated. When a step finds |
 | | * a new best site, best_mv and best_address move to it. |
 | | * |
 | | * best_mv: starts as the clamped ref_mv and is updated in place. |
 | | * search_param: number of leading steps of ss[] to skip. |
 | | * sad_per_bit: weight passed to mvsad_err_cost(). |
 | | * num00: set to 0, then incremented for every step that selects no new |
 | | * site while the best position is still the start point. |
 | | * fn_ptr: sdf is used during the search, vf for the returned value. |
 | | * mvcost, center_mv: passed to mv_err_cost() for the returned value; |
 | | * center_mv >> 3 is also the centre for mvsad_err_cost(). |
 | | * |
 | | * Returns fn_ptr->vf() at the best position plus mv_err_cost() of the best |
 | | * vector multiplied by 8 (each component clamped to SHRT_MIN..SHRT_MAX), |
 | | * using x->errorperbit. |
 | | */ |
1021 | | int vp8_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, |
1022 | | int_mv *best_mv, int search_param, int sad_per_bit, |
1023 | | int *num00, vp8_variance_fn_ptr_t *fn_ptr, |
1024 | 0 | int *mvcost[2], int_mv *center_mv) { |
1025 | 0 | int i, j, step; |
1026 | |

1027 | 0 | unsigned char *what = (*(b->base_src) + b->src); |
1028 | 0 | int what_stride = b->src_stride; |
1029 | 0 | unsigned char *in_what; |
1030 | 0 | int pre_stride = x->e_mbd.pre.y_stride; |
1031 | 0 | unsigned char *base_pre = x->e_mbd.pre.y_buffer; |
1032 | 0 | int in_what_stride = pre_stride; |
1033 | 0 | unsigned char *best_address; |
1034 | |

1035 | 0 | int tot_steps; |
1036 | 0 | int_mv this_mv; |
1037 | |

1038 | 0 | unsigned int bestsad; |
1039 | 0 | unsigned int thissad; |
1040 | 0 | int best_site = 0; |
1041 | 0 | int last_site = 0; |
1042 | |

1043 | 0 | int ref_row; |
1044 | 0 | int ref_col; |
1045 | 0 | int this_row_offset; |
1046 | 0 | int this_col_offset; |
1047 | 0 | search_site *ss; |
1048 | |

1049 | 0 | unsigned char *check_here; |
1050 | |

1051 | 0 | int *mvsadcost[2]; |
1052 | 0 | int_mv fcenter_mv; |
1053 | |

1054 | 0 | mvsadcost[0] = x->mvsadcost[0]; |
1055 | 0 | mvsadcost[1] = x->mvsadcost[1]; |
1056 | 0 | fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
1057 | 0 | fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
1058 | |

1059 | 0 | vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, |
1060 | 0 | x->mv_row_max); |
1061 | 0 | ref_row = ref_mv->as_mv.row; |
1062 | 0 | ref_col = ref_mv->as_mv.col; |
1063 | 0 | *num00 = 0; |
1064 | 0 | best_mv->as_mv.row = ref_row; |
1065 | 0 | best_mv->as_mv.col = ref_col; |
1066 | | |
1067 | | /* Work out the start point for the search */ |
1068 | 0 | in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + |
1069 | 0 | ref_col); |
1070 | 0 | best_address = in_what; |
1071 | | |
1072 | | /* Check the starting position */ |
1073 | 0 | bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) + |
1074 | 0 | mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); |
1075 | | |
1076 | | /* search_param determines the length of the initial step and hence |
1077 | | * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel : |
1078 | | * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. |
1079 | | */ |
1080 | 0 | ss = &x->ss[search_param * x->searches_per_step]; |
1081 | 0 | tot_steps = (x->ss_count / x->searches_per_step) - search_param; |
1082 | |

1083 | 0 | i = 1; /* running index into ss[]; ss[0] itself is never tested */ |
1084 | |

1085 | 0 | for (step = 0; step < tot_steps; ++step) { |
1086 | 0 | for (j = 0; j < x->searches_per_step; ++j) { |
1087 | | /* Trap illegal vectors */ |
1088 | 0 | this_row_offset = best_mv->as_mv.row + ss[i].mv.row; |
1089 | 0 | this_col_offset = best_mv->as_mv.col + ss[i].mv.col; |
1090 | |

1091 | 0 | if ((this_col_offset > x->mv_col_min) && |
1092 | 0 | (this_col_offset < x->mv_col_max) && |
1093 | 0 | (this_row_offset > x->mv_row_min) && |
1094 | 0 | (this_row_offset < x->mv_row_max)) |
1095 | | |
1096 | 0 | { |
1097 | 0 | check_here = ss[i].offset + best_address; |
1098 | 0 | thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); |
1099 | |

1100 | 0 | if (thissad < bestsad) { |
1101 | 0 | this_mv.as_mv.row = this_row_offset; |
1102 | 0 | this_mv.as_mv.col = this_col_offset; |
1103 | 0 | thissad += |
1104 | 0 | mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); |
1105 | |

1106 | 0 | if (thissad < bestsad) { |
1107 | 0 | bestsad = thissad; |
1108 | 0 | best_site = i; |
1109 | 0 | } |
1110 | 0 | } |
1111 | 0 | } |
1112 | |

1113 | 0 | i++; |
1114 | 0 | } |
1115 | |

1116 | 0 | if (best_site != last_site) { |
1117 | 0 | best_mv->as_mv.row += ss[best_site].mv.row; |
1118 | 0 | best_mv->as_mv.col += ss[best_site].mv.col; |
1119 | 0 | best_address += ss[best_site].offset; |
1120 | 0 | last_site = best_site; |
1121 | 0 | } else if (best_address == in_what) { |
1122 | 0 | (*num00)++; |
1123 | 0 | } |
1124 | 0 | } |
1125 | |

1126 | 0 | this_mv.as_mv.row = clamp(best_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX); |
1127 | 0 | this_mv.as_mv.col = clamp(best_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX); |
1128 | |

1129 | 0 | return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + |
1130 | 0 | mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); |
1131 | 0 | } |
1132 | | /* Step search over the x->ss[] site table that evaluates four sites per |
 | | * call through fn_ptr->sdx4df whenever a step passes the four-comparison |
 | | * bounds test below, and falls back to one fn_ptr->sdf call per in-range |
 | | * site otherwise. Only compiled when HAVE_SSE2, HAVE_MSA or HAVE_LSX is set. |
 | | * |
 | | * ref_mv is clamped in place (vp8_clamp_mv) and used as the start point; |
 | | * best_mv starts there and is updated in place. search_param is the number |
 | | * of leading steps of ss[] to skip. *num00 is set to 0 and then incremented |
 | | * for every step that selects no new site while the best position is still |
 | | * the start point. |
 | | * |
 | | * Returns fn_ptr->vf() at the best position plus mv_err_cost() of the best |
 | | * vector multiplied by 8 (each component clamped to SHRT_MIN..SHRT_MAX), |
 | | * using mvcost, center_mv and x->errorperbit. |
 | | * |
 | | * NOTE(review): the sdx4df path advances j by 4, so it presumably relies on |
 | | * x->searches_per_step being a multiple of 4 - confirm against the code |
 | | * that fills x->ss. |
 | | */ |
1133 | | #if HAVE_SSE2 || HAVE_MSA || HAVE_LSX |
1134 | | int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, |
1135 | | int_mv *best_mv, int search_param, int sad_per_bit, |
1136 | | int *num00, vp8_variance_fn_ptr_t *fn_ptr, |
1137 | 18.9M | int *mvcost[2], int_mv *center_mv) { |
1138 | 18.9M | int i, j, step; |
1139 | | |
1140 | 18.9M | unsigned char *what = (*(b->base_src) + b->src); |
1141 | 18.9M | int what_stride = b->src_stride; |
1142 | 18.9M | unsigned char *in_what; |
1143 | 18.9M | int pre_stride = x->e_mbd.pre.y_stride; |
1144 | 18.9M | unsigned char *base_pre = x->e_mbd.pre.y_buffer; |
1145 | 18.9M | int in_what_stride = pre_stride; |
1146 | 18.9M | unsigned char *best_address; |
1147 | | |
1148 | 18.9M | int tot_steps; |
1149 | 18.9M | int_mv this_mv; |
1150 | | |
1151 | 18.9M | unsigned int bestsad; |
1152 | 18.9M | unsigned int thissad; |
1153 | 18.9M | int best_site = 0; |
1154 | 18.9M | int last_site = 0; |
1155 | | |
1156 | 18.9M | int ref_row; |
1157 | 18.9M | int ref_col; |
1158 | 18.9M | int this_row_offset; |
1159 | 18.9M | int this_col_offset; |
1160 | 18.9M | search_site *ss; |
1161 | | |
1162 | 18.9M | unsigned char *check_here; |
1163 | | |
1164 | 18.9M | int *mvsadcost[2]; |
1165 | 18.9M | int_mv fcenter_mv; |
1166 | | |
1167 | 18.9M | mvsadcost[0] = x->mvsadcost[0]; |
1168 | 18.9M | mvsadcost[1] = x->mvsadcost[1]; |
1169 | 18.9M | fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
1170 | 18.9M | fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
1171 | | |
1172 | 18.9M | vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, |
1173 | 18.9M | x->mv_row_max); |
1174 | 18.9M | ref_row = ref_mv->as_mv.row; |
1175 | 18.9M | ref_col = ref_mv->as_mv.col; |
1176 | 18.9M | *num00 = 0; |
1177 | 18.9M | best_mv->as_mv.row = ref_row; |
1178 | 18.9M | best_mv->as_mv.col = ref_col; |
1179 | | |
1180 | | /* Work out the start point for the search */ |
1181 | 18.9M | in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + |
1182 | 18.9M | ref_col); |
1183 | 18.9M | best_address = in_what; |
1184 | | |
1185 | | /* Check the starting position */ |
1186 | 18.9M | bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) + |
1187 | 18.9M | mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); |
1188 | | |
1189 | | /* search_param determines the length of the initial step and hence the |
1190 | | * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 = |
1191 | | * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. |
1192 | | */ |
1193 | 18.9M | ss = &x->ss[search_param * x->searches_per_step]; |
1194 | 18.9M | tot_steps = (x->ss_count / x->searches_per_step) - search_param; |
1195 | | |
1196 | 18.9M | i = 1; /* running index into ss[]; ss[0] itself is never tested */ |
1197 | | |
1198 | 90.4M | for (step = 0; step < tot_steps; ++step) { |
1199 | 71.5M | int all_in = 1, t; |
1200 | | |
1201 | | /* To decide whether every point of this step is within the bounds, |
1202 | | * one comparison on each of sites i..i+3 is used instead of four |
1203 | | * comparisons per point. NOTE(review): presumably those four sites |
1204 | | * are the step's extreme row/column offsets - confirm. */ |
1205 | 71.5M | all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min); |
1206 | 71.5M | all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max); |
1207 | 71.5M | all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min); |
1208 | 71.5M | all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max); |
1209 | | |
1210 | 71.5M | if (all_in) { |
1211 | 55.6M | unsigned int sad_array[4]; |
1212 | | |
1213 | 167M | for (j = 0; j < x->searches_per_step; j += 4) { |
1214 | 111M | const unsigned char *block_offset[4]; |
1215 | | |
1216 | 556M | for (t = 0; t < 4; ++t) { |
1217 | 445M | block_offset[t] = ss[i + t].offset + best_address; |
1218 | 445M | } |
1219 | | |
1220 | 111M | fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, |
1221 | 111M | sad_array); |
1222 | | |
1223 | 556M | for (t = 0; t < 4; t++, i++) { |
1224 | 445M | if (sad_array[t] < bestsad) { |
1225 | 148M | this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row; |
1226 | 148M | this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col; |
1227 | 148M | sad_array[t] += |
1228 | 148M | mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); |
1229 | | |
1230 | 148M | if (sad_array[t] < bestsad) { |
1231 | 49.1M | bestsad = sad_array[t]; |
1232 | 49.1M | best_site = i; |
1233 | 49.1M | } |
1234 | 148M | } |
1235 | 445M | } |
1236 | 111M | } |
1237 | 55.6M | } else { |
1238 | 142M | for (j = 0; j < x->searches_per_step; ++j) { |
1239 | | /* Trap illegal vectors */ |
1240 | 126M | this_row_offset = best_mv->as_mv.row + ss[i].mv.row; |
1241 | 126M | this_col_offset = best_mv->as_mv.col + ss[i].mv.col; |
1242 | | |
1243 | 126M | if ((this_col_offset > x->mv_col_min) && |
1244 | 126M | (this_col_offset < x->mv_col_max) && |
1245 | 126M | (this_row_offset > x->mv_row_min) && |
1246 | 126M | (this_row_offset < x->mv_row_max)) { |
1247 | 48.0M | check_here = ss[i].offset + best_address; |
1248 | 48.0M | thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); |
1249 | | |
1250 | 48.0M | if (thissad < bestsad) { |
1251 | 18.6M | this_mv.as_mv.row = this_row_offset; |
1252 | 18.6M | this_mv.as_mv.col = this_col_offset; |
1253 | 18.6M | thissad += |
1254 | 18.6M | mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); |
1255 | | |
1256 | 18.6M | if (thissad < bestsad) { |
1257 | 8.04M | bestsad = thissad; |
1258 | 8.04M | best_site = i; |
1259 | 8.04M | } |
1260 | 18.6M | } |
1261 | 48.0M | } |
1262 | 126M | i++; |
1263 | 126M | } |
1264 | 15.8M | } |
1265 | | |
1266 | 71.5M | if (best_site != last_site) { |
1267 | 37.6M | best_mv->as_mv.row += ss[best_site].mv.row; |
1268 | 37.6M | best_mv->as_mv.col += ss[best_site].mv.col; |
1269 | 37.6M | best_address += ss[best_site].offset; |
1270 | 37.6M | last_site = best_site; |
1271 | 37.6M | } else if (best_address == in_what) { |
1272 | 14.8M | (*num00)++; |
1273 | 14.8M | } |
1274 | 71.5M | } |
1275 | | |
1276 | 18.9M | this_mv.as_mv.row = clamp(best_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX); |
1277 | 18.9M | this_mv.as_mv.col = clamp(best_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX); |
1278 | | |
1279 | 18.9M | return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + |
1280 | 18.9M | mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); |
1281 | 18.9M | } |
1282 | | #endif // HAVE_SSE2 || HAVE_MSA || HAVE_LSX |
1283 | | /* Exhaustive SAD search over a square window centred on ref_mv. |
 | | * |
 | | * The window spans ref_mv -/+ distance in rows and columns and is then |
 | | * clipped to the x->mv_row_min, x->mv_row_max, x->mv_col_min and |
 | | * x->mv_col_max limits. Both loops stop before the upper bound, so |
 | | * row_max and col_max themselves are not tested. Unlike the diamond |
 | | * searches in this file, ref_mv is only read here: it is not clamped, and |
 | | * the baseline SAD is taken at ref_mv before the window is clipped. |
 | | * |
 | | * The best vector is written to d->bmi.mv (best_mv points at it). |
 | | * sad_per_bit weights mvsad_err_cost() during the search. |
 | | * |
 | | * Returns fn_ptr->vf() at the best position plus mv_err_cost() of the best |
 | | * vector multiplied by 8 (each component clamped to SHRT_MIN..SHRT_MAX), |
 | | * using mvcost, center_mv and x->errorperbit. |
 | | */ |
1284 | | int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, |
1285 | | int sad_per_bit, int distance, |
1286 | | vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], |
1287 | 0 | int_mv *center_mv) { |
1288 | 0 | unsigned char *what = (*(b->base_src) + b->src); |
1289 | 0 | int what_stride = b->src_stride; |
1290 | 0 | unsigned char *in_what; |
1291 | 0 | int pre_stride = x->e_mbd.pre.y_stride; |
1292 | 0 | unsigned char *base_pre = x->e_mbd.pre.y_buffer; |
1293 | 0 | int in_what_stride = pre_stride; |
1294 | 0 | int mv_stride = pre_stride; |
1295 | 0 | unsigned char *bestaddress; |
1296 | 0 | int_mv *best_mv = &d->bmi.mv; |
1297 | 0 | int_mv this_mv; |
1298 | 0 | unsigned int bestsad; |
1299 | 0 | unsigned int thissad; |
1300 | 0 | int r, c; |
1301 | |

1302 | 0 | unsigned char *check_here; |
1303 | |

1304 | 0 | int ref_row = ref_mv->as_mv.row; |
1305 | 0 | int ref_col = ref_mv->as_mv.col; |
1306 | |

1307 | 0 | int row_min = ref_row - distance; |
1308 | 0 | int row_max = ref_row + distance; |
1309 | 0 | int col_min = ref_col - distance; |
1310 | 0 | int col_max = ref_col + distance; |
1311 | |

1312 | 0 | int *mvsadcost[2]; |
1313 | 0 | int_mv fcenter_mv; |
1314 | |

1315 | 0 | mvsadcost[0] = x->mvsadcost[0]; |
1316 | 0 | mvsadcost[1] = x->mvsadcost[1]; |
1317 | 0 | fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
1318 | 0 | fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
1319 | | |
1320 | | /* Work out the mid point for the search */ |
1321 | 0 | in_what = base_pre + d->offset; |
1322 | 0 | bestaddress = in_what + (ref_row * pre_stride) + ref_col; |
1323 | |

1324 | 0 | best_mv->as_mv.row = ref_row; |
1325 | 0 | best_mv->as_mv.col = ref_col; |
1326 | | |
1327 | | /* Baseline value at the centre */ |
1328 | 0 | bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) + |
1329 | 0 | mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); |
1330 | | |
1331 | | /* Apply further limits to prevent us looking using vectors that stretch |
1332 | | * beyond the UMV border |
1333 | | */ |
1334 | 0 | if (col_min < x->mv_col_min) col_min = x->mv_col_min; |
1335 | |

1336 | 0 | if (col_max > x->mv_col_max) col_max = x->mv_col_max; |
1337 | |

1338 | 0 | if (row_min < x->mv_row_min) row_min = x->mv_row_min; |
1339 | |

1340 | 0 | if (row_max > x->mv_row_max) row_max = x->mv_row_max; |
1341 | |

1342 | 0 | for (r = row_min; r < row_max; ++r) { |
1343 | 0 | this_mv.as_mv.row = r; |
1344 | 0 | check_here = r * mv_stride + in_what + col_min; |
1345 | |

1346 | 0 | for (c = col_min; c < col_max; ++c) { |
1347 | 0 | thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); |
1348 | |

1349 | 0 | if (thissad < bestsad) { |
1350 | 0 | this_mv.as_mv.col = c; |
1351 | 0 | thissad += |
1352 | 0 | mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); |
1353 | |

1354 | 0 | if (thissad < bestsad) { |
1355 | 0 | bestsad = thissad; |
1356 | 0 | best_mv->as_mv.row = r; |
1357 | 0 | best_mv->as_mv.col = c; |
1358 | 0 | bestaddress = check_here; |
1359 | 0 | } |
1360 | 0 | } |
1361 | |

1362 | 0 | check_here++; |
1363 | 0 | } |
1364 | 0 | } |
1365 | |

1366 | 0 | this_mv.as_mv.row = clamp(best_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX); |
1367 | 0 | this_mv.as_mv.col = clamp(best_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX); |
1368 | |

1369 | 0 | return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) + |
1370 | 0 | mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); |
1371 | 0 | } |
1372 | | /* Iterative 1-away refinement of ref_mv using one SAD call per neighbour. |
 | | * |
 | | * Starting from ref_mv (used as given; it is not clamped here), each of at |
 | | * most search_range iterations tests the four neighbors[] offsets that lie |
 | | * strictly inside the x->mv_col_min, x->mv_col_max, x->mv_row_min and |
 | | * x->mv_row_max limits and moves ref_mv, in place, to the best improving |
 | | * one. The loop stops early when no neighbour lowers bestsad. |
 | | * |
 | | * error_per_bit is the weight passed to mvsad_err_cost() during the |
 | | * search; the returned value uses x->errorperbit instead. |
 | | * |
 | | * Returns fn_ptr->vf() at the final position plus mv_err_cost() of the |
 | | * final ref_mv multiplied by 8 (each component clamped to |
 | | * SHRT_MIN..SHRT_MAX), using mvcost and center_mv. |
 | | */ |
1373 | | int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, |
1374 | | int_mv *ref_mv, int error_per_bit, |
1375 | | int search_range, vp8_variance_fn_ptr_t *fn_ptr, |
1376 | 0 | int *mvcost[2], int_mv *center_mv) { |
1377 | 0 | MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } }; |
1378 | 0 | int i, j; |
1379 | 0 | short this_row_offset, this_col_offset; |
1380 | |

1381 | 0 | int what_stride = b->src_stride; |
1382 | 0 | int pre_stride = x->e_mbd.pre.y_stride; |
1383 | 0 | unsigned char *base_pre = x->e_mbd.pre.y_buffer; |
1384 | 0 | int in_what_stride = pre_stride; |
1385 | 0 | unsigned char *what = (*(b->base_src) + b->src); |
1386 | 0 | unsigned char *best_address = |
1387 | 0 | (unsigned char *)(base_pre + d->offset + |
1388 | 0 | (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col); |
1389 | 0 | unsigned char *check_here; |
1390 | 0 | int_mv this_mv; |
1391 | 0 | unsigned int bestsad; |
1392 | 0 | unsigned int thissad; |
1393 | |

1394 | 0 | int *mvsadcost[2]; |
1395 | 0 | int_mv fcenter_mv; |
1396 | |

1397 | 0 | mvsadcost[0] = x->mvsadcost[0]; |
1398 | 0 | mvsadcost[1] = x->mvsadcost[1]; |
1399 | 0 | fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
1400 | 0 | fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
1401 | |

1402 | 0 | bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) + |
1403 | 0 | mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); |
1404 | |

1405 | 0 | for (i = 0; i < search_range; ++i) { |
1406 | 0 | int best_site = -1; /* -1 means no neighbour improved on bestsad */ |
1407 | |

1408 | 0 | for (j = 0; j < 4; ++j) { |
1409 | 0 | this_row_offset = ref_mv->as_mv.row + neighbors[j].row; |
1410 | 0 | this_col_offset = ref_mv->as_mv.col + neighbors[j].col; |
1411 | |

1412 | 0 | if ((this_col_offset > x->mv_col_min) && |
1413 | 0 | (this_col_offset < x->mv_col_max) && |
1414 | 0 | (this_row_offset > x->mv_row_min) && |
1415 | 0 | (this_row_offset < x->mv_row_max)) { |
1416 | 0 | check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + |
1417 | 0 | best_address; |
1418 | 0 | thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); |
1419 | |

1420 | 0 | if (thissad < bestsad) { |
1421 | 0 | this_mv.as_mv.row = this_row_offset; |
1422 | 0 | this_mv.as_mv.col = this_col_offset; |
1423 | 0 | thissad += |
1424 | 0 | mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); |
1425 | |

1426 | 0 | if (thissad < bestsad) { |
1427 | 0 | bestsad = thissad; |
1428 | 0 | best_site = j; |
1429 | 0 | } |
1430 | 0 | } |
1431 | 0 | } |
1432 | 0 | } |
1433 | |

1434 | 0 | if (best_site == -1) { |
1435 | 0 | break; |
1436 | 0 | } else { |
1437 | 0 | ref_mv->as_mv.row += neighbors[best_site].row; |
1438 | 0 | ref_mv->as_mv.col += neighbors[best_site].col; |
1439 | 0 | best_address += (neighbors[best_site].row) * in_what_stride + |
1440 | 0 | neighbors[best_site].col; |
1441 | 0 | } |
1442 | 0 | } |
1443 | |

1444 | 0 | this_mv.as_mv.row = clamp(ref_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX); |
1445 | 0 | this_mv.as_mv.col = clamp(ref_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX); |
1446 | |

1447 | 0 | return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + |
1448 | 0 | mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); |
1449 | 0 | } |
1450 | | /* Iterative 1-away refinement of ref_mv that evaluates all four neighbours |
 | | * with a single fn_ptr->sdx4df call when they are all strictly inside the |
 | | * x->mv_row_min, x->mv_row_max, x->mv_col_min and x->mv_col_max limits, |
 | | * and otherwise falls back to one fn_ptr->sdf call per in-range neighbour. |
 | | * Only compiled when HAVE_SSE2 or HAVE_MSA is set. |
 | | * |
 | | * ref_mv is used as given (not clamped here) and updated in place; at most |
 | | * search_range iterations run, stopping early when no neighbour lowers |
 | | * bestsad. error_per_bit is the weight passed to mvsad_err_cost() during |
 | | * the search; the returned value uses x->errorperbit instead. |
 | | * |
 | | * Returns fn_ptr->vf() at the final position plus mv_err_cost() of the |
 | | * final ref_mv multiplied by 8 (each component clamped to |
 | | * SHRT_MIN..SHRT_MAX), using mvcost and center_mv. |
 | | */ |
1451 | | #if HAVE_SSE2 || HAVE_MSA |
1452 | | int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, |
1453 | | int_mv *ref_mv, int error_per_bit, |
1454 | | int search_range, vp8_variance_fn_ptr_t *fn_ptr, |
1455 | 503k | int *mvcost[2], int_mv *center_mv) { |
1456 | 503k | MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } }; |
1457 | 503k | int i, j; |
1458 | 503k | short this_row_offset, this_col_offset; |
1459 | | |
1460 | 503k | int what_stride = b->src_stride; |
1461 | 503k | int pre_stride = x->e_mbd.pre.y_stride; |
1462 | 503k | unsigned char *base_pre = x->e_mbd.pre.y_buffer; |
1463 | 503k | int in_what_stride = pre_stride; |
1464 | 503k | unsigned char *what = (*(b->base_src) + b->src); |
1465 | 503k | unsigned char *best_address = |
1466 | 503k | (unsigned char *)(base_pre + d->offset + |
1467 | 503k | (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col); |
1468 | 503k | unsigned char *check_here; |
1469 | 503k | int_mv this_mv; |
1470 | 503k | unsigned int bestsad; |
1471 | 503k | unsigned int thissad; |
1472 | | |
1473 | 503k | int *mvsadcost[2]; |
1474 | 503k | int_mv fcenter_mv; |
1475 | | |
1476 | 503k | mvsadcost[0] = x->mvsadcost[0]; |
1477 | 503k | mvsadcost[1] = x->mvsadcost[1]; |
1478 | 503k | fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
1479 | 503k | fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
1480 | | |
1481 | 503k | bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) + |
1482 | 503k | mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); |
1483 | | |
1484 | 642k | for (i = 0; i < search_range; ++i) { |
1485 | 635k | int best_site = -1; /* -1 means no neighbour improved on bestsad */ |
1486 | 635k | int all_in = 1; |
1487 | | |
1488 | 635k | all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min); |
1489 | 635k | all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max); |
1490 | 635k | all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min); |
1491 | 635k | all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max); |
1492 | | |
1493 | 635k | if (all_in) { |
1494 | 550k | unsigned int sad_array[4]; |
1495 | 550k | const unsigned char *block_offset[4]; /* filled in the same order as neighbors[], whose index j selects best_site below */ |
1496 | 550k | block_offset[0] = best_address - in_what_stride; |
1497 | 550k | block_offset[1] = best_address - 1; |
1498 | 550k | block_offset[2] = best_address + 1; |
1499 | 550k | block_offset[3] = best_address + in_what_stride; |
1500 | | |
1501 | 550k | fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, |
1502 | 550k | sad_array); |
1503 | | |
1504 | 2.75M | for (j = 0; j < 4; ++j) { |
1505 | 2.20M | if (sad_array[j] < bestsad) { |
1506 | 767k | this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row; |
1507 | 767k | this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col; |
1508 | 767k | sad_array[j] += |
1509 | 767k | mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); |
1510 | | |
1511 | 767k | if (sad_array[j] < bestsad) { |
1512 | 134k | bestsad = sad_array[j]; |
1513 | 134k | best_site = j; |
1514 | 134k | } |
1515 | 767k | } |
1516 | 2.20M | } |
1517 | 550k | } else { |
1518 | 425k | for (j = 0; j < 4; ++j) { |
1519 | 340k | this_row_offset = ref_mv->as_mv.row + neighbors[j].row; |
1520 | 340k | this_col_offset = ref_mv->as_mv.col + neighbors[j].col; |
1521 | | |
1522 | 340k | if ((this_col_offset > x->mv_col_min) && |
1523 | 340k | (this_col_offset < x->mv_col_max) && |
1524 | 340k | (this_row_offset > x->mv_row_min) && |
1525 | 340k | (this_row_offset < x->mv_row_max)) { |
1526 | 252k | check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + |
1527 | 252k | best_address; |
1528 | 252k | thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); |
1529 | | |
1530 | 252k | if (thissad < bestsad) { |
1531 | 94.4k | this_mv.as_mv.row = this_row_offset; |
1532 | 94.4k | this_mv.as_mv.col = this_col_offset; |
1533 | 94.4k | thissad += |
1534 | 94.4k | mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); |
1535 | | |
1536 | 94.4k | if (thissad < bestsad) { |
1537 | 11.1k | bestsad = thissad; |
1538 | 11.1k | best_site = j; |
1539 | 11.1k | } |
1540 | 94.4k | } |
1541 | 252k | } |
1542 | 340k | } |
1543 | 85.1k | } |
1544 | | |
1545 | 635k | if (best_site == -1) { |
1546 | 496k | break; |
1547 | 496k | } else { |
1548 | 139k | ref_mv->as_mv.row += neighbors[best_site].row; |
1549 | 139k | ref_mv->as_mv.col += neighbors[best_site].col; |
1550 | 139k | best_address += (neighbors[best_site].row) * in_what_stride + |
1551 | 139k | neighbors[best_site].col; |
1552 | 139k | } |
1553 | 635k | } |
1554 | | |
1555 | 503k | this_mv.as_mv.row = clamp(ref_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX); |
1556 | 503k | this_mv.as_mv.col = clamp(ref_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX); |
1557 | | |
1558 | 503k | return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + |
1559 | 503k | mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); |
1560 | 503k | } |
1561 | | #endif // HAVE_SSE2 || HAVE_MSA |