/src/libvpx/vp8/encoder/mcomp.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #include "./vp8_rtcd.h" |
12 | | #include "./vpx_dsp_rtcd.h" |
13 | | #include "onyx_int.h" |
14 | | #include "mcomp.h" |
15 | | #include "vpx_mem/vpx_mem.h" |
16 | | #include "vpx_config.h" |
17 | | #include <stdio.h> |
18 | | #include <limits.h> |
19 | | #include <math.h> |
20 | | #include "vp8/common/findnearmv.h" |
21 | | #include "vp8/common/common.h" |
22 | | #include "vpx_dsp/vpx_dsp_common.h" |
23 | | |
24 | 10.0M | int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) { |
25 | | /* MV costing is based on the distribution of vectors in the previous |
26 | | * frame and as such will tend to overstate the cost of vectors. In |
27 | | * addition, coding a new vector can have a knock-on effect on the cost |
28 | | * of subsequent vectors and the quality of prediction from NEAR and |
29 | | * NEAREST for subsequent blocks. The "Weight" parameter allows these |
30 | | * factors to be accounted for, to a limited extent. |
31 | | */ |
32 | 10.0M | const int mv_idx_row = |
33 | 10.0M | clamp((mv->as_mv.row - ref->as_mv.row) >> 1, 0, MVvals); |
34 | 10.0M | const int mv_idx_col = |
35 | 10.0M | clamp((mv->as_mv.col - ref->as_mv.col) >> 1, 0, MVvals); |
36 | 10.0M | return ((mvcost[0][mv_idx_row] + mvcost[1][mv_idx_col]) * Weight) >> 7; |
37 | 10.0M | } |
38 | | |
39 | | static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], |
40 | 35.2M | int error_per_bit) { |
41 | | /* Ignore mv costing if mvcost is NULL */ |
42 | 35.2M | if (mvcost) { |
43 | 35.2M | const int mv_idx_row = |
44 | 35.2M | clamp((mv->as_mv.row - ref->as_mv.row) >> 1, 0, MVvals); |
45 | 35.2M | const int mv_idx_col = |
46 | 35.2M | clamp((mv->as_mv.col - ref->as_mv.col) >> 1, 0, MVvals); |
47 | 35.2M | return ((mvcost[0][mv_idx_row] + mvcost[1][mv_idx_col]) * error_per_bit + |
48 | 35.2M | 128) >> |
49 | 35.2M | 8; |
50 | 35.2M | } |
51 | 0 | return 0; |
52 | 35.2M | } |
53 | | |
54 | | static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], |
55 | 250M | int error_per_bit) { |
56 | | /* Calculate sad error cost on full pixel basis. */ |
57 | | /* Ignore mv costing if mvsadcost is NULL */ |
58 | 250M | if (mvsadcost) { |
59 | 250M | return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] + |
60 | 250M | mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)]) * |
61 | 250M | error_per_bit + |
62 | 250M | 128) >> |
63 | 250M | 8; |
64 | 250M | } |
65 | 0 | return 0; |
66 | 250M | } |
67 | | |
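A minimal standalone sketch of the fixed-point scaling used by the three cost
helpers above: vp8_mv_bit_cost scales the summed table bits by Weight/128
(the >> 7), while mv_err_cost and mvsad_err_cost scale by error_per_bit/256
with rounding (the + 128 then >> 8). The table values, Weight and
error_per_bit below are made-up illustrative numbers, not real encoder state.

#include <stdio.h>

int main(void) {
  /* Hypothetical per-component costs, standing in for mvcost[0][] and
   * mvcost[1][] lookups. */
  int row_cost = 40, col_cost = 52;
  int Weight = 96;         /* illustrative weight, as passed to vp8_mv_bit_cost */
  int error_per_bit = 100; /* illustrative lambda, as passed to mv_err_cost */

  int bit_cost = ((row_cost + col_cost) * Weight) >> 7;
  int rd_cost = ((row_cost + col_cost) * error_per_bit + 128) >> 8;

  printf("weighted bit cost = %d, rate term for RD = %d\n", bit_cost, rd_cost);
  return 0;
}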
68 | 0 | void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) { |
69 | 0 | int Len; |
70 | 0 | int search_site_count = 0; |
71 | | |
72 | | /* Generate offsets for 4 search sites per step. */ |
73 | 0 | Len = MAX_FIRST_STEP; |
74 | 0 | x->ss[search_site_count].mv.col = 0; |
75 | 0 | x->ss[search_site_count].mv.row = 0; |
76 | 0 | x->ss[search_site_count].offset = 0; |
77 | 0 | search_site_count++; |
78 | |
79 | 0 | while (Len > 0) { |
80 | | /* Compute offsets for search sites. */ |
81 | 0 | x->ss[search_site_count].mv.col = 0; |
82 | 0 | x->ss[search_site_count].mv.row = -Len; |
83 | 0 | x->ss[search_site_count].offset = -Len * stride; |
84 | 0 | search_site_count++; |
85 | | |
86 | | /* Compute offsets for search sites. */ |
87 | 0 | x->ss[search_site_count].mv.col = 0; |
88 | 0 | x->ss[search_site_count].mv.row = Len; |
89 | 0 | x->ss[search_site_count].offset = Len * stride; |
90 | 0 | search_site_count++; |
91 | | |
92 | | /* Compute offsets for search sites. */ |
93 | 0 | x->ss[search_site_count].mv.col = -Len; |
94 | 0 | x->ss[search_site_count].mv.row = 0; |
95 | 0 | x->ss[search_site_count].offset = -Len; |
96 | 0 | search_site_count++; |
97 | | |
98 | | /* Compute offsets for search sites. */ |
99 | 0 | x->ss[search_site_count].mv.col = Len; |
100 | 0 | x->ss[search_site_count].mv.row = 0; |
101 | 0 | x->ss[search_site_count].offset = Len; |
102 | 0 | search_site_count++; |
103 | | |
104 | | /* Contract. */ |
105 | 0 | Len /= 2; |
106 | 0 | } |
107 | |
108 | 0 | x->ss_count = search_site_count; |
109 | 0 | x->searches_per_step = 4; |
110 | 0 | } |
111 | | |
112 | 121k | void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) { |
113 | 121k | int Len; |
114 | 121k | int search_site_count = 0; |
115 | | |
116 | | /* Generate offsets for 8 search sites per step. */ |
117 | 121k | Len = MAX_FIRST_STEP; |
118 | 121k | x->ss[search_site_count].mv.col = 0; |
119 | 121k | x->ss[search_site_count].mv.row = 0; |
120 | 121k | x->ss[search_site_count].offset = 0; |
121 | 121k | search_site_count++; |
122 | | |
123 | 1.09M | while (Len > 0) { |
124 | | /* Compute offsets for search sites. */ |
125 | 968k | x->ss[search_site_count].mv.col = 0; |
126 | 968k | x->ss[search_site_count].mv.row = -Len; |
127 | 968k | x->ss[search_site_count].offset = -Len * stride; |
128 | 968k | search_site_count++; |
129 | | |
130 | | /* Compute offsets for search sites. */ |
131 | 968k | x->ss[search_site_count].mv.col = 0; |
132 | 968k | x->ss[search_site_count].mv.row = Len; |
133 | 968k | x->ss[search_site_count].offset = Len * stride; |
134 | 968k | search_site_count++; |
135 | | |
136 | | /* Compute offsets for search sites. */ |
137 | 968k | x->ss[search_site_count].mv.col = -Len; |
138 | 968k | x->ss[search_site_count].mv.row = 0; |
139 | 968k | x->ss[search_site_count].offset = -Len; |
140 | 968k | search_site_count++; |
141 | | |
142 | | /* Compute offsets for search sites. */ |
143 | 968k | x->ss[search_site_count].mv.col = Len; |
144 | 968k | x->ss[search_site_count].mv.row = 0; |
145 | 968k | x->ss[search_site_count].offset = Len; |
146 | 968k | search_site_count++; |
147 | | |
148 | | /* Compute offsets for search sites. */ |
149 | 968k | x->ss[search_site_count].mv.col = -Len; |
150 | 968k | x->ss[search_site_count].mv.row = -Len; |
151 | 968k | x->ss[search_site_count].offset = -Len * stride - Len; |
152 | 968k | search_site_count++; |
153 | | |
154 | | /* Compute offsets for search sites. */ |
155 | 968k | x->ss[search_site_count].mv.col = Len; |
156 | 968k | x->ss[search_site_count].mv.row = -Len; |
157 | 968k | x->ss[search_site_count].offset = -Len * stride + Len; |
158 | 968k | search_site_count++; |
159 | | |
160 | | /* Compute offsets for search sites. */ |
161 | 968k | x->ss[search_site_count].mv.col = -Len; |
162 | 968k | x->ss[search_site_count].mv.row = Len; |
163 | 968k | x->ss[search_site_count].offset = Len * stride - Len; |
164 | 968k | search_site_count++; |
165 | | |
166 | | /* Compute offsets for search sites. */ |
167 | 968k | x->ss[search_site_count].mv.col = Len; |
168 | 968k | x->ss[search_site_count].mv.row = Len; |
169 | 968k | x->ss[search_site_count].offset = Len * stride + Len; |
170 | 968k | search_site_count++; |
171 | | |
172 | | /* Contract. */ |
173 | 968k | Len /= 2; |
174 | 968k | } |
175 | | |
176 | 121k | x->ss_count = search_site_count; |
177 | 121k | x->searches_per_step = 8; |
178 | 121k | } |
179 | | |
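The loop above builds a table of concentric 8-point diamonds whose radius
halves each step. A small standalone sketch of the resulting layout, assuming
MAX_FIRST_STEP is 128 (the counts above show roughly eight halvings per call)
and an arbitrary stride of 32:

#include <stdio.h>

int main(void) {
  /* Row/col direction pattern in the same order as the loop above:
   * N, S, W, E, NW, NE, SW, SE. */
  static const int dr[8] = { -1, 1, 0, 0, -1, -1, 1, 1 };
  static const int dc[8] = { 0, 0, -1, 1, -1, 1, -1, 1 };
  const int stride = 32;
  int len, i;

  for (len = 128; len > 0; len /= 2) {
    printf("radius %3d:", len);
    for (i = 0; i < 8; ++i) {
      printf(" (%4d,%4d)->%6d", dr[i] * len, dc[i] * len,
             dr[i] * len * stride + dc[i] * len);
    }
    printf("\n");
  }
  return 0;
}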
180 | | /* |
181 | | * To avoid the penalty of cache-line-crossing reads, preload the reference |
182 | | * area into a small buffer that is aligned so that reads from it never |
183 | | * cross a cache line. This reduces the CPU cycles spent reading ref data |
184 | | * in the sub-pixel filter functions. |
185 | | * TODO: Currently, since the sub-pixel search range here is -3 to +3, copy a |
186 | | * 22-row x 32-col area, which is enough for a 16x16 macroblock. Later, for |
187 | | * SPLITMV, we could reduce the area. |
188 | | */ |
189 | | |
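The 22 x 32 figure in the comment above follows from simple arithmetic; a
short sketch, assuming a 16x16 block, the +/-3-pel sub-pixel support mentioned
there, and a row stride rounded up to 32 bytes to stay aligned:

#include <stdio.h>

int main(void) {
  const int block = 16;  /* macroblock height/width in pels */
  const int margin = 3;  /* sub-pixel search support on each side */
  const int align = 32;  /* buffer stride kept cache-line friendly */

  const int rows = margin + block + margin;                                 /* 3 + 16 + 3 = 22 */
  const int cols = ((margin + block + margin) + align - 1) / align * align; /* 22 -> 32 */

  printf("preload buffer: %d rows x %d cols\n", rows, cols);
  return 0;
}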
190 | | /* estimated cost of a motion vector (r,c) */ |
191 | | #define MVC(r, c) \ |
192 | | (mvcost ? ((mvcost[0][(r) - rr] + mvcost[1][(c) - rc]) * error_per_bit + \ |
193 | | 128) >> \ |
194 | | 8 \ |
195 | | : 0) |
196 | | /* pointer to predictor base of a motion vector */ |
197 | | #define PRE(r, c) (y + (((r) >> 2) * y_stride + ((c) >> 2) - (offset))) |
198 | | /* convert motion vector component to offset for svf calc */ |
199 | | #define SP(x) (((x) & 3) << 1) |
200 | | /* returns subpixel variance error function. */ |
201 | | #define DIST(r, c) \ |
202 | | vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse) |
203 | | #define IFMVCV(r, c, s, e) \ |
204 | 443M | if (c >= minc && c <= maxc && r >= minr && r <= maxr) s else e; |
205 | | /* returns distortion + motion vector cost */ |
206 | | #define ERR(r, c) (MVC(r, c) + DIST(r, c)) |
207 | | /* checks if (r,c) has better score than previous best */ |
208 | | #define CHECK_BETTER(v, r, c) \ |
209 | 108M | do { \ |
210 | 108M | IFMVCV( \ |
211 | 108M | r, c, \ |
212 | 108M | { \ |
213 | 108M | thismse = DIST(r, c); \ |
214 | 108M | if ((v = (MVC(r, c) + thismse)) < besterr) { \ |
215 | 108M | besterr = v; \ |
216 | 108M | br = r; \ |
217 | 108M | bc = c; \ |
218 | 108M | *distortion = thismse; \ |
219 | 108M | *sse1 = sse; \ |
220 | 108M | } \ |
221 | 108M | }, \ |
222 | 108M | v = UINT_MAX;) \ |
223 | 108M | } while (0) |
224 | | |
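In the macros above the candidate (r, c) is in quarter-pel units: PRE() steps
the pointer by the integer-pel part (r >> 2, c >> 2) and SP() turns the
fractional part into the eighth-pel offset that vfp->svf() expects. A small
worked example with hypothetical coordinates:

#include <stdio.h>

int main(void) {
  /* Candidate half a pel above and a quarter pel left of full-pel (10, 7),
   * expressed in quarter-pel units as CHECK_BETTER sees it. */
  const int r = 10 * 4 - 2; /* 38 -> row 9.5  */
  const int c = 7 * 4 - 1;  /* 27 -> col 6.75 */

  const int full_row = r >> 2, full_col = c >> 2;         /* 9, 6 */
  const int sp_row = (r & 3) << 1, sp_col = (c & 3) << 1; /* 4, 6 (in 1/8 pel) */

  printf("pointer moves to (%d,%d), svf sub-pel offsets col=%d row=%d\n",
         full_row, full_col, sp_col, sp_row);
  return 0;
}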
225 | | int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, |
226 | | int_mv *bestmv, int_mv *ref_mv, |
227 | | int error_per_bit, |
228 | | const vp8_variance_fn_ptr_t *vfp, |
229 | | int *mvcost[2], int *distortion, |
230 | 7.31M | unsigned int *sse1) { |
231 | 7.31M | unsigned char *z = (*(b->base_src) + b->src); |
232 | | |
233 | 7.31M | int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1; |
234 | 7.31M | int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4; |
235 | 7.31M | int tr = br, tc = bc; |
236 | 7.31M | unsigned int besterr; |
237 | 7.31M | unsigned int left, right, up, down, diag; |
238 | 7.31M | unsigned int sse; |
239 | 7.31M | unsigned int whichdir; |
240 | 7.31M | unsigned int halfiters = 4; |
241 | 7.31M | unsigned int quarteriters = 4; |
242 | 7.31M | int thismse; |
243 | | |
244 | 7.31M | int minc = VPXMAX(x->mv_col_min * 4, |
245 | 7.31M | (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1)); |
246 | 7.31M | int maxc = VPXMIN(x->mv_col_max * 4, |
247 | 7.31M | (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1)); |
248 | 7.31M | int minr = VPXMAX(x->mv_row_min * 4, |
249 | 7.31M | (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1)); |
250 | 7.31M | int maxr = VPXMIN(x->mv_row_max * 4, |
251 | 7.31M | (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1)); |
252 | | |
253 | 7.31M | int y_stride; |
254 | 7.31M | int offset; |
255 | 7.31M | int pre_stride = x->e_mbd.pre.y_stride; |
256 | 7.31M | unsigned char *base_pre = x->e_mbd.pre.y_buffer; |
257 | | |
258 | 7.31M | #if VPX_ARCH_X86 || VPX_ARCH_X86_64 |
259 | 7.31M | MACROBLOCKD *xd = &x->e_mbd; |
260 | 7.31M | unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + |
261 | 7.31M | bestmv->as_mv.col; |
262 | 7.31M | unsigned char *y; |
263 | 7.31M | int buf_r1, buf_r2, buf_c1; |
264 | | |
265 | | /* Clamping to avoid out-of-range data access */ |
266 | 7.31M | buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min) |
267 | 7.31M | ? (bestmv->as_mv.row - x->mv_row_min) |
268 | 7.31M | : 3; |
269 | 7.31M | buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max) |
270 | 7.31M | ? (x->mv_row_max - bestmv->as_mv.row) |
271 | 7.31M | : 3; |
272 | 7.31M | buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min) |
273 | 7.31M | ? (bestmv->as_mv.col - x->mv_col_min) |
274 | 7.31M | : 3; |
275 | 7.31M | y_stride = 32; |
276 | | |
277 | | /* Copy to intermediate buffer before searching. */ |
278 | 7.31M | vfp->copymem(y_0 - buf_c1 - pre_stride * buf_r1, pre_stride, xd->y_buf, |
279 | 7.31M | y_stride, 16 + buf_r1 + buf_r2); |
280 | 7.31M | y = xd->y_buf + y_stride * buf_r1 + buf_c1; |
281 | | #else |
282 | | unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + |
283 | | bestmv->as_mv.col; |
284 | | y_stride = pre_stride; |
285 | | #endif |
286 | | |
287 | 7.31M | offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; |
288 | | |
289 | | /* central mv */ |
290 | 7.31M | bestmv->as_mv.row = clamp(bestmv->as_mv.row * 8, SHRT_MIN, SHRT_MAX); |
291 | 7.31M | bestmv->as_mv.col = clamp(bestmv->as_mv.col * 8, SHRT_MIN, SHRT_MAX); |
292 | | |
293 | | /* calculate central point error */ |
294 | 7.31M | besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1); |
295 | 7.31M | *distortion = besterr; |
296 | 7.31M | besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); |
297 | | |
298 | | /* TODO: Each subsequent iteration checks at least one point in common |
299 | | * with the last iteration (two if the diagonal was selected). |
300 | | */ |
301 | 11.3M | while (--halfiters) { |
302 | | /* 1/2 pel */ |
303 | 11.2M | CHECK_BETTER(left, tr, tc - 2); |
304 | 11.2M | CHECK_BETTER(right, tr, tc + 2); |
305 | 11.2M | CHECK_BETTER(up, tr - 2, tc); |
306 | 11.2M | CHECK_BETTER(down, tr + 2, tc); |
307 | | |
308 | 11.2M | whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); |
309 | | |
310 | 11.2M | switch (whichdir) { |
311 | 2.51M | case 0: CHECK_BETTER(diag, tr - 2, tc - 2); break; |
312 | 2.62M | case 1: CHECK_BETTER(diag, tr - 2, tc + 2); break; |
313 | 2.84M | case 2: CHECK_BETTER(diag, tr + 2, tc - 2); break; |
314 | 3.22M | case 3: CHECK_BETTER(diag, tr + 2, tc + 2); break; |
315 | 11.2M | } |
316 | | |
317 | | /* no reason to check the same one again. */ |
318 | 11.2M | if (tr == br && tc == bc) break; |
319 | | |
320 | 4.04M | tr = br; |
321 | 4.04M | tc = bc; |
322 | 4.04M | } |
323 | | |
324 | | /* TODO: Each subsequent iteration checks at least one point in common |
325 | | * with the last iteration (two if the diagonal was selected). |
326 | | */ |
327 | | |
328 | | /* 1/4 pel */ |
329 | 10.4M | while (--quarteriters) { |
330 | 10.4M | CHECK_BETTER(left, tr, tc - 1); |
331 | 10.4M | CHECK_BETTER(right, tr, tc + 1); |
332 | 10.4M | CHECK_BETTER(up, tr - 1, tc); |
333 | 10.4M | CHECK_BETTER(down, tr + 1, tc); |
334 | | |
335 | 10.4M | whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); |
336 | | |
337 | 10.4M | switch (whichdir) { |
338 | 2.33M | case 0: CHECK_BETTER(diag, tr - 1, tc - 1); break; |
339 | 2.43M | case 1: CHECK_BETTER(diag, tr - 1, tc + 1); break; |
340 | 2.56M | case 2: CHECK_BETTER(diag, tr + 1, tc - 1); break; |
341 | 3.07M | case 3: CHECK_BETTER(diag, tr + 1, tc + 1); break; |
342 | 10.4M | } |
343 | | |
344 | | /* no reason to check the same one again. */ |
345 | 10.4M | if (tr == br && tc == bc) break; |
346 | | |
347 | 3.14M | tr = br; |
348 | 3.14M | tc = bc; |
349 | 3.14M | } |
350 | | |
351 | 7.31M | bestmv->as_mv.row = clamp(br * 2, SHRT_MIN, SHRT_MAX); |
352 | 7.31M | bestmv->as_mv.col = clamp(bc * 2, SHRT_MIN, SHRT_MAX); |
353 | | |
354 | 7.31M | if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) || |
355 | 7.31M | (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) { |
356 | 983 | return INT_MAX; |
357 | 983 | } |
358 | | |
359 | 7.31M | return besterr; |
360 | 7.31M | } |
361 | | #undef MVC |
362 | | #undef PRE |
363 | | #undef SP |
364 | | #undef DIST |
365 | | #undef IFMVCV |
366 | | #undef ERR |
367 | | #undef CHECK_BETTER |
368 | | |
369 | | int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, |
370 | | int_mv *bestmv, int_mv *ref_mv, |
371 | | int error_per_bit, |
372 | | const vp8_variance_fn_ptr_t *vfp, |
373 | | int *mvcost[2], int *distortion, |
374 | 0 | unsigned int *sse1) { |
375 | 0 | int bestmse = INT_MAX; |
376 | 0 | int_mv startmv; |
377 | 0 | int_mv this_mv; |
378 | 0 | unsigned char *z = (*(b->base_src) + b->src); |
379 | 0 | int left, right, up, down, diag; |
380 | 0 | unsigned int sse; |
381 | 0 | int whichdir; |
382 | 0 | int thismse; |
383 | 0 | int y_stride; |
384 | 0 | int pre_stride = x->e_mbd.pre.y_stride; |
385 | 0 | unsigned char *base_pre = x->e_mbd.pre.y_buffer; |
386 | |
387 | 0 | #if VPX_ARCH_X86 || VPX_ARCH_X86_64 |
388 | 0 | MACROBLOCKD *xd = &x->e_mbd; |
389 | 0 | unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + |
390 | 0 | bestmv->as_mv.col; |
391 | 0 | unsigned char *y; |
392 | |
393 | 0 | y_stride = 32; |
394 | | /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */ |
395 | 0 | vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18); |
396 | 0 | y = xd->y_buf + y_stride + 1; |
397 | | #else |
398 | | unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + |
399 | | bestmv->as_mv.col; |
400 | | y_stride = pre_stride; |
401 | | #endif |
402 | | |
403 | | /* central mv */ |
404 | 0 | bestmv->as_mv.row = clamp(bestmv->as_mv.row * 8, SHRT_MIN, SHRT_MAX); |
405 | 0 | bestmv->as_mv.col = clamp(bestmv->as_mv.col * 8, SHRT_MIN, SHRT_MAX); |
406 | 0 | startmv = *bestmv; |
407 | | |
408 | | /* calculate central point error */ |
409 | 0 | bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); |
410 | 0 | *distortion = bestmse; |
411 | 0 | bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); |
412 | | |
413 | | /* go left then right and check error */ |
414 | 0 | this_mv.as_mv.row = startmv.as_mv.row; |
415 | 0 | this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); |
416 | | /* "halfpix" horizontal variance */ |
417 | 0 | thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse); |
418 | 0 | left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
419 | |
420 | 0 | if (left < bestmse) { |
421 | 0 | *bestmv = this_mv; |
422 | 0 | bestmse = left; |
423 | 0 | *distortion = thismse; |
424 | 0 | *sse1 = sse; |
425 | 0 | } |
426 | |
427 | 0 | this_mv.as_mv.col += 8; |
428 | | /* "halfpix" horizontal variance */ |
429 | 0 | thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse); |
430 | 0 | right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
431 | |
432 | 0 | if (right < bestmse) { |
433 | 0 | *bestmv = this_mv; |
434 | 0 | bestmse = right; |
435 | 0 | *distortion = thismse; |
436 | 0 | *sse1 = sse; |
437 | 0 | } |
438 | | |
439 | | /* go up then down and check error */ |
440 | 0 | this_mv.as_mv.col = startmv.as_mv.col; |
441 | 0 | this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); |
442 | | /* "halfpix" vertical variance */ |
443 | 0 | thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse); |
444 | 0 | up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
445 | |
446 | 0 | if (up < bestmse) { |
447 | 0 | *bestmv = this_mv; |
448 | 0 | bestmse = up; |
449 | 0 | *distortion = thismse; |
450 | 0 | *sse1 = sse; |
451 | 0 | } |
452 | |
453 | 0 | this_mv.as_mv.row += 8; |
454 | | /* "halfpix" vertical variance */ |
455 | 0 | thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse); |
456 | 0 | down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
457 | |
458 | 0 | if (down < bestmse) { |
459 | 0 | *bestmv = this_mv; |
460 | 0 | bestmse = down; |
461 | 0 | *distortion = thismse; |
462 | 0 | *sse1 = sse; |
463 | 0 | } |
464 | | |
465 | | /* now check 1 more diagonal */ |
466 | 0 | whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); |
467 | 0 | this_mv = startmv; |
468 | |
469 | 0 | switch (whichdir) { |
470 | 0 | case 0: |
471 | 0 | this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; |
472 | 0 | this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; |
473 | | /* "halfpix" horizontal/vertical variance */ |
474 | 0 | thismse = |
475 | 0 | vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse); |
476 | 0 | break; |
477 | 0 | case 1: |
478 | 0 | this_mv.as_mv.col += 4; |
479 | 0 | this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; |
480 | | /* "halfpix" horizontal/vertical variance */ |
481 | 0 | thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse); |
482 | 0 | break; |
483 | 0 | case 2: |
484 | 0 | this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; |
485 | 0 | this_mv.as_mv.row += 4; |
486 | | /* "halfpix" horizontal/vertical variance */ |
487 | 0 | thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse); |
488 | 0 | break; |
489 | 0 | case 3: |
490 | 0 | default: |
491 | 0 | this_mv.as_mv.col += 4; |
492 | 0 | this_mv.as_mv.row += 4; |
493 | | /* "halfpix" horizontal/vertical variance */ |
494 | 0 | thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse); |
495 | 0 | break; |
496 | 0 | } |
497 | | |
498 | 0 | diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
499 | |
500 | 0 | if (diag < bestmse) { |
501 | 0 | *bestmv = this_mv; |
502 | 0 | bestmse = diag; |
503 | 0 | *distortion = thismse; |
504 | 0 | *sse1 = sse; |
505 | 0 | } |
506 | | |
507 | | /* time to check quarter pels. */ |
508 | 0 | if (bestmv->as_mv.row < startmv.as_mv.row) y -= y_stride; |
509 | |
510 | 0 | if (bestmv->as_mv.col < startmv.as_mv.col) y--; |
511 | |
512 | 0 | startmv = *bestmv; |
513 | | |
514 | | /* go left then right and check error */ |
515 | 0 | this_mv.as_mv.row = startmv.as_mv.row; |
516 | |
517 | 0 | if (startmv.as_mv.col & 7) { |
518 | 0 | this_mv.as_mv.col = startmv.as_mv.col - 2; |
519 | 0 | thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, |
520 | 0 | this_mv.as_mv.row & 7, z, b->src_stride, &sse); |
521 | 0 | } else { |
522 | 0 | this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; |
523 | 0 | thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, |
524 | 0 | b->src_stride, &sse); |
525 | 0 | } |
526 | |
527 | 0 | left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
528 | |
529 | 0 | if (left < bestmse) { |
530 | 0 | *bestmv = this_mv; |
531 | 0 | bestmse = left; |
532 | 0 | *distortion = thismse; |
533 | 0 | *sse1 = sse; |
534 | 0 | } |
535 | |
536 | 0 | this_mv.as_mv.col += 4; |
537 | 0 | thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, |
538 | 0 | z, b->src_stride, &sse); |
539 | 0 | right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
540 | |
541 | 0 | if (right < bestmse) { |
542 | 0 | *bestmv = this_mv; |
543 | 0 | bestmse = right; |
544 | 0 | *distortion = thismse; |
545 | 0 | *sse1 = sse; |
546 | 0 | } |
547 | | |
548 | | /* go up then down and check error */ |
549 | 0 | this_mv.as_mv.col = startmv.as_mv.col; |
550 | |
551 | 0 | if (startmv.as_mv.row & 7) { |
552 | 0 | this_mv.as_mv.row = startmv.as_mv.row - 2; |
553 | 0 | thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, |
554 | 0 | this_mv.as_mv.row & 7, z, b->src_stride, &sse); |
555 | 0 | } else { |
556 | 0 | this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; |
557 | 0 | thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, |
558 | 0 | b->src_stride, &sse); |
559 | 0 | } |
560 | |
561 | 0 | up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
562 | |
563 | 0 | if (up < bestmse) { |
564 | 0 | *bestmv = this_mv; |
565 | 0 | bestmse = up; |
566 | 0 | *distortion = thismse; |
567 | 0 | *sse1 = sse; |
568 | 0 | } |
569 | |
570 | 0 | this_mv.as_mv.row += 4; |
571 | 0 | thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, |
572 | 0 | z, b->src_stride, &sse); |
573 | 0 | down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
574 | |
575 | 0 | if (down < bestmse) { |
576 | 0 | *bestmv = this_mv; |
577 | 0 | bestmse = down; |
578 | 0 | *distortion = thismse; |
579 | 0 | *sse1 = sse; |
580 | 0 | } |
581 | | |
582 | | /* now check 1 more diagonal */ |
583 | 0 | whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); |
584 | |
585 | 0 | this_mv = startmv; |
586 | |
587 | 0 | switch (whichdir) { |
588 | 0 | case 0: |
589 | |
590 | 0 | if (startmv.as_mv.row & 7) { |
591 | 0 | this_mv.as_mv.row -= 2; |
592 | |
593 | 0 | if (startmv.as_mv.col & 7) { |
594 | 0 | this_mv.as_mv.col -= 2; |
595 | 0 | thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, |
596 | 0 | this_mv.as_mv.row & 7, z, b->src_stride, &sse); |
597 | 0 | } else { |
598 | 0 | this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; |
599 | 0 | thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, |
600 | 0 | b->src_stride, &sse); |
601 | 0 | } |
602 | 0 | } else { |
603 | 0 | this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; |
604 | |
605 | 0 | if (startmv.as_mv.col & 7) { |
606 | 0 | this_mv.as_mv.col -= 2; |
607 | 0 | thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, |
608 | 0 | z, b->src_stride, &sse); |
609 | 0 | } else { |
610 | 0 | this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; |
611 | 0 | thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride, |
612 | 0 | &sse); |
613 | 0 | } |
614 | 0 | } |
615 | |
616 | 0 | break; |
617 | 0 | case 1: |
618 | 0 | this_mv.as_mv.col += 2; |
619 | |
620 | 0 | if (startmv.as_mv.row & 7) { |
621 | 0 | this_mv.as_mv.row -= 2; |
622 | 0 | thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, |
623 | 0 | this_mv.as_mv.row & 7, z, b->src_stride, &sse); |
624 | 0 | } else { |
625 | 0 | this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; |
626 | 0 | thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, |
627 | 0 | b->src_stride, &sse); |
628 | 0 | } |
629 | |
630 | 0 | break; |
631 | 0 | case 2: |
632 | 0 | this_mv.as_mv.row += 2; |
633 | |
634 | 0 | if (startmv.as_mv.col & 7) { |
635 | 0 | this_mv.as_mv.col -= 2; |
636 | 0 | thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, |
637 | 0 | this_mv.as_mv.row & 7, z, b->src_stride, &sse); |
638 | 0 | } else { |
639 | 0 | this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; |
640 | 0 | thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, |
641 | 0 | b->src_stride, &sse); |
642 | 0 | } |
643 | |
644 | 0 | break; |
645 | 0 | case 3: |
646 | 0 | this_mv.as_mv.col += 2; |
647 | 0 | this_mv.as_mv.row += 2; |
648 | 0 | thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, |
649 | 0 | this_mv.as_mv.row & 7, z, b->src_stride, &sse); |
650 | 0 | break; |
651 | 0 | } |
652 | | |
653 | 0 | diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
654 | |
655 | 0 | if (diag < bestmse) { |
656 | 0 | *bestmv = this_mv; |
657 | 0 | bestmse = diag; |
658 | 0 | *distortion = thismse; |
659 | 0 | *sse1 = sse; |
660 | 0 | } |
661 | |
662 | 0 | return bestmse; |
663 | 0 | } |
664 | | |
665 | | int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, |
666 | | int_mv *bestmv, int_mv *ref_mv, |
667 | | int error_per_bit, |
668 | | const vp8_variance_fn_ptr_t *vfp, |
669 | | int *mvcost[2], int *distortion, |
670 | 0 | unsigned int *sse1) { |
671 | 0 | int bestmse = INT_MAX; |
672 | 0 | int_mv startmv; |
673 | 0 | int_mv this_mv; |
674 | 0 | unsigned char *z = (*(b->base_src) + b->src); |
675 | 0 | int left, right, up, down, diag; |
676 | 0 | unsigned int sse; |
677 | 0 | int whichdir; |
678 | 0 | int thismse; |
679 | 0 | int y_stride; |
680 | 0 | int pre_stride = x->e_mbd.pre.y_stride; |
681 | 0 | unsigned char *base_pre = x->e_mbd.pre.y_buffer; |
682 | |
683 | 0 | #if VPX_ARCH_X86 || VPX_ARCH_X86_64 |
684 | 0 | MACROBLOCKD *xd = &x->e_mbd; |
685 | 0 | unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + |
686 | 0 | bestmv->as_mv.col; |
687 | 0 | unsigned char *y; |
688 | |
689 | 0 | y_stride = 32; |
690 | | /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */ |
691 | 0 | vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18); |
692 | 0 | y = xd->y_buf + y_stride + 1; |
693 | | #else |
694 | | unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + |
695 | | bestmv->as_mv.col; |
696 | | y_stride = pre_stride; |
697 | | #endif |
698 | | |
699 | | /* central mv */ |
700 | 0 | bestmv->as_mv.row = clamp(bestmv->as_mv.row * 8, SHRT_MIN, SHRT_MAX); |
701 | 0 | bestmv->as_mv.col = clamp(bestmv->as_mv.col * 8, SHRT_MIN, SHRT_MAX); |
702 | 0 | startmv = *bestmv; |
703 | | |
704 | | /* calculate central point error */ |
705 | 0 | bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); |
706 | 0 | *distortion = bestmse; |
707 | 0 | bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); |
708 | | |
709 | | /* go left then right and check error */ |
710 | 0 | this_mv.as_mv.row = startmv.as_mv.row; |
711 | 0 | this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); |
712 | | /* "halfpix" horizontal variance */ |
713 | 0 | thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse); |
714 | 0 | left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
715 | |
716 | 0 | if (left < bestmse) { |
717 | 0 | *bestmv = this_mv; |
718 | 0 | bestmse = left; |
719 | 0 | *distortion = thismse; |
720 | 0 | *sse1 = sse; |
721 | 0 | } |
722 | |
723 | 0 | this_mv.as_mv.col += 8; |
724 | | /* "halfpix" horizontal variance */ |
725 | 0 | thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse); |
726 | 0 | right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
727 | |
728 | 0 | if (right < bestmse) { |
729 | 0 | *bestmv = this_mv; |
730 | 0 | bestmse = right; |
731 | 0 | *distortion = thismse; |
732 | 0 | *sse1 = sse; |
733 | 0 | } |
734 | | |
735 | | /* go up then down and check error */ |
736 | 0 | this_mv.as_mv.col = startmv.as_mv.col; |
737 | 0 | this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); |
738 | | /* "halfpix" vertical variance */ |
739 | 0 | thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse); |
740 | 0 | up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
741 | |
742 | 0 | if (up < bestmse) { |
743 | 0 | *bestmv = this_mv; |
744 | 0 | bestmse = up; |
745 | 0 | *distortion = thismse; |
746 | 0 | *sse1 = sse; |
747 | 0 | } |
748 | |
749 | 0 | this_mv.as_mv.row += 8; |
750 | | /* "halfpix" vertical variance */ |
751 | 0 | thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse); |
752 | 0 | down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
753 | |
754 | 0 | if (down < bestmse) { |
755 | 0 | *bestmv = this_mv; |
756 | 0 | bestmse = down; |
757 | 0 | *distortion = thismse; |
758 | 0 | *sse1 = sse; |
759 | 0 | } |
760 | | |
761 | | /* now check 1 more diagonal - */ |
762 | 0 | whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); |
763 | 0 | this_mv = startmv; |
764 | |
765 | 0 | switch (whichdir) { |
766 | 0 | case 0: |
767 | 0 | this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; |
768 | 0 | this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; |
769 | | /* "halfpix" horizontal/vertical variance */ |
770 | 0 | thismse = |
771 | 0 | vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse); |
772 | 0 | break; |
773 | 0 | case 1: |
774 | 0 | this_mv.as_mv.col += 4; |
775 | 0 | this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; |
776 | | /* "halfpix" horizontal/vertical variance */ |
777 | 0 | thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse); |
778 | 0 | break; |
779 | 0 | case 2: |
780 | 0 | this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; |
781 | 0 | this_mv.as_mv.row += 4; |
782 | | /* "halfpix" horizontal/vertical variance */ |
783 | 0 | thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse); |
784 | 0 | break; |
785 | 0 | case 3: |
786 | 0 | default: |
787 | 0 | this_mv.as_mv.col += 4; |
788 | 0 | this_mv.as_mv.row += 4; |
789 | | /* "halfpix" horizontal/vertical variance */ |
790 | 0 | thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse); |
791 | 0 | break; |
792 | 0 | } |
793 | | |
794 | 0 | diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); |
795 | |
796 | 0 | if (diag < bestmse) { |
797 | 0 | *bestmv = this_mv; |
798 | 0 | bestmse = diag; |
799 | 0 | *distortion = thismse; |
800 | 0 | *sse1 = sse; |
801 | 0 | } |
802 | |
803 | 0 | return bestmse; |
804 | 0 | } |
805 | | |
806 | | #define CHECK_BOUNDS(range) \ |
807 | 0 | do { \ |
808 | 0 | all_in = 1; \ |
809 | 0 | all_in &= ((br - range) >= x->mv_row_min); \ |
810 | 0 | all_in &= ((br + range) <= x->mv_row_max); \ |
811 | 0 | all_in &= ((bc - range) >= x->mv_col_min); \ |
812 | 0 | all_in &= ((bc + range) <= x->mv_col_max); \ |
813 | 0 | } while (0) |
814 | | |
815 | | #define CHECK_POINT \ |
816 | 0 | { \ |
817 | 0 | if (this_mv.as_mv.col < x->mv_col_min) continue; \ |
818 | 0 | if (this_mv.as_mv.col > x->mv_col_max) continue; \ |
819 | 0 | if (this_mv.as_mv.row < x->mv_row_min) continue; \ |
820 | 0 | if (this_mv.as_mv.row > x->mv_row_max) continue; \ |
821 | 0 | } |
822 | | |
823 | | #define CHECK_BETTER \ |
824 | 0 | do { \ |
825 | 0 | if (thissad < bestsad) { \ |
826 | 0 | thissad += \ |
827 | 0 | mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); \ |
828 | 0 | if (thissad < bestsad) { \ |
829 | 0 | bestsad = thissad; \ |
830 | 0 | best_site = i; \ |
831 | 0 | } \ |
832 | 0 | } \ |
833 | 0 | } while (0) |
834 | | |
835 | | static const MV next_chkpts[6][3] = { |
836 | | { { -2, 0 }, { -1, -2 }, { 1, -2 } }, { { -1, -2 }, { 1, -2 }, { 2, 0 } }, |
837 | | { { 1, -2 }, { 2, 0 }, { 1, 2 } }, { { 2, 0 }, { 1, 2 }, { -1, 2 } }, |
838 | | { { 1, 2 }, { -1, 2 }, { -2, 0 } }, { { -1, 2 }, { -2, 0 }, { -1, -2 } } |
839 | | }; |
840 | | |
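next_chkpts[] encodes, for each previous step direction k, the three points of
the new hexagon that were not just examined. In vp8_hex_search below, the
update "k += 5 + best_site" followed by the wrap is plain modular arithmetic;
a small sketch verifying it is equivalent to (k + best_site + 5) % 6:

#include <stdio.h>

int main(void) {
  int k, best_site;
  for (k = 0; k < 6; ++k) {
    for (best_site = 0; best_site < 3; ++best_site) {
      int next = k + 5 + best_site;
      if (next >= 12) {
        next -= 12;
      } else if (next >= 6) {
        next -= 6;
      }
      printf("k=%d best_site=%d -> next k=%d (mod form %d)\n", k, best_site,
             next, (k + best_site + 5) % 6);
    }
  }
  return 0;
}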
841 | | int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, |
842 | | int_mv *best_mv, int search_param, int sad_per_bit, |
843 | | const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2], |
844 | 0 | int_mv *center_mv) { |
845 | 0 | MV hex[6] = { |
846 | 0 | { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } |
847 | 0 | }; |
848 | 0 | MV neighbors[4] = { { 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 } }; |
849 | 0 | int i, j; |
850 | |
851 | 0 | unsigned char *what = (*(b->base_src) + b->src); |
852 | 0 | int what_stride = b->src_stride; |
853 | 0 | int pre_stride = x->e_mbd.pre.y_stride; |
854 | 0 | unsigned char *base_pre = x->e_mbd.pre.y_buffer; |
855 | |
856 | 0 | int in_what_stride = pre_stride; |
857 | 0 | int br, bc; |
858 | 0 | int_mv this_mv; |
859 | 0 | unsigned int bestsad; |
860 | 0 | unsigned int thissad; |
861 | 0 | unsigned char *base_offset; |
862 | 0 | unsigned char *this_offset; |
863 | 0 | int k = -1; |
864 | 0 | int all_in; |
865 | 0 | int best_site = -1; |
866 | 0 | int hex_range = 127; |
867 | 0 | int dia_range = 8; |
868 | |
869 | 0 | int_mv fcenter_mv; |
870 | 0 | fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
871 | 0 | fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
872 | | |
873 | | /* adjust ref_mv to make sure it is within MV range */ |
874 | 0 | vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, |
875 | 0 | x->mv_row_max); |
876 | 0 | br = ref_mv->as_mv.row; |
877 | 0 | bc = ref_mv->as_mv.col; |
878 | | |
879 | | /* Work out the start point for the search */ |
880 | 0 | base_offset = (unsigned char *)(base_pre + d->offset); |
881 | 0 | this_offset = base_offset + (br * (pre_stride)) + bc; |
882 | 0 | this_mv.as_mv.row = br; |
883 | 0 | this_mv.as_mv.col = bc; |
884 | 0 | bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride) + |
885 | 0 | mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); |
886 | |
887 | | #if CONFIG_MULTI_RES_ENCODING |
888 | | /* Lower search range based on prediction info */ |
889 | | if (search_param >= 6) |
890 | | goto cal_neighbors; |
891 | | else if (search_param >= 5) |
892 | | hex_range = 4; |
893 | | else if (search_param >= 4) |
894 | | hex_range = 6; |
895 | | else if (search_param >= 3) |
896 | | hex_range = 15; |
897 | | else if (search_param >= 2) |
898 | | hex_range = 31; |
899 | | else if (search_param >= 1) |
900 | | hex_range = 63; |
901 | | |
902 | | dia_range = 8; |
903 | | #else |
904 | 0 | (void)search_param; |
905 | 0 | #endif |
906 | | |
907 | | /* hex search */ |
908 | 0 | CHECK_BOUNDS(2); |
909 | |
910 | 0 | if (all_in) { |
911 | 0 | for (i = 0; i < 6; ++i) { |
912 | 0 | this_mv.as_mv.row = br + hex[i].row; |
913 | 0 | this_mv.as_mv.col = bc + hex[i].col; |
914 | 0 | this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + |
915 | 0 | this_mv.as_mv.col; |
916 | 0 | thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride); |
917 | 0 | CHECK_BETTER; |
918 | 0 | } |
919 | 0 | } else { |
920 | 0 | for (i = 0; i < 6; ++i) { |
921 | 0 | this_mv.as_mv.row = br + hex[i].row; |
922 | 0 | this_mv.as_mv.col = bc + hex[i].col; |
923 | 0 | CHECK_POINT |
924 | 0 | this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + |
925 | 0 | this_mv.as_mv.col; |
926 | 0 | thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride); |
927 | 0 | CHECK_BETTER; |
928 | 0 | } |
929 | 0 | } |
930 | |
931 | 0 | if (best_site == -1) { |
932 | 0 | goto cal_neighbors; |
933 | 0 | } else { |
934 | 0 | br += hex[best_site].row; |
935 | 0 | bc += hex[best_site].col; |
936 | 0 | k = best_site; |
937 | 0 | } |
938 | | |
939 | 0 | for (j = 1; j < hex_range; ++j) { |
940 | 0 | best_site = -1; |
941 | 0 | CHECK_BOUNDS(2); |
942 | |
943 | 0 | if (all_in) { |
944 | 0 | for (i = 0; i < 3; ++i) { |
945 | 0 | this_mv.as_mv.row = br + next_chkpts[k][i].row; |
946 | 0 | this_mv.as_mv.col = bc + next_chkpts[k][i].col; |
947 | 0 | this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + |
948 | 0 | this_mv.as_mv.col; |
949 | 0 | thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride); |
950 | 0 | CHECK_BETTER; |
951 | 0 | } |
952 | 0 | } else { |
953 | 0 | for (i = 0; i < 3; ++i) { |
954 | 0 | this_mv.as_mv.row = br + next_chkpts[k][i].row; |
955 | 0 | this_mv.as_mv.col = bc + next_chkpts[k][i].col; |
956 | 0 | CHECK_POINT |
957 | 0 | this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + |
958 | 0 | this_mv.as_mv.col; |
959 | 0 | thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride); |
960 | 0 | CHECK_BETTER; |
961 | 0 | } |
962 | 0 | } |
963 | |
964 | 0 | if (best_site == -1) { |
965 | 0 | break; |
966 | 0 | } else { |
967 | 0 | br += next_chkpts[k][best_site].row; |
968 | 0 | bc += next_chkpts[k][best_site].col; |
969 | 0 | k += 5 + best_site; |
970 | 0 | if (k >= 12) { |
971 | 0 | k -= 12; |
972 | 0 | } else if (k >= 6) { |
973 | 0 | k -= 6; |
974 | 0 | } |
975 | 0 | } |
976 | 0 | } |
977 | | |
978 | | /* check 4 1-away neighbors */ |
979 | 0 | cal_neighbors: |
980 | 0 | for (j = 0; j < dia_range; ++j) { |
981 | 0 | best_site = -1; |
982 | 0 | CHECK_BOUNDS(1); |
983 | |
984 | 0 | if (all_in) { |
985 | 0 | for (i = 0; i < 4; ++i) { |
986 | 0 | this_mv.as_mv.row = br + neighbors[i].row; |
987 | 0 | this_mv.as_mv.col = bc + neighbors[i].col; |
988 | 0 | this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + |
989 | 0 | this_mv.as_mv.col; |
990 | 0 | thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride); |
991 | 0 | CHECK_BETTER; |
992 | 0 | } |
993 | 0 | } else { |
994 | 0 | for (i = 0; i < 4; ++i) { |
995 | 0 | this_mv.as_mv.row = br + neighbors[i].row; |
996 | 0 | this_mv.as_mv.col = bc + neighbors[i].col; |
997 | 0 | CHECK_POINT |
998 | 0 | this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + |
999 | 0 | this_mv.as_mv.col; |
1000 | 0 | thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride); |
1001 | 0 | CHECK_BETTER; |
1002 | 0 | } |
1003 | 0 | } |
1004 | |
1005 | 0 | if (best_site == -1) { |
1006 | 0 | break; |
1007 | 0 | } else { |
1008 | 0 | br += neighbors[best_site].row; |
1009 | 0 | bc += neighbors[best_site].col; |
1010 | 0 | } |
1011 | 0 | } |
1012 | |
1013 | 0 | best_mv->as_mv.row = br; |
1014 | 0 | best_mv->as_mv.col = bc; |
1015 | |
1016 | 0 | return bestsad; |
1017 | 0 | } |
1018 | | #undef CHECK_BOUNDS |
1019 | | #undef CHECK_POINT |
1020 | | #undef CHECK_BETTER |
1021 | | |
1022 | | int vp8_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, |
1023 | | int_mv *best_mv, int search_param, int sad_per_bit, |
1024 | | int *num00, vp8_variance_fn_ptr_t *fn_ptr, |
1025 | 0 | int *mvcost[2], int_mv *center_mv) { |
1026 | 0 | int i, j, step; |
1027 | |
1028 | 0 | unsigned char *what = (*(b->base_src) + b->src); |
1029 | 0 | int what_stride = b->src_stride; |
1030 | 0 | unsigned char *in_what; |
1031 | 0 | int pre_stride = x->e_mbd.pre.y_stride; |
1032 | 0 | unsigned char *base_pre = x->e_mbd.pre.y_buffer; |
1033 | 0 | int in_what_stride = pre_stride; |
1034 | 0 | unsigned char *best_address; |
1035 | |
1036 | 0 | int tot_steps; |
1037 | 0 | int_mv this_mv; |
1038 | |
1039 | 0 | unsigned int bestsad; |
1040 | 0 | unsigned int thissad; |
1041 | 0 | int best_site = 0; |
1042 | 0 | int last_site = 0; |
1043 | |
1044 | 0 | int ref_row; |
1045 | 0 | int ref_col; |
1046 | 0 | int this_row_offset; |
1047 | 0 | int this_col_offset; |
1048 | 0 | search_site *ss; |
1049 | |
1050 | 0 | unsigned char *check_here; |
1051 | |
1052 | 0 | int *mvsadcost[2]; |
1053 | 0 | int_mv fcenter_mv; |
1054 | |
1055 | 0 | mvsadcost[0] = x->mvsadcost[0]; |
1056 | 0 | mvsadcost[1] = x->mvsadcost[1]; |
1057 | 0 | fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
1058 | 0 | fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
1059 | |
1060 | 0 | vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, |
1061 | 0 | x->mv_row_max); |
1062 | 0 | ref_row = ref_mv->as_mv.row; |
1063 | 0 | ref_col = ref_mv->as_mv.col; |
1064 | 0 | *num00 = 0; |
1065 | 0 | best_mv->as_mv.row = ref_row; |
1066 | 0 | best_mv->as_mv.col = ref_col; |
1067 | | |
1068 | | /* Work out the start point for the search */ |
1069 | 0 | in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + |
1070 | 0 | ref_col); |
1071 | 0 | best_address = in_what; |
1072 | | |
1073 | | /* Check the starting position */ |
1074 | 0 | bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) + |
1075 | 0 | mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); |
1076 | | |
1077 | | /* search_param determines the length of the initial step and hence |
1078 | | * the number of iterations: 0 = initial step of MAX_FIRST_STEP pels, |
1079 | | * 1 = MAX_FIRST_STEP/2 pels, 2 = MAX_FIRST_STEP/4 pels, etc. |
1080 | | */ |
1081 | 0 | ss = &x->ss[search_param * x->searches_per_step]; |
1082 | 0 | tot_steps = (x->ss_count / x->searches_per_step) - search_param; |
1083 | |
1084 | 0 | i = 1; |
1085 | |
1086 | 0 | for (step = 0; step < tot_steps; ++step) { |
1087 | 0 | for (j = 0; j < x->searches_per_step; ++j) { |
1088 | | /* Trap illegal vectors */ |
1089 | 0 | this_row_offset = best_mv->as_mv.row + ss[i].mv.row; |
1090 | 0 | this_col_offset = best_mv->as_mv.col + ss[i].mv.col; |
1091 | |
1092 | 0 | if ((this_col_offset > x->mv_col_min) && |
1093 | 0 | (this_col_offset < x->mv_col_max) && |
1094 | 0 | (this_row_offset > x->mv_row_min) && |
1095 | 0 | (this_row_offset < x->mv_row_max)) |
1096 | | |
1097 | 0 | { |
1098 | 0 | check_here = ss[i].offset + best_address; |
1099 | 0 | thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); |
1100 | |
1101 | 0 | if (thissad < bestsad) { |
1102 | 0 | this_mv.as_mv.row = this_row_offset; |
1103 | 0 | this_mv.as_mv.col = this_col_offset; |
1104 | 0 | thissad += |
1105 | 0 | mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); |
1106 | |
1107 | 0 | if (thissad < bestsad) { |
1108 | 0 | bestsad = thissad; |
1109 | 0 | best_site = i; |
1110 | 0 | } |
1111 | 0 | } |
1112 | 0 | } |
1113 | |
1114 | 0 | i++; |
1115 | 0 | } |
1116 | |
1117 | 0 | if (best_site != last_site) { |
1118 | 0 | best_mv->as_mv.row += ss[best_site].mv.row; |
1119 | 0 | best_mv->as_mv.col += ss[best_site].mv.col; |
1120 | 0 | best_address += ss[best_site].offset; |
1121 | 0 | last_site = best_site; |
1122 | 0 | } else if (best_address == in_what) { |
1123 | 0 | (*num00)++; |
1124 | 0 | } |
1125 | 0 | } |
1126 | |
1127 | 0 | this_mv.as_mv.row = clamp(best_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX); |
1128 | 0 | this_mv.as_mv.col = clamp(best_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX); |
1129 | |
1130 | 0 | return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + |
1131 | 0 | mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); |
1132 | 0 | } |
1133 | | |
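A short sketch of the step arithmetic used above: with the 8-point site table
from vp8_init3smotion_compensation (one centre entry plus 8 sites per ring), a
larger search_param simply skips the widest rings. MAX_FIRST_STEP is taken as
128 here, matching the eight rings implied by the counts above:

#include <stdio.h>

int main(void) {
  const int searches_per_step = 8;
  const int rings = 8;
  const int ss_count = 1 + searches_per_step * rings; /* 65 entries incl. centre */
  int search_param;

  for (search_param = 0; search_param < 4; ++search_param) {
    /* i starts at 1 in the search loop, so site 0 (the centre) is skipped. */
    int first_site = search_param * searches_per_step + 1;
    int tot_steps = ss_count / searches_per_step - search_param;
    printf("search_param=%d: start at ss[%d], %d rings, first radius %d\n",
           search_param, first_site, tot_steps, 128 >> search_param);
  }
  return 0;
}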
1134 | | #if HAVE_SSE2 || HAVE_MSA || HAVE_LSX |
1135 | | int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, |
1136 | | int_mv *best_mv, int search_param, int sad_per_bit, |
1137 | | int *num00, vp8_variance_fn_ptr_t *fn_ptr, |
1138 | 27.3M | int *mvcost[2], int_mv *center_mv) { |
1139 | 27.3M | int i, j, step; |
1140 | | |
1141 | 27.3M | unsigned char *what = (*(b->base_src) + b->src); |
1142 | 27.3M | int what_stride = b->src_stride; |
1143 | 27.3M | unsigned char *in_what; |
1144 | 27.3M | int pre_stride = x->e_mbd.pre.y_stride; |
1145 | 27.3M | unsigned char *base_pre = x->e_mbd.pre.y_buffer; |
1146 | 27.3M | int in_what_stride = pre_stride; |
1147 | 27.3M | unsigned char *best_address; |
1148 | | |
1149 | 27.3M | int tot_steps; |
1150 | 27.3M | int_mv this_mv; |
1151 | | |
1152 | 27.3M | unsigned int bestsad; |
1153 | 27.3M | unsigned int thissad; |
1154 | 27.3M | int best_site = 0; |
1155 | 27.3M | int last_site = 0; |
1156 | | |
1157 | 27.3M | int ref_row; |
1158 | 27.3M | int ref_col; |
1159 | 27.3M | int this_row_offset; |
1160 | 27.3M | int this_col_offset; |
1161 | 27.3M | search_site *ss; |
1162 | | |
1163 | 27.3M | unsigned char *check_here; |
1164 | | |
1165 | 27.3M | int *mvsadcost[2]; |
1166 | 27.3M | int_mv fcenter_mv; |
1167 | | |
1168 | 27.3M | mvsadcost[0] = x->mvsadcost[0]; |
1169 | 27.3M | mvsadcost[1] = x->mvsadcost[1]; |
1170 | 27.3M | fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
1171 | 27.3M | fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
1172 | | |
1173 | 27.3M | vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, |
1174 | 27.3M | x->mv_row_max); |
1175 | 27.3M | ref_row = ref_mv->as_mv.row; |
1176 | 27.3M | ref_col = ref_mv->as_mv.col; |
1177 | 27.3M | *num00 = 0; |
1178 | 27.3M | best_mv->as_mv.row = ref_row; |
1179 | 27.3M | best_mv->as_mv.col = ref_col; |
1180 | | |
1181 | | /* Work out the start point for the search */ |
1182 | 27.3M | in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + |
1183 | 27.3M | ref_col); |
1184 | 27.3M | best_address = in_what; |
1185 | | |
1186 | | /* Check the starting position */ |
1187 | 27.3M | bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) + |
1188 | 27.3M | mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); |
1189 | | |
1190 | | /* search_param determines the length of the initial step and hence the |
1191 | | * number of iterations: 0 = initial step of MAX_FIRST_STEP pels, 1 = |
1192 | | * MAX_FIRST_STEP/2 pels, 2 = MAX_FIRST_STEP/4 pels, etc. |
1193 | | */ |
1194 | 27.3M | ss = &x->ss[search_param * x->searches_per_step]; |
1195 | 27.3M | tot_steps = (x->ss_count / x->searches_per_step) - search_param; |
1196 | | |
1197 | 27.3M | i = 1; |
1198 | | |
1199 | 131M | for (step = 0; step < tot_steps; ++step) { |
1200 | 103M | int all_in = 1, t; |
1201 | | |
1202 | | /* To know whether all neighbor points are within the bounds, four |
1203 | | * bounds checks are enough, instead of checking all four bounds for |
1204 | | * each point. |
1205 | | */ |
1206 | 103M | all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min); |
1207 | 103M | all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max); |
1208 | 103M | all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min); |
1209 | 103M | all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max); |
1210 | | |
1211 | 103M | if (all_in) { |
1212 | 82.4M | unsigned int sad_array[4]; |
1213 | | |
1214 | 247M | for (j = 0; j < x->searches_per_step; j += 4) { |
1215 | 164M | const unsigned char *block_offset[4]; |
1216 | | |
1217 | 824M | for (t = 0; t < 4; ++t) { |
1218 | 659M | block_offset[t] = ss[i + t].offset + best_address; |
1219 | 659M | } |
1220 | | |
1221 | 164M | fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, |
1222 | 164M | sad_array); |
1223 | | |
1224 | 824M | for (t = 0; t < 4; t++, i++) { |
1225 | 659M | if (sad_array[t] < bestsad) { |
1226 | 195M | this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row; |
1227 | 195M | this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col; |
1228 | 195M | sad_array[t] += |
1229 | 195M | mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); |
1230 | | |
1231 | 195M | if (sad_array[t] < bestsad) { |
1232 | 71.0M | bestsad = sad_array[t]; |
1233 | 71.0M | best_site = i; |
1234 | 71.0M | } |
1235 | 195M | } |
1236 | 659M | } |
1237 | 164M | } |
1238 | 82.4M | } else { |
1239 | 193M | for (j = 0; j < x->searches_per_step; ++j) { |
1240 | | /* Trap illegal vectors */ |
1241 | 171M | this_row_offset = best_mv->as_mv.row + ss[i].mv.row; |
1242 | 171M | this_col_offset = best_mv->as_mv.col + ss[i].mv.col; |
1243 | | |
1244 | 171M | if ((this_col_offset > x->mv_col_min) && |
1245 | 171M | (this_col_offset < x->mv_col_max) && |
1246 | 171M | (this_row_offset > x->mv_row_min) && |
1247 | 171M | (this_row_offset < x->mv_row_max)) { |
1248 | 70.2M | check_here = ss[i].offset + best_address; |
1249 | 70.2M | thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); |
1250 | | |
1251 | 70.2M | if (thissad < bestsad) { |
1252 | 25.6M | this_mv.as_mv.row = this_row_offset; |
1253 | 25.6M | this_mv.as_mv.col = this_col_offset; |
1254 | 25.6M | thissad += |
1255 | 25.6M | mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); |
1256 | | |
1257 | 25.6M | if (thissad < bestsad) { |
1258 | 11.6M | bestsad = thissad; |
1259 | 11.6M | best_site = i; |
1260 | 11.6M | } |
1261 | 25.6M | } |
1262 | 70.2M | } |
1263 | 171M | i++; |
1264 | 171M | } |
1265 | 21.4M | } |
1266 | | |
1267 | 103M | if (best_site != last_site) { |
1268 | 54.2M | best_mv->as_mv.row += ss[best_site].mv.row; |
1269 | 54.2M | best_mv->as_mv.col += ss[best_site].mv.col; |
1270 | 54.2M | best_address += ss[best_site].offset; |
1271 | 54.2M | last_site = best_site; |
1272 | 54.2M | } else if (best_address == in_what) { |
1273 | 19.5M | (*num00)++; |
1274 | 19.5M | } |
1275 | 103M | } |
1276 | | |
1277 | 27.3M | this_mv.as_mv.row = clamp(best_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX); |
1278 | 27.3M | this_mv.as_mv.col = clamp(best_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX); |
1279 | | |
1280 | 27.3M | return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + |
1281 | 27.3M | mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); |
1282 | 27.3M | } |
1283 | | #endif // HAVE_SSE2 || HAVE_MSA || HAVE_LSX |
1284 | | |
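The sadx4 variant above differs from the C version in two ways: when the whole
ring lies inside the frame, four bounds checks (against the ring's N, S, W, E
extremes) cover all eight sites, and fn_ptr->sdx4df evaluates four candidates
per call so a SIMD implementation can reuse the source loads. A plain-C
stand-in for such a 4-way SAD, assuming a 16x16 block (the real kernels are
wired up via vpx_dsp_rtcd.h):

#include <stdio.h>
#include <stdlib.h>

static void sad16x16x4(const unsigned char *src, int src_stride,
                       const unsigned char *const ref[4], int ref_stride,
                       unsigned int sad[4]) {
  int t, r, c;
  for (t = 0; t < 4; ++t) {
    unsigned int acc = 0;
    for (r = 0; r < 16; ++r) {
      for (c = 0; c < 16; ++c) {
        acc += abs(src[r * src_stride + c] - ref[t][r * ref_stride + c]);
      }
    }
    sad[t] = acc;
  }
}

int main(void) {
  static unsigned char src[16 * 16], ref[24 * 24];
  const unsigned char *cand[4] = { ref + 1, ref + 24, ref + 25, ref + 48 };
  unsigned int sad[4];
  int i;

  for (i = 0; i < 16 * 16; ++i) src[i] = (unsigned char)(i * 7);
  for (i = 0; i < 24 * 24; ++i) ref[i] = (unsigned char)(i * 5);

  sad16x16x4(src, 16, cand, 24, sad);
  printf("SADs: %u %u %u %u\n", sad[0], sad[1], sad[2], sad[3]);
  return 0;
}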
1285 | | int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, |
1286 | | int sad_per_bit, int distance, |
1287 | | vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], |
1288 | 0 | int_mv *center_mv) { |
1289 | 0 | unsigned char *what = (*(b->base_src) + b->src); |
1290 | 0 | int what_stride = b->src_stride; |
1291 | 0 | unsigned char *in_what; |
1292 | 0 | int pre_stride = x->e_mbd.pre.y_stride; |
1293 | 0 | unsigned char *base_pre = x->e_mbd.pre.y_buffer; |
1294 | 0 | int in_what_stride = pre_stride; |
1295 | 0 | int mv_stride = pre_stride; |
1296 | 0 | unsigned char *bestaddress; |
1297 | 0 | int_mv *best_mv = &d->bmi.mv; |
1298 | 0 | int_mv this_mv; |
1299 | 0 | unsigned int bestsad; |
1300 | 0 | unsigned int thissad; |
1301 | 0 | int r, c; |
1302 | |
1303 | 0 | unsigned char *check_here; |
1304 | |
1305 | 0 | int ref_row = ref_mv->as_mv.row; |
1306 | 0 | int ref_col = ref_mv->as_mv.col; |
1307 | |
1308 | 0 | int row_min = ref_row - distance; |
1309 | 0 | int row_max = ref_row + distance; |
1310 | 0 | int col_min = ref_col - distance; |
1311 | 0 | int col_max = ref_col + distance; |
1312 | |
1313 | 0 | int *mvsadcost[2]; |
1314 | 0 | int_mv fcenter_mv; |
1315 | |
1316 | 0 | mvsadcost[0] = x->mvsadcost[0]; |
1317 | 0 | mvsadcost[1] = x->mvsadcost[1]; |
1318 | 0 | fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
1319 | 0 | fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
1320 | | |
1321 | | /* Work out the mid point for the search */ |
1322 | 0 | in_what = base_pre + d->offset; |
1323 | 0 | bestaddress = in_what + (ref_row * pre_stride) + ref_col; |
1324 | |
1325 | 0 | best_mv->as_mv.row = ref_row; |
1326 | 0 | best_mv->as_mv.col = ref_col; |
1327 | | |
1328 | | /* Baseline value at the centre */ |
1329 | 0 | bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) + |
1330 | 0 | mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); |
1331 | | |
1332 | | /* Apply further limits to prevent us from using vectors that stretch |
1333 | | * beyond the UMV border |
1334 | | */ |
1335 | 0 | if (col_min < x->mv_col_min) col_min = x->mv_col_min; |
1336 | |
1337 | 0 | if (col_max > x->mv_col_max) col_max = x->mv_col_max; |
1338 | |
1339 | 0 | if (row_min < x->mv_row_min) row_min = x->mv_row_min; |
1340 | |
1341 | 0 | if (row_max > x->mv_row_max) row_max = x->mv_row_max; |
1342 | |
1343 | 0 | for (r = row_min; r < row_max; ++r) { |
1344 | 0 | this_mv.as_mv.row = r; |
1345 | 0 | check_here = r * mv_stride + in_what + col_min; |
1346 | |
1347 | 0 | for (c = col_min; c < col_max; ++c) { |
1348 | 0 | thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); |
1349 | |
1350 | 0 | if (thissad < bestsad) { |
1351 | 0 | this_mv.as_mv.col = c; |
1352 | 0 | thissad += |
1353 | 0 | mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); |
1354 | |
1355 | 0 | if (thissad < bestsad) { |
1356 | 0 | bestsad = thissad; |
1357 | 0 | best_mv->as_mv.row = r; |
1358 | 0 | best_mv->as_mv.col = c; |
1359 | 0 | bestaddress = check_here; |
1360 | 0 | } |
1361 | 0 | } |
1362 | |
1363 | 0 | check_here++; |
1364 | 0 | } |
1365 | 0 | } |
1366 | |
1367 | 0 | this_mv.as_mv.row = clamp(best_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX); |
1368 | 0 | this_mv.as_mv.col = clamp(best_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX); |
1369 | |
1370 | 0 | return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) + |
1371 | 0 | mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); |
1372 | 0 | } |
1373 | | |
1374 | | int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, |
1375 | | int_mv *ref_mv, int error_per_bit, |
1376 | | int search_range, vp8_variance_fn_ptr_t *fn_ptr, |
1377 | 0 | int *mvcost[2], int_mv *center_mv) { |
1378 | 0 | MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } }; |
1379 | 0 | int i, j; |
1380 | 0 | short this_row_offset, this_col_offset; |
1381 | |
1382 | 0 | int what_stride = b->src_stride; |
1383 | 0 | int pre_stride = x->e_mbd.pre.y_stride; |
1384 | 0 | unsigned char *base_pre = x->e_mbd.pre.y_buffer; |
1385 | 0 | int in_what_stride = pre_stride; |
1386 | 0 | unsigned char *what = (*(b->base_src) + b->src); |
1387 | 0 | unsigned char *best_address = |
1388 | 0 | (unsigned char *)(base_pre + d->offset + |
1389 | 0 | (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col); |
1390 | 0 | unsigned char *check_here; |
1391 | 0 | int_mv this_mv; |
1392 | 0 | unsigned int bestsad; |
1393 | 0 | unsigned int thissad; |
1394 | |
1395 | 0 | int *mvsadcost[2]; |
1396 | 0 | int_mv fcenter_mv; |
1397 | |
1398 | 0 | mvsadcost[0] = x->mvsadcost[0]; |
1399 | 0 | mvsadcost[1] = x->mvsadcost[1]; |
1400 | 0 | fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
1401 | 0 | fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
1402 | |
1403 | 0 | bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) + |
1404 | 0 | mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); |
1405 | |
1406 | 0 | for (i = 0; i < search_range; ++i) { |
1407 | 0 | int best_site = -1; |
1408 | |
1409 | 0 | for (j = 0; j < 4; ++j) { |
1410 | 0 | this_row_offset = ref_mv->as_mv.row + neighbors[j].row; |
1411 | 0 | this_col_offset = ref_mv->as_mv.col + neighbors[j].col; |
1412 | |
1413 | 0 | if ((this_col_offset > x->mv_col_min) && |
1414 | 0 | (this_col_offset < x->mv_col_max) && |
1415 | 0 | (this_row_offset > x->mv_row_min) && |
1416 | 0 | (this_row_offset < x->mv_row_max)) { |
1417 | 0 | check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + |
1418 | 0 | best_address; |
1419 | 0 | thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); |
1420 | |
1421 | 0 | if (thissad < bestsad) { |
1422 | 0 | this_mv.as_mv.row = this_row_offset; |
1423 | 0 | this_mv.as_mv.col = this_col_offset; |
1424 | 0 | thissad += |
1425 | 0 | mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); |
1426 | |
1427 | 0 | if (thissad < bestsad) { |
1428 | 0 | bestsad = thissad; |
1429 | 0 | best_site = j; |
1430 | 0 | } |
1431 | 0 | } |
1432 | 0 | } |
1433 | 0 | } |
1434 | |
1435 | 0 | if (best_site == -1) { |
1436 | 0 | break; |
1437 | 0 | } else { |
1438 | 0 | ref_mv->as_mv.row += neighbors[best_site].row; |
1439 | 0 | ref_mv->as_mv.col += neighbors[best_site].col; |
1440 | 0 | best_address += (neighbors[best_site].row) * in_what_stride + |
1441 | 0 | neighbors[best_site].col; |
1442 | 0 | } |
1443 | 0 | } |
1444 | |
1445 | 0 | this_mv.as_mv.row = clamp(ref_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX); |
1446 | 0 | this_mv.as_mv.col = clamp(ref_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX); |
1447 | |
1448 | 0 | return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + |
1449 | 0 | mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); |
1450 | 0 | } |
1451 | | |
1452 | | #if HAVE_SSE2 || HAVE_MSA |
1453 | | int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, |
1454 | | int_mv *ref_mv, int error_per_bit, |
1455 | | int search_range, vp8_variance_fn_ptr_t *fn_ptr, |
1456 | 628k | int *mvcost[2], int_mv *center_mv) { |
1457 | 628k | MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } }; |
1458 | 628k | int i, j; |
1459 | 628k | short this_row_offset, this_col_offset; |
1460 | | |
1461 | 628k | int what_stride = b->src_stride; |
1462 | 628k | int pre_stride = x->e_mbd.pre.y_stride; |
1463 | 628k | unsigned char *base_pre = x->e_mbd.pre.y_buffer; |
1464 | 628k | int in_what_stride = pre_stride; |
1465 | 628k | unsigned char *what = (*(b->base_src) + b->src); |
1466 | 628k | unsigned char *best_address = |
1467 | 628k | (unsigned char *)(base_pre + d->offset + |
1468 | 628k | (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col); |
1469 | 628k | unsigned char *check_here; |
1470 | 628k | int_mv this_mv; |
1471 | 628k | unsigned int bestsad; |
1472 | 628k | unsigned int thissad; |
1473 | | |
1474 | 628k | int *mvsadcost[2]; |
1475 | 628k | int_mv fcenter_mv; |
1476 | | |
1477 | 628k | mvsadcost[0] = x->mvsadcost[0]; |
1478 | 628k | mvsadcost[1] = x->mvsadcost[1]; |
1479 | 628k | fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; |
1480 | 628k | fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; |
1481 | | |
1482 | 628k | bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) + |
1483 | 628k | mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); |
1484 | | |
1485 | 773k | for (i = 0; i < search_range; ++i) { |
1486 | 767k | int best_site = -1; |
1487 | 767k | int all_in = 1; |
1488 | | |
1489 | 767k | all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min); |
1490 | 767k | all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max); |
1491 | 767k | all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min); |
1492 | 767k | all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max); |
1493 | | |
1494 | 767k | if (all_in) { |
1495 | 645k | unsigned int sad_array[4]; |
1496 | 645k | const unsigned char *block_offset[4]; |
1497 | 645k | block_offset[0] = best_address - in_what_stride; |
1498 | 645k | block_offset[1] = best_address - 1; |
1499 | 645k | block_offset[2] = best_address + 1; |
1500 | 645k | block_offset[3] = best_address + in_what_stride; |
1501 | | |
1502 | 645k | fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, |
1503 | 645k | sad_array); |
1504 | | |
1505 | 3.22M | for (j = 0; j < 4; ++j) { |
1506 | 2.58M | if (sad_array[j] < bestsad) { |
1507 | 759k | this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row; |
1508 | 759k | this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col; |
1509 | 759k | sad_array[j] += |
1510 | 759k | mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); |
1511 | | |
1512 | 759k | if (sad_array[j] < bestsad) { |
1513 | 136k | bestsad = sad_array[j]; |
1514 | 136k | best_site = j; |
1515 | 136k | } |
1516 | 759k | } |
1517 | 2.58M | } |
1518 | 645k | } else { |
1519 | 610k | for (j = 0; j < 4; ++j) { |
1520 | 488k | this_row_offset = ref_mv->as_mv.row + neighbors[j].row; |
1521 | 488k | this_col_offset = ref_mv->as_mv.col + neighbors[j].col; |
1522 | | |
1523 | 488k | if ((this_col_offset > x->mv_col_min) && |
1524 | 488k | (this_col_offset < x->mv_col_max) && |
1525 | 488k | (this_row_offset > x->mv_row_min) && |
1526 | 488k | (this_row_offset < x->mv_row_max)) { |
1527 | 362k | check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + |
1528 | 362k | best_address; |
1529 | 362k | thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); |
1530 | | |
1531 | 362k | if (thissad < bestsad) { |
1532 | 117k | this_mv.as_mv.row = this_row_offset; |
1533 | 117k | this_mv.as_mv.col = this_col_offset; |
1534 | 117k | thissad += |
1535 | 117k | mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); |
1536 | | |
1537 | 117k | if (thissad < bestsad) { |
1538 | 13.9k | bestsad = thissad; |
1539 | 13.9k | best_site = j; |
1540 | 13.9k | } |
1541 | 117k | } |
1542 | 362k | } |
1543 | 488k | } |
1544 | 122k | } |
1545 | | |
1546 | 767k | if (best_site == -1) { |
1547 | 622k | break; |
1548 | 622k | } else { |
1549 | 144k | ref_mv->as_mv.row += neighbors[best_site].row; |
1550 | 144k | ref_mv->as_mv.col += neighbors[best_site].col; |
1551 | 144k | best_address += (neighbors[best_site].row) * in_what_stride + |
1552 | 144k | neighbors[best_site].col; |
1553 | 144k | } |
1554 | 767k | } |
1555 | | |
1556 | 628k | this_mv.as_mv.row = clamp(ref_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX); |
1557 | 628k | this_mv.as_mv.col = clamp(ref_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX); |
1558 | | |
1559 | 628k | return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + |
1560 | 628k | mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); |
1561 | 628k | } |
1562 | | #endif // HAVE_SSE2 || HAVE_MSA |