Coverage Report

Created: 2024-09-06 07:53

/src/libvpx/vp8/encoder/mcomp.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include "./vp8_rtcd.h"
12
#include "./vpx_dsp_rtcd.h"
13
#include "onyx_int.h"
14
#include "mcomp.h"
15
#include "vpx_mem/vpx_mem.h"
16
#include "vpx_config.h"
17
#include <stdio.h>
18
#include <limits.h>
19
#include <math.h>
20
#include "vp8/common/findnearmv.h"
21
#include "vp8/common/common.h"
22
#include "vpx_dsp/vpx_dsp_common.h"
23
24
7.03M
/*
 * Weighted cost estimate for coding `mv` relative to `ref`.
 *
 * Each component difference is halved, clamped to [0, MVvals] and used as an
 * index into the matching cost table (mvcost[0] for rows, mvcost[1] for
 * columns). The summed table cost is scaled by `Weight` and shifted right by
 * seven bits.
 *
 * MV costing is derived from the vector distribution of the previous frame,
 * so it tends to overstate the cost of vectors. Coding a new vector can also
 * influence the cost of later vectors and the quality of NEAR / NEAREST
 * prediction for later blocks. `Weight` lets the caller compensate for these
 * effects to a limited extent.
 */
int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) {
  const int row_delta = (mv->as_mv.row - ref->as_mv.row) >> 1;
  const int col_delta = (mv->as_mv.col - ref->as_mv.col) >> 1;
  const int row_cost = mvcost[0][clamp(row_delta, 0, MVvals)];
  const int col_cost = mvcost[1][clamp(col_delta, 0, MVvals)];

  return ((row_cost + col_cost) * Weight) >> 7;
}
38
39
/*
 * Rate term for choosing `mv` over `ref`, scaled by `error_per_bit`.
 *
 * Returns 0 when `mvcost` is NULL (costing disabled). Otherwise the halved
 * component differences, clamped to [0, MVvals], index the row and column
 * cost tables; the summed cost is multiplied by `error_per_bit` and rounded
 * to the nearest multiple of 256 before being shifted down by eight bits.
 */
static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2],
                       int error_per_bit) {
  int row_idx;
  int col_idx;

  /* Ignore mv costing if mvcost is NULL */
  if (!mvcost) return 0;

  row_idx = clamp((mv->as_mv.row - ref->as_mv.row) >> 1, 0, MVvals);
  col_idx = clamp((mv->as_mv.col - ref->as_mv.col) >> 1, 0, MVvals);

  return ((mvcost[0][row_idx] + mvcost[1][col_idx]) * error_per_bit + 128) >>
         8;
}
53
54
/*
 * SAD-domain rate term for `mv` relative to `ref`, on a full pixel basis.
 *
 * Returns 0 when `mvsadcost` is NULL (costing disabled). Otherwise the raw
 * (unshifted, unclamped) component differences index the row and column
 * tables directly; the summed cost is multiplied by `error_per_bit`, rounded
 * and shifted down by eight bits.
 */
static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2],
                          int error_per_bit) {
  int row_cost;
  int col_cost;

  /* Ignore mv costing if mvsadcost is NULL */
  if (!mvsadcost) return 0;

  row_cost = mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)];
  col_cost = mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)];

  return ((row_cost + col_cost) * error_per_bit + 128) >> 8;
}
67
68
0
/*
 * Fills x->ss with the search-site table for a 4-point pattern.
 *
 * Entry 0 is the centre (zero vector, zero offset). Then, for each step
 * length starting at MAX_FIRST_STEP and halving until it reaches zero, four
 * sites are appended in the order up, down, left, right. Each site stores its
 * (row, col) displacement and the matching buffer offset
 * row * stride + col. x->ss_count receives the total number of entries and
 * x->searches_per_step is set to 4.
 */
void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) {
  /* Unit displacements as { row, col }, in storage order. */
  static const int unit_step[4][2] = {
    { -1, 0 }, { 1, 0 }, { 0, -1 }, { 0, 1 }
  };
  int count = 0;
  int len;
  int k;

  /* Centre site. */
  x->ss[count].mv.col = 0;
  x->ss[count].mv.row = 0;
  x->ss[count].offset = 0;
  ++count;

  /* Generate offsets for 4 search sites per step, contracting each time. */
  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
    for (k = 0; k < 4; ++k) {
      const int drow = unit_step[k][0] * len;
      const int dcol = unit_step[k][1] * len;

      x->ss[count].mv.col = dcol;
      x->ss[count].mv.row = drow;
      x->ss[count].offset = drow * stride + dcol;
      ++count;
    }
  }

  x->ss_count = count;
  x->searches_per_step = 4;
}
111
112
92.9k
/*
 * Fills x->ss with the search-site table for an 8-point pattern.
 *
 * Entry 0 is the centre (zero vector, zero offset). Then, for each step
 * length starting at MAX_FIRST_STEP and halving until it reaches zero, eight
 * sites are appended in the order up, down, left, right, up-left, up-right,
 * down-left, down-right. Each site stores its (row, col) displacement and
 * the matching buffer offset row * stride + col. x->ss_count receives the
 * total number of entries and x->searches_per_step is set to 8.
 */
void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) {
  /* Unit displacements as { row, col }, in storage order. */
  static const int unit_step[8][2] = {
    { -1, 0 },  { 1, 0 },  { 0, -1 }, { 0, 1 },
    { -1, -1 }, { -1, 1 }, { 1, -1 }, { 1, 1 }
  };
  int count = 0;
  int len;
  int k;

  /* Centre site. */
  x->ss[count].mv.col = 0;
  x->ss[count].mv.row = 0;
  x->ss[count].offset = 0;
  ++count;

  /* Generate offsets for 8 search sites per step, contracting each time. */
  for (len = MAX_FIRST_STEP; len > 0; len /= 2) {
    for (k = 0; k < 8; ++k) {
      const int drow = unit_step[k][0] * len;
      const int dcol = unit_step[k][1] * len;

      x->ss[count].mv.col = dcol;
      x->ss[count].mv.row = drow;
      x->ss[count].offset = drow * stride + dcol;
      ++count;
    }
  }

  x->ss_count = count;
  x->searches_per_step = 8;
}
179
180
/*
181
 * To avoid the penalty for crossing cache-line read, preload the reference
182
 * area in a small buffer, which is aligned to make sure there won't be crossing
183
 * cache-line read while reading from this buffer. This reduced the cpu
184
 * cycles spent on reading ref data in sub-pixel filter functions.
185
 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
186
 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
187
 * could reduce the area.
188
 */
189
190
/*
 * Helper macros for the iterative sub-pixel search that follows. They are
 * deliberately not self-contained: each one reads (and some write) local
 * variables of the function that expands it, by name:
 *   MVC          reads mvcost, rr, rc, error_per_bit
 *   PRE          reads y, y_stride, offset
 *   DIST         reads vfp, z, b, y_stride and writes sse
 *   IFMVCV       reads minc, maxc, minr, maxr
 *   CHECK_BETTER reads/writes thismse, besterr, br, bc, *distortion, *sse1
 * The r and c arguments are expanded several times, so they must be free of
 * side effects. Every parameter is parenthesized at its point of use so that
 * any expression, whatever its precedence, can be passed safely.
 */

/* estimated cost of a motion vector (r,c); 0 when mvcost is NULL */
#define MVC(r, c)                                                             \
  (mvcost                                                                     \
       ? ((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128) >> 8 \
       : 0)
/* pointer to predictor base of a motionvector */
#define PRE(r, c) (y + (((r) >> 2) * y_stride + ((c) >> 2) - (offset)))
/* convert motion vector component to offset for svf calc */
#define SP(x) (((x)&3) << 1)
/* returns subpixel variance error function. */
#define DIST(r, c) \
  vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse)
/* runs statement s when (r,c) lies inside [minr,maxr] x [minc,maxc], else e */
#define IFMVCV(r, c, s, e) \
  if ((c) >= minc && (c) <= maxc && (r) >= minr && (r) <= maxr) s else e;
/* returns distortion + motion vector cost */
#define ERR(r, c) (MVC(r, c) + DIST(r, c))
/* checks if (r,c) has better score than previous best; v receives the score
 * of (r,c), or UINT_MAX when (r,c) is outside the allowed range */
#define CHECK_BETTER(v, r, c)                            \
  do {                                                   \
    IFMVCV(                                              \
        r, c,                                            \
        {                                                \
          thismse = DIST(r, c);                          \
          if (((v) = (MVC(r, c) + thismse)) < besterr) { \
            besterr = (v);                               \
            br = (r);                                    \
            bc = (c);                                    \
            *distortion = thismse;                       \
            *sse1 = sse;                                 \
          }                                              \
        },                                               \
        (v) = UINT_MAX;)                                 \
  } while (0)
223
224
int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
225
                                             int_mv *bestmv, int_mv *ref_mv,
226
                                             int error_per_bit,
227
                                             const vp8_variance_fn_ptr_t *vfp,
228
                                             int *mvcost[2], int *distortion,
229
5.25M
                                             unsigned int *sse1) {
230
5.25M
  unsigned char *z = (*(b->base_src) + b->src);
231
232
5.25M
  int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
233
5.25M
  int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
234
5.25M
  int tr = br, tc = bc;
235
5.25M
  unsigned int besterr;
236
5.25M
  unsigned int left, right, up, down, diag;
237
5.25M
  unsigned int sse;
238
5.25M
  unsigned int whichdir;
239
5.25M
  unsigned int halfiters = 4;
240
5.25M
  unsigned int quarteriters = 4;
241
5.25M
  int thismse;
242
243
5.25M
  int minc = VPXMAX(x->mv_col_min * 4,
244
5.25M
                    (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
245
5.25M
  int maxc = VPXMIN(x->mv_col_max * 4,
246
5.25M
                    (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
247
5.25M
  int minr = VPXMAX(x->mv_row_min * 4,
248
5.25M
                    (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
249
5.25M
  int maxr = VPXMIN(x->mv_row_max * 4,
250
5.25M
                    (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
251
252
5.25M
  int y_stride;
253
5.25M
  int offset;
254
5.25M
  int pre_stride = x->e_mbd.pre.y_stride;
255
5.25M
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
256
257
5.25M
#if VPX_ARCH_X86 || VPX_ARCH_X86_64
258
5.25M
  MACROBLOCKD *xd = &x->e_mbd;
259
5.25M
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
260
5.25M
                       bestmv->as_mv.col;
261
5.25M
  unsigned char *y;
262
5.25M
  int buf_r1, buf_r2, buf_c1;
263
264
  /* Clamping to avoid out-of-range data access */
265
5.25M
  buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min)
266
5.25M
               ? (bestmv->as_mv.row - x->mv_row_min)
267
5.25M
               : 3;
268
5.25M
  buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max)
269
5.25M
               ? (x->mv_row_max - bestmv->as_mv.row)
270
5.25M
               : 3;
271
5.25M
  buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min)
272
5.25M
               ? (bestmv->as_mv.col - x->mv_col_min)
273
5.25M
               : 3;
274
5.25M
  y_stride = 32;
275
276
  /* Copy to intermediate buffer before searching. */
277
5.25M
  vfp->copymem(y_0 - buf_c1 - pre_stride * buf_r1, pre_stride, xd->y_buf,
278
5.25M
               y_stride, 16 + buf_r1 + buf_r2);
279
5.25M
  y = xd->y_buf + y_stride * buf_r1 + buf_c1;
280
#else
281
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
282
                     bestmv->as_mv.col;
283
  y_stride = pre_stride;
284
#endif
285
286
5.25M
  offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;
287
288
  /* central mv */
289
5.25M
  bestmv->as_mv.row = clamp(bestmv->as_mv.row * 8, SHRT_MIN, SHRT_MAX);
290
5.25M
  bestmv->as_mv.col = clamp(bestmv->as_mv.col * 8, SHRT_MIN, SHRT_MAX);
291
292
  /* calculate central point error */
293
5.25M
  besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
294
5.25M
  *distortion = besterr;
295
5.25M
  besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
296
297
  /* TODO: Each subsequent iteration checks at least one point in common
298
   * with the last iteration could be 2 ( if diag selected)
299
   */
300
8.17M
  while (--halfiters) {
301
    /* 1/2 pel */
302
8.04M
    CHECK_BETTER(left, tr, tc - 2);
303
8.04M
    CHECK_BETTER(right, tr, tc + 2);
304
8.04M
    CHECK_BETTER(up, tr - 2, tc);
305
8.04M
    CHECK_BETTER(down, tr + 2, tc);
306
307
8.04M
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
308
309
8.04M
    switch (whichdir) {
310
1.73M
      case 0: CHECK_BETTER(diag, tr - 2, tc - 2); break;
311
1.88M
      case 1: CHECK_BETTER(diag, tr - 2, tc + 2); break;
312
2.02M
      case 2: CHECK_BETTER(diag, tr + 2, tc - 2); break;
313
2.39M
      case 3: CHECK_BETTER(diag, tr + 2, tc + 2); break;
314
8.04M
    }
315
316
    /* no reason to check the same one again. */
317
8.04M
    if (tr == br && tc == bc) break;
318
319
2.92M
    tr = br;
320
2.92M
    tc = bc;
321
2.92M
  }
322
323
  /* TODO: Each subsequent iteration checks at least one point in common
324
   * with the last iteration could be 2 ( if diag selected)
325
   */
326
327
  /* 1/4 pel */
328
7.40M
  while (--quarteriters) {
329
7.36M
    CHECK_BETTER(left, tr, tc - 1);
330
7.36M
    CHECK_BETTER(right, tr, tc + 1);
331
7.36M
    CHECK_BETTER(up, tr - 1, tc);
332
7.36M
    CHECK_BETTER(down, tr + 1, tc);
333
334
7.36M
    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
335
336
7.36M
    switch (whichdir) {
337
1.58M
      case 0: CHECK_BETTER(diag, tr - 1, tc - 1); break;
338
1.72M
      case 1: CHECK_BETTER(diag, tr - 1, tc + 1); break;
339
1.79M
      case 2: CHECK_BETTER(diag, tr + 1, tc - 1); break;
340
2.25M
      case 3: CHECK_BETTER(diag, tr + 1, tc + 1); break;
341
7.36M
    }
342
343
    /* no reason to check the same one again. */
344
7.36M
    if (tr == br && tc == bc) break;
345
346
2.15M
    tr = br;
347
2.15M
    tc = bc;
348
2.15M
  }
349
350
5.25M
  bestmv->as_mv.row = clamp(br * 2, SHRT_MIN, SHRT_MAX);
351
5.25M
  bestmv->as_mv.col = clamp(bc * 2, SHRT_MIN, SHRT_MAX);
352
353
5.25M
  if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) ||
354
5.25M
      (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) {
355
1.10k
    return INT_MAX;
356
1.10k
  }
357
358
5.25M
  return besterr;
359
5.25M
}
360
#undef MVC
361
#undef PRE
362
#undef SP
363
#undef DIST
364
#undef IFMVCV
365
#undef ERR
366
#undef CHECK_BETTER
367
368
int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
369
                                 int_mv *bestmv, int_mv *ref_mv,
370
                                 int error_per_bit,
371
                                 const vp8_variance_fn_ptr_t *vfp,
372
                                 int *mvcost[2], int *distortion,
373
12.5k
                                 unsigned int *sse1) {
374
12.5k
  int bestmse = INT_MAX;
375
12.5k
  int_mv startmv;
376
12.5k
  int_mv this_mv;
377
12.5k
  unsigned char *z = (*(b->base_src) + b->src);
378
12.5k
  int left, right, up, down, diag;
379
12.5k
  unsigned int sse;
380
12.5k
  int whichdir;
381
12.5k
  int thismse;
382
12.5k
  int y_stride;
383
12.5k
  int pre_stride = x->e_mbd.pre.y_stride;
384
12.5k
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
385
386
12.5k
#if VPX_ARCH_X86 || VPX_ARCH_X86_64
387
12.5k
  MACROBLOCKD *xd = &x->e_mbd;
388
12.5k
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
389
12.5k
                       bestmv->as_mv.col;
390
12.5k
  unsigned char *y;
391
392
12.5k
  y_stride = 32;
393
  /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
394
12.5k
  vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
395
12.5k
  y = xd->y_buf + y_stride + 1;
396
#else
397
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
398
                     bestmv->as_mv.col;
399
  y_stride = pre_stride;
400
#endif
401
402
  /* central mv */
403
12.5k
  bestmv->as_mv.row = clamp(bestmv->as_mv.row * 8, SHRT_MIN, SHRT_MAX);
404
12.5k
  bestmv->as_mv.col = clamp(bestmv->as_mv.col * 8, SHRT_MIN, SHRT_MAX);
405
12.5k
  startmv = *bestmv;
406
407
  /* calculate central point error */
408
12.5k
  bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
409
12.5k
  *distortion = bestmse;
410
12.5k
  bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
411
412
  /* go left then right and check error */
413
12.5k
  this_mv.as_mv.row = startmv.as_mv.row;
414
12.5k
  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
415
  /* "halfpix" horizontal variance */
416
12.5k
  thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
417
12.5k
  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
418
419
12.5k
  if (left < bestmse) {
420
11.4k
    *bestmv = this_mv;
421
11.4k
    bestmse = left;
422
11.4k
    *distortion = thismse;
423
11.4k
    *sse1 = sse;
424
11.4k
  }
425
426
12.5k
  this_mv.as_mv.col += 8;
427
  /* "halfpix" horizontal variance */
428
12.5k
  thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
429
12.5k
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
430
431
12.5k
  if (right < bestmse) {
432
5.86k
    *bestmv = this_mv;
433
5.86k
    bestmse = right;
434
5.86k
    *distortion = thismse;
435
5.86k
    *sse1 = sse;
436
5.86k
  }
437
438
  /* go up then down and check error */
439
12.5k
  this_mv.as_mv.col = startmv.as_mv.col;
440
12.5k
  this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
441
  /* "halfpix" vertical variance */
442
12.5k
  thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
443
12.5k
  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
444
445
12.5k
  if (up < bestmse) {
446
7.46k
    *bestmv = this_mv;
447
7.46k
    bestmse = up;
448
7.46k
    *distortion = thismse;
449
7.46k
    *sse1 = sse;
450
7.46k
  }
451
452
12.5k
  this_mv.as_mv.row += 8;
453
  /* "halfpix" vertical variance */
454
12.5k
  thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
455
12.5k
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
456
457
12.5k
  if (down < bestmse) {
458
4.25k
    *bestmv = this_mv;
459
4.25k
    bestmse = down;
460
4.25k
    *distortion = thismse;
461
4.25k
    *sse1 = sse;
462
4.25k
  }
463
464
  /* now check 1 more diagonal */
465
12.5k
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
466
12.5k
  this_mv = startmv;
467
468
12.5k
  switch (whichdir) {
469
3.53k
    case 0:
470
3.53k
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
471
3.53k
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
472
      /* "halfpix" horizontal/vertical variance */
473
3.53k
      thismse =
474
3.53k
          vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
475
3.53k
      break;
476
3.32k
    case 1:
477
3.32k
      this_mv.as_mv.col += 4;
478
3.32k
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
479
      /* "halfpix" horizontal/vertical variance */
480
3.32k
      thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
481
3.32k
      break;
482
2.75k
    case 2:
483
2.75k
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
484
2.75k
      this_mv.as_mv.row += 4;
485
      /* "halfpix" horizontal/vertical variance */
486
2.75k
      thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
487
2.75k
      break;
488
2.92k
    case 3:
489
2.92k
    default:
490
2.92k
      this_mv.as_mv.col += 4;
491
2.92k
      this_mv.as_mv.row += 4;
492
      /* "halfpix" horizontal/vertical variance */
493
2.92k
      thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
494
2.92k
      break;
495
12.5k
  }
496
497
12.5k
  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
498
499
12.5k
  if (diag < bestmse) {
500
9.92k
    *bestmv = this_mv;
501
9.92k
    bestmse = diag;
502
9.92k
    *distortion = thismse;
503
9.92k
    *sse1 = sse;
504
9.92k
  }
505
506
  /* time to check quarter pels. */
507
12.5k
  if (bestmv->as_mv.row < startmv.as_mv.row) y -= y_stride;
508
509
12.5k
  if (bestmv->as_mv.col < startmv.as_mv.col) y--;
510
511
12.5k
  startmv = *bestmv;
512
513
  /* go left then right and check error */
514
12.5k
  this_mv.as_mv.row = startmv.as_mv.row;
515
516
12.5k
  if (startmv.as_mv.col & 7) {
517
11.5k
    this_mv.as_mv.col = startmv.as_mv.col - 2;
518
11.5k
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
519
11.5k
                       this_mv.as_mv.row & 7, z, b->src_stride, &sse);
520
11.5k
  } else {
521
964
    this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
522
964
    thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
523
964
                       b->src_stride, &sse);
524
964
  }
525
526
12.5k
  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
527
528
12.5k
  if (left < bestmse) {
529
1.20k
    *bestmv = this_mv;
530
1.20k
    bestmse = left;
531
1.20k
    *distortion = thismse;
532
1.20k
    *sse1 = sse;
533
1.20k
  }
534
535
12.5k
  this_mv.as_mv.col += 4;
536
12.5k
  thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
537
12.5k
                     z, b->src_stride, &sse);
538
12.5k
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
539
540
12.5k
  if (right < bestmse) {
541
944
    *bestmv = this_mv;
542
944
    bestmse = right;
543
944
    *distortion = thismse;
544
944
    *sse1 = sse;
545
944
  }
546
547
  /* go up then down and check error */
548
12.5k
  this_mv.as_mv.col = startmv.as_mv.col;
549
550
12.5k
  if (startmv.as_mv.row & 7) {
551
10.1k
    this_mv.as_mv.row = startmv.as_mv.row - 2;
552
10.1k
    thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
553
10.1k
                       this_mv.as_mv.row & 7, z, b->src_stride, &sse);
554
10.1k
  } else {
555
2.40k
    this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
556
2.40k
    thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
557
2.40k
                       b->src_stride, &sse);
558
2.40k
  }
559
560
12.5k
  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
561
562
12.5k
  if (up < bestmse) {
563
1.81k
    *bestmv = this_mv;
564
1.81k
    bestmse = up;
565
1.81k
    *distortion = thismse;
566
1.81k
    *sse1 = sse;
567
1.81k
  }
568
569
12.5k
  this_mv.as_mv.row += 4;
570
12.5k
  thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7,
571
12.5k
                     z, b->src_stride, &sse);
572
12.5k
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
573
574
12.5k
  if (down < bestmse) {
575
1.61k
    *bestmv = this_mv;
576
1.61k
    bestmse = down;
577
1.61k
    *distortion = thismse;
578
1.61k
    *sse1 = sse;
579
1.61k
  }
580
581
  /* now check 1 more diagonal */
582
12.5k
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
583
584
12.5k
  this_mv = startmv;
585
586
12.5k
  switch (whichdir) {
587
3.45k
    case 0:
588
589
3.45k
      if (startmv.as_mv.row & 7) {
590
2.49k
        this_mv.as_mv.row -= 2;
591
592
2.49k
        if (startmv.as_mv.col & 7) {
593
2.43k
          this_mv.as_mv.col -= 2;
594
2.43k
          thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
595
2.43k
                             this_mv.as_mv.row & 7, z, b->src_stride, &sse);
596
2.43k
        } else {
597
62
          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
598
62
          thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
599
62
                             b->src_stride, &sse);
600
62
        }
601
2.49k
      } else {
602
957
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
603
604
957
        if (startmv.as_mv.col & 7) {
605
616
          this_mv.as_mv.col -= 2;
606
616
          thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6,
607
616
                             z, b->src_stride, &sse);
608
616
        } else {
609
341
          this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
610
341
          thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride,
611
341
                             &sse);
612
341
        }
613
957
      }
614
615
3.45k
      break;
616
3.28k
    case 1:
617
3.28k
      this_mv.as_mv.col += 2;
618
619
3.28k
      if (startmv.as_mv.row & 7) {
620
2.50k
        this_mv.as_mv.row -= 2;
621
2.50k
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
622
2.50k
                           this_mv.as_mv.row & 7, z, b->src_stride, &sse);
623
2.50k
      } else {
624
776
        this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6;
625
776
        thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z,
626
776
                           b->src_stride, &sse);
627
776
      }
628
629
3.28k
      break;
630
2.88k
    case 2:
631
2.88k
      this_mv.as_mv.row += 2;
632
633
2.88k
      if (startmv.as_mv.col & 7) {
634
2.75k
        this_mv.as_mv.col -= 2;
635
2.75k
        thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
636
2.75k
                           this_mv.as_mv.row & 7, z, b->src_stride, &sse);
637
2.75k
      } else {
638
131
        this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6;
639
131
        thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z,
640
131
                           b->src_stride, &sse);
641
131
      }
642
643
2.88k
      break;
644
2.92k
    case 3:
645
2.92k
      this_mv.as_mv.col += 2;
646
2.92k
      this_mv.as_mv.row += 2;
647
2.92k
      thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7,
648
2.92k
                         this_mv.as_mv.row & 7, z, b->src_stride, &sse);
649
2.92k
      break;
650
12.5k
  }
651
652
12.5k
  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
653
654
12.5k
  if (diag < bestmse) {
655
512
    *bestmv = this_mv;
656
512
    bestmse = diag;
657
512
    *distortion = thismse;
658
512
    *sse1 = sse;
659
512
  }
660
661
12.5k
  return bestmse;
662
12.5k
}
663
664
int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
665
                                  int_mv *bestmv, int_mv *ref_mv,
666
                                  int error_per_bit,
667
                                  const vp8_variance_fn_ptr_t *vfp,
668
                                  int *mvcost[2], int *distortion,
669
0
                                  unsigned int *sse1) {
670
0
  int bestmse = INT_MAX;
671
0
  int_mv startmv;
672
0
  int_mv this_mv;
673
0
  unsigned char *z = (*(b->base_src) + b->src);
674
0
  int left, right, up, down, diag;
675
0
  unsigned int sse;
676
0
  int whichdir;
677
0
  int thismse;
678
0
  int y_stride;
679
0
  int pre_stride = x->e_mbd.pre.y_stride;
680
0
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
681
682
0
#if VPX_ARCH_X86 || VPX_ARCH_X86_64
683
0
  MACROBLOCKD *xd = &x->e_mbd;
684
0
  unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
685
0
                       bestmv->as_mv.col;
686
0
  unsigned char *y;
687
688
0
  y_stride = 32;
689
  /* Copy 18 rows x 32 cols area to intermediate buffer before searching. */
690
0
  vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18);
691
0
  y = xd->y_buf + y_stride + 1;
692
#else
693
  unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
694
                     bestmv->as_mv.col;
695
  y_stride = pre_stride;
696
#endif
697
698
  /* central mv */
699
0
  bestmv->as_mv.row = clamp(bestmv->as_mv.row * 8, SHRT_MIN, SHRT_MAX);
700
0
  bestmv->as_mv.col = clamp(bestmv->as_mv.col * 8, SHRT_MIN, SHRT_MAX);
701
0
  startmv = *bestmv;
702
703
  /* calculate central point error */
704
0
  bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1);
705
0
  *distortion = bestmse;
706
0
  bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
707
708
  /* go left then right and check error */
709
0
  this_mv.as_mv.row = startmv.as_mv.row;
710
0
  this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4);
711
  /* "halfpix" horizontal variance */
712
0
  thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse);
713
0
  left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
714
715
0
  if (left < bestmse) {
716
0
    *bestmv = this_mv;
717
0
    bestmse = left;
718
0
    *distortion = thismse;
719
0
    *sse1 = sse;
720
0
  }
721
722
0
  this_mv.as_mv.col += 8;
723
  /* "halfpix" horizontal variance */
724
0
  thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse);
725
0
  right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
726
727
0
  if (right < bestmse) {
728
0
    *bestmv = this_mv;
729
0
    bestmse = right;
730
0
    *distortion = thismse;
731
0
    *sse1 = sse;
732
0
  }
733
734
  /* go up then down and check error */
735
0
  this_mv.as_mv.col = startmv.as_mv.col;
736
0
  this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4);
737
  /* "halfpix" vertical variance */
738
0
  thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse);
739
0
  up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
740
741
0
  if (up < bestmse) {
742
0
    *bestmv = this_mv;
743
0
    bestmse = up;
744
0
    *distortion = thismse;
745
0
    *sse1 = sse;
746
0
  }
747
748
0
  this_mv.as_mv.row += 8;
749
  /* "halfpix" vertical variance */
750
0
  thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse);
751
0
  down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
752
753
0
  if (down < bestmse) {
754
0
    *bestmv = this_mv;
755
0
    bestmse = down;
756
0
    *distortion = thismse;
757
0
    *sse1 = sse;
758
0
  }
759
760
  /* now check 1 more diagonal - */
761
0
  whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
762
0
  this_mv = startmv;
763
764
0
  switch (whichdir) {
765
0
    case 0:
766
0
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
767
0
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
768
      /* "halfpix" horizontal/vertical variance */
769
0
      thismse =
770
0
          vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
771
0
      break;
772
0
    case 1:
773
0
      this_mv.as_mv.col += 4;
774
0
      this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4;
775
      /* "halfpix" horizontal/vertical variance */
776
0
      thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse);
777
0
      break;
778
0
    case 2:
779
0
      this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4;
780
0
      this_mv.as_mv.row += 4;
781
      /* "halfpix" horizontal/vertical variance */
782
0
      thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse);
783
0
      break;
784
0
    case 3:
785
0
    default:
786
0
      this_mv.as_mv.col += 4;
787
0
      this_mv.as_mv.row += 4;
788
      /* "halfpix" horizontal/vertical variance */
789
0
      thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse);
790
0
      break;
791
0
  }
792
793
0
  diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
794
795
0
  if (diag < bestmse) {
796
0
    *bestmv = this_mv;
797
0
    bestmse = diag;
798
0
    *distortion = thismse;
799
0
    *sse1 = sse;
800
0
  }
801
802
0
  return bestmse;
803
0
}
804
805
/* Sets `all_in` to 1 when (br - range), (br + range), (bc - range) and
 * (bc + range) all lie inside the inclusive limits x->mv_row_min ..
 * x->mv_row_max and x->mv_col_min .. x->mv_col_max, and to 0 otherwise.
 * Expects `all_in`, `br`, `bc` and `x` to be in scope where it is expanded.
 * `range` is parenthesized so that an expression argument (e.g. `a + b`)
 * is applied as a single value on both sides of the centre.
 */
#define CHECK_BOUNDS(range)                      \
  do {                                           \
    all_in = 1;                                  \
    all_in &= ((br - (range)) >= x->mv_row_min); \
    all_in &= ((br + (range)) <= x->mv_row_max); \
    all_in &= ((bc - (range)) >= x->mv_col_min); \
    all_in &= ((bc + (range)) <= x->mv_col_max); \
  } while (0)
813
814
/* Skips the rest of the enclosing loop iteration when this_mv falls outside
 * the inclusive limits x->mv_col_min .. x->mv_col_max or x->mv_row_min ..
 * x->mv_row_max. This is deliberately a plain brace block: inside a
 * do/while(0) wrapper the `continue` would bind to that wrapper instead of
 * the caller's loop.
 */
#define CHECK_POINT                          \
  {                                          \
    if (this_mv.as_mv.col < x->mv_col_min || \
        this_mv.as_mv.col > x->mv_col_max || \
        this_mv.as_mv.row < x->mv_row_min || \
        this_mv.as_mv.row > x->mv_row_max)   \
      continue;                              \
  }
821
822
/* Candidate acceptance test. If `thissad` already fails to beat `bestsad`
 * nothing happens; otherwise the mvsad_err_cost() term for this_mv is added
 * to `thissad` and, if it still beats `bestsad`, `bestsad` is updated and
 * the current index `i` is recorded in `best_site`. The `break` statements
 * leave only this macro's own do/while(0), never the caller's loop.
 */
#define CHECK_BETTER                                                          \
  do {                                                                        \
    if (thissad >= bestsad) break;                                            \
    thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); \
    if (thissad >= bestsad) break;                                            \
    bestsad = thissad;                                                        \
    best_site = i;                                                            \
  } while (0)
833
834
/* Follow-up candidates for vp8_hex_search, indexed by the hexagon vertex k
 * that the previous step moved to. Each row holds three of that function's
 * hexagon offsets: the vertex before k, vertex k itself and the vertex after
 * k, in ring order.
 */
static const MV next_chkpts[6][3] = {
  { { -2, 0 }, { -1, -2 }, { 1, -2 } },
  { { -1, -2 }, { 1, -2 }, { 2, 0 } },
  { { 1, -2 }, { 2, 0 }, { 1, 2 } },
  { { 2, 0 }, { 1, 2 }, { -1, 2 } },
  { { 1, 2 }, { -1, 2 }, { -2, 0 } },
  { { -1, 2 }, { -2, 0 }, { -1, -2 } }
};
839
840
int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
841
                   int_mv *best_mv, int search_param, int sad_per_bit,
842
                   const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2],
843
12.5k
                   int_mv *center_mv) {
844
12.5k
  MV hex[6] = {
845
12.5k
    { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 }
846
12.5k
  };
847
12.5k
  MV neighbors[4] = { { 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 } };
848
12.5k
  int i, j;
849
850
12.5k
  unsigned char *what = (*(b->base_src) + b->src);
851
12.5k
  int what_stride = b->src_stride;
852
12.5k
  int pre_stride = x->e_mbd.pre.y_stride;
853
12.5k
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
854
855
12.5k
  int in_what_stride = pre_stride;
856
12.5k
  int br, bc;
857
12.5k
  int_mv this_mv;
858
12.5k
  unsigned int bestsad;
859
12.5k
  unsigned int thissad;
860
12.5k
  unsigned char *base_offset;
861
12.5k
  unsigned char *this_offset;
862
12.5k
  int k = -1;
863
12.5k
  int all_in;
864
12.5k
  int best_site = -1;
865
12.5k
  int hex_range = 127;
866
12.5k
  int dia_range = 8;
867
868
12.5k
  int_mv fcenter_mv;
869
12.5k
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
870
12.5k
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
871
872
  /* adjust ref_mv to make sure it is within MV range */
873
12.5k
  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
874
12.5k
               x->mv_row_max);
875
12.5k
  br = ref_mv->as_mv.row;
876
12.5k
  bc = ref_mv->as_mv.col;
877
878
  /* Work out the start point for the search */
879
12.5k
  base_offset = (unsigned char *)(base_pre + d->offset);
880
12.5k
  this_offset = base_offset + (br * (pre_stride)) + bc;
881
12.5k
  this_mv.as_mv.row = br;
882
12.5k
  this_mv.as_mv.col = bc;
883
12.5k
  bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride) +
884
12.5k
            mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
885
886
#if CONFIG_MULTI_RES_ENCODING
887
  /* Lower search range based on prediction info */
888
  if (search_param >= 6)
889
    goto cal_neighbors;
890
  else if (search_param >= 5)
891
    hex_range = 4;
892
  else if (search_param >= 4)
893
    hex_range = 6;
894
  else if (search_param >= 3)
895
    hex_range = 15;
896
  else if (search_param >= 2)
897
    hex_range = 31;
898
  else if (search_param >= 1)
899
    hex_range = 63;
900
901
  dia_range = 8;
902
#else
903
12.5k
  (void)search_param;
904
12.5k
#endif
905
906
  /* hex search */
907
12.5k
  CHECK_BOUNDS(2);
908
909
12.5k
  if (all_in) {
910
87.6k
    for (i = 0; i < 6; ++i) {
911
75.1k
      this_mv.as_mv.row = br + hex[i].row;
912
75.1k
      this_mv.as_mv.col = bc + hex[i].col;
913
75.1k
      this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
914
75.1k
                    this_mv.as_mv.col;
915
75.1k
      thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
916
75.1k
      CHECK_BETTER;
917
75.1k
    }
918
12.5k
  } else {
919
168
    for (i = 0; i < 6; ++i) {
920
144
      this_mv.as_mv.row = br + hex[i].row;
921
144
      this_mv.as_mv.col = bc + hex[i].col;
922
144
      CHECK_POINT
923
104
      this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) +
924
104
                    this_mv.as_mv.col;
925
104
      thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
926
104
      CHECK_BETTER;
927
104
    }
928
24
  }
929
930
12.5k
  if (best_site == -1) {
931
3.51k
    goto cal_neighbors;
932
9.03k
  } else {
933
9.03k
    br += hex[best_site].row;
934
9.03k
    bc += hex[best_site].col;
935
9.03k
    k = best_site;
936
9.03k
  }
937
938
17.7k
  for (j = 1; j < hex_range; ++j) {
939
17.7k
    best_site = -1;
940
17.7k
    CHECK_BOUNDS(2);
941
942
17.7k
    if (all_in) {
943
70.1k
      for (i = 0; i < 3; ++i) {
944
52.6k
        this_mv.as_mv.row = br + next_chkpts[k][i].row;
945
52.6k
        this_mv.as_mv.col = bc + next_chkpts[k][i].col;
946
52.6k
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
947
52.6k
                      this_mv.as_mv.col;
948
52.6k
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
949
52.6k
        CHECK_BETTER;
950
52.6k
      }
951
17.5k
    } else {
952
896
      for (i = 0; i < 3; ++i) {
953
672
        this_mv.as_mv.row = br + next_chkpts[k][i].row;
954
672
        this_mv.as_mv.col = bc + next_chkpts[k][i].col;
955
672
        CHECK_POINT
956
378
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
957
378
                      this_mv.as_mv.col;
958
378
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
959
378
        CHECK_BETTER;
960
378
      }
961
224
    }
962
963
17.7k
    if (best_site == -1) {
964
9.03k
      break;
965
9.03k
    } else {
966
8.73k
      br += next_chkpts[k][best_site].row;
967
8.73k
      bc += next_chkpts[k][best_site].col;
968
8.73k
      k += 5 + best_site;
969
8.73k
      if (k >= 12) {
970
592
        k -= 12;
971
8.14k
      } else if (k >= 6) {
972
7.60k
        k -= 6;
973
7.60k
      }
974
8.73k
    }
975
17.7k
  }
976
977
/* check 4 1-away neighbors */
978
12.5k
cal_neighbors:
979
23.2k
  for (j = 0; j < dia_range; ++j) {
980
23.2k
    best_site = -1;
981
23.2k
    CHECK_BOUNDS(1);
982
983
23.2k
    if (all_in) {
984
115k
      for (i = 0; i < 4; ++i) {
985
92.7k
        this_mv.as_mv.row = br + neighbors[i].row;
986
92.7k
        this_mv.as_mv.col = bc + neighbors[i].col;
987
92.7k
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
988
92.7k
                      this_mv.as_mv.col;
989
92.7k
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
990
92.7k
        CHECK_BETTER;
991
92.7k
      }
992
23.1k
    } else {
993
270
      for (i = 0; i < 4; ++i) {
994
216
        this_mv.as_mv.row = br + neighbors[i].row;
995
216
        this_mv.as_mv.col = bc + neighbors[i].col;
996
216
        CHECK_POINT
997
162
        this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) +
998
162
                      this_mv.as_mv.col;
999
162
        thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride);
1000
162
        CHECK_BETTER;
1001
162
      }
1002
54
    }
1003
1004
23.2k
    if (best_site == -1) {
1005
12.4k
      break;
1006
12.4k
    } else {
1007
10.7k
      br += neighbors[best_site].row;
1008
10.7k
      bc += neighbors[best_site].col;
1009
10.7k
    }
1010
23.2k
  }
1011
1012
12.5k
  best_mv->as_mv.row = br;
1013
12.5k
  best_mv->as_mv.col = bc;
1014
1015
12.5k
  return bestsad;
1016
9.03k
}
1017
#undef CHECK_BOUNDS
1018
#undef CHECK_POINT
1019
#undef CHECK_BETTER
1020
1021
/* Step search over the site table x->ss, one candidate at a time.
 *
 * ref_mv is clamped in place to x's MV limits and becomes the starting
 * position; best_mv is updated as the search moves. Each step evaluates
 * x->searches_per_step sites relative to the current best position, skipping
 * any site that is not strictly inside the MV limits, and scores the rest
 * with fn_ptr->sdf() plus mvsad_err_cost() against center_mv >> 3.
 * *num00 counts the steps in which no new best site was found while the
 * best position was still the starting position.
 *
 * Returns fn_ptr->vf() at the final position plus mv_err_cost() of the final
 * vector (multiplied by 8 and clamped to the short range) against center_mv.
 */
int vp8_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                             int_mv *best_mv, int search_param, int sad_per_bit,
                             int *num00, vp8_variance_fn_ptr_t *fn_ptr,
                             int *mvcost[2], int_mv *center_mv) {
  unsigned char *const src = *(b->base_src) + b->src;
  const int src_stride = b->src_stride;
  const int ref_stride = x->e_mbd.pre.y_stride;
  unsigned char *const ref_base = x->e_mbd.pre.y_buffer;

  unsigned char *start_addr;
  unsigned char *best_addr;
  search_site *sites;
  int *sad_cost[2];
  int_mv full_center;
  int_mv trial_mv;
  unsigned int best_cost;
  unsigned int sse;
  int start_row, start_col;
  int winner = 0;
  int prev_winner = 0;
  int site = 1;
  int step, n, num_steps;

  sad_cost[0] = x->mvsadcost[0];
  sad_cost[1] = x->mvsadcost[1];
  full_center.as_mv.row = center_mv->as_mv.row >> 3;
  full_center.as_mv.col = center_mv->as_mv.col >> 3;

  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  start_row = ref_mv->as_mv.row;
  start_col = ref_mv->as_mv.col;
  *num00 = 0;
  best_mv->as_mv.row = start_row;
  best_mv->as_mv.col = start_col;

  /* Work out the start point for the search */
  start_addr = ref_base + d->offset + (start_row * ref_stride) + start_col;
  best_addr = start_addr;

  /* Check the starting position */
  best_cost = fn_ptr->sdf(src, src_stride, start_addr, ref_stride) +
              mvsad_err_cost(best_mv, &full_center, sad_cost, sad_per_bit);

  /* search_param determines the length of the initial step and hence
   * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel :
   * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   */
  sites = &x->ss[search_param * x->searches_per_step];
  num_steps = (x->ss_count / x->searches_per_step) - search_param;

  for (step = 0; step < num_steps; ++step) {
    for (n = 0; n < x->searches_per_step; ++n, ++site) {
      const int row = best_mv->as_mv.row + sites[site].mv.row;
      const int col = best_mv->as_mv.col + sites[site].mv.col;
      unsigned int sad;

      /* Trap illegal vectors */
      if (col <= x->mv_col_min || col >= x->mv_col_max ||
          row <= x->mv_row_min || row >= x->mv_row_max) {
        continue;
      }

      sad = fn_ptr->sdf(src, src_stride, sites[site].offset + best_addr,
                        ref_stride);
      if (sad >= best_cost) continue;

      /* Only pay for the vector cost when the raw result is promising. */
      trial_mv.as_mv.row = row;
      trial_mv.as_mv.col = col;
      sad += mvsad_err_cost(&trial_mv, &full_center, sad_cost, sad_per_bit);

      if (sad < best_cost) {
        best_cost = sad;
        winner = site;
      }
    }

    if (winner != prev_winner) {
      best_mv->as_mv.row += sites[winner].mv.row;
      best_mv->as_mv.col += sites[winner].mv.col;
      best_addr += sites[winner].offset;
      prev_winner = winner;
    } else if (best_addr == start_addr) {
      ++*num00;
    }
  }

  trial_mv.as_mv.row = clamp(best_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX);
  trial_mv.as_mv.col = clamp(best_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX);

  return fn_ptr->vf(src, src_stride, best_addr, ref_stride, &sse) +
         mv_err_cost(&trial_mv, center_mv, mvcost, x->errorperbit);
}
1132
1133
#if HAVE_SSE2 || HAVE_MSA || HAVE_LSX
1134
/* Step search over the site table x->ss that scores four candidates per
 * call through fn_ptr->sdx4df() whenever a whole step is known to be in
 * bounds, and falls back to one fn_ptr->sdf() call per in-bounds candidate
 * otherwise.
 *
 * ref_mv is clamped in place to x's MV limits and becomes the starting
 * position; best_mv is updated as the search moves. Candidate scores include
 * mvsad_err_cost() against center_mv >> 3. *num00 counts the steps in which
 * no new best site was found while the best position was still the starting
 * position.
 *
 * Returns fn_ptr->vf() at the final position plus mv_err_cost() of the final
 * vector (multiplied by 8 and clamped to the short range) against center_mv.
 */
int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
                             int_mv *best_mv, int search_param, int sad_per_bit,
                             int *num00, vp8_variance_fn_ptr_t *fn_ptr,
                             int *mvcost[2], int_mv *center_mv) {
  unsigned char *const src = *(b->base_src) + b->src;
  const int src_stride = b->src_stride;
  const int ref_stride = x->e_mbd.pre.y_stride;
  unsigned char *const ref_base = x->e_mbd.pre.y_buffer;

  unsigned char *start_addr;
  unsigned char *best_addr;
  search_site *sites;
  int *sad_cost[2];
  int_mv full_center;
  int_mv trial_mv;
  unsigned int best_cost;
  unsigned int sse;
  int start_row, start_col;
  int winner = 0;
  int prev_winner = 0;
  int site = 1;
  int step, n, num_steps;

  sad_cost[0] = x->mvsadcost[0];
  sad_cost[1] = x->mvsadcost[1];
  full_center.as_mv.row = center_mv->as_mv.row >> 3;
  full_center.as_mv.col = center_mv->as_mv.col >> 3;

  vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min,
               x->mv_row_max);
  start_row = ref_mv->as_mv.row;
  start_col = ref_mv->as_mv.col;
  *num00 = 0;
  best_mv->as_mv.row = start_row;
  best_mv->as_mv.col = start_col;

  /* Work out the start point for the search */
  start_addr = ref_base + d->offset + (start_row * ref_stride) + start_col;
  best_addr = start_addr;

  /* Check the starting position */
  best_cost = fn_ptr->sdf(src, src_stride, start_addr, ref_stride) +
              mvsad_err_cost(best_mv, &full_center, sad_cost, sad_per_bit);

  /* search_param determines the length of the initial step and hence the
   * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 =
   * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
   */
  sites = &x->ss[search_param * x->searches_per_step];
  num_steps = (x->ss_count / x->searches_per_step) - search_param;

  for (step = 0; step < num_steps; ++step) {
    /* To know if all neighbor points are within the bounds, 4 bounds
     * checking are enough instead of checking 4 bounds for each
     * points.
     */
    const int all_in =
        ((best_mv->as_mv.row + sites[site].mv.row) > x->mv_row_min) &
        ((best_mv->as_mv.row + sites[site + 1].mv.row) < x->mv_row_max) &
        ((best_mv->as_mv.col + sites[site + 2].mv.col) > x->mv_col_min) &
        ((best_mv->as_mv.col + sites[site + 3].mv.col) < x->mv_col_max);
    int lane;

    if (all_in) {
      /* Whole step is in bounds: score the sites four at a time. */
      for (n = 0; n < x->searches_per_step; n += 4) {
        const unsigned char *cand[4];
        unsigned int sads[4];

        for (lane = 0; lane < 4; ++lane) {
          cand[lane] = sites[site + lane].offset + best_addr;
        }

        fn_ptr->sdx4df(src, src_stride, cand, ref_stride, sads);

        for (lane = 0; lane < 4; ++lane, ++site) {
          unsigned int sad = sads[lane];

          if (sad >= best_cost) continue;

          trial_mv.as_mv.row = best_mv->as_mv.row + sites[site].mv.row;
          trial_mv.as_mv.col = best_mv->as_mv.col + sites[site].mv.col;
          sad += mvsad_err_cost(&trial_mv, &full_center, sad_cost, sad_per_bit);

          if (sad < best_cost) {
            best_cost = sad;
            winner = site;
          }
        }
      }
    } else {
      /* Some site may be out of bounds: test and score them one by one. */
      for (n = 0; n < x->searches_per_step; ++n, ++site) {
        const int row = best_mv->as_mv.row + sites[site].mv.row;
        const int col = best_mv->as_mv.col + sites[site].mv.col;
        unsigned int sad;

        /* Trap illegal vectors */
        if (col <= x->mv_col_min || col >= x->mv_col_max ||
            row <= x->mv_row_min || row >= x->mv_row_max) {
          continue;
        }

        sad = fn_ptr->sdf(src, src_stride, sites[site].offset + best_addr,
                          ref_stride);
        if (sad >= best_cost) continue;

        trial_mv.as_mv.row = row;
        trial_mv.as_mv.col = col;
        sad += mvsad_err_cost(&trial_mv, &full_center, sad_cost, sad_per_bit);

        if (sad < best_cost) {
          best_cost = sad;
          winner = site;
        }
      }
    }

    if (winner != prev_winner) {
      best_mv->as_mv.row += sites[winner].mv.row;
      best_mv->as_mv.col += sites[winner].mv.col;
      best_addr += sites[winner].offset;
      prev_winner = winner;
    } else if (best_addr == start_addr) {
      ++*num00;
    }
  }

  trial_mv.as_mv.row = clamp(best_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX);
  trial_mv.as_mv.col = clamp(best_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX);

  return fn_ptr->vf(src, src_stride, best_addr, ref_stride, &sse) +
         mv_err_cost(&trial_mv, center_mv, mvcost, x->errorperbit);
}
1282
#endif  // HAVE_SSE2 || HAVE_MSA || HAVE_LSX
1283
1284
int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
1285
                        int sad_per_bit, int distance,
1286
                        vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2],
1287
0
                        int_mv *center_mv) {
1288
0
  unsigned char *what = (*(b->base_src) + b->src);
1289
0
  int what_stride = b->src_stride;
1290
0
  unsigned char *in_what;
1291
0
  int pre_stride = x->e_mbd.pre.y_stride;
1292
0
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
1293
0
  int in_what_stride = pre_stride;
1294
0
  int mv_stride = pre_stride;
1295
0
  unsigned char *bestaddress;
1296
0
  int_mv *best_mv = &d->bmi.mv;
1297
0
  int_mv this_mv;
1298
0
  unsigned int bestsad;
1299
0
  unsigned int thissad;
1300
0
  int r, c;
1301
1302
0
  unsigned char *check_here;
1303
1304
0
  int ref_row = ref_mv->as_mv.row;
1305
0
  int ref_col = ref_mv->as_mv.col;
1306
1307
0
  int row_min = ref_row - distance;
1308
0
  int row_max = ref_row + distance;
1309
0
  int col_min = ref_col - distance;
1310
0
  int col_max = ref_col + distance;
1311
1312
0
  int *mvsadcost[2];
1313
0
  int_mv fcenter_mv;
1314
1315
0
  mvsadcost[0] = x->mvsadcost[0];
1316
0
  mvsadcost[1] = x->mvsadcost[1];
1317
0
  fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3;
1318
0
  fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3;
1319
1320
  /* Work out the mid point for the search */
1321
0
  in_what = base_pre + d->offset;
1322
0
  bestaddress = in_what + (ref_row * pre_stride) + ref_col;
1323
1324
0
  best_mv->as_mv.row = ref_row;
1325
0
  best_mv->as_mv.col = ref_col;
1326
1327
  /* Baseline value at the centre */
1328
0
  bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) +
1329
0
            mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1330
1331
  /* Apply further limits to prevent us looking using vectors that stretch
1332
   * beyond the UMV border
1333
   */
1334
0
  if (col_min < x->mv_col_min) col_min = x->mv_col_min;
1335
1336
0
  if (col_max > x->mv_col_max) col_max = x->mv_col_max;
1337
1338
0
  if (row_min < x->mv_row_min) row_min = x->mv_row_min;
1339
1340
0
  if (row_max > x->mv_row_max) row_max = x->mv_row_max;
1341
1342
0
  for (r = row_min; r < row_max; ++r) {
1343
0
    this_mv.as_mv.row = r;
1344
0
    check_here = r * mv_stride + in_what + col_min;
1345
1346
0
    for (c = col_min; c < col_max; ++c) {
1347
0
      thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
1348
1349
0
      if (thissad < bestsad) {
1350
0
        this_mv.as_mv.col = c;
1351
0
        thissad +=
1352
0
            mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit);
1353
1354
0
        if (thissad < bestsad) {
1355
0
          bestsad = thissad;
1356
0
          best_mv->as_mv.row = r;
1357
0
          best_mv->as_mv.col = c;
1358
0
          bestaddress = check_here;
1359
0
        }
1360
0
      }
1361
1362
0
      check_here++;
1363
0
    }
1364
0
  }
1365
1366
0
  this_mv.as_mv.row = clamp(best_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX);
1367
0
  this_mv.as_mv.col = clamp(best_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX);
1368
1369
0
  return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) +
1370
0
         mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
1371
0
}
1372
1373
/* Iterative 4-neighbour refinement of ref_mv, one candidate at a time.
 *
 * For at most search_range iterations the up/left/right/down neighbours of
 * the current position are considered; a neighbour is scored only when it is
 * strictly inside x's MV limits, using fn_ptr->sdf() plus mvsad_err_cost()
 * against center_mv >> 3. ref_mv is moved in place to the best neighbour and
 * the loop stops as soon as no neighbour improves on the current score.
 *
 * Returns fn_ptr->vf() at the final position plus mv_err_cost() of the final
 * vector (multiplied by 8 and clamped to the short range) against center_mv.
 */
int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp8_variance_fn_ptr_t *fn_ptr,
                              int *mvcost[2], int_mv *center_mv) {
  MV step_dirs[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };

  const int src_stride = b->src_stride;
  const int ref_stride = x->e_mbd.pre.y_stride;
  unsigned char *const ref_base = x->e_mbd.pre.y_buffer;
  unsigned char *const src = *(b->base_src) + b->src;
  unsigned char *best_addr = ref_base + d->offset +
                             (ref_mv->as_mv.row * ref_stride) +
                             ref_mv->as_mv.col;

  int *sad_cost[2];
  int_mv full_center;
  int_mv trial_mv;
  unsigned int best_cost;
  unsigned int sse;
  int iter, k;

  sad_cost[0] = x->mvsadcost[0];
  sad_cost[1] = x->mvsadcost[1];
  full_center.as_mv.row = center_mv->as_mv.row >> 3;
  full_center.as_mv.col = center_mv->as_mv.col >> 3;

  best_cost = fn_ptr->sdf(src, src_stride, best_addr, ref_stride) +
              mvsad_err_cost(ref_mv, &full_center, sad_cost, error_per_bit);

  for (iter = 0; iter < search_range; ++iter) {
    int winner = -1;

    for (k = 0; k < 4; ++k) {
      const short row = ref_mv->as_mv.row + step_dirs[k].row;
      const short col = ref_mv->as_mv.col + step_dirs[k].col;
      unsigned int sad;

      if (col <= x->mv_col_min || col >= x->mv_col_max ||
          row <= x->mv_row_min || row >= x->mv_row_max) {
        continue;
      }

      sad = fn_ptr->sdf(
          src, src_stride,
          (step_dirs[k].row) * ref_stride + step_dirs[k].col + best_addr,
          ref_stride);
      if (sad >= best_cost) continue;

      /* Only pay for the vector cost when the raw result is promising. */
      trial_mv.as_mv.row = row;
      trial_mv.as_mv.col = col;
      sad += mvsad_err_cost(&trial_mv, &full_center, sad_cost, error_per_bit);

      if (sad < best_cost) {
        best_cost = sad;
        winner = k;
      }
    }

    /* No neighbour improved on the current position: done. */
    if (winner == -1) break;

    ref_mv->as_mv.row += step_dirs[winner].row;
    ref_mv->as_mv.col += step_dirs[winner].col;
    best_addr += (step_dirs[winner].row) * ref_stride + step_dirs[winner].col;
  }

  trial_mv.as_mv.row = clamp(ref_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX);
  trial_mv.as_mv.col = clamp(ref_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX);

  return fn_ptr->vf(src, src_stride, best_addr, ref_stride, &sse) +
         mv_err_cost(&trial_mv, center_mv, mvcost, x->errorperbit);
}
1450
1451
#if HAVE_SSE2 || HAVE_MSA
1452
/* Iterative 4-neighbour refinement of ref_mv that scores all four neighbours
 * with one fn_ptr->sdx4df() call when they are all strictly inside x's MV
 * limits, and otherwise scores the in-bounds neighbours one at a time with
 * fn_ptr->sdf().
 *
 * Runs for at most search_range iterations. Candidate scores include
 * mvsad_err_cost() against center_mv >> 3. ref_mv is moved in place to the
 * best neighbour and the loop stops as soon as no neighbour improves on the
 * current score.
 *
 * Returns fn_ptr->vf() at the final position plus mv_err_cost() of the final
 * vector (multiplied by 8 and clamped to the short range) against center_mv.
 */
int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
                              int_mv *ref_mv, int error_per_bit,
                              int search_range, vp8_variance_fn_ptr_t *fn_ptr,
                              int *mvcost[2], int_mv *center_mv) {
  /* Order matches the candidate pointers below: up, left, right, down. */
  MV step_dirs[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };

  const int src_stride = b->src_stride;
  const int ref_stride = x->e_mbd.pre.y_stride;
  unsigned char *const ref_base = x->e_mbd.pre.y_buffer;
  unsigned char *const src = *(b->base_src) + b->src;
  unsigned char *best_addr = ref_base + d->offset +
                             (ref_mv->as_mv.row * ref_stride) +
                             ref_mv->as_mv.col;

  int *sad_cost[2];
  int_mv full_center;
  int_mv trial_mv;
  unsigned int best_cost;
  unsigned int sse;
  int iter, k;

  sad_cost[0] = x->mvsadcost[0];
  sad_cost[1] = x->mvsadcost[1];
  full_center.as_mv.row = center_mv->as_mv.row >> 3;
  full_center.as_mv.col = center_mv->as_mv.col >> 3;

  best_cost = fn_ptr->sdf(src, src_stride, best_addr, ref_stride) +
              mvsad_err_cost(ref_mv, &full_center, sad_cost, error_per_bit);

  for (iter = 0; iter < search_range; ++iter) {
    int winner = -1;
    const int all_in = ((ref_mv->as_mv.row - 1) > x->mv_row_min) &
                       ((ref_mv->as_mv.row + 1) < x->mv_row_max) &
                       ((ref_mv->as_mv.col - 1) > x->mv_col_min) &
                       ((ref_mv->as_mv.col + 1) < x->mv_col_max);

    if (all_in) {
      const unsigned char *cand[4];
      unsigned int sads[4];

      cand[0] = best_addr - ref_stride;
      cand[1] = best_addr - 1;
      cand[2] = best_addr + 1;
      cand[3] = best_addr + ref_stride;

      fn_ptr->sdx4df(src, src_stride, cand, ref_stride, sads);

      for (k = 0; k < 4; ++k) {
        unsigned int sad = sads[k];

        if (sad >= best_cost) continue;

        trial_mv.as_mv.row = ref_mv->as_mv.row + step_dirs[k].row;
        trial_mv.as_mv.col = ref_mv->as_mv.col + step_dirs[k].col;
        sad += mvsad_err_cost(&trial_mv, &full_center, sad_cost, error_per_bit);

        if (sad < best_cost) {
          best_cost = sad;
          winner = k;
        }
      }
    } else {
      for (k = 0; k < 4; ++k) {
        const short row = ref_mv->as_mv.row + step_dirs[k].row;
        const short col = ref_mv->as_mv.col + step_dirs[k].col;
        unsigned int sad;

        if (col <= x->mv_col_min || col >= x->mv_col_max ||
            row <= x->mv_row_min || row >= x->mv_row_max) {
          continue;
        }

        sad = fn_ptr->sdf(
            src, src_stride,
            (step_dirs[k].row) * ref_stride + step_dirs[k].col + best_addr,
            ref_stride);
        if (sad >= best_cost) continue;

        trial_mv.as_mv.row = row;
        trial_mv.as_mv.col = col;
        sad += mvsad_err_cost(&trial_mv, &full_center, sad_cost, error_per_bit);

        if (sad < best_cost) {
          best_cost = sad;
          winner = k;
        }
      }
    }

    /* No neighbour improved on the current position: done. */
    if (winner == -1) break;

    ref_mv->as_mv.row += step_dirs[winner].row;
    ref_mv->as_mv.col += step_dirs[winner].col;
    best_addr += (step_dirs[winner].row) * ref_stride + step_dirs[winner].col;
  }

  trial_mv.as_mv.row = clamp(ref_mv->as_mv.row * 8, SHRT_MIN, SHRT_MAX);
  trial_mv.as_mv.col = clamp(ref_mv->as_mv.col * 8, SHRT_MIN, SHRT_MAX);

  return fn_ptr->vf(src, src_stride, best_addr, ref_stride, &sse) +
         mv_err_cost(&trial_mv, center_mv, mvcost, x->errorperbit);
}
1561
#endif  // HAVE_SSE2 || HAVE_MSA