Coverage Report

Created: 2024-09-06 07:53

/src/libvpx/vp9/encoder/vp9_mcomp.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3
 *
4
 *  Use of this source code is governed by a BSD-style license
5
 *  that can be found in the LICENSE file in the root of the source
6
 *  tree. An additional intellectual property rights grant can be found
7
 *  in the file PATENTS.  All contributing project authors may
8
 *  be found in the AUTHORS file in the root of the source tree.
9
 */
10
11
#include <assert.h>
12
#include <limits.h>
13
#include <math.h>
14
#include <stdio.h>
15
16
#include "./vpx_config.h"
17
#include "./vpx_dsp_rtcd.h"
18
19
#include "vpx_dsp/vpx_dsp_common.h"
20
#include "vpx_mem/vpx_mem.h"
21
#include "vpx_ports/mem.h"
22
23
#include "vp9/common/vp9_common.h"
24
#include "vp9/common/vp9_mvref_common.h"
25
#include "vp9/common/vp9_reconinter.h"
26
27
#include "vp9/encoder/vp9_encoder.h"
28
#include "vp9/encoder/vp9_mcomp.h"
29
30
// #define NEW_DIAMOND_SEARCH
31
32
23.3M
// Tightens *mv_limits to its intersection with the window of positions that
// lie within MAX_FULL_PEL_VAL of mv.
//
// mv is brought to the units of mv_limits with ">> 3". When a component has
// a nonzero low-3-bit remainder, the lower bound for that component is
// raised by one. The window is additionally kept strictly inside
// (MV_LOW >> 3, MV_UPP >> 3). Existing limits are only narrowed, never
// widened; this reduces the number of checks in diamond search.
void vp9_set_mv_search_range(MvLimits *mv_limits, const MV *mv) {
  const int lowest_allowed = (MV_LOW >> 3) + 1;
  const int highest_allowed = (MV_UPP >> 3) - 1;
  const int full_row = mv->row >> 3;
  const int full_col = mv->col >> 3;
  const int row_has_frac = (mv->row & 7) != 0;
  const int col_has_frac = (mv->col & 7) != 0;

  const int row_lo =
      VPXMAX(full_row - MAX_FULL_PEL_VAL + row_has_frac, lowest_allowed);
  const int col_lo =
      VPXMAX(full_col - MAX_FULL_PEL_VAL + col_has_frac, lowest_allowed);
  const int row_hi = VPXMIN(full_row + MAX_FULL_PEL_VAL, highest_allowed);
  const int col_hi = VPXMIN(full_col + MAX_FULL_PEL_VAL, highest_allowed);

  // Intersect the caller's window with the one computed above.
  if (mv_limits->row_min < row_lo) {
    mv_limits->row_min = row_lo;
  }
  if (mv_limits->row_max > row_hi) {
    mv_limits->row_max = row_hi;
  }
  if (mv_limits->col_min < col_lo) {
    mv_limits->col_min = col_lo;
  }
  if (mv_limits->col_max > col_hi) {
    mv_limits->col_max = col_hi;
  }
}
50
51
void vp9_set_subpel_mv_search_range(MvLimits *subpel_mv_limits,
52
                                    const MvLimits *umv_window_limits,
53
22.2M
                                    const MV *ref_mv) {
54
22.2M
  subpel_mv_limits->col_min = VPXMAX(umv_window_limits->col_min * 8,
55
22.2M
                                     ref_mv->col - MAX_FULL_PEL_VAL * 8);
56
22.2M
  subpel_mv_limits->col_max = VPXMIN(umv_window_limits->col_max * 8,
57
22.2M
                                     ref_mv->col + MAX_FULL_PEL_VAL * 8);
58
22.2M
  subpel_mv_limits->row_min = VPXMAX(umv_window_limits->row_min * 8,
59
22.2M
                                     ref_mv->row - MAX_FULL_PEL_VAL * 8);
60
22.2M
  subpel_mv_limits->row_max = VPXMIN(umv_window_limits->row_max * 8,
61
22.2M
                                     ref_mv->row + MAX_FULL_PEL_VAL * 8);
62
63
22.2M
  subpel_mv_limits->col_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->col_min);
64
22.2M
  subpel_mv_limits->col_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->col_max);
65
22.2M
  subpel_mv_limits->row_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->row_min);
66
22.2M
  subpel_mv_limits->row_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->row_max);
67
22.2M
}
68
69
23.0M
int vp9_init_search_range(int size) {
70
23.0M
  int sr = 0;
71
  // Minimum search size no matter what the passed in value.
72
23.0M
  size = VPXMAX(16, size);
73
74
152M
  while ((size << sr) < MAX_FULL_PEL_VAL) sr++;
75
76
23.0M
  sr = VPXMIN(sr, MAX_MVSEARCH_STEPS - 2);
77
23.0M
  return sr;
78
23.0M
}
79
80
int vp9_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
81
28.6M
                    int *mvcost[2], int weight) {
82
28.6M
  const MV diff = { mv->row - ref->row, mv->col - ref->col };
83
28.6M
  return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7);
84
28.6M
}
85
86
#define PIXEL_TRANSFORM_ERROR_SCALE 4
87
static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
88
392M
                       int *mvcost[2], int error_per_bit) {
89
392M
  if (mvcost) {
90
392M
    const MV diff = { mv->row - ref->row, mv->col - ref->col };
91
392M
    return (int)ROUND64_POWER_OF_TWO(
92
392M
        (int64_t)mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
93
392M
        RDDIV_BITS + VP9_PROB_COST_SHIFT - RD_EPB_SHIFT +
94
392M
            PIXEL_TRANSFORM_ERROR_SCALE);
95
392M
  }
96
0
  return 0;
97
392M
}
98
0
// Fills cfg with a 4-site (up, down, left, right) search pattern.
// Starting at a distance of MAX_FIRST_STEP and halving until it reaches 0,
// each step stores four motion vectors in cfg->ss_mv together with their
// buffer offsets (row * stride + col) in cfg->ss_os.
void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
  int radius = MAX_FIRST_STEP;
  int filled = 0;

  while (radius > 0) {
    // Offsets for the 4 search sites of this step, as { row, col }.
    const MV cross[4] = {
      { -radius, 0 }, { radius, 0 }, { 0, -radius }, { 0, radius }
    };
    const MV *const cross_end = cross + 4;
    const MV *site;

    for (site = cross; site != cross_end; ++site) {
      cfg->ss_mv[filled] = *site;
      cfg->ss_os[filled] = site->row * stride + site->col;
      ++filled;
    }
    radius /= 2;
  }

  cfg->searches_per_step = 4;
  cfg->total_steps = filled / cfg->searches_per_step;
}
115
116
84.5k
// Fills cfg with an 8-site search pattern: the four axis-aligned neighbours
// followed by the four diagonal ones. Starting at a distance of
// MAX_FIRST_STEP and halving until it reaches 0, each step stores eight
// motion vectors in cfg->ss_mv together with their buffer offsets
// (row * stride + col) in cfg->ss_os.
void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
  int radius = MAX_FIRST_STEP;
  int filled = 0;

  while (radius > 0) {
    // Offsets for the 8 search sites of this step, as { row, col }.
    const MV ring[8] = {
      { -radius, 0 },       { radius, 0 },       { 0, -radius },
      { 0, radius },        { -radius, -radius }, { -radius, radius },
      { radius, -radius },  { radius, radius }
    };
    const MV *const ring_end = ring + 8;
    const MV *site;

    for (site = ring; site != ring_end; ++site) {
      cfg->ss_mv[filled] = *site;
      cfg->ss_os[filled] = site->row * stride + site->col;
      ++filled;
    }
    radius /= 2;
  }

  cfg->searches_per_step = 8;
  cfg->total_steps = filled / cfg->searches_per_step;
}
135
136
// convert motion vector component to offset for sv[a]f calc
137
356M
static INLINE int sp(int x) { return x & 7; }
138
139
21.7M
// Returns the address within buf that corresponds to position (r, c), after
// both coordinates have been reduced with ">> 3".
static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
  const int full_row = r >> 3;
  const int full_col = c >> 3;
  return buf + (full_row * stride + full_col);
}
142
143
/*
 * CHECK_BETTER(v, r, c): scores the candidate position (r, c) and stores the
 * score in v. Positions outside [minr, maxr] x [minc, maxc] get INT_MAX.
 * Otherwise the score is the svf/svaf result plus mv_err_cost(); when it is
 * strictly lower than besterr, the candidate becomes the new best (besterr,
 * br, bc, *distortion and *sse1 are updated).
 *
 * The macro relies on names from the expansion site: minc, maxc, minr, maxr,
 * rr, rc, y, y_stride, z, src_stride, vfp, second_pred, mvjcost, mvcost,
 * error_per_bit, thismse, sse, besterr, br, bc, distortion and sse1.
 */
#if CONFIG_VP9_HIGHBITDEPTH
/* The total is accumulated in 64 bits and saturated to INT_MAX. */
#define CHECK_BETTER(v, r, c)                                                \
  do {                                                                       \
    if ((c) < minc || (c) > maxc || (r) < minr || (r) > maxr) {              \
      v = INT_MAX;                                                           \
    } else {                                                                 \
      int64_t cand_total;                                                    \
      const MV cand_mv = { (r), (c) };                                       \
      const MV cand_ref_mv = { rr, rc };                                     \
      if (second_pred != NULL) {                                             \
        thismse = vfp->svaf(pre(y, y_stride, (r), (c)), y_stride, sp(c),     \
                            sp(r), z, src_stride, &sse, second_pred);        \
      } else {                                                               \
        thismse = vfp->svf(pre(y, y_stride, (r), (c)), y_stride, sp(c),      \
                           sp(r), z, src_stride, &sse);                      \
      }                                                                      \
      cand_total = (int64_t)thismse +                                        \
                   mv_err_cost(&cand_mv, &cand_ref_mv, mvjcost, mvcost,      \
                               error_per_bit);                               \
      if (cand_total >= INT_MAX) {                                           \
        v = INT_MAX;                                                         \
      } else {                                                               \
        v = (uint32_t)cand_total;                                            \
        if (v < besterr) {                                                   \
          besterr = v;                                                       \
          br = (r);                                                          \
          bc = (c);                                                          \
          *distortion = thismse;                                             \
          *sse1 = sse;                                                       \
        }                                                                    \
      }                                                                      \
    }                                                                        \
  } while (0)
#else
#define CHECK_BETTER(v, r, c)                                                \
  do {                                                                       \
    if ((c) < minc || (c) > maxc || (r) < minr || (r) > maxr) {              \
      v = INT_MAX;                                                           \
    } else {                                                                 \
      const MV cand_mv = { (r), (c) };                                       \
      const MV cand_ref_mv = { rr, rc };                                     \
      if (second_pred != NULL) {                                             \
        thismse = vfp->svaf(pre(y, y_stride, (r), (c)), y_stride, sp(c),     \
                            sp(r), z, src_stride, &sse, second_pred);        \
      } else {                                                               \
        thismse = vfp->svf(pre(y, y_stride, (r), (c)), y_stride, sp(c),      \
                           sp(r), z, src_stride, &sse);                      \
      }                                                                      \
      v = mv_err_cost(&cand_mv, &cand_ref_mv, mvjcost, mvcost,               \
                      error_per_bit) +                                       \
          thismse;                                                           \
      if (v < besterr) {                                                     \
        besterr = v;                                                         \
        br = (r);                                                            \
        bc = (c);                                                            \
        *distortion = thismse;                                               \
        *sse1 = sse;                                                         \
      }                                                                      \
    }                                                                        \
  } while (0)

#endif
202
/*
 * Probes the four axis-aligned neighbours of (tr, tc) at distance hstep
 * (left, right, up, down, in that order), then one diagonal neighbour chosen
 * from the cheaper horizontal and the cheaper vertical side.
 *
 * whichdir records that choice: bit 0 is set when right was at least as good
 * as left, bit 1 is set when down was at least as good as up.
 */
#define FIRST_LEVEL_CHECKS                                            \
  do {                                                                \
    unsigned int left, right, up, down, diag;                         \
    CHECK_BETTER(left, tr, tc - hstep);                               \
    CHECK_BETTER(right, tr, tc + hstep);                              \
    CHECK_BETTER(up, tr - hstep, tc);                                 \
    CHECK_BETTER(down, tr + hstep, tc);                               \
    whichdir = (left >= right) + 2 * (up >= down);                    \
    {                                                                 \
      const int diag_row_step = (whichdir & 2) ? hstep : -hstep;      \
      const int diag_col_step = (whichdir & 1) ? hstep : -hstep;      \
      CHECK_BETTER(diag, tr + diag_row_step, tc + diag_col_step);     \
    }                                                                 \
  } while (0)
217
218
/*
 * Follow-up probes around (tr, tc) based on where the best position (br, bc)
 * moved to. kr and kc are the row and column displacement of the best
 * position from (tr, tc), captured once on entry.
 *  - moved diagonally: probe the two positions one further step along
 *    either axis;
 *  - moved only horizontally: probe both rows at twice the column
 *    displacement, then one position selected by whichdir;
 *  - moved only vertically: the same with rows and columns swapped;
 *  - did not move: nothing is probed.
 */
#define SECOND_LEVEL_CHECKS                                          \
  do {                                                               \
    const int kr = br - tr;                                          \
    const int kc = bc - tc;                                          \
    unsigned int second;                                             \
    if (kr != 0 && kc != 0) {                                        \
      CHECK_BETTER(second, tr + kr, tc + 2 * kc);                    \
      CHECK_BETTER(second, tr + 2 * kr, tc + kc);                    \
    } else if (kc != 0) {                                            \
      CHECK_BETTER(second, tr + hstep, tc + 2 * kc);                 \
      CHECK_BETTER(second, tr - hstep, tc + 2 * kc);                 \
      switch (whichdir) {                                            \
        case 0:                                                      \
        case 1:                                                      \
          CHECK_BETTER(second, tr + hstep, tc + kc);                 \
          break;                                                     \
        case 2:                                                      \
        case 3:                                                      \
          CHECK_BETTER(second, tr - hstep, tc + kc);                 \
          break;                                                     \
      }                                                              \
    } else if (kr != 0) {                                            \
      CHECK_BETTER(second, tr + 2 * kr, tc + hstep);                 \
      CHECK_BETTER(second, tr + 2 * kr, tc - hstep);                 \
      switch (whichdir) {                                            \
        case 0:                                                      \
        case 2:                                                      \
          CHECK_BETTER(second, tr + kr, tc + hstep);                 \
          break;                                                     \
        case 1:                                                      \
        case 3:                                                      \
          CHECK_BETTER(second, tr + kr, tc - hstep);                 \
          break;                                                     \
      }                                                              \
    }                                                                \
  } while (0)
249
250
/*
 * Declares and initialises the locals shared by the sub-pel search functions
 * and by the CHECK_BETTER / *_LEVEL_CHECKS macros. Expects x, bestmv, ref_mv
 * and iters_per_step to be in scope.
 *
 * offset is computed from bestmv before it is rescaled. (br, bc) and
 * (tr, tc) start at bestmv * 8, hstep starts at 4, and the min/max bounds
 * come from vp9_set_subpel_mv_search_range(). As its last step the macro
 * multiplies bestmv itself by 8.
 */
#define SETUP_SUBPEL_SEARCH                                                 \
  const MACROBLOCKD *xd = &x->e_mbd;                                        \
  const uint8_t *const z = x->plane[0].src.buf;                             \
  const int src_stride = x->plane[0].src.stride;                            \
  const uint8_t *const y = xd->plane[0].pre[0].buf;                         \
  const int y_stride = xd->plane[0].pre[0].stride;                          \
  const int offset = bestmv->row * y_stride + bestmv->col;                  \
  const unsigned int halfiters = iters_per_step;                            \
  const unsigned int quarteriters = iters_per_step;                         \
  const unsigned int eighthiters = iters_per_step;                          \
  unsigned int besterr = UINT_MAX;                                          \
  unsigned int sse;                                                         \
  unsigned int whichdir;                                                    \
  int thismse;                                                              \
                                                                            \
  int rr = ref_mv->row;                                                     \
  int rc = ref_mv->col;                                                     \
  int br = bestmv->row * 8;                                                 \
  int bc = bestmv->col * 8;                                                 \
  int tr = br;                                                              \
  int tc = bc;                                                              \
  int hstep = 4;                                                            \
  int minr, maxr, minc, maxc;                                               \
  MvLimits subpel_mv_limits;                                                \
                                                                            \
  vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); \
  minr = subpel_mv_limits.row_min;                                          \
  maxr = subpel_mv_limits.row_max;                                          \
  minc = subpel_mv_limits.col_min;                                          \
  maxc = subpel_mv_limits.col_max;                                          \
                                                                            \
  bestmv->row *= 8;                                                         \
  bestmv->col *= 8
283
284
static unsigned int setup_center_error(
285
    const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv,
286
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp,
287
    const uint8_t *const src, const int src_stride, const uint8_t *const y,
288
    int y_stride, const uint8_t *second_pred, int w, int h, int offset,
289
22.2M
    int *mvjcost, int *mvcost[2], uint32_t *sse1, uint32_t *distortion) {
290
22.2M
#if CONFIG_VP9_HIGHBITDEPTH
291
22.2M
  uint64_t besterr;
292
22.2M
  if (second_pred != NULL) {
293
0
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
294
0
      DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]);
295
0
      vpx_highbd_comp_avg_pred(comp_pred16, CONVERT_TO_SHORTPTR(second_pred), w,
296
0
                               h, CONVERT_TO_SHORTPTR(y + offset), y_stride);
297
0
      besterr =
298
0
          vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1);
299
0
    } else {
300
0
      DECLARE_ALIGNED(32, uint8_t, comp_pred[64 * 64]);
301
0
      vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
302
0
      besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
303
0
    }
304
22.2M
  } else {
305
22.2M
    besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
306
22.2M
  }
307
22.2M
  *distortion = (uint32_t)besterr;
308
22.2M
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
309
22.2M
  if (besterr >= UINT_MAX) return UINT_MAX;
310
22.2M
  return (uint32_t)besterr;
311
#else
312
  uint32_t besterr;
313
  (void)xd;
314
  if (second_pred != NULL) {
315
    DECLARE_ALIGNED(32, uint8_t, comp_pred[64 * 64]);
316
    vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
317
    besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
318
  } else {
319
    besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
320
  }
321
  *distortion = besterr;
322
  besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
323
  return besterr;
324
#endif  // CONFIG_VP9_HIGHBITDEPTH
325
22.2M
}
326
327
0
// Integer division n / d rounded to the nearest integer. Half of d is
// subtracted from n when the operands have opposite signs and added
// otherwise, before the truncating division. d must be nonzero.
static INLINE int64_t divide_and_round(const int64_t n, const int64_t d) {
  const int64_t half = d / 2;
  const int signs_differ = (n < 0) != (d < 0);
  if (signs_differ) {
    return (n - half) / d;
  }
  return (n + half) / d;
}
330
331
0
// Returns 1 when cost_list[0] is strictly smaller than each of
// cost_list[1..4], and 0 otherwise.
static INLINE int is_cost_list_wellbehaved(int *cost_list) {
  int i;
  for (i = 1; i <= 4; ++i) {
    if (cost_list[0] >= cost_list[i]) {
      return 0;
    }
  }
  return 1;
}
335
336
// Estimates the position of the cost surface minimum, at a precision of
// 1/2^bits, and stores it in *ir (row) and *ic (column).
//
// The surface is modelled as S = A(x - x0)^2 + B(y - y0)^2 + C. Given the
// costs S0..S4 (cost_list[0..4]) sampled at
//   (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0),
// the minimum lies at
//   x0 = 1/2 (S1 - S3) / (S1 + S3 - 2*S0),
//   y0 = 1/2 (S4 - S2) / (S4 + S2 - 2*S0).
// The code is an integer version of those formulas, evaluated in 64 bits.
// Both denominators must be nonzero.
static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) {
  const int64_t s0 = cost_list[0];
  const int64_t s1 = cost_list[1];
  const int64_t s2 = cost_list[2];
  const int64_t s3 = cost_list[3];
  const int64_t s4 = cost_list[4];
  const int scale = 1 << (bits - 1);

  const int64_t col_num = s1 - s3;
  const int64_t col_den = s1 - 2 * s0 + s3;
  const int64_t row_num = s4 - s2;
  const int64_t row_den = s4 - 2 * s0 + s2;

  *ic = (int)divide_and_round(col_num * scale, col_den);
  *ir = (int)divide_and_round(row_num * scale, row_den);
}
353
354
uint32_t vp9_skip_sub_pixel_tree(
355
    const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
356
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
357
    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
358
    uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
359
0
    int h, int use_accurate_subpel_search) {
360
0
  SETUP_SUBPEL_SEARCH;
361
0
  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
362
0
                               src_stride, y, y_stride, second_pred, w, h,
363
0
                               offset, mvjcost, mvcost, sse1, distortion);
364
0
  (void)halfiters;
365
0
  (void)quarteriters;
366
0
  (void)eighthiters;
367
0
  (void)whichdir;
368
0
  (void)allow_hp;
369
0
  (void)forced_stop;
370
0
  (void)hstep;
371
0
  (void)rr;
372
0
  (void)rc;
373
0
  (void)minr;
374
0
  (void)minc;
375
0
  (void)maxr;
376
0
  (void)maxc;
377
0
  (void)tr;
378
0
  (void)tc;
379
0
  (void)sse;
380
0
  (void)thismse;
381
0
  (void)cost_list;
382
0
  (void)use_accurate_subpel_search;
383
384
0
  return besterr;
385
0
}
386
387
uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore(
388
    const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
389
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
390
    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
391
    uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
392
0
    int h, int use_accurate_subpel_search) {
393
0
  SETUP_SUBPEL_SEARCH;
394
0
  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
395
0
                               src_stride, y, y_stride, second_pred, w, h,
396
0
                               offset, mvjcost, mvcost, sse1, distortion);
397
0
  (void)halfiters;
398
0
  (void)quarteriters;
399
0
  (void)eighthiters;
400
0
  (void)whichdir;
401
0
  (void)allow_hp;
402
0
  (void)forced_stop;
403
0
  (void)hstep;
404
0
  (void)use_accurate_subpel_search;
405
406
0
  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
407
0
      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
408
0
      cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
409
0
    int ir, ic;
410
0
    unsigned int minpt = INT_MAX;
411
0
    get_cost_surf_min(cost_list, &ir, &ic, 2);
412
0
    if (ir != 0 || ic != 0) {
413
0
      CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic);
414
0
    }
415
0
  } else {
416
0
    FIRST_LEVEL_CHECKS;
417
0
    if (halfiters > 1) {
418
0
      SECOND_LEVEL_CHECKS;
419
0
    }
420
421
0
    tr = br;
422
0
    tc = bc;
423
424
    // Each subsequent iteration checks at least one point in common with
425
    // the last iteration could be 2 ( if diag selected) 1/4 pel
426
    // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
427
0
    if (forced_stop != 2) {
428
0
      hstep >>= 1;
429
0
      FIRST_LEVEL_CHECKS;
430
0
      if (quarteriters > 1) {
431
0
        SECOND_LEVEL_CHECKS;
432
0
      }
433
0
    }
434
0
  }
435
436
0
  tr = br;
437
0
  tc = bc;
438
439
0
  if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
440
0
    hstep >>= 1;
441
0
    FIRST_LEVEL_CHECKS;
442
0
    if (eighthiters > 1) {
443
0
      SECOND_LEVEL_CHECKS;
444
0
    }
445
0
  }
446
447
0
  bestmv->row = br;
448
0
  bestmv->col = bc;
449
450
0
  return besterr;
451
0
}
452
453
uint32_t vp9_find_best_sub_pixel_tree_pruned_more(
454
    const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
455
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
456
    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
457
    uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
458
0
    int h, int use_accurate_subpel_search) {
459
0
  SETUP_SUBPEL_SEARCH;
460
0
  (void)use_accurate_subpel_search;
461
462
0
  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
463
0
                               src_stride, y, y_stride, second_pred, w, h,
464
0
                               offset, mvjcost, mvcost, sse1, distortion);
465
0
  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
466
0
      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
467
0
      cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) {
468
0
    unsigned int minpt;
469
0
    int ir, ic;
470
0
    get_cost_surf_min(cost_list, &ir, &ic, 1);
471
0
    if (ir != 0 || ic != 0) {
472
0
      CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
473
0
    }
474
0
  } else {
475
0
    FIRST_LEVEL_CHECKS;
476
0
    if (halfiters > 1) {
477
0
      SECOND_LEVEL_CHECKS;
478
0
    }
479
0
  }
480
481
  // Each subsequent iteration checks at least one point in common with
482
  // the last iteration could be 2 ( if diag selected) 1/4 pel
483
484
  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
485
0
  if (forced_stop != 2) {
486
0
    tr = br;
487
0
    tc = bc;
488
0
    hstep >>= 1;
489
0
    FIRST_LEVEL_CHECKS;
490
0
    if (quarteriters > 1) {
491
0
      SECOND_LEVEL_CHECKS;
492
0
    }
493
0
  }
494
495
0
  if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
496
0
    tr = br;
497
0
    tc = bc;
498
0
    hstep >>= 1;
499
0
    FIRST_LEVEL_CHECKS;
500
0
    if (eighthiters > 1) {
501
0
      SECOND_LEVEL_CHECKS;
502
0
    }
503
0
  }
504
  // These lines insure static analysis doesn't warn that
505
  // tr and tc aren't used after the above point.
506
0
  (void)tr;
507
0
  (void)tc;
508
509
0
  bestmv->row = br;
510
0
  bestmv->col = bc;
511
512
0
  return besterr;
513
0
}
514
515
uint32_t vp9_find_best_sub_pixel_tree_pruned(
516
    const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
517
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
518
    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
519
    uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
520
0
    int h, int use_accurate_subpel_search) {
521
0
  SETUP_SUBPEL_SEARCH;
522
0
  (void)use_accurate_subpel_search;
523
524
0
  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
525
0
                               src_stride, y, y_stride, second_pred, w, h,
526
0
                               offset, mvjcost, mvcost, sse1, distortion);
527
0
  if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
528
0
      cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
529
0
      cost_list[4] != INT_MAX) {
530
0
    unsigned int left, right, up, down, diag;
531
0
    whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) +
532
0
               (cost_list[2] < cost_list[4] ? 0 : 2);
533
0
    switch (whichdir) {
534
0
      case 0:
535
0
        CHECK_BETTER(left, tr, tc - hstep);
536
0
        CHECK_BETTER(down, tr + hstep, tc);
537
0
        CHECK_BETTER(diag, tr + hstep, tc - hstep);
538
0
        break;
539
0
      case 1:
540
0
        CHECK_BETTER(right, tr, tc + hstep);
541
0
        CHECK_BETTER(down, tr + hstep, tc);
542
0
        CHECK_BETTER(diag, tr + hstep, tc + hstep);
543
0
        break;
544
0
      case 2:
545
0
        CHECK_BETTER(left, tr, tc - hstep);
546
0
        CHECK_BETTER(up, tr - hstep, tc);
547
0
        CHECK_BETTER(diag, tr - hstep, tc - hstep);
548
0
        break;
549
0
      case 3:
550
0
        CHECK_BETTER(right, tr, tc + hstep);
551
0
        CHECK_BETTER(up, tr - hstep, tc);
552
0
        CHECK_BETTER(diag, tr - hstep, tc + hstep);
553
0
        break;
554
0
    }
555
0
  } else {
556
0
    FIRST_LEVEL_CHECKS;
557
0
    if (halfiters > 1) {
558
0
      SECOND_LEVEL_CHECKS;
559
0
    }
560
0
  }
561
562
0
  tr = br;
563
0
  tc = bc;
564
565
  // Each subsequent iteration checks at least one point in common with
566
  // the last iteration could be 2 ( if diag selected) 1/4 pel
567
568
  // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only
569
0
  if (forced_stop != 2) {
570
0
    hstep >>= 1;
571
0
    FIRST_LEVEL_CHECKS;
572
0
    if (quarteriters > 1) {
573
0
      SECOND_LEVEL_CHECKS;
574
0
    }
575
0
    tr = br;
576
0
    tc = bc;
577
0
  }
578
579
0
  if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
580
0
    hstep >>= 1;
581
0
    FIRST_LEVEL_CHECKS;
582
0
    if (eighthiters > 1) {
583
0
      SECOND_LEVEL_CHECKS;
584
0
    }
585
0
    tr = br;
586
0
    tc = bc;
587
0
  }
588
  // These lines insure static analysis doesn't warn that
589
  // tr and tc aren't used after the above point.
590
0
  (void)tr;
591
0
  (void)tc;
592
593
0
  bestmv->row = br;
594
0
  bestmv->col = bc;
595
596
0
  return besterr;
597
0
}
598
599
/* clang-format off */
600
static const MV search_step_table[12] = {
601
  // left, right, up, down
602
  { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 },
603
  { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 },
604
  { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 }
605
};
606
/* clang-format on */
607
608
static int accurate_sub_pel_search(
609
    const MACROBLOCKD *xd, const MV *this_mv, const struct scale_factors *sf,
610
    const InterpKernel *kernel, const vp9_variance_fn_ptr_t *vfp,
611
    const uint8_t *const src_address, const int src_stride,
612
    const uint8_t *const pre_address, int y_stride, const uint8_t *second_pred,
613
112M
    int w, int h, uint32_t *sse) {
614
112M
#if CONFIG_VP9_HIGHBITDEPTH
615
112M
  uint64_t besterr;
616
112M
  assert(sf->x_step_q4 == 16 && sf->y_step_q4 == 16);
617
112M
  assert(w != 0 && h != 0);
618
112M
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
619
0
    DECLARE_ALIGNED(16, uint16_t, pred16[64 * 64]);
620
0
    vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(pre_address), y_stride,
621
0
                                     pred16, w, this_mv, sf, w, h, 0, kernel,
622
0
                                     MV_PRECISION_Q3, 0, 0, xd->bd);
623
0
    if (second_pred != NULL) {
624
0
      DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]);
625
0
      vpx_highbd_comp_avg_pred(comp_pred16, CONVERT_TO_SHORTPTR(second_pred), w,
626
0
                               h, pred16, w);
627
0
      besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src_address,
628
0
                        src_stride, sse);
629
0
    } else {
630
0
      besterr =
631
0
          vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src_address, src_stride, sse);
632
0
    }
633
112M
  } else {
634
112M
    DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]);
635
112M
    vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h,
636
112M
                              0, kernel, MV_PRECISION_Q3, 0, 0);
637
112M
    if (second_pred != NULL) {
638
0
      DECLARE_ALIGNED(32, uint8_t, comp_pred[64 * 64]);
639
0
      vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w);
640
0
      besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse);
641
112M
    } else {
642
112M
      besterr = vfp->vf(pred, w, src_address, src_stride, sse);
643
112M
    }
644
112M
  }
645
112M
  if (besterr >= UINT_MAX) return UINT_MAX;
646
112M
  return (int)besterr;
647
#else
648
  int besterr;
649
  DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]);
650
  assert(sf->x_step_q4 == 16 && sf->y_step_q4 == 16);
651
  assert(w != 0 && h != 0);
652
  (void)xd;
653
654
  vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h,
655
                            0, kernel, MV_PRECISION_Q3, 0, 0);
656
  if (second_pred != NULL) {
657
    DECLARE_ALIGNED(32, uint8_t, comp_pred[64 * 64]);
658
    vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w);
659
    besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse);
660
  } else {
661
    besterr = vfp->vf(pred, w, src_address, src_stride, sse);
662
  }
663
  return besterr;
664
#endif  // CONFIG_VP9_HIGHBITDEPTH
665
112M
}
666
667
// TODO(yunqing): this part can be further refactored.
668
/* CHECK_BETTER1(v, r, c): scores the sub-pel candidate (r, c) using
 * accurate_sub_pel_search() and stores its total cost (prediction error plus
 * motion-vector cost relative to (rr, rc)) in v. When that cost is lower than
 * besterr, the running best (besterr, br, bc, *distortion, *sse1) is updated.
 * Candidates outside [minr, maxr] x [minc, maxc] are not evaluated and get
 * v = INT_MAX.
 *
 * The macro captures these names from the expansion scope: xd, x, kernel,
 * vfp, z, src_stride, y, y_stride, second_pred, w, h, sse, thismse, rr, rc,
 * mvjcost, mvcost, error_per_bit, besterr, br, bc, distortion, sse1, minr,
 * maxr, minc and maxc.
 *
 * r and c are expanded several times, so they must be free of side effects
 * and must not reference br or bc (which the macro assigns). */
#if CONFIG_VP9_HIGHBITDEPTH
/* High-bitdepth variant: the total cost is accumulated in 64 bits and
 * saturated to INT_MAX (without updating the running best) so that large
 * distortions cannot overflow. */
#define CHECK_BETTER1(v, r, c)                                                \
  do {                                                                        \
    if ((c) >= minc && (c) <= maxc && (r) >= minr && (r) <= maxr) {           \
      int64_t tmpmse;                                                         \
      const MV cb_mv = { (r), (c) };                                          \
      const MV cb_ref_mv = { rr, rc };                                        \
      thismse = accurate_sub_pel_search(xd, &cb_mv, x->me_sf, kernel, vfp, z, \
                                        src_stride, y, y_stride, second_pred, \
                                        w, h, &sse);                          \
      tmpmse = thismse;                                                       \
      tmpmse +=                                                               \
          mv_err_cost(&cb_mv, &cb_ref_mv, mvjcost, mvcost, error_per_bit);    \
      if (tmpmse >= INT_MAX) {                                                \
        (v) = INT_MAX;                                                        \
      } else if (((v) = (uint32_t)tmpmse) < besterr) {                        \
        besterr = (v);                                                        \
        br = (r);                                                             \
        bc = (c);                                                             \
        *distortion = thismse;                                                \
        *sse1 = sse;                                                          \
      }                                                                       \
    } else {                                                                  \
      (v) = INT_MAX;                                                          \
    }                                                                         \
  } while (0)
#else
/* Low-bitdepth variant: the total cost is formed directly in the type of v. */
#define CHECK_BETTER1(v, r, c)                                                \
  do {                                                                        \
    if ((c) >= minc && (c) <= maxc && (r) >= minr && (r) <= maxr) {           \
      const MV cb_mv = { (r), (c) };                                          \
      const MV cb_ref_mv = { rr, rc };                                        \
      thismse = accurate_sub_pel_search(xd, &cb_mv, x->me_sf, kernel, vfp, z, \
                                        src_stride, y, y_stride, second_pred, \
                                        w, h, &sse);                          \
      if (((v) = mv_err_cost(&cb_mv, &cb_ref_mv, mvjcost, mvcost,             \
                             error_per_bit) +                                 \
                 thismse) < besterr) {                                        \
        besterr = (v);                                                        \
        br = (r);                                                             \
        bc = (c);                                                             \
        *distortion = thismse;                                                \
        *sse1 = sse;                                                          \
      }                                                                       \
    } else {                                                                  \
      (v) = INT_MAX;                                                          \
    }                                                                         \
  } while (0)

#endif
720
721
uint32_t vp9_find_best_sub_pixel_tree(
722
    const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
723
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
724
    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
725
    uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
726
22.2M
    int h, int use_accurate_subpel_search) {
727
22.2M
  const uint8_t *const z = x->plane[0].src.buf;
728
22.2M
  const uint8_t *const src_address = z;
729
22.2M
  const int src_stride = x->plane[0].src.stride;
730
22.2M
  const MACROBLOCKD *xd = &x->e_mbd;
731
22.2M
  unsigned int besterr = UINT_MAX;
732
22.2M
  unsigned int sse;
733
22.2M
  int thismse;
734
22.2M
  const int y_stride = xd->plane[0].pre[0].stride;
735
22.2M
  const int offset = bestmv->row * y_stride + bestmv->col;
736
22.2M
  const uint8_t *const y = xd->plane[0].pre[0].buf;
737
738
22.2M
  int rr = ref_mv->row;
739
22.2M
  int rc = ref_mv->col;
740
22.2M
  int br = bestmv->row * 8;
741
22.2M
  int bc = bestmv->col * 8;
742
22.2M
  int hstep = 4;
743
22.2M
  int iter, round = 3 - forced_stop;
744
745
22.2M
  int minc, maxc, minr, maxr;
746
22.2M
  int tr = br;
747
22.2M
  int tc = bc;
748
22.2M
  const MV *search_step = search_step_table;
749
22.2M
  int idx, best_idx = -1;
750
22.2M
  unsigned int cost_array[5];
751
22.2M
  int kr, kc;
752
22.2M
  MvLimits subpel_mv_limits;
753
754
  // TODO(yunqing): need to add 4-tap filter optimization to speed up the
755
  // encoder.
756
22.2M
  const InterpKernel *kernel =
757
22.2M
      (use_accurate_subpel_search > 0)
758
22.2M
          ? ((use_accurate_subpel_search == USE_4_TAPS)
759
8.81M
                 ? vp9_filter_kernels[FOURTAP]
760
8.81M
                 : ((use_accurate_subpel_search == USE_8_TAPS)
761
0
                        ? vp9_filter_kernels[EIGHTTAP]
762
0
                        : vp9_filter_kernels[EIGHTTAP_SHARP]))
763
22.2M
          : vp9_filter_kernels[BILINEAR];
764
765
22.2M
  vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv);
766
22.2M
  minc = subpel_mv_limits.col_min;
767
22.2M
  maxc = subpel_mv_limits.col_max;
768
22.2M
  minr = subpel_mv_limits.row_min;
769
22.2M
  maxr = subpel_mv_limits.row_max;
770
771
22.2M
  if (!(allow_hp && use_mv_hp(ref_mv)))
772
13.7M
    if (round == 3) round = 2;
773
774
22.2M
  bestmv->row *= 8;
775
22.2M
  bestmv->col *= 8;
776
777
22.2M
  besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
778
22.2M
                               src_stride, y, y_stride, second_pred, w, h,
779
22.2M
                               offset, mvjcost, mvcost, sse1, distortion);
780
781
22.2M
  (void)cost_list;  // to silence compiler warning
782
783
75.2M
  for (iter = 0; iter < round; ++iter) {
784
    // Check vertical and horizontal sub-pixel positions.
785
265M
    for (idx = 0; idx < 4; ++idx) {
786
212M
      tr = br + search_step[idx].row;
787
212M
      tc = bc + search_step[idx].col;
788
212M
      if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
789
208M
        MV this_mv;
790
208M
        this_mv.row = tr;
791
208M
        this_mv.col = tc;
792
793
208M
        if (use_accurate_subpel_search) {
794
83.7M
          thismse = accurate_sub_pel_search(xd, &this_mv, x->me_sf, kernel, vfp,
795
83.7M
                                            src_address, src_stride, y,
796
83.7M
                                            y_stride, second_pred, w, h, &sse);
797
124M
        } else {
798
124M
          const uint8_t *const pre_address =
799
124M
              y + (tr >> 3) * y_stride + (tc >> 3);
800
124M
          if (second_pred == NULL)
801
124M
            thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
802
124M
                               src_address, src_stride, &sse);
803
0
          else
804
0
            thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
805
0
                                src_address, src_stride, &sse, second_pred);
806
124M
        }
807
808
208M
        cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
809
208M
                                                mvcost, error_per_bit);
810
811
208M
        if (cost_array[idx] < besterr) {
812
24.9M
          best_idx = idx;
813
24.9M
          besterr = cost_array[idx];
814
24.9M
          *distortion = thismse;
815
24.9M
          *sse1 = sse;
816
24.9M
        }
817
208M
      } else {
818
3.34M
        cost_array[idx] = UINT_MAX;
819
3.34M
      }
820
212M
    }
821
822
    // Check diagonal sub-pixel position
823
53.0M
    kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
824
53.0M
    kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
825
826
53.0M
    tc = bc + kc;
827
53.0M
    tr = br + kr;
828
53.0M
    if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
829
53.0M
      MV this_mv = { tr, tc };
830
53.0M
      if (use_accurate_subpel_search) {
831
21.2M
        thismse = accurate_sub_pel_search(xd, &this_mv, x->me_sf, kernel, vfp,
832
21.2M
                                          src_address, src_stride, y, y_stride,
833
21.2M
                                          second_pred, w, h, &sse);
834
31.7M
      } else {
835
31.7M
        const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
836
31.7M
        if (second_pred == NULL)
837
31.7M
          thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
838
31.7M
                             src_stride, &sse);
839
0
        else
840
0
          thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
841
0
                              src_address, src_stride, &sse, second_pred);
842
31.7M
      }
843
844
53.0M
      cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
845
53.0M
                                            error_per_bit);
846
847
53.0M
      if (cost_array[4] < besterr) {
848
4.09M
        best_idx = 4;
849
4.09M
        besterr = cost_array[4];
850
4.09M
        *distortion = thismse;
851
4.09M
        *sse1 = sse;
852
4.09M
      }
853
53.0M
    } else {
854
0
      cost_array[idx] = UINT_MAX;
855
0
    }
856
857
53.0M
    if (best_idx < 4 && best_idx >= 0) {
858
15.9M
      br += search_step[best_idx].row;
859
15.9M
      bc += search_step[best_idx].col;
860
37.0M
    } else if (best_idx == 4) {
861
4.09M
      br = tr;
862
4.09M
      bc = tc;
863
4.09M
    }
864
865
53.0M
    if (iters_per_step > 0 && best_idx != -1) {
866
20.0M
      unsigned int second;
867
20.0M
      const int br0 = br;
868
20.0M
      const int bc0 = bc;
869
20.0M
      assert(tr == br || tc == bc);
870
871
20.0M
      if (tr == br && tc != bc) {
872
7.82M
        kc = bc - tc;
873
7.82M
        if (iters_per_step == 1) {
874
3.68M
          if (use_accurate_subpel_search) {
875
3.68M
            CHECK_BETTER1(second, br0, bc0 + kc);
876
3.68M
          } else {
877
0
            CHECK_BETTER(second, br0, bc0 + kc);
878
0
          }
879
3.68M
        }
880
12.2M
      } else if (tr != br && tc == bc) {
881
8.16M
        kr = br - tr;
882
8.16M
        if (iters_per_step == 1) {
883
3.73M
          if (use_accurate_subpel_search) {
884
3.73M
            CHECK_BETTER1(second, br0 + kr, bc0);
885
3.73M
          } else {
886
0
            CHECK_BETTER(second, br0 + kr, bc0);
887
0
          }
888
3.73M
        }
889
8.16M
      }
890
891
20.0M
      if (iters_per_step > 1) {
892
10.4M
        if (use_accurate_subpel_search) {
893
0
          CHECK_BETTER1(second, br0 + kr, bc0);
894
0
          CHECK_BETTER1(second, br0, bc0 + kc);
895
0
          if (br0 != br || bc0 != bc) {
896
0
            CHECK_BETTER1(second, br0 + kr, bc0 + kc);
897
0
          }
898
10.4M
        } else {
899
10.4M
          CHECK_BETTER(second, br0 + kr, bc0);
900
10.4M
          CHECK_BETTER(second, br0, bc0 + kc);
901
10.4M
          if (br0 != br || bc0 != bc) {
902
1.20M
            CHECK_BETTER(second, br0 + kr, bc0 + kc);
903
1.20M
          }
904
10.4M
        }
905
10.4M
      }
906
20.0M
    }
907
908
53.0M
    search_step += 4;
909
53.0M
    hstep >>= 1;
910
53.0M
    best_idx = -1;
911
53.0M
  }
912
913
  // Each subsequent iteration checks at least one point in common with
914
  // the last iteration could be 2 ( if diag selected) 1/4 pel
915
916
  // These lines insure static analysis doesn't warn that
917
  // tr and tc aren't used after the above point.
918
22.2M
  (void)tr;
919
22.2M
  (void)tc;
920
921
22.2M
  bestmv->row = br;
922
22.2M
  bestmv->col = bc;
923
924
22.2M
  return besterr;
925
22.2M
}
926
927
#undef CHECK_BETTER
928
#undef CHECK_BETTER1
929
930
static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col,
931
0
                               int range) {
932
0
  return ((row - range) >= mv_limits->row_min) &
933
0
         ((row + range) <= mv_limits->row_max) &
934
0
         ((col - range) >= mv_limits->col_min) &
935
0
         ((col + range) <= mv_limits->col_max);
936
0
}
937
938
288M
static INLINE int is_mv_in(const MvLimits *mv_limits, const MV *mv) {
939
288M
  return (mv->col >= mv_limits->col_min) && (mv->col <= mv_limits->col_max) &&
940
288M
         (mv->row >= mv_limits->row_min) && (mv->row <= mv_limits->row_max);
941
288M
}
942
943
/* Compares the candidate SAD in thissad against bestsad. The motion-vector
 * cost is only added (when use_mvcost is set) if the raw SAD already beats
 * bestsad; the candidate is accepted if it still beats bestsad afterwards, in
 * which case bestsad and best_site are updated.
 *
 * Captures from the expansion scope: thissad, bestsad, use_mvcost, x,
 * this_mv, fcenter_mv, sad_per_bit, best_site and i. The expansion is a brace
 * block and is used without a trailing semicolon. */
#define CHECK_BETTER                                                      \
  {                                                                       \
    if (thissad < bestsad) {                                              \
      if (use_mvcost) {                                                   \
        thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \
      }                                                                   \
      if (thissad < bestsad) {                                            \
        bestsad = thissad;                                                \
        best_site = i;                                                    \
      }                                                                   \
    }                                                                     \
  }
954
955
#define MAX_PATTERN_SCALES 11
956
#define MAX_PATTERN_CANDIDATES 8  // max number of candidates per scale
957
0
#define PATTERN_CANDIDATES_REF 3  // number of refinement candidates
958
959
// Calculate and return a sad+mvcost list around an integer best pel.
960
static INLINE void calc_int_cost_list(const MACROBLOCK *x, const MV *ref_mv,
961
                                      int sadpb,
962
                                      const vp9_variance_fn_ptr_t *fn_ptr,
963
0
                                      const MV *best_mv, int *cost_list) {
964
0
  static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
965
0
  const struct buf_2d *const what = &x->plane[0].src;
966
0
  const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
967
0
  const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
968
0
  int br = best_mv->row;
969
0
  int bc = best_mv->col;
970
0
  const MV mv = { br, bc };
971
0
  int i;
972
0
  unsigned int sse;
973
974
0
  cost_list[0] =
975
0
      fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
976
0
                 in_what->stride, &sse) +
977
0
      mvsad_err_cost(x, &mv, &fcenter_mv, sadpb);
978
0
  if (check_bounds(&x->mv_limits, br, bc, 1)) {
979
0
    for (i = 0; i < 4; i++) {
980
0
      const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
981
0
      cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
982
0
                                    get_buf_from_mv(in_what, &this_mv),
983
0
                                    in_what->stride, &sse) +
984
0
                         mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
985
0
                                     x->mvcost, x->errorperbit);
986
0
    }
987
0
  } else {
988
0
    for (i = 0; i < 4; i++) {
989
0
      const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
990
0
      if (!is_mv_in(&x->mv_limits, &this_mv))
991
0
        cost_list[i + 1] = INT_MAX;
992
0
      else
993
0
        cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
994
0
                                      get_buf_from_mv(in_what, &this_mv),
995
0
                                      in_what->stride, &sse) +
996
0
                           mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
997
0
                                       x->mvcost, x->errorperbit);
998
0
    }
999
0
  }
1000
0
}
1001
1002
// Generic pattern search function that searches over multiple scales.
1003
// Each scale can have a different number of candidates and shape of
1004
// candidates as indicated in the num_candidates and candidates arrays
1005
// passed into this function
1006
//
1007
static int vp9_pattern_search(
1008
    const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit,
1009
    int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp,
1010
    int use_mvcost, const MV *center_mv, MV *best_mv,
1011
    const int num_candidates[MAX_PATTERN_SCALES],
1012
0
    const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) {
1013
0
  const MACROBLOCKD *const xd = &x->e_mbd;
1014
0
  static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
1015
0
    10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
1016
0
  };
1017
0
  int i, s, t;
1018
0
  const struct buf_2d *const what = &x->plane[0].src;
1019
0
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1020
0
  int br, bc;
1021
0
  int bestsad = INT_MAX;
1022
0
  int thissad;
1023
0
  int k = -1;
1024
0
  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
1025
0
  int best_init_s = search_param_to_steps[search_param];
1026
  // adjust ref_mv to make sure it is within MV range
1027
0
  clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
1028
0
           x->mv_limits.row_min, x->mv_limits.row_max);
1029
0
  br = ref_mv->row;
1030
0
  bc = ref_mv->col;
1031
1032
  // Work out the start point for the search
1033
0
  bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
1034
0
                     in_what->stride) +
1035
0
            mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
1036
1037
  // Search all possible scales up to the search param around the center point
1038
  // pick the scale of the point that is best as the starting scale of
1039
  // further steps around it.
1040
0
  if (do_init_search) {
1041
0
    s = best_init_s;
1042
0
    best_init_s = -1;
1043
0
    for (t = 0; t <= s; ++t) {
1044
0
      int best_site = -1;
1045
0
      if (check_bounds(&x->mv_limits, br, bc, 1 << t)) {
1046
0
        for (i = 0; i < num_candidates[t]; i++) {
1047
0
          const MV this_mv = { br + candidates[t][i].row,
1048
0
                               bc + candidates[t][i].col };
1049
0
          thissad =
1050
0
              vfp->sdf(what->buf, what->stride,
1051
0
                       get_buf_from_mv(in_what, &this_mv), in_what->stride);
1052
0
          CHECK_BETTER
1053
0
        }
1054
0
      } else {
1055
0
        for (i = 0; i < num_candidates[t]; i++) {
1056
0
          const MV this_mv = { br + candidates[t][i].row,
1057
0
                               bc + candidates[t][i].col };
1058
0
          if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
1059
0
          thissad =
1060
0
              vfp->sdf(what->buf, what->stride,
1061
0
                       get_buf_from_mv(in_what, &this_mv), in_what->stride);
1062
0
          CHECK_BETTER
1063
0
        }
1064
0
      }
1065
0
      if (best_site == -1) {
1066
0
        continue;
1067
0
      } else {
1068
0
        best_init_s = t;
1069
0
        k = best_site;
1070
0
      }
1071
0
    }
1072
0
    if (best_init_s != -1) {
1073
0
      br += candidates[best_init_s][k].row;
1074
0
      bc += candidates[best_init_s][k].col;
1075
0
    }
1076
0
  }
1077
1078
  // If the center point is still the best, just skip this and move to
1079
  // the refinement step.
1080
0
  if (best_init_s != -1) {
1081
0
    int best_site = -1;
1082
0
    s = best_init_s;
1083
1084
0
    do {
1085
      // No need to search all 6 points the 1st time if initial search was used
1086
0
      if (!do_init_search || s != best_init_s) {
1087
0
        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
1088
0
          for (i = 0; i < num_candidates[s]; i++) {
1089
0
            const MV this_mv = { br + candidates[s][i].row,
1090
0
                                 bc + candidates[s][i].col };
1091
0
            thissad =
1092
0
                vfp->sdf(what->buf, what->stride,
1093
0
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
1094
0
            CHECK_BETTER
1095
0
          }
1096
0
        } else {
1097
0
          for (i = 0; i < num_candidates[s]; i++) {
1098
0
            const MV this_mv = { br + candidates[s][i].row,
1099
0
                                 bc + candidates[s][i].col };
1100
0
            if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
1101
0
            thissad =
1102
0
                vfp->sdf(what->buf, what->stride,
1103
0
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
1104
0
            CHECK_BETTER
1105
0
          }
1106
0
        }
1107
1108
0
        if (best_site == -1) {
1109
0
          continue;
1110
0
        } else {
1111
0
          br += candidates[s][best_site].row;
1112
0
          bc += candidates[s][best_site].col;
1113
0
          k = best_site;
1114
0
        }
1115
0
      }
1116
1117
0
      do {
1118
0
        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
1119
0
        best_site = -1;
1120
0
        next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
1121
0
        next_chkpts_indices[1] = k;
1122
0
        next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
1123
1124
0
        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
1125
0
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1126
0
            const MV this_mv = {
1127
0
              br + candidates[s][next_chkpts_indices[i]].row,
1128
0
              bc + candidates[s][next_chkpts_indices[i]].col
1129
0
            };
1130
0
            thissad =
1131
0
                vfp->sdf(what->buf, what->stride,
1132
0
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
1133
0
            CHECK_BETTER
1134
0
          }
1135
0
        } else {
1136
0
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1137
0
            const MV this_mv = {
1138
0
              br + candidates[s][next_chkpts_indices[i]].row,
1139
0
              bc + candidates[s][next_chkpts_indices[i]].col
1140
0
            };
1141
0
            if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
1142
0
            thissad =
1143
0
                vfp->sdf(what->buf, what->stride,
1144
0
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
1145
0
            CHECK_BETTER
1146
0
          }
1147
0
        }
1148
1149
0
        if (best_site != -1) {
1150
0
          k = next_chkpts_indices[best_site];
1151
0
          br += candidates[s][k].row;
1152
0
          bc += candidates[s][k].col;
1153
0
        }
1154
0
      } while (best_site != -1);
1155
0
    } while (s--);
1156
0
  }
1157
1158
0
  best_mv->row = br;
1159
0
  best_mv->col = bc;
1160
1161
  // Returns the one-away integer pel sad values around the best as follows:
1162
  // cost_list[0]: cost at the best integer pel
1163
  // cost_list[1]: cost at delta {0, -1} (left)   from the best integer pel
1164
  // cost_list[2]: cost at delta { 1, 0} (bottom) from the best integer pel
1165
  // cost_list[3]: cost at delta { 0, 1} (right)  from the best integer pel
1166
  // cost_list[4]: cost at delta {-1, 0} (top)    from the best integer pel
1167
0
  if (cost_list) {
1168
0
    calc_int_cost_list(x, &fcenter_mv, sad_per_bit, vfp, best_mv, cost_list);
1169
0
  }
1170
0
  return bestsad;
1171
0
}
1172
1173
// A specialized function where the smallest scale search candidates
1174
// are 4 1-away neighbors, and cost_list is non-null
1175
// TODO(debargha): Merge this function with the one above. Also remove
1176
// use_mvcost option since it is always 1, to save unnecessary branches.
1177
static int vp9_pattern_search_sad(
1178
    const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit,
1179
    int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp,
1180
    int use_mvcost, const MV *center_mv, MV *best_mv,
1181
    const int num_candidates[MAX_PATTERN_SCALES],
1182
0
    const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) {
1183
0
  const MACROBLOCKD *const xd = &x->e_mbd;
1184
0
  static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = {
1185
0
    10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
1186
0
  };
1187
0
  int i, s, t;
1188
0
  const struct buf_2d *const what = &x->plane[0].src;
1189
0
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1190
0
  int br, bc;
1191
0
  int bestsad = INT_MAX;
1192
0
  int thissad;
1193
0
  int k = -1;
1194
0
  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
1195
0
  int best_init_s = search_param_to_steps[search_param];
1196
  // adjust ref_mv to make sure it is within MV range
1197
0
  clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
1198
0
           x->mv_limits.row_min, x->mv_limits.row_max);
1199
0
  br = ref_mv->row;
1200
0
  bc = ref_mv->col;
1201
0
  if (cost_list != NULL) {
1202
0
    cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] =
1203
0
        INT_MAX;
1204
0
  }
1205
1206
  // Work out the start point for the search
1207
0
  bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
1208
0
                     in_what->stride) +
1209
0
            mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
1210
1211
  // Search all possible scales up to the search param around the center point
1212
  // pick the scale of the point that is best as the starting scale of
1213
  // further steps around it.
1214
0
  if (do_init_search) {
1215
0
    s = best_init_s;
1216
0
    best_init_s = -1;
1217
0
    for (t = 0; t <= s; ++t) {
1218
0
      int best_site = -1;
1219
0
      if (check_bounds(&x->mv_limits, br, bc, 1 << t)) {
1220
0
        for (i = 0; i < num_candidates[t]; i++) {
1221
0
          const MV this_mv = { br + candidates[t][i].row,
1222
0
                               bc + candidates[t][i].col };
1223
0
          thissad =
1224
0
              vfp->sdf(what->buf, what->stride,
1225
0
                       get_buf_from_mv(in_what, &this_mv), in_what->stride);
1226
0
          CHECK_BETTER
1227
0
        }
1228
0
      } else {
1229
0
        for (i = 0; i < num_candidates[t]; i++) {
1230
0
          const MV this_mv = { br + candidates[t][i].row,
1231
0
                               bc + candidates[t][i].col };
1232
0
          if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
1233
0
          thissad =
1234
0
              vfp->sdf(what->buf, what->stride,
1235
0
                       get_buf_from_mv(in_what, &this_mv), in_what->stride);
1236
0
          CHECK_BETTER
1237
0
        }
1238
0
      }
1239
0
      if (best_site == -1) {
1240
0
        continue;
1241
0
      } else {
1242
0
        best_init_s = t;
1243
0
        k = best_site;
1244
0
      }
1245
0
    }
1246
0
    if (best_init_s != -1) {
1247
0
      br += candidates[best_init_s][k].row;
1248
0
      bc += candidates[best_init_s][k].col;
1249
0
    }
1250
0
  }
1251
1252
  // If the center point is still the best, just skip this and move to
1253
  // the refinement step.
1254
0
  if (best_init_s != -1) {
1255
0
    int do_sad = (num_candidates[0] == 4 && cost_list != NULL);
1256
0
    int best_site = -1;
1257
0
    s = best_init_s;
1258
1259
0
    for (; s >= do_sad; s--) {
1260
0
      if (!do_init_search || s != best_init_s) {
1261
0
        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
1262
0
          for (i = 0; i < num_candidates[s]; i++) {
1263
0
            const MV this_mv = { br + candidates[s][i].row,
1264
0
                                 bc + candidates[s][i].col };
1265
0
            thissad =
1266
0
                vfp->sdf(what->buf, what->stride,
1267
0
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
1268
0
            CHECK_BETTER
1269
0
          }
1270
0
        } else {
1271
0
          for (i = 0; i < num_candidates[s]; i++) {
1272
0
            const MV this_mv = { br + candidates[s][i].row,
1273
0
                                 bc + candidates[s][i].col };
1274
0
            if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
1275
0
            thissad =
1276
0
                vfp->sdf(what->buf, what->stride,
1277
0
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
1278
0
            CHECK_BETTER
1279
0
          }
1280
0
        }
1281
1282
0
        if (best_site == -1) {
1283
0
          continue;
1284
0
        } else {
1285
0
          br += candidates[s][best_site].row;
1286
0
          bc += candidates[s][best_site].col;
1287
0
          k = best_site;
1288
0
        }
1289
0
      }
1290
1291
0
      do {
1292
0
        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
1293
0
        best_site = -1;
1294
0
        next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
1295
0
        next_chkpts_indices[1] = k;
1296
0
        next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
1297
1298
0
        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
1299
0
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1300
0
            const MV this_mv = {
1301
0
              br + candidates[s][next_chkpts_indices[i]].row,
1302
0
              bc + candidates[s][next_chkpts_indices[i]].col
1303
0
            };
1304
0
            thissad =
1305
0
                vfp->sdf(what->buf, what->stride,
1306
0
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
1307
0
            CHECK_BETTER
1308
0
          }
1309
0
        } else {
1310
0
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1311
0
            const MV this_mv = {
1312
0
              br + candidates[s][next_chkpts_indices[i]].row,
1313
0
              bc + candidates[s][next_chkpts_indices[i]].col
1314
0
            };
1315
0
            if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
1316
0
            thissad =
1317
0
                vfp->sdf(what->buf, what->stride,
1318
0
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
1319
0
            CHECK_BETTER
1320
0
          }
1321
0
        }
1322
1323
0
        if (best_site != -1) {
1324
0
          k = next_chkpts_indices[best_site];
1325
0
          br += candidates[s][k].row;
1326
0
          bc += candidates[s][k].col;
1327
0
        }
1328
0
      } while (best_site != -1);
1329
0
    }
1330
1331
    // Note: If we enter the if below, then cost_list must be non-NULL.
1332
0
    if (s == 0) {
1333
0
      cost_list[0] = bestsad;
1334
0
      if (!do_init_search || s != best_init_s) {
1335
0
        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
1336
0
          for (i = 0; i < num_candidates[s]; i++) {
1337
0
            const MV this_mv = { br + candidates[s][i].row,
1338
0
                                 bc + candidates[s][i].col };
1339
0
            cost_list[i + 1] = thissad =
1340
0
                vfp->sdf(what->buf, what->stride,
1341
0
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
1342
0
            CHECK_BETTER
1343
0
          }
1344
0
        } else {
1345
0
          for (i = 0; i < num_candidates[s]; i++) {
1346
0
            const MV this_mv = { br + candidates[s][i].row,
1347
0
                                 bc + candidates[s][i].col };
1348
0
            if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
1349
0
            cost_list[i + 1] = thissad =
1350
0
                vfp->sdf(what->buf, what->stride,
1351
0
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
1352
0
            CHECK_BETTER
1353
0
          }
1354
0
        }
1355
1356
0
        if (best_site != -1) {
1357
0
          br += candidates[s][best_site].row;
1358
0
          bc += candidates[s][best_site].col;
1359
0
          k = best_site;
1360
0
        }
1361
0
      }
1362
0
      while (best_site != -1) {
1363
0
        int next_chkpts_indices[PATTERN_CANDIDATES_REF];
1364
0
        best_site = -1;
1365
0
        next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
1366
0
        next_chkpts_indices[1] = k;
1367
0
        next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
1368
0
        cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX;
1369
0
        cost_list[((k + 2) % 4) + 1] = cost_list[0];
1370
0
        cost_list[0] = bestsad;
1371
1372
0
        if (check_bounds(&x->mv_limits, br, bc, 1 << s)) {
1373
0
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1374
0
            const MV this_mv = {
1375
0
              br + candidates[s][next_chkpts_indices[i]].row,
1376
0
              bc + candidates[s][next_chkpts_indices[i]].col
1377
0
            };
1378
0
            cost_list[next_chkpts_indices[i] + 1] = thissad =
1379
0
                vfp->sdf(what->buf, what->stride,
1380
0
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
1381
0
            CHECK_BETTER
1382
0
          }
1383
0
        } else {
1384
0
          for (i = 0; i < PATTERN_CANDIDATES_REF; i++) {
1385
0
            const MV this_mv = {
1386
0
              br + candidates[s][next_chkpts_indices[i]].row,
1387
0
              bc + candidates[s][next_chkpts_indices[i]].col
1388
0
            };
1389
0
            if (!is_mv_in(&x->mv_limits, &this_mv)) {
1390
0
              cost_list[next_chkpts_indices[i] + 1] = INT_MAX;
1391
0
              continue;
1392
0
            }
1393
0
            cost_list[next_chkpts_indices[i] + 1] = thissad =
1394
0
                vfp->sdf(what->buf, what->stride,
1395
0
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
1396
0
            CHECK_BETTER
1397
0
          }
1398
0
        }
1399
1400
0
        if (best_site != -1) {
1401
0
          k = next_chkpts_indices[best_site];
1402
0
          br += candidates[s][k].row;
1403
0
          bc += candidates[s][k].col;
1404
0
        }
1405
0
      }
1406
0
    }
1407
0
  }
1408
1409
  // Returns the one-away integer pel sad values around the best as follows:
1410
  // cost_list[0]: sad at the best integer pel
1411
  // cost_list[1]: sad at delta {0, -1} (left)   from the best integer pel
1412
  // cost_list[2]: sad at delta { 1, 0} (bottom) from the best integer pel
1413
  // cost_list[3]: sad at delta { 0, 1} (right)  from the best integer pel
1414
  // cost_list[4]: sad at delta {-1, 0} (top)    from the best integer pel
1415
0
  if (cost_list) {
1416
0
    static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
1417
0
    if (cost_list[0] == INT_MAX) {
1418
0
      cost_list[0] = bestsad;
1419
0
      if (check_bounds(&x->mv_limits, br, bc, 1)) {
1420
0
        for (i = 0; i < 4; i++) {
1421
0
          const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
1422
0
          cost_list[i + 1] =
1423
0
              vfp->sdf(what->buf, what->stride,
1424
0
                       get_buf_from_mv(in_what, &this_mv), in_what->stride);
1425
0
        }
1426
0
      } else {
1427
0
        for (i = 0; i < 4; i++) {
1428
0
          const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
1429
0
          if (!is_mv_in(&x->mv_limits, &this_mv))
1430
0
            cost_list[i + 1] = INT_MAX;
1431
0
          else
1432
0
            cost_list[i + 1] =
1433
0
                vfp->sdf(what->buf, what->stride,
1434
0
                         get_buf_from_mv(in_what, &this_mv), in_what->stride);
1435
0
        }
1436
0
      }
1437
0
    } else {
1438
0
      if (use_mvcost) {
1439
0
        for (i = 0; i < 4; i++) {
1440
0
          const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
1441
0
          if (cost_list[i + 1] != INT_MAX) {
1442
0
            cost_list[i + 1] +=
1443
0
                mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
1444
0
          }
1445
0
        }
1446
0
      }
1447
0
    }
1448
0
  }
1449
0
  best_mv->row = br;
1450
0
  best_mv->col = bc;
1451
0
  return bestsad;
1452
0
}
1453
1454
int vp9_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv,
1455
                       const MV *center_mv, const vp9_variance_fn_ptr_t *vfp,
1456
79.7M
                       int use_mvcost) {
1457
79.7M
  const MACROBLOCKD *const xd = &x->e_mbd;
1458
79.7M
  const struct buf_2d *const what = &x->plane[0].src;
1459
79.7M
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1460
79.7M
  const MV mv = { best_mv->row * 8, best_mv->col * 8 };
1461
79.7M
  uint32_t unused;
1462
79.7M
#if CONFIG_VP9_HIGHBITDEPTH
1463
79.7M
  uint64_t err =
1464
79.7M
      vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
1465
79.7M
              in_what->stride, &unused);
1466
79.7M
  err += (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
1467
79.7M
                                   x->errorperbit)
1468
79.7M
                     : 0);
1469
79.7M
  if (err >= INT_MAX) return INT_MAX;
1470
79.7M
  return (int)err;
1471
#else
1472
  return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv),
1473
                 in_what->stride, &unused) +
1474
         (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
1475
                                   x->errorperbit)
1476
                     : 0);
1477
#endif
1478
79.7M
}
1479
1480
int vp9_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
1481
                          const MV *center_mv, const uint8_t *second_pred,
1482
0
                          const vp9_variance_fn_ptr_t *vfp, int use_mvcost) {
1483
0
  const MACROBLOCKD *const xd = &x->e_mbd;
1484
0
  const struct buf_2d *const what = &x->plane[0].src;
1485
0
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1486
0
  const MV mv = { best_mv->row * 8, best_mv->col * 8 };
1487
0
  unsigned int unused;
1488
1489
0
  return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0,
1490
0
                   what->buf, what->stride, &unused, second_pred) +
1491
0
         (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost,
1492
0
                                   x->errorperbit)
1493
0
                     : 0);
1494
0
}
1495
1496
static int hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
1497
                      int sad_per_bit, int do_init_search, int *cost_list,
1498
                      const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
1499
0
                      const MV *center_mv, MV *best_mv) {
1500
  // First scale has 8-closest points, the rest have 6 points in hex shape
1501
  // at increasing scales
1502
0
  static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 8, 6, 6, 6, 6, 6,
1503
0
                                                              6, 6, 6, 6, 6 };
1504
  // Note that the largest candidate step at each scale is 2^scale
1505
  /* clang-format off */
1506
0
  static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
1507
0
    { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, { -1, 1 },
1508
0
      { -1, 0 } },
1509
0
    { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } },
1510
0
    { { -2, -4 }, { 2, -4 }, { 4, 0 }, { 2, 4 }, { -2, 4 }, { -4, 0 } },
1511
0
    { { -4, -8 }, { 4, -8 }, { 8, 0 }, { 4, 8 }, { -4, 8 }, { -8, 0 } },
1512
0
    { { -8, -16 }, { 8, -16 }, { 16, 0 }, { 8, 16 }, { -8, 16 }, { -16, 0 } },
1513
0
    { { -16, -32 }, { 16, -32 }, { 32, 0 }, { 16, 32 }, { -16, 32 },
1514
0
      { -32, 0 } },
1515
0
    { { -32, -64 }, { 32, -64 }, { 64, 0 }, { 32, 64 }, { -32, 64 },
1516
0
      { -64, 0 } },
1517
0
    { { -64, -128 }, { 64, -128 }, { 128, 0 }, { 64, 128 }, { -64, 128 },
1518
0
      { -128, 0 } },
1519
0
    { { -128, -256 }, { 128, -256 }, { 256, 0 }, { 128, 256 }, { -128, 256 },
1520
0
      { -256, 0 } },
1521
0
    { { -256, -512 }, { 256, -512 }, { 512, 0 }, { 256, 512 }, { -256, 512 },
1522
0
      { -512, 0 } },
1523
0
    { { -512, -1024 }, { 512, -1024 }, { 1024, 0 }, { 512, 1024 },
1524
0
      { -512, 1024 }, { -1024, 0 } }
1525
0
  };
1526
  /* clang-format on */
1527
0
  return vp9_pattern_search(
1528
0
      x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp,
1529
0
      use_mvcost, center_mv, best_mv, hex_num_candidates, hex_candidates);
1530
0
}
1531
1532
static int bigdia_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
1533
                         int sad_per_bit, int do_init_search, int *cost_list,
1534
                         const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
1535
0
                         const MV *center_mv, MV *best_mv) {
1536
  // First scale has 4-closest points, the rest have 8 points in diamond
1537
  // shape at increasing scales
1538
0
  static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = {
1539
0
    4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
1540
0
  };
1541
  // Note that the largest candidate step at each scale is 2^scale
1542
  /* clang-format off */
1543
0
  static const MV
1544
0
      bigdia_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
1545
0
        { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } },
1546
0
        { { -1, -1 }, { 0, -2 }, { 1, -1 }, { 2, 0 }, { 1, 1 }, { 0, 2 },
1547
0
          { -1, 1 }, { -2, 0 } },
1548
0
        { { -2, -2 }, { 0, -4 }, { 2, -2 }, { 4, 0 }, { 2, 2 }, { 0, 4 },
1549
0
          { -2, 2 }, { -4, 0 } },
1550
0
        { { -4, -4 }, { 0, -8 }, { 4, -4 }, { 8, 0 }, { 4, 4 }, { 0, 8 },
1551
0
          { -4, 4 }, { -8, 0 } },
1552
0
        { { -8, -8 }, { 0, -16 }, { 8, -8 }, { 16, 0 }, { 8, 8 }, { 0, 16 },
1553
0
          { -8, 8 }, { -16, 0 } },
1554
0
        { { -16, -16 }, { 0, -32 }, { 16, -16 }, { 32, 0 }, { 16, 16 },
1555
0
          { 0, 32 }, { -16, 16 }, { -32, 0 } },
1556
0
        { { -32, -32 }, { 0, -64 }, { 32, -32 }, { 64, 0 }, { 32, 32 },
1557
0
          { 0, 64 }, { -32, 32 }, { -64, 0 } },
1558
0
        { { -64, -64 }, { 0, -128 }, { 64, -64 }, { 128, 0 }, { 64, 64 },
1559
0
          { 0, 128 }, { -64, 64 }, { -128, 0 } },
1560
0
        { { -128, -128 }, { 0, -256 }, { 128, -128 }, { 256, 0 }, { 128, 128 },
1561
0
          { 0, 256 }, { -128, 128 }, { -256, 0 } },
1562
0
        { { -256, -256 }, { 0, -512 }, { 256, -256 }, { 512, 0 }, { 256, 256 },
1563
0
          { 0, 512 }, { -256, 256 }, { -512, 0 } },
1564
0
        { { -512, -512 }, { 0, -1024 }, { 512, -512 }, { 1024, 0 },
1565
0
          { 512, 512 }, { 0, 1024 }, { -512, 512 }, { -1024, 0 } }
1566
0
      };
1567
  /* clang-format on */
1568
0
  return vp9_pattern_search_sad(
1569
0
      x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp,
1570
0
      use_mvcost, center_mv, best_mv, bigdia_num_candidates, bigdia_candidates);
1571
0
}
1572
1573
static int square_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
1574
                         int sad_per_bit, int do_init_search, int *cost_list,
1575
                         const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
1576
0
                         const MV *center_mv, MV *best_mv) {
1577
  // All scales have 8 closest points in square shape
1578
0
  static const int square_num_candidates[MAX_PATTERN_SCALES] = {
1579
0
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
1580
0
  };
1581
  // Note that the largest candidate step at each scale is 2^scale
1582
  /* clang-format off */
1583
0
  static const MV
1584
0
      square_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = {
1585
0
        { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 },
1586
0
          { -1, 1 }, { -1, 0 } },
1587
0
        { { -2, -2 }, { 0, -2 }, { 2, -2 }, { 2, 0 }, { 2, 2 }, { 0, 2 },
1588
0
          { -2, 2 }, { -2, 0 } },
1589
0
        { { -4, -4 }, { 0, -4 }, { 4, -4 }, { 4, 0 }, { 4, 4 }, { 0, 4 },
1590
0
          { -4, 4 }, { -4, 0 } },
1591
0
        { { -8, -8 }, { 0, -8 }, { 8, -8 }, { 8, 0 }, { 8, 8 }, { 0, 8 },
1592
0
          { -8, 8 }, { -8, 0 } },
1593
0
        { { -16, -16 }, { 0, -16 }, { 16, -16 }, { 16, 0 }, { 16, 16 },
1594
0
          { 0, 16 }, { -16, 16 }, { -16, 0 } },
1595
0
        { { -32, -32 }, { 0, -32 }, { 32, -32 }, { 32, 0 }, { 32, 32 },
1596
0
          { 0, 32 }, { -32, 32 }, { -32, 0 } },
1597
0
        { { -64, -64 }, { 0, -64 }, { 64, -64 }, { 64, 0 }, { 64, 64 },
1598
0
          { 0, 64 }, { -64, 64 }, { -64, 0 } },
1599
0
        { { -128, -128 }, { 0, -128 }, { 128, -128 }, { 128, 0 }, { 128, 128 },
1600
0
          { 0, 128 }, { -128, 128 }, { -128, 0 } },
1601
0
        { { -256, -256 }, { 0, -256 }, { 256, -256 }, { 256, 0 }, { 256, 256 },
1602
0
          { 0, 256 }, { -256, 256 }, { -256, 0 } },
1603
0
        { { -512, -512 }, { 0, -512 }, { 512, -512 }, { 512, 0 }, { 512, 512 },
1604
0
          { 0, 512 }, { -512, 512 }, { -512, 0 } },
1605
0
        { { -1024, -1024 }, { 0, -1024 }, { 1024, -1024 }, { 1024, 0 },
1606
0
          { 1024, 1024 }, { 0, 1024 }, { -1024, 1024 }, { -1024, 0 } }
1607
0
      };
1608
  /* clang-format on */
1609
0
  return vp9_pattern_search(
1610
0
      x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp,
1611
0
      use_mvcost, center_mv, best_mv, square_num_candidates, square_candidates);
1612
0
}
1613
1614
// hex_search() with search_param raised to at least MAX_MVSEARCH_STEPS - 2.
// Every other argument is passed through untouched. Callers are expected to
// pass do_init_search == 0 (must be zero for fast_hex).
static int fast_hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
                           int sad_per_bit,
                           int do_init_search,  // must be zero for fast_hex
                           int *cost_list, const vp9_variance_fn_ptr_t *vfp,
                           int use_mvcost, const MV *center_mv, MV *best_mv) {
  const int floored_param = VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param);

  return hex_search(x, ref_mv, floored_param, sad_per_bit, do_init_search,
                    cost_list, vfp, use_mvcost, center_mv, best_mv);
}
1623
1624
static int fast_dia_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
1625
                           int sad_per_bit, int do_init_search, int *cost_list,
1626
                           const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
1627
0
                           const MV *center_mv, MV *best_mv) {
1628
0
  return bigdia_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param),
1629
0
                       sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
1630
0
                       center_mv, best_mv);
1631
0
}
1632
1633
#undef CHECK_BETTER
1634
1635
// Exhuastive motion search around a given centre position with a given
1636
// step size.
1637
static int exhaustive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
1638
                                  int range, int step, int sad_per_bit,
1639
                                  const vp9_variance_fn_ptr_t *fn_ptr,
1640
0
                                  const MV *center_mv) {
1641
0
  const MACROBLOCKD *const xd = &x->e_mbd;
1642
0
  const struct buf_2d *const what = &x->plane[0].src;
1643
0
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
1644
0
  MV fcenter_mv = { center_mv->row, center_mv->col };
1645
0
  unsigned int best_sad = INT_MAX;
1646
0
  int r, c, i;
1647
0
  int start_col, end_col, start_row, end_row;
1648
0
  int col_step = (step > 1) ? step : 4;
1649
1650
0
  assert(step >= 1);
1651
1652
0
  clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max,
1653
0
           x->mv_limits.row_min, x->mv_limits.row_max);
1654
0
  *best_mv = fcenter_mv;
1655
0
  best_sad =
1656
0
      fn_ptr->sdf(what->buf, what->stride,
1657
0
                  get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) +
1658
0
      mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit);
1659
0
  start_row = VPXMAX(-range, x->mv_limits.row_min - fcenter_mv.row);
1660
0
  start_col = VPXMAX(-range, x->mv_limits.col_min - fcenter_mv.col);
1661
0
  end_row = VPXMIN(range, x->mv_limits.row_max - fcenter_mv.row);
1662
0
  end_col = VPXMIN(range, x->mv_limits.col_max - fcenter_mv.col);
1663
1664
0
  for (r = start_row; r <= end_row; r += step) {
1665
0
    for (c = start_col; c <= end_col; c += col_step) {
1666
      // Step > 1 means we are not checking every location in this pass.
1667
0
      if (step > 1) {
1668
0
        const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c };
1669
0
        unsigned int sad =
1670
0
            fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
1671
0
                        in_what->stride);
1672
0
        if (sad < best_sad) {
1673
0
          sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
1674
0
          if (sad < best_sad) {
1675
0
            best_sad = sad;
1676
0
            *best_mv = mv;
1677
0
          }
1678
0
        }
1679
0
      } else {
1680
        // 4 sads in a single call if we are checking every location
1681
0
        if (c + 3 <= end_col) {
1682
0
          unsigned int sads[4];
1683
0
          const uint8_t *addrs[4];
1684
0
          for (i = 0; i < 4; ++i) {
1685
0
            const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
1686
0
            addrs[i] = get_buf_from_mv(in_what, &mv);
1687
0
          }
1688
0
          fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
1689
1690
0
          for (i = 0; i < 4; ++i) {
1691
0
            if (sads[i] < best_sad) {
1692
0
              const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
1693
0
              const unsigned int sad =
1694
0
                  sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
1695
0
              if (sad < best_sad) {
1696
0
                best_sad = sad;
1697
0
                *best_mv = mv;
1698
0
              }
1699
0
            }
1700
0
          }
1701
0
        } else {
1702
0
          for (i = 0; i < end_col - c; ++i) {
1703
0
            const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
1704
0
            unsigned int sad =
1705
0
                fn_ptr->sdf(what->buf, what->stride,
1706
0
                            get_buf_from_mv(in_what, &mv), in_what->stride);
1707
0
            if (sad < best_sad) {
1708
0
              sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
1709
0
              if (sad < best_sad) {
1710
0
                best_sad = sad;
1711
0
                *best_mv = mv;
1712
0
              }
1713
0
            }
1714
0
          }
1715
0
        }
1716
0
      }
1717
0
    }
1718
0
  }
1719
1720
0
  return best_sad;
1721
0
}
1722
1723
0
// Smallest first-pattern mesh range accepted by the exhaustive search.
#define MIN_RANGE 7
1724
0
// Largest mesh range accepted; also the cap applied after the range is
// widened to cover the magnitude of the starting vector.
#define MAX_RANGE 256
1725
0
// Smallest accepted mesh interval (step size between probed positions).
#define MIN_INTERVAL 1
1726
#if CONFIG_NON_GREEDY_MV
1727
static int64_t exhaustive_mesh_search_multi_step(
1728
    MV *best_mv, const MV *center_mv, int range, int step,
1729
    const struct buf_2d *src, const struct buf_2d *pre, int lambda,
1730
    const int_mv *nb_full_mvs, int full_mv_num, const MvLimits *mv_limits,
1731
    const vp9_variance_fn_ptr_t *fn_ptr) {
1732
  int64_t best_sad;
1733
  int r, c;
1734
  int start_col, end_col, start_row, end_row;
1735
  *best_mv = *center_mv;
1736
  best_sad =
1737
      ((int64_t)fn_ptr->sdf(src->buf, src->stride,
1738
                            get_buf_from_mv(pre, center_mv), pre->stride)
1739
       << LOG2_PRECISION) +
1740
      lambda * vp9_nb_mvs_inconsistency(best_mv, nb_full_mvs, full_mv_num);
1741
  start_row = VPXMAX(center_mv->row - range, mv_limits->row_min);
1742
  start_col = VPXMAX(center_mv->col - range, mv_limits->col_min);
1743
  end_row = VPXMIN(center_mv->row + range, mv_limits->row_max);
1744
  end_col = VPXMIN(center_mv->col + range, mv_limits->col_max);
1745
  for (r = start_row; r <= end_row; r += step) {
1746
    for (c = start_col; c <= end_col; c += step) {
1747
      const MV mv = { r, c };
1748
      int64_t sad = (int64_t)fn_ptr->sdf(src->buf, src->stride,
1749
                                         get_buf_from_mv(pre, &mv), pre->stride)
1750
                    << LOG2_PRECISION;
1751
      if (sad < best_sad) {
1752
        sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
1753
        if (sad < best_sad) {
1754
          best_sad = sad;
1755
          *best_mv = mv;
1756
        }
1757
      }
1758
    }
1759
  }
1760
  return best_sad;
1761
}
1762
1763
static int64_t exhaustive_mesh_search_single_step(
1764
    MV *best_mv, const MV *center_mv, int range, const struct buf_2d *src,
1765
    const struct buf_2d *pre, int lambda, const int_mv *nb_full_mvs,
1766
    int full_mv_num, const MvLimits *mv_limits,
1767
    const vp9_variance_fn_ptr_t *fn_ptr) {
1768
  int64_t best_sad;
1769
  int r, c, i;
1770
  int start_col, end_col, start_row, end_row;
1771
1772
  *best_mv = *center_mv;
1773
  best_sad =
1774
      ((int64_t)fn_ptr->sdf(src->buf, src->stride,
1775
                            get_buf_from_mv(pre, center_mv), pre->stride)
1776
       << LOG2_PRECISION) +
1777
      lambda * vp9_nb_mvs_inconsistency(best_mv, nb_full_mvs, full_mv_num);
1778
  start_row = VPXMAX(center_mv->row - range, mv_limits->row_min);
1779
  start_col = VPXMAX(center_mv->col - range, mv_limits->col_min);
1780
  end_row = VPXMIN(center_mv->row + range, mv_limits->row_max);
1781
  end_col = VPXMIN(center_mv->col + range, mv_limits->col_max);
1782
  for (r = start_row; r <= end_row; r += 1) {
1783
    c = start_col;
1784
    while (c + 3 <= end_col) {
1785
      unsigned int sads[4];
1786
      const uint8_t *addrs[4];
1787
      for (i = 0; i < 4; ++i) {
1788
        const MV mv = { r, c + i };
1789
        addrs[i] = get_buf_from_mv(pre, &mv);
1790
      }
1791
      fn_ptr->sdx4df(src->buf, src->stride, addrs, pre->stride, sads);
1792
1793
      for (i = 0; i < 4; ++i) {
1794
        int64_t sad = (int64_t)sads[i] << LOG2_PRECISION;
1795
        if (sad < best_sad) {
1796
          const MV mv = { r, c + i };
1797
          sad +=
1798
              lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
1799
          if (sad < best_sad) {
1800
            best_sad = sad;
1801
            *best_mv = mv;
1802
          }
1803
        }
1804
      }
1805
      c += 4;
1806
    }
1807
    while (c <= end_col) {
1808
      const MV mv = { r, c };
1809
      int64_t sad = (int64_t)fn_ptr->sdf(src->buf, src->stride,
1810
                                         get_buf_from_mv(pre, &mv), pre->stride)
1811
                    << LOG2_PRECISION;
1812
      if (sad < best_sad) {
1813
        sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
1814
        if (sad < best_sad) {
1815
          best_sad = sad;
1816
          *best_mv = mv;
1817
        }
1818
      }
1819
      c += 1;
1820
    }
1821
  }
1822
  return best_sad;
1823
}
1824
1825
static int64_t exhaustive_mesh_search_new(const MACROBLOCK *x, MV *best_mv,
1826
                                          int range, int step,
1827
                                          const vp9_variance_fn_ptr_t *fn_ptr,
1828
                                          const MV *center_mv, int lambda,
1829
                                          const int_mv *nb_full_mvs,
1830
                                          int full_mv_num) {
1831
  const MACROBLOCKD *const xd = &x->e_mbd;
1832
  const struct buf_2d *src = &x->plane[0].src;
1833
  const struct buf_2d *pre = &xd->plane[0].pre[0];
1834
  assert(step >= 1);
1835
  assert(is_mv_in(&x->mv_limits, center_mv));
1836
  if (step == 1) {
1837
    return exhaustive_mesh_search_single_step(
1838
        best_mv, center_mv, range, src, pre, lambda, nb_full_mvs, full_mv_num,
1839
        &x->mv_limits, fn_ptr);
1840
  }
1841
  return exhaustive_mesh_search_multi_step(best_mv, center_mv, range, step, src,
1842
                                           pre, lambda, nb_full_mvs,
1843
                                           full_mv_num, &x->mv_limits, fn_ptr);
1844
}
1845
1846
static int64_t full_pixel_exhaustive_new(const VP9_COMP *cpi, MACROBLOCK *x,
1847
                                         MV *centre_mv_full,
1848
                                         const vp9_variance_fn_ptr_t *fn_ptr,
1849
                                         MV *dst_mv, int lambda,
1850
                                         const int_mv *nb_full_mvs,
1851
                                         int full_mv_num) {
1852
  const SPEED_FEATURES *const sf = &cpi->sf;
1853
  MV temp_mv = { centre_mv_full->row, centre_mv_full->col };
1854
  int64_t bestsme;
1855
  int i;
1856
  int interval = sf->mesh_patterns[0].interval;
1857
  int range = sf->mesh_patterns[0].range;
1858
  int baseline_interval_divisor;
1859
1860
  // Trap illegal values for interval and range for this function.
1861
  if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
1862
      (interval > range)) {
1863
    printf("ERROR: invalid range\n");
1864
    assert(0);
1865
  }
1866
1867
  baseline_interval_divisor = range / interval;
1868
1869
  // Check size of proposed first range against magnitude of the centre
1870
  // value used as a starting point.
1871
  range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4);
1872
  range = VPXMIN(range, MAX_RANGE);
1873
  interval = VPXMAX(interval, range / baseline_interval_divisor);
1874
1875
  // initial search
1876
  bestsme =
1877
      exhaustive_mesh_search_new(x, &temp_mv, range, interval, fn_ptr, &temp_mv,
1878
                                 lambda, nb_full_mvs, full_mv_num);
1879
1880
  if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) {
1881
    // Progressive searches with range and step size decreasing each time
1882
    // till we reach a step size of 1. Then break out.
1883
    for (i = 1; i < MAX_MESH_STEP; ++i) {
1884
      // First pass with coarser step and longer range
1885
      bestsme = exhaustive_mesh_search_new(
1886
          x, &temp_mv, sf->mesh_patterns[i].range,
1887
          sf->mesh_patterns[i].interval, fn_ptr, &temp_mv, lambda, nb_full_mvs,
1888
          full_mv_num);
1889
1890
      if (sf->mesh_patterns[i].interval == 1) break;
1891
    }
1892
  }
1893
1894
  *dst_mv = temp_mv;
1895
1896
  return bestsme;
1897
}
1898
1899
static int64_t diamond_search_sad_new(const MACROBLOCK *x,
1900
                                      const search_site_config *cfg,
1901
                                      const MV *init_full_mv, MV *best_full_mv,
1902
                                      int search_param, int lambda, int *num00,
1903
                                      const vp9_variance_fn_ptr_t *fn_ptr,
1904
                                      const int_mv *nb_full_mvs,
1905
                                      int full_mv_num) {
1906
  int i, j, step;
1907
1908
  const MACROBLOCKD *const xd = &x->e_mbd;
1909
  uint8_t *what = x->plane[0].src.buf;
1910
  const int what_stride = x->plane[0].src.stride;
1911
  const uint8_t *in_what;
1912
  const int in_what_stride = xd->plane[0].pre[0].stride;
1913
  const uint8_t *best_address;
1914
1915
  int64_t bestsad;
1916
  int best_site = -1;
1917
  int last_site = -1;
1918
1919
  // search_param determines the length of the initial step and hence the number
1920
  // of iterations.
1921
  // 0 = initial step (MAX_FIRST_STEP) pel
1922
  // 1 = (MAX_FIRST_STEP/2) pel,
1923
  // 2 = (MAX_FIRST_STEP/4) pel...
1924
  //  const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
1925
  const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step];
1926
  const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step];
1927
  const int tot_steps = cfg->total_steps - search_param;
1928
  vpx_clear_system_state();
1929
1930
  *best_full_mv = *init_full_mv;
1931
  clamp_mv(best_full_mv, x->mv_limits.col_min, x->mv_limits.col_max,
1932
           x->mv_limits.row_min, x->mv_limits.row_max);
1933
  *num00 = 0;
1934
1935
  // Work out the start point for the search
1936
  in_what = xd->plane[0].pre[0].buf + best_full_mv->row * in_what_stride +
1937
            best_full_mv->col;
1938
  best_address = in_what;
1939
1940
  // Check the starting position
1941
  {
1942
    const int64_t mv_dist =
1943
        (int64_t)fn_ptr->sdf(what, what_stride, in_what, in_what_stride)
1944
        << LOG2_PRECISION;
1945
    const int64_t mv_cost =
1946
        vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num);
1947
    bestsad = mv_dist + lambda * mv_cost;
1948
  }
1949
1950
  i = 0;
1951
1952
  for (step = 0; step < tot_steps; step++) {
1953
    int all_in = 1, t;
1954
1955
    // All_in is true if every one of the points we are checking are within
1956
    // the bounds of the image.
1957
    all_in &= ((best_full_mv->row + ss_mv[i].row) > x->mv_limits.row_min);
1958
    all_in &= ((best_full_mv->row + ss_mv[i + 1].row) < x->mv_limits.row_max);
1959
    all_in &= ((best_full_mv->col + ss_mv[i + 2].col) > x->mv_limits.col_min);
1960
    all_in &= ((best_full_mv->col + ss_mv[i + 3].col) < x->mv_limits.col_max);
1961
1962
    // If all the pixels are within the bounds we don't check whether the
1963
    // search point is valid in this loop,  otherwise we check each point
1964
    // for validity..
1965
    if (all_in) {
1966
      unsigned int sad_array[4];
1967
1968
      for (j = 0; j < cfg->searches_per_step; j += 4) {
1969
        unsigned char const *block_offset[4];
1970
1971
        for (t = 0; t < 4; t++) block_offset[t] = ss_os[i + t] + best_address;
1972
1973
        fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
1974
                       sad_array);
1975
1976
        for (t = 0; t < 4; t++, i++) {
1977
          const int64_t mv_dist = (int64_t)sad_array[t] << LOG2_PRECISION;
1978
          if (mv_dist < bestsad) {
1979
            const MV this_mv = { best_full_mv->row + ss_mv[i].row,
1980
                                 best_full_mv->col + ss_mv[i].col };
1981
            const int64_t mv_cost =
1982
                vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num);
1983
            const int64_t thissad = mv_dist + lambda * mv_cost;
1984
            if (thissad < bestsad) {
1985
              bestsad = thissad;
1986
              best_site = i;
1987
            }
1988
          }
1989
        }
1990
      }
1991
    } else {
1992
      for (j = 0; j < cfg->searches_per_step; j++) {
1993
        // Trap illegal vectors
1994
        const MV this_mv = { best_full_mv->row + ss_mv[i].row,
1995
                             best_full_mv->col + ss_mv[i].col };
1996
1997
        if (is_mv_in(&x->mv_limits, &this_mv)) {
1998
          const uint8_t *const check_here = ss_os[i] + best_address;
1999
          const int64_t mv_dist =
2000
              (int64_t)fn_ptr->sdf(what, what_stride, check_here,
2001
                                   in_what_stride)
2002
              << LOG2_PRECISION;
2003
          if (mv_dist < bestsad) {
2004
            const int64_t mv_cost =
2005
                vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num);
2006
            const int64_t thissad = mv_dist + lambda * mv_cost;
2007
            if (thissad < bestsad) {
2008
              bestsad = thissad;
2009
              best_site = i;
2010
            }
2011
          }
2012
        }
2013
        i++;
2014
      }
2015
    }
2016
    if (best_site != last_site) {
2017
      best_full_mv->row += ss_mv[best_site].row;
2018
      best_full_mv->col += ss_mv[best_site].col;
2019
      best_address += ss_os[best_site];
2020
      last_site = best_site;
2021
    } else if (best_address == in_what) {
2022
      (*num00)++;
2023
    }
2024
  }
2025
  return bestsad;
2026
}
2027
2028
// Collects the full-pel motion vectors of the four blocks adjacent to the
// block at (mi_row, mi_col) in motion_field.
//
// A neighbor contributes only if it lies inside the motion field
// (block_rows x block_cols) and vp9_motion_field_is_mv_set() reports a vector
// for it. Accepted vectors are converted with get_full_mv() and written
// consecutively to nb_full_mvs, which must have room for NB_MVS_NUM entries.
//
// Returns the number of entries written.
int vp9_prepare_nb_full_mvs(const MotionField *motion_field, int mi_row,
                            int mi_col, int_mv *nb_full_mvs) {
  // (row, col) offsets, in block units, of the neighbors that are probed.
  const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } };
  const int mi_width = num_8x8_blocks_wide_lookup[motion_field->bsize];
  const int mi_height = num_8x8_blocks_high_lookup[motion_field->bsize];
  int center_brow, center_bcol;
  int count = 0;
  int k;

  // The position must be aligned to the motion field's block size.
  assert(mi_row % mi_height == 0);
  assert(mi_col % mi_width == 0);

  center_brow = mi_row / mi_height;
  center_bcol = mi_col / mi_width;

  for (k = 0; k < NB_MVS_NUM; ++k) {
    const int brow = center_brow + dirs[k][0];
    const int bcol = center_bcol + dirs[k][1];
    int_mv mv;

    if (brow < 0 || brow >= motion_field->block_rows) continue;
    if (bcol < 0 || bcol >= motion_field->block_cols) continue;
    if (!vp9_motion_field_is_mv_set(motion_field, brow, bcol)) continue;

    mv = vp9_motion_field_get_mv(motion_field, brow, bcol);
    nb_full_mvs[count].as_mv = get_full_mv(&mv.as_mv);
    ++count;
  }
  return count;
}
2053
#endif  // CONFIG_NON_GREEDY_MV
2054
2055
int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site_config *cfg,
2056
                             MV *ref_mv, uint32_t start_mv_sad, MV *best_mv,
2057
                             int search_param, int sad_per_bit, int *num00,
2058
                             const vp9_sad_fn_ptr_t *sad_fn_ptr,
2059
64.6M
                             const MV *center_mv) {
2060
64.6M
  int i, j, step;
2061
2062
64.6M
  const MACROBLOCKD *const xd = &x->e_mbd;
2063
64.6M
  uint8_t *what = x->plane[0].src.buf;
2064
64.6M
  const int what_stride = x->plane[0].src.stride;
2065
64.6M
  const uint8_t *in_what;
2066
64.6M
  const int in_what_stride = xd->plane[0].pre[0].stride;
2067
64.6M
  const uint8_t *best_address;
2068
2069
64.6M
  unsigned int bestsad = start_mv_sad;
2070
64.6M
  int best_site = -1;
2071
64.6M
  int last_site = -1;
2072
2073
64.6M
  int ref_row;
2074
64.6M
  int ref_col;
2075
2076
  // search_param determines the length of the initial step and hence the number
2077
  // of iterations.
2078
  // 0 = initial step (MAX_FIRST_STEP) pel
2079
  // 1 = (MAX_FIRST_STEP/2) pel,
2080
  // 2 = (MAX_FIRST_STEP/4) pel...
2081
  //  const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
2082
64.6M
  const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step];
2083
64.6M
  const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step];
2084
64.6M
  const int tot_steps = cfg->total_steps - search_param;
2085
2086
64.6M
  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2087
64.6M
  ref_row = ref_mv->row;
2088
64.6M
  ref_col = ref_mv->col;
2089
64.6M
  *num00 = 0;
2090
64.6M
  best_mv->row = ref_row;
2091
64.6M
  best_mv->col = ref_col;
2092
2093
  // Work out the start point for the search
2094
64.6M
  in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
2095
64.6M
  best_address = in_what;
2096
2097
64.6M
  i = 0;
2098
2099
241M
  for (step = 0; step < tot_steps; step++) {
2100
176M
    int all_in = 1, t;
2101
2102
    // All_in is true if every one of the points we are checking are within
2103
    // the bounds of the image.
2104
176M
    all_in &= ((best_mv->row + ss_mv[i].row) > x->mv_limits.row_min);
2105
176M
    all_in &= ((best_mv->row + ss_mv[i + 1].row) < x->mv_limits.row_max);
2106
176M
    all_in &= ((best_mv->col + ss_mv[i + 2].col) > x->mv_limits.col_min);
2107
176M
    all_in &= ((best_mv->col + ss_mv[i + 3].col) < x->mv_limits.col_max);
2108
2109
    // If all the pixels are within the bounds we don't check whether the
2110
    // search point is valid in this loop,  otherwise we check each point
2111
    // for validity..
2112
176M
    if (all_in) {
2113
141M
      unsigned int sad_array[4];
2114
2115
423M
      for (j = 0; j < cfg->searches_per_step; j += 4) {
2116
282M
        unsigned char const *block_offset[4];
2117
2118
1.41G
        for (t = 0; t < 4; t++) block_offset[t] = ss_os[i + t] + best_address;
2119
2120
282M
        sad_fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
2121
282M
                           sad_array);
2122
2123
1.41G
        for (t = 0; t < 4; t++, i++) {
2124
1.12G
          if (sad_array[t] < bestsad) {
2125
407M
            const MV this_mv = { best_mv->row + ss_mv[i].row,
2126
407M
                                 best_mv->col + ss_mv[i].col };
2127
407M
            sad_array[t] +=
2128
407M
                mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
2129
407M
            if (sad_array[t] < bestsad) {
2130
123M
              bestsad = sad_array[t];
2131
123M
              best_site = i;
2132
123M
            }
2133
407M
          }
2134
1.12G
        }
2135
282M
      }
2136
141M
    } else {
2137
320M
      for (j = 0; j < cfg->searches_per_step; j++) {
2138
        // Trap illegal vectors
2139
284M
        const MV this_mv = { best_mv->row + ss_mv[i].row,
2140
284M
                             best_mv->col + ss_mv[i].col };
2141
2142
284M
        if (is_mv_in(&x->mv_limits, &this_mv)) {
2143
148M
          const uint8_t *const check_here = ss_os[i] + best_address;
2144
148M
          unsigned int thissad =
2145
148M
              sad_fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
2146
2147
148M
          if (thissad < bestsad) {
2148
70.1M
            thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
2149
70.1M
            if (thissad < bestsad) {
2150
17.4M
              bestsad = thissad;
2151
17.4M
              best_site = i;
2152
17.4M
            }
2153
70.1M
          }
2154
148M
        }
2155
284M
        i++;
2156
284M
      }
2157
35.6M
    }
2158
176M
    if (best_site != last_site) {
2159
91.7M
      best_mv->row += ss_mv[best_site].row;
2160
91.7M
      best_mv->col += ss_mv[best_site].col;
2161
91.7M
      best_address += ss_os[best_site];
2162
91.7M
      last_site = best_site;
2163
#if defined(NEW_DIAMOND_SEARCH)
2164
      while (1) {
2165
        const MV this_mv = { best_mv->row + ss_mv[best_site].row,
2166
                             best_mv->col + ss_mv[best_site].col };
2167
        if (is_mv_in(&x->mv_limits, &this_mv)) {
2168
          const uint8_t *const check_here = ss_os[best_site] + best_address;
2169
          unsigned int thissad =
2170
              fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
2171
          if (thissad < bestsad) {
2172
            thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
2173
            if (thissad < bestsad) {
2174
              bestsad = thissad;
2175
              best_mv->row += ss_mv[best_site].row;
2176
              best_mv->col += ss_mv[best_site].col;
2177
              best_address += ss_os[best_site];
2178
              continue;
2179
            }
2180
          }
2181
        }
2182
        break;
2183
      }
2184
#endif
2185
91.7M
    } else if (best_address == in_what) {
2186
50.1M
      (*num00)++;
2187
50.1M
    }
2188
176M
  }
2189
64.6M
  return bestsad;
2190
64.6M
}
2191
2192
0
// Finds the offset into ref at which src matches best, using
// vpx_vector_var() as the error measure.
//
// The search window spans offsets 0..bw with bw = 4 << bwl. It is first
// sampled every 16 positions; the best position is then refined by testing
// its two neighbors at distance 8, 4, 2 and finally 1, re-centering on the
// winner after each distance. Candidates outside [0, bw] are skipped.
//
// Returns the best offset relative to the middle of the window (bw / 2).
static int vector_match(int16_t *ref, int16_t *src, int bwl) {
  const int bw = 4 << bwl;
  int best_sad = INT_MAX;
  int best_pos = 0;
  int pos, radius;

  // Coarse pass.
  for (pos = 0; pos <= bw; pos += 16) {
    const int sad = vpx_vector_var(&ref[pos], src, bwl);
    if (sad < best_sad) {
      best_sad = sad;
      best_pos = pos;
    }
  }

  // Refinement passes with a halving radius.
  for (radius = 8; radius >= 1; radius >>= 1) {
    // Both candidates of a pass are measured from the same anchor, even if
    // the first one already improved on it.
    const int anchor = best_pos;
    int side;

    for (side = -1; side <= 1; side += 2) {
      const int cand = anchor + side * radius;
      int sad;

      // check limit
      if (cand < 0 || cand > bw) continue;

      sad = vpx_vector_var(&ref[cand], src, bwl);
      if (sad < best_sad) {
        best_sad = sad;
        best_pos = cand;
      }
    }
  }

  return best_pos - (bw >> 1);
}
2256
2257
// (row, col) offsets of the four full-pel neighbors, in the order
// above, left, right, below.
static const MV search_pos[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
2263
2264
unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
2265
                                           BLOCK_SIZE bsize, int mi_row,
2266
0
                                           int mi_col, const MV *ref_mv) {
2267
0
  MACROBLOCKD *xd = &x->e_mbd;
2268
0
  MODE_INFO *mi = xd->mi[0];
2269
0
  struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } };
2270
0
  DECLARE_ALIGNED(16, int16_t, hbuf[128]);
2271
0
  DECLARE_ALIGNED(16, int16_t, vbuf[128]);
2272
0
  DECLARE_ALIGNED(16, int16_t, src_hbuf[64]);
2273
0
  DECLARE_ALIGNED(16, int16_t, src_vbuf[64]);
2274
0
  int idx;
2275
0
  const int bw = 4 << b_width_log2_lookup[bsize];
2276
0
  const int bh = 4 << b_height_log2_lookup[bsize];
2277
0
  const int search_width = bw << 1;
2278
0
  const int search_height = bh << 1;
2279
0
  const int src_stride = x->plane[0].src.stride;
2280
0
  const int ref_stride = xd->plane[0].pre[0].stride;
2281
0
  uint8_t const *ref_buf, *src_buf;
2282
0
  MV *tmp_mv = &xd->mi[0]->mv[0].as_mv;
2283
0
  unsigned int best_sad, tmp_sad, this_sad[4];
2284
0
  MV this_mv;
2285
0
  const int norm_factor = 3 + (bw >> 5);
2286
0
  const YV12_BUFFER_CONFIG *scaled_ref_frame =
2287
0
      vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]);
2288
0
  MvLimits subpel_mv_limits;
2289
2290
0
  if (scaled_ref_frame) {
2291
0
    int i;
2292
    // Swap out the reference frame for a version that's been scaled to
2293
    // match the resolution of the current frame, allowing the existing
2294
    // motion search code to be used without additional modifications.
2295
0
    for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0];
2296
0
    vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
2297
0
  }
2298
2299
0
#if CONFIG_VP9_HIGHBITDEPTH
2300
  // TODO(jingning): Implement integral projection functions for high bit-depth
2301
  // setting and remove this part of code.
2302
0
  if (xd->bd != 8) {
2303
0
    const unsigned int sad = cpi->fn_ptr[bsize].sdf(
2304
0
        x->plane[0].src.buf, src_stride, xd->plane[0].pre[0].buf, ref_stride);
2305
0
    tmp_mv->row = 0;
2306
0
    tmp_mv->col = 0;
2307
2308
0
    if (scaled_ref_frame) {
2309
0
      int i;
2310
0
      for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
2311
0
    }
2312
0
    return sad;
2313
0
  }
2314
0
#endif
2315
2316
  // Set up prediction 1-D reference set
2317
0
  ref_buf = xd->plane[0].pre[0].buf - (bw >> 1);
2318
0
  for (idx = 0; idx < search_width; idx += 16) {
2319
0
    vpx_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh);
2320
0
    ref_buf += 16;
2321
0
  }
2322
2323
0
  ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride;
2324
0
  for (idx = 0; idx < search_height; ++idx) {
2325
0
    vbuf[idx] = vpx_int_pro_col(ref_buf, bw) >> norm_factor;
2326
0
    ref_buf += ref_stride;
2327
0
  }
2328
2329
  // Set up src 1-D reference set
2330
0
  for (idx = 0; idx < bw; idx += 16) {
2331
0
    src_buf = x->plane[0].src.buf + idx;
2332
0
    vpx_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh);
2333
0
  }
2334
2335
0
  src_buf = x->plane[0].src.buf;
2336
0
  for (idx = 0; idx < bh; ++idx) {
2337
0
    src_vbuf[idx] = vpx_int_pro_col(src_buf, bw) >> norm_factor;
2338
0
    src_buf += src_stride;
2339
0
  }
2340
2341
  // Find the best match per 1-D search
2342
0
  tmp_mv->col = vector_match(hbuf, src_hbuf, b_width_log2_lookup[bsize]);
2343
0
  tmp_mv->row = vector_match(vbuf, src_vbuf, b_height_log2_lookup[bsize]);
2344
2345
0
  this_mv = *tmp_mv;
2346
0
  src_buf = x->plane[0].src.buf;
2347
0
  ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
2348
0
  best_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
2349
2350
0
  {
2351
0
    const uint8_t *const pos[4] = {
2352
0
      ref_buf - ref_stride,
2353
0
      ref_buf - 1,
2354
0
      ref_buf + 1,
2355
0
      ref_buf + ref_stride,
2356
0
    };
2357
2358
0
    cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad);
2359
0
  }
2360
2361
0
  for (idx = 0; idx < 4; ++idx) {
2362
0
    if (this_sad[idx] < best_sad) {
2363
0
      best_sad = this_sad[idx];
2364
0
      tmp_mv->row = search_pos[idx].row + this_mv.row;
2365
0
      tmp_mv->col = search_pos[idx].col + this_mv.col;
2366
0
    }
2367
0
  }
2368
2369
0
  if (this_sad[0] < this_sad[3])
2370
0
    this_mv.row -= 1;
2371
0
  else
2372
0
    this_mv.row += 1;
2373
2374
0
  if (this_sad[1] < this_sad[2])
2375
0
    this_mv.col -= 1;
2376
0
  else
2377
0
    this_mv.col += 1;
2378
2379
0
  ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col;
2380
2381
0
  tmp_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
2382
0
  if (best_sad > tmp_sad) {
2383
0
    *tmp_mv = this_mv;
2384
0
    best_sad = tmp_sad;
2385
0
  }
2386
2387
0
  tmp_mv->row *= 8;
2388
0
  tmp_mv->col *= 8;
2389
2390
0
  vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv);
2391
0
  clamp_mv(tmp_mv, subpel_mv_limits.col_min, subpel_mv_limits.col_max,
2392
0
           subpel_mv_limits.row_min, subpel_mv_limits.row_max);
2393
2394
0
  if (scaled_ref_frame) {
2395
0
    int i;
2396
0
    for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
2397
0
  }
2398
2399
0
  return best_sad;
2400
0
}
2401
2402
static int get_exhaustive_threshold(int exhaustive_searches_thresh,
2403
0
                                    BLOCK_SIZE bsize) {
2404
0
  return exhaustive_searches_thresh >>
2405
0
         (8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]));
2406
0
}
2407
2408
#if CONFIG_NON_GREEDY_MV
2409
// Runs sequence of diamond searches in smaller steps for RD.
2410
/* do_refine: If last step (1-away) of n-step search doesn't pick the center
2411
              point as the best match, we will do a final 1-away diamond
2412
              refining search  */
2413
int vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x,
2414
                               BLOCK_SIZE bsize, MV *mvp_full, int step_param,
2415
                               int lambda, int do_refine,
2416
                               const int_mv *nb_full_mvs, int full_mv_num,
2417
                               MV *best_mv) {
2418
  const vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
2419
  const SPEED_FEATURES *const sf = &cpi->sf;
2420
  int n, num00 = 0;
2421
  int thissme;
2422
  int bestsme;
2423
  const int further_steps = MAX_MVSEARCH_STEPS - 1 - step_param;
2424
  const MV center_mv = { 0, 0 };
2425
  vpx_clear_system_state();
2426
  diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, best_mv, step_param, lambda,
2427
                         &n, fn_ptr, nb_full_mvs, full_mv_num);
2428
2429
  bestsme = vp9_get_mvpred_var(x, best_mv, &center_mv, fn_ptr, 0);
2430
2431
  // If there won't be more n-step search, check to see if refining search is
2432
  // needed.
2433
  if (n > further_steps) do_refine = 0;
2434
2435
  while (n < further_steps) {
2436
    ++n;
2437
    if (num00) {
2438
      num00--;
2439
    } else {
2440
      MV temp_mv;
2441
      diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, &temp_mv,
2442
                             step_param + n, lambda, &num00, fn_ptr,
2443
                             nb_full_mvs, full_mv_num);
2444
      thissme = vp9_get_mvpred_var(x, &temp_mv, &center_mv, fn_ptr, 0);
2445
      // check to see if refining search is needed.
2446
      if (num00 > further_steps - n) do_refine = 0;
2447
2448
      if (thissme < bestsme) {
2449
        bestsme = thissme;
2450
        *best_mv = temp_mv;
2451
      }
2452
    }
2453
  }
2454
2455
  // final 1-away diamond refining search
2456
  if (do_refine) {
2457
    const int search_range = 8;
2458
    MV temp_mv = *best_mv;
2459
    vp9_refining_search_sad_new(x, &temp_mv, lambda, search_range, fn_ptr,
2460
                                nb_full_mvs, full_mv_num);
2461
    thissme = vp9_get_mvpred_var(x, &temp_mv, &center_mv, fn_ptr, 0);
2462
    if (thissme < bestsme) {
2463
      bestsme = thissme;
2464
      *best_mv = temp_mv;
2465
    }
2466
  }
2467
2468
  if (sf->exhaustive_searches_thresh < INT_MAX &&
2469
      !cpi->rc.is_src_frame_alt_ref) {
2470
    const int64_t exhaustive_thr =
2471
        get_exhaustive_threshold(sf->exhaustive_searches_thresh, bsize);
2472
    if (bestsme > exhaustive_thr) {
2473
      full_pixel_exhaustive_new(cpi, x, best_mv, fn_ptr, best_mv, lambda,
2474
                                nb_full_mvs, full_mv_num);
2475
      bestsme = vp9_get_mvpred_var(x, best_mv, &center_mv, fn_ptr, 0);
2476
    }
2477
  }
2478
  return bestsme;
2479
}
2480
#endif  // CONFIG_NON_GREEDY_MV
2481
2482
// Runs sequence of diamond searches in smaller steps for RD.
2483
/* do_refine: If last step (1-away) of n-step search doesn't pick the center
2484
              point as the best match, we will do a final 1-away diamond
2485
              refining search  */
2486
static int full_pixel_diamond(const VP9_COMP *const cpi,
2487
                              const MACROBLOCK *const x, BLOCK_SIZE bsize,
2488
                              MV *mvp_full, int step_param, int sadpb,
2489
                              int further_steps, int do_refine,
2490
                              int use_downsampled_sad, int *cost_list,
2491
                              const vp9_variance_fn_ptr_t *fn_ptr,
2492
27.1M
                              const MV *ref_mv, MV *dst_mv) {
2493
27.1M
  MV temp_mv;
2494
27.1M
  int thissme, n, num00 = 0;
2495
27.1M
  int bestsme;
2496
27.1M
  const int src_buf_stride = x->plane[0].src.stride;
2497
27.1M
  const uint8_t *const src_buf = x->plane[0].src.buf;
2498
27.1M
  const MACROBLOCKD *const xd = &x->e_mbd;
2499
27.1M
  const int pred_buf_stride = xd->plane[0].pre[0].stride;
2500
27.1M
  uint8_t *pred_buf;
2501
27.1M
  vp9_sad_fn_ptr_t sad_fn_ptr;
2502
27.1M
  unsigned int start_mv_sad, start_mv_sad_even_rows, start_mv_sad_odd_rows;
2503
27.1M
  const MV ref_mv_full = { ref_mv->row >> 3, ref_mv->col >> 3 };
2504
27.1M
  clamp_mv(mvp_full, x->mv_limits.col_min, x->mv_limits.col_max,
2505
27.1M
           x->mv_limits.row_min, x->mv_limits.row_max);
2506
2507
27.1M
  pred_buf =
2508
27.1M
      xd->plane[0].pre[0].buf + mvp_full->row * pred_buf_stride + mvp_full->col;
2509
27.1M
  start_mv_sad_even_rows =
2510
27.1M
      fn_ptr->sdsf(src_buf, src_buf_stride, pred_buf, pred_buf_stride);
2511
27.1M
  start_mv_sad_odd_rows =
2512
27.1M
      fn_ptr->sdsf(src_buf + src_buf_stride, src_buf_stride,
2513
27.1M
                   pred_buf + pred_buf_stride, pred_buf_stride);
2514
27.1M
  start_mv_sad = (start_mv_sad_even_rows + start_mv_sad_odd_rows) >> 1;
2515
27.1M
  start_mv_sad += mvsad_err_cost(x, mvp_full, &ref_mv_full, sadpb);
2516
2517
27.1M
  sad_fn_ptr.sdf = fn_ptr->sdf;
2518
27.1M
  sad_fn_ptr.sdx4df = fn_ptr->sdx4df;
2519
27.1M
  if (use_downsampled_sad && num_4x4_blocks_high_lookup[bsize] >= 2) {
2520
    // If the absolute difference between the pred-to-src SAD of even rows and
2521
    // the pred-to-src SAD of odd rows is small, skip every other row in sad
2522
    // computation.
2523
7.32M
    const int odd_to_even_diff_sad =
2524
7.32M
        abs((int)start_mv_sad_even_rows - (int)start_mv_sad_odd_rows);
2525
7.32M
    const int mult_thresh = 10;
2526
7.32M
    if (odd_to_even_diff_sad * mult_thresh < (int)start_mv_sad_even_rows) {
2527
2.00M
      sad_fn_ptr.sdf = fn_ptr->sdsf;
2528
2.00M
      sad_fn_ptr.sdx4df = fn_ptr->sdsx4df;
2529
2.00M
    }
2530
7.32M
  }
2531
2532
27.1M
  bestsme =
2533
27.1M
      cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, start_mv_sad, &temp_mv,
2534
27.1M
                              step_param, sadpb, &n, &sad_fn_ptr, ref_mv);
2535
27.1M
  if (bestsme < INT_MAX)
2536
27.1M
    bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
2537
27.1M
  *dst_mv = temp_mv;
2538
2539
  // If there won't be more n-step search, check to see if refining search is
2540
  // needed.
2541
27.1M
  if (n > further_steps) do_refine = 0;
2542
2543
68.9M
  while (n < further_steps) {
2544
41.7M
    ++n;
2545
2546
41.7M
    if (num00) {
2547
4.30M
      num00--;
2548
37.4M
    } else {
2549
37.4M
      thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, start_mv_sad,
2550
37.4M
                                        &temp_mv, step_param + n, sadpb, &num00,
2551
37.4M
                                        &sad_fn_ptr, ref_mv);
2552
37.4M
      if (thissme < INT_MAX)
2553
37.4M
        thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
2554
2555
      // check to see if refining search is needed.
2556
37.4M
      if (num00 > further_steps - n) do_refine = 0;
2557
2558
37.4M
      if (thissme < bestsme) {
2559
8.27M
        bestsme = thissme;
2560
8.27M
        *dst_mv = temp_mv;
2561
8.27M
      }
2562
37.4M
    }
2563
41.7M
  }
2564
2565
  // final 1-away diamond refining search
2566
27.1M
  if (do_refine) {
2567
15.1M
    const int search_range = 8;
2568
15.1M
    MV best_mv = *dst_mv;
2569
15.1M
    thissme = vp9_refining_search_sad(x, &best_mv, sadpb, search_range,
2570
15.1M
                                      &sad_fn_ptr, ref_mv);
2571
15.1M
    if (thissme < INT_MAX)
2572
15.1M
      thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
2573
15.1M
    if (thissme < bestsme) {
2574
1.01M
      bestsme = thissme;
2575
1.01M
      *dst_mv = best_mv;
2576
1.01M
    }
2577
15.1M
  }
2578
2579
27.1M
  if (sad_fn_ptr.sdf != fn_ptr->sdf) {
2580
    // If we are skipping rows when we perform the motion search, we need to
2581
    // check the quality of skipping. If it's bad, then we run search with
2582
    // skip row features off.
2583
2.00M
    const uint8_t *best_address = get_buf_from_mv(&xd->plane[0].pre[0], dst_mv);
2584
2.00M
    const int sad =
2585
2.00M
        fn_ptr->sdf(src_buf, src_buf_stride, best_address, pred_buf_stride);
2586
2.00M
    const int skip_sad =
2587
2.00M
        fn_ptr->sdsf(src_buf, src_buf_stride, best_address, pred_buf_stride);
2588
    // We will keep the result of skipping rows if it's good enough.
2589
2.00M
    const int kSADThresh =
2590
2.00M
        1 << (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
2591
2.00M
    if (sad > kSADThresh && abs(skip_sad - sad) * 10 >= VPXMAX(sad, 1) * 9) {
2592
      // There is a large discrepancy between skipping and not skipping, so we
2593
      // need to redo the motion search.
2594
11.5k
      return full_pixel_diamond(cpi, x, bsize, mvp_full, step_param, sadpb,
2595
11.5k
                                further_steps, do_refine, 0, cost_list, fn_ptr,
2596
11.5k
                                ref_mv, dst_mv);
2597
11.5k
    }
2598
2.00M
  }
2599
2600
  // Return cost list.
2601
27.1M
  if (cost_list) {
2602
0
    calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
2603
0
  }
2604
27.1M
  return bestsme;
2605
27.1M
}
2606
2607
// Runs an limited range exhaustive mesh search using a pattern set
2608
// according to the encode speed profile.
2609
static int full_pixel_exhaustive(const VP9_COMP *const cpi,
2610
                                 const MACROBLOCK *const x, MV *centre_mv_full,
2611
                                 int sadpb, int *cost_list,
2612
                                 const vp9_variance_fn_ptr_t *fn_ptr,
2613
0
                                 const MV *ref_mv, MV *dst_mv) {
2614
0
  const SPEED_FEATURES *const sf = &cpi->sf;
2615
0
  MV temp_mv = { centre_mv_full->row, centre_mv_full->col };
2616
0
  MV f_ref_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
2617
0
  int bestsme;
2618
0
  int i;
2619
0
  int interval = sf->mesh_patterns[0].interval;
2620
0
  int range = sf->mesh_patterns[0].range;
2621
0
  int baseline_interval_divisor;
2622
2623
  // Trap illegal values for interval and range for this function.
2624
0
  if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
2625
0
      (interval > range))
2626
0
    return INT_MAX;
2627
2628
0
  baseline_interval_divisor = range / interval;
2629
2630
  // Check size of proposed first range against magnitude of the centre
2631
  // value used as a starting point.
2632
0
  range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4);
2633
0
  range = VPXMIN(range, MAX_RANGE);
2634
0
  interval = VPXMAX(interval, range / baseline_interval_divisor);
2635
2636
  // initial search
2637
0
  bestsme = exhaustive_mesh_search(x, &f_ref_mv, &temp_mv, range, interval,
2638
0
                                   sadpb, fn_ptr, &temp_mv);
2639
2640
0
  if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) {
2641
    // Progressive searches with range and step size decreasing each time
2642
    // till we reach a step size of 1. Then break out.
2643
0
    for (i = 1; i < MAX_MESH_STEP; ++i) {
2644
      // First pass with coarser step and longer range
2645
0
      bestsme = exhaustive_mesh_search(
2646
0
          x, &f_ref_mv, &temp_mv, sf->mesh_patterns[i].range,
2647
0
          sf->mesh_patterns[i].interval, sadpb, fn_ptr, &temp_mv);
2648
2649
0
      if (sf->mesh_patterns[i].interval == 1) break;
2650
0
    }
2651
0
  }
2652
2653
0
  if (bestsme < INT_MAX)
2654
0
    bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
2655
0
  *dst_mv = temp_mv;
2656
2657
  // Return cost list.
2658
0
  if (cost_list) {
2659
0
    calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
2660
0
  }
2661
0
  return bestsme;
2662
0
}
2663
2664
#if CONFIG_NON_GREEDY_MV
2665
int64_t vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
2666
                                    int lambda, int search_range,
2667
                                    const vp9_variance_fn_ptr_t *fn_ptr,
2668
                                    const int_mv *nb_full_mvs,
2669
                                    int full_mv_num) {
2670
  const MACROBLOCKD *const xd = &x->e_mbd;
2671
  const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
2672
  const struct buf_2d *const what = &x->plane[0].src;
2673
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
2674
  const uint8_t *best_address = get_buf_from_mv(in_what, best_full_mv);
2675
  int64_t best_sad;
2676
  int i, j;
2677
  vpx_clear_system_state();
2678
  {
2679
    const int64_t mv_dist = (int64_t)fn_ptr->sdf(what->buf, what->stride,
2680
                                                 best_address, in_what->stride)
2681
                            << LOG2_PRECISION;
2682
    const int64_t mv_cost =
2683
        vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num);
2684
    best_sad = mv_dist + lambda * mv_cost;
2685
  }
2686
2687
  for (i = 0; i < search_range; i++) {
2688
    int best_site = -1;
2689
    const int all_in = ((best_full_mv->row - 1) > x->mv_limits.row_min) &
2690
                       ((best_full_mv->row + 1) < x->mv_limits.row_max) &
2691
                       ((best_full_mv->col - 1) > x->mv_limits.col_min) &
2692
                       ((best_full_mv->col + 1) < x->mv_limits.col_max);
2693
2694
    if (all_in) {
2695
      unsigned int sads[4];
2696
      const uint8_t *const positions[4] = { best_address - in_what->stride,
2697
                                            best_address - 1, best_address + 1,
2698
                                            best_address + in_what->stride };
2699
2700
      fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);
2701
2702
      for (j = 0; j < 4; ++j) {
2703
        const MV mv = { best_full_mv->row + neighbors[j].row,
2704
                        best_full_mv->col + neighbors[j].col };
2705
        const int64_t mv_dist = (int64_t)sads[j] << LOG2_PRECISION;
2706
        const int64_t mv_cost =
2707
            vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
2708
        const int64_t thissad = mv_dist + lambda * mv_cost;
2709
        if (thissad < best_sad) {
2710
          best_sad = thissad;
2711
          best_site = j;
2712
        }
2713
      }
2714
    } else {
2715
      for (j = 0; j < 4; ++j) {
2716
        const MV mv = { best_full_mv->row + neighbors[j].row,
2717
                        best_full_mv->col + neighbors[j].col };
2718
2719
        if (is_mv_in(&x->mv_limits, &mv)) {
2720
          const int64_t mv_dist =
2721
              (int64_t)fn_ptr->sdf(what->buf, what->stride,
2722
                                   get_buf_from_mv(in_what, &mv),
2723
                                   in_what->stride)
2724
              << LOG2_PRECISION;
2725
          const int64_t mv_cost =
2726
              vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
2727
          const int64_t thissad = mv_dist + lambda * mv_cost;
2728
          if (thissad < best_sad) {
2729
            best_sad = thissad;
2730
            best_site = j;
2731
          }
2732
        }
2733
      }
2734
    }
2735
2736
    if (best_site == -1) {
2737
      break;
2738
    } else {
2739
      best_full_mv->row += neighbors[best_site].row;
2740
      best_full_mv->col += neighbors[best_site].col;
2741
      best_address = get_buf_from_mv(in_what, best_full_mv);
2742
    }
2743
  }
2744
2745
  return best_sad;
2746
}
2747
#endif  // CONFIG_NON_GREEDY_MV
2748
2749
// Iteratively refines *ref_mv by evaluating its four neighbors at
// (row-1, col), (row, col-1), (row, col+1) and (row+1, col) and moving to the
// cheapest one, for at most search_range iterations.
//
// The cost of a candidate is its SAD (from sad_fn_ptr) between the source
// block x->plane[0].src and the first prediction buffer xd->plane[0].pre[0],
// plus mvsad_err_cost() measured against center_mv shifted right by 3
// (presumably converting 1/8-pel units to full-pel -- confirm against callers).
//
// ref_mv is updated in place to the best position found. The search stops
// early as soon as no neighbor beats the current best cost.
// Returns the best cost found (an unsigned int converted to int on return).
int vp9_refining_search_sad(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
2750
                            int search_range,
2751
                            const vp9_sad_fn_ptr_t *sad_fn_ptr,
2752
15.1M
                            const MV *center_mv) {
2753
15.1M
  const MACROBLOCKD *const xd = &x->e_mbd;
2754
15.1M
  // Candidate offsets as { row, col } pairs.
  const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
2755
15.1M
  const struct buf_2d *const what = &x->plane[0].src;
2756
15.1M
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
2757
15.1M
  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2758
15.1M
  const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
2759
15.1M
  // Cost of the starting position: SAD plus MV cost.
  unsigned int best_sad =
2760
15.1M
      sad_fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) +
2761
15.1M
      mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
2762
15.1M
  int i, j;
2763
2764
19.5M
  for (i = 0; i < search_range; i++) {
2765
19.4M
    int best_site = -1;
2766
19.4M
    // Nonzero when all four neighbors lie strictly inside x->mv_limits, so the
    // per-candidate is_mv_in() check can be skipped.
    const int all_in = ((ref_mv->row - 1) > x->mv_limits.row_min) &
2767
19.4M
                       ((ref_mv->row + 1) < x->mv_limits.row_max) &
2768
19.4M
                       ((ref_mv->col - 1) > x->mv_limits.col_min) &
2769
19.4M
                       ((ref_mv->col + 1) < x->mv_limits.col_max);
2770
2771
19.4M
    if (all_in) {
2772
18.4M
      unsigned int sads[4];
2773
18.4M
      // Listed in the same order as neighbors[]; sads[j] is used below as the
      // SAD of neighbors[j].
      const uint8_t *const positions[4] = { best_address - in_what->stride,
2774
18.4M
                                            best_address - 1, best_address + 1,
2775
18.4M
                                            best_address + in_what->stride };
2776
2777
18.4M
      // Compute the four SADs with a single sdx4df call.
      sad_fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride,
2778
18.4M
                         sads);
2779
2780
92.3M
      for (j = 0; j < 4; ++j) {
2781
73.8M
        // The MV cost is only computed for candidates whose raw SAD already
        // beats best_sad.
        if (sads[j] < best_sad) {
2782
31.7M
          const MV mv = { ref_mv->row + neighbors[j].row,
2783
31.7M
                          ref_mv->col + neighbors[j].col };
2784
31.7M
          sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
2785
31.7M
          if (sads[j] < best_sad) {
2786
4.36M
            best_sad = sads[j];
2787
4.36M
            best_site = j;
2788
4.36M
          }
2789
31.7M
        }
2790
73.8M
      }
2791
18.4M
    } else {
2792
4.97M
      // Near the limits: test each neighbor individually and skip those that
      // is_mv_in() rejects.
      for (j = 0; j < 4; ++j) {
2793
3.98M
        const MV mv = { ref_mv->row + neighbors[j].row,
2794
3.98M
                        ref_mv->col + neighbors[j].col };
2795
2796
3.98M
        if (is_mv_in(&x->mv_limits, &mv)) {
2797
3.14M
          unsigned int sad =
2798
3.14M
              sad_fn_ptr->sdf(what->buf, what->stride,
2799
3.14M
                              get_buf_from_mv(in_what, &mv), in_what->stride);
2800
3.14M
          if (sad < best_sad) {
2801
2.38M
            sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
2802
2.38M
            if (sad < best_sad) {
2803
278k
              best_sad = sad;
2804
278k
              best_site = j;
2805
278k
            }
2806
2.38M
          }
2807
3.14M
        }
2808
3.98M
      }
2809
995k
    }
2810
2811
19.4M
    // No neighbor improved on the current position: the search has converged.
    if (best_site == -1) {
2812
15.0M
      break;
2813
15.0M
    } else {
2814
4.43M
      // Move to the winning neighbor and refresh the cached buffer address.
      ref_mv->row += neighbors[best_site].row;
2815
4.43M
      ref_mv->col += neighbors[best_site].col;
2816
4.43M
      best_address = get_buf_from_mv(in_what, ref_mv);
2817
4.43M
    }
2818
19.4M
  }
2819
2820
15.1M
  return best_sad;
2821
15.1M
}
2822
2823
// This function is called when we do joint motion search in comp_inter_inter
2824
// mode.
//
// Refines *ref_mv in place by testing its eight surrounding positions (four
// edge neighbors followed by four diagonals) for at most search_range
// iterations. ref_mv is first clamped to x->mv_limits. Each candidate's cost
// is fn_ptr->sdaf() evaluated with second_pred, plus mvsad_err_cost() measured
// against center_mv shifted right by 3. Stops early once no neighbor beats the
// current best cost. Returns the best cost found.
2825
int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
2826
                             int search_range,
2827
                             const vp9_variance_fn_ptr_t *fn_ptr,
2828
0
                             const MV *center_mv, const uint8_t *second_pred) {
2829
0
  // Candidate offsets as { row, col } pairs.
  const MV neighbors[8] = { { -1, 0 },  { 0, -1 }, { 0, 1 },  { 1, 0 },
2830
0
                            { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } };
2831
0
  const MACROBLOCKD *const xd = &x->e_mbd;
2832
0
  const struct buf_2d *const what = &x->plane[0].src;
2833
0
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
2834
0
  const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
2835
0
  // Placeholder value; overwritten below once ref_mv has been clamped.
  unsigned int best_sad = INT_MAX;
2836
0
  int i, j;
2837
0
  clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
2838
0
           x->mv_limits.row_min, x->mv_limits.row_max);
2839
0
  // Cost of the (clamped) starting position.
  best_sad =
2840
0
      fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
2841
0
                   in_what->stride, second_pred) +
2842
0
      mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
2843
2844
0
  for (i = 0; i < search_range; ++i) {
2845
0
    int best_site = -1;
2846
2847
0
    for (j = 0; j < 8; ++j) {
2848
0
      const MV mv = { ref_mv->row + neighbors[j].row,
2849
0
                      ref_mv->col + neighbors[j].col };
2850
2851
0
      // Candidates rejected by is_mv_in() are skipped.
      if (is_mv_in(&x->mv_limits, &mv)) {
2852
0
        unsigned int sad =
2853
0
            fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
2854
0
                         in_what->stride, second_pred);
2855
0
        // The MV cost is only computed when the raw SAD already beats
        // best_sad.
        if (sad < best_sad) {
2856
0
          sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
2857
0
          if (sad < best_sad) {
2858
0
            best_sad = sad;
2859
0
            best_site = j;
2860
0
          }
2861
0
        }
2862
0
      }
2863
0
    }
2864
2865
0
    // No neighbor improved on the current position: stop searching.
    if (best_site == -1) {
2866
0
      break;
2867
0
    } else {
2868
0
      ref_mv->row += neighbors[best_site].row;
2869
0
      ref_mv->col += neighbors[best_site].col;
2870
0
    }
2871
0
  }
2872
0
  return best_sad;
2873
0
}
2874
2875
// Dispatches a full-pixel motion search according to search_method and
// returns the resulting cost value `var`.
//
// - If cost_list is non-NULL, its first five entries are reset to INT_MAX
//   before searching.
// - FAST_DIAMOND, FAST_HEX, HEX, SQUARE and BIGDIA call their dedicated search
//   routines; NSTEP and MESH both call full_pixel_diamond().
// - An additional full_pixel_exhaustive() pass runs for MESH always, and for
//   NSTEP only when sf->exhaustive_searches_thresh < INT_MAX, the frame is not
//   flagged rc.is_src_frame_alt_ref, and var exceeds the threshold from
//   get_exhaustive_threshold(). Its result replaces var and *tmp_mv only when
//   it is strictly smaller.
// - For methods other than NSTEP/MESH, when rd is nonzero and var < var_max,
//   var is recomputed with vp9_get_mvpred_var().
//
// The search routines receive tmp_mv as their output MV argument.
int vp9_full_pixel_search(const VP9_COMP *const cpi, const MACROBLOCK *const x,
2876
                          BLOCK_SIZE bsize, MV *mvp_full, int step_param,
2877
                          int search_method, int error_per_bit, int *cost_list,
2878
27.1M
                          const MV *ref_mv, MV *tmp_mv, int var_max, int rd) {
2879
27.1M
  const SPEED_FEATURES *const sf = &cpi->sf;
2880
27.1M
  const SEARCH_METHODS method = (SEARCH_METHODS)search_method;
2881
27.1M
  const vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
2882
27.1M
  int var = 0;
2883
27.1M
  int run_exhaustive_search = 0;
2884
2885
27.1M
  // Reset the optional cost list so stale entries are not mistaken for
  // results of this search.
  if (cost_list) {
2886
0
    cost_list[0] = INT_MAX;
2887
0
    cost_list[1] = INT_MAX;
2888
0
    cost_list[2] = INT_MAX;
2889
0
    cost_list[3] = INT_MAX;
2890
0
    cost_list[4] = INT_MAX;
2891
0
  }
2892
2893
27.1M
  switch (method) {
2894
0
    case FAST_DIAMOND:
2895
0
      var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0,
2896
0
                            cost_list, fn_ptr, 1, ref_mv, tmp_mv);
2897
0
      break;
2898
0
    case FAST_HEX:
2899
0
      var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0,
2900
0
                            cost_list, fn_ptr, 1, ref_mv, tmp_mv);
2901
0
      break;
2902
0
    case HEX:
2903
0
      var = hex_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
2904
0
                       fn_ptr, 1, ref_mv, tmp_mv);
2905
0
      break;
2906
0
    case SQUARE:
2907
0
      var = square_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
2908
0
                          fn_ptr, 1, ref_mv, tmp_mv);
2909
0
      break;
2910
0
    case BIGDIA:
2911
0
      var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
2912
0
                          fn_ptr, 1, ref_mv, tmp_mv);
2913
0
      break;
2914
27.1M
    // NSTEP intentionally shares the MESH case body.
    case NSTEP:
2915
27.1M
    case MESH:
2916
27.1M
      var = full_pixel_diamond(
2917
27.1M
          cpi, x, bsize, mvp_full, step_param, error_per_bit,
2918
27.1M
          MAX_MVSEARCH_STEPS - 1 - step_param, 1,
2919
27.1M
          cpi->sf.mv.use_downsampled_sad, cost_list, fn_ptr, ref_mv, tmp_mv);
2920
27.1M
      break;
2921
0
    default: assert(0 && "Unknown search method");
2922
27.1M
  }
2923
2924
27.1M
  // Decide whether to follow up with an exhaustive search.
  if (method == NSTEP) {
2925
27.1M
    if (sf->exhaustive_searches_thresh < INT_MAX &&
2926
27.1M
        !cpi->rc.is_src_frame_alt_ref) {
2927
0
      const int64_t exhaustive_thr =
2928
0
          get_exhaustive_threshold(sf->exhaustive_searches_thresh, bsize);
2929
0
      if (var > exhaustive_thr) {
2930
0
        run_exhaustive_search = 1;
2931
0
      }
2932
0
    }
2933
27.1M
  } else if (method == MESH) {
2934
0
    run_exhaustive_search = 1;
2935
0
  }
2936
2937
27.1M
  if (run_exhaustive_search) {
2938
0
    int var_ex;
2939
0
    MV tmp_mv_ex;
2940
0
    // The exhaustive search is given the current *tmp_mv and writes its
    // result to a scratch MV so it can be discarded if it is not better.
    var_ex = full_pixel_exhaustive(cpi, x, tmp_mv, error_per_bit, cost_list,
2941
0
                                   fn_ptr, ref_mv, &tmp_mv_ex);
2942
0
    if (var_ex < var) {
2943
0
      var = var_ex;
2944
0
      *tmp_mv = tmp_mv_ex;
2945
0
    }
2946
0
  }
2947
2948
27.1M
  // Pattern-search methods only: recompute var from the final MV when doing
  // an rd search and the result is below var_max.
  if (method != NSTEP && method != MESH && rd && var < var_max)
2949
0
    var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1);
2950
2951
27.1M
  return var;
2952
27.1M
}
2953
2954
// Note(yunqingwang): The following 2 functions are only used in the motion
2955
// vector unit test; they return the extreme motion vectors allowed by the MV
2956
// limits.
//
// COMMON_MV_TEST expands SETUP_SUBPEL_SEARCH (defined elsewhere in this file)
// and then casts a fixed list of identifiers to void, presumably to silence
// unused-variable/parameter warnings in functions that do not perform a real
// search. Identifiers that are not parameters of the enclosing function are
// presumably declared by SETUP_SUBPEL_SEARCH -- confirm against its
// definition.
2957
#define COMMON_MV_TEST \
2958
0
  SETUP_SUBPEL_SEARCH; \
2959
0
                       \
2960
0
  (void)error_per_bit; \
2961
0
  (void)vfp;           \
2962
0
  (void)z;             \
2963
0
  (void)src_stride;    \
2964
0
  (void)y;             \
2965
0
  (void)y_stride;      \
2966
0
  (void)second_pred;   \
2967
0
  (void)w;             \
2968
0
  (void)h;             \
2969
0
  (void)offset;        \
2970
0
  (void)mvjcost;       \
2971
0
  (void)mvcost;        \
2972
0
  (void)sse1;          \
2973
0
  (void)distortion;    \
2974
0
                       \
2975
0
  (void)halfiters;     \
2976
0
  (void)quarteriters;  \
2977
0
  (void)eighthiters;   \
2978
0
  (void)whichdir;      \
2979
0
  (void)allow_hp;      \
2980
0
  (void)forced_stop;   \
2981
0
  (void)hstep;         \
2982
0
  (void)rr;            \
2983
0
  (void)rc;            \
2984
0
                       \
2985
0
  (void)tr;            \
2986
0
  (void)tc;            \
2987
0
  (void)sse;           \
2988
0
  (void)thismse;       \
2989
0
  (void)cost_list;     \
2990
0
  (void)use_accurate_subpel_search
2991
2992
// Return the maximum MV.
//
// Performs no search: sets *bestmv to (maxr, maxc), passes it through
// lower_mv_precision(), and returns besterr, which is set to 0 here.
// maxr, maxc, minr, minc and besterr are not declared in this function; they
// presumably come from SETUP_SUBPEL_SEARCH via COMMON_MV_TEST. The remaining
// parameters exist to match the signature and are otherwise unused by the
// statements visible here.
2993
uint32_t vp9_return_max_sub_pixel_mv(
2994
    const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
2995
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
2996
    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
2997
    uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
2998
0
    int h, int use_accurate_subpel_search) {
2999
0
  COMMON_MV_TEST;
3000
3001
0
  // The lower bounds are not needed when returning the maximum.
  (void)minr;
3002
0
  (void)minc;
3003
3004
0
  bestmv->row = maxr;
3005
0
  bestmv->col = maxc;
3006
0
  besterr = 0;
3007
3008
  // In the sub-pel motion search, if hp is not used, then the last bit of mv
3009
  // has to be 0.
3010
0
  lower_mv_precision(bestmv, allow_hp && use_mv_hp(ref_mv));
3011
3012
0
  return besterr;
3013
0
}
3014
// Return the minimum MV.
//
// Performs no search: sets *bestmv to (minr, minc), passes it through
// lower_mv_precision(), and returns besterr, which is set to 0 here.
// minr, minc, maxr, maxc and besterr are not declared in this function; they
// presumably come from SETUP_SUBPEL_SEARCH via COMMON_MV_TEST. The remaining
// parameters exist to match the signature and are otherwise unused by the
// statements visible here.
3015
uint32_t vp9_return_min_sub_pixel_mv(
3016
    const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
3017
    int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
3018
    int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
3019
    uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
3020
0
    int h, int use_accurate_subpel_search) {
3021
0
  COMMON_MV_TEST;
3022
3023
0
  // The upper bounds are not needed when returning the minimum.
  (void)maxr;
3024
0
  (void)maxc;
3025
3026
0
  bestmv->row = minr;
3027
0
  bestmv->col = minc;
3028
0
  besterr = 0;
3029
3030
  // In the sub-pel motion search, if hp is not used, then the last bit of mv
3031
  // has to be 0.
3032
0
  lower_mv_precision(bestmv, allow_hp && use_mv_hp(ref_mv));
3033
3034
0
  return besterr;
3035
0
}