Coverage Report

Created: 2026-05-16 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/svt-av1/Source/Lib/Codec/mcomp.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10
 */
11
12
#include <limits.h>
13
#include <math.h>
14
#include <stdio.h>
15
#include "mcomp.h"
16
#include "mv.h"
17
#include "av1_common.h"
18
#include "coding_unit.h"
19
#include "block_structures.h"
20
#include "av1me.h"
21
#include "aom_dsp_rtcd.h"
22
#include "rd_cost.h"
23
// ============================================================================
24
//  Cost of motion vectors
25
// ============================================================================
26
// TODO(any): Adaptively adjust the regularization strength based on image size
27
// and motion activity instead of using hard-coded values. It seems like we
28
// roughly halve the lambda for each increase in resolution.
29
// These are multipliers used to perform regularization in motion compensation
30
// when x->mv_cost_type is set to MV_COST_L1.
31
// LOWRES
32
0
#define SSE_LAMBDA_LOWRES 2 // L1 cost multiplier applied by svt_mv_err_cost() for MV_COST_L1_LOWRES
33
// MIDRES
34
0
#define SSE_LAMBDA_MIDRES 0 // L1 cost multiplier applied by svt_mv_err_cost() for MV_COST_L1_MIDRES; 0 makes that cost 0
35
// HDRES
36
0
#define SSE_LAMBDA_HDRES 1 // L1 cost multiplier applied by svt_mv_err_cost() for MV_COST_L1_HDRES
37
38
// Returns the cost of using the current mv during the motion search. This is
39
// used when var is used as the error metric.
40
#define PIXEL_TRANSFORM_ERROR_SCALE 4 // Added to the rounding shift svt_mv_err_cost() applies to the MV rate cost
41
42
// Computes the motion-vector cost term that the sub-pel search adds to the
// prediction error of a candidate.
//
//   mv, ref_mv       candidate MV and the MV it is measured against
//   mvjcost, mvcost  cost tables forwarded to svt_mv_cost() (MV_COST_ENTROPY only)
//   error_per_bit    multiplier applied to the cost (MV_COST_ENTROPY / MV_COST_OPT)
//   mv_cost_type     selects the cost model
//
// Returns 0 for MV_COST_NONE, for MV_COST_ENTROPY when mvcost is NULL, and for
// an unrecognised mv_cost_type (which also trips an assert).
static INLINE int svt_mv_err_cost(const Mv* mv, const Mv* ref_mv, const int* mvjcost, const int* const mvcost[2],
                                  int error_per_bit, MV_COST_TYPE mv_cost_type) {
    const Mv delta     = {{mv->x - ref_mv->x, mv->y - ref_mv->y}};
    const Mv abs_delta = {{abs(delta.x), abs(delta.y)}};

    // L1 norm of the MV difference, shared by every non-entropy model.
    const int l1_norm = abs_delta.y + abs_delta.x;
    // Rounding shift shared by the two models that scale by error_per_bit.
    const int rate_shift = RDDIV_BITS + AV1_PROB_COST_SHIFT - RD_EPB_SHIFT + PIXEL_TRANSFORM_ERROR_SCALE;

    switch (mv_cost_type) {
    case MV_COST_NONE: {
        return 0;
    }
    case MV_COST_L1_LOWRES: {
        return (SSE_LAMBDA_LOWRES * l1_norm) >> 3;
    }
    case MV_COST_L1_MIDRES: {
        return (SSE_LAMBDA_MIDRES * l1_norm) >> 3;
    }
    case MV_COST_L1_HDRES: {
        return (SSE_LAMBDA_HDRES * l1_norm) >> 3;
    }
    case MV_COST_OPT: {
        const int64_t scaled = (int64_t)(l1_norm << 8) * error_per_bit;
        return (int)ROUND_POWER_OF_TWO_64(scaled, rate_shift);
    }
    case MV_COST_ENTROPY: {
        // Without cost tables there is nothing to look up.
        if (!mvcost) {
            return 0;
        }
        const int64_t scaled = (int64_t)svt_mv_cost(&delta, mvjcost, mvcost) * error_per_bit;
        return (int)ROUND_POWER_OF_TWO_64(scaled, rate_shift);
    }
    default: {
        assert(0 && "Invalid rd_cost_type");
        return 0;
    }
    }
}
73
74
0
static INLINE int svt_mv_err_cost_(const Mv* mv, const svt_mv_cost_param* mv_cost_params) {
75
0
    return svt_mv_err_cost(mv,
76
0
                           mv_cost_params->ref_mv,
77
0
                           mv_cost_params->mvjcost,
78
0
                           mv_cost_params->mvcost,
79
0
                           mv_cost_params->error_per_bit,
80
0
                           mv_cost_params->mv_cost_type);
81
0
}
82
83
// =============================================================================
84
//  Subpixel Motion Search: Translational
85
// =============================================================================
86
0
#define INIT_SUBPEL_STEP_SIZE (4) // Initial sub-pel search step: 4/8 = 1/2 pel
87
88
/*
89
 * To avoid the penalty for crossing cache-line read, preload the reference
90
 * area in a small buffer, which is aligned to make sure there won't be crossing
91
 * cache-line read while reading from this buffer. This reduced the cpu
92
 * cycles spent on reading ref data in sub-pixel filter functions.
93
 * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x
94
 * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we
95
 * could reduce the area.
96
 */
97
98
// Returns the subpel offset used by various subpel variance functions [m]sv[a]f
99
0
static INLINE int svt_get_subpel_part(int x) {
100
0
    return x & 7;
101
0
}
102
103
// Gets the address of the ref buffer at subpel location (r, c), rounded to the
104
// nearest fullpel precision toward - \infty
105
106
0
static INLINE const uint8_t* svt_get_buf_from_mv(const struct svt_buf_2d* buf, const Mv mv) {
107
0
    const int offset = (mv.y >> 3) * buf->stride + (mv.x >> 3);
108
0
    return &buf->buf[offset];
109
0
}
110
111
// Calculates the variance of prediction residue.
112
static int svt_upsampled_pref_error(MacroBlockD* xd, const struct AV1Common* const cm, const Mv* this_mv,
113
0
                                    const SUBPEL_SEARCH_VAR_PARAMS* var_params, unsigned int* sse) {
114
0
    const AomVarianceFnPtr*  vfp                = var_params->vfp;
115
0
    const SUBPEL_SEARCH_TYPE subpel_search_type = var_params->subpel_search_type;
116
117
0
    const MSBuffers* ms_buffers  = &var_params->ms_buffers;
118
0
    const uint8_t*   src         = ms_buffers->src->buf;
119
0
    const uint8_t*   ref         = svt_get_buf_from_mv(ms_buffers->ref, *this_mv);
120
0
    const int        src_stride  = ms_buffers->src->stride;
121
0
    const int        ref_stride  = ms_buffers->ref->stride;
122
0
    const int        w           = var_params->w;
123
0
    const int        h           = var_params->h;
124
0
    const int        mi_row      = xd->mi_row;
125
0
    const int        mi_col      = xd->mi_col;
126
0
    const int        subpel_x_q3 = svt_get_subpel_part(this_mv->x);
127
0
    const int        subpel_y_q3 = svt_get_subpel_part(this_mv->y);
128
129
0
    unsigned int besterr;
130
0
    {
131
0
        DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
132
133
0
        {
134
0
            svt_aom_upsampled_pred(xd,
135
0
                                   cm,
136
0
                                   mi_row,
137
0
                                   mi_col,
138
0
                                   this_mv,
139
0
                                   pred,
140
0
                                   w,
141
0
                                   h,
142
0
                                   subpel_x_q3,
143
0
                                   subpel_y_q3,
144
0
                                   ref,
145
0
                                   ref_stride,
146
0
                                   subpel_search_type);
147
0
        }
148
0
        besterr = vfp->vf(pred, w, src, src_stride, sse);
149
0
    }
150
151
0
    return besterr;
152
0
}
153
154
// Estimates the variance of prediction residue using bilinear filter for fast
155
// search.
156
static INLINE int svt_estimated_pref_error(const Mv* this_mv, const SUBPEL_SEARCH_VAR_PARAMS* var_params,
157
0
                                           unsigned int* sse) {
158
0
    const AomVarianceFnPtr* vfp = var_params->vfp;
159
160
0
    const MSBuffers* ms_buffers = &var_params->ms_buffers;
161
0
    const uint8_t*   src        = ms_buffers->src->buf;
162
0
    const uint8_t*   ref        = svt_get_buf_from_mv(ms_buffers->ref, *this_mv);
163
0
    const int        src_stride = ms_buffers->src->stride;
164
0
    const int        ref_stride = ms_buffers->ref->stride;
165
166
0
    const int subpel_x_q3 = svt_get_subpel_part(this_mv->x);
167
0
    const int subpel_y_q3 = svt_get_subpel_part(this_mv->y);
168
169
    // TODO: port other variance-related functions
170
0
    return vfp->svf(ref, ref_stride, subpel_x_q3, subpel_y_q3, src, src_stride, sse);
171
0
}
172
173
// Estimates whether this_mv is better than best_mv. This function incorporates
174
// both prediction error and residue into account. It is suffixed "fast" because
175
// it uses bilinear filter to estimate the prediction.
176
static INLINE unsigned int svt_check_better_fast(MacroBlockD* xd, const struct AV1Common* const cm, const Mv* this_mv,
177
                                                 Mv* best_mv, const SubpelMvLimits* mv_limits,
178
                                                 const SUBPEL_SEARCH_VAR_PARAMS* var_params,
179
                                                 const svt_mv_cost_param* mv_cost_params, unsigned int* besterr,
180
                                                 unsigned int* sse1, int* distortion, int* has_better_mv,
181
0
                                                 int is_scaled) {
182
0
    unsigned int cost;
183
0
    if (svt_av1_is_subpelmv_in_range(mv_limits, *this_mv)) {
184
0
        unsigned int sse;
185
0
        int          thismse;
186
0
        cost = svt_mv_err_cost_(this_mv, mv_cost_params);
187
0
        if (mv_cost_params->mv_cost_type == MV_COST_OPT) {
188
0
            int64_t bestcost = *distortion + cost;
189
0
            if (bestcost > (((int64_t)*besterr * (int64_t)mv_cost_params->early_exit_th) / 1000)) {
190
0
                return (uint32_t)bestcost;
191
0
            }
192
0
        }
193
        // TODO: add estimated func
194
0
        if (is_scaled) {
195
0
            thismse = svt_upsampled_pref_error(xd, cm, this_mv, var_params, &sse);
196
0
        } else {
197
0
            thismse = svt_estimated_pref_error(this_mv, var_params, &sse);
198
0
        }
199
0
        cost += thismse;
200
0
        int weight = 100;
201
0
        if (var_params->bias_fp && (*best_mv).x % 8 == 0 && (*best_mv).y % 8 == 0) {
202
0
            weight = var_params->bias_fp;
203
0
        }
204
0
        if ((((uint64_t)cost * weight) / 100) < *besterr) {
205
0
            *besterr    = cost;
206
0
            *best_mv    = *this_mv;
207
0
            *distortion = thismse;
208
0
            *sse1       = sse;
209
0
            *has_better_mv |= 1;
210
0
        }
211
0
    } else {
212
0
        cost = INT_MAX;
213
0
    }
214
0
    return cost;
215
0
}
216
217
// Checks whether this_mv is better than best_mv. This function incorporates
218
// both prediction error and residue into account.
219
static AOM_FORCE_INLINE unsigned int svt_check_better(MacroBlockD* xd, const struct AV1Common* const cm,
220
                                                      const Mv* this_mv, Mv* best_mv, const SubpelMvLimits* mv_limits,
221
                                                      const SUBPEL_SEARCH_VAR_PARAMS* var_params,
222
                                                      const svt_mv_cost_param* mv_cost_params, unsigned int* besterr,
223
0
                                                      unsigned int* sse1, int* distortion, int* is_better) {
224
0
    unsigned int cost;
225
0
    if (svt_av1_is_subpelmv_in_range(mv_limits, *this_mv)) {
226
0
        unsigned int sse;
227
0
        int          thismse;
228
0
        thismse = svt_upsampled_pref_error(xd, cm, this_mv, var_params, &sse);
229
0
        cost    = svt_mv_err_cost_(this_mv, mv_cost_params);
230
0
        cost += thismse;
231
0
        int weight = 100;
232
0
        if (var_params->bias_fp && (*best_mv).x % 8 == 0 && (*best_mv).y % 8 == 0) {
233
0
            weight = var_params->bias_fp;
234
0
        }
235
0
        if ((((uint64_t)cost * weight) / 100) < *besterr) {
236
0
            *besterr    = cost;
237
0
            *best_mv    = *this_mv;
238
0
            *distortion = thismse;
239
0
            *sse1       = sse;
240
0
            *is_better |= 1;
241
0
        }
242
0
    } else {
243
0
        cost = INT_MAX;
244
0
    }
245
0
    return cost;
246
0
}
247
248
// Picks the diagonal direction suggested by the four neighbour costs: each
// component is -step_size when the left/up cost is not larger than the
// right/down cost, and +step_size otherwise.
static INLINE Mv get_best_diag_step(int step_size, unsigned int left_cost, unsigned int right_cost,
                                    unsigned int up_cost, unsigned int down_cost) {
    const int step_x = (left_cost <= right_cost) ? -step_size : step_size;
    const int step_y = (up_cost <= down_cost) ? -step_size : step_size;

    const Mv chosen = {{step_x, step_y}};
    return chosen;
}
255
256
// First search level: runs svt_check_better() on the left, right, top and
// bottom neighbours of this_mv at distance hstep, then on the diagonal
// neighbour selected by get_best_diag_step(). Best-MV state is updated through
// the pointer arguments by svt_check_better().
//
// Returns the diagonal step that was selected.
static AOM_FORCE_INLINE Mv svt_first_level_check(MacroBlockD* xd, const struct AV1Common* const cm, const Mv this_mv,
                                                 Mv* best_mv, const int hstep, const SubpelMvLimits* mv_limits,
                                                 const SUBPEL_SEARCH_VAR_PARAMS* var_params,
                                                 const svt_mv_cost_param* mv_cost_params, unsigned int* besterr,
                                                 unsigned int* sse1, int* distortion) {
    // svt_check_better() needs a flag to write to; its value is not used here.
    int ignored = 0;

    const Mv cand_left   = {{this_mv.x - hstep, this_mv.y}};
    const Mv cand_right  = {{this_mv.x + hstep, this_mv.y}};
    const Mv cand_top    = {{this_mv.x, this_mv.y - hstep}};
    const Mv cand_bottom = {{this_mv.x, this_mv.y + hstep}};

    const unsigned int cost_left = svt_check_better(
        xd, cm, &cand_left, best_mv, mv_limits, var_params, mv_cost_params, besterr, sse1, distortion, &ignored);
    const unsigned int cost_right = svt_check_better(
        xd, cm, &cand_right, best_mv, mv_limits, var_params, mv_cost_params, besterr, sse1, distortion, &ignored);
    const unsigned int cost_top = svt_check_better(
        xd, cm, &cand_top, best_mv, mv_limits, var_params, mv_cost_params, besterr, sse1, distortion, &ignored);
    const unsigned int cost_bottom = svt_check_better(
        xd, cm, &cand_bottom, best_mv, mv_limits, var_params, mv_cost_params, besterr, sse1, distortion, &ignored);

    const Mv step      = get_best_diag_step(hstep, cost_left, cost_right, cost_top, cost_bottom);
    const Mv cand_diag = {{this_mv.x + step.x, this_mv.y + step.y}};

    // Finish with the diagonal neighbour in the cheapest quadrant.
    svt_check_better(
        xd, cm, &cand_diag, best_mv, mv_limits, var_params, mv_cost_params, besterr, sse1, distortion, &ignored);

    return step;
}
285
286
// A newer version of second level check that gives better quality.
287
// TODO(chiyotsai@google.com): evaluate this on subpel_search_types different
288
// from av1_find_best_sub_pixel_tree
289
static AOM_FORCE_INLINE void svt_second_level_check_v2(MacroBlockD* xd, const struct AV1Common* const cm,
290
                                                       const Mv this_mv, Mv diag_step, Mv* best_mv,
291
                                                       const SubpelMvLimits*           mv_limits,
292
                                                       const SUBPEL_SEARCH_VAR_PARAMS* var_params,
293
                                                       const svt_mv_cost_param* mv_cost_params, unsigned int* besterr,
294
0
                                                       unsigned int* sse1, int* distortion, int is_scaled) {
295
0
    assert(best_mv->y == this_mv.y + diag_step.y || best_mv->x == this_mv.x + diag_step.x);
296
0
    if (CHECK_MV_EQUAL(this_mv, *best_mv)) {
297
0
        return;
298
0
    } else if (this_mv.y == best_mv->y) {
299
        // Search away from diagonal step since diagonal search did not provide any
300
        // improvement
301
0
        diag_step.y *= -1;
302
0
    } else if (this_mv.x == best_mv->x) {
303
0
        diag_step.x *= -1;
304
0
    }
305
306
0
    const Mv row_bias_mv   = {{best_mv->x, best_mv->y + diag_step.y}};
307
0
    const Mv col_bias_mv   = {{best_mv->x + diag_step.x, best_mv->y}};
308
0
    const Mv diag_bias_mv  = {{best_mv->x + diag_step.x, best_mv->y + diag_step.y}};
309
0
    int      has_better_mv = 0;
310
0
    svt_check_better(xd,
311
0
                     cm,
312
0
                     &row_bias_mv,
313
0
                     best_mv,
314
0
                     mv_limits,
315
0
                     var_params,
316
0
                     mv_cost_params,
317
0
                     besterr,
318
0
                     sse1,
319
0
                     distortion,
320
0
                     &has_better_mv);
321
0
    svt_check_better(xd,
322
0
                     cm,
323
0
                     &col_bias_mv,
324
0
                     best_mv,
325
0
                     mv_limits,
326
0
                     var_params,
327
0
                     mv_cost_params,
328
0
                     besterr,
329
0
                     sse1,
330
0
                     distortion,
331
0
                     &has_better_mv);
332
333
    // Do an additional search if the second iteration gives a better mv
334
0
    if (has_better_mv) {
335
0
        svt_check_better(xd,
336
0
                         cm,
337
0
                         &diag_bias_mv,
338
0
                         best_mv,
339
0
                         mv_limits,
340
0
                         var_params,
341
0
                         mv_cost_params,
342
0
                         besterr,
343
0
                         sse1,
344
0
                         distortion,
345
0
                         &has_better_mv);
346
0
    }
347
0
    (void)is_scaled;
348
0
}
349
350
// Gets the error at the beginning when the mv has fullpel precision
351
static unsigned int svt_upsampled_setup_center_error(const Mv* bestmv, const SUBPEL_SEARCH_VAR_PARAMS* var_params,
352
                                                     const svt_mv_cost_param* mv_cost_params,
353
0
                                                     unsigned int*            distortion) {
354
0
    const MSBuffers* ms_buffers = &var_params->ms_buffers;
355
0
    const uint8_t*   ref        = svt_get_buf_from_mv(ms_buffers->ref, *bestmv);
356
0
    *distortion                 = var_params->vfp->vf(
357
0
        ref, ms_buffers->ref->stride, ms_buffers->src->buf, ms_buffers->src->stride, distortion);
358
0
    return *distortion + svt_mv_err_cost_(bestmv, mv_cost_params);
359
0
}
360
361
// Searches the four cardinal direction for a better mv, then follows up with a
362
// search in the best quadrant. This uses bilinear filter to speed up the
363
// calculation.
364
static AOM_FORCE_INLINE Mv first_level_check_fast(MacroBlockD* xd, const struct AV1Common* const cm, const Mv this_mv,
365
                                                  Mv* best_mv, int hstep, const SubpelMvLimits* mv_limits,
366
                                                  const SUBPEL_SEARCH_VAR_PARAMS* var_params,
367
                                                  const svt_mv_cost_param* mv_cost_params, unsigned int* besterr,
368
                                                  unsigned int orgerr, unsigned int* sse1, int* distortion,
369
                                                  int is_scaled) {
370
    // Check the four cardinal directions
371
    const Mv           left_mv = {{this_mv.x - hstep, this_mv.y}};
372
    int                dummy   = 0;
373
    const unsigned int left    = svt_check_better_fast(
374
        xd, cm, &left_mv, best_mv, mv_limits, var_params, mv_cost_params, besterr, sse1, distortion, &dummy, is_scaled);
375
376
    const Mv           right_mv = {{this_mv.x + hstep, this_mv.y}};
377
    const unsigned int right    = svt_check_better_fast(xd,
378
                                                     cm,
379
                                                     &right_mv,
380
                                                     best_mv,
381
                                                     mv_limits,
382
                                                     var_params,
383
                                                     mv_cost_params,
384
                                                     besterr,
385
                                                     sse1,
386
                                                     distortion,
387
                                                     &dummy,
388
                                                     is_scaled);
389
390
    const Mv           top_mv = {{this_mv.x, this_mv.y - hstep}};
391
    const unsigned int up     = svt_check_better_fast(
392
        xd, cm, &top_mv, best_mv, mv_limits, var_params, mv_cost_params, besterr, sse1, distortion, &dummy, is_scaled);
393
394
    const Mv           bottom_mv = {{this_mv.x, this_mv.y + hstep}};
395
    const unsigned int down      = svt_check_better_fast(xd,
396
                                                    cm,
397
                                                    &bottom_mv,
398
                                                    best_mv,
399
                                                    mv_limits,
400
                                                    var_params,
401
                                                    mv_cost_params,
402
                                                    besterr,
403
                                                    sse1,
404
                                                    distortion,
405
                                                    &dummy,
406
                                                    is_scaled);
407
408
    const Mv diag_step = get_best_diag_step(hstep, left, right, up, down);
409
    const Mv diag_mv   = {{this_mv.x + diag_step.x, this_mv.y + diag_step.y}};
410
    if (*besterr >= orgerr) {
411
        return diag_step;
412
    }
413
    // Check the diagonal direction with the best mv
414
    svt_check_better_fast(
415
        xd, cm, &diag_mv, best_mv, mv_limits, var_params, mv_cost_params, besterr, sse1, distortion, &dummy, is_scaled);
416
417
    return diag_step;
418
}
419
420
// Performs a following up search after first_level_check_fast is called. This
421
// performs two extra chess pattern searches in the best quadrant.
422
static AOM_FORCE_INLINE void second_level_check_fast(MacroBlockD* xd, const struct AV1Common* const cm,
423
                                                     const Mv this_mv, const Mv diag_step, Mv* best_mv, int hstep,
424
                                                     const SubpelMvLimits*           mv_limits,
425
                                                     const SUBPEL_SEARCH_VAR_PARAMS* var_params,
426
                                                     const svt_mv_cost_param* mv_cost_params, unsigned int* besterr,
427
                                                     unsigned int* sse1, int* distortion, int is_scaled) {
428
    assert(diag_step.y == hstep || diag_step.y == -hstep);
429
    assert(diag_step.x == hstep || diag_step.x == -hstep);
430
    const int tr    = this_mv.y;
431
    const int tc    = this_mv.x;
432
    const int br    = best_mv->y;
433
    const int bc    = best_mv->x;
434
    int       dummy = 0;
435
    if (tr != br && tc != bc) {
436
        assert(diag_step.x == bc - tc);
437
        assert(diag_step.y == br - tr);
438
        const Mv chess_mv_1 = {{bc + diag_step.x, br}};
439
        const Mv chess_mv_2 = {{bc, br + diag_step.y}};
440
        svt_check_better_fast(xd,
441
                              cm,
442
                              &chess_mv_1,
443
                              best_mv,
444
                              mv_limits,
445
                              var_params,
446
                              mv_cost_params,
447
                              besterr,
448
                              sse1,
449
                              distortion,
450
                              &dummy,
451
                              is_scaled);
452
453
        svt_check_better_fast(xd,
454
                              cm,
455
                              &chess_mv_2,
456
                              best_mv,
457
                              mv_limits,
458
                              var_params,
459
                              mv_cost_params,
460
                              besterr,
461
                              sse1,
462
                              distortion,
463
                              &dummy,
464
                              is_scaled);
465
    } else if (tr == br && tc != bc) {
466
        assert(diag_step.x == bc - tc);
467
        // Continue searching in the best direction
468
        const Mv bottom_long_mv = {{bc + diag_step.x, br + hstep}};
469
        const Mv top_long_mv    = {{bc + diag_step.x, br - hstep}};
470
        svt_check_better_fast(xd,
471
                              cm,
472
                              &bottom_long_mv,
473
                              best_mv,
474
                              mv_limits,
475
                              var_params,
476
                              mv_cost_params,
477
                              besterr,
478
                              sse1,
479
                              distortion,
480
                              &dummy,
481
                              is_scaled);
482
        svt_check_better_fast(xd,
483
                              cm,
484
                              &top_long_mv,
485
                              best_mv,
486
                              mv_limits,
487
                              var_params,
488
                              mv_cost_params,
489
                              besterr,
490
                              sse1,
491
                              distortion,
492
                              &dummy,
493
                              is_scaled);
494
495
        // Search in the direction opposite of the best quadrant
496
        const Mv rev_mv = {{bc, br - diag_step.y}};
497
        svt_check_better_fast(xd,
498
                              cm,
499
                              &rev_mv,
500
                              best_mv,
501
                              mv_limits,
502
                              var_params,
503
                              mv_cost_params,
504
                              besterr,
505
                              sse1,
506
                              distortion,
507
                              &dummy,
508
                              is_scaled);
509
    } else if (tr != br && tc == bc) {
510
        assert(diag_step.y == br - tr);
511
        // Continue searching in the best direction
512
        const Mv right_long_mv = {{bc + hstep, br + diag_step.y}};
513
        const Mv left_long_mv  = {{bc - hstep, br + diag_step.y}};
514
        svt_check_better_fast(xd,
515
                              cm,
516
                              &right_long_mv,
517
                              best_mv,
518
                              mv_limits,
519
                              var_params,
520
                              mv_cost_params,
521
                              besterr,
522
                              sse1,
523
                              distortion,
524
                              &dummy,
525
                              is_scaled);
526
        svt_check_better_fast(xd,
527
                              cm,
528
                              &left_long_mv,
529
                              best_mv,
530
                              mv_limits,
531
                              var_params,
532
                              mv_cost_params,
533
                              besterr,
534
                              sse1,
535
                              distortion,
536
                              &dummy,
537
                              is_scaled);
538
539
        // Search in the direction opposite of the best quadrant
540
        const Mv rev_mv = {{bc - diag_step.x, br}};
541
        svt_check_better_fast(xd,
542
                              cm,
543
                              &rev_mv,
544
                              best_mv,
545
                              mv_limits,
546
                              var_params,
547
                              mv_cost_params,
548
                              besterr,
549
                              sse1,
550
                              distortion,
551
                              &dummy,
552
                              is_scaled);
553
    }
554
}
555
556
// Combines first level check and second level check when applicable. This first
557
// searches the four cardinal directions, and perform several
558
// diagonal/chess-pattern searches in the best quadrant.
559
static AOM_FORCE_INLINE void two_level_checks_fast(MacroBlockD* xd, const struct AV1Common* const cm, const Mv this_mv,
560
                                                   Mv* best_mv, int hstep, const SubpelMvLimits* mv_limits,
561
                                                   const SUBPEL_SEARCH_VAR_PARAMS* var_params,
562
                                                   const svt_mv_cost_param* mv_cost_params, unsigned int* besterr,
563
                                                   unsigned int orgerr, unsigned int* sse1, int* distortion, int iters,
564
                                                   int is_scaled) {
565
    const Mv diag_step = first_level_check_fast(xd,
566
                                                cm,
567
                                                this_mv,
568
                                                best_mv,
569
                                                hstep,
570
                                                mv_limits,
571
                                                var_params,
572
                                                mv_cost_params,
573
                                                besterr,
574
                                                orgerr,
575
                                                sse1,
576
                                                distortion,
577
                                                is_scaled);
578
    if (*besterr < orgerr) {
579
        if (iters > 1) {
580
            second_level_check_fast(xd,
581
                                    cm,
582
                                    this_mv,
583
                                    diag_step,
584
                                    best_mv,
585
                                    hstep,
586
                                    mv_limits,
587
                                    var_params,
588
                                    mv_cost_params,
589
                                    besterr,
590
                                    sse1,
591
                                    distortion,
592
                                    is_scaled);
593
        }
594
    }
595
}
596
597
extern const uint8_t svt_aom_eb_av1_var_offs[MAX_SB_SIZE]; // MAX_SB_SIZE-byte table; declared here, defined in another translation unit
598
599
int svt_av1_find_best_sub_pixel_tree_pruned(void* ictx, MacroBlockD* xd, const struct AV1Common* const cm,
600
                                            SUBPEL_MOTION_SEARCH_PARAMS* ms_params, Mv start_mv, Mv* bestmv,
601
                                            int* distortion, unsigned int* sse1, int qp, BlockSize bsize,
602
0
                                            uint8_t early_neigh_check_exit) {
603
0
    (void)ictx;
604
0
    (void)cm;
605
0
    const int                       allow_hp       = ms_params->allow_hp;
606
0
    const int                       forced_stop    = ms_params->forced_stop;
607
0
    const int                       iters_per_step = ms_params->iters_per_step;
608
0
    const SubpelMvLimits*           mv_limits      = &ms_params->mv_limits;
609
0
    const svt_mv_cost_param*        mv_cost_params = &ms_params->mv_cost_params;
610
0
    const SUBPEL_SEARCH_VAR_PARAMS* var_params     = &ms_params->var_params;
611
0
    int                             hstep          = INIT_SUBPEL_STEP_SIZE; // Step size, initialized to 4/8=1/2 pel
612
0
    unsigned int                    besterr;
613
0
    unsigned int                    org_error;
614
0
    *bestmv = start_mv;
615
616
0
    const int is_scaled = 0;
617
0
    besterr = svt_upsampled_setup_center_error(bestmv, var_params, mv_cost_params, (unsigned int*)distortion);
618
619
0
    if (ictx != NULL && ms_params->search_stage == SPEL_ME) {
620
0
        ModeDecisionContext* ctx                                 = (ModeDecisionContext*)ictx;
621
0
        ctx->fp_me_dist[ms_params->list_idx][ms_params->ref_idx] = besterr;
622
0
    }
623
624
0
    if (early_neigh_check_exit) {
625
0
        return besterr;
626
0
    }
627
0
    const uint64_t th_normalizer = (uint64_t)(((var_params->w * var_params->h) << 5) *
628
0
                                              (uint64_t)ms_params->abs_th_mult);
629
0
    if ((uint64_t)qp * besterr < th_normalizer) {
630
0
        return besterr;
631
0
    }
632
    // How many steps to take. A round of 0 means fullpel search only, 1 means
633
    // half-pel, and so on.
634
0
    const int round = AOMMIN(FULL_PEL - forced_stop, 3 - !allow_hp);
635
636
    // If forced_stop is FULL_PEL, return.
637
0
    if (!round) {
638
0
        return besterr;
639
0
    }
640
    // Exit subpel search if the variance of the full-pel predicted samples is low (i.e. where likely interpolation will not modify the integer samples)
641
0
    if (ms_params->pred_variance_th) {
642
0
        const MSBuffers*   ms_buffers = &var_params->ms_buffers;
643
0
        const uint8_t*     ref        = svt_get_buf_from_mv(ms_buffers->ref, *bestmv);
644
0
        unsigned int       sse;
645
0
        const unsigned int var = var_params->vfp->vf(ref, ms_buffers->ref->stride, svt_aom_eb_av1_var_offs, 0, &sse);
646
0
        int                block_var = ROUND_POWER_OF_TWO(var, eb_num_pels_log2_lookup[bsize]);
647
648
0
        if (block_var < ms_params->pred_variance_th) {
649
0
            return besterr;
650
0
        }
651
0
    }
652
0
    if (ms_params->skip_diag_refinement >= 4) {
653
0
        org_error = 0;
654
0
    } else {
655
0
        unsigned int demo = ms_params->skip_diag_refinement >= 2
656
0
            ? ((var_params->w >= 64 || var_params->h >= 64) ? 2 : 1)
657
0
            : 1;
658
0
        org_error         = ms_params->skip_diag_refinement ? besterr / demo : INT_MAX;
659
0
    }
660
0
    for (int iter = 0; iter < round; ++iter) {
661
0
        unsigned int prev_besterr = besterr;
662
0
        two_level_checks_fast(xd,
663
0
                              cm,
664
0
                              start_mv,
665
0
                              bestmv,
666
0
                              hstep,
667
0
                              mv_limits,
668
0
                              var_params,
669
0
                              mv_cost_params,
670
0
                              &besterr,
671
0
                              org_error,
672
0
                              sse1,
673
0
                              distortion,
674
0
                              iters_per_step,
675
0
                              is_scaled);
676
0
        hstep >>= 1;
677
0
        start_mv = *bestmv;
678
0
        if (ms_params->skip_diag_refinement && iter < QUARTER_PEL) {
679
0
            org_error = MIN(org_error, besterr);
680
0
        }
681
0
        int32_t deviation = (((int64_t)MAX(besterr, 1) - (int64_t)MAX(prev_besterr, 1)) * 100) /
682
0
            (int64_t)MAX(prev_besterr, 1);
683
0
        if (deviation >= ms_params->round_dev_th) {
684
0
            return besterr;
685
0
        }
686
0
    }
687
0
    return besterr;
688
0
}
689
690
int svt_av1_find_best_sub_pixel_tree(void* ictx, MacroBlockD* xd, const struct AV1Common* const cm,
691
                                     SUBPEL_MOTION_SEARCH_PARAMS* ms_params, Mv start_mv, Mv* bestmv, int* distortion,
692
0
                                     unsigned int* sse1, int qp, BlockSize bsize, uint8_t early_neigh_check_exit) {
693
0
    ModeDecisionContext* ctx            = (ModeDecisionContext*)ictx;
694
0
    const int            allow_hp       = ms_params->allow_hp;
695
0
    const int            forced_stop    = ms_params->forced_stop;
696
0
    const int            iters_per_step = ms_params->iters_per_step;
697
698
0
    svt_mv_cost_param*              mv_cost_params = &ms_params->mv_cost_params;
699
0
    const SUBPEL_SEARCH_VAR_PARAMS* var_params     = &ms_params->var_params;
700
0
    const SubpelMvLimits*           mv_limits      = &ms_params->mv_limits;
701
702
    // How many steps to take. A round of 0 means fullpel search only, 1 means
703
    // half-pel, and so on.
704
0
    int round = AOMMIN(FULL_PEL - forced_stop, 3 - !allow_hp);
705
0
    int hstep = INIT_SUBPEL_STEP_SIZE; // Step size, initialized to 4/8=1/2 pel
706
707
0
    unsigned int besterr;
708
709
0
    *bestmv             = start_mv;
710
0
    const int is_scaled = 0;
711
0
    besterr = svt_upsampled_setup_center_error(bestmv, var_params, mv_cost_params, (unsigned int*)distortion);
712
0
    if (ctx != NULL && ms_params->search_stage == SPEL_ME) {
713
0
        ctx->fp_me_dist[ms_params->list_idx][ms_params->ref_idx] = besterr;
714
0
        if (ctx->pd_pass == PD_PASS_1 && ctx->md_subpel_me_ctrls.mvp_th > 0) {
715
0
            unsigned int  best_mvperr  = ctx->best_fp_mvp_dist[ms_params->list_idx][ms_params->ref_idx];
716
0
            int           best_mvp_idx = ctx->best_fp_mvp_idx[ms_params->list_idx][ms_params->ref_idx];
717
0
            const int     mvp_err      = best_mvperr + 1;
718
0
            const int     me_err       = besterr + 1;
719
0
            const int32_t deviation    = ((me_err - mvp_err) * 100) / me_err;
720
0
            if (deviation >= ctx->md_subpel_me_ctrls.mvp_th) {
721
0
                round = 1;
722
0
            } else if (ABS(bestmv->x - ctx->mvp_array[ms_params->list_idx][ms_params->ref_idx][best_mvp_idx].x) >
723
0
                           ctx->md_subpel_me_ctrls.hp_mv_th ||
724
0
                       ABS(bestmv->y - ctx->mvp_array[ms_params->list_idx][ms_params->ref_idx][best_mvp_idx].y) >
725
0
                           ctx->md_subpel_me_ctrls.hp_mv_th) {
726
0
                round = MIN(round, 2);
727
0
            }
728
0
        }
729
0
    }
730
0
    if (early_neigh_check_exit) {
731
0
        return besterr;
732
0
    }
733
0
    const uint64_t th_normalizer = (uint64_t)(((var_params->w * var_params->h) << 5) *
734
0
                                              (uint64_t)ms_params->abs_th_mult);
735
0
    if ((uint64_t)qp * besterr < th_normalizer) {
736
0
        return besterr;
737
0
    }
738
739
    // If forced_stop is FULL_PEL, return.
740
0
    if (!round) {
741
0
        return besterr;
742
0
    }
743
    // Exit subpel search if the variance of the full-pel predicted samples is low (i.e. where likely interpolation will not modify the integer samples)
744
0
    if (ms_params->pred_variance_th) {
745
0
        const MSBuffers*   ms_buffers = &var_params->ms_buffers;
746
0
        const uint8_t*     ref        = svt_get_buf_from_mv(ms_buffers->ref, *bestmv);
747
0
        unsigned int       sse;
748
0
        const unsigned int var = var_params->vfp->vf(ref, ms_buffers->ref->stride, svt_aom_eb_av1_var_offs, 0, &sse);
749
0
        int                block_var = ROUND_POWER_OF_TWO(var, eb_num_pels_log2_lookup[bsize]);
750
751
0
        if (block_var < ms_params->pred_variance_th) {
752
0
            return besterr;
753
0
        }
754
0
    }
755
0
    for (int iter = 0; iter < round; ++iter) {
756
0
        Mv iter_center_mv = *bestmv;
757
0
        Mv diag_step;
758
0
        diag_step = svt_first_level_check(
759
0
            xd, cm, iter_center_mv, bestmv, hstep, mv_limits, var_params, mv_cost_params, &besterr, sse1, distortion);
760
761
        // Check diagonal sub-pixel position
762
0
        if (!CHECK_MV_EQUAL(iter_center_mv, *bestmv) && iters_per_step > 1) {
763
0
            svt_second_level_check_v2(xd,
764
0
                                      cm,
765
0
                                      iter_center_mv,
766
0
                                      diag_step,
767
0
                                      bestmv,
768
0
                                      mv_limits,
769
0
                                      var_params,
770
0
                                      mv_cost_params,
771
0
                                      &besterr,
772
0
                                      sse1,
773
0
                                      distortion,
774
0
                                      is_scaled);
775
0
        }
776
777
0
        hstep >>= 1;
778
0
    }
779
780
0
    return besterr;
781
0
}
782
783
// =============================================================================
784
//  SVT Functions
785
// =============================================================================
786
0
// Externally visible entry point for the MV error cost: forwards `mv` and
// `mv_cost_params` to svt_mv_err_cost_() and returns its result unchanged.
int svt_aom_fp_mv_err_cost(const Mv* mv, const svt_mv_cost_param* mv_cost_params) {
    const int cost = svt_mv_err_cost_(mv, mv_cost_params);
    return cost;
}