Coverage Report

Created: 2026-05-16 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/svt-av1/Source/Lib/Codec/av1me.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3
 *
4
 * This source code is subject to the terms of the BSD 2 Clause License and
5
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
 * was not distributed with this source code in the LICENSE file, you can
7
 * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
8
 * Media Patent License 1.0 was not distributed with this source code in the
9
 * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
10
 */
11
12
#include <limits.h>
13
#include <math.h>
14
#include <stdio.h>
15
#include "av1me.h"
16
#include "mcomp.h"
17
#include "utility.h"
18
#include "pcs.h"
19
#include "sequence_control_set.h"
20
#include "aom_dsp_rtcd.h"
21
#include "md_process.h"
22
#include "adaptive_mv_pred.h"
23
24
AomVarianceFnPtr svt_aom_mefn_ptr[BLOCK_SIZES_ALL];
25
26
1
void init_fn_ptr(void) {
27
1
#if CONFIG_ENABLE_HIGH_BIT_DEPTH
28
1
#define BFP0(w, h)                                                                     \
29
22
    svt_aom_mefn_ptr[BLOCK_##w##X##h].sdf       = svt_aom_sad##w##x##h;                \
30
22
    svt_aom_mefn_ptr[BLOCK_##w##X##h].vf        = svt_aom_variance##w##x##h;           \
31
22
    svt_aom_mefn_ptr[BLOCK_##w##X##h].vf_hbd_10 = svt_aom_highbd_10_variance##w##x##h; \
32
22
    svt_aom_mefn_ptr[BLOCK_##w##X##h].svf       = svt_aom_sub_pixel_variance##w##x##h; \
33
22
    svt_aom_mefn_ptr[BLOCK_##w##X##h].sdx4df    = svt_aom_sad##w##x##h##x4d;
34
#else
35
#define BFP0(w, h)                                                                  \
36
    svt_aom_mefn_ptr[BLOCK_##w##X##h].sdf    = svt_aom_sad##w##x##h;                \
37
    svt_aom_mefn_ptr[BLOCK_##w##X##h].vf     = svt_aom_variance##w##x##h;           \
38
    svt_aom_mefn_ptr[BLOCK_##w##X##h].svf    = svt_aom_sub_pixel_variance##w##x##h; \
39
    svt_aom_mefn_ptr[BLOCK_##w##X##h].sdx4df = svt_aom_sad##w##x##h##x4d;
40
#endif
41
1
    BFP0(4, 16)
42
1
    BFP0(16, 4)
43
1
    BFP0(8, 32)
44
1
    BFP0(32, 8)
45
1
    BFP0(16, 64)
46
1
    BFP0(64, 16)
47
1
    BFP0(128, 128)
48
1
    BFP0(128, 64)
49
1
    BFP0(64, 128)
50
1
    BFP0(32, 16)
51
1
    BFP0(16, 32)
52
1
    BFP0(64, 32)
53
1
    BFP0(32, 64)
54
1
    BFP0(32, 32)
55
1
    BFP0(64, 64)
56
1
    BFP0(16, 16)
57
1
    BFP0(16, 8)
58
1
    BFP0(8, 16)
59
1
    BFP0(8, 8)
60
1
    BFP0(8, 4)
61
1
    BFP0(4, 8)
62
1
    BFP0(4, 4)
63
1
#if CONFIG_ENABLE_OBMC
64
1
#define OBFP(w, h)                                                           \
65
22
    svt_aom_mefn_ptr[BLOCK_##w##X##h].osdf = svt_aom_obmc_sad##w##x##h;      \
66
22
    svt_aom_mefn_ptr[BLOCK_##w##X##h].ovf  = svt_aom_obmc_variance##w##x##h; \
67
22
    svt_aom_mefn_ptr[BLOCK_##w##X##h].osvf = svt_aom_obmc_sub_pixel_variance##w##x##h;
68
1
    OBFP(128, 128)
69
1
    OBFP(128, 64)
70
1
    OBFP(64, 128)
71
1
    OBFP(64, 64)
72
1
    OBFP(64, 32)
73
1
    OBFP(32, 64)
74
1
    OBFP(32, 32)
75
1
    OBFP(32, 16)
76
1
    OBFP(16, 32)
77
1
    OBFP(16, 16)
78
1
    OBFP(16, 8)
79
1
    OBFP(8, 16)
80
1
    OBFP(8, 8)
81
1
    OBFP(4, 8)
82
1
    OBFP(8, 4)
83
1
    OBFP(4, 4)
84
1
    OBFP(4, 16)
85
1
    OBFP(16, 4)
86
1
    OBFP(8, 32)
87
1
    OBFP(32, 8)
88
1
    OBFP(16, 64)
89
1
    OBFP(64, 16)
90
1
#endif
91
1
}
92
93
0
static INLINE const uint8_t* get_buf_from_mv(const Buf2D* buf, const Mv* mv) {
94
0
    return &buf->buf[mv->y * buf->stride + mv->x];
95
0
}
96
97
0
/*
 * Tightens `mv_limits` so that it never extends beyond the window of
 * +/- MAX_FULL_PEL_VAL around `mv` (after `mv` is shifted right by 3), nor
 * beyond the (MV_LOW >> 3) + 1 .. (MV_UPP >> 3) - 1 range.
 * Limits that are already tighter than this window are left untouched.
 */
void svt_av1_set_mv_search_range(MvLimits* mv_limits, const Mv* mv) {
    const int base_col = mv->x >> 3;
    const int base_row = mv->y >> 3;
    const int floor_mv = (MV_LOW >> 3) + 1;
    const int ceil_mv  = (MV_UPP >> 3) - 1;

    // The lower bound is bumped by one whenever the low three bits of the component are non-zero.
    int lo_col = base_col - MAX_FULL_PEL_VAL + !!(mv->x & 7);
    int lo_row = base_row - MAX_FULL_PEL_VAL + !!(mv->y & 7);
    int hi_col = base_col + MAX_FULL_PEL_VAL;
    int hi_row = base_row + MAX_FULL_PEL_VAL;

    lo_col = AOMMAX(lo_col, floor_mv);
    lo_row = AOMMAX(lo_row, floor_mv);
    hi_col = AOMMIN(hi_col, ceil_mv);
    hi_row = AOMMIN(hi_row, ceil_mv);

    // Intersect the caller's window with the valid MV window to reduce the
    // number of checks in diamond search.
    if (mv_limits->col_min < lo_col) {
        mv_limits->col_min = lo_col;
    }
    if (mv_limits->col_max > hi_col) {
        mv_limits->col_max = hi_col;
    }
    if (mv_limits->row_min < lo_row) {
        mv_limits->row_min = lo_row;
    }
    if (mv_limits->row_max > hi_row) {
        mv_limits->row_max = hi_row;
    }
}
123
124
#define PIXEL_TRANSFORM_ERROR_SCALE 4
125
126
0
int svt_aom_mv_err_cost_light(const Mv* mv, const Mv* ref) {
127
0
    const uint32_t factor     = 50;
128
0
    const uint32_t absmvdiffx = ABS(mv->x - ref->x);
129
0
    const uint32_t absmvdiffy = ABS(mv->y - ref->y);
130
0
    const uint32_t mv_rate    = 1296 + (factor * (absmvdiffx + absmvdiffy));
131
0
    return mv_rate;
132
0
}
133
134
0
static int mvsad_err_cost_light(const Mv* mv, const Mv* ref) {
135
0
    const uint32_t factor     = 50;
136
0
    const uint32_t absmvdiffx = ABS(mv->x - ref->x) * 8;
137
0
    const uint32_t absmvdiffy = ABS(mv->y - ref->y) * 8;
138
0
    const uint32_t mv_rate    = 1296 + (factor * (absmvdiffx + absmvdiffy));
139
0
    return mv_rate;
140
0
}
141
142
0
int svt_aom_mv_err_cost(const Mv* mv, const Mv* ref, const int* mvjcost, const int* mvcost[2], int error_per_bit) {
143
0
    if (mvcost) {
144
0
        const Mv diff = (Mv){{mv->x - ref->x, mv->y - ref->y}};
145
0
        return (int)ROUND_POWER_OF_TWO_64(
146
0
            (int64_t)svt_mv_cost(&diff, mvjcost, mvcost) * error_per_bit,
147
0
            RDDIV_BITS + AV1_PROB_COST_SHIFT - RD_EPB_SHIFT + PIXEL_TRANSFORM_ERROR_SCALE);
148
0
    }
149
0
    return 0;
150
0
}
151
152
0
static int mvsad_err_cost(const IntraBcContext* x, const Mv* mv, const Mv* ref, int sad_per_bit) {
153
0
    if (x->approx_inter_rate) {
154
0
        return mvsad_err_cost_light(mv, ref);
155
0
    }
156
0
    const Mv diff = (Mv){{(mv->x - ref->x) * 8, (mv->y - ref->y) * 8}};
157
0
    return ROUND_POWER_OF_TWO(
158
0
        (unsigned)svt_mv_cost(&diff, x->nmv_vec_cost, (const int* const*)x->mv_cost_stack) * sad_per_bit,
159
0
        AV1_PROB_COST_SHIFT);
160
0
}
161
162
0
/*
 * Builds the search-site table used by the diamond search.
 * Entry 0 is the zero vector with offset 0. It is followed by groups of eight
 * sites (four axis-aligned, four diagonal) at distance MAX_FIRST_STEP, then
 * half of that, and so on down to 1. Each site's `offset` is the buffer
 * displacement of its vector for the given `stride`.
 */
void svt_av1_init3smotion_compensation(SearchSiteConfig* cfg, int stride) {
    int next_site = 0;

    cfg->ss[next_site].mv.as_int = 0;
    cfg->ss[next_site].offset    = 0;
    ++next_site;

    for (int len = MAX_FIRST_STEP; len > 0; len /= 2) {
        // The eight search sites of this step.
        const Mv ring[8] = {{{0, -len}},
                            {{0, len}},
                            {{-len, 0}},
                            {{len, 0}},
                            {{-len, -len}},
                            {{len, -len}},
                            {{-len, len}},
                            {{len, len}}};

        for (int k = 0; k < 8; ++k) {
            SearchSite* const site = &cfg->ss[next_site++];
            site->mv               = ring[k];
            site->offset           = site->mv.y * stride + site->mv.x;
        }
    }

    cfg->ss_count          = next_site;
    cfg->searches_per_step = 8;
}
189
190
0
static INLINE int is_mv_in(const MvLimits* mv_limits, const Mv* mv) {
191
0
    return (mv->x >= mv_limits->col_min) && (mv->x <= mv_limits->col_max) && (mv->y >= mv_limits->row_min) &&
192
0
        (mv->y <= mv_limits->row_max);
193
0
}
194
195
int svt_av1_get_mvpred_var(const IntraBcContext* x, const Mv* best_mv, const Mv* center_mv, const AomVarianceFnPtr* vfp,
196
0
                           int use_mvcost) {
197
0
    const Buf2D* const what    = &x->plane[0].src;
198
0
    const Buf2D* const in_what = &x->xdplane[0].pre[0];
199
0
    const Mv           mv      = {{best_mv->x * 8, best_mv->y * 8}};
200
0
    unsigned int       unused;
201
0
    if (x->approx_inter_rate) {
202
0
        return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) +
203
0
            (use_mvcost ? svt_aom_mv_err_cost_light(&mv, center_mv) : 0);
204
0
    } else {
205
0
        return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) +
206
0
            (use_mvcost ? svt_aom_mv_err_cost(&mv, center_mv, x->nmv_vec_cost, x->mv_cost_stack, x->errorperbit) : 0);
207
0
    }
208
0
}
209
210
// Exhaustive motion search around a given centre position with a given
211
// step size.
212
static int exhaustive_mesh_search(IntraBcContext* x, Mv* ref_mv, Mv* best_mv, int range, int step, int sad_per_bit,
213
0
                                  const AomVarianceFnPtr* fn_ptr, const Mv* center_mv) {
214
0
    const Buf2D* const what       = &x->plane[0].src;
215
0
    const Buf2D* const in_what    = &x->xdplane[0].pre[0];
216
0
    Mv                 fcenter_mv = {.as_int = center_mv->as_int};
217
0
    unsigned int       best_sad   = INT_MAX;
218
0
    int                r, c, i;
219
0
    int                start_col, end_col, start_row, end_row;
220
0
    int                col_step = (step > 1) ? step : 4;
221
222
0
    assert(step >= 1);
223
224
0
    clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max, x->mv_limits.row_min, x->mv_limits.row_max);
225
0
    *best_mv = fcenter_mv;
226
0
    best_sad = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) +
227
0
        mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit);
228
0
    start_row = AOMMAX(-range, x->mv_limits.row_min - fcenter_mv.y);
229
0
    start_col = AOMMAX(-range, x->mv_limits.col_min - fcenter_mv.x);
230
0
    end_row   = AOMMIN(range, x->mv_limits.row_max - fcenter_mv.y);
231
0
    end_col   = AOMMIN(range, x->mv_limits.col_max - fcenter_mv.x);
232
233
0
    for (r = start_row; r <= end_row; r += step) {
234
0
        for (c = start_col; c <= end_col; c += col_step) {
235
            // Step > 1 means we are not checking every location in this pass.
236
0
            if (step > 1) {
237
0
                const Mv     mv  = {{fcenter_mv.x + c, fcenter_mv.y + r}};
238
0
                unsigned int sad = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), in_what->stride);
239
0
                if (sad < best_sad) {
240
0
                    sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
241
0
                    if (sad < best_sad) {
242
0
                        best_sad          = sad;
243
0
                        x->second_best_mv = *best_mv;
244
0
                        *best_mv          = mv;
245
0
                    }
246
0
                }
247
0
            } else {
248
                // 4 sads in a single call if we are checking every location
249
0
                if (c + 3 <= end_col) {
250
0
                    unsigned int   sads[4];
251
0
                    const uint8_t* addrs[4];
252
0
                    for (i = 0; i < 4; ++i) {
253
0
                        const Mv mv = {{fcenter_mv.x + c + i, fcenter_mv.y + r}};
254
0
                        addrs[i]    = get_buf_from_mv(in_what, &mv);
255
0
                    }
256
0
                    fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
257
258
0
                    for (i = 0; i < 4; ++i) {
259
0
                        if (sads[i] < best_sad) {
260
0
                            const Mv           mv  = {{fcenter_mv.x + c + i, fcenter_mv.y + r}};
261
0
                            const unsigned int sad = sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
262
0
                            if (sad < best_sad) {
263
0
                                best_sad          = sad;
264
0
                                x->second_best_mv = *best_mv;
265
0
                                *best_mv          = mv;
266
0
                            }
267
0
                        }
268
0
                    }
269
0
                } else {
270
0
                    for (i = 0; i < end_col - c; ++i) {
271
0
                        const Mv     mv  = {{fcenter_mv.x + c + i, fcenter_mv.y + r}};
272
0
                        unsigned int sad = fn_ptr->sdf(
273
0
                            what->buf, what->stride, get_buf_from_mv(in_what, &mv), in_what->stride);
274
0
                        if (sad < best_sad) {
275
0
                            sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit);
276
0
                            if (sad < best_sad) {
277
0
                                best_sad          = sad;
278
0
                                x->second_best_mv = *best_mv;
279
0
                                *best_mv          = mv;
280
0
                            }
281
0
                        }
282
0
                    }
283
0
                }
284
0
            }
285
0
        }
286
0
    }
287
288
0
    return best_sad;
289
0
}
290
291
/*
 * Diamond (n-step) full-pel search driven by the site table in `cfg`.
 *
 * *ref_mv is clamped to x->mv_limits and used as the starting point. Each
 * step evaluates cfg->searches_per_step sites around the current best
 * position and moves there when one of them improves SAD + MV cost.
 * MV cost is measured against `center_mv` shifted right by 3.
 *
 * search_param determines the length of the initial step and hence the number
 * of iterations.
 *   0 = initial step (MAX_FIRST_STEP) pel
 *   1 = (MAX_FIRST_STEP/2) pel,
 *   2 = (MAX_FIRST_STEP/4) pel...
 *
 * On return *best_mv holds the winning position and *num00 counts the steps
 * that selected no new site while the best position was still the start.
 * x->second_best_mv receives the previous best each time the search moves.
 * Returns the best SAD + MV cost.
 */
int svt_av1_diamond_search_sad_c(IntraBcContext* x, const SearchSiteConfig* cfg, Mv* ref_mv, Mv* best_mv,
                                 int search_param, int sad_per_bit, int* num00, const AomVarianceFnPtr* fn_ptr,
                                 const Mv* center_mv) {
    uint8_t* const src        = x->plane[0].src.buf;
    const int      src_stride = x->plane[0].src.stride;
    const int      ref_stride = x->xdplane[0].pre[0].stride;

    const SearchSite* const ss        = &cfg->ss[search_param * cfg->searches_per_step];
    const int               tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param;

    const Mv fcenter_mv = {{center_mv->x >> 3, center_mv->y >> 3}};
    clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max, x->mv_limits.row_min, x->mv_limits.row_max);

    const int start_row = ref_mv->y;
    const int start_col = ref_mv->x;
    *num00              = 0;
    best_mv->y          = start_row;
    best_mv->x          = start_col;

    // Work out the start point for the search
    const uint8_t* const start_address = x->xdplane[0].pre[0].buf + start_row * ref_stride + start_col;
    const uint8_t*       best_address  = start_address;

    // Check the starting position
    unsigned int best_sad = fn_ptr->sdf(src, src_stride, start_address, ref_stride) +
        mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);

    int best_site = 0;
    int last_site = 0;
    int site      = 1; // index of the next site to evaluate; keeps growing across steps

    for (int step = 0; step < tot_steps; step++) {
        // all_in is true if every one of the points we are checking are within
        // the bounds of the image. Only the four axis-aligned sites are tested.
        const int all_in = ((best_mv->y + ss[site].mv.y) > x->mv_limits.row_min) &&
            ((best_mv->y + ss[site + 1].mv.y) < x->mv_limits.row_max) &&
            ((best_mv->x + ss[site + 2].mv.x) > x->mv_limits.col_min) &&
            ((best_mv->x + ss[site + 3].mv.x) < x->mv_limits.col_max);

        if (all_in) {
            // Every point is in bounds: evaluate four sites per call without
            // validating each one.
            for (int group = 0; group < cfg->searches_per_step; group += 4) {
                unsigned int         sad_array[4];
                unsigned char const* block_offset[4];

                for (int t = 0; t < 4; t++) {
                    block_offset[t] = ss[site + t].offset + best_address;
                }

                fn_ptr->sdx4df(src, src_stride, block_offset, ref_stride, sad_array);

                for (int t = 0; t < 4; t++, site++) {
                    if (sad_array[t] < best_sad) {
                        const Mv this_mv = {{best_mv->x + ss[site].mv.x, best_mv->y + ss[site].mv.y}};
                        sad_array[t] += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
                        if (sad_array[t] < best_sad) {
                            best_sad  = sad_array[t];
                            best_site = site;
                        }
                    }
                }
            }
        } else {
            // Some points may be out of bounds: check each one for validity.
            for (int probe = 0; probe < cfg->searches_per_step; probe++, site++) {
                // Trap illegal vectors
                const Mv this_mv = {{best_mv->x + ss[site].mv.x, best_mv->y + ss[site].mv.y}};

                if (!is_mv_in(&x->mv_limits, &this_mv)) {
                    continue;
                }

                const uint8_t* const check_here = ss[site].offset + best_address;
                unsigned int         this_sad   = fn_ptr->sdf(src, src_stride, check_here, ref_stride);

                if (this_sad < best_sad) {
                    this_sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
                    if (this_sad < best_sad) {
                        best_sad  = this_sad;
                        best_site = site;
                    }
                }
            }
        }

        if (best_site != last_site) {
            // A better site was found in this step: move there.
            x->second_best_mv = *best_mv;
            best_mv->y += ss[best_site].mv.y;
            best_mv->x += ss[best_site].mv.x;
            best_address += ss[best_site].offset;
            last_site = best_site;
#if defined(NEW_DIAMOND_SEARCH)
            // Keep moving in the same direction for as long as it keeps improving.
            while (1) {
                const Mv this_mv = {{best_mv->x + ss[best_site].mv.x, best_mv->y + ss[best_site].mv.y}};
                if (is_mv_in(&x->mv_limits, &this_mv)) {
                    const uint8_t* const check_here = ss[best_site].offset + best_address;
                    unsigned int         this_sad   = fn_ptr->sdf(src, src_stride, check_here, ref_stride);
                    if (this_sad < best_sad) {
                        this_sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
                        if (this_sad < best_sad) {
                            best_sad = this_sad;
                            best_mv->y += ss[best_site].mv.y;
                            best_mv->x += ss[best_site].mv.x;
                            best_address += ss[best_site].offset;
                            continue;
                        }
                    }
                }
                break;
            }
#endif
        } else if (best_address == start_address) {
            (*num00)++;
        }
    }
    return best_sad;
}
423
424
static int svt_av1_refining_search_sad(IntraBcContext* x, Mv* ref_mv, int error_per_bit, int search_range,
425
0
                                       const AomVarianceFnPtr* fn_ptr, const Mv* center_mv) {
426
0
    const Mv           neighbors[4] = {{{0, -1}}, {{-1, 0}}, {{1, 0}}, {{0, 1}}};
427
0
    const Buf2D* const what         = &x->plane[0].src;
428
0
    const Buf2D* const in_what      = &x->xdplane[0].pre[0];
429
0
    const Mv           fcenter_mv   = {{center_mv->x >> 3, center_mv->y >> 3}};
430
0
    const uint8_t*     best_address = get_buf_from_mv(in_what, ref_mv);
431
0
    unsigned int       best_sad     = fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) +
432
0
        mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
433
0
    for (int i = 0; i < search_range; i++) {
434
0
        int       best_site = -1;
435
0
        const int all_in    = (ref_mv->y - 1) > x->mv_limits.row_min && (ref_mv->y + 1) < x->mv_limits.row_max &&
436
0
            (ref_mv->x - 1) > x->mv_limits.col_min && (ref_mv->x + 1) < x->mv_limits.col_max;
437
438
0
        if (all_in) {
439
0
            unsigned int         sads[4];
440
0
            const uint8_t* const positions[4] = {
441
0
                best_address - in_what->stride, best_address - 1, best_address + 1, best_address + in_what->stride};
442
443
0
            fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);
444
445
0
            for (int j = 0; j < 4; ++j) {
446
0
                if (sads[j] < best_sad) {
447
0
                    const Mv mv = {{ref_mv->x + neighbors[j].x, ref_mv->y + neighbors[j].y}};
448
0
                    sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
449
0
                    if (sads[j] < best_sad) {
450
0
                        best_sad  = sads[j];
451
0
                        best_site = j;
452
0
                    }
453
0
                }
454
0
            }
455
0
        } else {
456
0
            for (int j = 0; j < 4; ++j) {
457
0
                const Mv mv = {{ref_mv->x + neighbors[j].x, ref_mv->y + neighbors[j].y}};
458
459
0
                if (is_mv_in(&x->mv_limits, &mv)) {
460
0
                    unsigned int sad = fn_ptr->sdf(
461
0
                        what->buf, what->stride, get_buf_from_mv(in_what, &mv), in_what->stride);
462
0
                    if (sad < best_sad) {
463
0
                        sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
464
0
                        if (sad < best_sad) {
465
0
                            best_sad  = sad;
466
0
                            best_site = j;
467
0
                        }
468
0
                    }
469
0
                }
470
0
            }
471
0
        }
472
473
0
        if (best_site == -1) {
474
0
            break;
475
0
        } else {
476
0
            x->second_best_mv = *ref_mv;
477
0
            ref_mv->y += neighbors[best_site].y;
478
0
            ref_mv->x += neighbors[best_site].x;
479
0
            best_address = get_buf_from_mv(in_what, ref_mv);
480
0
        }
481
0
    }
482
483
0
    return best_sad;
484
0
}
485
486
/* do_refine: If last step (1-away) of n-step search doesn't pick the center
487
              point as the best match, we will do a final 1-away diamond
488
              refining search  */
489
static int full_pixel_diamond(PictureControlSet* pcs, IntraBcContext /*MACROBLOCK*/* x, Mv* mvp_full, int step_param,
490
                              int sadpb, int further_steps, int do_refine, int* cost_list,
491
0
                              const AomVarianceFnPtr* fn_ptr, const Mv* ref_mv) {
492
0
    Mv  temp_mv;
493
0
    int thissme, n, num00 = 0;
494
0
    (void)cost_list;
495
    /*int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
496
                                        step_param, sadpb, &n, fn_ptr, ref_mv);*/
497
0
    int bestsme = svt_av1_diamond_search_sad_c(
498
0
        x, &pcs->ss_cfg, mvp_full, &temp_mv, step_param, sadpb, &n, fn_ptr, ref_mv);
499
500
0
    if (bestsme < INT_MAX) {
501
0
        bestsme = svt_av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
502
0
    }
503
0
    x->best_mv = temp_mv;
504
505
    // If there won't be more n-step search, check to see if refining search is
506
    // needed.
507
0
    if (n > further_steps) {
508
0
        do_refine = 0;
509
0
    }
510
511
0
    while (n < further_steps) {
512
0
        ++n;
513
514
0
        if (num00) {
515
0
            num00--;
516
0
        } else {
517
            /*thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
518
                                        step_param + n, sadpb, &num00, fn_ptr,
519
                                        ref_mv);*/
520
0
            thissme = svt_av1_diamond_search_sad_c(
521
0
                x, &pcs->ss_cfg, mvp_full, &temp_mv, step_param + n, sadpb, &num00, fn_ptr, ref_mv);
522
523
0
            if (thissme < INT_MAX) {
524
0
                thissme = svt_av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
525
0
            }
526
527
            // check to see if refining search is needed.
528
0
            if (num00 > further_steps - n) {
529
0
                do_refine = 0;
530
0
            }
531
532
0
            if (thissme < bestsme) {
533
0
                bestsme    = thissme;
534
0
                x->best_mv = temp_mv;
535
0
            }
536
0
        }
537
0
    }
538
539
    // final 1-away diamond refining search
540
0
    if (do_refine) {
541
0
        const int search_range = 8;
542
0
        Mv        best_mv      = x->best_mv;
543
0
        thissme                = svt_av1_refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr, ref_mv);
544
0
        if (thissme < INT_MAX) {
545
0
            thissme = svt_av1_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
546
0
        }
547
0
        if (thissme < bestsme) {
548
0
            bestsme    = thissme;
549
0
            x->best_mv = best_mv;
550
0
        }
551
0
    }
552
553
    // Return cost list.
554
    /* if (cost_list) {
555
    calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, &x->best_mv.as_mv, cost_list);
556
  }*/
557
0
    return bestsme;
558
0
}
559
560
0
#define MIN_RANGE 7
561
0
#define MAX_RANGE 256
562
0
#define MIN_INTERVAL 1
563
564
// Runs an limited range exhaustive mesh search using a pattern set
565
// according to the encode speed profile.
566
static int intrabc_full_pixel_exhaustive(PictureControlSet* pcs, IntraBcContext* x, const Mv* center_mv, int sadpb,
567
0
                                         const AomVarianceFnPtr* fn_ptr, const Mv* ref_mv, Mv* dst_mv) {
568
0
    const IntrabcCtrls* ctrls = &pcs->ppcs->intrabc_ctrls;
569
570
0
    Mv search_mv = *center_mv;
571
0
    Mv ref_mv_fp = {{ref_mv->x >> 3, ref_mv->y >> 3}};
572
573
0
    int range     = ctrls->mesh_patterns[0].range;
574
0
    int interval  = ctrls->mesh_patterns[0].interval;
575
0
    int best_cost = INT_MAX;
576
577
    // Validate parameters
578
0
    if (range < MIN_RANGE || range > MAX_RANGE || interval < MIN_INTERVAL || interval > range) {
579
0
        return INT_MAX;
580
0
    }
581
582
0
    const int base_interval_div = range / interval;
583
584
    // Adapt search range based on center MV magnitude
585
0
    int mv_mag = AOMMAX(abs(search_mv.x), abs(search_mv.y));
586
0
    range      = AOMMAX(range, (5 * mv_mag) / 4);
587
0
    range      = AOMMIN(range, MAX_RANGE);
588
0
    interval   = AOMMAX(interval, range / base_interval_div);
589
590
    // Initial coarse search
591
0
    best_cost = exhaustive_mesh_search(x, &ref_mv_fp, &search_mv, range, interval, sadpb, fn_ptr, &search_mv);
592
593
    // Progressive refinement
594
0
    if (interval > MIN_INTERVAL && range > MIN_RANGE) {
595
0
        for (int i = 1; i < MAX_MESH_STEP; i++) {
596
0
            const MeshPattern* pattern = &ctrls->mesh_patterns[i];
597
598
0
            if (pattern->range == 0) {
599
0
                break;
600
0
            }
601
602
0
            best_cost = exhaustive_mesh_search(
603
0
                x, &ref_mv_fp, &search_mv, pattern->range, pattern->interval, sadpb, fn_ptr, &search_mv);
604
605
0
            if (pattern->interval == 1) {
606
0
                break;
607
0
            }
608
0
        }
609
0
    }
610
611
    // Final cost evaluation
612
0
    if (best_cost < INT_MAX) {
613
0
        best_cost = svt_av1_get_mvpred_var(x, &search_mv, ref_mv, fn_ptr, 1);
614
0
    }
615
616
0
    *dst_mv = search_mv;
617
618
0
    return best_cost;
619
0
}
620
#if CONFIG_ENABLE_OBMC
621
static int get_obmc_mvpred_var(const IntraBcContext* x, const int32_t* wsrc, const int32_t* mask, const Mv* best_mv,
622
0
                               const Mv* center_mv, const AomVarianceFnPtr* vfp, int use_mvcost, int is_second) {
623
0
    const Buf2D* in_what = (const Buf2D*)(&x->xdplane[0].pre[is_second]);
624
0
    const Mv     mv      = {{best_mv->x * 8, best_mv->y * 8}};
625
0
    unsigned int unused;
626
0
    if (x->approx_inter_rate) {
627
0
        return vfp->ovf(get_buf_from_mv((const Buf2D*)in_what, best_mv), in_what->stride, wsrc, mask, &unused) +
628
0
            (use_mvcost ? svt_aom_mv_err_cost_light(&mv, center_mv) : 0);
629
0
    } else {
630
0
        return vfp->ovf(get_buf_from_mv((const Buf2D*)in_what, best_mv), in_what->stride, wsrc, mask, &unused) +
631
0
            (use_mvcost ? svt_aom_mv_err_cost(&mv, center_mv, x->nmv_vec_cost, x->mv_cost_stack, x->errorperbit) : 0);
632
0
    }
633
0
}
634
635
/*
 * One-away OBMC refinement: repeatedly moves *ref_mv to whichever neighbour
 * gives the lowest OBMC SAD (fn_ptr->osdf) + MV cost. Four direct neighbours
 * are tried, or all eight when `search_diag` is non-zero.
 * Stops after `search_range` moves or as soon as no neighbour improves.
 * MV cost is measured against `center_mv` shifted right by 3.
 * Returns the best SAD + MV cost; *ref_mv holds the final position.
 */
static int obmc_refining_search_sad(const IntraBcContext* x, const int32_t* wsrc, const int32_t* mask, Mv* ref_mv,
                                    int error_per_bit, int search_range, const AomVarianceFnPtr* fn_ptr,
                                    const Mv* center_mv, int is_second, uint8_t search_diag) {
    // The four direct neighbours come first so that a prefix of length 4 excludes the diagonals.
    const Mv     neighbors[8] = {{{0, -1}}, {{-1, 0}}, {{1, 0}}, {{0, 1}}, {{1, -1}}, {{1, 1}}, {{-1, 1}}, {{-1, -1}}};
    const int    num_sites    = search_diag ? 8 : 4;
    const Buf2D* ref          = (const Buf2D*)(&x->xdplane[0].pre[is_second]);
    const Mv     fcenter_mv   = {{center_mv->x >> 3, center_mv->y >> 3}};
    unsigned int best_sad     = fn_ptr->osdf(get_buf_from_mv(ref, ref_mv), ref->stride, wsrc, mask) +
        mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);

    for (int iter = 0; iter < search_range; iter++) {
        int best_site = -1;

        for (int n = 0; n < num_sites; n++) {
            const Mv cand = {{ref_mv->x + neighbors[n].x, ref_mv->y + neighbors[n].y}};

            if (!is_mv_in(&x->mv_limits, &cand)) {
                continue;
            }

            unsigned int sad = fn_ptr->osdf(get_buf_from_mv(ref, &cand), ref->stride, wsrc, mask);
            if (sad < best_sad) {
                sad += mvsad_err_cost(x, &cand, &fcenter_mv, error_per_bit);
                if (sad < best_sad) {
                    best_sad  = sad;
                    best_site = n;
                }
            }
        }

        if (best_site == -1) {
            break;
        }

        ref_mv->y += neighbors[best_site].y;
        ref_mv->x += neighbors[best_site].x;
    }
    return best_sad;
}
672
673
int svt_av1_obmc_full_pixel_search(ModeDecisionContext* ctx, IntraBcContext* x, const Mv* mvp_full, int sadpb,
674
0
                                   const AomVarianceFnPtr* fn_ptr, const Mv* ref_mv, Mv* dst_mv, int is_second) {
675
    // obmc_full_pixel_diamond does not provide BDR gain on 360p
676
0
    const int32_t* wsrc         = ctx->wsrc_buf;
677
0
    const int32_t* mask         = ctx->mask_buf;
678
0
    const int      search_range = ctx->obmc_ctrls.fpel_search_range;
679
0
    *dst_mv                     = *mvp_full;
680
0
    x->approx_inter_rate        = ctx->approx_inter_rate;
681
0
    clamp_mv(dst_mv, x->mv_limits.col_min, x->mv_limits.col_max, x->mv_limits.row_min, x->mv_limits.row_max);
682
0
    clamp_mv(dst_mv, x->mv_limits.col_min, x->mv_limits.col_max, x->mv_limits.row_min, x->mv_limits.row_max);
683
0
    int thissme = obmc_refining_search_sad(
684
0
        x, wsrc, mask, dst_mv, sadpb, search_range, fn_ptr, ref_mv, is_second, ctx->obmc_ctrls.fpel_search_diag);
685
0
    if (thissme < INT_MAX) {
686
0
        thissme = get_obmc_mvpred_var(x, wsrc, mask, dst_mv, ref_mv, fn_ptr, 1, is_second);
687
0
    }
688
689
0
    return thissme;
690
0
}
691
#endif
692
693
#if CONFIG_ENABLE_OBMC
694
static INLINE void set_subpel_mv_search_range(const MvLimits* mv_limits, int* col_min, int* col_max, int* row_min,
695
0
                                              int* row_max, const Mv* ref_mv) {
696
0
    const int max_mv = MAX_FULL_PEL_VAL * 8;
697
0
    const int minc   = AOMMAX(mv_limits->col_min * 8, ref_mv->x - max_mv);
698
0
    const int maxc   = AOMMIN(mv_limits->col_max * 8, ref_mv->x + max_mv);
699
0
    const int minr   = AOMMAX(mv_limits->row_min * 8, ref_mv->y - max_mv);
700
0
    const int maxr   = AOMMIN(mv_limits->row_max * 8, ref_mv->y + max_mv);
701
702
0
    *col_min = AOMMAX(MV_LOW + 1, minc);
703
0
    *col_max = AOMMIN(MV_UPP - 1, maxc);
704
0
    *row_min = AOMMAX(MV_LOW + 1, minr);
705
0
    *row_max = AOMMIN(MV_UPP - 1, maxr);
706
0
}
707
708
// Candidate offsets for the sub-pel search: three groups of four entries
// (magnitudes 4, 2 and 1), each group ordered left, right, up, down.
// The search consumes one group per iteration.
static const Mv search_step_table[12] = {
    // magnitude 4
    {{-4, 0}}, {{4, 0}}, {{0, -4}}, {{0, 4}},
    // magnitude 2
    {{-2, 0}}, {{2, 0}}, {{0, -2}}, {{0, 2}},
    // magnitude 1
    {{-1, 0}}, {{1, 0}}, {{0, -1}}, {{0, 1}}};
722
723
static unsigned int setup_obmc_center_error(const int32_t* mask, const Mv* bestmv, const Mv* ref_mv, int error_per_bit,
724
                                            const AomVarianceFnPtr* vfp, const int32_t* const wsrc,
725
                                            const uint8_t* const y, int y_stride, int offset, int* mvjcost,
726
                                            const int* mvcost[2], unsigned int* sse1,
727
0
                                            uint8_t use_low_precision_cost_estimation, int* distortion) {
728
0
    unsigned int besterr;
729
0
    besterr     = vfp->ovf(y + offset, y_stride, wsrc, mask, sse1);
730
0
    *distortion = besterr;
731
0
    if (use_low_precision_cost_estimation) {
732
0
        besterr += svt_aom_mv_err_cost_light(bestmv, ref_mv);
733
0
    } else {
734
0
        besterr += svt_aom_mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
735
0
    }
736
0
    return besterr;
737
0
}
738
739
/*
 * Helper macros for the OBMC sub-pel search. They are expanded inside a
 * function body and rely on these names being in scope at the expansion
 * site: vfp, y, y_stride, z, mask, sse, thismse, besterr, br, bc,
 * minc/maxc/minr/maxr, ref_mv, mvjcost, mvcost, error_per_bit, distortion
 * and sse1.
 *
 * Macro arguments are parenthesised on every use so that an expression
 * argument cannot re-associate with surrounding operators.
 */

/* returns subpixel variance error function */
#define DIST(r, c) (vfp->osvf(pre(y, y_stride, (r), (c)), y_stride, sp(c), sp(r), z, mask, &sse))

/*
 * Evaluates the candidate at row r / column c. If it lies inside the search
 * window, v receives the MV cost (light or full estimate, selected by lp) and,
 * when v + distortion beats besterr, the best position / error / distortion /
 * sse are updated. Outside the window v is set to INT_MAX.
 */
#define CHECK_BETTER(v, r, c, lp)                                                                         \
    do {                                                                                                  \
        if ((c) >= minc && (c) <= maxc && (r) >= minr && (r) <= maxr) {                                   \
            thismse = (DIST(r, c));                                                                       \
                                                                                                          \
            if (lp) {                                                                                     \
                (v) = svt_aom_mv_err_cost_light(&(const Mv){{(c), (r)}}, ref_mv);                         \
            } else {                                                                                      \
                (v) = svt_aom_mv_err_cost(&(const Mv){{(c), (r)}}, ref_mv, mvjcost, mvcost, error_per_bit); \
            }                                                                                             \
            if (((v) + thismse) < besterr) {                                                              \
                besterr     = (v) + thismse;                                                              \
                br          = (r);                                                                        \
                bc          = (c);                                                                        \
                *distortion = thismse;                                                                    \
                *sse1       = sse;                                                                        \
            }                                                                                             \
        } else {                                                                                          \
            (v) = INT_MAX;                                                                                \
        }                                                                                                 \
    } while (0)
#define CHECK_BETTER0(v, r, c, lp) CHECK_BETTER(v, r, c, lp)
761
/*
 * Same contract as CHECK_BETTER, but the distortion comes from
 * upsampled_obmc_pref_error() instead of vfp->osvf(). Besides the names
 * CHECK_BETTER needs, the expansion site must provide xd, cm, mi_row, mi_col,
 * w, h and use_accurate_subpel_search.
 *
 * Macro arguments are parenthesised on every use so that an expression
 * argument cannot re-associate with surrounding operators.
 */
#define CHECK_BETTER1(v, r, c, lp)                                                           \
    do {                                                                                     \
        if ((c) >= minc && (c) <= maxc && (r) >= minr && (r) <= maxr) {                      \
            Mv this_mv = {{(c), (r)}};                                                       \
            thismse    = upsampled_obmc_pref_error(xd,                                       \
                                                cm,                                       \
                                                mi_row,                                   \
                                                mi_col,                                   \
                                                &this_mv,                                 \
                                                mask,                                     \
                                                vfp,                                      \
                                                z,                                        \
                                                pre(y, y_stride, (r), (c)),               \
                                                y_stride,                                 \
                                                sp(c),                                    \
                                                sp(r),                                    \
                                                w,                                        \
                                                h,                                        \
                                                &sse,                                     \
                                                use_accurate_subpel_search);              \
            if (lp) {                                                                        \
                (v) = svt_aom_mv_err_cost_light(&this_mv, ref_mv);                           \
            } else {                                                                         \
                (v) = svt_aom_mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \
            }                                                                                \
            if (((v) + thismse) < besterr) {                                                 \
                besterr     = (v) + thismse;                                                 \
                br          = (r);                                                           \
                bc          = (c);                                                           \
                *distortion = thismse;                                                       \
                *sse1       = sse;                                                           \
            }                                                                                \
        } else {                                                                             \
            (v) = INT_MAX;                                                                   \
        }                                                                                    \
    } while (0)
795
/*
 * Second-level refinement around the current best position (br, bc).
 * k selects the checker: CHECK_BETTER0 or CHECK_BETTER1.
 *
 * The expansion site must provide br, bc, tr, tc, kr, kc and lp. The macro
 * snapshots (br, bc), asserts that the position shares a row or a column with
 * (tr, tc), re-derives kc or kr from whichever coordinate differs, and then
 * probes (br0 + kr, bc0) and (br0, bc0 + kc). If either probe moved the best
 * position, (br0 + kr, bc0 + kc) is probed as well.
 */
#define SECOND_LEVEL_CHECKS_BEST(k)                            \
    do {                                                       \
        unsigned int second;                                   \
        int          br0 = br;                                 \
        int          bc0 = bc;                                 \
        assert(tr == br || tc == bc);                          \
        if (tr == br && tc != bc) {                            \
            kc = bc - tc;                                      \
        } else if (tr != br && tc == bc) {                     \
            kr = br - tr;                                      \
        }                                                      \
        CHECK_BETTER##k(second, br0 + kr, bc0, lp);            \
        CHECK_BETTER##k(second, br0, bc0 + kc, lp);            \
        if (br0 != br || bc0 != bc) {                          \
            CHECK_BETTER##k(second, br0 + kr, bc0 + kc, lp);   \
        }                                                      \
    } while (0)
810
811
static int upsampled_obmc_pref_error(MacroBlockD* xd, const Av1Common* const cm, int mi_row, int mi_col,
812
                                     const Mv* const mv, const int32_t* mask, const AomVarianceFnPtr* vfp,
813
                                     const int32_t* const wsrc, const uint8_t* const y, int y_stride, int subpel_x_q3,
814
0
                                     int subpel_y_q3, int w, int h, unsigned int* sse, int subpel_search) {
815
0
    unsigned int besterr;
816
817
0
    DECLARE_ALIGNED(16, uint8_t, pred[2 * MAX_SB_SQUARE]);
818
#if CONFIG_AV1_HIGHBITDEPTH
819
    if (is_cur_buf_hbd(xd)) {
820
        uint8_t* pred8 = CONVERT_TO_BYTEPTR(pred);
821
        aom_highbd_upsampled_pred(
822
            xd, cm, mi_row, mi_col, mv, pred8, w, h, subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd, subpel_search);
823
        besterr = vfp->ovf(pred8, w, wsrc, mask, sse);
824
    } else {
825
        svt_aom_upsampled_pred(
826
            xd, cm, mi_row, mi_col, mv, pred, w, h, subpel_x_q3, subpel_y_q3, y, y_stride, subpel_search);
827
828
        besterr = vfp->ovf(pred, w, wsrc, mask, sse);
829
    }
830
#else
831
0
    svt_aom_upsampled_pred(xd,
832
0
                           (const struct AV1Common* const)cm,
833
0
                           mi_row,
834
0
                           mi_col,
835
0
                           mv,
836
0
                           pred,
837
0
                           w,
838
0
                           h,
839
0
                           subpel_x_q3,
840
0
                           subpel_y_q3,
841
0
                           y,
842
0
                           y_stride,
843
0
                           subpel_search);
844
845
0
    besterr = vfp->ovf(pred, w, wsrc, mask, sse);
846
0
#endif
847
0
    return besterr;
848
0
}
849
850
/*
 * Scores the search centre using the upsampled predictor.
 *
 * Calls upsampled_obmc_pref_error() at y + offset with zero sub-pel phase
 * (which fills *sse1), stores that value in *distortion, and returns it plus
 * the motion-vector cost of bestmv relative to ref_mv. The cost comes from
 * svt_aom_mv_err_cost_light() when use_low_precision_cost_estimation is
 * non-zero, otherwise from svt_aom_mv_err_cost().
 */
static unsigned int upsampled_setup_obmc_center_error(MacroBlockD* xd, const Av1Common* const cm, int mi_row,
                                                      int mi_col, const int32_t* mask, const Mv* bestmv,
                                                      const Mv* ref_mv, int error_per_bit, const AomVarianceFnPtr* vfp,
                                                      const int32_t* const wsrc, const uint8_t* const y, int y_stride,
                                                      int w, int h, int offset, int* mvjcost, const int* mvcost[2],
                                                      unsigned int* sse1, int* distortion,
                                                      uint8_t use_low_precision_cost_estimation, int subpel_search) {
    const uint8_t* const center = y + offset;

    const unsigned int center_err = upsampled_obmc_pref_error(
        xd, cm, mi_row, mi_col, bestmv, mask, vfp, wsrc, center, y_stride, 0, 0, w, h, sse1, subpel_search);

    *distortion = center_err;

    if (use_low_precision_cost_estimation) {
        return center_err + svt_aom_mv_err_cost_light(bestmv, ref_mv);
    }
    return center_err + svt_aom_mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
}
867
868
// convert motion vector component to offset for sv[a]f calc
869
0
static INLINE int sp(int x) {
870
0
    return x & 7;
871
0
}
872
873
0
// Returns the address inside buf that corresponds to position (r, c): both
// coordinates are shifted right by 3 and the row part is scaled by stride.
static INLINE const uint8_t* pre(const uint8_t* buf, int stride, int r, int c) {
    const int whole_rows = r >> 3;
    const int whole_cols = c >> 3;
    return &buf[whole_rows * stride + whole_cols];
}
877
878
int svt_av1_find_best_obmc_sub_pixel_tree_up(ModeDecisionContext* ctx, IntraBcContext* x,
879
                                             const struct Av1Common* const cm, int mi_row, int mi_col, Mv* bestmv,
880
                                             const Mv* ref_mv, int allow_hp, int error_per_bit,
881
                                             const AomVarianceFnPtr* vfp, int forced_stop, int iters_per_step,
882
                                             int* mvjcost, const int* mvcost[2], int* distortion, unsigned int* sse1,
883
0
                                             int is_second, int use_accurate_subpel_search) {
884
0
    const int32_t*                 wsrc        = ctx->wsrc_buf;
885
0
    const int32_t*                 mask        = ctx->mask_buf;
886
0
    const int* const               z           = wsrc;
887
0
    const int* const               src_address = z;
888
0
    MacroBlockD*                   xd          = x->xd;
889
0
    struct MacroBlockDPlane* const pd          = &x->xdplane[0];
890
0
    unsigned int                   besterr     = INT_MAX;
891
0
    unsigned int                   sse;
892
0
    unsigned int                   thismse;
893
0
    int                            br    = bestmv->y * 8;
894
0
    int                            bc    = bestmv->x * 8;
895
0
    int                            hstep = 4;
896
0
    int                            round = 3 - forced_stop;
897
0
    int                            tr;
898
0
    int                            tc;
899
0
    const Mv*                      search_step = search_step_table;
900
0
    int                            best_idx    = -1;
901
0
    unsigned int                   cost_array[5];
902
0
    const int                      w  = block_size_wide[ctx->blk_geom->bsize];
903
0
    const int                      h  = block_size_high[ctx->blk_geom->bsize];
904
0
    const uint8_t                  lp = ctx->approx_inter_rate;
905
0
    int                            minc, maxc, minr, maxr;
906
907
0
    set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, ref_mv);
908
909
0
    const uint8_t* y        = pd->pre[is_second].buf;
910
0
    int            y_stride = pd->pre[is_second].stride;
911
0
    int            offset   = bestmv->y * y_stride + bestmv->x;
912
913
0
    if (!allow_hp && round == 3) {
914
0
        round = 2;
915
0
    }
916
917
0
    bestmv->y *= 8;
918
0
    bestmv->x *= 8;
919
    // use_accurate_subpel_search can be 0 or 1 or 2
920
0
    besterr = use_accurate_subpel_search
921
0
        ? upsampled_setup_obmc_center_error(xd,
922
0
                                            cm,
923
0
                                            mi_row,
924
0
                                            mi_col,
925
0
                                            mask,
926
0
                                            bestmv,
927
0
                                            ref_mv,
928
0
                                            error_per_bit,
929
0
                                            vfp,
930
0
                                            z,
931
0
                                            y,
932
0
                                            y_stride,
933
0
                                            w,
934
0
                                            h,
935
0
                                            offset,
936
0
                                            mvjcost,
937
0
                                            mvcost,
938
0
                                            sse1,
939
0
                                            distortion,
940
0
                                            lp,
941
0
                                            use_accurate_subpel_search)
942
0
        : setup_obmc_center_error(
943
0
              mask, bestmv, ref_mv, error_per_bit, vfp, z, y, y_stride, offset, mvjcost, mvcost, sse1, lp, distortion);
944
945
0
    for (int iter = 0; iter < round; ++iter) {
946
        // Check vertical and horizontal sub-pixel positions.
947
0
        int idx = 0;
948
0
        for (; idx < 4; ++idx) {
949
0
            tr = br + search_step[idx].y;
950
0
            tc = bc + search_step[idx].x;
951
0
            if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
952
0
                Mv this_mv = {{tc, tr}};
953
0
                thismse    = use_accurate_subpel_search
954
0
                       ? (unsigned)upsampled_obmc_pref_error(xd,
955
0
                                                          cm,
956
0
                                                          mi_row,
957
0
                                                          mi_col,
958
0
                                                          &this_mv,
959
0
                                                          mask,
960
0
                                                          vfp,
961
0
                                                          src_address,
962
0
                                                          pre(y, y_stride, tr, tc),
963
0
                                                          y_stride,
964
0
                                                          sp(tc),
965
0
                                                          sp(tr),
966
0
                                                          w,
967
0
                                                          h,
968
0
                                                          &sse,
969
0
                                                          use_accurate_subpel_search)
970
0
                       : vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), src_address, mask, &sse);
971
0
                if (lp) {
972
0
                    cost_array[idx] = thismse + svt_aom_mv_err_cost_light(&this_mv, ref_mv);
973
0
                } else {
974
0
                    cost_array[idx] = thismse + svt_aom_mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
975
0
                }
976
0
                if (cost_array[idx] < besterr) {
977
0
                    best_idx    = idx;
978
0
                    besterr     = cost_array[idx];
979
0
                    *distortion = thismse;
980
0
                    *sse1       = sse;
981
0
                }
982
0
            } else {
983
0
                cost_array[idx] = INT_MAX;
984
0
            }
985
0
        }
986
987
        // Check diagonal sub-pixel position
988
0
        int kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep);
989
0
        int kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep);
990
991
0
        tc = bc + kc;
992
0
        tr = br + kr;
993
0
        if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
994
0
            Mv this_mv = {{tc, tr}};
995
0
            thismse    = use_accurate_subpel_search
996
0
                   ? (unsigned)upsampled_obmc_pref_error(xd,
997
0
                                                      cm,
998
0
                                                      mi_row,
999
0
                                                      mi_col,
1000
0
                                                      &this_mv,
1001
0
                                                      mask,
1002
0
                                                      vfp,
1003
0
                                                      src_address,
1004
0
                                                      pre(y, y_stride, tr, tc),
1005
0
                                                      y_stride,
1006
0
                                                      sp(tc),
1007
0
                                                      sp(tr),
1008
0
                                                      w,
1009
0
                                                      h,
1010
0
                                                      &sse,
1011
0
                                                      use_accurate_subpel_search)
1012
0
                   : vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), src_address, mask, &sse);
1013
0
            if (lp) {
1014
0
                cost_array[4] = thismse + svt_aom_mv_err_cost_light(&this_mv, ref_mv);
1015
0
            } else {
1016
0
                cost_array[4] = thismse + svt_aom_mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
1017
0
            }
1018
1019
0
            if (cost_array[4] < besterr) {
1020
0
                best_idx    = 4;
1021
0
                besterr     = cost_array[4];
1022
0
                *distortion = thismse;
1023
0
                *sse1       = sse;
1024
0
            }
1025
0
        } else {
1026
0
            cost_array[idx] = INT_MAX;
1027
0
        }
1028
1029
0
        if (best_idx < 4 && best_idx >= 0) {
1030
0
            br += search_step[best_idx].y;
1031
0
            bc += search_step[best_idx].x;
1032
0
        } else if (best_idx == 4) {
1033
0
            br = tr;
1034
0
            bc = tc;
1035
0
        }
1036
1037
0
        if (iters_per_step > 1 && best_idx != -1) {
1038
0
            if (use_accurate_subpel_search) {
1039
0
                SECOND_LEVEL_CHECKS_BEST(1);
1040
0
            } else {
1041
0
                SECOND_LEVEL_CHECKS_BEST(0);
1042
0
            }
1043
0
        }
1044
1045
0
        search_step += 4;
1046
0
        hstep >>= 1;
1047
0
        best_idx = -1;
1048
0
    }
1049
1050
0
    bestmv->y = br;
1051
0
    bestmv->x = bc;
1052
1053
0
    return besterr;
1054
0
}
1055
#endif
1056
void svt_av1_intrabc_hash_search(PictureControlSet* pcs, IntraBcContext* x, BlockSize bsize, int x_pos, int y_pos,
1057
                                 const Mv* ref_mv, int intra, const AomVarianceFnPtr* fn_ptr, int* best_hash_cost,
1058
0
                                 Mv* best_hash_mv) {
1059
0
    const int block_width  = block_size_wide[bsize];
1060
0
    const int block_height = block_size_high[bsize];
1061
1062
0
    if (block_width != block_height || block_width > pcs->ppcs->intrabc_ctrls.max_block_size_hash) {
1063
0
        return;
1064
0
    }
1065
1066
0
    uint8_t* src_buf    = x->plane[0].src.buf;
1067
0
    int      src_stride = x->plane[0].src.stride;
1068
1069
0
    uint32_t hash_value1, hash_value2;
1070
1071
0
    svt_av1_get_block_hash_value(src_buf, src_stride, block_width, &hash_value1, &hash_value2, 0, pcs, x);
1072
1073
0
    HashTable* ref_frame_hash = &pcs->hash_table;
1074
0
    int        count          = svt_av1_hash_table_count(ref_frame_hash, hash_value1);
1075
1076
0
    if (count <= (intra ? 1 : 0)) {
1077
0
        return;
1078
0
    }
1079
1080
0
    Iterator iterator = svt_av1_hash_get_first_iterator(ref_frame_hash, hash_value1);
1081
1082
0
    const int mi_col = x_pos / MI_SIZE;
1083
0
    const int mi_row = y_pos / MI_SIZE;
1084
1085
0
    for (int i = 0; i < count; i++, svt_aom_iterator_increment(&iterator)) {
1086
0
        BlockHash ref_block_hash = *(BlockHash*)(svt_aom_iterator_get(&iterator));
1087
1088
0
        if (hash_value2 != ref_block_hash.hash_value2) {
1089
0
            continue;
1090
0
        }
1091
1092
0
        if (intra) {
1093
0
            Mv dv = {{8 * (ref_block_hash.x - x_pos), 8 * (ref_block_hash.y - y_pos)}};
1094
1095
0
            if (!svt_aom_is_dv_valid(dv, x->xd, mi_row, mi_col, bsize, pcs->ppcs->scs->seq_header.sb_size_log2)) {
1096
0
                continue;
1097
0
            }
1098
0
        }
1099
1100
0
        Mv hash_mv = {{ref_block_hash.x - x_pos, ref_block_hash.y - y_pos}};
1101
1102
0
        if (!is_mv_in(&x->mv_limits, &hash_mv)) {
1103
0
            continue;
1104
0
        }
1105
1106
0
        int ref_cost = svt_av1_get_mvpred_var(x, &hash_mv, ref_mv, fn_ptr, 1);
1107
1108
0
        if (ref_cost < *best_hash_cost) {
1109
0
            *best_hash_cost = ref_cost;
1110
0
            *best_hash_mv   = hash_mv;
1111
0
        }
1112
0
    }
1113
0
}
1114
1115
/*
 * Full-pel search: diamond search, optionally followed by an exhaustive
 * (mesh) search.
 *
 * If cost_list is non-NULL its first five entries are preset to INT_MAX
 * before the diamond search, which also receives cost_list. The mesh search
 * runs only when both hold:
 *   - the diamond result exceeds intrabc_ctrls.exhaustive_mesh_thresh shifted
 *     right by 10 - (mi_size_wide_log2 + mi_size_high_log2) of the block, and
 *   - the larger per-axis distance between *mvp_full and x->best_mv exceeds
 *     intrabc_ctrls.mesh_search_mv_diff_threshold.
 * When the mesh result is lower than the diamond result, x->best_mv is
 * replaced by the mesh MV.
 *
 * Always returns 0; the outcome is communicated through x->best_mv.
 * NOTE(review): x->best_mv is presumably written by full_pixel_diamond()
 * (it is read here right after the call) — confirm against that function.
 */
int svt_av1_full_pixel_search(PictureControlSet* pcs, IntraBcContext* x, BlockSize bsize, Mv* mvp_full, int step_param,
                              int error_per_bit, int* cost_list, const Mv* ref_mv) {
    const AomVarianceFnPtr* fn_ptr = &svt_aom_mefn_ptr[bsize];

    // Initialize cost list if requested
    for (int i = 0; cost_list && i < 5; i++) {
        cost_list[i] = INT_MAX;
    }

    // Primary diamond search
    const int var = full_pixel_diamond(
        pcs, x, mvp_full, step_param, error_per_bit, MAX_MVSEARCH_STEPS - 1 - step_param, 1, cost_list, fn_ptr, ref_mv);

    // Scale threshold by block size
    int mesh_thresh = (int)pcs->ppcs->intrabc_ctrls.exhaustive_mesh_thresh;
    mesh_thresh >>= 10 - (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]);

    const int32_t full_pel_mv_diff = MAX(abs(mvp_full->x - x->best_mv.x), abs(mvp_full->y - x->best_mv.y));

    // Decide whether to run exhaustive refinement
    const bool run_mesh_search = (var > mesh_thresh) &&
        !(full_pel_mv_diff <= pcs->ppcs->intrabc_ctrls.mesh_search_mv_diff_threshold);

    if (!run_mesh_search) {
        return 0;
    }

    // Exhaustive (Mesh) Search
    Mv        mv_ex;
    const int var_ex = intrabc_full_pixel_exhaustive(pcs, x, &x->best_mv, error_per_bit, fn_ptr, ref_mv, &mv_ex);

    if (var_ex < var) {
        x->best_mv = mv_ex;
    }

    return 0;
}