Coverage Report

Created: 2026-05-30 06:08

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/dav1d/src/refmvs.c
Line
Count
Source
1
/*
2
 * Copyright © 2020, VideoLAN and dav1d authors
3
 * Copyright © 2020, Two Orioles, LLC
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * 1. Redistributions of source code must retain the above copyright notice, this
10
 *    list of conditions and the following disclaimer.
11
 *
12
 * 2. Redistributions in binary form must reproduce the above copyright notice,
13
 *    this list of conditions and the following disclaimer in the documentation
14
 *    and/or other materials provided with the distribution.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
 */
27
28
#include "config.h"
29
30
#include <limits.h>
31
#include <stdlib.h>
32
33
#include "dav1d/common.h"
34
35
#include "common/intops.h"
36
37
#include "src/env.h"
38
#include "src/mem.h"
39
#include "src/refmvs.h"
40
41
static void add_spatial_candidate(refmvs_candidate *const mvstack, int *const cnt,
42
                                  const int weight, const refmvs_block *const b,
43
                                  const union refmvs_refpair ref, const mv gmv[2],
44
                                  int *const have_newmv_match,
45
                                  int *const have_refmv_match)
46
1.28M
{
47
1.28M
    if (b->mv.mv[0].n == INVALID_MV) return; // intra block, no intrabc
48
49
1.17M
    if (ref.ref[1] == -1) {
50
1.31M
        for (int n = 0; n < 2; n++) {
51
1.18M
            if (b->ref.ref[n] == ref.ref[0]) {
52
908k
                const mv cand_mv = ((b->mf & 1) && gmv[0].n != INVALID_MV) ?
53
897k
                                   gmv[0] : b->mv.mv[n];
54
55
908k
                *have_refmv_match = 1;
56
908k
                *have_newmv_match |= b->mf >> 1;
57
58
908k
                const int last = *cnt;
59
1.79M
                for (int m = 0; m < last; m++)
60
1.05M
                    if (mvstack[m].mv.mv[0].n == cand_mv.n) {
61
173k
                        mvstack[m].weight += weight;
62
173k
                        return;
63
173k
                    }
64
65
734k
                if (last < 8) {
66
734k
                    mvstack[last].mv.mv[0] = cand_mv;
67
734k
                    mvstack[last].weight = weight;
68
734k
                    *cnt = last + 1;
69
734k
                }
70
734k
                return;
71
908k
            }
72
1.18M
        }
73
1.03M
    } else if (b->ref.pair == ref.pair) {
74
46.1k
        const refmvs_mvpair cand_mv = { .mv = {
75
46.1k
            [0] = ((b->mf & 1) && gmv[0].n != INVALID_MV) ? gmv[0] : b->mv.mv[0],
76
46.1k
            [1] = ((b->mf & 1) && gmv[1].n != INVALID_MV) ? gmv[1] : b->mv.mv[1],
77
46.1k
        }};
78
79
46.1k
        *have_refmv_match = 1;
80
46.1k
        *have_newmv_match |= b->mf >> 1;
81
82
46.1k
        const int last = *cnt;
83
67.8k
        for (int n = 0; n < last; n++)
84
38.4k
            if (mvstack[n].mv.n == cand_mv.n) {
85
16.7k
                mvstack[n].weight += weight;
86
16.7k
                return;
87
16.7k
            }
88
89
29.3k
        if (last < 8) {
90
29.3k
            mvstack[last].mv = cand_mv;
91
29.3k
            mvstack[last].weight = weight;
92
29.3k
            *cnt = last + 1;
93
29.3k
        }
94
29.3k
    }
95
1.17M
}
96
97
static int scan_row(refmvs_candidate *const mvstack, int *const cnt,
98
                    const union refmvs_refpair ref, const mv gmv[2],
99
                    const refmvs_block *b, const int bw4, const int w4,
100
                    const int max_rows, const int step,
101
                    int *const have_newmv_match, int *const have_refmv_match)
102
334k
{
103
334k
    const refmvs_block *cand_b = b;
104
334k
    const enum BlockSize first_cand_bs = cand_b->bs;
105
334k
    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
106
334k
    int cand_bw4 = first_cand_b_dim[0];
107
334k
    int len = imax(step, imin(bw4, cand_bw4));
108
109
334k
    if (bw4 <= cand_bw4) {
110
        // FIXME weight can be higher for odd blocks (bx4 & 1), but then the
111
        // position of the first block has to be odd already, i.e. not just
112
        // for row_offset=-3/-5
113
        // FIXME why can this not be cand_bw4?
114
295k
        const int weight = bw4 == 1 ? 2 :
115
295k
                           imax(2, imin(2 * max_rows, first_cand_b_dim[1]));
116
295k
        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
117
295k
                              have_newmv_match, have_refmv_match);
118
295k
        return weight >> 1;
119
295k
    }
120
121
71.7k
    for (int x = 0;;) {
122
        // FIXME if we overhang above, we could fill a bitmask so we don't have
123
        // to repeat the add_spatial_candidate() for the next row, but just increase
124
        // the weight here
125
71.7k
        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
126
71.7k
                              have_newmv_match, have_refmv_match);
127
71.7k
        x += len;
128
71.7k
        if (x >= w4) return 1;
129
32.5k
        cand_b = &b[x];
130
32.5k
        cand_bw4 = dav1d_block_dimensions[cand_b->bs][0];
131
32.5k
        assert(cand_bw4 < bw4);
132
32.5k
        len = imax(step, cand_bw4);
133
32.5k
    }
134
39.3k
}
135
136
static int scan_col(refmvs_candidate *const mvstack, int *const cnt,
137
                    const union refmvs_refpair ref, const mv gmv[2],
138
                    /*const*/ refmvs_block *const *b, const int bh4, const int h4,
139
                    const int bx4, const int max_cols, const int step,
140
                    int *const have_newmv_match, int *const have_refmv_match)
141
619k
{
142
619k
    const refmvs_block *cand_b = &b[0][bx4];
143
619k
    const enum BlockSize first_cand_bs = cand_b->bs;
144
619k
    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
145
619k
    int cand_bh4 = first_cand_b_dim[1];
146
619k
    int len = imax(step, imin(bh4, cand_bh4));
147
148
619k
    if (bh4 <= cand_bh4) {
149
        // FIXME weight can be higher for odd blocks (by4 & 1), but then the
150
        // position of the first block has to be odd already, i.e. not just
151
        // for col_offset=-3/-5
152
        // FIXME why can this not be cand_bh4?
153
578k
        const int weight = bh4 == 1 ? 2 :
154
578k
                           imax(2, imin(2 * max_cols, first_cand_b_dim[0]));
155
578k
        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
156
578k
                            have_newmv_match, have_refmv_match);
157
578k
        return weight >> 1;
158
578k
    }
159
160
71.4k
    for (int y = 0;;) {
161
        // FIXME if we overhang above, we could fill a bitmask so we don't have
162
        // to repeat the add_spatial_candidate() for the next row, but just increase
163
        // the weight here
164
71.4k
        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
165
71.4k
                              have_newmv_match, have_refmv_match);
166
71.4k
        y += len;
167
71.4k
        if (y >= h4) return 1;
168
31.0k
        cand_b = &b[y][bx4];
169
31.0k
        cand_bh4 = dav1d_block_dimensions[cand_b->bs][1];
170
31.0k
        assert(cand_bh4 < bh4);
171
31.0k
        len = imax(step, cand_bh4);
172
31.0k
    }
173
40.9k
}
174
175
18.3k
static inline union mv mv_projection(const union mv mv, const int num, const int den) {
176
18.3k
    static const uint16_t div_mult[32] = {
177
18.3k
           0, 16384, 8192, 5461, 4096, 3276, 2730, 2340,
178
18.3k
        2048,  1820, 1638, 1489, 1365, 1260, 1170, 1092,
179
18.3k
        1024,   963,  910,  862,  819,  780,  744,  712,
180
18.3k
         682,   655,  630,  606,  585,  564,  546,  528
181
18.3k
    };
182
18.3k
    assert(den > 0 && den < 32);
183
18.3k
    assert(num > -32 && num < 32);
184
18.3k
    const int frac = num * div_mult[den];
185
18.3k
    const int y = mv.y * frac, x = mv.x * frac;
186
    // Round and clip according to AV1 spec section 7.9.3
187
18.3k
    return (union mv) { // 0x3fff == (1 << 14) - 1
188
18.3k
        .y = iclip((y + 8192 + (y >> 31)) >> 14, -0x3fff, 0x3fff),
189
18.3k
        .x = iclip((x + 8192 + (x >> 31)) >> 14, -0x3fff, 0x3fff)
190
18.3k
    };
191
18.3k
}
192
193
static void add_temporal_candidate(const refmvs_frame *const rf,
194
                                   refmvs_candidate *const mvstack, int *const cnt,
195
                                   const refmvs_temporal_block *const rb,
196
                                   const union refmvs_refpair ref, int *const globalmv_ctx,
197
                                   const union mv gmv[])
198
16.9k
{
199
16.9k
    if (rb->mv.n == INVALID_MV) return;
200
201
9.97k
    union mv mv = mv_projection(rb->mv, rf->pocdiff[ref.ref[0] - 1], rb->ref);
202
9.97k
    fix_mv_precision(rf->frm_hdr, &mv);
203
204
9.97k
    const int last = *cnt;
205
9.97k
    if (ref.ref[1] == -1) {
206
5.92k
        if (globalmv_ctx)
207
1.86k
            *globalmv_ctx = (abs(mv.x - gmv[0].x) | abs(mv.y - gmv[0].y)) >= 16;
208
209
10.6k
        for (int n = 0; n < last; n++)
210
9.05k
            if (mvstack[n].mv.mv[0].n == mv.n) {
211
4.29k
                mvstack[n].weight += 2;
212
4.29k
                return;
213
4.29k
            }
214
1.63k
        if (last < 8) {
215
1.62k
            mvstack[last].mv.mv[0] = mv;
216
1.62k
            mvstack[last].weight = 2;
217
1.62k
            *cnt = last + 1;
218
1.62k
        }
219
4.04k
    } else {
220
4.04k
        refmvs_mvpair mvp = { .mv = {
221
4.04k
            [0] = mv,
222
4.04k
            [1] = mv_projection(rb->mv, rf->pocdiff[ref.ref[1] - 1], rb->ref),
223
4.04k
        }};
224
4.04k
        fix_mv_precision(rf->frm_hdr, &mvp.mv[1]);
225
226
5.35k
        for (int n = 0; n < last; n++)
227
4.58k
            if (mvstack[n].mv.n == mvp.n) {
228
3.27k
                mvstack[n].weight += 2;
229
3.27k
                return;
230
3.27k
            }
231
769
        if (last < 8) {
232
769
            mvstack[last].mv = mvp;
233
769
            mvstack[last].weight = 2;
234
769
            *cnt = last + 1;
235
769
        }
236
769
    }
237
9.97k
}
238
239
static void add_compound_extended_candidate(refmvs_candidate *const same,
240
                                            int *const same_count,
241
                                            const refmvs_block *const cand_b,
242
                                            const int sign0, const int sign1,
243
                                            const union refmvs_refpair ref,
244
                                            const uint8_t *const sign_bias)
245
37.1k
{
246
37.1k
    refmvs_candidate *const diff = &same[2];
247
37.1k
    int *const diff_count = &same_count[2];
248
249
96.2k
    for (int n = 0; n < 2; n++) {
250
73.1k
        const int cand_ref = cand_b->ref.ref[n];
251
252
73.1k
        if (cand_ref <= 0) break;
253
254
59.0k
        mv cand_mv = cand_b->mv.mv[n];
255
59.0k
        if (cand_ref == ref.ref[0]) {
256
20.3k
            if (same_count[0] < 2)
257
19.9k
                same[same_count[0]++].mv.mv[0] = cand_mv;
258
20.3k
            if (diff_count[1] < 2) {
259
17.8k
                if (sign1 ^ sign_bias[cand_ref - 1]) {
260
711
                    cand_mv.y = -cand_mv.y;
261
711
                    cand_mv.x = -cand_mv.x;
262
711
                }
263
17.8k
                diff[diff_count[1]++].mv.mv[1] = cand_mv;
264
17.8k
            }
265
38.7k
        } else if (cand_ref == ref.ref[1]) {
266
21.1k
            if (same_count[1] < 2)
267
20.8k
                same[same_count[1]++].mv.mv[1] = cand_mv;
268
21.1k
            if (diff_count[0] < 2) {
269
17.8k
                if (sign0 ^ sign_bias[cand_ref - 1]) {
270
771
                    cand_mv.y = -cand_mv.y;
271
771
                    cand_mv.x = -cand_mv.x;
272
771
                }
273
17.8k
                diff[diff_count[0]++].mv.mv[0] = cand_mv;
274
17.8k
            }
275
21.1k
        } else {
276
17.5k
            mv i_cand_mv = (union mv) {
277
17.5k
                .x = -cand_mv.x,
278
17.5k
                .y = -cand_mv.y
279
17.5k
            };
280
281
17.5k
            if (diff_count[0] < 2) {
282
14.0k
                diff[diff_count[0]++].mv.mv[0] =
283
14.0k
                    sign0 ^ sign_bias[cand_ref - 1] ?
284
13.7k
                    i_cand_mv : cand_mv;
285
14.0k
            }
286
287
17.5k
            if (diff_count[1] < 2) {
288
13.5k
                diff[diff_count[1]++].mv.mv[1] =
289
13.5k
                    sign1 ^ sign_bias[cand_ref - 1] ?
290
13.2k
                    i_cand_mv : cand_mv;
291
13.5k
            }
292
17.5k
        }
293
59.0k
    }
294
37.1k
}
295
296
static void add_single_extended_candidate(refmvs_candidate mvstack[8], int *const cnt,
297
                                          const refmvs_block *const cand_b,
298
                                          const int sign, const uint8_t *const sign_bias)
299
100k
{
300
203k
    for (int n = 0; n < 2; n++) {
301
197k
        const int cand_ref = cand_b->ref.ref[n];
302
303
197k
        if (cand_ref <= 0) break;
304
        // we need to continue even if cand_ref == ref.ref[0], since
305
        // the candidate could have been added as a globalmv variant,
306
        // which changes the value
307
        // FIXME if scan_{row,col}() returned a mask for the nearest
308
        // edge, we could skip the appropriate ones here
309
310
102k
        mv cand_mv = cand_b->mv.mv[n];
311
102k
        if (sign ^ sign_bias[cand_ref - 1]) {
312
686
            cand_mv.y = -cand_mv.y;
313
686
            cand_mv.x = -cand_mv.x;
314
686
        }
315
316
102k
        int m;
317
102k
        const int last = *cnt;
318
119k
        for (m = 0; m < last; m++)
319
90.8k
            if (cand_mv.n == mvstack[m].mv.mv[0].n)
320
74.4k
                break;
321
102k
        if (m == last) {
322
28.2k
            mvstack[m].mv.mv[0] = cand_mv;
323
28.2k
            mvstack[m].weight = 2; // "minimal"
324
28.2k
            *cnt = last + 1;
325
28.2k
        }
326
102k
    }
327
100k
}
328
329
/*
330
 * refmvs_frame allocates memory for one sbrow (32 blocks high, whole frame
331
 * wide) of 4x4-resolution refmvs_block entries for spatial MV referencing.
332
 * mvrefs_tile[] keeps a list of 35 (32 + 3 above) pointers into this memory,
333
 * and each sbrow, the bottom entries (y=27/29/31) are exchanged with the top
334
 * (-5/-3/-1) pointers by calling dav1d_refmvs_tile_sbrow_init() at the start
335
 * of each tile/sbrow.
336
 *
337
 * For temporal MV referencing, we call dav1d_refmvs_save_tmvs() at the end of
338
 * each tile/sbrow (when tile column threading is enabled), or at the start of
339
 * each interleaved sbrow (i.e. once for all tile columns together, when tile
340
 * column threading is disabled). This will copy the 4x4-resolution spatial MVs
341
 * into 8x8-resolution refmvs_temporal_block structures. Then, for subsequent
342
 * frames, at the start of each tile/sbrow (when tile column threading is
343
 * enabled) or at the start of each interleaved sbrow (when tile column
344
 * threading is disabled), we call load_tmvs(), which will project the MVs to
345
 * their respective position in the current frame.
346
 */
347
348
void dav1d_refmvs_find(const refmvs_tile *const rt,
349
                       refmvs_candidate mvstack[8], int *const cnt,
350
                       int *const ctx,
351
                       const union refmvs_refpair ref, const enum BlockSize bs,
352
                       const enum EdgeFlags edge_flags,
353
                       const int by4, const int bx4)
354
359k
{
355
359k
    const refmvs_frame *const rf = rt->rf;
356
359k
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
357
359k
    const int bw4 = b_dim[0], w4 = imin(imin(bw4, 16), rt->tile_col.end - bx4);
358
359k
    const int bh4 = b_dim[1], h4 = imin(imin(bh4, 16), rt->tile_row.end - by4);
359
359k
    mv gmv[2], tgmv[2];
360
361
359k
    *cnt = 0;
362
359k
    assert(ref.ref[0] >=  0 && ref.ref[0] <= 8 &&
363
359k
           ref.ref[1] >= -1 && ref.ref[1] <= 8);
364
359k
    if (ref.ref[0] > 0) {
365
151k
        tgmv[0] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[0] - 1],
366
151k
                             bx4, by4, bw4, bh4, rf->frm_hdr);
367
151k
        gmv[0] = rf->frm_hdr->gmv[ref.ref[0] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
368
108k
                 tgmv[0] : (mv) { .n = INVALID_MV };
369
208k
    } else {
370
208k
        tgmv[0] = (mv) { .n = 0 };
371
208k
        gmv[0] = (mv) { .n = INVALID_MV };
372
208k
    }
373
359k
    if (ref.ref[1] > 0) {
374
31.9k
        tgmv[1] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[1] - 1],
375
31.9k
                             bx4, by4, bw4, bh4, rf->frm_hdr);
376
31.9k
        gmv[1] = rf->frm_hdr->gmv[ref.ref[1] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
377
24.7k
                 tgmv[1] : (mv) { .n = INVALID_MV };
378
31.9k
    }
379
380
    // top
381
359k
    int have_newmv = 0, have_col_mvs = 0, have_row_mvs = 0;
382
359k
    unsigned max_rows = 0, n_rows = ~0;
383
359k
    const refmvs_block *b_top;
384
359k
    if (by4 > rt->tile_row.start) {
385
210k
        max_rows = imin((by4 - rt->tile_row.start + 1) >> 1, 2 + (bh4 > 1));
386
210k
        b_top = &rt->r[(by4 & 31) - 1 + 5][bx4];
387
210k
        n_rows = scan_row(mvstack, cnt, ref, gmv, b_top,
388
210k
                          bw4, w4, max_rows, bw4 >= 16 ? 4 : 1,
389
210k
                          &have_newmv, &have_row_mvs);
390
210k
    }
391
392
    // left
393
359k
    unsigned max_cols = 0, n_cols = ~0U;
394
359k
    refmvs_block *const *b_left;
395
359k
    if (bx4 > rt->tile_col.start) {
396
311k
        max_cols = imin((bx4 - rt->tile_col.start + 1) >> 1, 2 + (bw4 > 1));
397
311k
        b_left = &rt->r[(by4 & 31) + 5];
398
311k
        n_cols = scan_col(mvstack, cnt, ref, gmv, b_left,
399
311k
                          bh4, h4, bx4 - 1, max_cols, bh4 >= 16 ? 4 : 1,
400
311k
                          &have_newmv, &have_col_mvs);
401
311k
    }
402
403
    // top/right
404
359k
    if (n_rows != ~0U && edge_flags & EDGE_I444_TOP_HAS_RIGHT &&
405
121k
        imax(bw4, bh4) <= 16 && bw4 + bx4 < rt->tile_col.end)
406
97.0k
    {
407
97.0k
        add_spatial_candidate(mvstack, cnt, 4, &b_top[bw4], ref, gmv,
408
97.0k
                              &have_newmv, &have_row_mvs);
409
97.0k
    }
410
411
359k
    const int nearest_match = have_col_mvs + have_row_mvs;
412
359k
    const int nearest_cnt = *cnt;
413
816k
    for (int n = 0; n < nearest_cnt; n++)
414
456k
        mvstack[n].weight += 640;
415
416
    // temporal
417
359k
    int globalmv_ctx = rf->frm_hdr->use_ref_frame_mvs;
418
359k
    if (rf->use_ref_frame_mvs) {
419
5.85k
        const ptrdiff_t stride = rf->rp_stride;
420
5.85k
        const int by8 = by4 >> 1, bx8 = bx4 >> 1;
421
5.85k
        const refmvs_temporal_block *const rbi = &rt->rp_proj[(by8 & 15) * stride + bx8];
422
5.85k
        const refmvs_temporal_block *rb = rbi;
423
5.85k
        const int step_h = bw4 >= 16 ? 2 : 1, step_v = bh4 >= 16 ? 2 : 1;
424
5.85k
        const int w8 = imin((w4 + 1) >> 1, 8), h8 = imin((h4 + 1) >> 1, 8);
425
15.7k
        for (int y = 0; y < h8; y += step_v) {
426
23.7k
            for (int x = 0; x < w8; x+= step_h) {
427
13.7k
                add_temporal_candidate(rf, mvstack, cnt, &rb[x], ref,
428
13.7k
                                       !(x | y) ? &globalmv_ctx : NULL, tgmv);
429
13.7k
            }
430
9.92k
            rb += stride * step_v;
431
9.92k
        }
432
5.85k
        if (imin(bw4, bh4) >= 2 && imax(bw4, bh4) < 16) {
433
3.10k
            const int bh8 = bh4 >> 1, bw8 = bw4 >> 1;
434
3.10k
            rb = &rbi[bh8 * stride];
435
3.10k
            const int has_bottom = by8 + bh8 < imin(rt->tile_row.end >> 1,
436
3.10k
                                                    (by8 & ~7) + 8);
437
3.10k
            if (has_bottom && bx8 - 1 >= imax(rt->tile_col.start >> 1, bx8 & ~7)) {
438
912
                add_temporal_candidate(rf, mvstack, cnt, &rb[-1], ref,
439
912
                                       NULL, NULL);
440
912
            }
441
3.10k
            if (bx8 + bw8 < imin(rt->tile_col.end >> 1, (bx8 & ~7) + 8)) {
442
1.44k
                if (has_bottom) {
443
877
                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8], ref,
444
877
                                           NULL, NULL);
445
877
                }
446
1.44k
                if (by8 + bh8 - 1 < imin(rt->tile_row.end >> 1, (by8 & ~7) + 8)) {
447
1.33k
                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8 - stride],
448
1.33k
                                           ref, NULL, NULL);
449
1.33k
                }
450
1.44k
            }
451
3.10k
        }
452
5.85k
    }
453
359k
    assert(*cnt <= 8);
454
455
    // top/left (which, confusingly, is part of "secondary" references)
456
359k
    int have_dummy_newmv_match;
457
359k
    if ((n_rows | n_cols) != ~0U) {
458
174k
        add_spatial_candidate(mvstack, cnt, 4, &b_top[-1], ref, gmv,
459
174k
                              &have_dummy_newmv_match, &have_row_mvs);
460
174k
    }
461
462
    // "secondary" (non-direct neighbour) top & left edges
463
    // what is different about secondary is that everything is now in 8x8 resolution
464
1.07M
    for (int n = 2; n <= 3; n++) {
465
717k
        if ((unsigned) n > n_rows && (unsigned) n <= max_rows) {
466
124k
            n_rows += scan_row(mvstack, cnt, ref, gmv,
467
124k
                               &rt->r[(((by4 & 31) - 2 * n + 1) | 1) + 5][bx4 | 1],
468
124k
                               bw4, w4, 1 + max_rows - n, bw4 >= 16 ? 4 : 2,
469
124k
                               &have_dummy_newmv_match, &have_row_mvs);
470
124k
        }
471
472
717k
        if ((unsigned) n > n_cols && (unsigned) n <= max_cols) {
473
308k
            n_cols += scan_col(mvstack, cnt, ref, gmv, &rt->r[((by4 & 31) | 1) + 5],
474
308k
                               bh4, h4, (bx4 - n * 2 + 1) | 1,
475
308k
                               1 + max_cols - n, bh4 >= 16 ? 4 : 2,
476
308k
                               &have_dummy_newmv_match, &have_col_mvs);
477
308k
        }
478
717k
    }
479
359k
    assert(*cnt <= 8);
480
481
359k
    const int ref_match_count = have_col_mvs + have_row_mvs;
482
483
    // context build-up
484
359k
    int refmv_ctx, newmv_ctx;
485
359k
    switch (nearest_match) {
486
57.6k
    case 0:
487
57.6k
        refmv_ctx = imin(2, ref_match_count);
488
57.6k
        newmv_ctx = ref_match_count > 0;
489
57.6k
        break;
490
178k
    case 1:
491
178k
        refmv_ctx = imin(ref_match_count * 3, 4);
492
178k
        newmv_ctx = 3 - have_newmv;
493
178k
        break;
494
123k
    case 2:
495
123k
        refmv_ctx = 5;
496
123k
        newmv_ctx = 5 - have_newmv;
497
123k
        break;
498
359k
    }
499
500
    // sorting (nearest, then "secondary")
501
359k
    int len = nearest_cnt;
502
749k
    while (len) {
503
390k
        int last = 0;
504
586k
        for (int n = 1; n < len; n++) {
505
195k
            if (mvstack[n - 1].weight < mvstack[n].weight) {
506
96.2k
#define EXCHANGE(a, b) do { refmvs_candidate tmp = a; a = b; b = tmp; } while (0)
507
91.2k
                EXCHANGE(mvstack[n - 1], mvstack[n]);
508
91.2k
                last = n;
509
91.2k
            }
510
195k
        }
511
390k
        len = last;
512
390k
    }
513
359k
    len = *cnt;
514
579k
    while (len > nearest_cnt) {
515
220k
        int last = nearest_cnt;
516
318k
        for (int n = nearest_cnt + 1; n < len; n++) {
517
97.9k
            if (mvstack[n - 1].weight < mvstack[n].weight) {
518
4.98k
                EXCHANGE(mvstack[n - 1], mvstack[n]);
519
4.98k
#undef EXCHANGE
520
4.98k
                last = n;
521
4.98k
            }
522
97.9k
        }
523
220k
        len = last;
524
220k
    }
525
526
359k
    if (ref.ref[1] > 0) {
527
31.9k
        if (*cnt < 2) {
528
24.6k
            const int sign0 = rf->sign_bias[ref.ref[0] - 1];
529
24.6k
            const int sign1 = rf->sign_bias[ref.ref[1] - 1];
530
24.6k
            const int sz4 = imin(w4, h4);
531
24.6k
            refmvs_candidate *const same = &mvstack[*cnt];
532
24.6k
            int same_count[4] = { 0 };
533
534
            // non-self references in top
535
35.1k
            if (n_rows != ~0U) for (int x = 0; x < sz4;) {
536
18.4k
                const refmvs_block *const cand_b = &b_top[x];
537
18.4k
                add_compound_extended_candidate(same, same_count, cand_b,
538
18.4k
                                                sign0, sign1, ref, rf->sign_bias);
539
18.4k
                x += dav1d_block_dimensions[cand_b->bs][0];
540
18.4k
            }
541
542
            // non-self references in left
543
36.1k
            if (n_cols != ~0U) for (int y = 0; y < sz4;) {
544
18.7k
                const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
545
18.7k
                add_compound_extended_candidate(same, same_count, cand_b,
546
18.7k
                                                sign0, sign1, ref, rf->sign_bias);
547
18.7k
                y += dav1d_block_dimensions[cand_b->bs][1];
548
18.7k
            }
549
550
24.6k
            refmvs_candidate *const diff = &same[2];
551
24.6k
            const int *const diff_count = &same_count[2];
552
553
            // merge together
554
74.0k
            for (int n = 0; n < 2; n++) {
555
49.3k
                int m = same_count[n];
556
557
49.3k
                if (m >= 2) continue;
558
559
40.3k
                const int l = diff_count[n];
560
40.3k
                if (l) {
561
31.7k
                    same[m].mv.mv[n] = diff[0].mv.mv[n];
562
31.7k
                    if (++m == 2) continue;
563
11.3k
                    if (l == 2) {
564
8.02k
                        same[1].mv.mv[n] = diff[1].mv.mv[n];
565
8.02k
                        continue;
566
8.02k
                    }
567
11.3k
                }
568
18.1k
                do {
569
18.1k
                    same[m].mv.mv[n] = tgmv[n];
570
18.1k
                } while (++m < 2);
571
11.8k
            }
572
573
            // if the first extended was the same as the non-extended one,
574
            // then replace it with the second extended one
575
24.6k
            int n = *cnt;
576
24.6k
            if (n == 1 && mvstack[0].mv.n == same[0].mv.n)
577
8.60k
                mvstack[1].mv = mvstack[2].mv;
578
38.1k
            do {
579
38.1k
                mvstack[n].weight = 2;
580
38.1k
            } while (++n < 2);
581
24.6k
            *cnt = 2;
582
24.6k
        }
583
584
        // clamping
585
31.9k
        const int left = -(bx4 + bw4 + 4) * 4 * 8;
586
31.9k
        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
587
31.9k
        const int top = -(by4 + bh4 + 4) * 4 * 8;
588
31.9k
        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
589
590
31.9k
        const int n_refmvs = *cnt;
591
31.9k
        int n = 0;
592
68.2k
        do {
593
68.2k
            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
594
68.2k
            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
595
68.2k
            mvstack[n].mv.mv[1].x = iclip(mvstack[n].mv.mv[1].x, left, right);
596
68.2k
            mvstack[n].mv.mv[1].y = iclip(mvstack[n].mv.mv[1].y, top, bottom);
597
68.2k
        } while (++n < n_refmvs);
598
599
31.9k
        switch (refmv_ctx >> 1) {
600
14.8k
        case 0:
601
14.8k
            *ctx = imin(newmv_ctx, 1);
602
14.8k
            break;
603
11.4k
        case 1:
604
11.4k
            *ctx = 1 + imin(newmv_ctx, 3);
605
11.4k
            break;
606
5.65k
        case 2:
607
5.65k
            *ctx = iclip(3 + newmv_ctx, 4, 7);
608
5.65k
            break;
609
31.9k
        }
610
611
31.9k
        return;
612
327k
    } else if (*cnt < 2 && ref.ref[0] > 0) {
613
81.8k
        const int sign = rf->sign_bias[ref.ref[0] - 1];
614
81.8k
        const int sz4 = imin(w4, h4);
615
616
        // non-self references in top
617
102k
        if (n_rows != ~0U) for (int x = 0; x < sz4 && *cnt < 2;) {
618
52.5k
            const refmvs_block *const cand_b = &b_top[x];
619
52.5k
            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
620
52.5k
            x += dav1d_block_dimensions[cand_b->bs][0];
621
52.5k
        }
622
623
        // non-self references in left
624
100k
        if (n_cols != ~0U) for (int y = 0; y < sz4 && *cnt < 2;) {
625
47.9k
            const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
626
47.9k
            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
627
47.9k
            y += dav1d_block_dimensions[cand_b->bs][1];
628
47.9k
        }
629
81.8k
    }
630
327k
    assert(*cnt <= 8);
631
632
    // clamping
633
327k
    int n_refmvs = *cnt;
634
327k
    if (n_refmvs) {
635
304k
        const int left = -(bx4 + bw4 + 4) * 4 * 8;
636
304k
        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
637
304k
        const int top = -(by4 + bh4 + 4) * 4 * 8;
638
304k
        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
639
640
304k
        int n = 0;
641
767k
        do {
642
767k
            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
643
767k
            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
644
767k
        } while (++n < n_refmvs);
645
304k
    }
646
647
444k
    for (int n = *cnt; n < 2; n++)
648
117k
        mvstack[n].mv.mv[0] = tgmv[0];
649
650
327k
    *ctx = (refmv_ctx << 4) | (globalmv_ctx << 3) | newmv_ctx;
651
327k
}
652
653
/*
 * Prepare the per-tile state `rt` for decoding one superblock row.
 *
 * rt                 tile state to fill in
 * rf                 frame-level state that owns the row and projection buffers
 * tile_col_start4,
 * tile_col_end4      horizontal tile bounds; the end is clamped to rf->iw4
 * tile_row_start4,
 * tile_row_end4      vertical tile bounds; the end is clamped to rf->ih4
 * sby                superblock row index; selects which half of rt->r[] is
 *                    written and whether the three extra rows are swapped
 * tile_row_idx       tile row index; forced to 0 when rf->n_tile_threads == 1
 * pass               when frame threading is enabled (n_frame_threads > 1) and
 *                    pass == 2, the second copy of the row buffer is used
 */
void dav1d_refmvs_tile_sbrow_init(refmvs_tile *const rt, const refmvs_frame *const rf,
                                  const int tile_col_start4, const int tile_col_end4,
                                  const int tile_row_start4, const int tile_row_end4,
                                  const int sby, int tile_row_idx, const int pass)
{
    // Without tile threading only one tile-row slot of each buffer is used.
    if (rf->n_tile_threads == 1)
        tile_row_idx = 0;

    // Back-reference and tile bounds, with the ends clamped to the frame size.
    rt->rf = rf;
    rt->tile_col.start = tile_col_start4;
    rt->tile_col.end = imin(tile_col_end4, rf->iw4);
    rt->tile_row.start = tile_row_start4;
    rt->tile_row.end = imin(tile_row_end4, rf->ih4);

    // Each tile row owns 16 rows of the projection buffer.
    rt->rp_proj = &rf->rp_proj[16 * rf->rp_stride * tile_row_idx];

    // Each tile row owns 35 rows of the block buffer; a row of rf->r is twice
    // as long as a row of rp_proj.
    const ptrdiff_t row_pitch = rf->rp_stride * 2;
    ptrdiff_t second_pass_off = 0;
    if (rf->n_frame_threads > 1 && pass == 2)
        second_pass_off = 35 * 2 * rf->n_blocks;
    refmvs_block *row = &rf->r[35 * row_pitch * tile_row_idx + second_pass_off];

    const int sbsz = rf->sbsz;
    const int base = (sbsz * sby) & 16;

    // Slots base+5 .. base+5+sbsz-1 receive the first sbsz rows, in order.
    for (int i = 0; i < sbsz; i++) {
        rt->r[base + 5 + i] = row;
        row += row_pitch;
    }

    // The next three rows go to the even slots 0, 2 and 4; the odd slots in
    // between are explicitly cleared.
    rt->r[base + 0] = row;
    rt->r[base + 1] = NULL;
    rt->r[base + 2] = row + row_pitch;
    rt->r[base + 3] = NULL;
    rt->r[base + 4] = row + 2 * row_pitch;

    // On odd superblock rows, exchange those three pointers with the entries
    // located sbsz slots further into rt->r[].
    if (sby & 1) {
        for (int k = 0; k <= 4; k += 2) {
            void *const tmp = rt->r[base + k];
            rt->r[base + k] = rt->r[base + sbsz + k];
            rt->r[base + sbsz + k] = tmp;
        }
    }
}
689
690
static void load_tmvs_c(const refmvs_frame *const rf, int tile_row_idx,
691
                        const int col_start8, const int col_end8,
692
                        const int row_start8, int row_end8)
693
3.01k
{
694
3.01k
    if (rf->n_tile_threads == 1) tile_row_idx = 0;
695
3.01k
    assert(row_start8 >= 0);
696
3.01k
    assert((unsigned) (row_end8 - row_start8) <= 16U);
697
3.01k
    row_end8 = imin(row_end8, rf->ih8);
698
3.01k
    const int col_start8i = imax(col_start8 - 8, 0);
699
3.01k
    const int col_end8i = imin(col_end8 + 8, rf->iw8);
700
701
3.01k
    const ptrdiff_t stride = rf->rp_stride;
702
3.01k
    refmvs_temporal_block *rp_proj =
703
3.01k
        &rf->rp_proj[16 * stride * tile_row_idx + (row_start8 & 15) * stride];
704
19.5k
    for (int y = row_start8; y < row_end8; y++) {
705
55.2k
        for (int x = col_start8; x < col_end8; x++)
706
38.6k
            rp_proj[x].mv.n = INVALID_MV;
707
16.5k
        rp_proj += stride;
708
16.5k
    }
709
710
3.01k
    rp_proj = &rf->rp_proj[16 * stride * tile_row_idx];
711
4.80k
    for (int n = 0; n < rf->n_mfmvs; n++) {
712
1.78k
        const int ref2cur = rf->mfmv_ref2cur[n];
713
1.78k
        if (ref2cur == INVALID_REF2CUR) continue;
714
715
1.43k
        const int ref = rf->mfmv_ref[n];
716
1.43k
        const int ref_sign = ref - 4;
717
1.43k
        const refmvs_temporal_block *r = &rf->rp_ref[ref][row_start8 * stride];
718
10.3k
        for (int y = row_start8; y < row_end8; y++) {
719
8.90k
            const int y_sb_align = y & ~7;
720
8.90k
            const int y_proj_start = imax(y_sb_align, row_start8);
721
8.90k
            const int y_proj_end = imin(y_sb_align + 8, row_end8);
722
22.8k
            for (int x = col_start8i; x < col_end8i; x++) {
723
13.9k
                const refmvs_temporal_block *rb = &r[x];
724
13.9k
                const int b_ref = rb->ref;
725
13.9k
                if (!b_ref) continue;
726
8.25k
                const int ref2ref = rf->mfmv_ref2ref[n][b_ref - 1];
727
8.25k
                if (!ref2ref) continue;
728
4.30k
                const mv b_mv = rb->mv;
729
4.30k
                const mv offset = mv_projection(b_mv, ref2cur, ref2ref);
730
4.30k
                int pos_x = x + apply_sign(abs(offset.x) >> 6,
731
4.30k
                                           offset.x ^ ref_sign);
732
4.30k
                const int pos_y = y + apply_sign(abs(offset.y) >> 6,
733
4.30k
                                                 offset.y ^ ref_sign);
734
4.30k
                if (pos_y >= y_proj_start && pos_y < y_proj_end) {
735
3.79k
                    const ptrdiff_t pos = (pos_y & 15) * stride;
736
16.8k
                    for (;;) {
737
16.8k
                        const int x_sb_align = x & ~7;
738
16.8k
                        if (pos_x >= imax(x_sb_align - 8, col_start8) &&
739
16.5k
                            pos_x < imin(x_sb_align + 16, col_end8))
740
16.2k
                        {
741
16.2k
                            rp_proj[pos + pos_x].mv = rb->mv;
742
16.2k
                            rp_proj[pos + pos_x].ref = ref2ref;
743
16.2k
                        }
744
16.8k
                        if (++x >= col_end8i) break;
745
13.6k
                        rb++;
746
13.6k
                        if (rb->ref != b_ref || rb->mv.n != b_mv.n) break;
747
13.0k
                        pos_x++;
748
13.0k
                    }
749
3.79k
                } else {
750
1.10k
                    for (;;) {
751
1.10k
                        if (++x >= col_end8i) break;
752
724
                        rb++;
753
724
                        if (rb->ref != b_ref || rb->mv.n != b_mv.n) break;
754
724
                    }
755
504
                }
756
4.30k
                x--;
757
4.30k
            }
758
8.90k
            r += stride;
759
8.90k
        }
760
1.43k
    }
761
3.01k
}
762
763
static void save_tmvs_c(refmvs_temporal_block *rp, const ptrdiff_t stride,
764
                        refmvs_block *const *const rr,
765
                        const uint8_t *const ref_sign,
766
                        const int col_end8, const int row_end8,
767
                        const int col_start8, const int row_start8)
768
9.14k
{
769
63.8k
    for (int y = row_start8; y < row_end8; y++) {
770
54.6k
        const refmvs_block *const b = rr[(y & 15) * 2];
771
772
128k
        for (int x = col_start8; x < col_end8;) {
773
73.7k
            const refmvs_block *const cand_b = &b[x * 2 + 1];
774
73.7k
            const int bw8 = (dav1d_block_dimensions[cand_b->bs][0] + 1) >> 1;
775
776
73.7k
            if (cand_b->ref.ref[1] > 0 && ref_sign[cand_b->ref.ref[1] - 1] &&
777
7.58k
                (abs(cand_b->mv.mv[1].y) | abs(cand_b->mv.mv[1].x)) < 4096)
778
6.91k
            {
779
18.5k
                for (int n = 0; n < bw8; n++, x++)
780
11.6k
                    rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[1],
781
11.6k
                                                      .ref = cand_b->ref.ref[1] };
782
66.8k
            } else if (cand_b->ref.ref[0] > 0 && ref_sign[cand_b->ref.ref[0] - 1] &&
783
25.2k
                       (abs(cand_b->mv.mv[0].y) | abs(cand_b->mv.mv[0].x)) < 4096)
784
23.9k
            {
785
95.3k
                for (int n = 0; n < bw8; n++, x++)
786
71.3k
                    rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[0],
787
71.3k
                                                      .ref = cand_b->ref.ref[0] };
788
42.8k
            } else {
789
141k
                for (int n = 0; n < bw8; n++, x++) {
790
98.4k
                    rp[x].mv.n = 0;
791
98.4k
                    rp[x].ref = 0; // "invalid"
792
98.4k
                }
793
42.8k
            }
794
73.7k
        }
795
54.6k
        rp += stride;
796
54.6k
    }
797
9.14k
}
798
799
int dav1d_refmvs_init_frame(refmvs_frame *const rf,
800
                            const Dav1dSequenceHeader *const seq_hdr,
801
                            const Dav1dFrameHeader *const frm_hdr,
802
                            const uint8_t ref_poc[7],
803
                            refmvs_temporal_block *const rp,
804
                            const uint8_t ref_ref_poc[7][7],
805
                            /*const*/ refmvs_temporal_block *const rp_ref[7],
806
                            const int n_tile_threads, const int n_frame_threads)
807
19.6k
{
808
19.6k
    const int rp_stride = ((frm_hdr->width[0] + 127) & ~127) >> 3;
809
19.6k
    const int n_tile_rows = n_tile_threads > 1 ? frm_hdr->tiling.rows : 1;
810
19.6k
    const int n_blocks = rp_stride * n_tile_rows;
811
812
19.6k
    rf->sbsz = 16 << seq_hdr->sb128;
813
19.6k
    rf->frm_hdr = frm_hdr;
814
19.6k
    rf->iw8 = (frm_hdr->width[0] + 7) >> 3;
815
19.6k
    rf->ih8 = (frm_hdr->height + 7) >> 3;
816
19.6k
    rf->iw4 = rf->iw8 << 1;
817
19.6k
    rf->ih4 = rf->ih8 << 1;
818
19.6k
    rf->rp = rp;
819
19.6k
    rf->rp_stride = rp_stride;
820
19.6k
    rf->n_tile_threads = n_tile_threads;
821
19.6k
    rf->n_frame_threads = n_frame_threads;
822
823
19.6k
    if (n_blocks != rf->n_blocks) {
824
19.6k
        const size_t r_sz = sizeof(*rf->r) * 35 * 2 * n_blocks * (1 + (n_frame_threads > 1));
825
19.6k
        const size_t rp_proj_sz = sizeof(*rf->rp_proj) * 16 * n_blocks;
826
        /* Note that sizeof(*rf->r) == 12, but it's accessed using 16-byte unaligned
827
         * loads in save_tmvs() asm which can overread 4 bytes into rp_proj. */
828
19.6k
        dav1d_free_aligned(rf->r);
829
19.6k
        rf->r = dav1d_alloc_aligned(ALLOC_REFMVS, r_sz + rp_proj_sz, 64);
830
19.6k
        if (!rf->r) {
831
0
            rf->n_blocks = 0;
832
0
            return DAV1D_ERR(ENOMEM);
833
0
        }
834
835
19.6k
        rf->rp_proj = (refmvs_temporal_block*)((uintptr_t)rf->r + r_sz);
836
19.6k
        rf->n_blocks = n_blocks;
837
19.6k
    }
838
839
19.6k
    const int poc = frm_hdr->frame_offset;
840
157k
    for (int i = 0; i < 7; i++) {
841
137k
        const int poc_diff = get_poc_diff(seq_hdr->order_hint_n_bits,
842
137k
                                          ref_poc[i], poc);
843
137k
        rf->sign_bias[i] = poc_diff > 0;
844
137k
        rf->mfmv_sign[i] = poc_diff < 0;
845
137k
        rf->pocdiff[i] = iclip(get_poc_diff(seq_hdr->order_hint_n_bits,
846
137k
                                            poc, ref_poc[i]), -31, 31);
847
137k
    }
848
849
    // temporal MV setup
850
19.6k
    rf->n_mfmvs = 0;
851
19.6k
    rf->rp_ref = rp_ref;
852
19.6k
    if (frm_hdr->use_ref_frame_mvs && seq_hdr->order_hint_n_bits) {
853
2.69k
        int total = 2;
854
2.69k
        if (rp_ref[0] && ref_ref_poc[0][6] != ref_poc[3] /* alt-of-last != gold */) {
855
206
            rf->mfmv_ref[rf->n_mfmvs++] = 0; // last
856
206
            total = 3;
857
206
        }
858
2.69k
        if (rp_ref[4] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[4],
859
466
                                      frm_hdr->frame_offset) > 0)
860
267
        {
861
267
            rf->mfmv_ref[rf->n_mfmvs++] = 4; // bwd
862
267
        }
863
2.69k
        if (rp_ref[5] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[5],
864
478
                                      frm_hdr->frame_offset) > 0)
865
255
        {
866
255
            rf->mfmv_ref[rf->n_mfmvs++] = 5; // altref2
867
255
        }
868
2.69k
        if (rf->n_mfmvs < total && rp_ref[6] &&
869
338
            get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[6],
870
338
                         frm_hdr->frame_offset) > 0)
871
151
        {
872
151
            rf->mfmv_ref[rf->n_mfmvs++] = 6; // altref
873
151
        }
874
2.69k
        if (rf->n_mfmvs < total && rp_ref[1])
875
572
            rf->mfmv_ref[rf->n_mfmvs++] = 1; // last2
876
877
4.14k
        for (int n = 0; n < rf->n_mfmvs; n++) {
878
1.45k
            const int rpoc = ref_poc[rf->mfmv_ref[n]];
879
1.45k
            const int diff1 = get_poc_diff(seq_hdr->order_hint_n_bits,
880
1.45k
                                           rpoc, frm_hdr->frame_offset);
881
1.45k
            if (abs(diff1) > 31) {
882
274
                rf->mfmv_ref2cur[n] = INVALID_REF2CUR;
883
1.17k
            } else {
884
1.17k
                rf->mfmv_ref2cur[n] = rf->mfmv_ref[n] < 4 ? -diff1 : diff1;
885
9.41k
                for (int m = 0; m < 7; m++) {
886
8.23k
                    const int rrpoc = ref_ref_poc[rf->mfmv_ref[n]][m];
887
8.23k
                    const int diff2 = get_poc_diff(seq_hdr->order_hint_n_bits,
888
8.23k
                                                   rpoc, rrpoc);
889
                    // unsigned comparison also catches the < 0 case
890
8.23k
                    rf->mfmv_ref2ref[n][m] = (unsigned) diff2 > 31U ? 0 : diff2;
891
8.23k
                }
892
1.17k
            }
893
1.45k
        }
894
2.69k
    }
895
19.6k
    rf->use_ref_frame_mvs = rf->n_mfmvs > 0;
896
897
19.6k
    return 0;
898
19.6k
}
899
900
/*
 * Replicate *rmv over a rectangle of blocks.
 *
 * rr    array of row pointers; one entry is consumed per row
 * rmv   value to copy
 * bx4   column offset applied to every row pointer
 * bw4   number of consecutive entries written in each row
 * bh4   number of rows; must be at least 1, because the first row is
 *       processed before the counter is tested
 */
static void splat_mv_c(refmvs_block **rr, const refmvs_block *const rmv,
                       const int bx4, const int bw4, int bh4)
{
    do {
        refmvs_block *dst = *rr + bx4;
        rr++;
        for (int left = bw4; left > 0; left--)
            *dst++ = *rmv;
    } while (--bh4 != 0);
}
909
910
#if HAVE_ASM
911
#if ARCH_AARCH64 || ARCH_ARM
912
#include "src/arm/refmvs.h"
913
#elif ARCH_LOONGARCH64
914
#include "src/loongarch/refmvs.h"
915
#elif ARCH_X86
916
#include "src/x86/refmvs.h"
917
#endif
918
#endif
919
920
/*
 * Populate the refmvs DSP function table.
 *
 * The portable C implementations are installed first. When the build has
 * assembly enabled (HAVE_ASM), the architecture-specific init routine is then
 * called with the same table.
 */
COLD void dav1d_refmvs_dsp_init(Dav1dRefmvsDSPContext *const c)
{
    // C reference implementations
    c->splat_mv = splat_mv_c;
    c->save_tmvs = save_tmvs_c;
    c->load_tmvs = load_tmvs_c;

#if HAVE_ASM
#if ARCH_AARCH64 || ARCH_ARM
    refmvs_dsp_init_arm(c);
#elif ARCH_LOONGARCH64
    refmvs_dsp_init_loongarch(c);
#elif ARCH_X86
    refmvs_dsp_init_x86(c);
#endif
#endif
}