Coverage Report

Created: 2026-06-15 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/dav1d/src/refmvs.c
Line
Count
Source
1
/*
2
 * Copyright © 2020, VideoLAN and dav1d authors
3
 * Copyright © 2020, Two Orioles, LLC
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * 1. Redistributions of source code must retain the above copyright notice, this
10
 *    list of conditions and the following disclaimer.
11
 *
12
 * 2. Redistributions in binary form must reproduce the above copyright notice,
13
 *    this list of conditions and the following disclaimer in the documentation
14
 *    and/or other materials provided with the distribution.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
 */
27
28
#include "config.h"
29
30
#include <limits.h>
31
#include <stdlib.h>
32
33
#include "dav1d/common.h"
34
35
#include "common/intops.h"
36
37
#include "src/env.h"
38
#include "src/mem.h"
39
#include "src/refmvs.h"
40
41
/*
 * Merge one spatial neighbour block into the candidate stack.
 *
 * mvstack/cnt:      candidate list and its current length; a new entry is
 *                   only appended while fewer than 8 entries are stored.
 * weight:           added to an existing entry that carries the same MV, or
 *                   used as the initial weight of a newly appended entry.
 * b:                neighbour block to inspect; ignored when its first MV
 *                   is INVALID_MV.
 * ref:              reference (pair) being searched for; ref.ref[1] == -1
 *                   selects the single-reference path.
 * gmv:              substitute MVs, used instead of the block's own MV when
 *                   bit 0 of b->mf is set and the gmv entry is not
 *                   INVALID_MV (presumably the global-motion MV -- confirm).
 * have_newmv_match: OR-ed with b->mf >> 1 when the block's reference matches.
 * have_refmv_match: set to 1 when the block's reference matches.
 */
static void add_spatial_candidate(refmvs_candidate *const mvstack, int *const cnt,
42
                                  const int weight, const refmvs_block *const b,
43
                                  const union refmvs_refpair ref, const mv gmv[2],
44
                                  int *const have_newmv_match,
45
                                  int *const have_refmv_match)
46
1.54M
{
47
1.54M
    if (b->mv.mv[0].n == INVALID_MV) return; // intra block, no intrabc
48
49
1.22M
    // single reference: either of the neighbour's two refs may match
    if (ref.ref[1] == -1) {
50
1.29M
        for (int n = 0; n < 2; n++) {
51
1.22M
            if (b->ref.ref[n] == ref.ref[0]) {
52
1.08M
                const mv cand_mv = ((b->mf & 1) && gmv[0].n != INVALID_MV) ?
53
1.05M
                                   gmv[0] : b->mv.mv[n];
54
55
1.08M
                *have_refmv_match = 1;
56
1.08M
                *have_newmv_match |= b->mf >> 1;
57
58
1.08M
                const int last = *cnt;
59
2.15M
                // already on the stack: just accumulate the weight
                for (int m = 0; m < last; m++)
60
1.49M
                    if (mvstack[m].mv.mv[0].n == cand_mv.n) {
61
425k
                        mvstack[m].weight += weight;
62
425k
                        return;
63
425k
                    }
64
65
662k
                if (last < 8) {
66
658k
                    mvstack[last].mv.mv[0] = cand_mv;
67
658k
                    mvstack[last].weight = weight;
68
658k
                    *cnt = last + 1;
69
658k
                }
70
662k
                // only the first matching ref of the neighbour is used
                return;
71
1.08M
            }
72
1.22M
        }
73
1.15M
    // compound reference: both refs must match as a pair
    } else if (b->ref.pair == ref.pair) {
74
25.9k
        const refmvs_mvpair cand_mv = { .mv = {
75
25.9k
            [0] = ((b->mf & 1) && gmv[0].n != INVALID_MV) ? gmv[0] : b->mv.mv[0],
76
25.9k
            [1] = ((b->mf & 1) && gmv[1].n != INVALID_MV) ? gmv[1] : b->mv.mv[1],
77
25.9k
        }};
78
79
25.9k
        *have_refmv_match = 1;
80
25.9k
        *have_newmv_match |= b->mf >> 1;
81
82
25.9k
        const int last = *cnt;
83
39.0k
        // duplicates are detected by comparing the whole MV pair at once
        for (int n = 0; n < last; n++)
84
22.0k
            if (mvstack[n].mv.n == cand_mv.n) {
85
8.96k
                mvstack[n].weight += weight;
86
8.96k
                return;
87
8.96k
            }
88
89
16.9k
        if (last < 8) {
90
16.9k
            mvstack[last].mv = cand_mv;
91
16.9k
            mvstack[last].weight = weight;
92
16.9k
            *cnt = last + 1;
93
16.9k
        }
94
16.9k
    }
95
1.22M
}
96
97
/*
 * Scan a row of neighbour blocks, starting at b, and feed each visited block
 * to add_spatial_candidate().
 *
 * If the first candidate is at least as wide as the current block
 * (bw4 <= its width), only that candidate is added, with weight
 * len * weight, and weight >> 1 is returned. Otherwise the row is walked in
 * steps of max(step, candidate width) until w4 columns are covered; every
 * visited candidate is added with weight len * 2 and 1 is returned.
 *
 * bw4:      width of the current block (compared against the widths stored
 *           in dav1d_block_dimensions[..][0]).
 * w4:       number of columns to cover in the multi-candidate walk.
 * max_rows: caps the weight of a single wide candidate at 2 * max_rows.
 * step:     minimum advance per visited candidate.
 *
 * NOTE(review): dav1d_refmvs_find() accumulates the return value into its
 * n_rows counter, so it presumably expresses how many rows this scan
 * accounts for -- confirm.
 */
static int scan_row(refmvs_candidate *const mvstack, int *const cnt,
98
                    const union refmvs_refpair ref, const mv gmv[2],
99
                    const refmvs_block *b, const int bw4, const int w4,
100
                    const int max_rows, const int step,
101
                    int *const have_newmv_match, int *const have_refmv_match)
102
478k
{
103
478k
    const refmvs_block *cand_b = b;
104
478k
    const enum BlockSize first_cand_bs = cand_b->bs;
105
478k
    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
106
478k
    int cand_bw4 = first_cand_b_dim[0];
107
478k
    int len = imax(step, imin(bw4, cand_bw4));
108
109
478k
    // one candidate spans the whole block width: add it once and stop
    if (bw4 <= cand_bw4) {
110
        // FIXME weight can be higher for odd blocks (bx4 & 1), but then the
111
        // position of the first block has to be odd already, i.e. not just
112
        // for row_offset=-3/-5
113
        // FIXME why can this not be cand_bw4?
114
416k
        const int weight = bw4 == 1 ? 2 :
115
416k
                           imax(2, imin(2 * max_rows, first_cand_b_dim[1]));
116
416k
        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
117
416k
                              have_newmv_match, have_refmv_match);
118
416k
        return weight >> 1;
119
416k
    }
120
121
127k
    // several narrower candidates: visit them left to right
    for (int x = 0;;) {
122
        // FIXME if we overhang above, we could fill a bitmask so we don't have
123
        // to repeat the add_spatial_candidate() for the next row, but just increase
124
        // the weight here
125
127k
        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
126
127k
                              have_newmv_match, have_refmv_match);
127
127k
        x += len;
128
127k
        if (x >= w4) return 1;
129
65.4k
        cand_b = &b[x];
130
65.4k
        cand_bw4 = dav1d_block_dimensions[cand_b->bs][0];
131
65.4k
        assert(cand_bw4 < bw4);
132
65.4k
        len = imax(step, cand_bw4);
133
65.4k
    }
134
61.8k
}
135
136
/*
 * Column counterpart of scan_row(): scan neighbour blocks at column bx4 of
 * the row-pointer table b, top to bottom, and feed each visited block to
 * add_spatial_candidate().
 *
 * If the first candidate is at least as tall as the current block
 * (bh4 <= its height), only that candidate is added, with weight
 * len * weight, and weight >> 1 is returned. Otherwise the column is walked
 * in steps of max(step, candidate height) until h4 rows are covered; every
 * visited candidate is added with weight len * 2 and 1 is returned.
 *
 * b:        table of row pointers; b[y][bx4] is the candidate y rows down.
 * bh4:      height of the current block (compared against the heights
 *           stored in dav1d_block_dimensions[..][1]).
 * h4:       number of rows to cover in the multi-candidate walk.
 * max_cols: caps the weight of a single tall candidate at 2 * max_cols.
 * step:     minimum advance per visited candidate.
 *
 * NOTE(review): dav1d_refmvs_find() accumulates the return value into its
 * n_cols counter, so it presumably expresses how many columns this scan
 * accounts for -- confirm.
 */
static int scan_col(refmvs_candidate *const mvstack, int *const cnt,
137
                    const union refmvs_refpair ref, const mv gmv[2],
138
                    /*const*/ refmvs_block *const *b, const int bh4, const int h4,
139
                    const int bx4, const int max_cols, const int step,
140
                    int *const have_newmv_match, int *const have_refmv_match)
141
565k
{
142
565k
    const refmvs_block *cand_b = &b[0][bx4];
143
565k
    const enum BlockSize first_cand_bs = cand_b->bs;
144
565k
    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
145
565k
    int cand_bh4 = first_cand_b_dim[1];
146
565k
    int len = imax(step, imin(bh4, cand_bh4));
147
148
565k
    // one candidate spans the whole block height: add it once and stop
    if (bh4 <= cand_bh4) {
149
        // FIXME weight can be higher for odd blocks (by4 & 1), but then the
150
        // position of the first block has to be odd already, i.e. not just
151
        // for col_offset=-3/-5
152
        // FIXME why can this not be cand_bh4?
153
490k
        const int weight = bh4 == 1 ? 2 :
154
490k
                           imax(2, imin(2 * max_cols, first_cand_b_dim[0]));
155
490k
        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
156
490k
                            have_newmv_match, have_refmv_match);
157
490k
        return weight >> 1;
158
490k
    }
159
160
149k
    // several shorter candidates: visit them top to bottom
    for (int y = 0;;) {
161
        // FIXME if we overhang above, we could fill a bitmask so we don't have
162
        // to repeat the add_spatial_candidate() for the next row, but just increase
163
        // the weight here
164
149k
        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
165
149k
                              have_newmv_match, have_refmv_match);
166
149k
        y += len;
167
149k
        if (y >= h4) return 1;
168
74.4k
        cand_b = &b[y][bx4];
169
74.4k
        cand_bh4 = dav1d_block_dimensions[cand_b->bs][1];
170
74.4k
        assert(cand_bh4 < bh4);
171
74.4k
        len = imax(step, cand_bh4);
172
74.4k
    }
173
75.2k
}
174
175
24.5k
/*
 * Scale a motion vector by num / den in fixed point.
 *
 * div_mult[den] holds floor(16384 / den) (entry 0 is a placeholder; den == 0
 * is rejected by the assert), so num * div_mult[den] is the ratio scaled by
 * 1 << 14. Each component is multiplied by that factor, rounded, shifted
 * back down by 14 and clipped to [-0x3fff, 0x3fff].
 *
 * Requires 0 < den < 32 and -32 < num < 32 (asserted).
 */
static inline union mv mv_projection(const union mv mv, const int num, const int den) {
176
24.5k
    static const uint16_t div_mult[32] = {
177
24.5k
           0, 16384, 8192, 5461, 4096, 3276, 2730, 2340,
178
24.5k
        2048,  1820, 1638, 1489, 1365, 1260, 1170, 1092,
179
24.5k
        1024,   963,  910,  862,  819,  780,  744,  712,
180
24.5k
         682,   655,  630,  606,  585,  564,  546,  528
181
24.5k
    };
182
24.5k
    assert(den > 0 && den < 32);
183
24.5k
    assert(num > -32 && num < 32);
184
24.5k
    const int frac = num * div_mult[den];
185
24.5k
    const int y = mv.y * frac, x = mv.x * frac;
186
    // Round and clip according to AV1 spec section 7.9.3
    // (v >> 31) contributes -1 for negative products, which makes the
    // rounding symmetric around zero; this assumes a 32-bit int with an
    // arithmetic right shift.
187
24.5k
    return (union mv) { // 0x3fff == (1 << 14) - 1
188
24.5k
        .y = iclip((y + 8192 + (y >> 31)) >> 14, -0x3fff, 0x3fff),
189
24.5k
        .x = iclip((x + 8192 + (x >> 31)) >> 14, -0x3fff, 0x3fff)
190
24.5k
    };
191
24.5k
}
192
193
/*
 * Merge one temporal (projected) MV into the candidate stack.
 *
 * Nothing happens when rb->mv is INVALID_MV. Otherwise rb->mv is scaled by
 * mv_projection() with num = rf->pocdiff[ref.ref[0] - 1] and den = rb->ref,
 * then passed through fix_mv_precision(). For compound references
 * (ref.ref[1] != -1) a second MV is derived the same way from ref.ref[1].
 * The result is merged with weight 2: an existing identical entry gains 2,
 * otherwise a new entry is appended while fewer than 8 are stored.
 *
 * globalmv_ctx: optional, may be NULL. Only written in the single-reference
 *               path, where it is set to 1 if either component of the
 *               projected MV differs from gmv[0] by 16 or more, else 0.
 * gmv:          only dereferenced when globalmv_ctx is non-NULL, so callers
 *               may pass NULL for both.
 */
static void add_temporal_candidate(const refmvs_frame *const rf,
194
                                   refmvs_candidate *const mvstack, int *const cnt,
195
                                   const refmvs_temporal_block *const rb,
196
                                   const union refmvs_refpair ref, int *const globalmv_ctx,
197
                                   const union mv gmv[])
198
33.4k
{
199
33.4k
    if (rb->mv.n == INVALID_MV) return;
200
201
9.68k
    union mv mv = mv_projection(rb->mv, rf->pocdiff[ref.ref[0] - 1], rb->ref);
202
9.68k
    fix_mv_precision(rf->frm_hdr, &mv);
203
204
9.68k
    const int last = *cnt;
205
9.68k
    if (ref.ref[1] == -1) {
206
7.35k
        if (globalmv_ctx)
207
1.60k
            // OR-ing the two absolute differences is >= 16 exactly when at
            // least one of them is >= 16
            *globalmv_ctx = (abs(mv.x - gmv[0].x) | abs(mv.y - gmv[0].y)) >= 16;
208
209
7.77k
        for (int n = 0; n < last; n++)
210
6.21k
            if (mvstack[n].mv.mv[0].n == mv.n) {
211
5.80k
                mvstack[n].weight += 2;
212
5.80k
                return;
213
5.80k
            }
214
1.55k
        if (last < 8) {
215
1.55k
            mvstack[last].mv.mv[0] = mv;
216
1.55k
            mvstack[last].weight = 2;
217
1.55k
            *cnt = last + 1;
218
1.55k
        }
219
2.32k
    } else {
220
2.32k
        // compound: project the same stored MV towards the second reference
        refmvs_mvpair mvp = { .mv = {
221
2.32k
            [0] = mv,
222
2.32k
            [1] = mv_projection(rb->mv, rf->pocdiff[ref.ref[1] - 1], rb->ref),
223
2.32k
        }};
224
2.32k
        fix_mv_precision(rf->frm_hdr, &mvp.mv[1]);
225
226
2.49k
        for (int n = 0; n < last; n++)
227
1.65k
            if (mvstack[n].mv.n == mvp.n) {
228
1.48k
                mvstack[n].weight += 2;
229
1.48k
                return;
230
1.48k
            }
231
842
        if (last < 8) {
232
842
            mvstack[last].mv = mvp;
233
842
            mvstack[last].weight = 2;
234
842
            *cnt = last + 1;
235
842
        }
236
842
    }
237
9.68k
}
238
239
/*
 * Collect fallback MVs for a compound reference pair from one neighbour.
 *
 * same points at four consecutive candidates: same[0..1] form the "same"
 * list and same[2..3], aliased as diff, form the "diff" list. same_count has
 * four counters: same_count[0] / same_count[1] count the filled mv[0] / mv[1]
 * slots of the "same" list, same_count[2] / same_count[3] (aliased as
 * diff_count) do so for the "diff" list. Each slot list holds at most two
 * MVs; further MVs are dropped.
 *
 * For each reference n of cand_b (stopping at the first ref <= 0):
 *  - ref == ref.ref[0]: the MV goes to same mv[0], and to diff mv[1],
 *    negated when sign1 ^ sign_bias[cand_ref - 1] is non-zero;
 *  - ref == ref.ref[1]: the MV goes to same mv[1], and to diff mv[0],
 *    negated when sign0 ^ sign_bias[cand_ref - 1] is non-zero;
 *  - otherwise: the MV goes to diff mv[0] and diff mv[1], each negated when
 *    the respective sign XOR the candidate's sign bias is non-zero.
 */
static void add_compound_extended_candidate(refmvs_candidate *const same,
240
                                            int *const same_count,
241
                                            const refmvs_block *const cand_b,
242
                                            const int sign0, const int sign1,
243
                                            const union refmvs_refpair ref,
244
                                            const uint8_t *const sign_bias)
245
22.3k
{
246
22.3k
    refmvs_candidate *const diff = &same[2];
247
22.3k
    int *const diff_count = &same_count[2];
248
249
57.6k
    for (int n = 0; n < 2; n++) {
250
43.9k
        const int cand_ref = cand_b->ref.ref[n];
251
252
43.9k
        if (cand_ref <= 0) break;
253
254
35.3k
        mv cand_mv = cand_b->mv.mv[n];
255
35.3k
        if (cand_ref == ref.ref[0]) {
256
12.2k
            if (same_count[0] < 2)
257
11.8k
                same[same_count[0]++].mv.mv[0] = cand_mv;
258
12.2k
            if (diff_count[1] < 2) {
259
10.6k
                if (sign1 ^ sign_bias[cand_ref - 1]) {
260
447
                    cand_mv.y = -cand_mv.y;
261
447
                    cand_mv.x = -cand_mv.x;
262
447
                }
263
10.6k
                diff[diff_count[1]++].mv.mv[1] = cand_mv;
264
10.6k
            }
265
23.1k
        } else if (cand_ref == ref.ref[1]) {
266
12.2k
            if (same_count[1] < 2)
267
12.0k
                same[same_count[1]++].mv.mv[1] = cand_mv;
268
12.2k
            if (diff_count[0] < 2) {
269
10.3k
                if (sign0 ^ sign_bias[cand_ref - 1]) {
270
515
                    cand_mv.y = -cand_mv.y;
271
515
                    cand_mv.x = -cand_mv.x;
272
515
                }
273
10.3k
                diff[diff_count[0]++].mv.mv[0] = cand_mv;
274
10.3k
            }
275
12.2k
        } else {
276
10.8k
            // neither reference matches: keep both orientations around and
            // pick per slot
            mv i_cand_mv = (union mv) {
277
10.8k
                .x = -cand_mv.x,
278
10.8k
                .y = -cand_mv.y
279
10.8k
            };
280
281
10.8k
            if (diff_count[0] < 2) {
282
8.57k
                diff[diff_count[0]++].mv.mv[0] =
283
8.57k
                    sign0 ^ sign_bias[cand_ref - 1] ?
284
8.37k
                    i_cand_mv : cand_mv;
285
8.57k
            }
286
287
10.8k
            if (diff_count[1] < 2) {
288
8.14k
                diff[diff_count[1]++].mv.mv[1] =
289
8.14k
                    sign1 ^ sign_bias[cand_ref - 1] ?
290
7.98k
                    i_cand_mv : cand_mv;
291
8.14k
            }
292
10.8k
        }
293
35.3k
    }
294
22.3k
}
295
296
/*
 * Add the MVs of one neighbour block as low-weight fallback candidates for a
 * single-reference search.
 *
 * For each reference n of cand_b (stopping at the first ref <= 0) the MV is
 * negated when sign ^ sign_bias[cand_ref - 1] is non-zero, and appended with
 * weight 2 unless an entry with the same mv[0] is already on the stack.
 *
 * There is no capacity check here: the visible callers in
 * dav1d_refmvs_find() only call this while *cnt < 2, and at most two entries
 * are added per call.
 */
static void add_single_extended_candidate(refmvs_candidate mvstack[8], int *const cnt,
297
                                          const refmvs_block *const cand_b,
298
                                          const int sign, const uint8_t *const sign_bias)
299
152k
{
300
306k
    for (int n = 0; n < 2; n++) {
301
302k
        const int cand_ref = cand_b->ref.ref[n];
302
303
302k
        if (cand_ref <= 0) break;
304
        // we need to continue even if cand_ref == ref.ref[0], since
305
        // the candidate could have been added as a globalmv variant,
306
        // which changes the value
307
        // FIXME if scan_{row,col}() returned a mask for the nearest
308
        // edge, we could skip the appropriate ones here
309
310
153k
        mv cand_mv = cand_b->mv.mv[n];
311
153k
        if (sign ^ sign_bias[cand_ref - 1]) {
312
1.12k
            cand_mv.y = -cand_mv.y;
313
1.12k
            cand_mv.x = -cand_mv.x;
314
1.12k
        }
315
316
153k
        int m;
317
153k
        const int last = *cnt;
318
163k
        // m ends up equal to last when no existing entry has this MV
        for (m = 0; m < last; m++)
319
147k
            if (cand_mv.n == mvstack[m].mv.mv[0].n)
320
138k
                break;
321
153k
        if (m == last) {
322
15.4k
            mvstack[m].mv.mv[0] = cand_mv;
323
15.4k
            mvstack[m].weight = 2; // "minimal"
324
15.4k
            *cnt = last + 1;
325
15.4k
        }
326
153k
    }
327
152k
}
328
329
/*
330
 * refmvs_frame allocates memory for one sbrow (32 blocks high, whole frame
331
 * wide) of 4x4-resolution refmvs_block entries for spatial MV referencing.
332
 * refmvs_tile.r[] keeps a list of 35 (32 + 3 above) pointers into this memory,
333
 * and each sbrow, the bottom entries (y=27/29/31) are exchanged with the top
334
 * (-5/-3/-1) pointers by calling dav1d_refmvs_tile_sbrow_init() at the start
335
 * of each tile/sbrow.
336
 *
337
 * For temporal MV referencing, we call dav1d_refmvs_save_tmvs() at the end of
338
 * each tile/sbrow (when tile column threading is enabled), or at the start of
339
 * each interleaved sbrow (i.e. once for all tile columns together, when tile
340
 * column threading is disabled). This will copy the 4x4-resolution spatial MVs
341
 * into 8x8-resolution refmvs_temporal_block structures. Then, for subsequent
342
 * frames, at the start of each tile/sbrow (when tile column threading is
343
 * enabled) or at the start of each interleaved sbrow (when tile column
344
 * threading is disabled), we call load_tmvs(), which will project the MVs to
345
 * their respective position in the current frame.
346
 */
347
348
/*
 * Build the list of reference MV candidates for one block, together with
 * the context value derived from how the neighbours matched.
 *
 * Candidates are gathered in this order: top row, left column, top-right,
 * temporal (only when rf->use_ref_frame_mvs), top-left, then the
 * "secondary" rows/columns further away. The two groups (everything up to
 * and including top-right, and everything after) are each sorted by
 * descending weight. If fewer than two candidates were found, extended
 * candidates from the top/left neighbours and finally tgmv are used as
 * fallbacks. All returned MVs are clamped to a range derived from the block
 * position and the frame size.
 *
 * rt:         tile state: row pointers (rt->r), tile bounds, projected
 *             temporal MVs (rt->rp_proj) and the frame state (rt->rf).
 * mvstack:    output candidate list, at most 8 entries.
 * cnt:        output, number of candidates. For compound references
 *             (ref.ref[1] > 0) this is at least 2 on return. For other
 *             references mvstack[*cnt..1] is filled with tgmv[0] but *cnt
 *             is not increased for those entries.
 * ctx:        output context. For compound references a single small value
 *             derived from refmv_ctx and newmv_ctx; otherwise
 *             (refmv_ctx << 4) | (globalmv_ctx << 3) | newmv_ctx.
 * ref:        reference (pair) to search for; asserted to satisfy
 *             0 <= ref.ref[0] <= 8 and -1 <= ref.ref[1] <= 8.
 * bs:         block size, used to index dav1d_block_dimensions.
 * edge_flags: only EDGE_I444_TOP_HAS_RIGHT is consulted, to decide whether
 *             the top-right neighbour is looked at.
 * by4, bx4:   block position; compared against the tile bounds and used to
 *             index the row-pointer table (presumably in 4x4 units, going
 *             by the naming -- confirm).
 */
void dav1d_refmvs_find(const refmvs_tile *const rt,
349
                       refmvs_candidate mvstack[8], int *const cnt,
350
                       int *const ctx,
351
                       const union refmvs_refpair ref, const enum BlockSize bs,
352
                       const enum EdgeFlags edge_flags,
353
                       const int by4, const int bx4)
354
336k
{
355
336k
    const refmvs_frame *const rf = rt->rf;
356
336k
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
357
336k
    const int bw4 = b_dim[0], w4 = imin(imin(bw4, 16), rt->tile_col.end - bx4);
358
336k
    const int bh4 = b_dim[1], h4 = imin(imin(bh4, 16), rt->tile_row.end - by4);
359
336k
    // gmv[] is what spatial candidates may be replaced with (INVALID_MV
    // disables the replacement); tgmv[] is used for the temporal context
    // and as the final fallback candidate
    mv gmv[2], tgmv[2];
360
361
336k
    *cnt = 0;
362
336k
    assert(ref.ref[0] >=  0 && ref.ref[0] <= 8 &&
363
336k
           ref.ref[1] >= -1 && ref.ref[1] <= 8);
364
336k
    if (ref.ref[0] > 0) {
365
178k
        tgmv[0] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[0] - 1],
366
178k
                             bx4, by4, bw4, bh4, rf->frm_hdr);
367
178k
        gmv[0] = rf->frm_hdr->gmv[ref.ref[0] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
368
133k
                 tgmv[0] : (mv) { .n = INVALID_MV };
369
178k
    } else {
370
158k
        tgmv[0] = (mv) { .n = 0 };
371
158k
        gmv[0] = (mv) { .n = INVALID_MV };
372
158k
    }
373
336k
    if (ref.ref[1] > 0) {
374
30.8k
        tgmv[1] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[1] - 1],
375
30.8k
                             bx4, by4, bw4, bh4, rf->frm_hdr);
376
30.8k
        gmv[1] = rf->frm_hdr->gmv[ref.ref[1] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
377
15.5k
                 tgmv[1] : (mv) { .n = INVALID_MV };
378
30.8k
    }
379
380
    // top
381
336k
    int have_newmv = 0, have_col_mvs = 0, have_row_mvs = 0;
382
336k
    // n_rows stays at ~0U ("no top neighbour") unless the row above is
    // inside the tile; b_top is only assigned in that case
    unsigned max_rows = 0, n_rows = ~0;
383
336k
    const refmvs_block *b_top;
384
336k
    if (by4 > rt->tile_row.start) {
385
249k
        max_rows = imin((by4 - rt->tile_row.start + 1) >> 1, 2 + (bh4 > 1));
386
249k
        b_top = &rt->r[(by4 & 31) - 1 + 5][bx4];
387
249k
        n_rows = scan_row(mvstack, cnt, ref, gmv, b_top,
388
249k
                          bw4, w4, max_rows, bw4 >= 16 ? 4 : 1,
389
249k
                          &have_newmv, &have_row_mvs);
390
249k
    }
391
392
    // left
393
336k
    // n_cols stays at ~0U ("no left neighbour") unless the column to the
    // left is inside the tile; b_left is only assigned in that case
    unsigned max_cols = 0, n_cols = ~0U;
394
336k
    refmvs_block *const *b_left;
395
336k
    if (bx4 > rt->tile_col.start) {
396
266k
        max_cols = imin((bx4 - rt->tile_col.start + 1) >> 1, 2 + (bw4 > 1));
397
266k
        b_left = &rt->r[(by4 & 31) + 5];
398
266k
        n_cols = scan_col(mvstack, cnt, ref, gmv, b_left,
399
266k
                          bh4, h4, bx4 - 1, max_cols, bh4 >= 16 ? 4 : 1,
400
266k
                          &have_newmv, &have_col_mvs);
401
266k
    }
402
403
    // top/right
404
336k
    if (n_rows != ~0U && edge_flags & EDGE_I444_TOP_HAS_RIGHT &&
405
158k
        imax(bw4, bh4) <= 16 && bw4 + bx4 < rt->tile_col.end)
406
138k
    {
407
138k
        add_spatial_candidate(mvstack, cnt, 4, &b_top[bw4], ref, gmv,
408
138k
                              &have_newmv, &have_row_mvs);
409
138k
    }
410
411
336k
    // snapshot of the match flags and candidate count after the directly
    // adjacent neighbours; everything gathered so far gets a fixed 640
    // weight bonus
    const int nearest_match = have_col_mvs + have_row_mvs;
412
336k
    const int nearest_cnt = *cnt;
413
741k
    for (int n = 0; n < nearest_cnt; n++)
414
404k
        mvstack[n].weight += 640;
415
416
    // temporal
417
336k
    int globalmv_ctx = rf->frm_hdr->use_ref_frame_mvs;
418
336k
    if (rf->use_ref_frame_mvs) {
419
11.2k
        const ptrdiff_t stride = rf->rp_stride;
420
11.2k
        const int by8 = by4 >> 1, bx8 = bx4 >> 1;
421
11.2k
        const refmvs_temporal_block *const rbi = &rt->rp_proj[(by8 & 15) * stride + bx8];
422
11.2k
        const refmvs_temporal_block *rb = rbi;
423
11.2k
        const int step_h = bw4 >= 16 ? 2 : 1, step_v = bh4 >= 16 ? 2 : 1;
424
11.2k
        const int w8 = imin((w4 + 1) >> 1, 8), h8 = imin((h4 + 1) >> 1, 8);
425
35.3k
        // blocks co-located with the current block; only the first one
        // (x == 0 && y == 0) updates globalmv_ctx
        for (int y = 0; y < h8; y += step_v) {
426
56.8k
            for (int x = 0; x < w8; x+= step_h) {
427
32.8k
                add_temporal_candidate(rf, mvstack, cnt, &rb[x], ref,
428
32.8k
                                       !(x | y) ? &globalmv_ctx : NULL, tgmv);
429
32.8k
            }
430
24.0k
            rb += stride * step_v;
431
24.0k
        }
432
11.2k
        // up to three extra positions around the bottom/right of the block,
        // each restricted to the tile and to the same aligned group of 8
        if (imin(bw4, bh4) >= 2 && imax(bw4, bh4) < 16) {
433
5.68k
            const int bh8 = bh4 >> 1, bw8 = bw4 >> 1;
434
5.68k
            rb = &rbi[bh8 * stride];
435
5.68k
            const int has_bottom = by8 + bh8 < imin(rt->tile_row.end >> 1,
436
5.68k
                                                    (by8 & ~7) + 8);
437
5.68k
            if (has_bottom && bx8 - 1 >= imax(rt->tile_col.start >> 1, bx8 & ~7)) {
438
125
                add_temporal_candidate(rf, mvstack, cnt, &rb[-1], ref,
439
125
                                       NULL, NULL);
440
125
            }
441
5.68k
            if (bx8 + bw8 < imin(rt->tile_col.end >> 1, (bx8 & ~7) + 8)) {
442
720
                if (has_bottom) {
443
133
                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8], ref,
444
133
                                           NULL, NULL);
445
133
                }
446
720
                if (by8 + bh8 - 1 < imin(rt->tile_row.end >> 1, (by8 & ~7) + 8)) {
447
377
                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8 - stride],
448
377
                                           ref, NULL, NULL);
449
377
                }
450
720
            }
451
5.68k
        }
452
11.2k
    }
453
336k
    assert(*cnt <= 8);
454
455
    // top/left (which, confusingly, is part of "secondary" references)
456
336k
    // write-only sink for the newmv flag of secondary candidates; it is
    // never read in this function.
    // NOTE(review): it is uninitialized when add_spatial_candidate() first
    // OR-s into it
    int have_dummy_newmv_match;
457
336k
    // the OR is only different from ~0U when both n_rows and n_cols were set
    if ((n_rows | n_cols) != ~0U) {
458
227k
        add_spatial_candidate(mvstack, cnt, 4, &b_top[-1], ref, gmv,
459
227k
                              &have_dummy_newmv_match, &have_row_mvs);
460
227k
    }
461
462
    // "secondary" (non-direct neighbour) top & left edges
463
    // what is different about secondary is that everything is now in 8x8 resolution
464
1.00M
    for (int n = 2; n <= 3; n++) {
465
672k
        // n > n_rows is always false while n_rows is still ~0U
        if ((unsigned) n > n_rows && (unsigned) n <= max_rows) {
466
228k
            n_rows += scan_row(mvstack, cnt, ref, gmv,
467
228k
                               &rt->r[(((by4 & 31) - 2 * n + 1) | 1) + 5][bx4 | 1],
468
228k
                               bw4, w4, 1 + max_rows - n, bw4 >= 16 ? 4 : 2,
469
228k
                               &have_dummy_newmv_match, &have_row_mvs);
470
228k
        }
471
472
672k
        if ((unsigned) n > n_cols && (unsigned) n <= max_cols) {
473
299k
            n_cols += scan_col(mvstack, cnt, ref, gmv, &rt->r[((by4 & 31) | 1) + 5],
474
299k
                               bh4, h4, (bx4 - n * 2 + 1) | 1,
475
299k
                               1 + max_cols - n, bh4 >= 16 ? 4 : 2,
476
299k
                               &have_dummy_newmv_match, &have_col_mvs);
477
299k
        }
478
672k
    }
479
336k
    assert(*cnt <= 8);
480
481
336k
    const int ref_match_count = have_col_mvs + have_row_mvs;
482
483
    // context build-up
484
336k
    // nearest_match is the sum of two 0/1 flags, so one of the three cases
    // below always assigns both variables
    int refmv_ctx, newmv_ctx;
485
336k
    switch (nearest_match) {
486
89.5k
    case 0:
487
89.5k
        refmv_ctx = imin(2, ref_match_count);
488
89.5k
        newmv_ctx = ref_match_count > 0;
489
89.5k
        break;
490
96.1k
    case 1:
491
96.1k
        refmv_ctx = imin(ref_match_count * 3, 4);
492
96.1k
        newmv_ctx = 3 - have_newmv;
493
96.1k
        break;
494
150k
    case 2:
495
150k
        refmv_ctx = 5;
496
150k
        newmv_ctx = 5 - have_newmv;
497
150k
        break;
498
336k
    }
499
500
    // sorting (nearest, then "secondary")
501
336k
    // bubble sort by descending weight; "last" remembers the position of the
    // final swap so each pass only covers the still-unsorted prefix
    int len = nearest_cnt;
502
650k
    while (len) {
503
313k
        int last = 0;
504
493k
        for (int n = 1; n < len; n++) {
505
179k
            if (mvstack[n - 1].weight < mvstack[n].weight) {
506
127k
#define EXCHANGE(a, b) do { refmvs_candidate tmp = a; a = b; b = tmp; } while (0)
507
73.5k
                EXCHANGE(mvstack[n - 1], mvstack[n]);
508
73.5k
                last = n;
509
73.5k
            }
510
179k
        }
511
313k
        len = last;
512
313k
    }
513
336k
    // same sort, restricted to the entries added after the nearest group
    len = *cnt;
514
530k
    while (len > nearest_cnt) {
515
193k
        int last = nearest_cnt;
516
336k
        for (int n = nearest_cnt + 1; n < len; n++) {
517
142k
            if (mvstack[n - 1].weight < mvstack[n].weight) {
518
53.7k
                EXCHANGE(mvstack[n - 1], mvstack[n]);
519
53.7k
#undef EXCHANGE
520
53.7k
                last = n;
521
53.7k
            }
522
142k
        }
523
193k
        len = last;
524
193k
    }
525
526
336k
    // compound reference: fill up to two candidates, clamp, compute ctx and
    // return from inside this branch
    if (ref.ref[1] > 0) {
527
30.8k
        if (*cnt < 2) {
528
26.7k
            const int sign0 = rf->sign_bias[ref.ref[0] - 1];
529
26.7k
            const int sign1 = rf->sign_bias[ref.ref[1] - 1];
530
26.7k
            const int sz4 = imin(w4, h4);
531
26.7k
            // scratch area directly behind the existing candidates:
            // same[0..1] plus diff = same[2..3], with four matching counters
            refmvs_candidate *const same = &mvstack[*cnt];
532
26.7k
            int same_count[4] = { 0 };
533
534
            // non-self references in top
535
26.7k
            if (n_rows != ~0U) for (int x = 0; x < sz4;) {
536
9.71k
                const refmvs_block *const cand_b = &b_top[x];
537
9.71k
                add_compound_extended_candidate(same, same_count, cand_b,
538
9.71k
                                                sign0, sign1, ref, rf->sign_bias);
539
9.71k
                x += dav1d_block_dimensions[cand_b->bs][0];
540
9.71k
            }
541
542
            // non-self references in left
543
26.7k
            if (n_cols != ~0U) for (int y = 0; y < sz4;) {
544
12.6k
                const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
545
12.6k
                add_compound_extended_candidate(same, same_count, cand_b,
546
12.6k
                                                sign0, sign1, ref, rf->sign_bias);
547
12.6k
                y += dav1d_block_dimensions[cand_b->bs][1];
548
12.6k
            }
549
550
26.7k
            refmvs_candidate *const diff = &same[2];
551
26.7k
            const int *const diff_count = &same_count[2];
552
553
            // merge together
554
80.1k
            // per MV slot n: top up the "same" list to two entries, first
            // from the "diff" list, then with tgmv[n]
            for (int n = 0; n < 2; n++) {
555
53.4k
                int m = same_count[n];
556
557
53.4k
                if (m >= 2) continue;
558
559
48.1k
                const int l = diff_count[n];
560
48.1k
                if (l) {
561
19.2k
                    same[m].mv.mv[n] = diff[0].mv.mv[n];
562
19.2k
                    if (++m == 2) continue;
563
7.14k
                    if (l == 2) {
564
5.38k
                        same[1].mv.mv[n] = diff[1].mv.mv[n];
565
5.38k
                        continue;
566
5.38k
                    }
567
7.14k
                }
568
58.2k
                do {
569
58.2k
                    same[m].mv.mv[n] = tgmv[n];
570
58.2k
                } while (++m < 2);
571
30.6k
            }
572
573
            // if the first extended was the same as the non-extended one,
574
            // then replace it with the second extended one
575
26.7k
            int n = *cnt;
576
26.7k
            if (n == 1 && mvstack[0].mv.n == same[0].mv.n)
577
5.41k
                mvstack[1].mv = mvstack[2].mv;
578
46.5k
            do {
579
46.5k
                mvstack[n].weight = 2;
580
46.5k
            } while (++n < 2);
581
26.7k
            *cnt = 2;
582
26.7k
        }
583
584
        // clamping
585
30.8k
        const int left = -(bx4 + bw4 + 4) * 4 * 8;
586
30.8k
        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
587
30.8k
        const int top = -(by4 + bh4 + 4) * 4 * 8;
588
30.8k
        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
589
590
30.8k
        const int n_refmvs = *cnt;
591
30.8k
        int n = 0;
592
64.3k
        do {
593
64.3k
            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
594
64.3k
            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
595
64.3k
            mvstack[n].mv.mv[1].x = iclip(mvstack[n].mv.mv[1].x, left, right);
596
64.3k
            mvstack[n].mv.mv[1].y = iclip(mvstack[n].mv.mv[1].y, top, bottom);
597
64.3k
        } while (++n < n_refmvs);
598
599
30.8k
        // refmv_ctx is at most 5 here, so refmv_ctx >> 1 is 0, 1 or 2
        switch (refmv_ctx >> 1) {
600
21.0k
        case 0:
601
21.0k
            *ctx = imin(newmv_ctx, 1);
602
21.0k
            break;
603
6.49k
        case 1:
604
6.49k
            *ctx = 1 + imin(newmv_ctx, 3);
605
6.49k
            break;
606
3.34k
        case 2:
607
3.34k
            *ctx = iclip(3 + newmv_ctx, 4, 7);
608
3.34k
            break;
609
30.8k
        }
610
611
30.8k
        return;
612
305k
    } else if (*cnt < 2 && ref.ref[0] > 0) {
613
121k
        const int sign = rf->sign_bias[ref.ref[0] - 1];
614
121k
        const int sz4 = imin(w4, h4);
615
616
        // non-self references in top
617
147k
        if (n_rows != ~0U) for (int x = 0; x < sz4 && *cnt < 2;) {
618
75.5k
            const refmvs_block *const cand_b = &b_top[x];
619
75.5k
            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
620
75.5k
            x += dav1d_block_dimensions[cand_b->bs][0];
621
75.5k
        }
622
623
        // non-self references in left
624
151k
        if (n_cols != ~0U) for (int y = 0; y < sz4 && *cnt < 2;) {
625
77.3k
            const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
626
77.3k
            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
627
77.3k
            y += dav1d_block_dimensions[cand_b->bs][1];
628
77.3k
        }
629
121k
    }
630
305k
    assert(*cnt <= 8);
631
632
    // clamping
633
305k
    int n_refmvs = *cnt;
634
305k
    if (n_refmvs) {
635
257k
        const int left = -(bx4 + bw4 + 4) * 4 * 8;
636
257k
        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
637
257k
        const int top = -(by4 + bh4 + 4) * 4 * 8;
638
257k
        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
639
640
257k
        int n = 0;
641
675k
        do {
642
675k
            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
643
675k
            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
644
675k
        } while (++n < n_refmvs);
645
257k
    }
646
647
500k
    // pad the first two slots with tgmv[0]; *cnt itself is left unchanged
    for (int n = *cnt; n < 2; n++)
648
195k
        mvstack[n].mv.mv[0] = tgmv[0];
649
650
305k
    *ctx = (refmv_ctx << 4) | (globalmv_ctx << 3) | newmv_ctx;
651
305k
}
652
653
/*
 * Prepare rt for one superblock row of one tile.
 *
 * Points rt->rp_proj and the rt->r[] row-pointer table at the parts of
 * rf->rp_proj / rf->r that belong to tile_row_idx, and records the frame
 * pointer and the tile bounds (row/column ends clamped to rf->ih4 /
 * rf->iw4).
 *
 * tile_row_idx: forced to 0 when rf->n_tile_threads == 1.
 * sby:          superblock row index; selects which half of rt->r[] is
 *               written (off is 0 or 16) and, when odd, triggers the pointer
 *               exchange at the end of the table setup.
 * pass:         when rf->n_frame_threads > 1 and pass == 2, rows are taken
 *               from a second region of rf->r located 35 * 2 * rf->n_blocks
 *               entries further in.
 */
void dav1d_refmvs_tile_sbrow_init(refmvs_tile *const rt, const refmvs_frame *const rf,
654
                                  const int tile_col_start4, const int tile_col_end4,
655
                                  const int tile_row_start4, const int tile_row_end4,
656
                                  const int sby, int tile_row_idx, const int pass)
657
136k
{
658
136k
    if (rf->n_tile_threads == 1) tile_row_idx = 0;
659
136k
    rt->rp_proj = &rf->rp_proj[16 * rf->rp_stride * tile_row_idx];
660
136k
    const ptrdiff_t r_stride = rf->rp_stride * 2;
661
136k
    const ptrdiff_t pass_off = (rf->n_frame_threads > 1 && pass == 2) ?
662
71.9k
        35 * 2 * rf->n_blocks : 0;
663
136k
    refmvs_block *r = &rf->r[35 * r_stride * tile_row_idx + pass_off];
664
136k
    const int sbsz = rf->sbsz;
665
136k
    const int off = (sbsz * sby) & 16;
666
3.11M
    // entries off+5 .. off+5+sbsz-1 get one row each, r_stride apart
    for (int i = 0; i < sbsz; i++, r += r_stride)
667
2.97M
        rt->r[off + 5 + i] = r;
668
136k
    // three more rows go to the even slots off+0/2/4; the odd slots
    // off+1/3 are left NULL
    rt->r[off + 0] = r;
669
136k
    r += r_stride;
670
136k
    rt->r[off + 1] = NULL;
671
136k
    rt->r[off + 2] = r;
672
136k
    r += r_stride;
673
136k
    rt->r[off + 3] = NULL;
674
136k
    rt->r[off + 4] = r;
675
136k
    // odd superblock rows: exchange those three pointers with the entries
    // sbsz slots further down
    if (sby & 1) {
676
56.6k
#define EXCHANGE(a, b) do { void *const tmp = a; a = b; b = tmp; } while (0)
677
18.8k
        EXCHANGE(rt->r[off + 0], rt->r[off + sbsz + 0]);
678
18.8k
        EXCHANGE(rt->r[off + 2], rt->r[off + sbsz + 2]);
679
18.8k
        EXCHANGE(rt->r[off + 4], rt->r[off + sbsz + 4]);
680
18.8k
#undef EXCHANGE
681
18.8k
    }
682
683
136k
    rt->rf = rf;
684
136k
    rt->tile_row.start = tile_row_start4;
685
136k
    rt->tile_row.end = imin(tile_row_end4, rf->ih4);
686
136k
    rt->tile_col.start = tile_col_start4;
687
136k
    rt->tile_col.end = imin(tile_col_end4, rf->iw4);
688
136k
}
689
690
/*
 * Fill rf->rp_proj for rows [row_start8, row_end8) and columns
 * [col_start8, col_end8) by projecting the motion vectors stored in the
 * reference frames' temporal blocks (rf->rp_ref[]).
 *
 * Step 1 marks every entry of the window as INVALID_MV.
 * Step 2 walks each of the rf->n_mfmvs motion-field references whose
 * mfmv_ref2cur is valid. Source blocks are read from a column range widened
 * by 8 on each side (clipped to [0, rf->iw8)). A block is projected only if
 * its ref and the matching mfmv_ref2ref entry are both non-zero. The
 * projected position is written only if it stays within the same 8-row
 * group (clipped to the row window) and within [x_sb_align - 8,
 * x_sb_align + 16) clipped to the column window.
 *
 * tile_row_idx is forced to 0 when rf->n_tile_threads == 1. row_end8 is
 * clipped to rf->ih8; the asserts require row_start8 >= 0 and at most 16
 * rows per call.
 */
static void load_tmvs_c(const refmvs_frame *const rf, int tile_row_idx,
691
                        const int col_start8, const int col_end8,
692
                        const int row_start8, int row_end8)
693
9.01k
{
694
9.01k
    if (rf->n_tile_threads == 1) tile_row_idx = 0;
695
9.01k
    assert(row_start8 >= 0);
696
9.01k
    assert((unsigned) (row_end8 - row_start8) <= 16U);
697
9.01k
    row_end8 = imin(row_end8, rf->ih8);
698
9.01k
    // widened source column range used when scanning the reference frames
    const int col_start8i = imax(col_start8 - 8, 0);
699
9.01k
    const int col_end8i = imin(col_end8 + 8, rf->iw8);
700
701
9.01k
    const ptrdiff_t stride = rf->rp_stride;
702
9.01k
    // rows are addressed modulo 16 inside this tile row's 16-row slice
    refmvs_temporal_block *rp_proj =
703
9.01k
        &rf->rp_proj[16 * stride * tile_row_idx + (row_start8 & 15) * stride];
704
55.6k
    for (int y = row_start8; y < row_end8; y++) {
705
107k
        for (int x = col_start8; x < col_end8; x++)
706
60.4k
            rp_proj[x].mv.n = INVALID_MV;
707
46.6k
        rp_proj += stride;
708
46.6k
    }
709
710
9.01k
    // reset to the start of the slice; rows are selected via (pos_y & 15) below
    rp_proj = &rf->rp_proj[16 * stride * tile_row_idx];
711
19.1k
    for (int n = 0; n < rf->n_mfmvs; n++) {
712
10.1k
        const int ref2cur = rf->mfmv_ref2cur[n];
713
10.1k
        if (ref2cur == INVALID_REF2CUR) continue;
714
715
8.58k
        const int ref = rf->mfmv_ref[n];
716
8.58k
        // negative for ref < 4, non-negative otherwise; only used XORed into
        // the second argument of apply_sign() below
        const int ref_sign = ref - 4;
717
8.58k
        const refmvs_temporal_block *r = &rf->rp_ref[ref][row_start8 * stride];
718
58.4k
        for (int y = row_start8; y < row_end8; y++) {
719
49.8k
            // the projected row must stay inside the 8-row group containing y
            const int y_sb_align = y & ~7;
720
49.8k
            const int y_proj_start = imax(y_sb_align, row_start8);
721
49.8k
            const int y_proj_end = imin(y_sb_align + 8, row_end8);
722
105k
            for (int x = col_start8i; x < col_end8i; x++) {
723
55.2k
                const refmvs_temporal_block *rb = &r[x];
724
55.2k
                const int b_ref = rb->ref;
725
55.2k
                if (!b_ref) continue;
726
20.8k
                const int ref2ref = rf->mfmv_ref2ref[n][b_ref - 1];
727
20.8k
                if (!ref2ref) continue;
728
12.5k
                const mv b_mv = rb->mv;
729
12.5k
                const mv offset = mv_projection(b_mv, ref2cur, ref2ref);
730
12.5k
                int pos_x = x + apply_sign(abs(offset.x) >> 6,
731
12.5k
                                           offset.x ^ ref_sign);
732
12.5k
                const int pos_y = y + apply_sign(abs(offset.y) >> 6,
733
12.5k
                                                 offset.y ^ ref_sign);
734
12.5k
                if (pos_y >= y_proj_start && pos_y < y_proj_end) {
735
11.6k
                    const ptrdiff_t pos = (pos_y & 15) * stride;
736
16.6k
                    // handle a run of consecutive blocks that share the same
                    // ref and mv: the projection offset is reused and pos_x
                    // simply advances together with x
                    for (;;) {
737
16.6k
                        const int x_sb_align = x & ~7;
738
16.6k
                        if (pos_x >= imax(x_sb_align - 8, col_start8) &&
739
16.3k
                            pos_x < imin(x_sb_align + 16, col_end8))
740
15.9k
                        {
741
15.9k
                            rp_proj[pos + pos_x].mv = rb->mv;
742
15.9k
                            rp_proj[pos + pos_x].ref = ref2ref;
743
15.9k
                        }
744
16.6k
                        if (++x >= col_end8i) break;
745
7.15k
                        rb++;
746
7.15k
                        if (rb->ref != b_ref || rb->mv.n != b_mv.n) break;
747
5.00k
                        pos_x++;
748
5.00k
                    }
749
11.6k
                } else {
750
988
                    // projected row is out of range: skip the whole run of
                    // identical blocks without writing anything
                    for (;;) {
751
988
                        if (++x >= col_end8i) break;
752
521
                        rb++;
753
521
                        if (rb->ref != b_ref || rb->mv.n != b_mv.n) break;
754
521
                    }
755
896
                }
756
12.5k
                // the run loops leave x one past the run; undo one step so the
                // enclosing for-loop's x++ lands on the next unprocessed block
                x--;
757
12.5k
            }
758
49.8k
            r += stride;
759
49.8k
        }
760
8.58k
    }
761
9.01k
}
762
763
/*
 * Store one temporal block per 8-unit column into `rp` for rows
 * [row_start8, row_end8), starting at column col_start8.
 *
 * For row y the source blocks come from rr[(y & 15) * 2]; the candidate for
 * column x is the entry at index x * 2 + 1 of that row. Per candidate:
 *   - mv[1]/ref[1] is stored if ref[1] > 0, ref_sign[ref[1] - 1] is set and
 *     both mv components are below 4096 in magnitude;
 *   - otherwise mv[0]/ref[0] is stored under the same conditions;
 *   - otherwise a zero mv with ref 0 is stored.
 * The chosen value is replicated over bw8 columns, where bw8 is
 * dav1d_block_dimensions[bs][0] halved and rounded up.
 *
 * Note that x advances by the full bw8 of each candidate, so the replication
 * loops themselves do not clip against col_end8.
 * `rp` is advanced by `stride` entries after each row.
 */
static void save_tmvs_c(refmvs_temporal_block *rp, const ptrdiff_t stride,
764
                        refmvs_block *const *const rr,
765
                        const uint8_t *const ref_sign,
766
                        const int col_end8, const int row_end8,
767
                        const int col_start8, const int row_start8)
768
15.0k
{
769
97.0k
    for (int y = row_start8; y < row_end8; y++) {
770
81.9k
        const refmvs_block *const b = rr[(y & 15) * 2];
771
772
165k
        // x is advanced inside the body, by the width of each candidate block
        for (int x = col_start8; x < col_end8;) {
773
83.4k
            const refmvs_block *const cand_b = &b[x * 2 + 1];
774
83.4k
            const int bw8 = (dav1d_block_dimensions[cand_b->bs][0] + 1) >> 1;
775
776
83.4k
            // OR of the two absolute values is < 4096 only if both are < 4096
            if (cand_b->ref.ref[1] > 0 && ref_sign[cand_b->ref.ref[1] - 1] &&
777
3.56k
                (abs(cand_b->mv.mv[1].y) | abs(cand_b->mv.mv[1].x)) < 4096)
778
3.56k
            {
779
29.6k
                for (int n = 0; n < bw8; n++, x++)
780
26.1k
                    rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[1],
781
26.1k
                                                      .ref = cand_b->ref.ref[1] };
782
79.9k
            } else if (cand_b->ref.ref[0] > 0 && ref_sign[cand_b->ref.ref[0] - 1] &&
783
22.1k
                       (abs(cand_b->mv.mv[0].y) | abs(cand_b->mv.mv[0].x)) < 4096)
784
22.1k
            {
785
153k
                for (int n = 0; n < bw8; n++, x++)
786
131k
                    rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[0],
787
131k
                                                      .ref = cand_b->ref.ref[0] };
788
57.7k
            } else {
789
406k
                for (int n = 0; n < bw8; n++, x++) {
790
348k
                    rp[x].mv.n = 0;
791
348k
                    rp[x].ref = 0; // "invalid"
792
348k
                }
793
57.7k
            }
794
83.4k
        }
795
81.9k
        rp += stride;
796
81.9k
    }
797
15.0k
}
798
799
/*
 * Per-frame initialisation of the reference-MV state `rf`.
 *
 * - Derives the frame dimensions in 8- and 4-unit blocks and the row stride
 *   rp_stride (frame width rounded up to a multiple of 128, divided by 8).
 * - (Re)allocates rf->r and rf->rp_proj as one 64-byte-aligned buffer
 *   whenever the required block count differs from rf->n_blocks.
 * - Fills sign_bias[], mfmv_sign[] and pocdiff[] for the 7 references from
 *   get_poc_diff() between each ref_poc[i] and the frame's frame_offset.
 * - If frm_hdr->use_ref_frame_mvs and seq_hdr->order_hint_n_bits are both
 *   non-zero, selects up to 3 motion-field references (rf->mfmv_ref[]) and
 *   computes their mfmv_ref2cur / mfmv_ref2ref distances.
 *
 * Returns 0 on success, or DAV1D_ERR(ENOMEM) if the allocation fails (in
 * which case rf->n_blocks is reset to 0).
 */
int dav1d_refmvs_init_frame(refmvs_frame *const rf,
800
                            const Dav1dSequenceHeader *const seq_hdr,
801
                            const Dav1dFrameHeader *const frm_hdr,
802
                            const uint8_t ref_poc[7],
803
                            refmvs_temporal_block *const rp,
804
                            const uint8_t ref_ref_poc[7][7],
805
                            /*const*/ refmvs_temporal_block *const rp_ref[7],
806
                            const int n_tile_threads, const int n_frame_threads)
807
57.2k
{
808
57.2k
    const int rp_stride = ((frm_hdr->width[0] + 127) & ~127) >> 3;
809
57.2k
    // per-tile-row storage is only needed with more than one tile thread
    const int n_tile_rows = n_tile_threads > 1 ? frm_hdr->tiling.rows : 1;
810
57.2k
    const int n_blocks = rp_stride * n_tile_rows;
811
812
57.2k
    rf->sbsz = 16 << seq_hdr->sb128;
813
57.2k
    rf->frm_hdr = frm_hdr;
814
57.2k
    rf->iw8 = (frm_hdr->width[0] + 7) >> 3;
815
57.2k
    rf->ih8 = (frm_hdr->height + 7) >> 3;
816
57.2k
    rf->iw4 = rf->iw8 << 1;
817
57.2k
    rf->ih4 = rf->ih8 << 1;
818
57.2k
    rf->rp = rp;
819
57.2k
    rf->rp_stride = rp_stride;
820
57.2k
    rf->n_tile_threads = n_tile_threads;
821
57.2k
    rf->n_frame_threads = n_frame_threads;
822
823
57.2k
    if (n_blocks != rf->n_blocks) {
824
18.2k
        // 35 rows of 2 * n_blocks entries, doubled with more than one frame thread
        const size_t r_sz = sizeof(*rf->r) * 35 * 2 * n_blocks * (1 + (n_frame_threads > 1));
825
18.2k
        const size_t rp_proj_sz = sizeof(*rf->rp_proj) * 16 * n_blocks;
826
        /* Note that sizeof(*rf->r) == 12, but it's accessed using 16-byte unaligned
827
         * loads in save_tmvs() asm which can overread 4 bytes into rp_proj. */
828
18.2k
        dav1d_free_aligned(rf->r);
829
18.2k
        rf->r = dav1d_alloc_aligned(ALLOC_REFMVS, r_sz + rp_proj_sz, 64);
830
18.2k
        if (!rf->r) {
831
0
            // force a fresh allocation attempt on the next call
            rf->n_blocks = 0;
832
0
            return DAV1D_ERR(ENOMEM);
833
0
        }
834
835
18.2k
        // rp_proj lives directly behind r in the same allocation
        rf->rp_proj = (refmvs_temporal_block*)((uintptr_t)rf->r + r_sz);
836
18.2k
        rf->n_blocks = n_blocks;
837
18.2k
    }
838
839
57.2k
    const int poc = frm_hdr->frame_offset;
840
457k
    for (int i = 0; i < 7; i++) {
841
400k
        const int poc_diff = get_poc_diff(seq_hdr->order_hint_n_bits,
842
400k
                                          ref_poc[i], poc);
843
400k
        rf->sign_bias[i] = poc_diff > 0;
844
400k
        rf->mfmv_sign[i] = poc_diff < 0;
845
400k
        // same distance with the operands swapped, limited to [-31, 31]
        rf->pocdiff[i] = iclip(get_poc_diff(seq_hdr->order_hint_n_bits,
846
400k
                                            poc, ref_poc[i]), -31, 31);
847
400k
    }
848
849
    // temporal MV setup
850
57.2k
    rf->n_mfmvs = 0;
851
57.2k
    rf->rp_ref = rp_ref;
852
57.2k
    if (frm_hdr->use_ref_frame_mvs && seq_hdr->order_hint_n_bits) {
853
8.41k
        // `total` caps how many refs the altref/last2 fallbacks may bring the
        // list up to: 2 by default, 3 once "last" has been added
        int total = 2;
854
8.41k
        if (rp_ref[0] && ref_ref_poc[0][6] != ref_poc[3] /* alt-of-last != gold */) {
855
2.07k
            rf->mfmv_ref[rf->n_mfmvs++] = 0; // last
856
2.07k
            total = 3;
857
2.07k
        }
858
8.41k
        if (rp_ref[4] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[4],
859
6.96k
                                      frm_hdr->frame_offset) > 0)
860
487
        {
861
487
            rf->mfmv_ref[rf->n_mfmvs++] = 4; // bwd
862
487
        }
863
8.41k
        if (rp_ref[5] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[5],
864
5.52k
                                      frm_hdr->frame_offset) > 0)
865
200
        {
866
200
            rf->mfmv_ref[rf->n_mfmvs++] = 5; // altref2
867
200
        }
868
8.41k
        if (rf->n_mfmvs < total && rp_ref[6] &&
869
4.16k
            get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[6],
870
4.16k
                         frm_hdr->frame_offset) > 0)
871
1.64k
        {
872
1.64k
            rf->mfmv_ref[rf->n_mfmvs++] = 6; // altref
873
1.64k
        }
874
8.41k
        if (rf->n_mfmvs < total && rp_ref[1])
875
5.30k
            rf->mfmv_ref[rf->n_mfmvs++] = 1; // last2
876
877
18.1k
        for (int n = 0; n < rf->n_mfmvs; n++) {
878
9.70k
            const int rpoc = ref_poc[rf->mfmv_ref[n]];
879
9.70k
            const int diff1 = get_poc_diff(seq_hdr->order_hint_n_bits,
880
9.70k
                                           rpoc, frm_hdr->frame_offset);
881
9.70k
            if (abs(diff1) > 31) {
882
1.39k
                rf->mfmv_ref2cur[n] = INVALID_REF2CUR;
883
8.30k
            } else {
884
8.30k
                // sign is flipped for the references with index below 4
                rf->mfmv_ref2cur[n] = rf->mfmv_ref[n] < 4 ? -diff1 : diff1;
885
66.4k
                for (int m = 0; m < 7; m++) {
886
58.1k
                    const int rrpoc = ref_ref_poc[rf->mfmv_ref[n]][m];
887
58.1k
                    const int diff2 = get_poc_diff(seq_hdr->order_hint_n_bits,
888
58.1k
                                                   rpoc, rrpoc);
889
                    // unsigned comparison also catches the < 0 case
890
58.1k
                    rf->mfmv_ref2ref[n][m] = (unsigned) diff2 > 31U ? 0 : diff2;
891
58.1k
                }
892
8.30k
            }
893
9.70k
        }
894
8.41k
    }
895
57.2k
    rf->use_ref_frame_mvs = rf->n_mfmvs > 0;
896
897
57.2k
    return 0;
898
57.2k
}
899
900
/*
 * Copy the block `*rmv` into a bw4-wide, bh4-high rectangle.
 *
 * `rr` points at the first of bh4 consecutive row pointers; in each row the
 * entries [bx4, bx4 + bw4) are overwritten with *rmv.
 *
 * bh4 must be at least 1: the do/while body runs before the counter is
 * tested, so bh4 == 0 would not terminate after zero rows.
 */
static void splat_mv_c(refmvs_block **rr, const refmvs_block *const rmv,
901
                       const int bx4, const int bw4, int bh4)
902
668k
{
903
3.70M
    do {
904
3.70M
        // take the current row, advance to the next one, and offset to column bx4
        refmvs_block *const r = *rr++ + bx4;
905
42.2M
        for (int x = 0; x < bw4; x++)
906
38.5M
            r[x] = *rmv;
907
3.70M
    } while (--bh4);
908
668k
}
909
910
#if HAVE_ASM
911
#if ARCH_AARCH64 || ARCH_ARM
912
#include "src/arm/refmvs.h"
913
#elif ARCH_LOONGARCH64
914
#include "src/loongarch/refmvs.h"
915
#elif ARCH_X86
916
#include "src/x86/refmvs.h"
917
#endif
918
#endif
919
920
/*
 * Populate the refmvs DSP function table `c`.
 *
 * The portable C implementations defined in this file are installed first.
 * When HAVE_ASM is set, the architecture-specific init function for
 * ARM/AArch64, LoongArch64 or x86 is then called on the same table.
 * NOTE(review): presumably those replace entries with optimised versions
 * where the CPU supports them — confirm in the respective arch headers.
 */
COLD void dav1d_refmvs_dsp_init(Dav1dRefmvsDSPContext *const c)
921
16.9k
{
922
16.9k
    c->load_tmvs = load_tmvs_c;
923
16.9k
    c->save_tmvs = save_tmvs_c;
924
16.9k
    c->splat_mv = splat_mv_c;
925
926
#if HAVE_ASM
927
#if ARCH_AARCH64 || ARCH_ARM
928
    refmvs_dsp_init_arm(c);
929
#elif ARCH_LOONGARCH64
930
    refmvs_dsp_init_loongarch(c);
931
#elif ARCH_X86
932
    refmvs_dsp_init_x86(c);
933
#endif
934
#endif
935
16.9k
}