Coverage Report

Created: 2026-06-10 06:56

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/dav1d/src/refmvs.c
Line
Count
Source
1
/*
2
 * Copyright © 2020, VideoLAN and dav1d authors
3
 * Copyright © 2020, Two Orioles, LLC
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * 1. Redistributions of source code must retain the above copyright notice, this
10
 *    list of conditions and the following disclaimer.
11
 *
12
 * 2. Redistributions in binary form must reproduce the above copyright notice,
13
 *    this list of conditions and the following disclaimer in the documentation
14
 *    and/or other materials provided with the distribution.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
 */
27
28
#include "config.h"
29
30
#include <limits.h>
31
#include <stdlib.h>
32
33
#include "dav1d/common.h"
34
35
#include "common/intops.h"
36
37
#include "src/env.h"
38
#include "src/mem.h"
39
#include "src/refmvs.h"
40
41
static void add_spatial_candidate(refmvs_candidate *const mvstack, int *const cnt,
42
                                  const int weight, const refmvs_block *const b,
43
                                  const union refmvs_refpair ref, const mv gmv[2],
44
                                  int *const have_newmv_match,
45
                                  int *const have_refmv_match)
46
1.11M
{
47
1.11M
    if (b->mv.mv[0].n == INVALID_MV) return; // intra block, no intrabc
48
49
1.00M
    if (ref.ref[1] == -1) {
50
1.17M
        for (int n = 0; n < 2; n++) {
51
1.03M
            if (b->ref.ref[n] == ref.ref[0]) {
52
749k
                const mv cand_mv = ((b->mf & 1) && gmv[0].n != INVALID_MV) ?
53
729k
                                   gmv[0] : b->mv.mv[n];
54
55
749k
                *have_refmv_match = 1;
56
749k
                *have_newmv_match |= b->mf >> 1;
57
58
749k
                const int last = *cnt;
59
1.33M
                for (int m = 0; m < last; m++)
60
814k
                    if (mvstack[m].mv.mv[0].n == cand_mv.n) {
61
233k
                        mvstack[m].weight += weight;
62
233k
                        return;
63
233k
                    }
64
65
515k
                if (last < 8) {
66
515k
                    mvstack[last].mv.mv[0] = cand_mv;
67
515k
                    mvstack[last].weight = weight;
68
515k
                    *cnt = last + 1;
69
515k
                }
70
515k
                return;
71
749k
            }
72
1.03M
        }
73
883k
    } else if (b->ref.pair == ref.pair) {
74
39.3k
        const refmvs_mvpair cand_mv = { .mv = {
75
39.3k
            [0] = ((b->mf & 1) && gmv[0].n != INVALID_MV) ? gmv[0] : b->mv.mv[0],
76
39.3k
            [1] = ((b->mf & 1) && gmv[1].n != INVALID_MV) ? gmv[1] : b->mv.mv[1],
77
39.3k
        }};
78
79
39.3k
        *have_refmv_match = 1;
80
39.3k
        *have_newmv_match |= b->mf >> 1;
81
82
39.3k
        const int last = *cnt;
83
59.6k
        for (int n = 0; n < last; n++)
84
33.4k
            if (mvstack[n].mv.n == cand_mv.n) {
85
13.1k
                mvstack[n].weight += weight;
86
13.1k
                return;
87
13.1k
            }
88
89
26.1k
        if (last < 8) {
90
26.1k
            mvstack[last].mv = cand_mv;
91
26.1k
            mvstack[last].weight = weight;
92
26.1k
            *cnt = last + 1;
93
26.1k
        }
94
26.1k
    }
95
1.00M
}
96
97
static int scan_row(refmvs_candidate *const mvstack, int *const cnt,
98
                    const union refmvs_refpair ref, const mv gmv[2],
99
                    const refmvs_block *b, const int bw4, const int w4,
100
                    const int max_rows, const int step,
101
                    int *const have_newmv_match, int *const have_refmv_match)
102
346k
{
103
346k
    const refmvs_block *cand_b = b;
104
346k
    const enum BlockSize first_cand_bs = cand_b->bs;
105
346k
    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
106
346k
    int cand_bw4 = first_cand_b_dim[0];
107
346k
    int len = imax(step, imin(bw4, cand_bw4));
108
109
346k
    if (bw4 <= cand_bw4) {
110
        // FIXME weight can be higher for odd blocks (bx4 & 1), but then the
111
        // position of the first block has to be odd already, i.e. not just
112
        // for row_offset=-3/-5
113
        // FIXME why can this not be cand_bw4?
114
308k
        const int weight = bw4 == 1 ? 2 :
115
308k
                           imax(2, imin(2 * max_rows, first_cand_b_dim[1]));
116
308k
        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
117
308k
                              have_newmv_match, have_refmv_match);
118
308k
        return weight >> 1;
119
308k
    }
120
121
66.0k
    for (int x = 0;;) {
122
        // FIXME if we overhang above, we could fill a bitmask so we don't have
123
        // to repeat the add_spatial_candidate() for the next row, but just increase
124
        // the weight here
125
66.0k
        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
126
66.0k
                              have_newmv_match, have_refmv_match);
127
66.0k
        x += len;
128
66.0k
        if (x >= w4) return 1;
129
28.4k
        cand_b = &b[x];
130
28.4k
        cand_bw4 = dav1d_block_dimensions[cand_b->bs][0];
131
28.4k
        assert(cand_bw4 < bw4);
132
28.4k
        len = imax(step, cand_bw4);
133
28.4k
    }
134
37.6k
}
135
136
static int scan_col(refmvs_candidate *const mvstack, int *const cnt,
137
                    const union refmvs_refpair ref, const mv gmv[2],
138
                    /*const*/ refmvs_block *const *b, const int bh4, const int h4,
139
                    const int bx4, const int max_cols, const int step,
140
                    int *const have_newmv_match, int *const have_refmv_match)
141
465k
{
142
465k
    const refmvs_block *cand_b = &b[0][bx4];
143
465k
    const enum BlockSize first_cand_bs = cand_b->bs;
144
465k
    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
145
465k
    int cand_bh4 = first_cand_b_dim[1];
146
465k
    int len = imax(step, imin(bh4, cand_bh4));
147
148
465k
    if (bh4 <= cand_bh4) {
149
        // FIXME weight can be higher for odd blocks (by4 & 1), but then the
150
        // position of the first block has to be odd already, i.e. not just
151
        // for col_offset=-3/-5
152
        // FIXME why can this not be cand_bh4?
153
420k
        const int weight = bh4 == 1 ? 2 :
154
420k
                           imax(2, imin(2 * max_cols, first_cand_b_dim[0]));
155
420k
        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
156
420k
                            have_newmv_match, have_refmv_match);
157
420k
        return weight >> 1;
158
420k
    }
159
160
88.3k
    for (int y = 0;;) {
161
        // FIXME if we overhang above, we could fill a bitmask so we don't have
162
        // to repeat the add_spatial_candidate() for the next row, but just increase
163
        // the weight here
164
88.3k
        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
165
88.3k
                              have_newmv_match, have_refmv_match);
166
88.3k
        y += len;
167
88.3k
        if (y >= h4) return 1;
168
42.9k
        cand_b = &b[y][bx4];
169
42.9k
        cand_bh4 = dav1d_block_dimensions[cand_b->bs][1];
170
42.9k
        assert(cand_bh4 < bh4);
171
42.9k
        len = imax(step, cand_bh4);
172
42.9k
    }
173
45.5k
}
174
175
9.82k
static inline union mv mv_projection(const union mv mv, const int num, const int den) {
176
9.82k
    static const uint16_t div_mult[32] = {
177
9.82k
           0, 16384, 8192, 5461, 4096, 3276, 2730, 2340,
178
9.82k
        2048,  1820, 1638, 1489, 1365, 1260, 1170, 1092,
179
9.82k
        1024,   963,  910,  862,  819,  780,  744,  712,
180
9.82k
         682,   655,  630,  606,  585,  564,  546,  528
181
9.82k
    };
182
9.82k
    assert(den > 0 && den < 32);
183
9.82k
    assert(num > -32 && num < 32);
184
9.82k
    const int frac = num * div_mult[den];
185
9.82k
    const int y = mv.y * frac, x = mv.x * frac;
186
    // Round and clip according to AV1 spec section 7.9.3
187
9.82k
    return (union mv) { // 0x3fff == (1 << 14) - 1
188
9.82k
        .y = iclip((y + 8192 + (y >> 31)) >> 14, -0x3fff, 0x3fff),
189
9.82k
        .x = iclip((x + 8192 + (x >> 31)) >> 14, -0x3fff, 0x3fff)
190
9.82k
    };
191
9.82k
}
192
193
static void add_temporal_candidate(const refmvs_frame *const rf,
194
                                   refmvs_candidate *const mvstack, int *const cnt,
195
                                   const refmvs_temporal_block *const rb,
196
                                   const union refmvs_refpair ref, int *const globalmv_ctx,
197
                                   const union mv gmv[])
198
8.14k
{
199
8.14k
    if (rb->mv.n == INVALID_MV) return;
200
201
5.39k
    union mv mv = mv_projection(rb->mv, rf->pocdiff[ref.ref[0] - 1], rb->ref);
202
5.39k
    fix_mv_precision(rf->frm_hdr, &mv);
203
204
5.39k
    const int last = *cnt;
205
5.39k
    if (ref.ref[1] == -1) {
206
3.19k
        if (globalmv_ctx)
207
1.06k
            *globalmv_ctx = (abs(mv.x - gmv[0].x) | abs(mv.y - gmv[0].y)) >= 16;
208
209
5.80k
        for (int n = 0; n < last; n++)
210
4.92k
            if (mvstack[n].mv.mv[0].n == mv.n) {
211
2.31k
                mvstack[n].weight += 2;
212
2.31k
                return;
213
2.31k
            }
214
887
        if (last < 8) {
215
863
            mvstack[last].mv.mv[0] = mv;
216
863
            mvstack[last].weight = 2;
217
863
            *cnt = last + 1;
218
863
        }
219
2.19k
    } else {
220
2.19k
        refmvs_mvpair mvp = { .mv = {
221
2.19k
            [0] = mv,
222
2.19k
            [1] = mv_projection(rb->mv, rf->pocdiff[ref.ref[1] - 1], rb->ref),
223
2.19k
        }};
224
2.19k
        fix_mv_precision(rf->frm_hdr, &mvp.mv[1]);
225
226
3.65k
        for (int n = 0; n < last; n++)
227
3.07k
            if (mvstack[n].mv.n == mvp.n) {
228
1.61k
                mvstack[n].weight += 2;
229
1.61k
                return;
230
1.61k
            }
231
582
        if (last < 8) {
232
563
            mvstack[last].mv = mvp;
233
563
            mvstack[last].weight = 2;
234
563
            *cnt = last + 1;
235
563
        }
236
582
    }
237
5.39k
}
238
239
static void add_compound_extended_candidate(refmvs_candidate *const same,
240
                                            int *const same_count,
241
                                            const refmvs_block *const cand_b,
242
                                            const int sign0, const int sign1,
243
                                            const union refmvs_refpair ref,
244
                                            const uint8_t *const sign_bias)
245
32.1k
{
246
32.1k
    refmvs_candidate *const diff = &same[2];
247
32.1k
    int *const diff_count = &same_count[2];
248
249
81.9k
    for (int n = 0; n < 2; n++) {
250
63.0k
        const int cand_ref = cand_b->ref.ref[n];
251
252
63.0k
        if (cand_ref <= 0) break;
253
254
49.8k
        mv cand_mv = cand_b->mv.mv[n];
255
49.8k
        if (cand_ref == ref.ref[0]) {
256
17.4k
            if (same_count[0] < 2)
257
16.8k
                same[same_count[0]++].mv.mv[0] = cand_mv;
258
17.4k
            if (diff_count[1] < 2) {
259
14.8k
                if (sign1 ^ sign_bias[cand_ref - 1]) {
260
181
                    cand_mv.y = -cand_mv.y;
261
181
                    cand_mv.x = -cand_mv.x;
262
181
                }
263
14.8k
                diff[diff_count[1]++].mv.mv[1] = cand_mv;
264
14.8k
            }
265
32.4k
        } else if (cand_ref == ref.ref[1]) {
266
17.0k
            if (same_count[1] < 2)
267
16.6k
                same[same_count[1]++].mv.mv[1] = cand_mv;
268
17.0k
            if (diff_count[0] < 2) {
269
13.9k
                if (sign0 ^ sign_bias[cand_ref - 1]) {
270
189
                    cand_mv.y = -cand_mv.y;
271
189
                    cand_mv.x = -cand_mv.x;
272
189
                }
273
13.9k
                diff[diff_count[0]++].mv.mv[0] = cand_mv;
274
13.9k
            }
275
17.0k
        } else {
276
15.3k
            mv i_cand_mv = (union mv) {
277
15.3k
                .x = -cand_mv.x,
278
15.3k
                .y = -cand_mv.y
279
15.3k
            };
280
281
15.3k
            if (diff_count[0] < 2) {
282
12.1k
                diff[diff_count[0]++].mv.mv[0] =
283
12.1k
                    sign0 ^ sign_bias[cand_ref - 1] ?
284
12.0k
                    i_cand_mv : cand_mv;
285
12.1k
            }
286
287
15.3k
            if (diff_count[1] < 2) {
288
11.4k
                diff[diff_count[1]++].mv.mv[1] =
289
11.4k
                    sign1 ^ sign_bias[cand_ref - 1] ?
290
11.3k
                    i_cand_mv : cand_mv;
291
11.4k
            }
292
15.3k
        }
293
49.8k
    }
294
32.1k
}
295
296
static void add_single_extended_candidate(refmvs_candidate mvstack[8], int *const cnt,
297
                                          const refmvs_block *const cand_b,
298
                                          const int sign, const uint8_t *const sign_bias)
299
101k
{
300
202k
    for (int n = 0; n < 2; n++) {
301
197k
        const int cand_ref = cand_b->ref.ref[n];
302
303
197k
        if (cand_ref <= 0) break;
304
        // we need to continue even if cand_ref == ref.ref[0], since
305
        // the candidate could have been added as a globalmv variant,
306
        // which changes the value
307
        // FIXME if scan_{row,col}() returned a mask for the nearest
308
        // edge, we could skip the appropriate ones here
309
310
101k
        mv cand_mv = cand_b->mv.mv[n];
311
101k
        if (sign ^ sign_bias[cand_ref - 1]) {
312
201
            cand_mv.y = -cand_mv.y;
313
201
            cand_mv.x = -cand_mv.x;
314
201
        }
315
316
101k
        int m;
317
101k
        const int last = *cnt;
318
117k
        for (m = 0; m < last; m++)
319
94.1k
            if (cand_mv.n == mvstack[m].mv.mv[0].n)
320
78.4k
                break;
321
101k
        if (m == last) {
322
22.9k
            mvstack[m].mv.mv[0] = cand_mv;
323
22.9k
            mvstack[m].weight = 2; // "minimal"
324
22.9k
            *cnt = last + 1;
325
22.9k
        }
326
101k
    }
327
101k
}
328
329
/*
330
 * refmvs_frame allocates memory for one sbrow (32 blocks high, whole frame
331
 * wide) of 4x4-resolution refmvs_block entries for spatial MV referencing.
332
 * mvrefs_tile[] keeps a list of 35 (32 + 3 above) pointers into this memory,
333
 * and each sbrow, the bottom entries (y=27/29/31) are exchanged with the top
334
 * (-5/-3/-1) pointers by calling dav1d_refmvs_tile_sbrow_init() at the start
335
 * of each tile/sbrow.
336
 *
337
 * For temporal MV referencing, we call dav1d_refmvs_save_tmvs() at the end of
338
 * each tile/sbrow (when tile column threading is enabled), or at the start of
339
 * each interleaved sbrow (i.e. once for all tile columns together, when tile
340
 * column threading is disabled). This will copy the 4x4-resolution spatial MVs
341
 * into 8x8-resolution refmvs_temporal_block structures. Then, for subsequent
342
 * frames, at the start of each tile/sbrow (when tile column threading is
343
 * enabled) or at the start of each interleaved sbrow (when tile column
344
 * threading is disabled), we call load_tmvs(), which will project the MVs to
345
 * their respective position in the current frame.
346
 */
347
348
void dav1d_refmvs_find(const refmvs_tile *const rt,
349
                       refmvs_candidate mvstack[8], int *const cnt,
350
                       int *const ctx,
351
                       const union refmvs_refpair ref, const enum BlockSize bs,
352
                       const enum EdgeFlags edge_flags,
353
                       const int by4, const int bx4)
354
269k
{
355
269k
    const refmvs_frame *const rf = rt->rf;
356
269k
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
357
269k
    const int bw4 = b_dim[0], w4 = imin(imin(bw4, 16), rt->tile_col.end - bx4);
358
269k
    const int bh4 = b_dim[1], h4 = imin(imin(bh4, 16), rt->tile_row.end - by4);
359
269k
    mv gmv[2], tgmv[2];
360
361
269k
    *cnt = 0;
362
269k
    assert(ref.ref[0] >=  0 && ref.ref[0] <= 8 &&
363
269k
           ref.ref[1] >= -1 && ref.ref[1] <= 8);
364
269k
    if (ref.ref[0] > 0) {
365
128k
        tgmv[0] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[0] - 1],
366
128k
                             bx4, by4, bw4, bh4, rf->frm_hdr);
367
128k
        gmv[0] = rf->frm_hdr->gmv[ref.ref[0] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
368
100k
                 tgmv[0] : (mv) { .n = INVALID_MV };
369
140k
    } else {
370
140k
        tgmv[0] = (mv) { .n = 0 };
371
140k
        gmv[0] = (mv) { .n = INVALID_MV };
372
140k
    }
373
269k
    if (ref.ref[1] > 0) {
374
25.0k
        tgmv[1] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[1] - 1],
375
25.0k
                             bx4, by4, bw4, bh4, rf->frm_hdr);
376
25.0k
        gmv[1] = rf->frm_hdr->gmv[ref.ref[1] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
377
20.4k
                 tgmv[1] : (mv) { .n = INVALID_MV };
378
25.0k
    }
379
380
    // top
381
269k
    int have_newmv = 0, have_col_mvs = 0, have_row_mvs = 0;
382
269k
    unsigned max_rows = 0, n_rows = ~0;
383
269k
    const refmvs_block *b_top;
384
269k
    if (by4 > rt->tile_row.start) {
385
187k
        max_rows = imin((by4 - rt->tile_row.start + 1) >> 1, 2 + (bh4 > 1));
386
187k
        b_top = &rt->r[(by4 & 31) - 1 + 5][bx4];
387
187k
        n_rows = scan_row(mvstack, cnt, ref, gmv, b_top,
388
187k
                          bw4, w4, max_rows, bw4 >= 16 ? 4 : 1,
389
187k
                          &have_newmv, &have_row_mvs);
390
187k
    }
391
392
    // left
393
269k
    unsigned max_cols = 0, n_cols = ~0U;
394
269k
    refmvs_block *const *b_left;
395
269k
    if (bx4 > rt->tile_col.start) {
396
228k
        max_cols = imin((bx4 - rt->tile_col.start + 1) >> 1, 2 + (bw4 > 1));
397
228k
        b_left = &rt->r[(by4 & 31) + 5];
398
228k
        n_cols = scan_col(mvstack, cnt, ref, gmv, b_left,
399
228k
                          bh4, h4, bx4 - 1, max_cols, bh4 >= 16 ? 4 : 1,
400
228k
                          &have_newmv, &have_col_mvs);
401
228k
    }
402
403
    // top/right
404
269k
    if (n_rows != ~0U && edge_flags & EDGE_I444_TOP_HAS_RIGHT &&
405
114k
        imax(bw4, bh4) <= 16 && bw4 + bx4 < rt->tile_col.end)
406
86.1k
    {
407
86.1k
        add_spatial_candidate(mvstack, cnt, 4, &b_top[bw4], ref, gmv,
408
86.1k
                              &have_newmv, &have_row_mvs);
409
86.1k
    }
410
411
269k
    const int nearest_match = have_col_mvs + have_row_mvs;
412
269k
    const int nearest_cnt = *cnt;
413
601k
    for (int n = 0; n < nearest_cnt; n++)
414
332k
        mvstack[n].weight += 640;
415
416
    // temporal
417
269k
    int globalmv_ctx = rf->frm_hdr->use_ref_frame_mvs;
418
269k
    if (rf->use_ref_frame_mvs) {
419
2.26k
        const ptrdiff_t stride = rf->rp_stride;
420
2.26k
        const int by8 = by4 >> 1, bx8 = bx4 >> 1;
421
2.26k
        const refmvs_temporal_block *const rbi = &rt->rp_proj[(by8 & 15) * stride + bx8];
422
2.26k
        const refmvs_temporal_block *rb = rbi;
423
2.26k
        const int step_h = bw4 >= 16 ? 2 : 1, step_v = bh4 >= 16 ? 2 : 1;
424
2.26k
        const int w8 = imin((w4 + 1) >> 1, 8), h8 = imin((h4 + 1) >> 1, 8);
425
5.96k
        for (int y = 0; y < h8; y += step_v) {
426
9.62k
            for (int x = 0; x < w8; x+= step_h) {
427
5.92k
                add_temporal_candidate(rf, mvstack, cnt, &rb[x], ref,
428
5.92k
                                       !(x | y) ? &globalmv_ctx : NULL, tgmv);
429
5.92k
            }
430
3.70k
            rb += stride * step_v;
431
3.70k
        }
432
2.26k
        if (imin(bw4, bh4) >= 2 && imax(bw4, bh4) < 16) {
433
1.48k
            const int bh8 = bh4 >> 1, bw8 = bw4 >> 1;
434
1.48k
            rb = &rbi[bh8 * stride];
435
1.48k
            const int has_bottom = by8 + bh8 < imin(rt->tile_row.end >> 1,
436
1.48k
                                                    (by8 & ~7) + 8);
437
1.48k
            if (has_bottom && bx8 - 1 >= imax(rt->tile_col.start >> 1, bx8 & ~7)) {
438
627
                add_temporal_candidate(rf, mvstack, cnt, &rb[-1], ref,
439
627
                                       NULL, NULL);
440
627
            }
441
1.48k
            if (bx8 + bw8 < imin(rt->tile_col.end >> 1, (bx8 & ~7) + 8)) {
442
991
                if (has_bottom) {
443
628
                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8], ref,
444
628
                                           NULL, NULL);
445
628
                }
446
991
                if (by8 + bh8 - 1 < imin(rt->tile_row.end >> 1, (by8 & ~7) + 8)) {
447
969
                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8 - stride],
448
969
                                           ref, NULL, NULL);
449
969
                }
450
991
            }
451
1.48k
        }
452
2.26k
    }
453
269k
    assert(*cnt <= 8);
454
455
    // top/left (which, confusingly, is part of "secondary" references)
456
269k
    int have_dummy_newmv_match;
457
269k
    if ((n_rows | n_cols) != ~0U) {
458
150k
        add_spatial_candidate(mvstack, cnt, 4, &b_top[-1], ref, gmv,
459
150k
                              &have_dummy_newmv_match, &have_row_mvs);
460
150k
    }
461
462
    // "secondary" (non-direct neighbour) top & left edges
463
    // what is different about secondary is that everything is now in 8x8 resolution
464
807k
    for (int n = 2; n <= 3; n++) {
465
538k
        if ((unsigned) n > n_rows && (unsigned) n <= max_rows) {
466
159k
            n_rows += scan_row(mvstack, cnt, ref, gmv,
467
159k
                               &rt->r[(((by4 & 31) - 2 * n + 1) | 1) + 5][bx4 | 1],
468
159k
                               bw4, w4, 1 + max_rows - n, bw4 >= 16 ? 4 : 2,
469
159k
                               &have_dummy_newmv_match, &have_row_mvs);
470
159k
        }
471
472
538k
        if ((unsigned) n > n_cols && (unsigned) n <= max_cols) {
473
237k
            n_cols += scan_col(mvstack, cnt, ref, gmv, &rt->r[((by4 & 31) | 1) + 5],
474
237k
                               bh4, h4, (bx4 - n * 2 + 1) | 1,
475
237k
                               1 + max_cols - n, bh4 >= 16 ? 4 : 2,
476
237k
                               &have_dummy_newmv_match, &have_col_mvs);
477
237k
        }
478
538k
    }
479
269k
    assert(*cnt <= 8);
480
481
269k
    const int ref_match_count = have_col_mvs + have_row_mvs;
482
483
    // context build-up
484
269k
    int refmv_ctx, newmv_ctx;
485
269k
    switch (nearest_match) {
486
43.6k
    case 0:
487
43.6k
        refmv_ctx = imin(2, ref_match_count);
488
43.6k
        newmv_ctx = ref_match_count > 0;
489
43.6k
        break;
490
127k
    case 1:
491
127k
        refmv_ctx = imin(ref_match_count * 3, 4);
492
127k
        newmv_ctx = 3 - have_newmv;
493
127k
        break;
494
98.4k
    case 2:
495
98.4k
        refmv_ctx = 5;
496
98.4k
        newmv_ctx = 5 - have_newmv;
497
98.4k
        break;
498
269k
    }
499
500
    // sorting (nearest, then "secondary")
501
269k
    int len = nearest_cnt;
502
549k
    while (len) {
503
280k
        int last = 0;
504
405k
        for (int n = 1; n < len; n++) {
505
125k
            if (mvstack[n - 1].weight < mvstack[n].weight) {
506
61.0k
#define EXCHANGE(a, b) do { refmvs_candidate tmp = a; a = b; b = tmp; } while (0)
507
56.2k
                EXCHANGE(mvstack[n - 1], mvstack[n]);
508
56.2k
                last = n;
509
56.2k
            }
510
125k
        }
511
280k
        len = last;
512
280k
    }
513
269k
    len = *cnt;
514
424k
    while (len > nearest_cnt) {
515
154k
        int last = nearest_cnt;
516
217k
        for (int n = nearest_cnt + 1; n < len; n++) {
517
63.1k
            if (mvstack[n - 1].weight < mvstack[n].weight) {
518
4.80k
                EXCHANGE(mvstack[n - 1], mvstack[n]);
519
4.80k
#undef EXCHANGE
520
4.80k
                last = n;
521
4.80k
            }
522
63.1k
        }
523
154k
        len = last;
524
154k
    }
525
526
269k
    if (ref.ref[1] > 0) {
527
25.0k
        if (*cnt < 2) {
528
18.1k
            const int sign0 = rf->sign_bias[ref.ref[0] - 1];
529
18.1k
            const int sign1 = rf->sign_bias[ref.ref[1] - 1];
530
18.1k
            const int sz4 = imin(w4, h4);
531
18.1k
            refmvs_candidate *const same = &mvstack[*cnt];
532
18.1k
            int same_count[4] = { 0 };
533
534
            // non-self references in top
535
27.3k
            if (n_rows != ~0U) for (int x = 0; x < sz4;) {
536
14.2k
                const refmvs_block *const cand_b = &b_top[x];
537
14.2k
                add_compound_extended_candidate(same, same_count, cand_b,
538
14.2k
                                                sign0, sign1, ref, rf->sign_bias);
539
14.2k
                x += dav1d_block_dimensions[cand_b->bs][0];
540
14.2k
            }
541
542
            // non-self references in left
543
33.7k
            if (n_cols != ~0U) for (int y = 0; y < sz4;) {
544
17.8k
                const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
545
17.8k
                add_compound_extended_candidate(same, same_count, cand_b,
546
17.8k
                                                sign0, sign1, ref, rf->sign_bias);
547
17.8k
                y += dav1d_block_dimensions[cand_b->bs][1];
548
17.8k
            }
549
550
18.1k
            refmvs_candidate *const diff = &same[2];
551
18.1k
            const int *const diff_count = &same_count[2];
552
553
            // merge together
554
54.4k
            for (int n = 0; n < 2; n++) {
555
36.3k
                int m = same_count[n];
556
557
36.3k
                if (m >= 2) continue;
558
559
27.7k
                const int l = diff_count[n];
560
27.7k
                if (l) {
561
23.8k
                    same[m].mv.mv[n] = diff[0].mv.mv[n];
562
23.8k
                    if (++m == 2) continue;
563
8.77k
                    if (l == 2) {
564
6.72k
                        same[1].mv.mv[n] = diff[1].mv.mv[n];
565
6.72k
                        continue;
566
6.72k
                    }
567
8.77k
                }
568
8.51k
                do {
569
8.51k
                    same[m].mv.mv[n] = tgmv[n];
570
8.51k
                } while (++m < 2);
571
5.91k
            }
572
573
            // if the first extended was the same as the non-extended one,
574
            // then replace it with the second extended one
575
18.1k
            int n = *cnt;
576
18.1k
            if (n == 1 && mvstack[0].mv.n == same[0].mv.n)
577
6.35k
                mvstack[1].mv = mvstack[2].mv;
578
27.4k
            do {
579
27.4k
                mvstack[n].weight = 2;
580
27.4k
            } while (++n < 2);
581
18.1k
            *cnt = 2;
582
18.1k
        }
583
584
        // clamping
585
25.0k
        const int left = -(bx4 + bw4 + 4) * 4 * 8;
586
25.0k
        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
587
25.0k
        const int top = -(by4 + bh4 + 4) * 4 * 8;
588
25.0k
        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
589
590
25.0k
        const int n_refmvs = *cnt;
591
25.0k
        int n = 0;
592
54.1k
        do {
593
54.1k
            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
594
54.1k
            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
595
54.1k
            mvstack[n].mv.mv[1].x = iclip(mvstack[n].mv.mv[1].x, left, right);
596
54.1k
            mvstack[n].mv.mv[1].y = iclip(mvstack[n].mv.mv[1].y, top, bottom);
597
54.1k
        } while (++n < n_refmvs);
598
599
25.0k
        switch (refmv_ctx >> 1) {
600
10.5k
        case 0:
601
10.5k
            *ctx = imin(newmv_ctx, 1);
602
10.5k
            break;
603
9.31k
        case 1:
604
9.31k
            *ctx = 1 + imin(newmv_ctx, 3);
605
9.31k
            break;
606
5.13k
        case 2:
607
5.13k
            *ctx = iclip(3 + newmv_ctx, 4, 7);
608
5.13k
            break;
609
25.0k
        }
610
611
25.0k
        return;
612
244k
    } else if (*cnt < 2 && ref.ref[0] > 0) {
613
64.8k
        const int sign = rf->sign_bias[ref.ref[0] - 1];
614
64.8k
        const int sz4 = imin(w4, h4);
615
616
        // non-self references in top
617
92.7k
        if (n_rows != ~0U) for (int x = 0; x < sz4 && *cnt < 2;) {
618
47.1k
            const refmvs_block *const cand_b = &b_top[x];
619
47.1k
            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
620
47.1k
            x += dav1d_block_dimensions[cand_b->bs][0];
621
47.1k
        }
622
623
        // non-self references in left
624
111k
        if (n_cols != ~0U) for (int y = 0; y < sz4 && *cnt < 2;) {
625
54.1k
            const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
626
54.1k
            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
627
54.1k
            y += dav1d_block_dimensions[cand_b->bs][1];
628
54.1k
        }
629
64.8k
    }
630
244k
    assert(*cnt <= 8);
631
632
    // clamping
633
244k
    int n_refmvs = *cnt;
634
244k
    if (n_refmvs) {
635
228k
        const int left = -(bx4 + bw4 + 4) * 4 * 8;
636
228k
        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
637
228k
        const int top = -(by4 + bh4 + 4) * 4 * 8;
638
228k
        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
639
640
228k
        int n = 0;
641
540k
        do {
642
540k
            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
643
540k
            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
644
540k
        } while (++n < n_refmvs);
645
228k
    }
646
647
341k
    for (int n = *cnt; n < 2; n++)
648
96.9k
        mvstack[n].mv.mv[0] = tgmv[0];
649
650
244k
    *ctx = (refmv_ctx << 4) | (globalmv_ctx << 3) | newmv_ctx;
651
244k
}
652
653
/*
 * Set up the per-tile state `rt` before working on superblock row `sby`.
 *
 * - rt->rp_proj is pointed at the slice of rf->rp_proj selected by
 *   tile_row_idx (16 * rf->rp_stride entries per index).
 * - rt->r[] is filled with row pointers into rf->r, each row being
 *   2 * rf->rp_stride entries long.
 * - The tile bounds are stored, with the end positions clamped to the frame
 *   dimensions rf->iw4 / rf->ih4.
 *
 * tile_row_idx is forced to 0 when rf->n_tile_threads == 1. When
 * rf->n_frame_threads > 1 and pass == 2, the rows are taken from
 * 35 * 2 * rf->n_blocks entries further into rf->r.
 */
void dav1d_refmvs_tile_sbrow_init(refmvs_tile *const rt, const refmvs_frame *const rf,
                                  const int tile_col_start4, const int tile_col_end4,
                                  const int tile_row_start4, const int tile_row_end4,
                                  const int sby, int tile_row_idx, const int pass)
{
    if (rf->n_tile_threads == 1)
        tile_row_idx = 0;

    // frame back-reference and tile bounds
    rt->rf = rf;
    rt->tile_col.start = tile_col_start4;
    rt->tile_col.end = imin(tile_col_end4, rf->iw4);
    rt->tile_row.start = tile_row_start4;
    rt->tile_row.end = imin(tile_row_end4, rf->ih4);

    rt->rp_proj = &rf->rp_proj[16 * rf->rp_stride * tile_row_idx];

    // locate the first row of this tile row's region inside rf->r
    const ptrdiff_t row_stride = rf->rp_stride * 2;
    ptrdiff_t first = 35 * row_stride * tile_row_idx;
    if (rf->n_frame_threads > 1 && pass == 2)
        first += 35 * 2 * rf->n_blocks;
    refmvs_block *row = rf->r + first;

    const int sbsz = rf->sbsz;
    const int base = (sbsz * sby) & 16;

    // slots base+5 .. base+5+sbsz-1 take the first sbsz consecutive rows
    for (int slot = base + 5; slot < base + 5 + sbsz; slot++, row += row_stride)
        rt->r[slot] = row;

    // the next three rows go into the even slots base+0/2/4; the odd slots
    // base+1/3 are cleared
    rt->r[base + 0] = row;
    rt->r[base + 1] = NULL;
    rt->r[base + 2] = row + row_stride;
    rt->r[base + 3] = NULL;
    rt->r[base + 4] = row + 2 * row_stride;

    if (sby & 1) {
        // odd superblock rows: swap slots base+0/2/4 with the slots sbsz
        // entries further on
        for (int k = 0; k <= 4; k += 2) {
            void *const tmp = rt->r[base + k];
            rt->r[base + k] = rt->r[base + sbsz + k];
            rt->r[base + sbsz + k] = tmp;
        }
    }
}
689
690
/*
 * C implementation of the load_tmvs hook (installed in
 * dav1d_refmvs_dsp_init()).
 *
 * For rows [row_start8, row_end8) (row_end8 is clamped to rf->ih8) and
 * columns [col_start8, col_end8), this:
 *   1. marks every entry of the rf->rp_proj slice as INVALID_MV, then
 *   2. for each of the rf->n_mfmvs reference entries, walks that reference's
 *      temporal blocks (rf->rp_ref[ref]) and copies mv / ref2ref into the
 *      rp_proj entry at the projected position (pos_y, pos_x).
 *
 * The "8" suffix on the coordinates presumably means 8x8-pixel block units
 * -- TODO confirm against the callers.
 */
static void load_tmvs_c(const refmvs_frame *const rf, int tile_row_idx,
691
                        const int col_start8, const int col_end8,
692
                        const int row_start8, int row_end8)
693
969
{
694
969
    // with a single tile thread only the first rp_proj slice is used
    if (rf->n_tile_threads == 1) tile_row_idx = 0;
695
969
    assert(row_start8 >= 0);
696
969
    // at most 16 rows per call: each tile_row_idx slice is 16 * stride
    // entries and rows are addressed modulo 16 (the "& 15" below)
    assert((unsigned) (row_end8 - row_start8) <= 16U);
697
969
    row_end8 = imin(row_end8, rf->ih8);
698
969
    // source columns are scanned up to 8 columns beyond the tile on either
    // side, clamped to [0, rf->iw8]
    const int col_start8i = imax(col_start8 - 8, 0);
699
969
    const int col_end8i = imin(col_end8 + 8, rf->iw8);
700
701
969
    const ptrdiff_t stride = rf->rp_stride;
702
969
    // start at row (row_start8 & 15) of this tile row's slice
    refmvs_temporal_block *rp_proj =
703
969
        &rf->rp_proj[16 * stride * tile_row_idx + (row_start8 & 15) * stride];
704
8.93k
    // step 1: invalidate every destination entry in the requested window
    for (int y = row_start8; y < row_end8; y++) {
705
76.3k
        for (int x = col_start8; x < col_end8; x++)
706
68.4k
            rp_proj[x].mv.n = INVALID_MV;
707
7.96k
        rp_proj += stride;
708
7.96k
    }
709
710
969
    // step 2 indexes rows explicitly via (pos_y & 15) * stride, so rewind to
    // the base of the slice
    rp_proj = &rf->rp_proj[16 * stride * tile_row_idx];
711
1.42k
    for (int n = 0; n < rf->n_mfmvs; n++) {
712
453
        const int ref2cur = rf->mfmv_ref2cur[n];
713
453
        // entries flagged INVALID_REF2CUR contribute nothing
        if (ref2cur == INVALID_REF2CUR) continue;
714
715
431
        const int ref = rf->mfmv_ref[n];
716
431
        // negative for ref < 4, non-negative otherwise; XORed with each
        // offset component below to form the sign argument of apply_sign()
        const int ref_sign = ref - 4;
717
431
        const refmvs_temporal_block *r = &rf->rp_ref[ref][row_start8 * stride];
718
2.87k
        for (int y = row_start8; y < row_end8; y++) {
719
2.43k
            // a projection is accepted only if it lands in the same aligned
            // group of 8 rows as its source row, clipped to the row window
            const int y_sb_align = y & ~7;
720
2.43k
            const int y_proj_start = imax(y_sb_align, row_start8);
721
2.43k
            const int y_proj_end = imin(y_sb_align + 8, row_end8);
722
7.76k
            for (int x = col_start8i; x < col_end8i; x++) {
723
5.32k
                const refmvs_temporal_block *rb = &r[x];
724
5.32k
                const int b_ref = rb->ref;
725
5.32k
                // ref == 0 is the "invalid" marker written by save_tmvs_c()
                if (!b_ref) continue;
726
2.48k
                const int ref2ref = rf->mfmv_ref2ref[n][b_ref - 1];
727
2.48k
                // a zero ref2ref entry means there is nothing to project
                if (!ref2ref) continue;
728
2.22k
                const mv b_mv = rb->mv;
729
2.22k
                const mv offset = mv_projection(b_mv, ref2cur, ref2ref);
730
2.22k
                // destination = source position + (|offset| >> 6) with a sign
                // chosen by apply_sign(); presumably it negates the magnitude
                // when its second argument is negative -- TODO confirm
                int pos_x = x + apply_sign(abs(offset.x) >> 6,
731
2.22k
                                           offset.x ^ ref_sign);
732
2.22k
                const int pos_y = y + apply_sign(abs(offset.y) >> 6,
733
2.22k
                                                 offset.y ^ ref_sign);
734
2.22k
                if (pos_y >= y_proj_start && pos_y < y_proj_end) {
735
1.84k
                    const ptrdiff_t pos = (pos_y & 15) * stride;
736
7.03k
                    // Handle a whole run of horizontally adjacent source
                    // blocks with identical ref and mv: they share the same
                    // offset, so pos_x just advances by one per block.
                    for (;;) {
737
7.03k
                        // window is recomputed from the current source
                        // column x on every iteration
                        const int x_sb_align = x & ~7;
738
7.03k
                        if (pos_x >= imax(x_sb_align - 8, col_start8) &&
739
6.81k
                            pos_x < imin(x_sb_align + 16, col_end8))
740
6.56k
                        {
741
6.56k
                            rp_proj[pos + pos_x].mv = rb->mv;
742
6.56k
                            rp_proj[pos + pos_x].ref = ref2ref;
743
6.56k
                        }
744
7.03k
                        if (++x >= col_end8i) break;
745
5.57k
                        rb++;
746
5.57k
                        // run ends at the first block that differs
                        if (rb->ref != b_ref || rb->mv.n != b_mv.n) break;
747
5.19k
                        pos_x++;
748
5.19k
                    }
749
1.84k
                } else {
750
1.16k
                    // projected row is out of range: skip the whole run of
                    // identical blocks without writing anything
                    for (;;) {
751
1.16k
                        if (++x >= col_end8i) break;
752
919
                        rb++;
753
919
                        if (rb->ref != b_ref || rb->mv.n != b_mv.n) break;
754
919
                    }
755
385
                }
756
2.22k
                // x now sits on the first column after the run; step back so
                // the outer loop's x++ lands exactly on it
                x--;
757
2.22k
            }
758
2.43k
            r += stride;
759
2.43k
        }
760
431
    }
761
969
}
762
763
static void save_tmvs_c(refmvs_temporal_block *rp, const ptrdiff_t stride,
764
                        refmvs_block *const *const rr,
765
                        const uint8_t *const ref_sign,
766
                        const int col_end8, const int row_end8,
767
                        const int col_start8, const int row_start8)
768
1.75k
{
769
17.2k
    for (int y = row_start8; y < row_end8; y++) {
770
15.4k
        const refmvs_block *const b = rr[(y & 15) * 2];
771
772
57.2k
        for (int x = col_start8; x < col_end8;) {
773
41.8k
            const refmvs_block *const cand_b = &b[x * 2 + 1];
774
41.8k
            const int bw8 = (dav1d_block_dimensions[cand_b->bs][0] + 1) >> 1;
775
776
41.8k
            if (cand_b->ref.ref[1] > 0 && ref_sign[cand_b->ref.ref[1] - 1] &&
777
8.20k
                (abs(cand_b->mv.mv[1].y) | abs(cand_b->mv.mv[1].x)) < 4096)
778
4.99k
            {
779
22.2k
                for (int n = 0; n < bw8; n++, x++)
780
17.3k
                    rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[1],
781
17.3k
                                                      .ref = cand_b->ref.ref[1] };
782
36.8k
            } else if (cand_b->ref.ref[0] > 0 && ref_sign[cand_b->ref.ref[0] - 1] &&
783
21.8k
                       (abs(cand_b->mv.mv[0].y) | abs(cand_b->mv.mv[0].x)) < 4096)
784
18.6k
            {
785
75.4k
                for (int n = 0; n < bw8; n++, x++)
786
56.8k
                    rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[0],
787
56.8k
                                                      .ref = cand_b->ref.ref[0] };
788
18.6k
            } else {
789
79.7k
                for (int n = 0; n < bw8; n++, x++) {
790
61.6k
                    rp[x].mv.n = 0;
791
61.6k
                    rp[x].ref = 0; // "invalid"
792
61.6k
                }
793
18.1k
            }
794
41.8k
        }
795
15.4k
        rp += stride;
796
15.4k
    }
797
1.75k
}
798
799
int dav1d_refmvs_init_frame(refmvs_frame *const rf,
800
                            const Dav1dSequenceHeader *const seq_hdr,
801
                            const Dav1dFrameHeader *const frm_hdr,
802
                            const uint8_t ref_poc[7],
803
                            refmvs_temporal_block *const rp,
804
                            const uint8_t ref_ref_poc[7][7],
805
                            /*const*/ refmvs_temporal_block *const rp_ref[7],
806
                            const int n_tile_threads, const int n_frame_threads)
807
7.79k
{
808
7.79k
    const int rp_stride = ((frm_hdr->width[0] + 127) & ~127) >> 3;
809
7.79k
    const int n_tile_rows = n_tile_threads > 1 ? frm_hdr->tiling.rows : 1;
810
7.79k
    const int n_blocks = rp_stride * n_tile_rows;
811
812
7.79k
    rf->sbsz = 16 << seq_hdr->sb128;
813
7.79k
    rf->frm_hdr = frm_hdr;
814
7.79k
    rf->iw8 = (frm_hdr->width[0] + 7) >> 3;
815
7.79k
    rf->ih8 = (frm_hdr->height + 7) >> 3;
816
7.79k
    rf->iw4 = rf->iw8 << 1;
817
7.79k
    rf->ih4 = rf->ih8 << 1;
818
7.79k
    rf->rp = rp;
819
7.79k
    rf->rp_stride = rp_stride;
820
7.79k
    rf->n_tile_threads = n_tile_threads;
821
7.79k
    rf->n_frame_threads = n_frame_threads;
822
823
7.79k
    if (n_blocks != rf->n_blocks) {
824
7.79k
        const size_t r_sz = sizeof(*rf->r) * 35 * 2 * n_blocks * (1 + (n_frame_threads > 1));
825
7.79k
        const size_t rp_proj_sz = sizeof(*rf->rp_proj) * 16 * n_blocks;
826
        /* Note that sizeof(*rf->r) == 12, but it's accessed using 16-byte unaligned
827
         * loads in save_tmvs() asm which can overread 4 bytes into rp_proj. */
828
7.79k
        dav1d_free_aligned(rf->r);
829
7.79k
        rf->r = dav1d_alloc_aligned(ALLOC_REFMVS, r_sz + rp_proj_sz, 64);
830
7.79k
        if (!rf->r) {
831
0
            rf->n_blocks = 0;
832
0
            return DAV1D_ERR(ENOMEM);
833
0
        }
834
835
7.79k
        rf->rp_proj = (refmvs_temporal_block*)((uintptr_t)rf->r + r_sz);
836
7.79k
        rf->n_blocks = n_blocks;
837
7.79k
    }
838
839
7.79k
    const int poc = frm_hdr->frame_offset;
840
62.3k
    for (int i = 0; i < 7; i++) {
841
54.5k
        const int poc_diff = get_poc_diff(seq_hdr->order_hint_n_bits,
842
54.5k
                                          ref_poc[i], poc);
843
54.5k
        rf->sign_bias[i] = poc_diff > 0;
844
54.5k
        rf->mfmv_sign[i] = poc_diff < 0;
845
54.5k
        rf->pocdiff[i] = iclip(get_poc_diff(seq_hdr->order_hint_n_bits,
846
54.5k
                                            poc, ref_poc[i]), -31, 31);
847
54.5k
    }
848
849
    // temporal MV setup
850
7.79k
    rf->n_mfmvs = 0;
851
7.79k
    rf->rp_ref = rp_ref;
852
7.79k
    if (frm_hdr->use_ref_frame_mvs && seq_hdr->order_hint_n_bits) {
853
970
        int total = 2;
854
970
        if (rp_ref[0] && ref_ref_poc[0][6] != ref_poc[3] /* alt-of-last != gold */) {
855
131
            rf->mfmv_ref[rf->n_mfmvs++] = 0; // last
856
131
            total = 3;
857
131
        }
858
970
        if (rp_ref[4] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[4],
859
128
                                      frm_hdr->frame_offset) > 0)
860
38
        {
861
38
            rf->mfmv_ref[rf->n_mfmvs++] = 4; // bwd
862
38
        }
863
970
        if (rp_ref[5] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[5],
864
187
                                      frm_hdr->frame_offset) > 0)
865
61
        {
866
61
            rf->mfmv_ref[rf->n_mfmvs++] = 5; // altref2
867
61
        }
868
970
        if (rf->n_mfmvs < total && rp_ref[6] &&
869
118
            get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[6],
870
118
                         frm_hdr->frame_offset) > 0)
871
50
        {
872
50
            rf->mfmv_ref[rf->n_mfmvs++] = 6; // altref
873
50
        }
874
970
        if (rf->n_mfmvs < total && rp_ref[1])
875
167
            rf->mfmv_ref[rf->n_mfmvs++] = 1; // last2
876
877
1.41k
        for (int n = 0; n < rf->n_mfmvs; n++) {
878
447
            const int rpoc = ref_poc[rf->mfmv_ref[n]];
879
447
            const int diff1 = get_poc_diff(seq_hdr->order_hint_n_bits,
880
447
                                           rpoc, frm_hdr->frame_offset);
881
447
            if (abs(diff1) > 31) {
882
22
                rf->mfmv_ref2cur[n] = INVALID_REF2CUR;
883
425
            } else {
884
425
                rf->mfmv_ref2cur[n] = rf->mfmv_ref[n] < 4 ? -diff1 : diff1;
885
3.40k
                for (int m = 0; m < 7; m++) {
886
2.97k
                    const int rrpoc = ref_ref_poc[rf->mfmv_ref[n]][m];
887
2.97k
                    const int diff2 = get_poc_diff(seq_hdr->order_hint_n_bits,
888
2.97k
                                                   rpoc, rrpoc);
889
                    // unsigned comparison also catches the < 0 case
890
2.97k
                    rf->mfmv_ref2ref[n][m] = (unsigned) diff2 > 31U ? 0 : diff2;
891
2.97k
                }
892
425
            }
893
447
        }
894
970
    }
895
7.79k
    rf->use_ref_frame_mvs = rf->n_mfmvs > 0;
896
897
7.79k
    return 0;
898
7.79k
}
899
900
/*
 * C implementation of the splat_mv hook: copies *rmv into bw4 consecutive
 * entries, starting at column bx4, of each of the bh4 rows pointed to by
 * rr[0 .. bh4-1].
 *
 * The row loop always runs at least once and stops when the decremented bh4
 * reaches zero, so bh4 is expected to be >= 1.
 */
static void splat_mv_c(refmvs_block **rr, const refmvs_block *const rmv,
                       const int bx4, const int bw4, int bh4)
{
    for (;;) {
        refmvs_block *dst = *rr + bx4;
        for (int left = bw4; left > 0; left--)
            *dst++ = *rmv;

        rr++;
        if (--bh4 == 0)
            break;
    }
}
909
910
#if HAVE_ASM
911
#if ARCH_AARCH64 || ARCH_ARM
912
#include "src/arm/refmvs.h"
913
#elif ARCH_LOONGARCH64
914
#include "src/loongarch/refmvs.h"
915
#elif ARCH_X86
916
#include "src/x86/refmvs.h"
917
#endif
918
#endif
919
920
/*
 * Populate the refmvs DSP function table `c`.
 *
 * The C implementations are installed first. When built with HAVE_ASM, the
 * init routine for the target architecture is then called with the same
 * table (presumably to replace entries with optimised versions -- see the
 * per-architecture headers).
 */
COLD void dav1d_refmvs_dsp_init(Dav1dRefmvsDSPContext *const c)
{
    c->splat_mv = splat_mv_c;
    c->save_tmvs = save_tmvs_c;
    c->load_tmvs = load_tmvs_c;

#if HAVE_ASM && (ARCH_AARCH64 || ARCH_ARM)
    refmvs_dsp_init_arm(c);
#elif HAVE_ASM && ARCH_LOONGARCH64
    refmvs_dsp_init_loongarch(c);
#elif HAVE_ASM && ARCH_X86
    refmvs_dsp_init_x86(c);
#endif
}