Coverage Report

Created: 2026-05-30 06:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/dav1d/src/refmvs.c
Line
Count
Source
1
/*
2
 * Copyright © 2020, VideoLAN and dav1d authors
3
 * Copyright © 2020, Two Orioles, LLC
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * 1. Redistributions of source code must retain the above copyright notice, this
10
 *    list of conditions and the following disclaimer.
11
 *
12
 * 2. Redistributions in binary form must reproduce the above copyright notice,
13
 *    this list of conditions and the following disclaimer in the documentation
14
 *    and/or other materials provided with the distribution.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
 */
27
28
#include "config.h"
29
30
#include <limits.h>
31
#include <stdlib.h>
32
33
#include "dav1d/common.h"
34
35
#include "common/intops.h"
36
37
#include "src/env.h"
38
#include "src/mem.h"
39
#include "src/refmvs.h"
40
41
/*
 * Offer the motion vector(s) of the spatial neighbour `b` to the candidate
 * stack.
 *
 * A neighbour only contributes if it has a valid MV and uses the requested
 * reference(s): for a single reference (ref.ref[1] == -1) the first of the
 * neighbour's two refs that equals ref.ref[0] is taken; for a compound pair
 * the neighbour's ref pair must equal ref.pair. When bit 0 of b->mf is set
 * and the matching gmv[] entry is valid, that gmv[] entry replaces the
 * block's own MV.
 *
 * On a match *have_refmv_match is set to 1 and (b->mf >> 1) is OR'ed into
 * *have_newmv_match. A candidate already on the stack has `weight` added to
 * its weight; a new one is appended as long as fewer than 8 entries are used.
 */
static void add_spatial_candidate(refmvs_candidate *const mvstack, int *const cnt,
                                  const int weight, const refmvs_block *const b,
                                  const union refmvs_refpair ref, const mv gmv[2],
                                  int *const have_newmv_match,
                                  int *const have_refmv_match)
{
    // intra block, no intrabc
    if (b->mv.mv[0].n == INVALID_MV)
        return;

    if (ref.ref[1] == -1) {
        // single reference: find the first slot of the neighbour using it
        int slot = 0;
        while (slot < 2 && b->ref.ref[slot] != ref.ref[0])
            slot++;
        if (slot == 2)
            return;

        mv cand;
        if ((b->mf & 1) && gmv[0].n != INVALID_MV)
            cand = gmv[0];
        else
            cand = b->mv.mv[slot];

        *have_refmv_match = 1;
        *have_newmv_match |= b->mf >> 1;

        // look for an identical MV among the entries present on entry
        const int n_cands = *cnt;
        int idx = 0;
        while (idx < n_cands && mvstack[idx].mv.mv[0].n != cand.n)
            idx++;

        if (idx < n_cands) {
            mvstack[idx].weight += weight;
        } else if (n_cands < 8) {
            mvstack[n_cands].mv.mv[0] = cand;
            mvstack[n_cands].weight = weight;
            *cnt = n_cands + 1;
        }
        return;
    }

    // compound reference: both refs of the neighbour have to match
    if (b->ref.pair != ref.pair)
        return;

    refmvs_mvpair cand;
    for (int i = 0; i < 2; i++) {
        if ((b->mf & 1) && gmv[i].n != INVALID_MV)
            cand.mv[i] = gmv[i];
        else
            cand.mv[i] = b->mv.mv[i];
    }

    *have_refmv_match = 1;
    *have_newmv_match |= b->mf >> 1;

    const int n_cands = *cnt;
    int idx = 0;
    while (idx < n_cands && mvstack[idx].mv.n != cand.n)
        idx++;

    if (idx < n_cands) {
        mvstack[idx].weight += weight;
    } else if (n_cands < 8) {
        mvstack[n_cands].mv = cand;
        mvstack[n_cands].weight = weight;
        *cnt = n_cands + 1;
    }
}
96
97
static int scan_row(refmvs_candidate *const mvstack, int *const cnt,
98
                    const union refmvs_refpair ref, const mv gmv[2],
99
                    const refmvs_block *b, const int bw4, const int w4,
100
                    const int max_rows, const int step,
101
                    int *const have_newmv_match, int *const have_refmv_match)
102
262k
{
103
262k
    const refmvs_block *cand_b = b;
104
262k
    const enum BlockSize first_cand_bs = cand_b->bs;
105
262k
    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
106
262k
    int cand_bw4 = first_cand_b_dim[0];
107
262k
    int len = imax(step, imin(bw4, cand_bw4));
108
109
262k
    if (bw4 <= cand_bw4) {
110
        // FIXME weight can be higher for odd blocks (bx4 & 1), but then the
111
        // position of the first block has to be odd already, i.e. not just
112
        // for row_offset=-3/-5
113
        // FIXME why can this not be cand_bw4?
114
233k
        const int weight = bw4 == 1 ? 2 :
115
233k
                           imax(2, imin(2 * max_rows, first_cand_b_dim[1]));
116
233k
        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
117
233k
                              have_newmv_match, have_refmv_match);
118
233k
        return weight >> 1;
119
233k
    }
120
121
49.3k
    for (int x = 0;;) {
122
        // FIXME if we overhang above, we could fill a bitmask so we don't have
123
        // to repeat the add_spatial_candidate() for the next row, but just increase
124
        // the weight here
125
49.3k
        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
126
49.3k
                              have_newmv_match, have_refmv_match);
127
49.3k
        x += len;
128
49.3k
        if (x >= w4) return 1;
129
20.3k
        cand_b = &b[x];
130
20.3k
        cand_bw4 = dav1d_block_dimensions[cand_b->bs][0];
131
20.3k
        assert(cand_bw4 < bw4);
132
20.3k
        len = imax(step, cand_bw4);
133
20.3k
    }
134
29.0k
}
135
136
static int scan_col(refmvs_candidate *const mvstack, int *const cnt,
137
                    const union refmvs_refpair ref, const mv gmv[2],
138
                    /*const*/ refmvs_block *const *b, const int bh4, const int h4,
139
                    const int bx4, const int max_cols, const int step,
140
                    int *const have_newmv_match, int *const have_refmv_match)
141
290k
{
142
290k
    const refmvs_block *cand_b = &b[0][bx4];
143
290k
    const enum BlockSize first_cand_bs = cand_b->bs;
144
290k
    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
145
290k
    int cand_bh4 = first_cand_b_dim[1];
146
290k
    int len = imax(step, imin(bh4, cand_bh4));
147
148
290k
    if (bh4 <= cand_bh4) {
149
        // FIXME weight can be higher for odd blocks (by4 & 1), but then the
150
        // position of the first block has to be odd already, i.e. not just
151
        // for col_offset=-3/-5
152
        // FIXME why can this not be cand_bh4?
153
263k
        const int weight = bh4 == 1 ? 2 :
154
263k
                           imax(2, imin(2 * max_cols, first_cand_b_dim[0]));
155
263k
        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
156
263k
                            have_newmv_match, have_refmv_match);
157
263k
        return weight >> 1;
158
263k
    }
159
160
49.1k
    for (int y = 0;;) {
161
        // FIXME if we overhang above, we could fill a bitmask so we don't have
162
        // to repeat the add_spatial_candidate() for the next row, but just increase
163
        // the weight here
164
49.1k
        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
165
49.1k
                              have_newmv_match, have_refmv_match);
166
49.1k
        y += len;
167
49.1k
        if (y >= h4) return 1;
168
22.1k
        cand_b = &b[y][bx4];
169
22.1k
        cand_bh4 = dav1d_block_dimensions[cand_b->bs][1];
170
22.1k
        assert(cand_bh4 < bh4);
171
22.1k
        len = imax(step, cand_bh4);
172
22.1k
    }
173
27.1k
}
174
175
7.38k
static inline union mv mv_projection(const union mv mv, const int num, const int den) {
176
7.38k
    static const uint16_t div_mult[32] = {
177
7.38k
           0, 16384, 8192, 5461, 4096, 3276, 2730, 2340,
178
7.38k
        2048,  1820, 1638, 1489, 1365, 1260, 1170, 1092,
179
7.38k
        1024,   963,  910,  862,  819,  780,  744,  712,
180
7.38k
         682,   655,  630,  606,  585,  564,  546,  528
181
7.38k
    };
182
7.38k
    assert(den > 0 && den < 32);
183
7.38k
    assert(num > -32 && num < 32);
184
7.38k
    const int frac = num * div_mult[den];
185
7.38k
    const int y = mv.y * frac, x = mv.x * frac;
186
    // Round and clip according to AV1 spec section 7.9.3
187
7.38k
    return (union mv) { // 0x3fff == (1 << 14) - 1
188
7.38k
        .y = iclip((y + 8192 + (y >> 31)) >> 14, -0x3fff, 0x3fff),
189
7.38k
        .x = iclip((x + 8192 + (x >> 31)) >> 14, -0x3fff, 0x3fff)
190
7.38k
    };
191
7.38k
}
192
193
static void add_temporal_candidate(const refmvs_frame *const rf,
194
                                   refmvs_candidate *const mvstack, int *const cnt,
195
                                   const refmvs_temporal_block *const rb,
196
                                   const union refmvs_refpair ref, int *const globalmv_ctx,
197
                                   const union mv gmv[])
198
5.22k
{
199
5.22k
    if (rb->mv.n == INVALID_MV) return;
200
201
3.13k
    union mv mv = mv_projection(rb->mv, rf->pocdiff[ref.ref[0] - 1], rb->ref);
202
3.13k
    fix_mv_precision(rf->frm_hdr, &mv);
203
204
3.13k
    const int last = *cnt;
205
3.13k
    if (ref.ref[1] == -1) {
206
1.59k
        if (globalmv_ctx)
207
536
            *globalmv_ctx = (abs(mv.x - gmv[0].x) | abs(mv.y - gmv[0].y)) >= 16;
208
209
2.75k
        for (int n = 0; n < last; n++)
210
2.29k
            if (mvstack[n].mv.mv[0].n == mv.n) {
211
1.14k
                mvstack[n].weight += 2;
212
1.14k
                return;
213
1.14k
            }
214
459
        if (last < 8) {
215
459
            mvstack[last].mv.mv[0] = mv;
216
459
            mvstack[last].weight = 2;
217
459
            *cnt = last + 1;
218
459
        }
219
1.53k
    } else {
220
1.53k
        refmvs_mvpair mvp = { .mv = {
221
1.53k
            [0] = mv,
222
1.53k
            [1] = mv_projection(rb->mv, rf->pocdiff[ref.ref[1] - 1], rb->ref),
223
1.53k
        }};
224
1.53k
        fix_mv_precision(rf->frm_hdr, &mvp.mv[1]);
225
226
2.19k
        for (int n = 0; n < last; n++)
227
1.89k
            if (mvstack[n].mv.n == mvp.n) {
228
1.22k
                mvstack[n].weight += 2;
229
1.22k
                return;
230
1.22k
            }
231
303
        if (last < 8) {
232
303
            mvstack[last].mv = mvp;
233
303
            mvstack[last].weight = 2;
234
303
            *cnt = last + 1;
235
303
        }
236
303
    }
237
3.13k
}
238
239
static void add_compound_extended_candidate(refmvs_candidate *const same,
240
                                            int *const same_count,
241
                                            const refmvs_block *const cand_b,
242
                                            const int sign0, const int sign1,
243
                                            const union refmvs_refpair ref,
244
                                            const uint8_t *const sign_bias)
245
18.3k
{
246
18.3k
    refmvs_candidate *const diff = &same[2];
247
18.3k
    int *const diff_count = &same_count[2];
248
249
47.3k
    for (int n = 0; n < 2; n++) {
250
36.2k
        const int cand_ref = cand_b->ref.ref[n];
251
252
36.2k
        if (cand_ref <= 0) break;
253
254
29.0k
        mv cand_mv = cand_b->mv.mv[n];
255
29.0k
        if (cand_ref == ref.ref[0]) {
256
9.99k
            if (same_count[0] < 2)
257
9.70k
                same[same_count[0]++].mv.mv[0] = cand_mv;
258
9.99k
            if (diff_count[1] < 2) {
259
8.62k
                if (sign1 ^ sign_bias[cand_ref - 1]) {
260
63
                    cand_mv.y = -cand_mv.y;
261
63
                    cand_mv.x = -cand_mv.x;
262
63
                }
263
8.62k
                diff[diff_count[1]++].mv.mv[1] = cand_mv;
264
8.62k
            }
265
19.0k
        } else if (cand_ref == ref.ref[1]) {
266
10.1k
            if (same_count[1] < 2)
267
9.92k
                same[same_count[1]++].mv.mv[1] = cand_mv;
268
10.1k
            if (diff_count[0] < 2) {
269
8.41k
                if (sign0 ^ sign_bias[cand_ref - 1]) {
270
55
                    cand_mv.y = -cand_mv.y;
271
55
                    cand_mv.x = -cand_mv.x;
272
55
                }
273
8.41k
                diff[diff_count[0]++].mv.mv[0] = cand_mv;
274
8.41k
            }
275
10.1k
        } else {
276
8.86k
            mv i_cand_mv = (union mv) {
277
8.86k
                .x = -cand_mv.x,
278
8.86k
                .y = -cand_mv.y
279
8.86k
            };
280
281
8.86k
            if (diff_count[0] < 2) {
282
7.03k
                diff[diff_count[0]++].mv.mv[0] =
283
7.03k
                    sign0 ^ sign_bias[cand_ref - 1] ?
284
6.99k
                    i_cand_mv : cand_mv;
285
7.03k
            }
286
287
8.86k
            if (diff_count[1] < 2) {
288
6.72k
                diff[diff_count[1]++].mv.mv[1] =
289
6.72k
                    sign1 ^ sign_bias[cand_ref - 1] ?
290
6.67k
                    i_cand_mv : cand_mv;
291
6.72k
            }
292
8.86k
        }
293
29.0k
    }
294
18.3k
}
295
296
static void add_single_extended_candidate(refmvs_candidate mvstack[8], int *const cnt,
297
                                          const refmvs_block *const cand_b,
298
                                          const int sign, const uint8_t *const sign_bias)
299
67.4k
{
300
135k
    for (int n = 0; n < 2; n++) {
301
132k
        const int cand_ref = cand_b->ref.ref[n];
302
303
132k
        if (cand_ref <= 0) break;
304
        // we need to continue even if cand_ref == ref.ref[0], since
305
        // the candidate could have been added as a globalmv variant,
306
        // which changes the value
307
        // FIXME if scan_{row,col}() returned a mask for the nearest
308
        // edge, we could skip the appropriate ones here
309
310
68.4k
        mv cand_mv = cand_b->mv.mv[n];
311
68.4k
        if (sign ^ sign_bias[cand_ref - 1]) {
312
90
            cand_mv.y = -cand_mv.y;
313
90
            cand_mv.x = -cand_mv.x;
314
90
        }
315
316
68.4k
        int m;
317
68.4k
        const int last = *cnt;
318
77.7k
        for (m = 0; m < last; m++)
319
63.8k
            if (cand_mv.n == mvstack[m].mv.mv[0].n)
320
54.6k
                break;
321
68.4k
        if (m == last) {
322
13.8k
            mvstack[m].mv.mv[0] = cand_mv;
323
13.8k
            mvstack[m].weight = 2; // "minimal"
324
13.8k
            *cnt = last + 1;
325
13.8k
        }
326
68.4k
    }
327
67.4k
}
328
329
/*
330
 * refmvs_frame allocates memory for one sbrow (32 blocks high, whole frame
331
 * wide) of 4x4-resolution refmvs_block entries for spatial MV referencing.
332
 * mvrefs_tile[] keeps a list of 35 (32 + 3 above) pointers into this memory,
333
 * and each sbrow, the bottom entries (y=27/29/31) are exchanged with the top
334
 * (-5/-3/-1) pointers by calling dav1d_refmvs_tile_sbrow_init() at the start
335
 * of each tile/sbrow.
336
 *
337
 * For temporal MV referencing, we call dav1d_refmvs_save_tmvs() at the end of
338
 * each tile/sbrow (when tile column threading is enabled), or at the start of
339
 * each interleaved sbrow (i.e. once for all tile columns together, when tile
340
 * column threading is disabled). This will copy the 4x4-resolution spatial MVs
341
 * into 8x8-resolution refmvs_temporal_block structures. Then, for subsequent
342
 * frames, at the start of each tile/sbrow (when tile column threading is
343
 * enabled) or at the start of each interleaved sbrow (when tile column
344
 * threading is disabled), we call load_tmvs(), which will project the MVs to
345
 * their respective position in the current frame.
346
 */
347
348
void dav1d_refmvs_find(const refmvs_tile *const rt,
349
                       refmvs_candidate mvstack[8], int *const cnt,
350
                       int *const ctx,
351
                       const union refmvs_refpair ref, const enum BlockSize bs,
352
                       const enum EdgeFlags edge_flags,
353
                       const int by4, const int bx4)
354
188k
{
355
188k
    const refmvs_frame *const rf = rt->rf;
356
188k
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
357
188k
    const int bw4 = b_dim[0], w4 = imin(imin(bw4, 16), rt->tile_col.end - bx4);
358
188k
    const int bh4 = b_dim[1], h4 = imin(imin(bh4, 16), rt->tile_row.end - by4);
359
188k
    mv gmv[2], tgmv[2];
360
361
188k
    *cnt = 0;
362
188k
    assert(ref.ref[0] >=  0 && ref.ref[0] <= 8 &&
363
188k
           ref.ref[1] >= -1 && ref.ref[1] <= 8);
364
188k
    if (ref.ref[0] > 0) {
365
84.9k
        tgmv[0] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[0] - 1],
366
84.9k
                             bx4, by4, bw4, bh4, rf->frm_hdr);
367
84.9k
        gmv[0] = rf->frm_hdr->gmv[ref.ref[0] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
368
72.1k
                 tgmv[0] : (mv) { .n = INVALID_MV };
369
103k
    } else {
370
103k
        tgmv[0] = (mv) { .n = 0 };
371
103k
        gmv[0] = (mv) { .n = INVALID_MV };
372
103k
    }
373
188k
    if (ref.ref[1] > 0) {
374
15.1k
        tgmv[1] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[1] - 1],
375
15.1k
                             bx4, by4, bw4, bh4, rf->frm_hdr);
376
15.1k
        gmv[1] = rf->frm_hdr->gmv[ref.ref[1] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
377
13.1k
                 tgmv[1] : (mv) { .n = INVALID_MV };
378
15.1k
    }
379
380
    // top
381
188k
    int have_newmv = 0, have_col_mvs = 0, have_row_mvs = 0;
382
188k
    unsigned max_rows = 0, n_rows = ~0;
383
188k
    const refmvs_block *b_top;
384
188k
    if (by4 > rt->tile_row.start) {
385
138k
        max_rows = imin((by4 - rt->tile_row.start + 1) >> 1, 2 + (bh4 > 1));
386
138k
        b_top = &rt->r[(by4 & 31) - 1 + 5][bx4];
387
138k
        n_rows = scan_row(mvstack, cnt, ref, gmv, b_top,
388
138k
                          bw4, w4, max_rows, bw4 >= 16 ? 4 : 1,
389
138k
                          &have_newmv, &have_row_mvs);
390
138k
    }
391
392
    // left
393
188k
    unsigned max_cols = 0, n_cols = ~0U;
394
188k
    refmvs_block *const *b_left;
395
188k
    if (bx4 > rt->tile_col.start) {
396
146k
        max_cols = imin((bx4 - rt->tile_col.start + 1) >> 1, 2 + (bw4 > 1));
397
146k
        b_left = &rt->r[(by4 & 31) + 5];
398
146k
        n_cols = scan_col(mvstack, cnt, ref, gmv, b_left,
399
146k
                          bh4, h4, bx4 - 1, max_cols, bh4 >= 16 ? 4 : 1,
400
146k
                          &have_newmv, &have_col_mvs);
401
146k
    }
402
403
    // top/right
404
188k
    if (n_rows != ~0U && edge_flags & EDGE_I444_TOP_HAS_RIGHT &&
405
91.5k
        imax(bw4, bh4) <= 16 && bw4 + bx4 < rt->tile_col.end)
406
63.4k
    {
407
63.4k
        add_spatial_candidate(mvstack, cnt, 4, &b_top[bw4], ref, gmv,
408
63.4k
                              &have_newmv, &have_row_mvs);
409
63.4k
    }
410
411
188k
    const int nearest_match = have_col_mvs + have_row_mvs;
412
188k
    const int nearest_cnt = *cnt;
413
425k
    for (int n = 0; n < nearest_cnt; n++)
414
236k
        mvstack[n].weight += 640;
415
416
    // temporal
417
188k
    int globalmv_ctx = rf->frm_hdr->use_ref_frame_mvs;
418
188k
    if (rf->use_ref_frame_mvs) {
419
1.38k
        const ptrdiff_t stride = rf->rp_stride;
420
1.38k
        const int by8 = by4 >> 1, bx8 = bx4 >> 1;
421
1.38k
        const refmvs_temporal_block *const rbi = &rt->rp_proj[(by8 & 15) * stride + bx8];
422
1.38k
        const refmvs_temporal_block *rb = rbi;
423
1.38k
        const int step_h = bw4 >= 16 ? 2 : 1, step_v = bh4 >= 16 ? 2 : 1;
424
1.38k
        const int w8 = imin((w4 + 1) >> 1, 8), h8 = imin((h4 + 1) >> 1, 8);
425
3.77k
        for (int y = 0; y < h8; y += step_v) {
426
6.55k
            for (int x = 0; x < w8; x+= step_h) {
427
4.16k
                add_temporal_candidate(rf, mvstack, cnt, &rb[x], ref,
428
4.16k
                                       !(x | y) ? &globalmv_ctx : NULL, tgmv);
429
4.16k
            }
430
2.38k
            rb += stride * step_v;
431
2.38k
        }
432
1.38k
        if (imin(bw4, bh4) >= 2 && imax(bw4, bh4) < 16) {
433
887
            const int bh8 = bh4 >> 1, bw8 = bw4 >> 1;
434
887
            rb = &rbi[bh8 * stride];
435
887
            const int has_bottom = by8 + bh8 < imin(rt->tile_row.end >> 1,
436
887
                                                    (by8 & ~7) + 8);
437
887
            if (has_bottom && bx8 - 1 >= imax(rt->tile_col.start >> 1, bx8 & ~7)) {
438
296
                add_temporal_candidate(rf, mvstack, cnt, &rb[-1], ref,
439
296
                                       NULL, NULL);
440
296
            }
441
887
            if (bx8 + bw8 < imin(rt->tile_col.end >> 1, (bx8 & ~7) + 8)) {
442
506
                if (has_bottom) {
443
270
                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8], ref,
444
270
                                           NULL, NULL);
445
270
                }
446
506
                if (by8 + bh8 - 1 < imin(rt->tile_row.end >> 1, (by8 & ~7) + 8)) {
447
493
                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8 - stride],
448
493
                                           ref, NULL, NULL);
449
493
                }
450
506
            }
451
887
        }
452
1.38k
    }
453
188k
    assert(*cnt <= 8);
454
455
    // top/left (which, confusingly, is part of "secondary" references)
456
188k
    int have_dummy_newmv_match;
457
188k
    if ((n_rows | n_cols) != ~0U) {
458
100k
        add_spatial_candidate(mvstack, cnt, 4, &b_top[-1], ref, gmv,
459
100k
                              &have_dummy_newmv_match, &have_row_mvs);
460
100k
    }
461
462
    // "secondary" (non-direct neighbour) top & left edges
463
    // what is different about secondary is that everything is now in 8x8 resolution
464
566k
    for (int n = 2; n <= 3; n++) {
465
377k
        if ((unsigned) n > n_rows && (unsigned) n <= max_rows) {
466
123k
            n_rows += scan_row(mvstack, cnt, ref, gmv,
467
123k
                               &rt->r[(((by4 & 31) - 2 * n + 1) | 1) + 5][bx4 | 1],
468
123k
                               bw4, w4, 1 + max_rows - n, bw4 >= 16 ? 4 : 2,
469
123k
                               &have_dummy_newmv_match, &have_row_mvs);
470
123k
        }
471
472
377k
        if ((unsigned) n > n_cols && (unsigned) n <= max_cols) {
473
143k
            n_cols += scan_col(mvstack, cnt, ref, gmv, &rt->r[((by4 & 31) | 1) + 5],
474
143k
                               bh4, h4, (bx4 - n * 2 + 1) | 1,
475
143k
                               1 + max_cols - n, bh4 >= 16 ? 4 : 2,
476
143k
                               &have_dummy_newmv_match, &have_col_mvs);
477
143k
        }
478
377k
    }
479
188k
    assert(*cnt <= 8);
480
481
188k
    const int ref_match_count = have_col_mvs + have_row_mvs;
482
483
    // context build-up
484
188k
    int refmv_ctx, newmv_ctx;
485
188k
    switch (nearest_match) {
486
31.5k
    case 0:
487
31.5k
        refmv_ctx = imin(2, ref_match_count);
488
31.5k
        newmv_ctx = ref_match_count > 0;
489
31.5k
        break;
490
87.6k
    case 1:
491
87.6k
        refmv_ctx = imin(ref_match_count * 3, 4);
492
87.6k
        newmv_ctx = 3 - have_newmv;
493
87.6k
        break;
494
69.6k
    case 2:
495
69.6k
        refmv_ctx = 5;
496
69.6k
        newmv_ctx = 5 - have_newmv;
497
69.6k
        break;
498
188k
    }
499
500
    // sorting (nearest, then "secondary")
501
188k
    int len = nearest_cnt;
502
386k
    while (len) {
503
197k
        int last = 0;
504
288k
        for (int n = 1; n < len; n++) {
505
90.5k
            if (mvstack[n - 1].weight < mvstack[n].weight) {
506
44.9k
#define EXCHANGE(a, b) do { refmvs_candidate tmp = a; a = b; b = tmp; } while (0)
507
41.9k
                EXCHANGE(mvstack[n - 1], mvstack[n]);
508
41.9k
                last = n;
509
41.9k
            }
510
90.5k
        }
511
197k
        len = last;
512
197k
    }
513
188k
    len = *cnt;
514
295k
    while (len > nearest_cnt) {
515
106k
        int last = nearest_cnt;
516
150k
        for (int n = nearest_cnt + 1; n < len; n++) {
517
44.1k
            if (mvstack[n - 1].weight < mvstack[n].weight) {
518
3.01k
                EXCHANGE(mvstack[n - 1], mvstack[n]);
519
3.01k
#undef EXCHANGE
520
3.01k
                last = n;
521
3.01k
            }
522
44.1k
        }
523
106k
        len = last;
524
106k
    }
525
526
188k
    if (ref.ref[1] > 0) {
527
15.1k
        if (*cnt < 2) {
528
11.2k
            const int sign0 = rf->sign_bias[ref.ref[0] - 1];
529
11.2k
            const int sign1 = rf->sign_bias[ref.ref[1] - 1];
530
11.2k
            const int sz4 = imin(w4, h4);
531
11.2k
            refmvs_candidate *const same = &mvstack[*cnt];
532
11.2k
            int same_count[4] = { 0 };
533
534
            // non-self references in top
535
16.4k
            if (n_rows != ~0U) for (int x = 0; x < sz4;) {
536
8.55k
                const refmvs_block *const cand_b = &b_top[x];
537
8.55k
                add_compound_extended_candidate(same, same_count, cand_b,
538
8.55k
                                                sign0, sign1, ref, rf->sign_bias);
539
8.55k
                x += dav1d_block_dimensions[cand_b->bs][0];
540
8.55k
            }
541
542
            // non-self references in left
543
18.6k
            if (n_cols != ~0U) for (int y = 0; y < sz4;) {
544
9.75k
                const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
545
9.75k
                add_compound_extended_candidate(same, same_count, cand_b,
546
9.75k
                                                sign0, sign1, ref, rf->sign_bias);
547
9.75k
                y += dav1d_block_dimensions[cand_b->bs][1];
548
9.75k
            }
549
550
11.2k
            refmvs_candidate *const diff = &same[2];
551
11.2k
            const int *const diff_count = &same_count[2];
552
553
            // merge together
554
33.6k
            for (int n = 0; n < 2; n++) {
555
22.4k
                int m = same_count[n];
556
557
22.4k
                if (m >= 2) continue;
558
559
17.6k
                const int l = diff_count[n];
560
17.6k
                if (l) {
561
14.5k
                    same[m].mv.mv[n] = diff[0].mv.mv[n];
562
14.5k
                    if (++m == 2) continue;
563
5.41k
                    if (l == 2) {
564
4.00k
                        same[1].mv.mv[n] = diff[1].mv.mv[n];
565
4.00k
                        continue;
566
4.00k
                    }
567
5.41k
                }
568
6.63k
                do {
569
6.63k
                    same[m].mv.mv[n] = tgmv[n];
570
6.63k
                } while (++m < 2);
571
4.48k
            }
572
573
            // if the first extended was the same as the non-extended one,
574
            // then replace it with the second extended one
575
11.2k
            int n = *cnt;
576
11.2k
            if (n == 1 && mvstack[0].mv.n == same[0].mv.n)
577
3.94k
                mvstack[1].mv = mvstack[2].mv;
578
17.1k
            do {
579
17.1k
                mvstack[n].weight = 2;
580
17.1k
            } while (++n < 2);
581
11.2k
            *cnt = 2;
582
11.2k
        }
583
584
        // clamping
585
15.1k
        const int left = -(bx4 + bw4 + 4) * 4 * 8;
586
15.1k
        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
587
15.1k
        const int top = -(by4 + bh4 + 4) * 4 * 8;
588
15.1k
        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
589
590
15.1k
        const int n_refmvs = *cnt;
591
15.1k
        int n = 0;
592
33.2k
        do {
593
33.2k
            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
594
33.2k
            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
595
33.2k
            mvstack[n].mv.mv[1].x = iclip(mvstack[n].mv.mv[1].x, left, right);
596
33.2k
            mvstack[n].mv.mv[1].y = iclip(mvstack[n].mv.mv[1].y, top, bottom);
597
33.2k
        } while (++n < n_refmvs);
598
599
15.1k
        switch (refmv_ctx >> 1) {
600
6.58k
        case 0:
601
6.58k
            *ctx = imin(newmv_ctx, 1);
602
6.58k
            break;
603
5.42k
        case 1:
604
5.42k
            *ctx = 1 + imin(newmv_ctx, 3);
605
5.42k
            break;
606
3.09k
        case 2:
607
3.09k
            *ctx = iclip(3 + newmv_ctx, 4, 7);
608
3.09k
            break;
609
15.1k
        }
610
611
15.1k
        return;
612
173k
    } else if (*cnt < 2 && ref.ref[0] > 0) {
613
44.0k
        const int sign = rf->sign_bias[ref.ref[0] - 1];
614
44.0k
        const int sz4 = imin(w4, h4);
615
616
        // non-self references in top
617
63.4k
        if (n_rows != ~0U) for (int x = 0; x < sz4 && *cnt < 2;) {
618
32.2k
            const refmvs_block *const cand_b = &b_top[x];
619
32.2k
            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
620
32.2k
            x += dav1d_block_dimensions[cand_b->bs][0];
621
32.2k
        }
622
623
        // non-self references in left
624
72.1k
        if (n_cols != ~0U) for (int y = 0; y < sz4 && *cnt < 2;) {
625
35.1k
            const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
626
35.1k
            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
627
35.1k
            y += dav1d_block_dimensions[cand_b->bs][1];
628
35.1k
        }
629
44.0k
    }
630
173k
    assert(*cnt <= 8);
631
632
    // clamping
633
173k
    int n_refmvs = *cnt;
634
173k
    if (n_refmvs) {
635
159k
        const int left = -(bx4 + bw4 + 4) * 4 * 8;
636
159k
        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
637
159k
        const int top = -(by4 + bh4 + 4) * 4 * 8;
638
159k
        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
639
640
159k
        int n = 0;
641
381k
        do {
642
381k
            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
643
381k
            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
644
381k
        } while (++n < n_refmvs);
645
159k
    }
646
647
249k
    for (int n = *cnt; n < 2; n++)
648
75.3k
        mvstack[n].mv.mv[0] = tgmv[0];
649
650
173k
    *ctx = (refmv_ctx << 4) | (globalmv_ctx << 3) | newmv_ctx;
651
173k
}
652
653
/*
 * Set up the per-tile state `rt` for one superblock row.
 *
 * Points rt->rp_proj and the rt->r[] row-pointer table at the slices of the
 * frame buffers (rf->rp_proj, rf->r) selected by tile_row_idx; tile_row_idx
 * is forced to 0 when rf->n_tile_threads == 1. With more than one frame
 * thread and pass == 2, the block rows are taken from an offset of
 * 35 * 2 * rf->n_blocks. Finally stores the frame pointer and the tile
 * bounds, with the end positions limited to rf->ih4 / rf->iw4.
 */
void dav1d_refmvs_tile_sbrow_init(refmvs_tile *const rt, const refmvs_frame *const rf,
                                  const int tile_col_start4, const int tile_col_end4,
                                  const int tile_row_start4, const int tile_row_end4,
                                  const int sby, int tile_row_idx, const int pass)
{
    if (rf->n_tile_threads == 1)
        tile_row_idx = 0;

    rt->rp_proj = &rf->rp_proj[16 * rf->rp_stride * tile_row_idx];

    const ptrdiff_t r_stride = rf->rp_stride * 2;
    ptrdiff_t pass_off = 0;
    if (rf->n_frame_threads > 1 && pass == 2)
        pass_off = 35 * 2 * rf->n_blocks;

    refmvs_block *row = &rf->r[35 * r_stride * tile_row_idx + pass_off];
    const int sbsz = rf->sbsz;
    const int off = (sbsz * sby) & 16;
    refmvs_block **const slots = &rt->r[off];

    // slots 5.. hold the sbsz rows of the current superblock row
    for (int i = 0; i < sbsz; i++) {
        slots[5 + i] = row;
        row += r_stride;
    }

    // slots 0, 2 and 4 get the next three rows; slots 1 and 3 are unused
    slots[0] = row;
    slots[1] = NULL;
    slots[2] = row + r_stride;
    slots[3] = NULL;
    slots[4] = row + 2 * r_stride;

    if (sby & 1) {
        // on odd superblock rows, exchange slots 0/2/4 with the entries
        // sbsz positions further on
        for (int i = 0; i <= 4; i += 2) {
            refmvs_block *const tmp = slots[i];
            slots[i] = slots[sbsz + i];
            slots[sbsz + i] = tmp;
        }
    }

    rt->rf = rf;
    rt->tile_row.start = tile_row_start4;
    rt->tile_row.end = imin(tile_row_end4, rf->ih4);
    rt->tile_col.start = tile_col_start4;
    rt->tile_col.end = imin(tile_col_end4, rf->iw4);
}
689
690
/* Fills the projected temporal-MV buffer (rf->rp_proj) for a band of 8x8 rows.
 *
 * Step 1: every entry in rows [row_start8, min(row_end8, rf->ih8)) and
 * columns [col_start8, col_end8) is reset to INVALID_MV.
 * Step 2: for each of the rf->n_mfmvs reference entries whose ref2cur value
 * is valid, the reference's stored temporal blocks are scanned over the same
 * rows and over the columns widened by 8 on each side (clamped to
 * [0, rf->iw8)). Each block with a non-zero ref and a non-zero ref2ref entry
 * has its MV passed through mv_projection(); the resulting offset selects a
 * destination position, and if that position passes the row/column window
 * checks below, the block's original MV and the ref2ref value are written
 * there.
 *
 * The buffer is addressed with (row & 15) * stride, i.e. 16 rows per tile-row
 * slice; tile_row_idx picks the slice and is forced to 0 when only one tile
 * thread is in use. The asserts require row_start8 >= 0 and a band of at most
 * 16 rows. */
static void load_tmvs_c(const refmvs_frame *const rf, int tile_row_idx,
691
                        const int col_start8, const int col_end8,
692
                        const int row_start8, int row_end8)
693
904
{
694
904
    if (rf->n_tile_threads == 1) tile_row_idx = 0;
695
904
    assert(row_start8 >= 0);
696
904
    assert((unsigned) (row_end8 - row_start8) <= 16U);
697
904
    row_end8 = imin(row_end8, rf->ih8);
698
904
    // source scan window: requested columns +/- 8, clamped to the frame width
    const int col_start8i = imax(col_start8 - 8, 0);
699
904
    const int col_end8i = imin(col_end8 + 8, rf->iw8);
700
701
904
    const ptrdiff_t stride = rf->rp_stride;
702
904
    // start at the buffer row that corresponds to row_start8
    refmvs_temporal_block *rp_proj =
703
904
        &rf->rp_proj[16 * stride * tile_row_idx + (row_start8 & 15) * stride];
704
8.72k
    // step 1: invalidate the destination area
    for (int y = row_start8; y < row_end8; y++) {
705
75.9k
        for (int x = col_start8; x < col_end8; x++)
706
68.0k
            rp_proj[x].mv.n = INVALID_MV;
707
7.81k
        rp_proj += stride;
708
7.81k
    }
709
710
904
    // rewind to the first row of this tile row's slice; rows are selected
    // with (pos_y & 15) * stride from here on
    rp_proj = &rf->rp_proj[16 * stride * tile_row_idx];
711
1.31k
    // step 2: project the MVs of each usable reference
    for (int n = 0; n < rf->n_mfmvs; n++) {
712
413
        const int ref2cur = rf->mfmv_ref2cur[n];
713
413
        if (ref2cur == INVALID_REF2CUR) continue;
714
715
406
        const int ref = rf->mfmv_ref[n];
716
406
        // negative for ref < 4, non-negative otherwise; XORed into the value
        // handed to apply_sign() below (apply_sign() is defined elsewhere --
        // presumably this flips the projection direction; confirm there)
        const int ref_sign = ref - 4;
717
406
        const refmvs_temporal_block *r = &rf->rp_ref[ref][row_start8 * stride];
718
3.02k
        for (int y = row_start8; y < row_end8; y++) {
719
2.62k
            // destination row must stay inside the 8-row aligned group that
            // contains y, further limited to the requested row band
            const int y_sb_align = y & ~7;
720
2.62k
            const int y_proj_start = imax(y_sb_align, row_start8);
721
2.62k
            const int y_proj_end = imin(y_sb_align + 8, row_end8);
722
6.86k
            for (int x = col_start8i; x < col_end8i; x++) {
723
4.24k
                const refmvs_temporal_block *rb = &r[x];
724
4.24k
                const int b_ref = rb->ref;
725
4.24k
                if (!b_ref) continue;
726
2.99k
                const int ref2ref = rf->mfmv_ref2ref[n][b_ref - 1];
727
2.99k
                if (!ref2ref) continue;
728
2.72k
                const mv b_mv = rb->mv;
729
2.72k
                const mv offset = mv_projection(b_mv, ref2cur, ref2ref);
730
2.72k
                // destination = source position + signed (|offset| >> 6)
                int pos_x = x + apply_sign(abs(offset.x) >> 6,
731
2.72k
                                           offset.x ^ ref_sign);
732
2.72k
                const int pos_y = y + apply_sign(abs(offset.y) >> 6,
733
2.72k
                                                 offset.y ^ ref_sign);
734
2.72k
                if (pos_y >= y_proj_start && pos_y < y_proj_end) {
735
2.23k
                    const ptrdiff_t pos = (pos_y & 15) * stride;
736
7.96k
                    // handle a run of consecutive source blocks that share the
                    // same ref and mv: the projection is reused and only
                    // pos_x advances
                    for (;;) {
737
7.96k
                        // column window: [aligned x - 8, aligned x + 16),
                        // limited to the requested columns
                        const int x_sb_align = x & ~7;
738
7.96k
                        if (pos_x >= imax(x_sb_align - 8, col_start8) &&
739
7.43k
                            pos_x < imin(x_sb_align + 16, col_end8))
740
7.22k
                        {
741
7.22k
                            rp_proj[pos + pos_x].mv = rb->mv;
742
7.22k
                            rp_proj[pos + pos_x].ref = ref2ref;
743
7.22k
                        }
744
7.96k
                        if (++x >= col_end8i) break;
745
5.98k
                        rb++;
746
5.98k
                        if (rb->ref != b_ref || rb->mv.n != b_mv.n) break;
747
5.72k
                        pos_x++;
748
5.72k
                    }
749
2.23k
                } else {
750
1.34k
                    // destination row rejected: skip the whole run of blocks
                    // with identical ref/mv, since they would be rejected too
                    for (;;) {
751
1.34k
                        if (++x >= col_end8i) break;
752
1.07k
                        rb++;
753
1.07k
                        if (rb->ref != b_ref || rb->mv.n != b_mv.n) break;
754
1.07k
                    }
755
481
                }
756
2.72k
                // the run loops leave x on the first unprocessed block; undo
                // one step so the for-loop's x++ lands on it
                x--;
757
2.72k
            }
758
2.62k
            r += stride;
759
2.62k
        }
760
406
    }
761
904
}
762
763
static void save_tmvs_c(refmvs_temporal_block *rp, const ptrdiff_t stride,
764
                        refmvs_block *const *const rr,
765
                        const uint8_t *const ref_sign,
766
                        const int col_end8, const int row_end8,
767
                        const int col_start8, const int row_start8)
768
1.27k
{
769
11.1k
    for (int y = row_start8; y < row_end8; y++) {
770
9.88k
        const refmvs_block *const b = rr[(y & 15) * 2];
771
772
40.7k
        for (int x = col_start8; x < col_end8;) {
773
30.8k
            const refmvs_block *const cand_b = &b[x * 2 + 1];
774
30.8k
            const int bw8 = (dav1d_block_dimensions[cand_b->bs][0] + 1) >> 1;
775
776
30.8k
            if (cand_b->ref.ref[1] > 0 && ref_sign[cand_b->ref.ref[1] - 1] &&
777
5.91k
                (abs(cand_b->mv.mv[1].y) | abs(cand_b->mv.mv[1].x)) < 4096)
778
3.55k
            {
779
15.2k
                for (int n = 0; n < bw8; n++, x++)
780
11.7k
                    rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[1],
781
11.7k
                                                      .ref = cand_b->ref.ref[1] };
782
27.3k
            } else if (cand_b->ref.ref[0] > 0 && ref_sign[cand_b->ref.ref[0] - 1] &&
783
10.0k
                       (abs(cand_b->mv.mv[0].y) | abs(cand_b->mv.mv[0].x)) < 4096)
784
7.65k
            {
785
36.0k
                for (int n = 0; n < bw8; n++, x++)
786
28.3k
                    rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[0],
787
28.3k
                                                      .ref = cand_b->ref.ref[0] };
788
19.6k
            } else {
789
73.0k
                for (int n = 0; n < bw8; n++, x++) {
790
53.3k
                    rp[x].mv.n = 0;
791
53.3k
                    rp[x].ref = 0; // "invalid"
792
53.3k
                }
793
19.6k
            }
794
30.8k
        }
795
9.88k
        rp += stride;
796
9.88k
    }
797
1.27k
}
798
799
int dav1d_refmvs_init_frame(refmvs_frame *const rf,
800
                            const Dav1dSequenceHeader *const seq_hdr,
801
                            const Dav1dFrameHeader *const frm_hdr,
802
                            const uint8_t ref_poc[7],
803
                            refmvs_temporal_block *const rp,
804
                            const uint8_t ref_ref_poc[7][7],
805
                            /*const*/ refmvs_temporal_block *const rp_ref[7],
806
                            const int n_tile_threads, const int n_frame_threads)
807
6.61k
{
808
6.61k
    const int rp_stride = ((frm_hdr->width[0] + 127) & ~127) >> 3;
809
6.61k
    const int n_tile_rows = n_tile_threads > 1 ? frm_hdr->tiling.rows : 1;
810
6.61k
    const int n_blocks = rp_stride * n_tile_rows;
811
812
6.61k
    rf->sbsz = 16 << seq_hdr->sb128;
813
6.61k
    rf->frm_hdr = frm_hdr;
814
6.61k
    rf->iw8 = (frm_hdr->width[0] + 7) >> 3;
815
6.61k
    rf->ih8 = (frm_hdr->height + 7) >> 3;
816
6.61k
    rf->iw4 = rf->iw8 << 1;
817
6.61k
    rf->ih4 = rf->ih8 << 1;
818
6.61k
    rf->rp = rp;
819
6.61k
    rf->rp_stride = rp_stride;
820
6.61k
    rf->n_tile_threads = n_tile_threads;
821
6.61k
    rf->n_frame_threads = n_frame_threads;
822
823
6.61k
    if (n_blocks != rf->n_blocks) {
824
6.61k
        const size_t r_sz = sizeof(*rf->r) * 35 * 2 * n_blocks * (1 + (n_frame_threads > 1));
825
6.61k
        const size_t rp_proj_sz = sizeof(*rf->rp_proj) * 16 * n_blocks;
826
        /* Note that sizeof(*rf->r) == 12, but it's accessed using 16-byte unaligned
827
         * loads in save_tmvs() asm which can overread 4 bytes into rp_proj. */
828
6.61k
        dav1d_free_aligned(rf->r);
829
6.61k
        rf->r = dav1d_alloc_aligned(ALLOC_REFMVS, r_sz + rp_proj_sz, 64);
830
6.61k
        if (!rf->r) {
831
0
            rf->n_blocks = 0;
832
0
            return DAV1D_ERR(ENOMEM);
833
0
        }
834
835
6.61k
        rf->rp_proj = (refmvs_temporal_block*)((uintptr_t)rf->r + r_sz);
836
6.61k
        rf->n_blocks = n_blocks;
837
6.61k
    }
838
839
6.61k
    const int poc = frm_hdr->frame_offset;
840
52.9k
    for (int i = 0; i < 7; i++) {
841
46.3k
        const int poc_diff = get_poc_diff(seq_hdr->order_hint_n_bits,
842
46.3k
                                          ref_poc[i], poc);
843
46.3k
        rf->sign_bias[i] = poc_diff > 0;
844
46.3k
        rf->mfmv_sign[i] = poc_diff < 0;
845
46.3k
        rf->pocdiff[i] = iclip(get_poc_diff(seq_hdr->order_hint_n_bits,
846
46.3k
                                            poc, ref_poc[i]), -31, 31);
847
46.3k
    }
848
849
    // temporal MV setup
850
6.61k
    rf->n_mfmvs = 0;
851
6.61k
    rf->rp_ref = rp_ref;
852
6.61k
    if (frm_hdr->use_ref_frame_mvs && seq_hdr->order_hint_n_bits) {
853
919
        int total = 2;
854
919
        if (rp_ref[0] && ref_ref_poc[0][6] != ref_poc[3] /* alt-of-last != gold */) {
855
137
            rf->mfmv_ref[rf->n_mfmvs++] = 0; // last
856
137
            total = 3;
857
137
        }
858
919
        if (rp_ref[4] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[4],
859
107
                                      frm_hdr->frame_offset) > 0)
860
53
        {
861
53
            rf->mfmv_ref[rf->n_mfmvs++] = 4; // bwd
862
53
        }
863
919
        if (rp_ref[5] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[5],
864
139
                                      frm_hdr->frame_offset) > 0)
865
74
        {
866
74
            rf->mfmv_ref[rf->n_mfmvs++] = 5; // altref2
867
74
        }
868
919
        if (rf->n_mfmvs < total && rp_ref[6] &&
869
112
            get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[6],
870
112
                         frm_hdr->frame_offset) > 0)
871
58
        {
872
58
            rf->mfmv_ref[rf->n_mfmvs++] = 6; // altref
873
58
        }
874
919
        if (rf->n_mfmvs < total && rp_ref[1])
875
96
            rf->mfmv_ref[rf->n_mfmvs++] = 1; // last2
876
877
1.33k
        for (int n = 0; n < rf->n_mfmvs; n++) {
878
418
            const int rpoc = ref_poc[rf->mfmv_ref[n]];
879
418
            const int diff1 = get_poc_diff(seq_hdr->order_hint_n_bits,
880
418
                                           rpoc, frm_hdr->frame_offset);
881
418
            if (abs(diff1) > 31) {
882
7
                rf->mfmv_ref2cur[n] = INVALID_REF2CUR;
883
411
            } else {
884
411
                rf->mfmv_ref2cur[n] = rf->mfmv_ref[n] < 4 ? -diff1 : diff1;
885
3.28k
                for (int m = 0; m < 7; m++) {
886
2.87k
                    const int rrpoc = ref_ref_poc[rf->mfmv_ref[n]][m];
887
2.87k
                    const int diff2 = get_poc_diff(seq_hdr->order_hint_n_bits,
888
2.87k
                                                   rpoc, rrpoc);
889
                    // unsigned comparison also catches the < 0 case
890
2.87k
                    rf->mfmv_ref2ref[n][m] = (unsigned) diff2 > 31U ? 0 : diff2;
891
2.87k
                }
892
411
            }
893
418
        }
894
919
    }
895
6.61k
    rf->use_ref_frame_mvs = rf->n_mfmvs > 0;
896
897
6.61k
    return 0;
898
6.61k
}
899
900
/* Copies *rmv into bw4 consecutive entries, starting at column bx4, of each
 * of the next bh4 rows in the row-pointer array rr.
 * The row loop is a do/while, so at least one row is always written; bh4 is
 * expected to be >= 1. */
static void splat_mv_c(refmvs_block **rr, const refmvs_block *const rmv,
                       const int bx4, const int bw4, int bh4)
{
    do {
        refmvs_block *dst = *rr + bx4;
        for (int left = bw4; left > 0; left--)
            *dst++ = *rmv;
        rr++;
    } while (--bh4);
}
909
910
#if HAVE_ASM
911
#if ARCH_AARCH64 || ARCH_ARM
912
#include "src/arm/refmvs.h"
913
#elif ARCH_LOONGARCH64
914
#include "src/loongarch/refmvs.h"
915
#elif ARCH_X86
916
#include "src/x86/refmvs.h"
917
#endif
918
#endif
919
920
/* Fills the refmvs DSP function table.
 * The C implementations from this file are installed first; when assembly is
 * enabled, the initializer for the target architecture is then called with
 * the same table. */
COLD void dav1d_refmvs_dsp_init(Dav1dRefmvsDSPContext *const c)
{
    c->splat_mv = splat_mv_c;
    c->save_tmvs = save_tmvs_c;
    c->load_tmvs = load_tmvs_c;

#if HAVE_ASM && (ARCH_AARCH64 || ARCH_ARM)
    refmvs_dsp_init_arm(c);
#elif HAVE_ASM && ARCH_LOONGARCH64
    refmvs_dsp_init_loongarch(c);
#elif HAVE_ASM && ARCH_X86
    refmvs_dsp_init_x86(c);
#endif
}