Coverage Report

Created: 2026-06-15 06:22

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/dav1d/src/refmvs.c
Line
Count
Source
1
/*
2
 * Copyright © 2020, VideoLAN and dav1d authors
3
 * Copyright © 2020, Two Orioles, LLC
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * 1. Redistributions of source code must retain the above copyright notice, this
10
 *    list of conditions and the following disclaimer.
11
 *
12
 * 2. Redistributions in binary form must reproduce the above copyright notice,
13
 *    this list of conditions and the following disclaimer in the documentation
14
 *    and/or other materials provided with the distribution.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
 */
27
28
#include "config.h"
29
30
#include <limits.h>
31
#include <stdlib.h>
32
33
#include "dav1d/common.h"
34
35
#include "common/intops.h"
36
37
#include "src/env.h"
38
#include "src/mem.h"
39
#include "src/refmvs.h"
40
41
/*
 * Offer the motion vector(s) of one spatial neighbour to the candidate list.
 *
 * mvstack/cnt:       candidate list and its current length (at most 8 entries
 *                    are ever stored; further new candidates are dropped).
 * weight:            weight given to a new entry, or added to an existing
 *                    entry that carries the same motion vector(s).
 * b:                 neighbouring block to inspect.
 * ref:               reference (pair) we are searching for; ref.ref[1] == -1
 *                    selects the single-reference path.
 * gmv:               replacement vector(s) used instead of the block's own
 *                    when bit 0 of b->mf is set and gmv[i] is valid.
 * have_newmv_match:  OR-ed with b->mf >> 1 on a reference match.
 * have_refmv_match:  set to 1 on a reference match.
 */
static void add_spatial_candidate(refmvs_candidate *const mvstack, int *const cnt,
                                  const int weight, const refmvs_block *const b,
                                  const union refmvs_refpair ref, const mv gmv[2],
                                  int *const have_newmv_match,
                                  int *const have_refmv_match)
{
    // intra block, no intrabc
    if (b->mv.mv[0].n == INVALID_MV) return;

    const int use_gmv = b->mf & 1;

    if (ref.ref[1] != -1) {
        // compound: both references have to match as a pair
        if (b->ref.pair != ref.pair) return;

        refmvs_mvpair pair_mv;
        pair_mv.mv[0] = (use_gmv && gmv[0].n != INVALID_MV) ? gmv[0] : b->mv.mv[0];
        pair_mv.mv[1] = (use_gmv && gmv[1].n != INVALID_MV) ? gmv[1] : b->mv.mv[1];

        *have_refmv_match = 1;
        *have_newmv_match |= b->mf >> 1;

        const int n_cands = *cnt;
        for (int i = 0; i < n_cands; i++) {
            if (mvstack[i].mv.n == pair_mv.n) {
                // already listed: just strengthen it
                mvstack[i].weight += weight;
                return;
            }
        }

        if (n_cands >= 8) return;
        mvstack[n_cands].mv = pair_mv;
        mvstack[n_cands].weight = weight;
        *cnt = n_cands + 1;
        return;
    }

    // single reference: the first of the block's two refs that matches wins
    int slot;
    if (b->ref.ref[0] == ref.ref[0])
        slot = 0;
    else if (b->ref.ref[1] == ref.ref[0])
        slot = 1;
    else
        return;

    const mv single_mv = (use_gmv && gmv[0].n != INVALID_MV) ? gmv[0]
                                                             : b->mv.mv[slot];

    *have_refmv_match = 1;
    *have_newmv_match |= b->mf >> 1;

    const int n_cands = *cnt;
    for (int i = 0; i < n_cands; i++) {
        if (mvstack[i].mv.mv[0].n == single_mv.n) {
            // already listed: just strengthen it
            mvstack[i].weight += weight;
            return;
        }
    }

    if (n_cands >= 8) return;
    mvstack[n_cands].mv.mv[0] = single_mv;
    mvstack[n_cands].weight = weight;
    *cnt = n_cands + 1;
}
96
97
static int scan_row(refmvs_candidate *const mvstack, int *const cnt,
98
                    const union refmvs_refpair ref, const mv gmv[2],
99
                    const refmvs_block *b, const int bw4, const int w4,
100
                    const int max_rows, const int step,
101
                    int *const have_newmv_match, int *const have_refmv_match)
102
351k
{
103
351k
    const refmvs_block *cand_b = b;
104
351k
    const enum BlockSize first_cand_bs = cand_b->bs;
105
351k
    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
106
351k
    int cand_bw4 = first_cand_b_dim[0];
107
351k
    int len = imax(step, imin(bw4, cand_bw4));
108
109
351k
    if (bw4 <= cand_bw4) {
110
        // FIXME weight can be higher for odd blocks (bx4 & 1), but then the
111
        // position of the first block has to be odd already, i.e. not just
112
        // for row_offset=-3/-5
113
        // FIXME why can this not be cand_bw4?
114
312k
        const int weight = bw4 == 1 ? 2 :
115
312k
                           imax(2, imin(2 * max_rows, first_cand_b_dim[1]));
116
312k
        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
117
312k
                              have_newmv_match, have_refmv_match);
118
312k
        return weight >> 1;
119
312k
    }
120
121
66.9k
    for (int x = 0;;) {
122
        // FIXME if we overhang above, we could fill a bitmask so we don't have
123
        // to repeat the add_spatial_candidate() for the next row, but just increase
124
        // the weight here
125
66.9k
        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
126
66.9k
                              have_newmv_match, have_refmv_match);
127
66.9k
        x += len;
128
66.9k
        if (x >= w4) return 1;
129
28.6k
        cand_b = &b[x];
130
28.6k
        cand_bw4 = dav1d_block_dimensions[cand_b->bs][0];
131
28.6k
        assert(cand_bw4 < bw4);
132
28.6k
        len = imax(step, cand_bw4);
133
28.6k
    }
134
38.3k
}
135
136
static int scan_col(refmvs_candidate *const mvstack, int *const cnt,
137
                    const union refmvs_refpair ref, const mv gmv[2],
138
                    /*const*/ refmvs_block *const *b, const int bh4, const int h4,
139
                    const int bx4, const int max_cols, const int step,
140
                    int *const have_newmv_match, int *const have_refmv_match)
141
460k
{
142
460k
    const refmvs_block *cand_b = &b[0][bx4];
143
460k
    const enum BlockSize first_cand_bs = cand_b->bs;
144
460k
    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
145
460k
    int cand_bh4 = first_cand_b_dim[1];
146
460k
    int len = imax(step, imin(bh4, cand_bh4));
147
148
460k
    if (bh4 <= cand_bh4) {
149
        // FIXME weight can be higher for odd blocks (by4 & 1), but then the
150
        // position of the first block has to be odd already, i.e. not just
151
        // for col_offset=-3/-5
152
        // FIXME why can this not be cand_bh4?
153
414k
        const int weight = bh4 == 1 ? 2 :
154
414k
                           imax(2, imin(2 * max_cols, first_cand_b_dim[0]));
155
414k
        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
156
414k
                            have_newmv_match, have_refmv_match);
157
414k
        return weight >> 1;
158
414k
    }
159
160
87.7k
    for (int y = 0;;) {
161
        // FIXME if we overhang above, we could fill a bitmask so we don't have
162
        // to repeat the add_spatial_candidate() for the next row, but just increase
163
        // the weight here
164
87.7k
        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
165
87.7k
                              have_newmv_match, have_refmv_match);
166
87.7k
        y += len;
167
87.7k
        if (y >= h4) return 1;
168
42.6k
        cand_b = &b[y][bx4];
169
42.6k
        cand_bh4 = dav1d_block_dimensions[cand_b->bs][1];
170
42.6k
        assert(cand_bh4 < bh4);
171
42.6k
        len = imax(step, cand_bh4);
172
42.6k
    }
173
45.5k
}
174
175
11.0k
static inline union mv mv_projection(const union mv mv, const int num, const int den) {
176
11.0k
    static const uint16_t div_mult[32] = {
177
11.0k
           0, 16384, 8192, 5461, 4096, 3276, 2730, 2340,
178
11.0k
        2048,  1820, 1638, 1489, 1365, 1260, 1170, 1092,
179
11.0k
        1024,   963,  910,  862,  819,  780,  744,  712,
180
11.0k
         682,   655,  630,  606,  585,  564,  546,  528
181
11.0k
    };
182
11.0k
    assert(den > 0 && den < 32);
183
11.0k
    assert(num > -32 && num < 32);
184
11.0k
    const int frac = num * div_mult[den];
185
11.0k
    const int y = mv.y * frac, x = mv.x * frac;
186
    // Round and clip according to AV1 spec section 7.9.3
187
11.0k
    return (union mv) { // 0x3fff == (1 << 14) - 1
188
11.0k
        .y = iclip((y + 8192 + (y >> 31)) >> 14, -0x3fff, 0x3fff),
189
11.0k
        .x = iclip((x + 8192 + (x >> 31)) >> 14, -0x3fff, 0x3fff)
190
11.0k
    };
191
11.0k
}
192
193
static void add_temporal_candidate(const refmvs_frame *const rf,
194
                                   refmvs_candidate *const mvstack, int *const cnt,
195
                                   const refmvs_temporal_block *const rb,
196
                                   const union refmvs_refpair ref, int *const globalmv_ctx,
197
                                   const union mv gmv[])
198
9.89k
{
199
9.89k
    if (rb->mv.n == INVALID_MV) return;
200
201
6.21k
    union mv mv = mv_projection(rb->mv, rf->pocdiff[ref.ref[0] - 1], rb->ref);
202
6.21k
    fix_mv_precision(rf->frm_hdr, &mv);
203
204
6.21k
    const int last = *cnt;
205
6.21k
    if (ref.ref[1] == -1) {
206
3.68k
        if (globalmv_ctx)
207
1.21k
            *globalmv_ctx = (abs(mv.x - gmv[0].x) | abs(mv.y - gmv[0].y)) >= 16;
208
209
6.51k
        for (int n = 0; n < last; n++)
210
5.53k
            if (mvstack[n].mv.mv[0].n == mv.n) {
211
2.70k
                mvstack[n].weight += 2;
212
2.70k
                return;
213
2.70k
            }
214
979
        if (last < 8) {
215
959
            mvstack[last].mv.mv[0] = mv;
216
959
            mvstack[last].weight = 2;
217
959
            *cnt = last + 1;
218
959
        }
219
2.52k
    } else {
220
2.52k
        refmvs_mvpair mvp = { .mv = {
221
2.52k
            [0] = mv,
222
2.52k
            [1] = mv_projection(rb->mv, rf->pocdiff[ref.ref[1] - 1], rb->ref),
223
2.52k
        }};
224
2.52k
        fix_mv_precision(rf->frm_hdr, &mvp.mv[1]);
225
226
4.25k
        for (int n = 0; n < last; n++)
227
3.58k
            if (mvstack[n].mv.n == mvp.n) {
228
1.85k
                mvstack[n].weight += 2;
229
1.85k
                return;
230
1.85k
            }
231
671
        if (last < 8) {
232
651
            mvstack[last].mv = mvp;
233
651
            mvstack[last].weight = 2;
234
651
            *cnt = last + 1;
235
651
        }
236
671
    }
237
6.21k
}
238
239
static void add_compound_extended_candidate(refmvs_candidate *const same,
240
                                            int *const same_count,
241
                                            const refmvs_block *const cand_b,
242
                                            const int sign0, const int sign1,
243
                                            const union refmvs_refpair ref,
244
                                            const uint8_t *const sign_bias)
245
31.7k
{
246
31.7k
    refmvs_candidate *const diff = &same[2];
247
31.7k
    int *const diff_count = &same_count[2];
248
249
81.1k
    for (int n = 0; n < 2; n++) {
250
62.2k
        const int cand_ref = cand_b->ref.ref[n];
251
252
62.2k
        if (cand_ref <= 0) break;
253
254
49.3k
        mv cand_mv = cand_b->mv.mv[n];
255
49.3k
        if (cand_ref == ref.ref[0]) {
256
17.1k
            if (same_count[0] < 2)
257
16.5k
                same[same_count[0]++].mv.mv[0] = cand_mv;
258
17.1k
            if (diff_count[1] < 2) {
259
14.7k
                if (sign1 ^ sign_bias[cand_ref - 1]) {
260
182
                    cand_mv.y = -cand_mv.y;
261
182
                    cand_mv.x = -cand_mv.x;
262
182
                }
263
14.7k
                diff[diff_count[1]++].mv.mv[1] = cand_mv;
264
14.7k
            }
265
32.2k
        } else if (cand_ref == ref.ref[1]) {
266
17.2k
            if (same_count[1] < 2)
267
16.8k
                same[same_count[1]++].mv.mv[1] = cand_mv;
268
17.2k
            if (diff_count[0] < 2) {
269
13.9k
                if (sign0 ^ sign_bias[cand_ref - 1]) {
270
187
                    cand_mv.y = -cand_mv.y;
271
187
                    cand_mv.x = -cand_mv.x;
272
187
                }
273
13.9k
                diff[diff_count[0]++].mv.mv[0] = cand_mv;
274
13.9k
            }
275
17.2k
        } else {
276
15.0k
            mv i_cand_mv = (union mv) {
277
15.0k
                .x = -cand_mv.x,
278
15.0k
                .y = -cand_mv.y
279
15.0k
            };
280
281
15.0k
            if (diff_count[0] < 2) {
282
11.8k
                diff[diff_count[0]++].mv.mv[0] =
283
11.8k
                    sign0 ^ sign_bias[cand_ref - 1] ?
284
11.7k
                    i_cand_mv : cand_mv;
285
11.8k
            }
286
287
15.0k
            if (diff_count[1] < 2) {
288
11.2k
                diff[diff_count[1]++].mv.mv[1] =
289
11.2k
                    sign1 ^ sign_bias[cand_ref - 1] ?
290
11.1k
                    i_cand_mv : cand_mv;
291
11.2k
            }
292
15.0k
        }
293
49.3k
    }
294
31.7k
}
295
296
static void add_single_extended_candidate(refmvs_candidate mvstack[8], int *const cnt,
297
                                          const refmvs_block *const cand_b,
298
                                          const int sign, const uint8_t *const sign_bias)
299
102k
{
300
205k
    for (int n = 0; n < 2; n++) {
301
200k
        const int cand_ref = cand_b->ref.ref[n];
302
303
200k
        if (cand_ref <= 0) break;
304
        // we need to continue even if cand_ref == ref.ref[0], since
305
        // the candidate could have been added as a globalmv variant,
306
        // which changes the value
307
        // FIXME if scan_{row,col}() returned a mask for the nearest
308
        // edge, we could skip the appropriate ones here
309
310
102k
        mv cand_mv = cand_b->mv.mv[n];
311
102k
        if (sign ^ sign_bias[cand_ref - 1]) {
312
226
            cand_mv.y = -cand_mv.y;
313
226
            cand_mv.x = -cand_mv.x;
314
226
        }
315
316
102k
        int m;
317
102k
        const int last = *cnt;
318
118k
        for (m = 0; m < last; m++)
319
95.1k
            if (cand_mv.n == mvstack[m].mv.mv[0].n)
320
79.2k
                break;
321
102k
        if (m == last) {
322
23.4k
            mvstack[m].mv.mv[0] = cand_mv;
323
23.4k
            mvstack[m].weight = 2; // "minimal"
324
23.4k
            *cnt = last + 1;
325
23.4k
        }
326
102k
    }
327
102k
}
328
329
/*
330
 * refmvs_frame allocates memory for one sbrow (32 blocks high, whole frame
331
 * wide) of 4x4-resolution refmvs_block entries for spatial MV referencing.
332
 * mvrefs_tile[] keeps a list of 35 (32 + 3 above) pointers into this memory,
333
 * and each sbrow, the bottom entries (y=27/29/31) are exchanged with the top
334
 * (-5/-3/-1) pointers by calling dav1d_refmvs_tile_sbrow_init() at the start
335
 * of each tile/sbrow.
336
 *
337
 * For temporal MV referencing, we call dav1d_refmvs_save_tmvs() at the end of
338
 * each tile/sbrow (when tile column threading is enabled), or at the start of
339
 * each interleaved sbrow (i.e. once for all tile columns together, when tile
340
 * column threading is disabled). This will copy the 4x4-resolution spatial MVs
341
 * into 8x8-resolution refmvs_temporal_block structures. Then, for subsequent
342
 * frames, at the start of each tile/sbrow (when tile column threading is
343
 * enabled) or at the start of each interleaved sbrow (when tile column
344
 * threading is disabled), we call load_tmvs(), which will project the MVs to
345
 * their respective position in the current frame.
346
 */
347
348
void dav1d_refmvs_find(const refmvs_tile *const rt,
349
                       refmvs_candidate mvstack[8], int *const cnt,
350
                       int *const ctx,
351
                       const union refmvs_refpair ref, const enum BlockSize bs,
352
                       const enum EdgeFlags edge_flags,
353
                       const int by4, const int bx4)
354
268k
{
355
268k
    const refmvs_frame *const rf = rt->rf;
356
268k
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
357
268k
    const int bw4 = b_dim[0], w4 = imin(imin(bw4, 16), rt->tile_col.end - bx4);
358
268k
    const int bh4 = b_dim[1], h4 = imin(imin(bh4, 16), rt->tile_row.end - by4);
359
268k
    mv gmv[2], tgmv[2];
360
361
268k
    *cnt = 0;
362
268k
    assert(ref.ref[0] >=  0 && ref.ref[0] <= 8 &&
363
268k
           ref.ref[1] >= -1 && ref.ref[1] <= 8);
364
268k
    if (ref.ref[0] > 0) {
365
129k
        tgmv[0] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[0] - 1],
366
129k
                             bx4, by4, bw4, bh4, rf->frm_hdr);
367
129k
        gmv[0] = rf->frm_hdr->gmv[ref.ref[0] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
368
102k
                 tgmv[0] : (mv) { .n = INVALID_MV };
369
139k
    } else {
370
139k
        tgmv[0] = (mv) { .n = 0 };
371
139k
        gmv[0] = (mv) { .n = INVALID_MV };
372
139k
    }
373
268k
    if (ref.ref[1] > 0) {
374
24.8k
        tgmv[1] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[1] - 1],
375
24.8k
                             bx4, by4, bw4, bh4, rf->frm_hdr);
376
24.8k
        gmv[1] = rf->frm_hdr->gmv[ref.ref[1] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
377
20.4k
                 tgmv[1] : (mv) { .n = INVALID_MV };
378
24.8k
    }
379
380
    // top
381
268k
    int have_newmv = 0, have_col_mvs = 0, have_row_mvs = 0;
382
268k
    unsigned max_rows = 0, n_rows = ~0;
383
268k
    const refmvs_block *b_top;
384
268k
    if (by4 > rt->tile_row.start) {
385
188k
        max_rows = imin((by4 - rt->tile_row.start + 1) >> 1, 2 + (bh4 > 1));
386
188k
        b_top = &rt->r[(by4 & 31) - 1 + 5][bx4];
387
188k
        n_rows = scan_row(mvstack, cnt, ref, gmv, b_top,
388
188k
                          bw4, w4, max_rows, bw4 >= 16 ? 4 : 1,
389
188k
                          &have_newmv, &have_row_mvs);
390
188k
    }
391
392
    // left
393
268k
    unsigned max_cols = 0, n_cols = ~0U;
394
268k
    refmvs_block *const *b_left;
395
268k
    if (bx4 > rt->tile_col.start) {
396
226k
        max_cols = imin((bx4 - rt->tile_col.start + 1) >> 1, 2 + (bw4 > 1));
397
226k
        b_left = &rt->r[(by4 & 31) + 5];
398
226k
        n_cols = scan_col(mvstack, cnt, ref, gmv, b_left,
399
226k
                          bh4, h4, bx4 - 1, max_cols, bh4 >= 16 ? 4 : 1,
400
226k
                          &have_newmv, &have_col_mvs);
401
226k
    }
402
403
    // top/right
404
268k
    if (n_rows != ~0U && edge_flags & EDGE_I444_TOP_HAS_RIGHT &&
405
115k
        imax(bw4, bh4) <= 16 && bw4 + bx4 < rt->tile_col.end)
406
86.3k
    {
407
86.3k
        add_spatial_candidate(mvstack, cnt, 4, &b_top[bw4], ref, gmv,
408
86.3k
                              &have_newmv, &have_row_mvs);
409
86.3k
    }
410
411
268k
    const int nearest_match = have_col_mvs + have_row_mvs;
412
268k
    const int nearest_cnt = *cnt;
413
598k
    for (int n = 0; n < nearest_cnt; n++)
414
329k
        mvstack[n].weight += 640;
415
416
    // temporal
417
268k
    int globalmv_ctx = rf->frm_hdr->use_ref_frame_mvs;
418
268k
    if (rf->use_ref_frame_mvs) {
419
2.72k
        const ptrdiff_t stride = rf->rp_stride;
420
2.72k
        const int by8 = by4 >> 1, bx8 = bx4 >> 1;
421
2.72k
        const refmvs_temporal_block *const rbi = &rt->rp_proj[(by8 & 15) * stride + bx8];
422
2.72k
        const refmvs_temporal_block *rb = rbi;
423
2.72k
        const int step_h = bw4 >= 16 ? 2 : 1, step_v = bh4 >= 16 ? 2 : 1;
424
2.72k
        const int w8 = imin((w4 + 1) >> 1, 8), h8 = imin((h4 + 1) >> 1, 8);
425
7.16k
        for (int y = 0; y < h8; y += step_v) {
426
11.5k
            for (int x = 0; x < w8; x+= step_h) {
427
7.12k
                add_temporal_candidate(rf, mvstack, cnt, &rb[x], ref,
428
7.12k
                                       !(x | y) ? &globalmv_ctx : NULL, tgmv);
429
7.12k
            }
430
4.44k
            rb += stride * step_v;
431
4.44k
        }
432
2.72k
        if (imin(bw4, bh4) >= 2 && imax(bw4, bh4) < 16) {
433
1.81k
            const int bh8 = bh4 >> 1, bw8 = bw4 >> 1;
434
1.81k
            rb = &rbi[bh8 * stride];
435
1.81k
            const int has_bottom = by8 + bh8 < imin(rt->tile_row.end >> 1,
436
1.81k
                                                    (by8 & ~7) + 8);
437
1.81k
            if (has_bottom && bx8 - 1 >= imax(rt->tile_col.start >> 1, bx8 & ~7)) {
438
790
                add_temporal_candidate(rf, mvstack, cnt, &rb[-1], ref,
439
790
                                       NULL, NULL);
440
790
            }
441
1.81k
            if (bx8 + bw8 < imin(rt->tile_col.end >> 1, (bx8 & ~7) + 8)) {
442
1.22k
                if (has_bottom) {
443
790
                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8], ref,
444
790
                                           NULL, NULL);
445
790
                }
446
1.22k
                if (by8 + bh8 - 1 < imin(rt->tile_row.end >> 1, (by8 & ~7) + 8)) {
447
1.19k
                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8 - stride],
448
1.19k
                                           ref, NULL, NULL);
449
1.19k
                }
450
1.22k
            }
451
1.81k
        }
452
2.72k
    }
453
268k
    assert(*cnt <= 8);
454
455
    // top/left (which, confusingly, is part of "secondary" references)
456
268k
    int have_dummy_newmv_match;
457
268k
    if ((n_rows | n_cols) != ~0U) {
458
149k
        add_spatial_candidate(mvstack, cnt, 4, &b_top[-1], ref, gmv,
459
149k
                              &have_dummy_newmv_match, &have_row_mvs);
460
149k
    }
461
462
    // "secondary" (non-direct neighbour) top & left edges
463
    // what is different about secondary is that everything is now in 8x8 resolution
464
805k
    for (int n = 2; n <= 3; n++) {
465
536k
        if ((unsigned) n > n_rows && (unsigned) n <= max_rows) {
466
162k
            n_rows += scan_row(mvstack, cnt, ref, gmv,
467
162k
                               &rt->r[(((by4 & 31) - 2 * n + 1) | 1) + 5][bx4 | 1],
468
162k
                               bw4, w4, 1 + max_rows - n, bw4 >= 16 ? 4 : 2,
469
162k
                               &have_dummy_newmv_match, &have_row_mvs);
470
162k
        }
471
472
536k
        if ((unsigned) n > n_cols && (unsigned) n <= max_cols) {
473
233k
            n_cols += scan_col(mvstack, cnt, ref, gmv, &rt->r[((by4 & 31) | 1) + 5],
474
233k
                               bh4, h4, (bx4 - n * 2 + 1) | 1,
475
233k
                               1 + max_cols - n, bh4 >= 16 ? 4 : 2,
476
233k
                               &have_dummy_newmv_match, &have_col_mvs);
477
233k
        }
478
536k
    }
479
268k
    assert(*cnt <= 8);
480
481
268k
    const int ref_match_count = have_col_mvs + have_row_mvs;
482
483
    // context build-up
484
268k
    int refmv_ctx, newmv_ctx;
485
268k
    switch (nearest_match) {
486
44.1k
    case 0:
487
44.1k
        refmv_ctx = imin(2, ref_match_count);
488
44.1k
        newmv_ctx = ref_match_count > 0;
489
44.1k
        break;
490
126k
    case 1:
491
126k
        refmv_ctx = imin(ref_match_count * 3, 4);
492
126k
        newmv_ctx = 3 - have_newmv;
493
126k
        break;
494
97.4k
    case 2:
495
97.4k
        refmv_ctx = 5;
496
97.4k
        newmv_ctx = 5 - have_newmv;
497
97.4k
        break;
498
268k
    }
499
500
    // sorting (nearest, then "secondary")
501
268k
    int len = nearest_cnt;
502
545k
    while (len) {
503
277k
        int last = 0;
504
400k
        for (int n = 1; n < len; n++) {
505
123k
            if (mvstack[n - 1].weight < mvstack[n].weight) {
506
59.7k
#define EXCHANGE(a, b) do { refmvs_candidate tmp = a; a = b; b = tmp; } while (0)
507
55.0k
                EXCHANGE(mvstack[n - 1], mvstack[n]);
508
55.0k
                last = n;
509
55.0k
            }
510
123k
        }
511
277k
        len = last;
512
277k
    }
513
268k
    len = *cnt;
514
421k
    while (len > nearest_cnt) {
515
153k
        int last = nearest_cnt;
516
214k
        for (int n = nearest_cnt + 1; n < len; n++) {
517
61.4k
            if (mvstack[n - 1].weight < mvstack[n].weight) {
518
4.75k
                EXCHANGE(mvstack[n - 1], mvstack[n]);
519
4.75k
#undef EXCHANGE
520
4.75k
                last = n;
521
4.75k
            }
522
61.4k
        }
523
153k
        len = last;
524
153k
    }
525
526
268k
    if (ref.ref[1] > 0) {
527
24.8k
        if (*cnt < 2) {
528
17.9k
            const int sign0 = rf->sign_bias[ref.ref[0] - 1];
529
17.9k
            const int sign1 = rf->sign_bias[ref.ref[1] - 1];
530
17.9k
            const int sz4 = imin(w4, h4);
531
17.9k
            refmvs_candidate *const same = &mvstack[*cnt];
532
17.9k
            int same_count[4] = { 0 };
533
534
            // non-self references in top
535
27.0k
            if (n_rows != ~0U) for (int x = 0; x < sz4;) {
536
14.0k
                const refmvs_block *const cand_b = &b_top[x];
537
14.0k
                add_compound_extended_candidate(same, same_count, cand_b,
538
14.0k
                                                sign0, sign1, ref, rf->sign_bias);
539
14.0k
                x += dav1d_block_dimensions[cand_b->bs][0];
540
14.0k
            }
541
542
            // non-self references in left
543
33.3k
            if (n_cols != ~0U) for (int y = 0; y < sz4;) {
544
17.6k
                const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
545
17.6k
                add_compound_extended_candidate(same, same_count, cand_b,
546
17.6k
                                                sign0, sign1, ref, rf->sign_bias);
547
17.6k
                y += dav1d_block_dimensions[cand_b->bs][1];
548
17.6k
            }
549
550
17.9k
            refmvs_candidate *const diff = &same[2];
551
17.9k
            const int *const diff_count = &same_count[2];
552
553
            // merge together
554
53.9k
            for (int n = 0; n < 2; n++) {
555
35.9k
                int m = same_count[n];
556
557
35.9k
                if (m >= 2) continue;
558
559
27.3k
                const int l = diff_count[n];
560
27.3k
                if (l) {
561
23.4k
                    same[m].mv.mv[n] = diff[0].mv.mv[n];
562
23.4k
                    if (++m == 2) continue;
563
8.45k
                    if (l == 2) {
564
6.47k
                        same[1].mv.mv[n] = diff[1].mv.mv[n];
565
6.47k
                        continue;
566
6.47k
                    }
567
8.45k
                }
568
8.56k
                do {
569
8.56k
                    same[m].mv.mv[n] = tgmv[n];
570
8.56k
                } while (++m < 2);
571
5.88k
            }
572
573
            // if the first extended was the same as the non-extended one,
574
            // then replace it with the second extended one
575
17.9k
            int n = *cnt;
576
17.9k
            if (n == 1 && mvstack[0].mv.n == same[0].mv.n)
577
6.43k
                mvstack[1].mv = mvstack[2].mv;
578
27.0k
            do {
579
27.0k
                mvstack[n].weight = 2;
580
27.0k
            } while (++n < 2);
581
17.9k
            *cnt = 2;
582
17.9k
        }
583
584
        // clamping
585
24.8k
        const int left = -(bx4 + bw4 + 4) * 4 * 8;
586
24.8k
        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
587
24.8k
        const int top = -(by4 + bh4 + 4) * 4 * 8;
588
24.8k
        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
589
590
24.8k
        const int n_refmvs = *cnt;
591
24.8k
        int n = 0;
592
54.2k
        do {
593
54.2k
            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
594
54.2k
            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
595
54.2k
            mvstack[n].mv.mv[1].x = iclip(mvstack[n].mv.mv[1].x, left, right);
596
54.2k
            mvstack[n].mv.mv[1].y = iclip(mvstack[n].mv.mv[1].y, top, bottom);
597
54.2k
        } while (++n < n_refmvs);
598
599
24.8k
        switch (refmv_ctx >> 1) {
600
10.3k
        case 0:
601
10.3k
            *ctx = imin(newmv_ctx, 1);
602
10.3k
            break;
603
9.16k
        case 1:
604
9.16k
            *ctx = 1 + imin(newmv_ctx, 3);
605
9.16k
            break;
606
5.36k
        case 2:
607
5.36k
            *ctx = iclip(3 + newmv_ctx, 4, 7);
608
5.36k
            break;
609
24.8k
        }
610
611
24.8k
        return;
612
243k
    } else if (*cnt < 2 && ref.ref[0] > 0) {
613
65.5k
        const int sign = rf->sign_bias[ref.ref[0] - 1];
614
65.5k
        const int sz4 = imin(w4, h4);
615
616
        // non-self references in top
617
94.8k
        if (n_rows != ~0U) for (int x = 0; x < sz4 && *cnt < 2;) {
618
48.2k
            const refmvs_block *const cand_b = &b_top[x];
619
48.2k
            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
620
48.2k
            x += dav1d_block_dimensions[cand_b->bs][0];
621
48.2k
        }
622
623
        // non-self references in left
624
112k
        if (n_cols != ~0U) for (int y = 0; y < sz4 && *cnt < 2;) {
625
54.5k
            const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
626
54.5k
            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
627
54.5k
            y += dav1d_block_dimensions[cand_b->bs][1];
628
54.5k
        }
629
65.5k
    }
630
243k
    assert(*cnt <= 8);
631
632
    // clamping
633
243k
    int n_refmvs = *cnt;
634
243k
    if (n_refmvs) {
635
227k
        const int left = -(bx4 + bw4 + 4) * 4 * 8;
636
227k
        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
637
227k
        const int top = -(by4 + bh4 + 4) * 4 * 8;
638
227k
        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
639
640
227k
        int n = 0;
641
534k
        do {
642
534k
            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
643
534k
            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
644
534k
        } while (++n < n_refmvs);
645
227k
    }
646
647
341k
    for (int n = *cnt; n < 2; n++)
648
97.8k
        mvstack[n].mv.mv[0] = tgmv[0];
649
650
243k
    *ctx = (refmv_ctx << 4) | (globalmv_ctx << 3) | newmv_ctx;
651
243k
}
652
653
/*
 * Prepare `rt` for decoding one superblock row of one tile.
 *
 * Sets up rt->rp_proj and the rt->r[] row-pointer table for this sbrow, then
 * stores the frame pointer and the tile bounds (the end positions are limited
 * to rf->iw4 / rf->ih4).
 *
 * Within rt->r[], starting at `off`: slots 5 .. 5 + sbsz - 1 point at the
 * sbsz rows of this sbrow; slots 0, 2 and 4 point at three extra rows and
 * slots 1 and 3 are NULL. For odd sby the three extra rows are exchanged with
 * the slots sbsz entries further down.
 *
 * tile_row_idx is forced to 0 when rf->n_tile_threads == 1; the second half
 * of the row storage is selected when frame threading is active and
 * pass == 2.
 */
void dav1d_refmvs_tile_sbrow_init(refmvs_tile *const rt, const refmvs_frame *const rf,
                                  const int tile_col_start4, const int tile_col_end4,
                                  const int tile_row_start4, const int tile_row_end4,
                                  const int sby, int tile_row_idx, const int pass)
{
    if (rf->n_tile_threads == 1) tile_row_idx = 0;
    rt->rp_proj = &rf->rp_proj[16 * rf->rp_stride * tile_row_idx];

    const ptrdiff_t r_stride = rf->rp_stride * 2;
    ptrdiff_t pass_off = 0;
    if (rf->n_frame_threads > 1 && pass == 2)
        pass_off = 35 * 2 * rf->n_blocks;

    refmvs_block *const base = &rf->r[35 * r_stride * tile_row_idx + pass_off];
    const int sbsz = rf->sbsz;
    const int off = (sbsz * sby) & 16;
    refmvs_block **const rows = &rt->r[off];

    // rows of the current sbrow
    for (int i = 0; i < sbsz; i++)
        rows[5 + i] = base + i * r_stride;

    // three extra rows follow directly behind them in memory
    rows[0] = base + sbsz * r_stride;
    rows[1] = NULL;
    rows[2] = base + (sbsz + 1) * r_stride;
    rows[3] = NULL;
    rows[4] = base + (sbsz + 2) * r_stride;

    if (sby & 1) {
        for (int k = 0; k <= 4; k += 2) {
            refmvs_block *const held = rows[k];
            rows[k] = rows[sbsz + k];
            rows[sbsz + k] = held;
        }
    }

    rt->rf = rf;
    rt->tile_row.start = tile_row_start4;
    rt->tile_row.end = imin(tile_row_end4, rf->ih4);
    rt->tile_col.start = tile_col_start4;
    rt->tile_col.end = imin(tile_col_end4, rf->iw4);
}
689
690
static void load_tmvs_c(const refmvs_frame *const rf, int tile_row_idx,
691
                        const int col_start8, const int col_end8,
692
                        const int row_start8, int row_end8)
693
993
{
694
993
    if (rf->n_tile_threads == 1) tile_row_idx = 0;
695
993
    assert(row_start8 >= 0);
696
993
    assert((unsigned) (row_end8 - row_start8) <= 16U);
697
993
    row_end8 = imin(row_end8, rf->ih8);
698
993
    const int col_start8i = imax(col_start8 - 8, 0);
699
993
    const int col_end8i = imin(col_end8 + 8, rf->iw8);
700
701
993
    const ptrdiff_t stride = rf->rp_stride;
702
993
    refmvs_temporal_block *rp_proj =
703
993
        &rf->rp_proj[16 * stride * tile_row_idx + (row_start8 & 15) * stride];
704
9.02k
    for (int y = row_start8; y < row_end8; y++) {
705
76.0k
        for (int x = col_start8; x < col_end8; x++)
706
68.0k
            rp_proj[x].mv.n = INVALID_MV;
707
8.03k
        rp_proj += stride;
708
8.03k
    }
709
710
993
    rp_proj = &rf->rp_proj[16 * stride * tile_row_idx];
711
1.48k
    for (int n = 0; n < rf->n_mfmvs; n++) {
712
495
        const int ref2cur = rf->mfmv_ref2cur[n];
713
495
        if (ref2cur == INVALID_REF2CUR) continue;
714
715
476
        const int ref = rf->mfmv_ref[n];
716
476
        const int ref_sign = ref - 4;
717
476
        const refmvs_temporal_block *r = &rf->rp_ref[ref][row_start8 * stride];
718
3.48k
        for (int y = row_start8; y < row_end8; y++) {
719
3.01k
            const int y_sb_align = y & ~7;
720
3.01k
            const int y_proj_start = imax(y_sb_align, row_start8);
721
3.01k
            const int y_proj_end = imin(y_sb_align + 8, row_end8);
722
10.5k
            for (int x = col_start8i; x < col_end8i; x++) {
723
7.53k
                const refmvs_temporal_block *rb = &r[x];
724
7.53k
                const int b_ref = rb->ref;
725
7.53k
                if (!b_ref) continue;
726
2.59k
                const int ref2ref = rf->mfmv_ref2ref[n][b_ref - 1];
727
2.59k
                if (!ref2ref) continue;
728
2.28k
                const mv b_mv = rb->mv;
729
2.28k
                const mv offset = mv_projection(b_mv, ref2cur, ref2ref);
730
2.28k
                int pos_x = x + apply_sign(abs(offset.x) >> 6,
731
2.28k
                                           offset.x ^ ref_sign);
732
2.28k
                const int pos_y = y + apply_sign(abs(offset.y) >> 6,
733
2.28k
                                                 offset.y ^ ref_sign);
734
2.28k
                if (pos_y >= y_proj_start && pos_y < y_proj_end) {
735
1.92k
                    const ptrdiff_t pos = (pos_y & 15) * stride;
736
7.41k
                    for (;;) {
737
7.41k
                        const int x_sb_align = x & ~7;
738
7.41k
                        if (pos_x >= imax(x_sb_align - 8, col_start8) &&
739
7.16k
                            pos_x < imin(x_sb_align + 16, col_end8))
740
6.88k
                        {
741
6.88k
                            rp_proj[pos + pos_x].mv = rb->mv;
742
6.88k
                            rp_proj[pos + pos_x].ref = ref2ref;
743
6.88k
                        }
744
7.41k
                        if (++x >= col_end8i) break;
745
5.93k
                        rb++;
746
5.93k
                        if (rb->ref != b_ref || rb->mv.n != b_mv.n) break;
747
5.49k
                        pos_x++;
748
5.49k
                    }
749
1.92k
                } else {
750
1.00k
                    for (;;) {
751
1.00k
                        if (++x >= col_end8i) break;
752
791
                        rb++;
753
791
                        if (rb->ref != b_ref || rb->mv.n != b_mv.n) break;
754
791
                    }
755
361
                }
756
2.28k
                x--;
757
2.28k
            }
758
3.01k
            r += stride;
759
3.01k
        }
760
476
    }
761
993
}
762
763
static void save_tmvs_c(refmvs_temporal_block *rp, const ptrdiff_t stride,
764
                        refmvs_block *const *const rr,
765
                        const uint8_t *const ref_sign,
766
                        const int col_end8, const int row_end8,
767
                        const int col_start8, const int row_start8)
768
1.90k
{
769
19.0k
    for (int y = row_start8; y < row_end8; y++) {
770
17.1k
        const refmvs_block *const b = rr[(y & 15) * 2];
771
772
63.2k
        for (int x = col_start8; x < col_end8;) {
773
46.0k
            const refmvs_block *const cand_b = &b[x * 2 + 1];
774
46.0k
            const int bw8 = (dav1d_block_dimensions[cand_b->bs][0] + 1) >> 1;
775
776
46.0k
            if (cand_b->ref.ref[1] > 0 && ref_sign[cand_b->ref.ref[1] - 1] &&
777
9.39k
                (abs(cand_b->mv.mv[1].y) | abs(cand_b->mv.mv[1].x)) < 4096)
778
5.61k
            {
779
24.0k
                for (int n = 0; n < bw8; n++, x++)
780
18.3k
                    rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[1],
781
18.3k
                                                      .ref = cand_b->ref.ref[1] };
782
40.4k
            } else if (cand_b->ref.ref[0] > 0 && ref_sign[cand_b->ref.ref[0] - 1] &&
783
26.2k
                       (abs(cand_b->mv.mv[0].y) | abs(cand_b->mv.mv[0].x)) < 4096)
784
22.4k
            {
785
87.3k
                for (int n = 0; n < bw8; n++, x++)
786
64.9k
                    rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[0],
787
64.9k
                                                      .ref = cand_b->ref.ref[0] };
788
22.4k
            } else {
789
78.6k
                for (int n = 0; n < bw8; n++, x++) {
790
60.6k
                    rp[x].mv.n = 0;
791
60.6k
                    rp[x].ref = 0; // "invalid"
792
60.6k
                }
793
17.9k
            }
794
46.0k
        }
795
17.1k
        rp += stride;
796
17.1k
    }
797
1.90k
}
798
799
int dav1d_refmvs_init_frame(refmvs_frame *const rf,
800
                            const Dav1dSequenceHeader *const seq_hdr,
801
                            const Dav1dFrameHeader *const frm_hdr,
802
                            const uint8_t ref_poc[7],
803
                            refmvs_temporal_block *const rp,
804
                            const uint8_t ref_ref_poc[7][7],
805
                            /*const*/ refmvs_temporal_block *const rp_ref[7],
806
                            const int n_tile_threads, const int n_frame_threads)
807
7.83k
{
808
7.83k
    const int rp_stride = ((frm_hdr->width[0] + 127) & ~127) >> 3;
809
7.83k
    const int n_tile_rows = n_tile_threads > 1 ? frm_hdr->tiling.rows : 1;
810
7.83k
    const int n_blocks = rp_stride * n_tile_rows;
811
812
7.83k
    rf->sbsz = 16 << seq_hdr->sb128;
813
7.83k
    rf->frm_hdr = frm_hdr;
814
7.83k
    rf->iw8 = (frm_hdr->width[0] + 7) >> 3;
815
7.83k
    rf->ih8 = (frm_hdr->height + 7) >> 3;
816
7.83k
    rf->iw4 = rf->iw8 << 1;
817
7.83k
    rf->ih4 = rf->ih8 << 1;
818
7.83k
    rf->rp = rp;
819
7.83k
    rf->rp_stride = rp_stride;
820
7.83k
    rf->n_tile_threads = n_tile_threads;
821
7.83k
    rf->n_frame_threads = n_frame_threads;
822
823
7.83k
    if (n_blocks != rf->n_blocks) {
824
7.83k
        const size_t r_sz = sizeof(*rf->r) * 35 * 2 * n_blocks * (1 + (n_frame_threads > 1));
825
7.83k
        const size_t rp_proj_sz = sizeof(*rf->rp_proj) * 16 * n_blocks;
826
        /* Note that sizeof(*rf->r) == 12, but it's accessed using 16-byte unaligned
827
         * loads in save_tmvs() asm which can overread 4 bytes into rp_proj. */
828
7.83k
        dav1d_free_aligned(rf->r);
829
7.83k
        rf->r = dav1d_alloc_aligned(ALLOC_REFMVS, r_sz + rp_proj_sz, 64);
830
7.83k
        if (!rf->r) {
831
0
            rf->n_blocks = 0;
832
0
            return DAV1D_ERR(ENOMEM);
833
0
        }
834
835
7.83k
        rf->rp_proj = (refmvs_temporal_block*)((uintptr_t)rf->r + r_sz);
836
7.83k
        rf->n_blocks = n_blocks;
837
7.83k
    }
838
839
7.83k
    const int poc = frm_hdr->frame_offset;
840
62.7k
    for (int i = 0; i < 7; i++) {
841
54.8k
        const int poc_diff = get_poc_diff(seq_hdr->order_hint_n_bits,
842
54.8k
                                          ref_poc[i], poc);
843
54.8k
        rf->sign_bias[i] = poc_diff > 0;
844
54.8k
        rf->mfmv_sign[i] = poc_diff < 0;
845
54.8k
        rf->pocdiff[i] = iclip(get_poc_diff(seq_hdr->order_hint_n_bits,
846
54.8k
                                            poc, ref_poc[i]), -31, 31);
847
54.8k
    }
848
849
    // temporal MV setup
850
7.83k
    rf->n_mfmvs = 0;
851
7.83k
    rf->rp_ref = rp_ref;
852
7.83k
    if (frm_hdr->use_ref_frame_mvs && seq_hdr->order_hint_n_bits) {
853
986
        int total = 2;
854
986
        if (rp_ref[0] && ref_ref_poc[0][6] != ref_poc[3] /* alt-of-last != gold */) {
855
142
            rf->mfmv_ref[rf->n_mfmvs++] = 0; // last
856
142
            total = 3;
857
142
        }
858
986
        if (rp_ref[4] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[4],
859
131
                                      frm_hdr->frame_offset) > 0)
860
42
        {
861
42
            rf->mfmv_ref[rf->n_mfmvs++] = 4; // bwd
862
42
        }
863
986
        if (rp_ref[5] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[5],
864
191
                                      frm_hdr->frame_offset) > 0)
865
60
        {
866
60
            rf->mfmv_ref[rf->n_mfmvs++] = 5; // altref2
867
60
        }
868
986
        if (rf->n_mfmvs < total && rp_ref[6] &&
869
137
            get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[6],
870
137
                         frm_hdr->frame_offset) > 0)
871
58
        {
872
58
            rf->mfmv_ref[rf->n_mfmvs++] = 6; // altref
873
58
        }
874
986
        if (rf->n_mfmvs < total && rp_ref[1])
875
167
            rf->mfmv_ref[rf->n_mfmvs++] = 1; // last2
876
877
1.45k
        for (int n = 0; n < rf->n_mfmvs; n++) {
878
469
            const int rpoc = ref_poc[rf->mfmv_ref[n]];
879
469
            const int diff1 = get_poc_diff(seq_hdr->order_hint_n_bits,
880
469
                                           rpoc, frm_hdr->frame_offset);
881
469
            if (abs(diff1) > 31) {
882
19
                rf->mfmv_ref2cur[n] = INVALID_REF2CUR;
883
450
            } else {
884
450
                rf->mfmv_ref2cur[n] = rf->mfmv_ref[n] < 4 ? -diff1 : diff1;
885
3.60k
                for (int m = 0; m < 7; m++) {
886
3.15k
                    const int rrpoc = ref_ref_poc[rf->mfmv_ref[n]][m];
887
3.15k
                    const int diff2 = get_poc_diff(seq_hdr->order_hint_n_bits,
888
3.15k
                                                   rpoc, rrpoc);
889
                    // unsigned comparison also catches the < 0 case
890
3.15k
                    rf->mfmv_ref2ref[n][m] = (unsigned) diff2 > 31U ? 0 : diff2;
891
3.15k
                }
892
450
            }
893
469
        }
894
986
    }
895
7.83k
    rf->use_ref_frame_mvs = rf->n_mfmvs > 0;
896
897
7.83k
    return 0;
898
7.83k
}
899
900
/*
 * C implementation of the splat_mv hook.
 *
 * Copies *rmv into bw4 consecutive entries, starting at index bx4, of each
 * of bh4 consecutive row pointers taken from rr. At least one row is always
 * processed, so bh4 must be >= 1.
 */
static void splat_mv_c(refmvs_block **rr, const refmvs_block *const rmv,
                       const int bx4, const int bw4, int bh4)
{
    for (;;) {
        refmvs_block *dst = *rr++ + bx4;
        for (int left = bw4; left > 0; left--)
            *dst++ = *rmv;
        if (--bh4 == 0)
            break;
    }
}
909
910
#if HAVE_ASM
911
#if ARCH_AARCH64 || ARCH_ARM
912
#include "src/arm/refmvs.h"
913
#elif ARCH_LOONGARCH64
914
#include "src/loongarch/refmvs.h"
915
#elif ARCH_X86
916
#include "src/x86/refmvs.h"
917
#endif
918
#endif
919
920
/*
 * Fill the refmvs DSP function table in c.
 *
 * The C implementations are installed first. When HAVE_ASM is set, the
 * init routine for the target architecture then runs on the same table.
 */
COLD void dav1d_refmvs_dsp_init(Dav1dRefmvsDSPContext *const c)
{
    c->splat_mv = splat_mv_c;
    c->save_tmvs = save_tmvs_c;
    c->load_tmvs = load_tmvs_c;

#if HAVE_ASM
#if ARCH_AARCH64 || ARCH_ARM
    refmvs_dsp_init_arm(c);
#elif ARCH_LOONGARCH64
    refmvs_dsp_init_loongarch(c);
#elif ARCH_X86
    refmvs_dsp_init_x86(c);
#endif
#endif
}