Coverage Report

Created: 2026-06-10 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/dav1d/src/refmvs.c
Line
Count
Source
1
/*
2
 * Copyright © 2020, VideoLAN and dav1d authors
3
 * Copyright © 2020, Two Orioles, LLC
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * 1. Redistributions of source code must retain the above copyright notice, this
10
 *    list of conditions and the following disclaimer.
11
 *
12
 * 2. Redistributions in binary form must reproduce the above copyright notice,
13
 *    this list of conditions and the following disclaimer in the documentation
14
 *    and/or other materials provided with the distribution.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
 */
27
28
#include "config.h"
29
30
#include <limits.h>
31
#include <stdlib.h>
32
33
#include "dav1d/common.h"
34
35
#include "common/intops.h"
36
37
#include "src/env.h"
38
#include "src/mem.h"
39
#include "src/refmvs.h"
40
41
/*
 * Offer the motion vector(s) of the spatial neighbour `b` to the candidate
 * stack.
 *
 * The neighbour only contributes when its reference(s) match `ref`: for a
 * single reference (ref.ref[1] == -1) the first of its two references equal
 * to ref.ref[0] is used, for a compound reference the whole pair must match.
 * On a match, *have_refmv_match is set to 1 and b->mf >> 1 is OR-ed into
 * *have_newmv_match. If the resulting vector is already on the stack, `weight`
 * is added to that entry; otherwise a new entry is appended as long as fewer
 * than 8 entries are in use.
 */
static void add_spatial_candidate(refmvs_candidate *const mvstack, int *const cnt,
                                  const int weight, const refmvs_block *const b,
                                  const union refmvs_refpair ref, const mv gmv[2],
                                  int *const have_newmv_match,
                                  int *const have_refmv_match)
{
    // intra block, no intrabc
    if (b->mv.mv[0].n == INVALID_MV)
        return;

    const int used = *cnt;
    // bit 0 of mf: substitute gmv[] for the block's own vector when gmv[] is valid
    const int want_gmv = b->mf & 1;

    if (ref.ref[1] == -1) {
        // single reference: locate the first matching reference of the neighbour
        int idx = 0;
        while (idx < 2 && b->ref.ref[idx] != ref.ref[0])
            idx++;
        if (idx == 2)
            return;

        mv cand = b->mv.mv[idx];
        if (want_gmv && gmv[0].n != INVALID_MV)
            cand = gmv[0];

        *have_refmv_match = 1;
        *have_newmv_match |= b->mf >> 1;

        for (int i = 0; i < used; i++) {
            if (mvstack[i].mv.mv[0].n == cand.n) {
                mvstack[i].weight += weight;
                return;
            }
        }

        if (used < 8) {
            mvstack[used].mv.mv[0] = cand;
            mvstack[used].weight = weight;
            *cnt = used + 1;
        }
        return;
    }

    // compound reference: both references have to be identical
    if (b->ref.pair != ref.pair)
        return;

    const mv first  = (want_gmv && gmv[0].n != INVALID_MV) ? gmv[0] : b->mv.mv[0];
    const mv second = (want_gmv && gmv[1].n != INVALID_MV) ? gmv[1] : b->mv.mv[1];
    const refmvs_mvpair cand = { .mv = { first, second } };

    *have_refmv_match = 1;
    *have_newmv_match |= b->mf >> 1;

    for (int i = 0; i < used; i++) {
        if (mvstack[i].mv.n == cand.n) {
            mvstack[i].weight += weight;
            return;
        }
    }

    if (used < 8) {
        mvstack[used].mv = cand;
        mvstack[used].weight = weight;
        *cnt = used + 1;
    }
}
96
97
static int scan_row(refmvs_candidate *const mvstack, int *const cnt,
98
                    const union refmvs_refpair ref, const mv gmv[2],
99
                    const refmvs_block *b, const int bw4, const int w4,
100
                    const int max_rows, const int step,
101
                    int *const have_newmv_match, int *const have_refmv_match)
102
1.18M
{
103
1.18M
    const refmvs_block *cand_b = b;
104
1.18M
    const enum BlockSize first_cand_bs = cand_b->bs;
105
1.18M
    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
106
1.18M
    int cand_bw4 = first_cand_b_dim[0];
107
1.18M
    int len = imax(step, imin(bw4, cand_bw4));
108
109
1.18M
    if (bw4 <= cand_bw4) {
110
        // FIXME weight can be higher for odd blocks (bx4 & 1), but then the
111
        // position of the first block has to be odd already, i.e. not just
112
        // for row_offset=-3/-5
113
        // FIXME why can this not be cand_bw4?
114
1.04M
        const int weight = bw4 == 1 ? 2 :
115
1.04M
                           imax(2, imin(2 * max_rows, first_cand_b_dim[1]));
116
1.04M
        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
117
1.04M
                              have_newmv_match, have_refmv_match);
118
1.04M
        return weight >> 1;
119
1.04M
    }
120
121
268k
    for (int x = 0;;) {
122
        // FIXME if we overhang above, we could fill a bitmask so we don't have
123
        // to repeat the add_spatial_candidate() for the next row, but just increase
124
        // the weight here
125
268k
        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
126
268k
                              have_newmv_match, have_refmv_match);
127
268k
        x += len;
128
268k
        if (x >= w4) return 1;
129
128k
        cand_b = &b[x];
130
128k
        cand_bw4 = dav1d_block_dimensions[cand_b->bs][0];
131
128k
        assert(cand_bw4 < bw4);
132
128k
        len = imax(step, cand_bw4);
133
128k
    }
134
140k
}
135
136
static int scan_col(refmvs_candidate *const mvstack, int *const cnt,
137
                    const union refmvs_refpair ref, const mv gmv[2],
138
                    /*const*/ refmvs_block *const *b, const int bh4, const int h4,
139
                    const int bx4, const int max_cols, const int step,
140
                    int *const have_newmv_match, int *const have_refmv_match)
141
1.69M
{
142
1.69M
    const refmvs_block *cand_b = &b[0][bx4];
143
1.69M
    const enum BlockSize first_cand_bs = cand_b->bs;
144
1.69M
    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
145
1.69M
    int cand_bh4 = first_cand_b_dim[1];
146
1.69M
    int len = imax(step, imin(bh4, cand_bh4));
147
148
1.69M
    if (bh4 <= cand_bh4) {
149
        // FIXME weight can be higher for odd blocks (by4 & 1), but then the
150
        // position of the first block has to be odd already, i.e. not just
151
        // for col_offset=-3/-5
152
        // FIXME why can this not be cand_bh4?
153
1.52M
        const int weight = bh4 == 1 ? 2 :
154
1.52M
                           imax(2, imin(2 * max_cols, first_cand_b_dim[0]));
155
1.52M
        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
156
1.52M
                            have_newmv_match, have_refmv_match);
157
1.52M
        return weight >> 1;
158
1.52M
    }
159
160
322k
    for (int y = 0;;) {
161
        // FIXME if we overhang above, we could fill a bitmask so we don't have
162
        // to repeat the add_spatial_candidate() for the next row, but just increase
163
        // the weight here
164
322k
        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
165
322k
                              have_newmv_match, have_refmv_match);
166
322k
        y += len;
167
322k
        if (y >= h4) return 1;
168
154k
        cand_b = &b[y][bx4];
169
154k
        cand_bh4 = dav1d_block_dimensions[cand_b->bs][1];
170
154k
        assert(cand_bh4 < bh4);
171
154k
        len = imax(step, cand_bh4);
172
154k
    }
173
168k
}
174
175
61.4k
static inline union mv mv_projection(const union mv mv, const int num, const int den) {
176
61.4k
    static const uint16_t div_mult[32] = {
177
61.4k
           0, 16384, 8192, 5461, 4096, 3276, 2730, 2340,
178
61.4k
        2048,  1820, 1638, 1489, 1365, 1260, 1170, 1092,
179
61.4k
        1024,   963,  910,  862,  819,  780,  744,  712,
180
61.4k
         682,   655,  630,  606,  585,  564,  546,  528
181
61.4k
    };
182
61.4k
    assert(den > 0 && den < 32);
183
61.4k
    assert(num > -32 && num < 32);
184
61.4k
    const int frac = num * div_mult[den];
185
61.4k
    const int y = mv.y * frac, x = mv.x * frac;
186
    // Round and clip according to AV1 spec section 7.9.3
187
61.4k
    return (union mv) { // 0x3fff == (1 << 14) - 1
188
61.4k
        .y = iclip((y + 8192 + (y >> 31)) >> 14, -0x3fff, 0x3fff),
189
61.4k
        .x = iclip((x + 8192 + (x >> 31)) >> 14, -0x3fff, 0x3fff)
190
61.4k
    };
191
61.4k
}
192
193
static void add_temporal_candidate(const refmvs_frame *const rf,
194
                                   refmvs_candidate *const mvstack, int *const cnt,
195
                                   const refmvs_temporal_block *const rb,
196
                                   const union refmvs_refpair ref, int *const globalmv_ctx,
197
                                   const union mv gmv[])
198
63.9k
{
199
63.9k
    if (rb->mv.n == INVALID_MV) return;
200
201
30.8k
    union mv mv = mv_projection(rb->mv, rf->pocdiff[ref.ref[0] - 1], rb->ref);
202
30.8k
    fix_mv_precision(rf->frm_hdr, &mv);
203
204
30.8k
    const int last = *cnt;
205
30.8k
    if (ref.ref[1] == -1) {
206
19.9k
        if (globalmv_ctx)
207
5.27k
            *globalmv_ctx = (abs(mv.x - gmv[0].x) | abs(mv.y - gmv[0].y)) >= 16;
208
209
29.0k
        for (int n = 0; n < last; n++)
210
24.7k
            if (mvstack[n].mv.mv[0].n == mv.n) {
211
15.6k
                mvstack[n].weight += 2;
212
15.6k
                return;
213
15.6k
            }
214
4.30k
        if (last < 8) {
215
4.27k
            mvstack[last].mv.mv[0] = mv;
216
4.27k
            mvstack[last].weight = 2;
217
4.27k
            *cnt = last + 1;
218
4.27k
        }
219
10.8k
    } else {
220
10.8k
        refmvs_mvpair mvp = { .mv = {
221
10.8k
            [0] = mv,
222
10.8k
            [1] = mv_projection(rb->mv, rf->pocdiff[ref.ref[1] - 1], rb->ref),
223
10.8k
        }};
224
10.8k
        fix_mv_precision(rf->frm_hdr, &mvp.mv[1]);
225
226
14.9k
        for (int n = 0; n < last; n++)
227
12.2k
            if (mvstack[n].mv.n == mvp.n) {
228
8.15k
                mvstack[n].weight += 2;
229
8.15k
                return;
230
8.15k
            }
231
2.69k
        if (last < 8) {
232
2.67k
            mvstack[last].mv = mvp;
233
2.67k
            mvstack[last].weight = 2;
234
2.67k
            *cnt = last + 1;
235
2.67k
        }
236
2.69k
    }
237
30.8k
}
238
239
static void add_compound_extended_candidate(refmvs_candidate *const same,
240
                                            int *const same_count,
241
                                            const refmvs_block *const cand_b,
242
                                            const int sign0, const int sign1,
243
                                            const union refmvs_refpair ref,
244
                                            const uint8_t *const sign_bias)
245
93.3k
{
246
93.3k
    refmvs_candidate *const diff = &same[2];
247
93.3k
    int *const diff_count = &same_count[2];
248
249
240k
    for (int n = 0; n < 2; n++) {
250
183k
        const int cand_ref = cand_b->ref.ref[n];
251
252
183k
        if (cand_ref <= 0) break;
253
254
147k
        mv cand_mv = cand_b->mv.mv[n];
255
147k
        if (cand_ref == ref.ref[0]) {
256
51.0k
            if (same_count[0] < 2)
257
49.3k
                same[same_count[0]++].mv.mv[0] = cand_mv;
258
51.0k
            if (diff_count[1] < 2) {
259
44.0k
                if (sign1 ^ sign_bias[cand_ref - 1]) {
260
1.72k
                    cand_mv.y = -cand_mv.y;
261
1.72k
                    cand_mv.x = -cand_mv.x;
262
1.72k
                }
263
44.0k
                diff[diff_count[1]++].mv.mv[1] = cand_mv;
264
44.0k
            }
265
96.0k
        } else if (cand_ref == ref.ref[1]) {
266
51.8k
            if (same_count[1] < 2)
267
50.7k
                same[same_count[1]++].mv.mv[1] = cand_mv;
268
51.8k
            if (diff_count[0] < 2) {
269
42.9k
                if (sign0 ^ sign_bias[cand_ref - 1]) {
270
1.81k
                    cand_mv.y = -cand_mv.y;
271
1.81k
                    cand_mv.x = -cand_mv.x;
272
1.81k
                }
273
42.9k
                diff[diff_count[0]++].mv.mv[0] = cand_mv;
274
42.9k
            }
275
51.8k
        } else {
276
44.2k
            mv i_cand_mv = (union mv) {
277
44.2k
                .x = -cand_mv.x,
278
44.2k
                .y = -cand_mv.y
279
44.2k
            };
280
281
44.2k
            if (diff_count[0] < 2) {
282
35.1k
                diff[diff_count[0]++].mv.mv[0] =
283
35.1k
                    sign0 ^ sign_bias[cand_ref - 1] ?
284
34.5k
                    i_cand_mv : cand_mv;
285
35.1k
            }
286
287
44.2k
            if (diff_count[1] < 2) {
288
33.4k
                diff[diff_count[1]++].mv.mv[1] =
289
33.4k
                    sign1 ^ sign_bias[cand_ref - 1] ?
290
32.7k
                    i_cand_mv : cand_mv;
291
33.4k
            }
292
44.2k
        }
293
147k
    }
294
93.3k
}
295
296
static void add_single_extended_candidate(refmvs_candidate mvstack[8], int *const cnt,
297
                                          const refmvs_block *const cand_b,
298
                                          const int sign, const uint8_t *const sign_bias)
299
358k
{
300
721k
    for (int n = 0; n < 2; n++) {
301
704k
        const int cand_ref = cand_b->ref.ref[n];
302
303
704k
        if (cand_ref <= 0) break;
304
        // we need to continue even if cand_ref == ref.ref[0], since
305
        // the candidate could have been added as a globalmv variant,
306
        // which changes the value
307
        // FIXME if scan_{row,col}() returned a mask for the nearest
308
        // edge, we could skip the appropriate ones here
309
310
362k
        mv cand_mv = cand_b->mv.mv[n];
311
362k
        if (sign ^ sign_bias[cand_ref - 1]) {
312
2.04k
            cand_mv.y = -cand_mv.y;
313
2.04k
            cand_mv.x = -cand_mv.x;
314
2.04k
        }
315
316
362k
        int m;
317
362k
        const int last = *cnt;
318
403k
        for (m = 0; m < last; m++)
319
338k
            if (cand_mv.n == mvstack[m].mv.mv[0].n)
320
296k
                break;
321
362k
        if (m == last) {
322
65.4k
            mvstack[m].mv.mv[0] = cand_mv;
323
65.4k
            mvstack[m].weight = 2; // "minimal"
324
65.4k
            *cnt = last + 1;
325
65.4k
        }
326
362k
    }
327
358k
}
328
329
/*
330
 * refmvs_frame allocates memory for one sbrow (32 blocks high, whole frame
331
 * wide) of 4x4-resolution refmvs_block entries for spatial MV referencing.
332
 * refmvs_tile.r[] keeps a list of 35 (32 + 3 above) pointers into this memory,
333
 * and each sbrow, the bottom entries (y=27/29/31) are exchanged with the top
334
 * (-5/-3/-1) pointers by calling dav1d_refmvs_tile_sbrow_init() at the start
335
 * of each tile/sbrow.
336
 *
337
 * For temporal MV referencing, we call dav1d_refmvs_save_tmvs() at the end of
338
 * each tile/sbrow (when tile column threading is enabled), or at the start of
339
 * each interleaved sbrow (i.e. once for all tile columns together, when tile
340
 * column threading is disabled). This will copy the 4x4-resolution spatial MVs
341
 * into 8x8-resolution refmvs_temporal_block structures. Then, for subsequent
342
 * frames, at the start of each tile/sbrow (when tile column threading is
343
 * enabled) or at the start of each interleaved sbrow (when tile column
344
 * threading is disabled), we call load_tmvs(), which will project the MVs to
345
 * their respective position in the current frame.
346
 */
347
348
void dav1d_refmvs_find(const refmvs_tile *const rt,
349
                       refmvs_candidate mvstack[8], int *const cnt,
350
                       int *const ctx,
351
                       const union refmvs_refpair ref, const enum BlockSize bs,
352
                       const enum EdgeFlags edge_flags,
353
                       const int by4, const int bx4)
354
980k
{
355
980k
    const refmvs_frame *const rf = rt->rf;
356
980k
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
357
980k
    const int bw4 = b_dim[0], w4 = imin(imin(bw4, 16), rt->tile_col.end - bx4);
358
980k
    const int bh4 = b_dim[1], h4 = imin(imin(bh4, 16), rt->tile_row.end - by4);
359
980k
    mv gmv[2], tgmv[2];
360
361
980k
    *cnt = 0;
362
980k
    assert(ref.ref[0] >=  0 && ref.ref[0] <= 8 &&
363
980k
           ref.ref[1] >= -1 && ref.ref[1] <= 8);
364
980k
    if (ref.ref[0] > 0) {
365
461k
        tgmv[0] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[0] - 1],
366
461k
                             bx4, by4, bw4, bh4, rf->frm_hdr);
367
461k
        gmv[0] = rf->frm_hdr->gmv[ref.ref[0] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
368
343k
                 tgmv[0] : (mv) { .n = INVALID_MV };
369
519k
    } else {
370
519k
        tgmv[0] = (mv) { .n = 0 };
371
519k
        gmv[0] = (mv) { .n = INVALID_MV };
372
519k
    }
373
980k
    if (ref.ref[1] > 0) {
374
88.8k
        tgmv[1] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[1] - 1],
375
88.8k
                             bx4, by4, bw4, bh4, rf->frm_hdr);
376
88.8k
        gmv[1] = rf->frm_hdr->gmv[ref.ref[1] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
377
61.3k
                 tgmv[1] : (mv) { .n = INVALID_MV };
378
88.8k
    }
379
380
    // top
381
980k
    int have_newmv = 0, have_col_mvs = 0, have_row_mvs = 0;
382
980k
    unsigned max_rows = 0, n_rows = ~0;
383
980k
    const refmvs_block *b_top;
384
980k
    if (by4 > rt->tile_row.start) {
385
657k
        max_rows = imin((by4 - rt->tile_row.start + 1) >> 1, 2 + (bh4 > 1));
386
657k
        b_top = &rt->r[(by4 & 31) - 1 + 5][bx4];
387
657k
        n_rows = scan_row(mvstack, cnt, ref, gmv, b_top,
388
657k
                          bw4, w4, max_rows, bw4 >= 16 ? 4 : 1,
389
657k
                          &have_newmv, &have_row_mvs);
390
657k
    }
391
392
    // left
393
980k
    unsigned max_cols = 0, n_cols = ~0U;
394
980k
    refmvs_block *const *b_left;
395
980k
    if (bx4 > rt->tile_col.start) {
396
825k
        max_cols = imin((bx4 - rt->tile_col.start + 1) >> 1, 2 + (bw4 > 1));
397
825k
        b_left = &rt->r[(by4 & 31) + 5];
398
825k
        n_cols = scan_col(mvstack, cnt, ref, gmv, b_left,
399
825k
                          bh4, h4, bx4 - 1, max_cols, bh4 >= 16 ? 4 : 1,
400
825k
                          &have_newmv, &have_col_mvs);
401
825k
    }
402
403
    // top/right
404
980k
    if (n_rows != ~0U && edge_flags & EDGE_I444_TOP_HAS_RIGHT &&
405
399k
        imax(bw4, bh4) <= 16 && bw4 + bx4 < rt->tile_col.end)
406
328k
    {
407
328k
        add_spatial_candidate(mvstack, cnt, 4, &b_top[bw4], ref, gmv,
408
328k
                              &have_newmv, &have_row_mvs);
409
328k
    }
410
411
980k
    const int nearest_match = have_col_mvs + have_row_mvs;
412
980k
    const int nearest_cnt = *cnt;
413
2.18M
    for (int n = 0; n < nearest_cnt; n++)
414
1.20M
        mvstack[n].weight += 640;
415
416
    // temporal
417
980k
    int globalmv_ctx = rf->frm_hdr->use_ref_frame_mvs;
418
980k
    if (rf->use_ref_frame_mvs) {
419
20.0k
        const ptrdiff_t stride = rf->rp_stride;
420
20.0k
        const int by8 = by4 >> 1, bx8 = bx4 >> 1;
421
20.0k
        const refmvs_temporal_block *const rbi = &rt->rp_proj[(by8 & 15) * stride + bx8];
422
20.0k
        const refmvs_temporal_block *rb = rbi;
423
20.0k
        const int step_h = bw4 >= 16 ? 2 : 1, step_v = bh4 >= 16 ? 2 : 1;
424
20.0k
        const int w8 = imin((w4 + 1) >> 1, 8), h8 = imin((h4 + 1) >> 1, 8);
425
59.2k
        for (int y = 0; y < h8; y += step_v) {
426
95.3k
            for (int x = 0; x < w8; x+= step_h) {
427
56.1k
                add_temporal_candidate(rf, mvstack, cnt, &rb[x], ref,
428
56.1k
                                       !(x | y) ? &globalmv_ctx : NULL, tgmv);
429
56.1k
            }
430
39.1k
            rb += stride * step_v;
431
39.1k
        }
432
20.0k
        if (imin(bw4, bh4) >= 2 && imax(bw4, bh4) < 16) {
433
10.9k
            const int bh8 = bh4 >> 1, bw8 = bw4 >> 1;
434
10.9k
            rb = &rbi[bh8 * stride];
435
10.9k
            const int has_bottom = by8 + bh8 < imin(rt->tile_row.end >> 1,
436
10.9k
                                                    (by8 & ~7) + 8);
437
10.9k
            if (has_bottom && bx8 - 1 >= imax(rt->tile_col.start >> 1, bx8 & ~7)) {
438
2.26k
                add_temporal_candidate(rf, mvstack, cnt, &rb[-1], ref,
439
2.26k
                                       NULL, NULL);
440
2.26k
            }
441
10.9k
            if (bx8 + bw8 < imin(rt->tile_col.end >> 1, (bx8 & ~7) + 8)) {
442
3.79k
                if (has_bottom) {
443
2.17k
                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8], ref,
444
2.17k
                                           NULL, NULL);
445
2.17k
                }
446
3.79k
                if (by8 + bh8 - 1 < imin(rt->tile_row.end >> 1, (by8 & ~7) + 8)) {
447
3.35k
                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8 - stride],
448
3.35k
                                           ref, NULL, NULL);
449
3.35k
                }
450
3.79k
            }
451
10.9k
        }
452
20.0k
    }
453
980k
    assert(*cnt <= 8);
454
455
    // top/left (which, confusingly, is part of "secondary" references)
456
980k
    int have_dummy_newmv_match;
457
980k
    if ((n_rows | n_cols) != ~0U) {
458
563k
        add_spatial_candidate(mvstack, cnt, 4, &b_top[-1], ref, gmv,
459
563k
                              &have_dummy_newmv_match, &have_row_mvs);
460
563k
    }
461
462
    // "secondary" (non-direct neighbour) top & left edges
463
    // what is different about secondary is that everything is now in 8x8 resolution
464
2.94M
    for (int n = 2; n <= 3; n++) {
465
1.96M
        if ((unsigned) n > n_rows && (unsigned) n <= max_rows) {
466
524k
            n_rows += scan_row(mvstack, cnt, ref, gmv,
467
524k
                               &rt->r[(((by4 & 31) - 2 * n + 1) | 1) + 5][bx4 | 1],
468
524k
                               bw4, w4, 1 + max_rows - n, bw4 >= 16 ? 4 : 2,
469
524k
                               &have_dummy_newmv_match, &have_row_mvs);
470
524k
        }
471
472
1.96M
        if ((unsigned) n > n_cols && (unsigned) n <= max_cols) {
473
869k
            n_cols += scan_col(mvstack, cnt, ref, gmv, &rt->r[((by4 & 31) | 1) + 5],
474
869k
                               bh4, h4, (bx4 - n * 2 + 1) | 1,
475
869k
                               1 + max_cols - n, bh4 >= 16 ? 4 : 2,
476
869k
                               &have_dummy_newmv_match, &have_col_mvs);
477
869k
        }
478
1.96M
    }
479
980k
    assert(*cnt <= 8);
480
481
980k
    const int ref_match_count = have_col_mvs + have_row_mvs;
482
483
    // context build-up
484
980k
    int refmv_ctx, newmv_ctx;
485
980k
    switch (nearest_match) {
486
195k
    case 0:
487
195k
        refmv_ctx = imin(2, ref_match_count);
488
195k
        newmv_ctx = ref_match_count > 0;
489
195k
        break;
490
412k
    case 1:
491
412k
        refmv_ctx = imin(ref_match_count * 3, 4);
492
412k
        newmv_ctx = 3 - have_newmv;
493
412k
        break;
494
375k
    case 2:
495
375k
        refmv_ctx = 5;
496
375k
        newmv_ctx = 5 - have_newmv;
497
375k
        break;
498
980k
    }
499
500
    // sorting (nearest, then "secondary")
501
982k
    int len = nearest_cnt;
502
1.98M
    while (len) {
503
999k
        int last = 0;
504
1.50M
        for (int n = 1; n < len; n++) {
505
502k
            if (mvstack[n - 1].weight < mvstack[n].weight) {
506
287k
#define EXCHANGE(a, b) do { refmvs_candidate tmp = a; a = b; b = tmp; } while (0)
507
223k
                EXCHANGE(mvstack[n - 1], mvstack[n]);
508
223k
                last = n;
509
223k
            }
510
502k
        }
511
999k
        len = last;
512
999k
    }
513
982k
    len = *cnt;
514
1.56M
    while (len > nearest_cnt) {
515
578k
        int last = nearest_cnt;
516
882k
        for (int n = nearest_cnt + 1; n < len; n++) {
517
303k
            if (mvstack[n - 1].weight < mvstack[n].weight) {
518
63.9k
                EXCHANGE(mvstack[n - 1], mvstack[n]);
519
63.9k
#undef EXCHANGE
520
63.9k
                last = n;
521
63.9k
            }
522
303k
        }
523
578k
        len = last;
524
578k
    }
525
526
982k
    if (ref.ref[1] > 0) {
527
88.8k
        if (*cnt < 2) {
528
70.2k
            const int sign0 = rf->sign_bias[ref.ref[0] - 1];
529
70.2k
            const int sign1 = rf->sign_bias[ref.ref[1] - 1];
530
70.2k
            const int sz4 = imin(w4, h4);
531
70.2k
            refmvs_candidate *const same = &mvstack[*cnt];
532
70.2k
            int same_count[4] = { 0 };
533
534
            // non-self references in top
535
83.3k
            if (n_rows != ~0U) for (int x = 0; x < sz4;) {
536
43.4k
                const refmvs_block *const cand_b = &b_top[x];
537
43.4k
                add_compound_extended_candidate(same, same_count, cand_b,
538
43.4k
                                                sign0, sign1, ref, rf->sign_bias);
539
43.4k
                x += dav1d_block_dimensions[cand_b->bs][0];
540
43.4k
            }
541
542
            // non-self references in left
543
94.6k
            if (n_cols != ~0U) for (int y = 0; y < sz4;) {
544
49.8k
                const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
545
49.8k
                add_compound_extended_candidate(same, same_count, cand_b,
546
49.8k
                                                sign0, sign1, ref, rf->sign_bias);
547
49.8k
                y += dav1d_block_dimensions[cand_b->bs][1];
548
49.8k
            }
549
550
70.2k
            refmvs_candidate *const diff = &same[2];
551
70.2k
            const int *const diff_count = &same_count[2];
552
553
            // merge together
554
210k
            for (int n = 0; n < 2; n++) {
555
140k
                int m = same_count[n];
556
557
140k
                if (m >= 2) continue;
558
559
116k
                const int l = diff_count[n];
560
116k
                if (l) {
561
75.1k
                    same[m].mv.mv[n] = diff[0].mv.mv[n];
562
75.1k
                    if (++m == 2) continue;
563
27.4k
                    if (l == 2) {
564
20.4k
                        same[1].mv.mv[n] = diff[1].mv.mv[n];
565
20.4k
                        continue;
566
20.4k
                    }
567
27.4k
                }
568
85.3k
                do {
569
85.3k
                    same[m].mv.mv[n] = tgmv[n];
570
85.3k
                } while (++m < 2);
571
48.4k
            }
572
573
            // if the first extended was the same as the non-extended one,
574
            // then replace it with the second extended one
575
70.2k
            int n = *cnt;
576
70.2k
            if (n == 1 && mvstack[0].mv.n == same[0].mv.n)
577
20.6k
                mvstack[1].mv = mvstack[2].mv;
578
112k
            do {
579
112k
                mvstack[n].weight = 2;
580
112k
            } while (++n < 2);
581
70.2k
            *cnt = 2;
582
70.2k
        }
583
584
        // clamping
585
88.8k
        const int left = -(bx4 + bw4 + 4) * 4 * 8;
586
88.8k
        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
587
88.8k
        const int top = -(by4 + bh4 + 4) * 4 * 8;
588
88.8k
        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
589
590
88.8k
        const int n_refmvs = *cnt;
591
88.8k
        int n = 0;
592
189k
        do {
593
189k
            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
594
189k
            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
595
189k
            mvstack[n].mv.mv[1].x = iclip(mvstack[n].mv.mv[1].x, left, right);
596
189k
            mvstack[n].mv.mv[1].y = iclip(mvstack[n].mv.mv[1].y, top, bottom);
597
189k
        } while (++n < n_refmvs);
598
599
88.8k
        switch (refmv_ctx >> 1) {
600
46.8k
        case 0:
601
46.8k
            *ctx = imin(newmv_ctx, 1);
602
46.8k
            break;
603
27.4k
        case 1:
604
27.4k
            *ctx = 1 + imin(newmv_ctx, 3);
605
27.4k
            break;
606
14.5k
        case 2:
607
14.5k
            *ctx = iclip(3 + newmv_ctx, 4, 7);
608
14.5k
            break;
609
88.8k
        }
610
611
88.8k
        return;
612
894k
    } else if (*cnt < 2 && ref.ref[0] > 0) {
613
268k
        const int sign = rf->sign_bias[ref.ref[0] - 1];
614
268k
        const int sz4 = imin(w4, h4);
615
616
        // non-self references in top
617
348k
        if (n_rows != ~0U) for (int x = 0; x < sz4 && *cnt < 2;) {
618
177k
            const refmvs_block *const cand_b = &b_top[x];
619
177k
            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
620
177k
            x += dav1d_block_dimensions[cand_b->bs][0];
621
177k
        }
622
623
        // non-self references in left
624
366k
        if (n_cols != ~0U) for (int y = 0; y < sz4 && *cnt < 2;) {
625
180k
            const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
626
180k
            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
627
180k
            y += dav1d_block_dimensions[cand_b->bs][1];
628
180k
        }
629
268k
    }
630
894k
    assert(*cnt <= 8);
631
632
    // clamping
633
894k
    int n_refmvs = *cnt;
634
894k
    if (n_refmvs) {
635
802k
        const int left = -(bx4 + bw4 + 4) * 4 * 8;
636
802k
        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
637
802k
        const int top = -(by4 + bh4 + 4) * 4 * 8;
638
802k
        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
639
640
802k
        int n = 0;
641
2.00M
        do {
642
2.00M
            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
643
2.00M
            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
644
2.00M
        } while (++n < n_refmvs);
645
802k
    }
646
647
1.31M
    for (int n = *cnt; n < 2; n++)
648
421k
        mvstack[n].mv.mv[0] = tgmv[0];
649
650
894k
    *ctx = (refmv_ctx << 4) | (globalmv_ctx << 3) | newmv_ctx;
651
894k
}
652
653
/*
 * Prepare `rt` for decoding superblock row `sby` of one tile.
 *
 * Sets rt->rp_proj and the row pointer table rt->r[] to the slices of the
 * frame-level buffers that belong to `tile_row_idx` (forced to 0 when
 * rf->n_tile_threads == 1), selecting the second half of rf->r for pass 2
 * when frame threading is active. The table is filled starting at offset
 * (sbsz * sby) & 16: slots off+5.. receive the sbsz rows of the superblock
 * row, slots off+0/2/4 the three extra rows that follow them in memory, and
 * slots off+1/3 are NULL. For odd sby, slots off+0/2/4 are exchanged with
 * slots off+sbsz+0/2/4. Finally the tile bounds are stored, with the end
 * positions limited to the frame size (rf->ih4 / rf->iw4).
 */
void dav1d_refmvs_tile_sbrow_init(refmvs_tile *const rt, const refmvs_frame *const rf,
                                  const int tile_col_start4, const int tile_col_end4,
                                  const int tile_row_start4, const int tile_row_end4,
                                  const int sby, int tile_row_idx, const int pass)
{
    if (rf->n_tile_threads == 1)
        tile_row_idx = 0;

    rt->rf = rf;
    rt->rp_proj = &rf->rp_proj[16 * rf->rp_stride * tile_row_idx];

    const ptrdiff_t r_stride = rf->rp_stride * 2;
    ptrdiff_t pass_off = 0;
    if (rf->n_frame_threads > 1 && pass == 2)
        pass_off = 35 * 2 * rf->n_blocks;

    const int sbsz = rf->sbsz;
    const int off = (sbsz * sby) & 16;

    // rows of the current superblock row
    refmvs_block *row = &rf->r[35 * r_stride * tile_row_idx + pass_off];
    for (int i = 0; i < sbsz; i++) {
        rt->r[off + 5 + i] = row;
        row += r_stride;
    }

    // the three extra rows stored right behind them; odd slots stay empty
    rt->r[off + 0] = row;
    rt->r[off + 1] = NULL;
    rt->r[off + 2] = row + r_stride;
    rt->r[off + 3] = NULL;
    rt->r[off + 4] = row + 2 * r_stride;

    if (sby & 1) {
        for (int k = 0; k <= 4; k += 2) {
            void *const tmp = rt->r[off + k];
            rt->r[off + k] = rt->r[off + sbsz + k];
            rt->r[off + sbsz + k] = tmp;
        }
    }

    rt->tile_row.start = tile_row_start4;
    rt->tile_row.end = imin(tile_row_end4, rf->ih4);
    rt->tile_col.start = tile_col_start4;
    rt->tile_col.end = imin(tile_col_end4, rf->iw4);
}
689
690
static void load_tmvs_c(const refmvs_frame *const rf, int tile_row_idx,
691
                        const int col_start8, const int col_end8,
692
                        const int row_start8, int row_end8)
693
12.7k
{
694
12.7k
    if (rf->n_tile_threads == 1) tile_row_idx = 0;
695
12.7k
    assert(row_start8 >= 0);
696
12.7k
    assert((unsigned) (row_end8 - row_start8) <= 16U);
697
12.7k
    row_end8 = imin(row_end8, rf->ih8);
698
12.7k
    const int col_start8i = imax(col_start8 - 8, 0);
699
12.7k
    const int col_end8i = imin(col_end8 + 8, rf->iw8);
700
701
12.7k
    const ptrdiff_t stride = rf->rp_stride;
702
12.7k
    refmvs_temporal_block *rp_proj =
703
12.7k
        &rf->rp_proj[16 * stride * tile_row_idx + (row_start8 & 15) * stride];
704
82.6k
    for (int y = row_start8; y < row_end8; y++) {
705
237k
        for (int x = col_start8; x < col_end8; x++)
706
167k
            rp_proj[x].mv.n = INVALID_MV;
707
69.8k
        rp_proj += stride;
708
69.8k
    }
709
710
12.7k
    rp_proj = &rf->rp_proj[16 * stride * tile_row_idx];
711
25.4k
    for (int n = 0; n < rf->n_mfmvs; n++) {
712
12.6k
        const int ref2cur = rf->mfmv_ref2cur[n];
713
12.6k
        if (ref2cur == INVALID_REF2CUR) continue;
714
715
10.6k
        const int ref = rf->mfmv_ref[n];
716
10.6k
        const int ref_sign = ref - 4;
717
10.6k
        const refmvs_temporal_block *r = &rf->rp_ref[ref][row_start8 * stride];
718
72.4k
        for (int y = row_start8; y < row_end8; y++) {
719
61.8k
            const int y_sb_align = y & ~7;
720
61.8k
            const int y_proj_start = imax(y_sb_align, row_start8);
721
61.8k
            const int y_proj_end = imin(y_sb_align + 8, row_end8);
722
139k
            for (int x = col_start8i; x < col_end8i; x++) {
723
77.5k
                const refmvs_temporal_block *rb = &r[x];
724
77.5k
                const int b_ref = rb->ref;
725
77.5k
                if (!b_ref) continue;
726
32.8k
                const int ref2ref = rf->mfmv_ref2ref[n][b_ref - 1];
727
32.8k
                if (!ref2ref) continue;
728
19.8k
                const mv b_mv = rb->mv;
729
19.8k
                const mv offset = mv_projection(b_mv, ref2cur, ref2ref);
730
19.8k
                int pos_x = x + apply_sign(abs(offset.x) >> 6,
731
19.8k
                                           offset.x ^ ref_sign);
732
19.8k
                const int pos_y = y + apply_sign(abs(offset.y) >> 6,
733
19.8k
                                                 offset.y ^ ref_sign);
734
19.8k
                if (pos_y >= y_proj_start && pos_y < y_proj_end) {
735
18.0k
                    const ptrdiff_t pos = (pos_y & 15) * stride;
736
47.9k
                    for (;;) {
737
47.9k
                        const int x_sb_align = x & ~7;
738
47.9k
                        if (pos_x >= imax(x_sb_align - 8, col_start8) &&
739
47.2k
                            pos_x < imin(x_sb_align + 16, col_end8))
740
46.1k
                        {
741
46.1k
                            rp_proj[pos + pos_x].mv = rb->mv;
742
46.1k
                            rp_proj[pos + pos_x].ref = ref2ref;
743
46.1k
                        }
744
47.9k
                        if (++x >= col_end8i) break;
745
33.6k
                        rb++;
746
33.6k
                        if (rb->ref != b_ref || rb->mv.n != b_mv.n) break;
747
29.8k
                        pos_x++;
748
29.8k
                    }
749
18.0k
                } else {
750
3.32k
                    for (;;) {
751
3.32k
                        if (++x >= col_end8i) break;
752
2.29k
                        rb++;
753
2.29k
                        if (rb->ref != b_ref || rb->mv.n != b_mv.n) break;
754
2.29k
                    }
755
1.76k
                }
756
19.8k
                x--;
757
19.8k
            }
758
61.8k
            r += stride;
759
61.8k
        }
760
10.6k
    }
761
12.7k
}
762
763
static void save_tmvs_c(refmvs_temporal_block *rp, const ptrdiff_t stride,
764
                        refmvs_block *const *const rr,
765
                        const uint8_t *const ref_sign,
766
                        const int col_end8, const int row_end8,
767
                        const int col_start8, const int row_start8)
768
26.1k
{
769
178k
    for (int y = row_start8; y < row_end8; y++) {
770
152k
        const refmvs_block *const b = rr[(y & 15) * 2];
771
772
350k
        for (int x = col_start8; x < col_end8;) {
773
198k
            const refmvs_block *const cand_b = &b[x * 2 + 1];
774
198k
            const int bw8 = (dav1d_block_dimensions[cand_b->bs][0] + 1) >> 1;
775
776
198k
            if (cand_b->ref.ref[1] > 0 && ref_sign[cand_b->ref.ref[1] - 1] &&
777
19.7k
                (abs(cand_b->mv.mv[1].y) | abs(cand_b->mv.mv[1].x)) < 4096)
778
15.6k
            {
779
71.4k
                for (int n = 0; n < bw8; n++, x++)
780
55.7k
                    rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[1],
781
55.7k
                                                      .ref = cand_b->ref.ref[1] };
782
183k
            } else if (cand_b->ref.ref[0] > 0 && ref_sign[cand_b->ref.ref[0] - 1] &&
783
70.9k
                       (abs(cand_b->mv.mv[0].y) | abs(cand_b->mv.mv[0].x)) < 4096)
784
66.4k
            {
785
326k
                for (int n = 0; n < bw8; n++, x++)
786
260k
                    rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[0],
787
260k
                                                      .ref = cand_b->ref.ref[0] };
788
116k
            } else {
789
623k
                for (int n = 0; n < bw8; n++, x++) {
790
507k
                    rp[x].mv.n = 0;
791
507k
                    rp[x].ref = 0; // "invalid"
792
507k
                }
793
116k
            }
794
198k
        }
795
152k
        rp += stride;
796
152k
    }
797
26.1k
}
798
799
int dav1d_refmvs_init_frame(refmvs_frame *const rf,
800
                            const Dav1dSequenceHeader *const seq_hdr,
801
                            const Dav1dFrameHeader *const frm_hdr,
802
                            const uint8_t ref_poc[7],
803
                            refmvs_temporal_block *const rp,
804
                            const uint8_t ref_ref_poc[7][7],
805
                            /*const*/ refmvs_temporal_block *const rp_ref[7],
806
                            const int n_tile_threads, const int n_frame_threads)
807
85.3k
{
808
85.3k
    const int rp_stride = ((frm_hdr->width[0] + 127) & ~127) >> 3;
809
85.3k
    const int n_tile_rows = n_tile_threads > 1 ? frm_hdr->tiling.rows : 1;
810
85.3k
    const int n_blocks = rp_stride * n_tile_rows;
811
812
85.3k
    rf->sbsz = 16 << seq_hdr->sb128;
813
85.3k
    rf->frm_hdr = frm_hdr;
814
85.3k
    rf->iw8 = (frm_hdr->width[0] + 7) >> 3;
815
85.3k
    rf->ih8 = (frm_hdr->height + 7) >> 3;
816
85.3k
    rf->iw4 = rf->iw8 << 1;
817
85.3k
    rf->ih4 = rf->ih8 << 1;
818
85.3k
    rf->rp = rp;
819
85.3k
    rf->rp_stride = rp_stride;
820
85.3k
    rf->n_tile_threads = n_tile_threads;
821
85.3k
    rf->n_frame_threads = n_frame_threads;
822
823
85.3k
    if (n_blocks != rf->n_blocks) {
824
46.2k
        const size_t r_sz = sizeof(*rf->r) * 35 * 2 * n_blocks * (1 + (n_frame_threads > 1));
825
46.2k
        const size_t rp_proj_sz = sizeof(*rf->rp_proj) * 16 * n_blocks;
826
        /* Note that sizeof(*rf->r) == 12, but it's accessed using 16-byte unaligned
827
         * loads in save_tmvs() asm which can overread 4 bytes into rp_proj. */
828
46.2k
        dav1d_free_aligned(rf->r);
829
46.2k
        rf->r = dav1d_alloc_aligned(ALLOC_REFMVS, r_sz + rp_proj_sz, 64);
830
46.2k
        if (!rf->r) {
831
0
            rf->n_blocks = 0;
832
0
            return DAV1D_ERR(ENOMEM);
833
0
        }
834
835
46.2k
        rf->rp_proj = (refmvs_temporal_block*)((uintptr_t)rf->r + r_sz);
836
46.2k
        rf->n_blocks = n_blocks;
837
46.2k
    }
838
839
85.3k
    const int poc = frm_hdr->frame_offset;
840
682k
    for (int i = 0; i < 7; i++) {
841
597k
        const int poc_diff = get_poc_diff(seq_hdr->order_hint_n_bits,
842
597k
                                          ref_poc[i], poc);
843
597k
        rf->sign_bias[i] = poc_diff > 0;
844
597k
        rf->mfmv_sign[i] = poc_diff < 0;
845
597k
        rf->pocdiff[i] = iclip(get_poc_diff(seq_hdr->order_hint_n_bits,
846
597k
                                            poc, ref_poc[i]), -31, 31);
847
597k
    }
848
849
    // temporal MV setup
850
85.3k
    rf->n_mfmvs = 0;
851
85.3k
    rf->rp_ref = rp_ref;
852
85.3k
    if (frm_hdr->use_ref_frame_mvs && seq_hdr->order_hint_n_bits) {
853
11.9k
        int total = 2;
854
11.9k
        if (rp_ref[0] && ref_ref_poc[0][6] != ref_poc[3] /* alt-of-last != gold */) {
855
2.56k
            rf->mfmv_ref[rf->n_mfmvs++] = 0; // last
856
2.56k
            total = 3;
857
2.56k
        }
858
11.9k
        if (rp_ref[4] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[4],
859
7.68k
                                      frm_hdr->frame_offset) > 0)
860
879
        {
861
879
            rf->mfmv_ref[rf->n_mfmvs++] = 4; // bwd
862
879
        }
863
11.9k
        if (rp_ref[5] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[5],
864
6.30k
                                      frm_hdr->frame_offset) > 0)
865
607
        {
866
607
            rf->mfmv_ref[rf->n_mfmvs++] = 5; // altref2
867
607
        }
868
11.9k
        if (rf->n_mfmvs < total && rp_ref[6] &&
869
4.66k
            get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[6],
870
4.66k
                         frm_hdr->frame_offset) > 0)
871
1.87k
        {
872
1.87k
            rf->mfmv_ref[rf->n_mfmvs++] = 6; // altref
873
1.87k
        }
874
11.9k
        if (rf->n_mfmvs < total && rp_ref[1])
875
6.15k
            rf->mfmv_ref[rf->n_mfmvs++] = 1; // last2
876
877
24.0k
        for (int n = 0; n < rf->n_mfmvs; n++) {
878
12.0k
            const int rpoc = ref_poc[rf->mfmv_ref[n]];
879
12.0k
            const int diff1 = get_poc_diff(seq_hdr->order_hint_n_bits,
880
12.0k
                                           rpoc, frm_hdr->frame_offset);
881
12.0k
            if (abs(diff1) > 31) {
882
1.77k
                rf->mfmv_ref2cur[n] = INVALID_REF2CUR;
883
10.3k
            } else {
884
10.3k
                rf->mfmv_ref2cur[n] = rf->mfmv_ref[n] < 4 ? -diff1 : diff1;
885
82.4k
                for (int m = 0; m < 7; m++) {
886
72.1k
                    const int rrpoc = ref_ref_poc[rf->mfmv_ref[n]][m];
887
72.1k
                    const int diff2 = get_poc_diff(seq_hdr->order_hint_n_bits,
888
72.1k
                                                   rpoc, rrpoc);
889
                    // unsigned comparison also catches the < 0 case
890
72.1k
                    rf->mfmv_ref2ref[n][m] = (unsigned) diff2 > 31U ? 0 : diff2;
891
72.1k
                }
892
10.3k
            }
893
12.0k
        }
894
11.9k
    }
895
85.3k
    rf->use_ref_frame_mvs = rf->n_mfmvs > 0;
896
897
85.3k
    return 0;
898
85.3k
}
899
900
/*
 * C implementation of the splat_mv DSP entry.
 *
 * Copies *rmv into bw4 consecutive entries, starting at column bx4, of each
 * of the bh4 rows referenced by rr[0 .. bh4 - 1]. At least one row is always
 * written, so bh4 is expected to be >= 1.
 */
static void splat_mv_c(refmvs_block **rr, const refmvs_block *const rmv,
                       const int bx4, const int bw4, int bh4)
{
    do {
        refmvs_block *const row = rr[0] + bx4;
        rr++;

        int x = 0;
        while (x < bw4) {
            row[x] = *rmv;
            x++;
        }
    } while (--bh4);
}
909
910
#if HAVE_ASM
911
#if ARCH_AARCH64 || ARCH_ARM
912
#include "src/arm/refmvs.h"
913
#elif ARCH_LOONGARCH64
914
#include "src/loongarch/refmvs.h"
915
#elif ARCH_X86
916
#include "src/x86/refmvs.h"
917
#endif
918
#endif
919
920
/*
 * Populate the refmvs DSP function table `c` with the C implementations,
 * then let the architecture-specific initialiser (if assembly is enabled for
 * this build) override individual entries.
 */
COLD void dav1d_refmvs_dsp_init(Dav1dRefmvsDSPContext *const c)
{
    c->splat_mv = splat_mv_c;
    c->save_tmvs = save_tmvs_c;
    c->load_tmvs = load_tmvs_c;

#if HAVE_ASM && (ARCH_AARCH64 || ARCH_ARM)
    refmvs_dsp_init_arm(c);
#elif HAVE_ASM && ARCH_LOONGARCH64
    refmvs_dsp_init_loongarch(c);
#elif HAVE_ASM && ARCH_X86
    refmvs_dsp_init_x86(c);
#endif
}