Coverage Report

Created: 2026-05-30 06:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/dav1d/src/refmvs.c
Line
Count
Source
1
/*
2
 * Copyright © 2020, VideoLAN and dav1d authors
3
 * Copyright © 2020, Two Orioles, LLC
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * 1. Redistributions of source code must retain the above copyright notice, this
10
 *    list of conditions and the following disclaimer.
11
 *
12
 * 2. Redistributions in binary form must reproduce the above copyright notice,
13
 *    this list of conditions and the following disclaimer in the documentation
14
 *    and/or other materials provided with the distribution.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
 */
27
28
#include "config.h"
29
30
#include <limits.h>
31
#include <stdlib.h>
32
33
#include "dav1d/common.h"
34
35
#include "common/intops.h"
36
37
#include "src/env.h"
38
#include "src/mem.h"
39
#include "src/refmvs.h"
40
41
// Offer the motion vector(s) of spatial neighbour `b` to the candidate list.
//
// mvstack / cnt:     candidate array and its current length (at most 8 used).
// weight:            amount added to an existing identical candidate, or the
//                    initial weight of a newly appended one.
// b:                 the neighbouring block being examined.
// ref:               reference pair of the current block; ref.ref[1] == -1
//                    selects the single-reference path.
// gmv:               per-reference global MVs; an entry equal to INVALID_MV
//                    means "do not substitute". gmv[1] is only read on the
//                    compound path.
// have_newmv_match:  OR-ed with (b->mf >> 1) whenever `b` matches.
// have_refmv_match:  set to 1 whenever `b` matches.
//
// NOTE(review): bit 0 of b->mf appears to flag a global-motion block and
// bit 1 a NEWMV block - confirm against the writer of refmvs_block.mf.
static void add_spatial_candidate(refmvs_candidate *const mvstack, int *const cnt,
                                  const int weight, const refmvs_block *const b,
                                  const union refmvs_refpair ref, const mv gmv[2],
                                  int *const have_newmv_match,
                                  int *const have_refmv_match)
{
    // intra block without intrabc: it carries no usable motion vector
    if (b->mv.mv[0].n == INVALID_MV)
        return;

    if (ref.ref[1] == -1) {
        // Single reference: use the first of the neighbour's two references
        // that equals ours; bail out if neither does.
        int slot;
        if (b->ref.ref[0] == ref.ref[0])
            slot = 0;
        else if (b->ref.ref[1] == ref.ref[0])
            slot = 1;
        else
            return;

        mv cand = b->mv.mv[slot];
        if ((b->mf & 1) && gmv[0].n != INVALID_MV)
            cand = gmv[0];

        *have_refmv_match = 1;
        *have_newmv_match |= b->mf >> 1;

        // Find an identical entry; bump it if present, append otherwise.
        const int n_cands = *cnt;
        int i = 0;
        while (i < n_cands && mvstack[i].mv.mv[0].n != cand.n)
            i++;

        if (i < n_cands) {
            mvstack[i].weight += weight;
        } else if (n_cands < 8) {
            mvstack[n_cands].mv.mv[0] = cand;
            mvstack[n_cands].weight = weight;
            *cnt = n_cands + 1;
        }
        return;
    }

    // Compound reference: both references must match as a pair.
    if (b->ref.pair != ref.pair)
        return;

    const int is_global = b->mf & 1;
    const mv mv0 = (is_global && gmv[0].n != INVALID_MV) ? gmv[0] : b->mv.mv[0];
    const mv mv1 = (is_global && gmv[1].n != INVALID_MV) ? gmv[1] : b->mv.mv[1];
    const refmvs_mvpair cand = { .mv = { mv0, mv1 } };

    *have_refmv_match = 1;
    *have_newmv_match |= b->mf >> 1;

    const int n_cands = *cnt;
    int i = 0;
    while (i < n_cands && mvstack[i].mv.n != cand.n)
        i++;

    if (i < n_cands) {
        mvstack[i].weight += weight;
    } else if (n_cands < 8) {
        mvstack[n_cands].mv = cand;
        mvstack[n_cands].weight = weight;
        *cnt = n_cands + 1;
    }
}
96
97
static int scan_row(refmvs_candidate *const mvstack, int *const cnt,
98
                    const union refmvs_refpair ref, const mv gmv[2],
99
                    const refmvs_block *b, const int bw4, const int w4,
100
                    const int max_rows, const int step,
101
                    int *const have_newmv_match, int *const have_refmv_match)
102
890k
{
103
890k
    const refmvs_block *cand_b = b;
104
890k
    const enum BlockSize first_cand_bs = cand_b->bs;
105
890k
    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
106
890k
    int cand_bw4 = first_cand_b_dim[0];
107
890k
    int len = imax(step, imin(bw4, cand_bw4));
108
109
890k
    if (bw4 <= cand_bw4) {
110
        // FIXME weight can be higher for odd blocks (bx4 & 1), but then the
111
        // position of the first block has to be odd already, i.e. not just
112
        // for row_offset=-3/-5
113
        // FIXME why can this not be cand_bw4?
114
782k
        const int weight = bw4 == 1 ? 2 :
115
782k
                           imax(2, imin(2 * max_rows, first_cand_b_dim[1]));
116
782k
        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
117
782k
                              have_newmv_match, have_refmv_match);
118
782k
        return weight >> 1;
119
782k
    }
120
121
201k
    for (int x = 0;;) {
122
        // FIXME if we overhang above, we could fill a bitmask so we don't have
123
        // to repeat the add_spatial_candidate() for the next row, but just increase
124
        // the weight here
125
201k
        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
126
201k
                              have_newmv_match, have_refmv_match);
127
201k
        x += len;
128
201k
        if (x >= w4) return 1;
129
94.5k
        cand_b = &b[x];
130
94.5k
        cand_bw4 = dav1d_block_dimensions[cand_b->bs][0];
131
94.5k
        assert(cand_bw4 < bw4);
132
94.5k
        len = imax(step, cand_bw4);
133
94.5k
    }
134
107k
}
135
136
static int scan_col(refmvs_candidate *const mvstack, int *const cnt,
137
                    const union refmvs_refpair ref, const mv gmv[2],
138
                    /*const*/ refmvs_block *const *b, const int bh4, const int h4,
139
                    const int bx4, const int max_cols, const int step,
140
                    int *const have_newmv_match, int *const have_refmv_match)
141
1.22M
{
142
1.22M
    const refmvs_block *cand_b = &b[0][bx4];
143
1.22M
    const enum BlockSize first_cand_bs = cand_b->bs;
144
1.22M
    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
145
1.22M
    int cand_bh4 = first_cand_b_dim[1];
146
1.22M
    int len = imax(step, imin(bh4, cand_bh4));
147
148
1.22M
    if (bh4 <= cand_bh4) {
149
        // FIXME weight can be higher for odd blocks (by4 & 1), but then the
150
        // position of the first block has to be odd already, i.e. not just
151
        // for col_offset=-3/-5
152
        // FIXME why can this not be cand_bh4?
153
1.11M
        const int weight = bh4 == 1 ? 2 :
154
1.11M
                           imax(2, imin(2 * max_cols, first_cand_b_dim[0]));
155
1.11M
        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
156
1.11M
                            have_newmv_match, have_refmv_match);
157
1.11M
        return weight >> 1;
158
1.11M
    }
159
160
215k
    for (int y = 0;;) {
161
        // FIXME if we overhang above, we could fill a bitmask so we don't have
162
        // to repeat the add_spatial_candidate() for the next row, but just increase
163
        // the weight here
164
215k
        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
165
215k
                              have_newmv_match, have_refmv_match);
166
215k
        y += len;
167
215k
        if (y >= h4) return 1;
168
100k
        cand_b = &b[y][bx4];
169
100k
        cand_bh4 = dav1d_block_dimensions[cand_b->bs][1];
170
100k
        assert(cand_bh4 < bh4);
171
100k
        len = imax(step, cand_bh4);
172
100k
    }
173
115k
}
174
175
33.3k
static inline union mv mv_projection(const union mv mv, const int num, const int den) {
176
33.3k
    static const uint16_t div_mult[32] = {
177
33.3k
           0, 16384, 8192, 5461, 4096, 3276, 2730, 2340,
178
33.3k
        2048,  1820, 1638, 1489, 1365, 1260, 1170, 1092,
179
33.3k
        1024,   963,  910,  862,  819,  780,  744,  712,
180
33.3k
         682,   655,  630,  606,  585,  564,  546,  528
181
33.3k
    };
182
33.3k
    assert(den > 0 && den < 32);
183
33.3k
    assert(num > -32 && num < 32);
184
33.3k
    const int frac = num * div_mult[den];
185
33.3k
    const int y = mv.y * frac, x = mv.x * frac;
186
    // Round and clip according to AV1 spec section 7.9.3
187
33.3k
    return (union mv) { // 0x3fff == (1 << 14) - 1
188
33.3k
        .y = iclip((y + 8192 + (y >> 31)) >> 14, -0x3fff, 0x3fff),
189
33.3k
        .x = iclip((x + 8192 + (x >> 31)) >> 14, -0x3fff, 0x3fff)
190
33.3k
    };
191
33.3k
}
192
193
static void add_temporal_candidate(const refmvs_frame *const rf,
194
                                   refmvs_candidate *const mvstack, int *const cnt,
195
                                   const refmvs_temporal_block *const rb,
196
                                   const union refmvs_refpair ref, int *const globalmv_ctx,
197
                                   const union mv gmv[])
198
34.0k
{
199
34.0k
    if (rb->mv.n == INVALID_MV) return;
200
201
17.1k
    union mv mv = mv_projection(rb->mv, rf->pocdiff[ref.ref[0] - 1], rb->ref);
202
17.1k
    fix_mv_precision(rf->frm_hdr, &mv);
203
204
17.1k
    const int last = *cnt;
205
17.1k
    if (ref.ref[1] == -1) {
206
10.9k
        if (globalmv_ctx)
207
3.37k
            *globalmv_ctx = (abs(mv.x - gmv[0].x) | abs(mv.y - gmv[0].y)) >= 16;
208
209
17.3k
        for (int n = 0; n < last; n++)
210
14.4k
            if (mvstack[n].mv.mv[0].n == mv.n) {
211
8.06k
                mvstack[n].weight += 2;
212
8.06k
                return;
213
8.06k
            }
214
2.93k
        if (last < 8) {
215
2.92k
            mvstack[last].mv.mv[0] = mv;
216
2.92k
            mvstack[last].weight = 2;
217
2.92k
            *cnt = last + 1;
218
2.92k
        }
219
6.19k
    } else {
220
6.19k
        refmvs_mvpair mvp = { .mv = {
221
6.19k
            [0] = mv,
222
6.19k
            [1] = mv_projection(rb->mv, rf->pocdiff[ref.ref[1] - 1], rb->ref),
223
6.19k
        }};
224
6.19k
        fix_mv_precision(rf->frm_hdr, &mvp.mv[1]);
225
226
8.21k
        for (int n = 0; n < last; n++)
227
6.88k
            if (mvstack[n].mv.n == mvp.n) {
228
4.86k
                mvstack[n].weight += 2;
229
4.86k
                return;
230
4.86k
            }
231
1.33k
        if (last < 8) {
232
1.33k
            mvstack[last].mv = mvp;
233
1.33k
            mvstack[last].weight = 2;
234
1.33k
            *cnt = last + 1;
235
1.33k
        }
236
1.33k
    }
237
17.1k
}
238
239
static void add_compound_extended_candidate(refmvs_candidate *const same,
240
                                            int *const same_count,
241
                                            const refmvs_block *const cand_b,
242
                                            const int sign0, const int sign1,
243
                                            const union refmvs_refpair ref,
244
                                            const uint8_t *const sign_bias)
245
59.5k
{
246
59.5k
    refmvs_candidate *const diff = &same[2];
247
59.5k
    int *const diff_count = &same_count[2];
248
249
154k
    for (int n = 0; n < 2; n++) {
250
117k
        const int cand_ref = cand_b->ref.ref[n];
251
252
117k
        if (cand_ref <= 0) break;
253
254
94.6k
        mv cand_mv = cand_b->mv.mv[n];
255
94.6k
        if (cand_ref == ref.ref[0]) {
256
32.7k
            if (same_count[0] < 2)
257
32.0k
                same[same_count[0]++].mv.mv[0] = cand_mv;
258
32.7k
            if (diff_count[1] < 2) {
259
28.7k
                if (sign1 ^ sign_bias[cand_ref - 1]) {
260
859
                    cand_mv.y = -cand_mv.y;
261
859
                    cand_mv.x = -cand_mv.x;
262
859
                }
263
28.7k
                diff[diff_count[1]++].mv.mv[1] = cand_mv;
264
28.7k
            }
265
61.8k
        } else if (cand_ref == ref.ref[1]) {
266
33.8k
            if (same_count[1] < 2)
267
33.2k
                same[same_count[1]++].mv.mv[1] = cand_mv;
268
33.8k
            if (diff_count[0] < 2) {
269
28.4k
                if (sign0 ^ sign_bias[cand_ref - 1]) {
270
920
                    cand_mv.y = -cand_mv.y;
271
920
                    cand_mv.x = -cand_mv.x;
272
920
                }
273
28.4k
                diff[diff_count[0]++].mv.mv[0] = cand_mv;
274
28.4k
            }
275
33.8k
        } else {
276
27.9k
            mv i_cand_mv = (union mv) {
277
27.9k
                .x = -cand_mv.x,
278
27.9k
                .y = -cand_mv.y
279
27.9k
            };
280
281
27.9k
            if (diff_count[0] < 2) {
282
22.4k
                diff[diff_count[0]++].mv.mv[0] =
283
22.4k
                    sign0 ^ sign_bias[cand_ref - 1] ?
284
22.0k
                    i_cand_mv : cand_mv;
285
22.4k
            }
286
287
27.9k
            if (diff_count[1] < 2) {
288
21.5k
                diff[diff_count[1]++].mv.mv[1] =
289
21.5k
                    sign1 ^ sign_bias[cand_ref - 1] ?
290
21.1k
                    i_cand_mv : cand_mv;
291
21.5k
            }
292
27.9k
        }
293
94.6k
    }
294
59.5k
}
295
296
static void add_single_extended_candidate(refmvs_candidate mvstack[8], int *const cnt,
297
                                          const refmvs_block *const cand_b,
298
                                          const int sign, const uint8_t *const sign_bias)
299
275k
{
300
555k
    for (int n = 0; n < 2; n++) {
301
544k
        const int cand_ref = cand_b->ref.ref[n];
302
303
544k
        if (cand_ref <= 0) break;
304
        // we need to continue even if cand_ref == ref.ref[0], since
305
        // the candidate could have been added as a globalmv variant,
306
        // which changes the value
307
        // FIXME if scan_{row,col}() returned a mask for the nearest
308
        // edge, we could skip the appropriate ones here
309
310
279k
        mv cand_mv = cand_b->mv.mv[n];
311
279k
        if (sign ^ sign_bias[cand_ref - 1]) {
312
908
            cand_mv.y = -cand_mv.y;
313
908
            cand_mv.x = -cand_mv.x;
314
908
        }
315
316
279k
        int m;
317
279k
        const int last = *cnt;
318
307k
        for (m = 0; m < last; m++)
319
260k
            if (cand_mv.n == mvstack[m].mv.mv[0].n)
320
232k
                break;
321
279k
        if (m == last) {
322
46.6k
            mvstack[m].mv.mv[0] = cand_mv;
323
46.6k
            mvstack[m].weight = 2; // "minimal"
324
46.6k
            *cnt = last + 1;
325
46.6k
        }
326
279k
    }
327
275k
}
328
329
/*
330
 * refmvs_frame allocates memory for one sbrow (32 blocks high, whole frame
331
 * wide) of 4x4-resolution refmvs_block entries for spatial MV referencing.
332
 * mvrefs_tile[] keeps a list of 35 (32 + 3 above) pointers into this memory,
333
 * and each sbrow, the bottom entries (y=27/29/31) are exchanged with the top
334
 * (-5/-3/-1) pointers by calling dav1d_refmvs_tile_sbrow_init() at the start
335
 * of each tile/sbrow.
336
 *
337
 * For temporal MV referencing, we call dav1d_refmvs_save_tmvs() at the end of
338
 * each tile/sbrow (when tile column threading is enabled), or at the start of
339
 * each interleaved sbrow (i.e. once for all tile columns together, when tile
340
 * column threading is disabled). This will copy the 4x4-resolution spatial MVs
341
 * into 8x8-resolution refmvs_temporal_block structures. Then, for subsequent
342
 * frames, at the start of each tile/sbrow (when tile column threading is
343
 * enabled) or at the start of each interleaved sbrow (when tile column
344
 * threading is disabled), we call load_tmvs(), which will project the MVs to
345
 * their respective position in the current frame.
346
 */
347
348
void dav1d_refmvs_find(const refmvs_tile *const rt,
349
                       refmvs_candidate mvstack[8], int *const cnt,
350
                       int *const ctx,
351
                       const union refmvs_refpair ref, const enum BlockSize bs,
352
                       const enum EdgeFlags edge_flags,
353
                       const int by4, const int bx4)
354
755k
{
355
755k
    const refmvs_frame *const rf = rt->rf;
356
755k
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
357
755k
    const int bw4 = b_dim[0], w4 = imin(imin(bw4, 16), rt->tile_col.end - bx4);
358
755k
    const int bh4 = b_dim[1], h4 = imin(imin(bh4, 16), rt->tile_row.end - by4);
359
755k
    mv gmv[2], tgmv[2];
360
361
755k
    *cnt = 0;
362
755k
    assert(ref.ref[0] >=  0 && ref.ref[0] <= 8 &&
363
755k
           ref.ref[1] >= -1 && ref.ref[1] <= 8);
364
755k
    if (ref.ref[0] > 0) {
365
357k
        tgmv[0] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[0] - 1],
366
357k
                             bx4, by4, bw4, bh4, rf->frm_hdr);
367
357k
        gmv[0] = rf->frm_hdr->gmv[ref.ref[0] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
368
279k
                 tgmv[0] : (mv) { .n = INVALID_MV };
369
397k
    } else {
370
397k
        tgmv[0] = (mv) { .n = 0 };
371
397k
        gmv[0] = (mv) { .n = INVALID_MV };
372
397k
    }
373
755k
    if (ref.ref[1] > 0) {
374
60.9k
        tgmv[1] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[1] - 1],
375
60.9k
                             bx4, by4, bw4, bh4, rf->frm_hdr);
376
60.9k
        gmv[1] = rf->frm_hdr->gmv[ref.ref[1] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
377
43.6k
                 tgmv[1] : (mv) { .n = INVALID_MV };
378
60.9k
    }
379
380
    // top
381
755k
    int have_newmv = 0, have_col_mvs = 0, have_row_mvs = 0;
382
755k
    unsigned max_rows = 0, n_rows = ~0;
383
755k
    const refmvs_block *b_top;
384
755k
    if (by4 > rt->tile_row.start) {
385
494k
        max_rows = imin((by4 - rt->tile_row.start + 1) >> 1, 2 + (bh4 > 1));
386
494k
        b_top = &rt->r[(by4 & 31) - 1 + 5][bx4];
387
494k
        n_rows = scan_row(mvstack, cnt, ref, gmv, b_top,
388
494k
                          bw4, w4, max_rows, bw4 >= 16 ? 4 : 1,
389
494k
                          &have_newmv, &have_row_mvs);
390
494k
    }
391
392
    // left
393
755k
    unsigned max_cols = 0, n_cols = ~0U;
394
755k
    refmvs_block *const *b_left;
395
755k
    if (bx4 > rt->tile_col.start) {
396
608k
        max_cols = imin((bx4 - rt->tile_col.start + 1) >> 1, 2 + (bw4 > 1));
397
608k
        b_left = &rt->r[(by4 & 31) + 5];
398
608k
        n_cols = scan_col(mvstack, cnt, ref, gmv, b_left,
399
608k
                          bh4, h4, bx4 - 1, max_cols, bh4 >= 16 ? 4 : 1,
400
608k
                          &have_newmv, &have_col_mvs);
401
608k
    }
402
403
    // top/right
404
755k
    if (n_rows != ~0U && edge_flags & EDGE_I444_TOP_HAS_RIGHT &&
405
303k
        imax(bw4, bh4) <= 16 && bw4 + bx4 < rt->tile_col.end)
406
238k
    {
407
238k
        add_spatial_candidate(mvstack, cnt, 4, &b_top[bw4], ref, gmv,
408
238k
                              &have_newmv, &have_row_mvs);
409
238k
    }
410
411
755k
    const int nearest_match = have_col_mvs + have_row_mvs;
412
755k
    const int nearest_cnt = *cnt;
413
1.65M
    for (int n = 0; n < nearest_cnt; n++)
414
895k
        mvstack[n].weight += 640;
415
416
    // temporal
417
755k
    int globalmv_ctx = rf->frm_hdr->use_ref_frame_mvs;
418
755k
    if (rf->use_ref_frame_mvs) {
419
11.0k
        const ptrdiff_t stride = rf->rp_stride;
420
11.0k
        const int by8 = by4 >> 1, bx8 = bx4 >> 1;
421
11.0k
        const refmvs_temporal_block *const rbi = &rt->rp_proj[(by8 & 15) * stride + bx8];
422
11.0k
        const refmvs_temporal_block *rb = rbi;
423
11.0k
        const int step_h = bw4 >= 16 ? 2 : 1, step_v = bh4 >= 16 ? 2 : 1;
424
11.0k
        const int w8 = imin((w4 + 1) >> 1, 8), h8 = imin((h4 + 1) >> 1, 8);
425
31.1k
        for (int y = 0; y < h8; y += step_v) {
426
49.7k
            for (int x = 0; x < w8; x+= step_h) {
427
29.7k
                add_temporal_candidate(rf, mvstack, cnt, &rb[x], ref,
428
29.7k
                                       !(x | y) ? &globalmv_ctx : NULL, tgmv);
429
29.7k
            }
430
20.0k
            rb += stride * step_v;
431
20.0k
        }
432
11.0k
        if (imin(bw4, bh4) >= 2 && imax(bw4, bh4) < 16) {
433
5.98k
            const int bh8 = bh4 >> 1, bw8 = bw4 >> 1;
434
5.98k
            rb = &rbi[bh8 * stride];
435
5.98k
            const int has_bottom = by8 + bh8 < imin(rt->tile_row.end >> 1,
436
5.98k
                                                    (by8 & ~7) + 8);
437
5.98k
            if (has_bottom && bx8 - 1 >= imax(rt->tile_col.start >> 1, bx8 & ~7)) {
438
1.22k
                add_temporal_candidate(rf, mvstack, cnt, &rb[-1], ref,
439
1.22k
                                       NULL, NULL);
440
1.22k
            }
441
5.98k
            if (bx8 + bw8 < imin(rt->tile_col.end >> 1, (bx8 & ~7) + 8)) {
442
2.13k
                if (has_bottom) {
443
1.16k
                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8], ref,
444
1.16k
                                           NULL, NULL);
445
1.16k
                }
446
2.13k
                if (by8 + bh8 - 1 < imin(rt->tile_row.end >> 1, (by8 & ~7) + 8)) {
447
1.93k
                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8 - stride],
448
1.93k
                                           ref, NULL, NULL);
449
1.93k
                }
450
2.13k
            }
451
5.98k
        }
452
11.0k
    }
453
755k
    assert(*cnt <= 8);
454
455
    // top/left (which, confusingly, is part of "secondary" references)
456
755k
    int have_dummy_newmv_match;
457
755k
    if ((n_rows | n_cols) != ~0U) {
458
405k
        add_spatial_candidate(mvstack, cnt, 4, &b_top[-1], ref, gmv,
459
405k
                              &have_dummy_newmv_match, &have_row_mvs);
460
405k
    }
461
462
    // "secondary" (non-direct neighbour) top & left edges
463
    // what is different about secondary is that everything is now in 8x8 resolution
464
2.26M
    for (int n = 2; n <= 3; n++) {
465
1.50M
        if ((unsigned) n > n_rows && (unsigned) n <= max_rows) {
466
395k
            n_rows += scan_row(mvstack, cnt, ref, gmv,
467
395k
                               &rt->r[(((by4 & 31) - 2 * n + 1) | 1) + 5][bx4 | 1],
468
395k
                               bw4, w4, 1 + max_rows - n, bw4 >= 16 ? 4 : 2,
469
395k
                               &have_dummy_newmv_match, &have_row_mvs);
470
395k
        }
471
472
1.50M
        if ((unsigned) n > n_cols && (unsigned) n <= max_cols) {
473
618k
            n_cols += scan_col(mvstack, cnt, ref, gmv, &rt->r[((by4 & 31) | 1) + 5],
474
618k
                               bh4, h4, (bx4 - n * 2 + 1) | 1,
475
618k
                               1 + max_cols - n, bh4 >= 16 ? 4 : 2,
476
618k
                               &have_dummy_newmv_match, &have_col_mvs);
477
618k
        }
478
1.50M
    }
479
755k
    assert(*cnt <= 8);
480
481
755k
    const int ref_match_count = have_col_mvs + have_row_mvs;
482
483
    // context build-up
484
755k
    int refmv_ctx, newmv_ctx;
485
755k
    switch (nearest_match) {
486
155k
    case 0:
487
155k
        refmv_ctx = imin(2, ref_match_count);
488
155k
        newmv_ctx = ref_match_count > 0;
489
155k
        break;
490
326k
    case 1:
491
326k
        refmv_ctx = imin(ref_match_count * 3, 4);
492
326k
        newmv_ctx = 3 - have_newmv;
493
326k
        break;
494
273k
    case 2:
495
273k
        refmv_ctx = 5;
496
273k
        newmv_ctx = 5 - have_newmv;
497
273k
        break;
498
755k
    }
499
500
    // sorting (nearest, then "secondary")
501
754k
    int len = nearest_cnt;
502
1.50M
    while (len) {
503
752k
        int last = 0;
504
1.10M
        for (int n = 1; n < len; n++) {
505
356k
            if (mvstack[n - 1].weight < mvstack[n].weight) {
506
188k
#define EXCHANGE(a, b) do { refmvs_candidate tmp = a; a = b; b = tmp; } while (0)
507
159k
                EXCHANGE(mvstack[n - 1], mvstack[n]);
508
159k
                last = n;
509
159k
            }
510
356k
        }
511
752k
        len = last;
512
752k
    }
513
754k
    len = *cnt;
514
1.17M
    while (len > nearest_cnt) {
515
418k
        int last = nearest_cnt;
516
626k
        for (int n = nearest_cnt + 1; n < len; n++) {
517
208k
            if (mvstack[n - 1].weight < mvstack[n].weight) {
518
29.1k
                EXCHANGE(mvstack[n - 1], mvstack[n]);
519
29.1k
#undef EXCHANGE
520
29.1k
                last = n;
521
29.1k
            }
522
208k
        }
523
418k
        len = last;
524
418k
    }
525
526
754k
    if (ref.ref[1] > 0) {
527
60.9k
        if (*cnt < 2) {
528
49.1k
            const int sign0 = rf->sign_bias[ref.ref[0] - 1];
529
49.1k
            const int sign1 = rf->sign_bias[ref.ref[1] - 1];
530
49.1k
            const int sz4 = imin(w4, h4);
531
49.1k
            refmvs_candidate *const same = &mvstack[*cnt];
532
49.1k
            int same_count[4] = { 0 };
533
534
            // non-self references in top
535
56.2k
            if (n_rows != ~0U) for (int x = 0; x < sz4;) {
536
29.3k
                const refmvs_block *const cand_b = &b_top[x];
537
29.3k
                add_compound_extended_candidate(same, same_count, cand_b,
538
29.3k
                                                sign0, sign1, ref, rf->sign_bias);
539
29.3k
                x += dav1d_block_dimensions[cand_b->bs][0];
540
29.3k
            }
541
542
            // non-self references in left
543
57.9k
            if (n_cols != ~0U) for (int y = 0; y < sz4;) {
544
30.1k
                const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
545
30.1k
                add_compound_extended_candidate(same, same_count, cand_b,
546
30.1k
                                                sign0, sign1, ref, rf->sign_bias);
547
30.1k
                y += dav1d_block_dimensions[cand_b->bs][1];
548
30.1k
            }
549
550
49.1k
            refmvs_candidate *const diff = &same[2];
551
49.1k
            const int *const diff_count = &same_count[2];
552
553
            // merge together
554
147k
            for (int n = 0; n < 2; n++) {
555
98.3k
                int m = same_count[n];
556
557
98.3k
                if (m >= 2) continue;
558
559
83.6k
                const int l = diff_count[n];
560
83.6k
                if (l) {
561
50.6k
                    same[m].mv.mv[n] = diff[0].mv.mv[n];
562
50.6k
                    if (++m == 2) continue;
563
18.1k
                    if (l == 2) {
564
12.9k
                        same[1].mv.mv[n] = diff[1].mv.mv[n];
565
12.9k
                        continue;
566
12.9k
                    }
567
18.1k
                }
568
67.8k
                do {
569
67.8k
                    same[m].mv.mv[n] = tgmv[n];
570
67.8k
                } while (++m < 2);
571
38.2k
            }
572
573
            // if the first extended was the same as the non-extended one,
574
            // then replace it with the second extended one
575
49.1k
            int n = *cnt;
576
49.1k
            if (n == 1 && mvstack[0].mv.n == same[0].mv.n)
577
14.0k
                mvstack[1].mv = mvstack[2].mv;
578
80.0k
            do {
579
80.0k
                mvstack[n].weight = 2;
580
80.0k
            } while (++n < 2);
581
49.1k
            *cnt = 2;
582
49.1k
        }
583
584
        // clamping
585
60.9k
        const int left = -(bx4 + bw4 + 4) * 4 * 8;
586
60.9k
        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
587
60.9k
        const int top = -(by4 + bh4 + 4) * 4 * 8;
588
60.9k
        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
589
590
60.9k
        const int n_refmvs = *cnt;
591
60.9k
        int n = 0;
592
129k
        do {
593
129k
            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
594
129k
            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
595
129k
            mvstack[n].mv.mv[1].x = iclip(mvstack[n].mv.mv[1].x, left, right);
596
129k
            mvstack[n].mv.mv[1].y = iclip(mvstack[n].mv.mv[1].y, top, bottom);
597
129k
        } while (++n < n_refmvs);
598
599
60.9k
        switch (refmv_ctx >> 1) {
600
33.3k
        case 0:
601
33.3k
            *ctx = imin(newmv_ctx, 1);
602
33.3k
            break;
603
18.3k
        case 1:
604
18.3k
            *ctx = 1 + imin(newmv_ctx, 3);
605
18.3k
            break;
606
9.20k
        case 2:
607
9.20k
            *ctx = iclip(3 + newmv_ctx, 4, 7);
608
9.20k
            break;
609
60.9k
        }
610
611
60.9k
        return;
612
693k
    } else if (*cnt < 2 && ref.ref[0] > 0) {
613
220k
        const int sign = rf->sign_bias[ref.ref[0] - 1];
614
220k
        const int sz4 = imin(w4, h4);
615
616
        // non-self references in top
617
267k
        if (n_rows != ~0U) for (int x = 0; x < sz4 && *cnt < 2;) {
618
136k
            const refmvs_block *const cand_b = &b_top[x];
619
136k
            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
620
136k
            x += dav1d_block_dimensions[cand_b->bs][0];
621
136k
        }
622
623
        // non-self references in left
624
281k
        if (n_cols != ~0U) for (int y = 0; y < sz4 && *cnt < 2;) {
625
139k
            const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
626
139k
            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
627
139k
            y += dav1d_block_dimensions[cand_b->bs][1];
628
139k
        }
629
220k
    }
630
693k
    assert(*cnt <= 8);
631
632
    // clamping
633
693k
    int n_refmvs = *cnt;
634
693k
    if (n_refmvs) {
635
615k
        const int left = -(bx4 + bw4 + 4) * 4 * 8;
636
615k
        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
637
615k
        const int top = -(by4 + bh4 + 4) * 4 * 8;
638
615k
        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
639
640
615k
        int n = 0;
641
1.48M
        do {
642
1.48M
            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
643
1.48M
            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
644
1.48M
        } while (++n < n_refmvs);
645
615k
    }
646
647
1.04M
    for (int n = *cnt; n < 2; n++)
648
350k
        mvstack[n].mv.mv[0] = tgmv[0];
649
650
693k
    *ctx = (refmv_ctx << 4) | (globalmv_ctx << 3) | newmv_ctx;
651
693k
}
652
653
// Prepare the per-tile state `rt` for decoding one superblock row.
//
// Points rt->rp_proj and the row-pointer table rt->r[] into the buffers owned
// by `rf`, then records the frame pointer and the tile bounds (end bounds are
// limited to rf->ih4 / rf->iw4).
//
// Layout of rt->r[] relative to `off`: entries 5 .. 5+sbsz-1 are the rows of
// the current superblock row; entries 0, 2 and 4 point at three extra rows
// stored after them; entries 1 and 3 are NULL. For odd `sby` the three extra
// entries are exchanged with the entries `sbsz` positions further down.
//
// tile_row_idx is forced to 0 when rf->n_tile_threads == 1. The second half
// of the row buffer (offset 35 * 2 * rf->n_blocks) is used when
// rf->n_frame_threads > 1 and pass == 2.
void dav1d_refmvs_tile_sbrow_init(refmvs_tile *const rt, const refmvs_frame *const rf,
                                  const int tile_col_start4, const int tile_col_end4,
                                  const int tile_row_start4, const int tile_row_end4,
                                  const int sby, int tile_row_idx, const int pass)
{
    if (rf->n_tile_threads == 1)
        tile_row_idx = 0;

    rt->rp_proj = &rf->rp_proj[16 * rf->rp_stride * tile_row_idx];

    const ptrdiff_t r_stride = rf->rp_stride * 2;
    ptrdiff_t pass_off = 0;
    if (rf->n_frame_threads > 1 && pass == 2)
        pass_off = 35 * 2 * rf->n_blocks;

    refmvs_block *const base = &rf->r[35 * r_stride * tile_row_idx + pass_off];
    const int sbsz = rf->sbsz;
    const int off = (sbsz * sby) & 16;

    // rows of the current superblock row
    for (int i = 0; i < sbsz; i++)
        rt->r[off + 5 + i] = base + i * r_stride;

    // three extra rows stored right behind them
    refmvs_block *const extra = base + sbsz * r_stride;
    rt->r[off + 0] = extra;
    rt->r[off + 1] = NULL;
    rt->r[off + 2] = extra + r_stride;
    rt->r[off + 3] = NULL;
    rt->r[off + 4] = extra + 2 * r_stride;

    if (sby & 1) {
        // exchange entries 0/2/4 with their counterparts sbsz entries below
        for (int k = 0; k <= 4; k += 2) {
            refmvs_block *const tmp = rt->r[off + k];
            rt->r[off + k] = rt->r[off + sbsz + k];
            rt->r[off + sbsz + k] = tmp;
        }
    }

    rt->rf = rf;
    rt->tile_row.start = tile_row_start4;
    rt->tile_row.end = imin(tile_row_end4, rf->ih4);
    rt->tile_col.start = tile_col_start4;
    rt->tile_col.end = imin(tile_col_end4, rf->iw4);
}
689
690
static void load_tmvs_c(const refmvs_frame *const rf, int tile_row_idx,
691
                        const int col_start8, const int col_end8,
692
                        const int row_start8, int row_end8)
693
7.54k
{
694
7.54k
    if (rf->n_tile_threads == 1) tile_row_idx = 0;
695
7.54k
    assert(row_start8 >= 0);
696
7.54k
    assert((unsigned) (row_end8 - row_start8) <= 16U);
697
7.54k
    row_end8 = imin(row_end8, rf->ih8);
698
7.54k
    const int col_start8i = imax(col_start8 - 8, 0);
699
7.54k
    const int col_end8i = imin(col_end8 + 8, rf->iw8);
700
701
7.54k
    const ptrdiff_t stride = rf->rp_stride;
702
7.54k
    refmvs_temporal_block *rp_proj =
703
7.54k
        &rf->rp_proj[16 * stride * tile_row_idx + (row_start8 & 15) * stride];
704
47.7k
    for (int y = row_start8; y < row_end8; y++) {
705
169k
        for (int x = col_start8; x < col_end8; x++)
706
128k
            rp_proj[x].mv.n = INVALID_MV;
707
40.1k
        rp_proj += stride;
708
40.1k
    }
709
710
7.54k
    rp_proj = &rf->rp_proj[16 * stride * tile_row_idx];
711
13.9k
    for (int n = 0; n < rf->n_mfmvs; n++) {
712
6.38k
        const int ref2cur = rf->mfmv_ref2cur[n];
713
6.38k
        if (ref2cur == INVALID_REF2CUR) continue;
714
715
4.84k
        const int ref = rf->mfmv_ref[n];
716
4.84k
        const int ref_sign = ref - 4;
717
4.84k
        const refmvs_temporal_block *r = &rf->rp_ref[ref][row_start8 * stride];
718
31.9k
        for (int y = row_start8; y < row_end8; y++) {
719
27.1k
            const int y_sb_align = y & ~7;
720
27.1k
            const int y_proj_start = imax(y_sb_align, row_start8);
721
27.1k
            const int y_proj_end = imin(y_sb_align + 8, row_end8);
722
63.3k
            for (int x = col_start8i; x < col_end8i; x++) {
723
36.2k
                const refmvs_temporal_block *rb = &r[x];
724
36.2k
                const int b_ref = rb->ref;
725
36.2k
                if (!b_ref) continue;
726
17.0k
                const int ref2ref = rf->mfmv_ref2ref[n][b_ref - 1];
727
17.0k
                if (!ref2ref) continue;
728
9.98k
                const mv b_mv = rb->mv;
729
9.98k
                const mv offset = mv_projection(b_mv, ref2cur, ref2ref);
730
9.98k
                int pos_x = x + apply_sign(abs(offset.x) >> 6,
731
9.98k
                                           offset.x ^ ref_sign);
732
9.98k
                const int pos_y = y + apply_sign(abs(offset.y) >> 6,
733
9.98k
                                                 offset.y ^ ref_sign);
734
9.98k
                if (pos_y >= y_proj_start && pos_y < y_proj_end) {
735
8.82k
                    const ptrdiff_t pos = (pos_y & 15) * stride;
736
29.4k
                    for (;;) {
737
29.4k
                        const int x_sb_align = x & ~7;
738
29.4k
                        if (pos_x >= imax(x_sb_align - 8, col_start8) &&
739
28.5k
                            pos_x < imin(x_sb_align + 16, col_end8))
740
28.0k
                        {
741
28.0k
                            rp_proj[pos + pos_x].mv = rb->mv;
742
28.0k
                            rp_proj[pos + pos_x].ref = ref2ref;
743
28.0k
                        }
744
29.4k
                        if (++x >= col_end8i) break;
745
22.3k
                        rb++;
746
22.3k
                        if (rb->ref != b_ref || rb->mv.n != b_mv.n) break;
747
20.6k
                        pos_x++;
748
20.6k
                    }
749
8.82k
                } else {
750
2.66k
                    for (;;) {
751
2.66k
                        if (++x >= col_end8i) break;
752
1.95k
                        rb++;
753
1.95k
                        if (rb->ref != b_ref || rb->mv.n != b_mv.n) break;
754
1.95k
                    }
755
1.15k
                }
756
9.98k
                x--;
757
9.98k
            }
758
27.1k
            r += stride;
759
27.1k
        }
760
4.84k
    }
761
7.54k
}
762
763
static void save_tmvs_c(refmvs_temporal_block *rp, const ptrdiff_t stride,
764
                        refmvs_block *const *const rr,
765
                        const uint8_t *const ref_sign,
766
                        const int col_end8, const int row_end8,
767
                        const int col_start8, const int row_start8)
768
21.3k
{
769
155k
    for (int y = row_start8; y < row_end8; y++) {
770
133k
        const refmvs_block *const b = rr[(y & 15) * 2];
771
772
307k
        for (int x = col_start8; x < col_end8;) {
773
174k
            const refmvs_block *const cand_b = &b[x * 2 + 1];
774
174k
            const int bw8 = (dav1d_block_dimensions[cand_b->bs][0] + 1) >> 1;
775
776
174k
            if (cand_b->ref.ref[1] > 0 && ref_sign[cand_b->ref.ref[1] - 1] &&
777
15.0k
                (abs(cand_b->mv.mv[1].y) | abs(cand_b->mv.mv[1].x)) < 4096)
778
12.0k
            {
779
46.9k
                for (int n = 0; n < bw8; n++, x++)
780
34.9k
                    rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[1],
781
34.9k
                                                      .ref = cand_b->ref.ref[1] };
782
162k
            } else if (cand_b->ref.ref[0] > 0 && ref_sign[cand_b->ref.ref[0] - 1] &&
783
64.0k
                       (abs(cand_b->mv.mv[0].y) | abs(cand_b->mv.mv[0].x)) < 4096)
784
60.3k
            {
785
367k
                for (int n = 0; n < bw8; n++, x++)
786
307k
                    rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[0],
787
307k
                                                      .ref = cand_b->ref.ref[0] };
788
101k
            } else {
789
539k
                for (int n = 0; n < bw8; n++, x++) {
790
437k
                    rp[x].mv.n = 0;
791
437k
                    rp[x].ref = 0; // "invalid"
792
437k
                }
793
101k
            }
794
174k
        }
795
133k
        rp += stride;
796
133k
    }
797
21.3k
}
798
799
int dav1d_refmvs_init_frame(refmvs_frame *const rf,
800
                            const Dav1dSequenceHeader *const seq_hdr,
801
                            const Dav1dFrameHeader *const frm_hdr,
802
                            const uint8_t ref_poc[7],
803
                            refmvs_temporal_block *const rp,
804
                            const uint8_t ref_ref_poc[7][7],
805
                            /*const*/ refmvs_temporal_block *const rp_ref[7],
806
                            const int n_tile_threads, const int n_frame_threads)
807
76.4k
{
808
76.4k
    const int rp_stride = ((frm_hdr->width[0] + 127) & ~127) >> 3;
809
76.4k
    const int n_tile_rows = n_tile_threads > 1 ? frm_hdr->tiling.rows : 1;
810
76.4k
    const int n_blocks = rp_stride * n_tile_rows;
811
812
76.4k
    rf->sbsz = 16 << seq_hdr->sb128;
813
76.4k
    rf->frm_hdr = frm_hdr;
814
76.4k
    rf->iw8 = (frm_hdr->width[0] + 7) >> 3;
815
76.4k
    rf->ih8 = (frm_hdr->height + 7) >> 3;
816
76.4k
    rf->iw4 = rf->iw8 << 1;
817
76.4k
    rf->ih4 = rf->ih8 << 1;
818
76.4k
    rf->rp = rp;
819
76.4k
    rf->rp_stride = rp_stride;
820
76.4k
    rf->n_tile_threads = n_tile_threads;
821
76.4k
    rf->n_frame_threads = n_frame_threads;
822
823
76.4k
    if (n_blocks != rf->n_blocks) {
824
40.5k
        const size_t r_sz = sizeof(*rf->r) * 35 * 2 * n_blocks * (1 + (n_frame_threads > 1));
825
40.5k
        const size_t rp_proj_sz = sizeof(*rf->rp_proj) * 16 * n_blocks;
826
        /* Note that sizeof(*rf->r) == 12, but it's accessed using 16-byte unaligned
827
         * loads in save_tmvs() asm which can overread 4 bytes into rp_proj. */
828
40.5k
        dav1d_free_aligned(rf->r);
829
40.5k
        rf->r = dav1d_alloc_aligned(ALLOC_REFMVS, r_sz + rp_proj_sz, 64);
830
40.5k
        if (!rf->r) {
831
0
            rf->n_blocks = 0;
832
0
            return DAV1D_ERR(ENOMEM);
833
0
        }
834
835
40.5k
        rf->rp_proj = (refmvs_temporal_block*)((uintptr_t)rf->r + r_sz);
836
40.5k
        rf->n_blocks = n_blocks;
837
40.5k
    }
838
839
76.4k
    const int poc = frm_hdr->frame_offset;
840
611k
    for (int i = 0; i < 7; i++) {
841
534k
        const int poc_diff = get_poc_diff(seq_hdr->order_hint_n_bits,
842
534k
                                          ref_poc[i], poc);
843
534k
        rf->sign_bias[i] = poc_diff > 0;
844
534k
        rf->mfmv_sign[i] = poc_diff < 0;
845
534k
        rf->pocdiff[i] = iclip(get_poc_diff(seq_hdr->order_hint_n_bits,
846
534k
                                            poc, ref_poc[i]), -31, 31);
847
534k
    }
848
849
    // temporal MV setup
850
76.4k
    rf->n_mfmvs = 0;
851
76.4k
    rf->rp_ref = rp_ref;
852
76.4k
    if (frm_hdr->use_ref_frame_mvs && seq_hdr->order_hint_n_bits) {
853
7.17k
        int total = 2;
854
7.17k
        if (rp_ref[0] && ref_ref_poc[0][6] != ref_poc[3] /* alt-of-last != gold */) {
855
1.52k
            rf->mfmv_ref[rf->n_mfmvs++] = 0; // last
856
1.52k
            total = 3;
857
1.52k
        }
858
7.17k
        if (rp_ref[4] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[4],
859
3.19k
                                      frm_hdr->frame_offset) > 0)
860
577
        {
861
577
            rf->mfmv_ref[rf->n_mfmvs++] = 4; // bwd
862
577
        }
863
7.17k
        if (rp_ref[5] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[5],
864
2.97k
                                      frm_hdr->frame_offset) > 0)
865
452
        {
866
452
            rf->mfmv_ref[rf->n_mfmvs++] = 5; // altref2
867
452
        }
868
7.17k
        if (rf->n_mfmvs < total && rp_ref[6] &&
869
2.82k
            get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[6],
870
2.82k
                         frm_hdr->frame_offset) > 0)
871
1.31k
        {
872
1.31k
            rf->mfmv_ref[rf->n_mfmvs++] = 6; // altref
873
1.31k
        }
874
7.17k
        if (rf->n_mfmvs < total && rp_ref[1])
875
2.21k
            rf->mfmv_ref[rf->n_mfmvs++] = 1; // last2
876
877
13.2k
        for (int n = 0; n < rf->n_mfmvs; n++) {
878
6.08k
            const int rpoc = ref_poc[rf->mfmv_ref[n]];
879
6.08k
            const int diff1 = get_poc_diff(seq_hdr->order_hint_n_bits,
880
6.08k
                                           rpoc, frm_hdr->frame_offset);
881
6.08k
            if (abs(diff1) > 31) {
882
1.40k
                rf->mfmv_ref2cur[n] = INVALID_REF2CUR;
883
4.67k
            } else {
884
4.67k
                rf->mfmv_ref2cur[n] = rf->mfmv_ref[n] < 4 ? -diff1 : diff1;
885
37.3k
                for (int m = 0; m < 7; m++) {
886
32.7k
                    const int rrpoc = ref_ref_poc[rf->mfmv_ref[n]][m];
887
32.7k
                    const int diff2 = get_poc_diff(seq_hdr->order_hint_n_bits,
888
32.7k
                                                   rpoc, rrpoc);
889
                    // unsigned comparison also catches the < 0 case
890
32.7k
                    rf->mfmv_ref2ref[n][m] = (unsigned) diff2 > 31U ? 0 : diff2;
891
32.7k
                }
892
4.67k
            }
893
6.08k
        }
894
7.17k
    }
895
76.4k
    rf->use_ref_frame_mvs = rf->n_mfmvs > 0;
896
897
76.4k
    return 0;
898
76.4k
}
899
900
/*
 * Replicate *rmv over a bw4 x bh4 rectangle of blocks.
 *
 * rr points at the first of bh4 consecutive row pointers; in each row the
 * bw4 entries starting at column bx4 are overwritten with *rmv.
 * The row loop always executes at least once (do/while on --bh4), so
 * callers are expected to pass bh4 >= 1.
 */
static void splat_mv_c(refmvs_block **rr, const refmvs_block *const rmv,
                       const int bx4, const int bw4, int bh4)
{
    do {
        refmvs_block *dst = *rr + bx4;
        for (int left = bw4; left > 0; left--)
            *dst++ = *rmv;
        rr++;
    } while (--bh4);
}
909
910
#if HAVE_ASM
911
#if ARCH_AARCH64 || ARCH_ARM
912
#include "src/arm/refmvs.h"
913
#elif ARCH_LOONGARCH64
914
#include "src/loongarch/refmvs.h"
915
#elif ARCH_X86
916
#include "src/x86/refmvs.h"
917
#endif
918
#endif
919
920
/*
 * Populate the refmvs DSP function table.
 *
 * The portable C implementations are installed first; when assembly is
 * enabled, the architecture-specific init routine is then called with the
 * same context.
 */
COLD void dav1d_refmvs_dsp_init(Dav1dRefmvsDSPContext *const c)
{
    c->splat_mv = splat_mv_c;
    c->save_tmvs = save_tmvs_c;
    c->load_tmvs = load_tmvs_c;

#if HAVE_ASM && (ARCH_AARCH64 || ARCH_ARM)
    refmvs_dsp_init_arm(c);
#elif HAVE_ASM && ARCH_LOONGARCH64
    refmvs_dsp_init_loongarch(c);
#elif HAVE_ASM && ARCH_X86
    refmvs_dsp_init_x86(c);
#endif
}