Coverage Report

Created: 2026-06-10 06:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/dav1d/src/refmvs.c
Line
Count
Source
1
/*
2
 * Copyright © 2020, VideoLAN and dav1d authors
3
 * Copyright © 2020, Two Orioles, LLC
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * 1. Redistributions of source code must retain the above copyright notice, this
10
 *    list of conditions and the following disclaimer.
11
 *
12
 * 2. Redistributions in binary form must reproduce the above copyright notice,
13
 *    this list of conditions and the following disclaimer in the documentation
14
 *    and/or other materials provided with the distribution.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
 */
27
28
#include "config.h"
29
30
#include <limits.h>
31
#include <stdlib.h>
32
33
#include "dav1d/common.h"
34
35
#include "common/intops.h"
36
37
#include "src/env.h"
38
#include "src/mem.h"
39
#include "src/refmvs.h"
40
41
/*
 * Offer the motion vector(s) of spatial neighbour `b` as a candidate for the
 * reference (pair) `ref`.
 *
 * If the neighbour uses the requested reference(s), *have_refmv_match is set
 * and bit 1 of b->mf is OR'ed into *have_newmv_match. When bit 0 of b->mf is
 * set and the matching gmv[] entry is valid, that gmv[] entry is used instead
 * of the block's own MV. A candidate already present in mvstack[0..*cnt) only
 * has `weight` added to its weight; otherwise it is appended, provided fewer
 * than 8 entries are in use.
 */
static void add_spatial_candidate(refmvs_candidate *const mvstack, int *const cnt,
                                  const int weight, const refmvs_block *const b,
                                  const union refmvs_refpair ref, const mv gmv[2],
                                  int *const have_newmv_match,
                                  int *const have_refmv_match)
{
    // intra block, no intrabc
    if (b->mv.mv[0].n == INVALID_MV)
        return;

    const int n_cands = *cnt;
    const int is_global = b->mf & 1;

    if (ref.ref[1] == -1) {
        // single reference: take the first of the neighbour's two references
        // that equals ours
        int idx;
        if (b->ref.ref[0] == ref.ref[0])
            idx = 0;
        else if (b->ref.ref[1] == ref.ref[0])
            idx = 1;
        else
            return;

        const mv cand = (is_global && gmv[0].n != INVALID_MV) ?
                        gmv[0] : b->mv.mv[idx];

        *have_refmv_match = 1;
        *have_newmv_match |= b->mf >> 1;

        // only the first MV of each stack entry is compared here
        int pos = 0;
        while (pos < n_cands && mvstack[pos].mv.mv[0].n != cand.n)
            pos++;

        if (pos < n_cands) {
            mvstack[pos].weight += weight;
        } else if (n_cands < 8) {
            mvstack[n_cands].mv.mv[0] = cand;
            mvstack[n_cands].weight = weight;
            *cnt = n_cands + 1;
        }
        return;
    }

    // compound: both references have to match as a pair
    if (b->ref.pair != ref.pair)
        return;

    refmvs_mvpair cand_pair;
    cand_pair.mv[0] = (is_global && gmv[0].n != INVALID_MV) ? gmv[0] : b->mv.mv[0];
    cand_pair.mv[1] = (is_global && gmv[1].n != INVALID_MV) ? gmv[1] : b->mv.mv[1];

    *have_refmv_match = 1;
    *have_newmv_match |= b->mf >> 1;

    // the full MV pair is compared here
    int pos = 0;
    while (pos < n_cands && mvstack[pos].mv.n != cand_pair.n)
        pos++;

    if (pos < n_cands) {
        mvstack[pos].weight += weight;
    } else if (n_cands < 8) {
        mvstack[n_cands].mv = cand_pair;
        mvstack[n_cands].weight = weight;
        *cnt = n_cands + 1;
    }
}
96
97
static int scan_row(refmvs_candidate *const mvstack, int *const cnt,
98
                    const union refmvs_refpair ref, const mv gmv[2],
99
                    const refmvs_block *b, const int bw4, const int w4,
100
                    const int max_rows, const int step,
101
                    int *const have_newmv_match, int *const have_refmv_match)
102
355k
{
103
355k
    const refmvs_block *cand_b = b;
104
355k
    const enum BlockSize first_cand_bs = cand_b->bs;
105
355k
    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
106
355k
    int cand_bw4 = first_cand_b_dim[0];
107
355k
    int len = imax(step, imin(bw4, cand_bw4));
108
109
355k
    if (bw4 <= cand_bw4) {
110
        // FIXME weight can be higher for odd blocks (bx4 & 1), but then the
111
        // position of the first block has to be odd already, i.e. not just
112
        // for row_offset=-3/-5
113
        // FIXME why can this not be cand_bw4?
114
314k
        const int weight = bw4 == 1 ? 2 :
115
314k
                           imax(2, imin(2 * max_rows, first_cand_b_dim[1]));
116
314k
        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
117
314k
                              have_newmv_match, have_refmv_match);
118
314k
        return weight >> 1;
119
314k
    }
120
121
75.2k
    for (int x = 0;;) {
122
        // FIXME if we overhang above, we could fill a bitmask so we don't have
123
        // to repeat the add_spatial_candidate() for the next row, but just increase
124
        // the weight here
125
75.2k
        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
126
75.2k
                              have_newmv_match, have_refmv_match);
127
75.2k
        x += len;
128
75.2k
        if (x >= w4) return 1;
129
34.1k
        cand_b = &b[x];
130
34.1k
        cand_bw4 = dav1d_block_dimensions[cand_b->bs][0];
131
34.1k
        assert(cand_bw4 < bw4);
132
34.1k
        len = imax(step, cand_bw4);
133
34.1k
    }
134
41.0k
}
135
136
static int scan_col(refmvs_candidate *const mvstack, int *const cnt,
137
                    const union refmvs_refpair ref, const mv gmv[2],
138
                    /*const*/ refmvs_block *const *b, const int bh4, const int h4,
139
                    const int bx4, const int max_cols, const int step,
140
                    int *const have_newmv_match, int *const have_refmv_match)
141
660k
{
142
660k
    const refmvs_block *cand_b = &b[0][bx4];
143
660k
    const enum BlockSize first_cand_bs = cand_b->bs;
144
660k
    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
145
660k
    int cand_bh4 = first_cand_b_dim[1];
146
660k
    int len = imax(step, imin(bh4, cand_bh4));
147
148
660k
    if (bh4 <= cand_bh4) {
149
        // FIXME weight can be higher for odd blocks (by4 & 1), but then the
150
        // position of the first block has to be odd already, i.e. not just
151
        // for col_offset=-3/-5
152
        // FIXME why can this not be cand_bh4?
153
612k
        const int weight = bh4 == 1 ? 2 :
154
612k
                           imax(2, imin(2 * max_cols, first_cand_b_dim[0]));
155
612k
        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
156
612k
                            have_newmv_match, have_refmv_match);
157
612k
        return weight >> 1;
158
612k
    }
159
160
84.3k
    for (int y = 0;;) {
161
        // FIXME if we overhang above, we could fill a bitmask so we don't have
162
        // to repeat the add_spatial_candidate() for the next row, but just increase
163
        // the weight here
164
84.3k
        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
165
84.3k
                              have_newmv_match, have_refmv_match);
166
84.3k
        y += len;
167
84.3k
        if (y >= h4) return 1;
168
37.1k
        cand_b = &b[y][bx4];
169
37.1k
        cand_bh4 = dav1d_block_dimensions[cand_b->bs][1];
170
37.1k
        assert(cand_bh4 < bh4);
171
37.1k
        len = imax(step, cand_bh4);
172
37.1k
    }
173
47.7k
}
174
175
26.9k
static inline union mv mv_projection(const union mv mv, const int num, const int den) {
176
26.9k
    static const uint16_t div_mult[32] = {
177
26.9k
           0, 16384, 8192, 5461, 4096, 3276, 2730, 2340,
178
26.9k
        2048,  1820, 1638, 1489, 1365, 1260, 1170, 1092,
179
26.9k
        1024,   963,  910,  862,  819,  780,  744,  712,
180
26.9k
         682,   655,  630,  606,  585,  564,  546,  528
181
26.9k
    };
182
26.9k
    assert(den > 0 && den < 32);
183
26.9k
    assert(num > -32 && num < 32);
184
26.9k
    const int frac = num * div_mult[den];
185
26.9k
    const int y = mv.y * frac, x = mv.x * frac;
186
    // Round and clip according to AV1 spec section 7.9.3
187
26.9k
    return (union mv) { // 0x3fff == (1 << 14) - 1
188
26.9k
        .y = iclip((y + 8192 + (y >> 31)) >> 14, -0x3fff, 0x3fff),
189
26.9k
        .x = iclip((x + 8192 + (x >> 31)) >> 14, -0x3fff, 0x3fff)
190
26.9k
    };
191
26.9k
}
192
193
static void add_temporal_candidate(const refmvs_frame *const rf,
194
                                   refmvs_candidate *const mvstack, int *const cnt,
195
                                   const refmvs_temporal_block *const rb,
196
                                   const union refmvs_refpair ref, int *const globalmv_ctx,
197
                                   const union mv gmv[])
198
22.4k
{
199
22.4k
    if (rb->mv.n == INVALID_MV) return;
200
201
15.7k
    union mv mv = mv_projection(rb->mv, rf->pocdiff[ref.ref[0] - 1], rb->ref);
202
15.7k
    fix_mv_precision(rf->frm_hdr, &mv);
203
204
15.7k
    const int last = *cnt;
205
15.7k
    if (ref.ref[1] == -1) {
206
9.41k
        if (globalmv_ctx)
207
2.59k
            *globalmv_ctx = (abs(mv.x - gmv[0].x) | abs(mv.y - gmv[0].y)) >= 16;
208
209
15.5k
        for (int n = 0; n < last; n++)
210
13.6k
            if (mvstack[n].mv.mv[0].n == mv.n) {
211
7.54k
                mvstack[n].weight += 2;
212
7.54k
                return;
213
7.54k
            }
214
1.86k
        if (last < 8) {
215
1.86k
            mvstack[last].mv.mv[0] = mv;
216
1.86k
            mvstack[last].weight = 2;
217
1.86k
            *cnt = last + 1;
218
1.86k
        }
219
6.29k
    } else {
220
6.29k
        refmvs_mvpair mvp = { .mv = {
221
6.29k
            [0] = mv,
222
6.29k
            [1] = mv_projection(rb->mv, rf->pocdiff[ref.ref[1] - 1], rb->ref),
223
6.29k
        }};
224
6.29k
        fix_mv_precision(rf->frm_hdr, &mvp.mv[1]);
225
226
8.74k
        for (int n = 0; n < last; n++)
227
7.48k
            if (mvstack[n].mv.n == mvp.n) {
228
5.03k
                mvstack[n].weight += 2;
229
5.03k
                return;
230
5.03k
            }
231
1.26k
        if (last < 8) {
232
1.26k
            mvstack[last].mv = mvp;
233
1.26k
            mvstack[last].weight = 2;
234
1.26k
            *cnt = last + 1;
235
1.26k
        }
236
1.26k
    }
237
15.7k
}
238
239
static void add_compound_extended_candidate(refmvs_candidate *const same,
240
                                            int *const same_count,
241
                                            const refmvs_block *const cand_b,
242
                                            const int sign0, const int sign1,
243
                                            const union refmvs_refpair ref,
244
                                            const uint8_t *const sign_bias)
245
38.8k
{
246
38.8k
    refmvs_candidate *const diff = &same[2];
247
38.8k
    int *const diff_count = &same_count[2];
248
249
100k
    for (int n = 0; n < 2; n++) {
250
76.5k
        const int cand_ref = cand_b->ref.ref[n];
251
252
76.5k
        if (cand_ref <= 0) break;
253
254
61.7k
        mv cand_mv = cand_b->mv.mv[n];
255
61.7k
        if (cand_ref == ref.ref[0]) {
256
21.3k
            if (same_count[0] < 2)
257
20.6k
                same[same_count[0]++].mv.mv[0] = cand_mv;
258
21.3k
            if (diff_count[1] < 2) {
259
18.4k
                if (sign1 ^ sign_bias[cand_ref - 1]) {
260
1.09k
                    cand_mv.y = -cand_mv.y;
261
1.09k
                    cand_mv.x = -cand_mv.x;
262
1.09k
                }
263
18.4k
                diff[diff_count[1]++].mv.mv[1] = cand_mv;
264
18.4k
            }
265
40.4k
        } else if (cand_ref == ref.ref[1]) {
266
22.4k
            if (same_count[1] < 2)
267
21.9k
                same[same_count[1]++].mv.mv[1] = cand_mv;
268
22.4k
            if (diff_count[0] < 2) {
269
18.6k
                if (sign0 ^ sign_bias[cand_ref - 1]) {
270
1.10k
                    cand_mv.y = -cand_mv.y;
271
1.10k
                    cand_mv.x = -cand_mv.x;
272
1.10k
                }
273
18.6k
                diff[diff_count[0]++].mv.mv[0] = cand_mv;
274
18.6k
            }
275
22.4k
        } else {
276
17.9k
            mv i_cand_mv = (union mv) {
277
17.9k
                .x = -cand_mv.x,
278
17.9k
                .y = -cand_mv.y
279
17.9k
            };
280
281
17.9k
            if (diff_count[0] < 2) {
282
14.4k
                diff[diff_count[0]++].mv.mv[0] =
283
14.4k
                    sign0 ^ sign_bias[cand_ref - 1] ?
284
14.0k
                    i_cand_mv : cand_mv;
285
14.4k
            }
286
287
17.9k
            if (diff_count[1] < 2) {
288
13.8k
                diff[diff_count[1]++].mv.mv[1] =
289
13.8k
                    sign1 ^ sign_bias[cand_ref - 1] ?
290
13.3k
                    i_cand_mv : cand_mv;
291
13.8k
            }
292
17.9k
        }
293
61.7k
    }
294
38.8k
}
295
296
static void add_single_extended_candidate(refmvs_candidate mvstack[8], int *const cnt,
297
                                          const refmvs_block *const cand_b,
298
                                          const int sign, const uint8_t *const sign_bias)
299
104k
{
300
210k
    for (int n = 0; n < 2; n++) {
301
204k
        const int cand_ref = cand_b->ref.ref[n];
302
303
204k
        if (cand_ref <= 0) break;
304
        // we need to continue even if cand_ref == ref.ref[0], since
305
        // the candidate could have been added as a globalmv variant,
306
        // which changes the value
307
        // FIXME if scan_{row,col}() returned a mask for the nearest
308
        // edge, we could skip the appropriate ones here
309
310
106k
        mv cand_mv = cand_b->mv.mv[n];
311
106k
        if (sign ^ sign_bias[cand_ref - 1]) {
312
721
            cand_mv.y = -cand_mv.y;
313
721
            cand_mv.x = -cand_mv.x;
314
721
        }
315
316
106k
        int m;
317
106k
        const int last = *cnt;
318
122k
        for (m = 0; m < last; m++)
319
95.7k
            if (cand_mv.n == mvstack[m].mv.mv[0].n)
320
79.4k
                break;
321
106k
        if (m == last) {
322
27.1k
            mvstack[m].mv.mv[0] = cand_mv;
323
27.1k
            mvstack[m].weight = 2; // "minimal"
324
27.1k
            *cnt = last + 1;
325
27.1k
        }
326
106k
    }
327
104k
}
328
329
/*
330
 * refmvs_frame allocates memory for one sbrow (32 blocks high, whole frame
331
 * wide) of 4x4-resolution refmvs_block entries for spatial MV referencing.
332
 * mvrefs_tile[] keeps a list of 35 (32 + 3 above) pointers into this memory,
333
 * and each sbrow, the bottom entries (y=27/29/31) are exchanged with the top
334
 * (-5/-3/-1) pointers by calling dav1d_refmvs_tile_sbrow_init() at the start
335
 * of each tile/sbrow.
336
 *
337
 * For temporal MV referencing, we call dav1d_refmvs_save_tmvs() at the end of
338
 * each tile/sbrow (when tile column threading is enabled), or at the start of
339
 * each interleaved sbrow (i.e. once for all tile columns together, when tile
340
 * column threading is disabled). This will copy the 4x4-resolution spatial MVs
341
 * into 8x8-resolution refmvs_temporal_block structures. Then, for subsequent
342
 * frames, at the start of each tile/sbrow (when tile column threading is
343
 * enabled) or at the start of each interleaved sbrow (when tile column
344
 * threading is disabled), we call load_tmvs(), which will project the MVs to
345
 * their respective position in the current frame.
346
 */
347
348
void dav1d_refmvs_find(const refmvs_tile *const rt,
349
                       refmvs_candidate mvstack[8], int *const cnt,
350
                       int *const ctx,
351
                       const union refmvs_refpair ref, const enum BlockSize bs,
352
                       const enum EdgeFlags edge_flags,
353
                       const int by4, const int bx4)
354
374k
{
355
374k
    const refmvs_frame *const rf = rt->rf;
356
374k
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
357
374k
    const int bw4 = b_dim[0], w4 = imin(imin(bw4, 16), rt->tile_col.end - bx4);
358
374k
    const int bh4 = b_dim[1], h4 = imin(imin(bh4, 16), rt->tile_row.end - by4);
359
374k
    mv gmv[2], tgmv[2];
360
361
374k
    *cnt = 0;
362
374k
    assert(ref.ref[0] >=  0 && ref.ref[0] <= 8 &&
363
374k
           ref.ref[1] >= -1 && ref.ref[1] <= 8);
364
374k
    if (ref.ref[0] > 0) {
365
153k
        tgmv[0] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[0] - 1],
366
153k
                             bx4, by4, bw4, bh4, rf->frm_hdr);
367
153k
        gmv[0] = rf->frm_hdr->gmv[ref.ref[0] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
368
108k
                 tgmv[0] : (mv) { .n = INVALID_MV };
369
220k
    } else {
370
220k
        tgmv[0] = (mv) { .n = 0 };
371
220k
        gmv[0] = (mv) { .n = INVALID_MV };
372
220k
    }
373
374k
    if (ref.ref[1] > 0) {
374
32.9k
        tgmv[1] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[1] - 1],
375
32.9k
                             bx4, by4, bw4, bh4, rf->frm_hdr);
376
32.9k
        gmv[1] = rf->frm_hdr->gmv[ref.ref[1] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
377
25.5k
                 tgmv[1] : (mv) { .n = INVALID_MV };
378
32.9k
    }
379
380
    // top
381
374k
    int have_newmv = 0, have_col_mvs = 0, have_row_mvs = 0;
382
374k
    unsigned max_rows = 0, n_rows = ~0;
383
374k
    const refmvs_block *b_top;
384
374k
    if (by4 > rt->tile_row.start) {
385
219k
        max_rows = imin((by4 - rt->tile_row.start + 1) >> 1, 2 + (bh4 > 1));
386
219k
        b_top = &rt->r[(by4 & 31) - 1 + 5][bx4];
387
219k
        n_rows = scan_row(mvstack, cnt, ref, gmv, b_top,
388
219k
                          bw4, w4, max_rows, bw4 >= 16 ? 4 : 1,
389
219k
                          &have_newmv, &have_row_mvs);
390
219k
    }
391
392
    // left
393
374k
    unsigned max_cols = 0, n_cols = ~0U;
394
374k
    refmvs_block *const *b_left;
395
374k
    if (bx4 > rt->tile_col.start) {
396
330k
        max_cols = imin((bx4 - rt->tile_col.start + 1) >> 1, 2 + (bw4 > 1));
397
330k
        b_left = &rt->r[(by4 & 31) + 5];
398
330k
        n_cols = scan_col(mvstack, cnt, ref, gmv, b_left,
399
330k
                          bh4, h4, bx4 - 1, max_cols, bh4 >= 16 ? 4 : 1,
400
330k
                          &have_newmv, &have_col_mvs);
401
330k
    }
402
403
    // top/right
404
374k
    if (n_rows != ~0U && edge_flags & EDGE_I444_TOP_HAS_RIGHT &&
405
126k
        imax(bw4, bh4) <= 16 && bw4 + bx4 < rt->tile_col.end)
406
103k
    {
407
103k
        add_spatial_candidate(mvstack, cnt, 4, &b_top[bw4], ref, gmv,
408
103k
                              &have_newmv, &have_row_mvs);
409
103k
    }
410
411
374k
    const int nearest_match = have_col_mvs + have_row_mvs;
412
374k
    const int nearest_cnt = *cnt;
413
844k
    for (int n = 0; n < nearest_cnt; n++)
414
469k
        mvstack[n].weight += 640;
415
416
    // temporal
417
374k
    int globalmv_ctx = rf->frm_hdr->use_ref_frame_mvs;
418
374k
    if (rf->use_ref_frame_mvs) {
419
6.55k
        const ptrdiff_t stride = rf->rp_stride;
420
6.55k
        const int by8 = by4 >> 1, bx8 = bx4 >> 1;
421
6.55k
        const refmvs_temporal_block *const rbi = &rt->rp_proj[(by8 & 15) * stride + bx8];
422
6.55k
        const refmvs_temporal_block *rb = rbi;
423
6.55k
        const int step_h = bw4 >= 16 ? 2 : 1, step_v = bh4 >= 16 ? 2 : 1;
424
6.55k
        const int w8 = imin((w4 + 1) >> 1, 8), h8 = imin((h4 + 1) >> 1, 8);
425
18.0k
        for (int y = 0; y < h8; y += step_v) {
426
28.9k
            for (int x = 0; x < w8; x+= step_h) {
427
17.4k
                add_temporal_candidate(rf, mvstack, cnt, &rb[x], ref,
428
17.4k
                                       !(x | y) ? &globalmv_ctx : NULL, tgmv);
429
17.4k
            }
430
11.4k
            rb += stride * step_v;
431
11.4k
        }
432
6.55k
        if (imin(bw4, bh4) >= 2 && imax(bw4, bh4) < 16) {
433
3.80k
            const int bh8 = bh4 >> 1, bw8 = bw4 >> 1;
434
3.80k
            rb = &rbi[bh8 * stride];
435
3.80k
            const int has_bottom = by8 + bh8 < imin(rt->tile_row.end >> 1,
436
3.80k
                                                    (by8 & ~7) + 8);
437
3.80k
            if (has_bottom && bx8 - 1 >= imax(rt->tile_col.start >> 1, bx8 & ~7)) {
438
1.51k
                add_temporal_candidate(rf, mvstack, cnt, &rb[-1], ref,
439
1.51k
                                       NULL, NULL);
440
1.51k
            }
441
3.80k
            if (bx8 + bw8 < imin(rt->tile_col.end >> 1, (bx8 & ~7) + 8)) {
442
2.08k
                if (has_bottom) {
443
1.41k
                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8], ref,
444
1.41k
                                           NULL, NULL);
445
1.41k
                }
446
2.08k
                if (by8 + bh8 - 1 < imin(rt->tile_row.end >> 1, (by8 & ~7) + 8)) {
447
2.01k
                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8 - stride],
448
2.01k
                                           ref, NULL, NULL);
449
2.01k
                }
450
2.08k
            }
451
3.80k
        }
452
6.55k
    }
453
374k
    assert(*cnt <= 8);
454
455
    // top/left (which, confusingly, is part of "secondary" references)
456
374k
    int have_dummy_newmv_match;
457
374k
    if ((n_rows | n_cols) != ~0U) {
458
185k
        add_spatial_candidate(mvstack, cnt, 4, &b_top[-1], ref, gmv,
459
185k
                              &have_dummy_newmv_match, &have_row_mvs);
460
185k
    }
461
462
    // "secondary" (non-direct neighbour) top & left edges
463
    // what is different about secondary is that everything is now in 8x8 resolution
464
1.12M
    for (int n = 2; n <= 3; n++) {
465
751k
        if ((unsigned) n > n_rows && (unsigned) n <= max_rows) {
466
136k
            n_rows += scan_row(mvstack, cnt, ref, gmv,
467
136k
                               &rt->r[(((by4 & 31) - 2 * n + 1) | 1) + 5][bx4 | 1],
468
136k
                               bw4, w4, 1 + max_rows - n, bw4 >= 16 ? 4 : 2,
469
136k
                               &have_dummy_newmv_match, &have_row_mvs);
470
136k
        }
471
472
751k
        if ((unsigned) n > n_cols && (unsigned) n <= max_cols) {
473
332k
            n_cols += scan_col(mvstack, cnt, ref, gmv, &rt->r[((by4 & 31) | 1) + 5],
474
332k
                               bh4, h4, (bx4 - n * 2 + 1) | 1,
475
332k
                               1 + max_cols - n, bh4 >= 16 ? 4 : 2,
476
332k
                               &have_dummy_newmv_match, &have_col_mvs);
477
332k
        }
478
751k
    }
479
374k
    assert(*cnt <= 8);
480
481
374k
    const int ref_match_count = have_col_mvs + have_row_mvs;
482
483
    // context build-up
484
374k
    int refmv_ctx, newmv_ctx;
485
374k
    switch (nearest_match) {
486
62.4k
    case 0:
487
62.4k
        refmv_ctx = imin(2, ref_match_count);
488
62.4k
        newmv_ctx = ref_match_count > 0;
489
62.4k
        break;
490
188k
    case 1:
491
188k
        refmv_ctx = imin(ref_match_count * 3, 4);
492
188k
        newmv_ctx = 3 - have_newmv;
493
188k
        break;
494
126k
    case 2:
495
126k
        refmv_ctx = 5;
496
126k
        newmv_ctx = 5 - have_newmv;
497
126k
        break;
498
374k
    }
499
500
    // sorting (nearest, then "secondary")
501
376k
    int len = nearest_cnt;
502
782k
    while (len) {
503
405k
        int last = 0;
504
603k
        for (int n = 1; n < len; n++) {
505
197k
            if (mvstack[n - 1].weight < mvstack[n].weight) {
506
99.5k
#define EXCHANGE(a, b) do { refmvs_candidate tmp = a; a = b; b = tmp; } while (0)
507
94.1k
                EXCHANGE(mvstack[n - 1], mvstack[n]);
508
94.1k
                last = n;
509
94.1k
            }
510
197k
        }
511
405k
        len = last;
512
405k
    }
513
376k
    len = *cnt;
514
607k
    while (len > nearest_cnt) {
515
230k
        int last = nearest_cnt;
516
328k
        for (int n = nearest_cnt + 1; n < len; n++) {
517
97.9k
            if (mvstack[n - 1].weight < mvstack[n].weight) {
518
5.44k
                EXCHANGE(mvstack[n - 1], mvstack[n]);
519
5.44k
#undef EXCHANGE
520
5.44k
                last = n;
521
5.44k
            }
522
97.9k
        }
523
230k
        len = last;
524
230k
    }
525
526
376k
    if (ref.ref[1] > 0) {
527
32.8k
        if (*cnt < 2) {
528
25.3k
            const int sign0 = rf->sign_bias[ref.ref[0] - 1];
529
25.3k
            const int sign1 = rf->sign_bias[ref.ref[1] - 1];
530
25.3k
            const int sz4 = imin(w4, h4);
531
25.3k
            refmvs_candidate *const same = &mvstack[*cnt];
532
25.3k
            int same_count[4] = { 0 };
533
534
            // non-self references in top
535
37.1k
            if (n_rows != ~0U) for (int x = 0; x < sz4;) {
536
19.5k
                const refmvs_block *const cand_b = &b_top[x];
537
19.5k
                add_compound_extended_candidate(same, same_count, cand_b,
538
19.5k
                                                sign0, sign1, ref, rf->sign_bias);
539
19.5k
                x += dav1d_block_dimensions[cand_b->bs][0];
540
19.5k
            }
541
542
            // non-self references in left
543
37.0k
            if (n_cols != ~0U) for (int y = 0; y < sz4;) {
544
19.2k
                const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
545
19.2k
                add_compound_extended_candidate(same, same_count, cand_b,
546
19.2k
                                                sign0, sign1, ref, rf->sign_bias);
547
19.2k
                y += dav1d_block_dimensions[cand_b->bs][1];
548
19.2k
            }
549
550
25.3k
            refmvs_candidate *const diff = &same[2];
551
25.3k
            const int *const diff_count = &same_count[2];
552
553
            // merge together
554
76.0k
            for (int n = 0; n < 2; n++) {
555
50.7k
                int m = same_count[n];
556
557
50.7k
                if (m >= 2) continue;
558
559
40.6k
                const int l = diff_count[n];
560
40.6k
                if (l) {
561
31.9k
                    same[m].mv.mv[n] = diff[0].mv.mv[n];
562
31.9k
                    if (++m == 2) continue;
563
11.4k
                    if (l == 2) {
564
8.32k
                        same[1].mv.mv[n] = diff[1].mv.mv[n];
565
8.32k
                        continue;
566
8.32k
                    }
567
11.4k
                }
568
18.5k
                do {
569
18.5k
                    same[m].mv.mv[n] = tgmv[n];
570
18.5k
                } while (++m < 2);
571
11.8k
            }
572
573
            // if the first extended was the same as the non-extended one,
574
            // then replace it with the second extended one
575
25.3k
            int n = *cnt;
576
25.3k
            if (n == 1 && mvstack[0].mv.n == same[0].mv.n)
577
8.87k
                mvstack[1].mv = mvstack[2].mv;
578
38.7k
            do {
579
38.7k
                mvstack[n].weight = 2;
580
38.7k
            } while (++n < 2);
581
25.3k
            *cnt = 2;
582
25.3k
        }
583
584
        // clamping
585
32.8k
        const int left = -(bx4 + bw4 + 4) * 4 * 8;
586
32.8k
        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
587
32.8k
        const int top = -(by4 + bh4 + 4) * 4 * 8;
588
32.8k
        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
589
590
32.8k
        const int n_refmvs = *cnt;
591
32.8k
        int n = 0;
592
70.4k
        do {
593
70.4k
            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
594
70.4k
            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
595
70.4k
            mvstack[n].mv.mv[1].x = iclip(mvstack[n].mv.mv[1].x, left, right);
596
70.4k
            mvstack[n].mv.mv[1].y = iclip(mvstack[n].mv.mv[1].y, top, bottom);
597
70.4k
        } while (++n < n_refmvs);
598
599
32.8k
        switch (refmv_ctx >> 1) {
600
15.2k
        case 0:
601
15.2k
            *ctx = imin(newmv_ctx, 1);
602
15.2k
            break;
603
11.6k
        case 1:
604
11.6k
            *ctx = 1 + imin(newmv_ctx, 3);
605
11.6k
            break;
606
6.03k
        case 2:
607
6.03k
            *ctx = iclip(3 + newmv_ctx, 4, 7);
608
6.03k
            break;
609
32.8k
        }
610
611
32.8k
        return;
612
343k
    } else if (*cnt < 2 && ref.ref[0] > 0) {
613
81.9k
        const int sign = rf->sign_bias[ref.ref[0] - 1];
614
81.9k
        const int sz4 = imin(w4, h4);
615
616
        // non-self references in top
617
107k
        if (n_rows != ~0U) for (int x = 0; x < sz4 && *cnt < 2;) {
618
54.9k
            const refmvs_block *const cand_b = &b_top[x];
619
54.9k
            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
620
54.9k
            x += dav1d_block_dimensions[cand_b->bs][0];
621
54.9k
        }
622
623
        // non-self references in left
624
102k
        if (n_cols != ~0U) for (int y = 0; y < sz4 && *cnt < 2;) {
625
49.2k
            const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
626
49.2k
            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
627
49.2k
            y += dav1d_block_dimensions[cand_b->bs][1];
628
49.2k
        }
629
81.9k
    }
630
343k
    assert(*cnt <= 8);
631
632
    // clamping
633
343k
    int n_refmvs = *cnt;
634
343k
    if (n_refmvs) {
635
316k
        const int left = -(bx4 + bw4 + 4) * 4 * 8;
636
316k
        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
637
316k
        const int top = -(by4 + bh4 + 4) * 4 * 8;
638
316k
        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
639
640
316k
        int n = 0;
641
786k
        do {
642
786k
            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
643
786k
            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
644
786k
        } while (++n < n_refmvs);
645
316k
    }
646
647
472k
    for (int n = *cnt; n < 2; n++)
648
128k
        mvstack[n].mv.mv[0] = tgmv[0];
649
650
343k
    *ctx = (refmv_ctx << 4) | (globalmv_ctx << 3) | newmv_ctx;
651
343k
}
652
653
/*
 * Set up the per-tile state `rt` for superblock row `sby` of a tile.
 *
 * Points rt->rp_proj and the rt->r[] row-pointer table into the buffers
 * owned by `rf`, and records the tile boundaries (clipped to the frame size
 * rf->iw4 / rf->ih4). With a single tile thread, tile_row_idx is ignored
 * (treated as 0). When frame threading is active and pass == 2, a separate
 * region of rf->r (offset by 35 * 2 * rf->n_blocks) is used.
 */
void dav1d_refmvs_tile_sbrow_init(refmvs_tile *const rt, const refmvs_frame *const rf,
                                  const int tile_col_start4, const int tile_col_end4,
                                  const int tile_row_start4, const int tile_row_end4,
                                  const int sby, int tile_row_idx, const int pass)
{
    if (rf->n_tile_threads == 1)
        tile_row_idx = 0;

    rt->rp_proj = &rf->rp_proj[16 * rf->rp_stride * tile_row_idx];

    const ptrdiff_t r_stride = rf->rp_stride * 2;
    ptrdiff_t pass_off = 0;
    if (rf->n_frame_threads > 1 && pass == 2)
        pass_off = 35 * 2 * rf->n_blocks;

    refmvs_block *const base = &rf->r[35 * r_stride * tile_row_idx + pass_off];
    const int sbsz = rf->sbsz;
    const int off = (sbsz * sby) & 16;

    // rows of the current superblock row occupy slots off+5 .. off+5+sbsz-1
    for (int i = 0; i < sbsz; i++)
        rt->r[off + 5 + i] = base + i * r_stride;

    // three more rows follow the superblock rows in memory; they fill the
    // even slots off+0/2/4, the odd slots in between stay NULL
    refmvs_block *const extra = base + sbsz * r_stride;
    rt->r[off + 0] = extra;
    rt->r[off + 1] = NULL;
    rt->r[off + 2] = extra + r_stride;
    rt->r[off + 3] = NULL;
    rt->r[off + 4] = extra + 2 * r_stride;

    if (sby & 1) {
        // on odd superblock rows, exchange slots off+0/2/4 with the slots
        // sbsz entries further down
        for (int k = 0; k <= 4; k += 2) {
            void *const tmp = rt->r[off + k];
            rt->r[off + k] = rt->r[off + sbsz + k];
            rt->r[off + sbsz + k] = tmp;
        }
    }

    rt->rf = rf;
    rt->tile_row.start = tile_row_start4;
    rt->tile_row.end = imin(tile_row_end4, rf->ih4);
    rt->tile_col.start = tile_col_start4;
    rt->tile_col.end = imin(tile_col_end4, rf->iw4);
}
689
690
static void load_tmvs_c(const refmvs_frame *const rf, int tile_row_idx,
691
                        const int col_start8, const int col_end8,
692
                        const int row_start8, int row_end8)
693
2.78k
{
694
2.78k
    if (rf->n_tile_threads == 1) tile_row_idx = 0;
695
2.78k
    assert(row_start8 >= 0);
696
2.78k
    assert((unsigned) (row_end8 - row_start8) <= 16U);
697
2.78k
    row_end8 = imin(row_end8, rf->ih8);
698
2.78k
    const int col_start8i = imax(col_start8 - 8, 0);
699
2.78k
    const int col_end8i = imin(col_end8 + 8, rf->iw8);
700
701
2.78k
    const ptrdiff_t stride = rf->rp_stride;
702
2.78k
    refmvs_temporal_block *rp_proj =
703
2.78k
        &rf->rp_proj[16 * stride * tile_row_idx + (row_start8 & 15) * stride];
704
18.1k
    for (int y = row_start8; y < row_end8; y++) {
705
54.2k
        for (int x = col_start8; x < col_end8; x++)
706
38.8k
            rp_proj[x].mv.n = INVALID_MV;
707
15.4k
        rp_proj += stride;
708
15.4k
    }
709
710
2.78k
    rp_proj = &rf->rp_proj[16 * stride * tile_row_idx];
711
4.74k
    for (int n = 0; n < rf->n_mfmvs; n++) {
712
1.95k
        const int ref2cur = rf->mfmv_ref2cur[n];
713
1.95k
        if (ref2cur == INVALID_REF2CUR) continue;
714
715
1.60k
        const int ref = rf->mfmv_ref[n];
716
1.60k
        const int ref_sign = ref - 4;
717
1.60k
        const refmvs_temporal_block *r = &rf->rp_ref[ref][row_start8 * stride];
718
11.1k
        for (int y = row_start8; y < row_end8; y++) {
719
9.50k
            const int y_sb_align = y & ~7;
720
9.50k
            const int y_proj_start = imax(y_sb_align, row_start8);
721
9.50k
            const int y_proj_end = imin(y_sb_align + 8, row_end8);
722
26.3k
            for (int x = col_start8i; x < col_end8i; x++) {
723
16.8k
                const refmvs_temporal_block *rb = &r[x];
724
16.8k
                const int b_ref = rb->ref;
725
16.8k
                if (!b_ref) continue;
726
9.56k
                const int ref2ref = rf->mfmv_ref2ref[n][b_ref - 1];
727
9.56k
                if (!ref2ref) continue;
728
4.91k
                const mv b_mv = rb->mv;
729
4.91k
                const mv offset = mv_projection(b_mv, ref2cur, ref2ref);
730
4.91k
                int pos_x = x + apply_sign(abs(offset.x) >> 6,
731
4.91k
                                           offset.x ^ ref_sign);
732
4.91k
                const int pos_y = y + apply_sign(abs(offset.y) >> 6,
733
4.91k
                                                 offset.y ^ ref_sign);
734
4.91k
                if (pos_y >= y_proj_start && pos_y < y_proj_end) {
735
4.43k
                    const ptrdiff_t pos = (pos_y & 15) * stride;
736
24.0k
                    for (;;) {
737
24.0k
                        const int x_sb_align = x & ~7;
738
24.0k
                        if (pos_x >= imax(x_sb_align - 8, col_start8) &&
739
23.8k
                            pos_x < imin(x_sb_align + 16, col_end8))
740
23.5k
                        {
741
23.5k
                            rp_proj[pos + pos_x].mv = rb->mv;
742
23.5k
                            rp_proj[pos + pos_x].ref = ref2ref;
743
23.5k
                        }
744
24.0k
                        if (++x >= col_end8i) break;
745
20.8k
                        rb++;
746
20.8k
                        if (rb->ref != b_ref || rb->mv.n != b_mv.n) break;
747
19.6k
                        pos_x++;
748
19.6k
                    }
749
4.43k
                } else {
750
1.17k
                    for (;;) {
751
1.17k
                        if (++x >= col_end8i) break;
752
857
                        rb++;
753
857
                        if (rb->ref != b_ref || rb->mv.n != b_mv.n) break;
754
857
                    }
755
476
                }
756
4.91k
                x--;
757
4.91k
            }
758
9.50k
            r += stride;
759
9.50k
        }
760
1.60k
    }
761
2.78k
}
762
763
static void save_tmvs_c(refmvs_temporal_block *rp, const ptrdiff_t stride,
764
                        refmvs_block *const *const rr,
765
                        const uint8_t *const ref_sign,
766
                        const int col_end8, const int row_end8,
767
                        const int col_start8, const int row_start8)
768
9.24k
{
769
63.7k
    for (int y = row_start8; y < row_end8; y++) {
770
54.5k
        const refmvs_block *const b = rr[(y & 15) * 2];
771
772
128k
        for (int x = col_start8; x < col_end8;) {
773
73.5k
            const refmvs_block *const cand_b = &b[x * 2 + 1];
774
73.5k
            const int bw8 = (dav1d_block_dimensions[cand_b->bs][0] + 1) >> 1;
775
776
73.5k
            if (cand_b->ref.ref[1] > 0 && ref_sign[cand_b->ref.ref[1] - 1] &&
777
7.94k
                (abs(cand_b->mv.mv[1].y) | abs(cand_b->mv.mv[1].x)) < 4096)
778
7.08k
            {
779
19.5k
                for (int n = 0; n < bw8; n++, x++)
780
12.4k
                    rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[1],
781
12.4k
                                                      .ref = cand_b->ref.ref[1] };
782
66.4k
            } else if (cand_b->ref.ref[0] > 0 && ref_sign[cand_b->ref.ref[0] - 1] &&
783
26.8k
                       (abs(cand_b->mv.mv[0].y) | abs(cand_b->mv.mv[0].x)) < 4096)
784
25.6k
            {
785
96.9k
                for (int n = 0; n < bw8; n++, x++)
786
71.2k
                    rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[0],
787
71.2k
                                                      .ref = cand_b->ref.ref[0] };
788
40.7k
            } else {
789
137k
                for (int n = 0; n < bw8; n++, x++) {
790
96.5k
                    rp[x].mv.n = 0;
791
96.5k
                    rp[x].ref = 0; // "invalid"
792
96.5k
                }
793
40.7k
            }
794
73.5k
        }
795
54.5k
        rp += stride;
796
54.5k
    }
797
9.24k
}
798
799
int dav1d_refmvs_init_frame(refmvs_frame *const rf,
800
                            const Dav1dSequenceHeader *const seq_hdr,
801
                            const Dav1dFrameHeader *const frm_hdr,
802
                            const uint8_t ref_poc[7],
803
                            refmvs_temporal_block *const rp,
804
                            const uint8_t ref_ref_poc[7][7],
805
                            /*const*/ refmvs_temporal_block *const rp_ref[7],
806
                            const int n_tile_threads, const int n_frame_threads)
807
20.1k
{
808
20.1k
    const int rp_stride = ((frm_hdr->width[0] + 127) & ~127) >> 3;
809
20.1k
    const int n_tile_rows = n_tile_threads > 1 ? frm_hdr->tiling.rows : 1;
810
20.1k
    const int n_blocks = rp_stride * n_tile_rows;
811
812
20.1k
    rf->sbsz = 16 << seq_hdr->sb128;
813
20.1k
    rf->frm_hdr = frm_hdr;
814
20.1k
    rf->iw8 = (frm_hdr->width[0] + 7) >> 3;
815
20.1k
    rf->ih8 = (frm_hdr->height + 7) >> 3;
816
20.1k
    rf->iw4 = rf->iw8 << 1;
817
20.1k
    rf->ih4 = rf->ih8 << 1;
818
20.1k
    rf->rp = rp;
819
20.1k
    rf->rp_stride = rp_stride;
820
20.1k
    rf->n_tile_threads = n_tile_threads;
821
20.1k
    rf->n_frame_threads = n_frame_threads;
822
823
20.1k
    if (n_blocks != rf->n_blocks) {
824
20.0k
        const size_t r_sz = sizeof(*rf->r) * 35 * 2 * n_blocks * (1 + (n_frame_threads > 1));
825
20.0k
        const size_t rp_proj_sz = sizeof(*rf->rp_proj) * 16 * n_blocks;
826
        /* Note that sizeof(*rf->r) == 12, but it's accessed using 16-byte unaligned
827
         * loads in save_tmvs() asm which can overread 4 bytes into rp_proj. */
828
20.0k
        dav1d_free_aligned(rf->r);
829
20.0k
        rf->r = dav1d_alloc_aligned(ALLOC_REFMVS, r_sz + rp_proj_sz, 64);
830
20.0k
        if (!rf->r) {
831
0
            rf->n_blocks = 0;
832
0
            return DAV1D_ERR(ENOMEM);
833
0
        }
834
835
20.0k
        rf->rp_proj = (refmvs_temporal_block*)((uintptr_t)rf->r + r_sz);
836
20.0k
        rf->n_blocks = n_blocks;
837
20.0k
    }
838
839
20.1k
    const int poc = frm_hdr->frame_offset;
840
161k
    for (int i = 0; i < 7; i++) {
841
140k
        const int poc_diff = get_poc_diff(seq_hdr->order_hint_n_bits,
842
140k
                                          ref_poc[i], poc);
843
140k
        rf->sign_bias[i] = poc_diff > 0;
844
140k
        rf->mfmv_sign[i] = poc_diff < 0;
845
140k
        rf->pocdiff[i] = iclip(get_poc_diff(seq_hdr->order_hint_n_bits,
846
140k
                                            poc, ref_poc[i]), -31, 31);
847
140k
    }
848
849
    // temporal MV setup
850
20.1k
    rf->n_mfmvs = 0;
851
20.1k
    rf->rp_ref = rp_ref;
852
20.1k
    if (frm_hdr->use_ref_frame_mvs && seq_hdr->order_hint_n_bits) {
853
2.58k
        int total = 2;
854
2.58k
        if (rp_ref[0] && ref_ref_poc[0][6] != ref_poc[3] /* alt-of-last != gold */) {
855
343
            rf->mfmv_ref[rf->n_mfmvs++] = 0; // last
856
343
            total = 3;
857
343
        }
858
2.58k
        if (rp_ref[4] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[4],
859
580
                                      frm_hdr->frame_offset) > 0)
860
348
        {
861
348
            rf->mfmv_ref[rf->n_mfmvs++] = 4; // bwd
862
348
        }
863
2.58k
        if (rp_ref[5] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[5],
864
574
                                      frm_hdr->frame_offset) > 0)
865
342
        {
866
342
            rf->mfmv_ref[rf->n_mfmvs++] = 5; // altref2
867
342
        }
868
2.58k
        if (rf->n_mfmvs < total && rp_ref[6] &&
869
360
            get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[6],
870
360
                         frm_hdr->frame_offset) > 0)
871
179
        {
872
179
            rf->mfmv_ref[rf->n_mfmvs++] = 6; // altref
873
179
        }
874
2.58k
        if (rf->n_mfmvs < total && rp_ref[1])
875
682
            rf->mfmv_ref[rf->n_mfmvs++] = 1; // last2
876
877
4.47k
        for (int n = 0; n < rf->n_mfmvs; n++) {
878
1.89k
            const int rpoc = ref_poc[rf->mfmv_ref[n]];
879
1.89k
            const int diff1 = get_poc_diff(seq_hdr->order_hint_n_bits,
880
1.89k
                                           rpoc, frm_hdr->frame_offset);
881
1.89k
            if (abs(diff1) > 31) {
882
344
                rf->mfmv_ref2cur[n] = INVALID_REF2CUR;
883
1.55k
            } else {
884
1.55k
                rf->mfmv_ref2cur[n] = rf->mfmv_ref[n] < 4 ? -diff1 : diff1;
885
12.4k
                for (int m = 0; m < 7; m++) {
886
10.8k
                    const int rrpoc = ref_ref_poc[rf->mfmv_ref[n]][m];
887
10.8k
                    const int diff2 = get_poc_diff(seq_hdr->order_hint_n_bits,
888
10.8k
                                                   rpoc, rrpoc);
889
                    // unsigned comparison also catches the < 0 case
890
10.8k
                    rf->mfmv_ref2ref[n][m] = (unsigned) diff2 > 31U ? 0 : diff2;
891
10.8k
                }
892
1.55k
            }
893
1.89k
        }
894
2.58k
    }
895
20.1k
    rf->use_ref_frame_mvs = rf->n_mfmvs > 0;
896
897
20.1k
    return 0;
898
20.1k
}
899
900
/*
 * Copy *rmv into a bw4-wide span, starting at column bx4, of each of the
 * next bh4 rows referenced by rr.
 *
 * The row loop is a do/while, so the first row is always processed; the
 * loop relies on the caller passing bh4 >= 1.
 */
static void splat_mv_c(refmvs_block **rr, const refmvs_block *const rmv,
                       const int bx4, const int bw4, int bh4)
{
    do {
        refmvs_block *dst = *rr++ + bx4;
        for (int left = bw4; left > 0; left--)
            *dst++ = *rmv;
    } while (--bh4);
}
909
910
#if HAVE_ASM
911
#if ARCH_AARCH64 || ARCH_ARM
912
#include "src/arm/refmvs.h"
913
#elif ARCH_LOONGARCH64
914
#include "src/loongarch/refmvs.h"
915
#elif ARCH_X86
916
#include "src/x86/refmvs.h"
917
#endif
918
#endif
919
920
/*
 * Populate the refmvs DSP function table.
 *
 * The portable C implementations are installed first. When the build has
 * HAVE_ASM set, the init routine of the matching architecture is then
 * called with the same table (presumably to substitute optimised versions
 * where available - the routines themselves live in the per-arch headers).
 */
COLD void dav1d_refmvs_dsp_init(Dav1dRefmvsDSPContext *const c)
{
    c->splat_mv  = splat_mv_c;
    c->save_tmvs = save_tmvs_c;
    c->load_tmvs = load_tmvs_c;

#if HAVE_ASM
#if ARCH_AARCH64 || ARCH_ARM
    refmvs_dsp_init_arm(c);
#elif ARCH_LOONGARCH64
    refmvs_dsp_init_loongarch(c);
#elif ARCH_X86
    refmvs_dsp_init_x86(c);
#endif
#endif
}