Coverage Report

Created: 2026-06-15 06:24

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/dav1d/src/refmvs.c
Line
Count
Source
1
/*
2
 * Copyright © 2020, VideoLAN and dav1d authors
3
 * Copyright © 2020, Two Orioles, LLC
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * 1. Redistributions of source code must retain the above copyright notice, this
10
 *    list of conditions and the following disclaimer.
11
 *
12
 * 2. Redistributions in binary form must reproduce the above copyright notice,
13
 *    this list of conditions and the following disclaimer in the documentation
14
 *    and/or other materials provided with the distribution.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
 */
27
28
#include "config.h"
29
30
#include <limits.h>
31
#include <stdlib.h>
32
33
#include "dav1d/common.h"
34
35
#include "common/intops.h"
36
37
#include "src/env.h"
38
#include "src/mem.h"
39
#include "src/refmvs.h"
40
41
/*
 * Offer the spatial neighbour `b` as a candidate for the reference (pair)
 * `ref`, accumulating into the candidate stack `mvstack` (`*cnt` entries in
 * use, at most 8).
 *
 * - A neighbour whose first MV is INVALID_MV is ignored.
 * - Single reference (ref.ref[1] == -1): the first of the neighbour's two
 *   reference slots equal to ref.ref[0] supplies the MV.
 * - Otherwise the neighbour's whole reference pair must equal `ref`.
 * - When bit 0 of b->mf is set and the matching gmv[] entry is valid, that
 *   gmv[] entry is used instead of the neighbour's own MV.
 *
 * On a reference match, *have_refmv_match is set to 1 and b->mf >> 1 is OR'ed
 * into *have_newmv_match. If the candidate MV is already on the stack its
 * weight is increased by `weight`; otherwise it is appended with that weight,
 * provided fewer than 8 entries are in use.
 */
static void add_spatial_candidate(refmvs_candidate *const mvstack, int *const cnt,
                                  const int weight, const refmvs_block *const b,
                                  const union refmvs_refpair ref, const mv gmv[2],
                                  int *const have_newmv_match,
                                  int *const have_refmv_match)
{
    // intra block, no intrabc
    if (b->mv.mv[0].n == INVALID_MV)
        return;

    if (ref.ref[1] == -1) {
        // Locate the first reference slot of the neighbour matching ours.
        int slot = 0;
        while (slot < 2 && b->ref.ref[slot] != ref.ref[0])
            slot++;
        if (slot == 2)
            return;

        const int use_global = (b->mf & 1) && gmv[0].n != INVALID_MV;
        const mv cand = use_global ? gmv[0] : b->mv.mv[slot];

        *have_refmv_match = 1;
        *have_newmv_match |= b->mf >> 1;

        const int count = *cnt;
        for (int i = 0; i < count; i++) {
            if (mvstack[i].mv.mv[0].n != cand.n)
                continue;
            // Duplicate: only strengthen the existing entry.
            mvstack[i].weight += weight;
            return;
        }

        if (count >= 8)
            return;
        mvstack[count].mv.mv[0] = cand;
        mvstack[count].weight = weight;
        *cnt = count + 1;
        return;
    }

    // Compound prediction: both references have to match at once.
    if (b->ref.pair != ref.pair)
        return;

    const refmvs_mvpair cand_pair = { .mv = {
        [0] = ((b->mf & 1) && gmv[0].n != INVALID_MV) ? gmv[0] : b->mv.mv[0],
        [1] = ((b->mf & 1) && gmv[1].n != INVALID_MV) ? gmv[1] : b->mv.mv[1],
    }};

    *have_refmv_match = 1;
    *have_newmv_match |= b->mf >> 1;

    const int count = *cnt;
    for (int i = 0; i < count; i++) {
        if (mvstack[i].mv.n != cand_pair.n)
            continue;
        mvstack[i].weight += weight;
        return;
    }

    if (count >= 8)
        return;
    mvstack[count].mv = cand_pair;
    mvstack[count].weight = weight;
    *cnt = count + 1;
}
96
97
static int scan_row(refmvs_candidate *const mvstack, int *const cnt,
98
                    const union refmvs_refpair ref, const mv gmv[2],
99
                    const refmvs_block *b, const int bw4, const int w4,
100
                    const int max_rows, const int step,
101
                    int *const have_newmv_match, int *const have_refmv_match)
102
355k
{
103
355k
    const refmvs_block *cand_b = b;
104
355k
    const enum BlockSize first_cand_bs = cand_b->bs;
105
355k
    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
106
355k
    int cand_bw4 = first_cand_b_dim[0];
107
355k
    int len = imax(step, imin(bw4, cand_bw4));
108
109
355k
    if (bw4 <= cand_bw4) {
110
        // FIXME weight can be higher for odd blocks (bx4 & 1), but then the
111
        // position of the first block has to be odd already, i.e. not just
112
        // for row_offset=-3/-5
113
        // FIXME why can this not be cand_bw4?
114
314k
        const int weight = bw4 == 1 ? 2 :
115
314k
                           imax(2, imin(2 * max_rows, first_cand_b_dim[1]));
116
314k
        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
117
314k
                              have_newmv_match, have_refmv_match);
118
314k
        return weight >> 1;
119
314k
    }
120
121
75.2k
    for (int x = 0;;) {
122
        // FIXME if we overhang above, we could fill a bitmask so we don't have
123
        // to repeat the add_spatial_candidate() for the next row, but just increase
124
        // the weight here
125
75.2k
        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
126
75.2k
                              have_newmv_match, have_refmv_match);
127
75.2k
        x += len;
128
75.2k
        if (x >= w4) return 1;
129
34.1k
        cand_b = &b[x];
130
34.1k
        cand_bw4 = dav1d_block_dimensions[cand_b->bs][0];
131
34.1k
        assert(cand_bw4 < bw4);
132
34.1k
        len = imax(step, cand_bw4);
133
34.1k
    }
134
41.2k
}
135
136
static int scan_col(refmvs_candidate *const mvstack, int *const cnt,
137
                    const union refmvs_refpair ref, const mv gmv[2],
138
                    /*const*/ refmvs_block *const *b, const int bh4, const int h4,
139
                    const int bx4, const int max_cols, const int step,
140
                    int *const have_newmv_match, int *const have_refmv_match)
141
661k
{
142
661k
    const refmvs_block *cand_b = &b[0][bx4];
143
661k
    const enum BlockSize first_cand_bs = cand_b->bs;
144
661k
    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
145
661k
    int cand_bh4 = first_cand_b_dim[1];
146
661k
    int len = imax(step, imin(bh4, cand_bh4));
147
148
661k
    if (bh4 <= cand_bh4) {
149
        // FIXME weight can be higher for odd blocks (by4 & 1), but then the
150
        // position of the first block has to be odd already, i.e. not just
151
        // for col_offset=-3/-5
152
        // FIXME why can this not be cand_bh4?
153
613k
        const int weight = bh4 == 1 ? 2 :
154
613k
                           imax(2, imin(2 * max_cols, first_cand_b_dim[0]));
155
613k
        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
156
613k
                            have_newmv_match, have_refmv_match);
157
613k
        return weight >> 1;
158
613k
    }
159
160
84.1k
    for (int y = 0;;) {
161
        // FIXME if we overhang above, we could fill a bitmask so we don't have
162
        // to repeat the add_spatial_candidate() for the next row, but just increase
163
        // the weight here
164
84.1k
        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
165
84.1k
                              have_newmv_match, have_refmv_match);
166
84.1k
        y += len;
167
84.1k
        if (y >= h4) return 1;
168
37.1k
        cand_b = &b[y][bx4];
169
37.1k
        cand_bh4 = dav1d_block_dimensions[cand_b->bs][1];
170
37.1k
        assert(cand_bh4 < bh4);
171
37.1k
        len = imax(step, cand_bh4);
172
37.1k
    }
173
47.9k
}
174
175
26.8k
static inline union mv mv_projection(const union mv mv, const int num, const int den) {
176
26.8k
    static const uint16_t div_mult[32] = {
177
26.8k
           0, 16384, 8192, 5461, 4096, 3276, 2730, 2340,
178
26.8k
        2048,  1820, 1638, 1489, 1365, 1260, 1170, 1092,
179
26.8k
        1024,   963,  910,  862,  819,  780,  744,  712,
180
26.8k
         682,   655,  630,  606,  585,  564,  546,  528
181
26.8k
    };
182
26.8k
    assert(den > 0 && den < 32);
183
26.8k
    assert(num > -32 && num < 32);
184
26.8k
    const int frac = num * div_mult[den];
185
26.8k
    const int y = mv.y * frac, x = mv.x * frac;
186
    // Round and clip according to AV1 spec section 7.9.3
187
26.8k
    return (union mv) { // 0x3fff == (1 << 14) - 1
188
26.8k
        .y = iclip((y + 8192 + (y >> 31)) >> 14, -0x3fff, 0x3fff),
189
26.8k
        .x = iclip((x + 8192 + (x >> 31)) >> 14, -0x3fff, 0x3fff)
190
26.8k
    };
191
26.8k
}
192
193
static void add_temporal_candidate(const refmvs_frame *const rf,
194
                                   refmvs_candidate *const mvstack, int *const cnt,
195
                                   const refmvs_temporal_block *const rb,
196
                                   const union refmvs_refpair ref, int *const globalmv_ctx,
197
                                   const union mv gmv[])
198
22.3k
{
199
22.3k
    if (rb->mv.n == INVALID_MV) return;
200
201
15.6k
    union mv mv = mv_projection(rb->mv, rf->pocdiff[ref.ref[0] - 1], rb->ref);
202
15.6k
    fix_mv_precision(rf->frm_hdr, &mv);
203
204
15.6k
    const int last = *cnt;
205
15.6k
    if (ref.ref[1] == -1) {
206
9.38k
        if (globalmv_ctx)
207
2.59k
            *globalmv_ctx = (abs(mv.x - gmv[0].x) | abs(mv.y - gmv[0].y)) >= 16;
208
209
15.4k
        for (int n = 0; n < last; n++)
210
13.6k
            if (mvstack[n].mv.mv[0].n == mv.n) {
211
7.52k
                mvstack[n].weight += 2;
212
7.52k
                return;
213
7.52k
            }
214
1.86k
        if (last < 8) {
215
1.85k
            mvstack[last].mv.mv[0] = mv;
216
1.85k
            mvstack[last].weight = 2;
217
1.85k
            *cnt = last + 1;
218
1.85k
        }
219
6.29k
    } else {
220
6.29k
        refmvs_mvpair mvp = { .mv = {
221
6.29k
            [0] = mv,
222
6.29k
            [1] = mv_projection(rb->mv, rf->pocdiff[ref.ref[1] - 1], rb->ref),
223
6.29k
        }};
224
6.29k
        fix_mv_precision(rf->frm_hdr, &mvp.mv[1]);
225
226
8.73k
        for (int n = 0; n < last; n++)
227
7.47k
            if (mvstack[n].mv.n == mvp.n) {
228
5.03k
                mvstack[n].weight += 2;
229
5.03k
                return;
230
5.03k
            }
231
1.26k
        if (last < 8) {
232
1.26k
            mvstack[last].mv = mvp;
233
1.26k
            mvstack[last].weight = 2;
234
1.26k
            *cnt = last + 1;
235
1.26k
        }
236
1.26k
    }
237
15.6k
}
238
239
static void add_compound_extended_candidate(refmvs_candidate *const same,
240
                                            int *const same_count,
241
                                            const refmvs_block *const cand_b,
242
                                            const int sign0, const int sign1,
243
                                            const union refmvs_refpair ref,
244
                                            const uint8_t *const sign_bias)
245
38.7k
{
246
38.7k
    refmvs_candidate *const diff = &same[2];
247
38.7k
    int *const diff_count = &same_count[2];
248
249
100k
    for (int n = 0; n < 2; n++) {
250
76.3k
        const int cand_ref = cand_b->ref.ref[n];
251
252
76.3k
        if (cand_ref <= 0) break;
253
254
61.6k
        mv cand_mv = cand_b->mv.mv[n];
255
61.6k
        if (cand_ref == ref.ref[0]) {
256
21.3k
            if (same_count[0] < 2)
257
20.6k
                same[same_count[0]++].mv.mv[0] = cand_mv;
258
21.3k
            if (diff_count[1] < 2) {
259
18.4k
                if (sign1 ^ sign_bias[cand_ref - 1]) {
260
1.09k
                    cand_mv.y = -cand_mv.y;
261
1.09k
                    cand_mv.x = -cand_mv.x;
262
1.09k
                }
263
18.4k
                diff[diff_count[1]++].mv.mv[1] = cand_mv;
264
18.4k
            }
265
40.3k
        } else if (cand_ref == ref.ref[1]) {
266
22.3k
            if (same_count[1] < 2)
267
21.8k
                same[same_count[1]++].mv.mv[1] = cand_mv;
268
22.3k
            if (diff_count[0] < 2) {
269
18.5k
                if (sign0 ^ sign_bias[cand_ref - 1]) {
270
1.10k
                    cand_mv.y = -cand_mv.y;
271
1.10k
                    cand_mv.x = -cand_mv.x;
272
1.10k
                }
273
18.5k
                diff[diff_count[0]++].mv.mv[0] = cand_mv;
274
18.5k
            }
275
22.3k
        } else {
276
17.9k
            mv i_cand_mv = (union mv) {
277
17.9k
                .x = -cand_mv.x,
278
17.9k
                .y = -cand_mv.y
279
17.9k
            };
280
281
17.9k
            if (diff_count[0] < 2) {
282
14.4k
                diff[diff_count[0]++].mv.mv[0] =
283
14.4k
                    sign0 ^ sign_bias[cand_ref - 1] ?
284
14.0k
                    i_cand_mv : cand_mv;
285
14.4k
            }
286
287
17.9k
            if (diff_count[1] < 2) {
288
13.8k
                diff[diff_count[1]++].mv.mv[1] =
289
13.8k
                    sign1 ^ sign_bias[cand_ref - 1] ?
290
13.3k
                    i_cand_mv : cand_mv;
291
13.8k
            }
292
17.9k
        }
293
61.6k
    }
294
38.7k
}
295
296
static void add_single_extended_candidate(refmvs_candidate mvstack[8], int *const cnt,
297
                                          const refmvs_block *const cand_b,
298
                                          const int sign, const uint8_t *const sign_bias)
299
104k
{
300
211k
    for (int n = 0; n < 2; n++) {
301
204k
        const int cand_ref = cand_b->ref.ref[n];
302
303
204k
        if (cand_ref <= 0) break;
304
        // we need to continue even if cand_ref == ref.ref[0], since
305
        // the candidate could have been added as a globalmv variant,
306
        // which changes the value
307
        // FIXME if scan_{row,col}() returned a mask for the nearest
308
        // edge, we could skip the appropriate ones here
309
310
106k
        mv cand_mv = cand_b->mv.mv[n];
311
106k
        if (sign ^ sign_bias[cand_ref - 1]) {
312
730
            cand_mv.y = -cand_mv.y;
313
730
            cand_mv.x = -cand_mv.x;
314
730
        }
315
316
106k
        int m;
317
106k
        const int last = *cnt;
318
123k
        for (m = 0; m < last; m++)
319
95.9k
            if (cand_mv.n == mvstack[m].mv.mv[0].n)
320
79.5k
                break;
321
106k
        if (m == last) {
322
27.1k
            mvstack[m].mv.mv[0] = cand_mv;
323
27.1k
            mvstack[m].weight = 2; // "minimal"
324
27.1k
            *cnt = last + 1;
325
27.1k
        }
326
106k
    }
327
104k
}
328
329
/*
330
 * refmvs_frame allocates memory for one sbrow (32 blocks high, whole frame
331
 * wide) of 4x4-resolution refmvs_block entries for spatial MV referencing.
332
 * mvrefs_tile[] keeps a list of 35 (32 + 3 above) pointers into this memory,
333
 * and each sbrow, the bottom entries (y=27/29/31) are exchanged with the top
334
 * (-5/-3/-1) pointers by calling dav1d_refmvs_tile_sbrow_init() at the start
335
 * of each tile/sbrow.
336
 *
337
 * For temporal MV referencing, we call dav1d_refmvs_save_tmvs() at the end of
338
 * each tile/sbrow (when tile column threading is enabled), or at the start of
339
 * each interleaved sbrow (i.e. once for all tile columns together, when tile
340
 * column threading is disabled). This will copy the 4x4-resolution spatial MVs
341
 * into 8x8-resolution refmvs_temporal_block structures. Then, for subsequent
342
 * frames, at the start of each tile/sbrow (when tile column threading is
343
 * enabled) or at the start of each interleaved sbrow (when tile column
344
 * threading is disabled), we call load_tmvs(), which will project the MVs to
345
 * their respective position in the current frame.
346
 */
347
348
void dav1d_refmvs_find(const refmvs_tile *const rt,
349
                       refmvs_candidate mvstack[8], int *const cnt,
350
                       int *const ctx,
351
                       const union refmvs_refpair ref, const enum BlockSize bs,
352
                       const enum EdgeFlags edge_flags,
353
                       const int by4, const int bx4)
354
376k
{
355
376k
    const refmvs_frame *const rf = rt->rf;
356
376k
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
357
376k
    const int bw4 = b_dim[0], w4 = imin(imin(bw4, 16), rt->tile_col.end - bx4);
358
376k
    const int bh4 = b_dim[1], h4 = imin(imin(bh4, 16), rt->tile_row.end - by4);
359
376k
    mv gmv[2], tgmv[2];
360
361
376k
    *cnt = 0;
362
376k
    assert(ref.ref[0] >=  0 && ref.ref[0] <= 8 &&
363
376k
           ref.ref[1] >= -1 && ref.ref[1] <= 8);
364
376k
    if (ref.ref[0] > 0) {
365
154k
        tgmv[0] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[0] - 1],
366
154k
                             bx4, by4, bw4, bh4, rf->frm_hdr);
367
154k
        gmv[0] = rf->frm_hdr->gmv[ref.ref[0] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
368
109k
                 tgmv[0] : (mv) { .n = INVALID_MV };
369
222k
    } else {
370
222k
        tgmv[0] = (mv) { .n = 0 };
371
222k
        gmv[0] = (mv) { .n = INVALID_MV };
372
222k
    }
373
376k
    if (ref.ref[1] > 0) {
374
32.8k
        tgmv[1] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[1] - 1],
375
32.8k
                             bx4, by4, bw4, bh4, rf->frm_hdr);
376
32.8k
        gmv[1] = rf->frm_hdr->gmv[ref.ref[1] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
377
25.4k
                 tgmv[1] : (mv) { .n = INVALID_MV };
378
32.8k
    }
379
380
    // top
381
376k
    int have_newmv = 0, have_col_mvs = 0, have_row_mvs = 0;
382
376k
    unsigned max_rows = 0, n_rows = ~0;
383
376k
    const refmvs_block *b_top;
384
376k
    if (by4 > rt->tile_row.start) {
385
219k
        max_rows = imin((by4 - rt->tile_row.start + 1) >> 1, 2 + (bh4 > 1));
386
219k
        b_top = &rt->r[(by4 & 31) - 1 + 5][bx4];
387
219k
        n_rows = scan_row(mvstack, cnt, ref, gmv, b_top,
388
219k
                          bw4, w4, max_rows, bw4 >= 16 ? 4 : 1,
389
219k
                          &have_newmv, &have_row_mvs);
390
219k
    }
391
392
    // left
393
376k
    unsigned max_cols = 0, n_cols = ~0U;
394
376k
    refmvs_block *const *b_left;
395
376k
    if (bx4 > rt->tile_col.start) {
396
330k
        max_cols = imin((bx4 - rt->tile_col.start + 1) >> 1, 2 + (bw4 > 1));
397
330k
        b_left = &rt->r[(by4 & 31) + 5];
398
330k
        n_cols = scan_col(mvstack, cnt, ref, gmv, b_left,
399
330k
                          bh4, h4, bx4 - 1, max_cols, bh4 >= 16 ? 4 : 1,
400
330k
                          &have_newmv, &have_col_mvs);
401
330k
    }
402
403
    // top/right
404
376k
    if (n_rows != ~0U && edge_flags & EDGE_I444_TOP_HAS_RIGHT &&
405
126k
        imax(bw4, bh4) <= 16 && bw4 + bx4 < rt->tile_col.end)
406
103k
    {
407
103k
        add_spatial_candidate(mvstack, cnt, 4, &b_top[bw4], ref, gmv,
408
103k
                              &have_newmv, &have_row_mvs);
409
103k
    }
410
411
376k
    const int nearest_match = have_col_mvs + have_row_mvs;
412
376k
    const int nearest_cnt = *cnt;
413
845k
    for (int n = 0; n < nearest_cnt; n++)
414
468k
        mvstack[n].weight += 640;
415
416
    // temporal
417
376k
    int globalmv_ctx = rf->frm_hdr->use_ref_frame_mvs;
418
376k
    if (rf->use_ref_frame_mvs) {
419
6.54k
        const ptrdiff_t stride = rf->rp_stride;
420
6.54k
        const int by8 = by4 >> 1, bx8 = bx4 >> 1;
421
6.54k
        const refmvs_temporal_block *const rbi = &rt->rp_proj[(by8 & 15) * stride + bx8];
422
6.54k
        const refmvs_temporal_block *rb = rbi;
423
6.54k
        const int step_h = bw4 >= 16 ? 2 : 1, step_v = bh4 >= 16 ? 2 : 1;
424
6.54k
        const int w8 = imin((w4 + 1) >> 1, 8), h8 = imin((h4 + 1) >> 1, 8);
425
18.0k
        for (int y = 0; y < h8; y += step_v) {
426
28.8k
            for (int x = 0; x < w8; x+= step_h) {
427
17.4k
                add_temporal_candidate(rf, mvstack, cnt, &rb[x], ref,
428
17.4k
                                       !(x | y) ? &globalmv_ctx : NULL, tgmv);
429
17.4k
            }
430
11.4k
            rb += stride * step_v;
431
11.4k
        }
432
6.54k
        if (imin(bw4, bh4) >= 2 && imax(bw4, bh4) < 16) {
433
3.79k
            const int bh8 = bh4 >> 1, bw8 = bw4 >> 1;
434
3.79k
            rb = &rbi[bh8 * stride];
435
3.79k
            const int has_bottom = by8 + bh8 < imin(rt->tile_row.end >> 1,
436
3.79k
                                                    (by8 & ~7) + 8);
437
3.79k
            if (has_bottom && bx8 - 1 >= imax(rt->tile_col.start >> 1, bx8 & ~7)) {
438
1.51k
                add_temporal_candidate(rf, mvstack, cnt, &rb[-1], ref,
439
1.51k
                                       NULL, NULL);
440
1.51k
            }
441
3.79k
            if (bx8 + bw8 < imin(rt->tile_col.end >> 1, (bx8 & ~7) + 8)) {
442
2.08k
                if (has_bottom) {
443
1.42k
                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8], ref,
444
1.42k
                                           NULL, NULL);
445
1.42k
                }
446
2.08k
                if (by8 + bh8 - 1 < imin(rt->tile_row.end >> 1, (by8 & ~7) + 8)) {
447
2.01k
                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8 - stride],
448
2.01k
                                           ref, NULL, NULL);
449
2.01k
                }
450
2.08k
            }
451
3.79k
        }
452
6.54k
    }
453
376k
    assert(*cnt <= 8);
454
455
    // top/left (which, confusingly, is part of "secondary" references)
456
376k
    int have_dummy_newmv_match;
457
376k
    if ((n_rows | n_cols) != ~0U) {
458
185k
        add_spatial_candidate(mvstack, cnt, 4, &b_top[-1], ref, gmv,
459
185k
                              &have_dummy_newmv_match, &have_row_mvs);
460
185k
    }
461
462
    // "secondary" (non-direct neighbour) top & left edges
463
    // what is different about secondary is that everything is now in 8x8 resolution
464
1.12M
    for (int n = 2; n <= 3; n++) {
465
751k
        if ((unsigned) n > n_rows && (unsigned) n <= max_rows) {
466
136k
            n_rows += scan_row(mvstack, cnt, ref, gmv,
467
136k
                               &rt->r[(((by4 & 31) - 2 * n + 1) | 1) + 5][bx4 | 1],
468
136k
                               bw4, w4, 1 + max_rows - n, bw4 >= 16 ? 4 : 2,
469
136k
                               &have_dummy_newmv_match, &have_row_mvs);
470
136k
        }
471
472
751k
        if ((unsigned) n > n_cols && (unsigned) n <= max_cols) {
473
331k
            n_cols += scan_col(mvstack, cnt, ref, gmv, &rt->r[((by4 & 31) | 1) + 5],
474
331k
                               bh4, h4, (bx4 - n * 2 + 1) | 1,
475
331k
                               1 + max_cols - n, bh4 >= 16 ? 4 : 2,
476
331k
                               &have_dummy_newmv_match, &have_col_mvs);
477
331k
        }
478
751k
    }
479
376k
    assert(*cnt <= 8);
480
481
376k
    const int ref_match_count = have_col_mvs + have_row_mvs;
482
483
    // context build-up
484
376k
    int refmv_ctx, newmv_ctx;
485
376k
    switch (nearest_match) {
486
62.2k
    case 0:
487
62.2k
        refmv_ctx = imin(2, ref_match_count);
488
62.2k
        newmv_ctx = ref_match_count > 0;
489
62.2k
        break;
490
188k
    case 1:
491
188k
        refmv_ctx = imin(ref_match_count * 3, 4);
492
188k
        newmv_ctx = 3 - have_newmv;
493
188k
        break;
494
125k
    case 2:
495
125k
        refmv_ctx = 5;
496
125k
        newmv_ctx = 5 - have_newmv;
497
125k
        break;
498
376k
    }
499
500
    // sorting (nearest, then "secondary")
501
376k
    int len = nearest_cnt;
502
780k
    while (len) {
503
404k
        int last = 0;
504
600k
        for (int n = 1; n < len; n++) {
505
196k
            if (mvstack[n - 1].weight < mvstack[n].weight) {
506
98.8k
#define EXCHANGE(a, b) do { refmvs_candidate tmp = a; a = b; b = tmp; } while (0)
507
93.3k
                EXCHANGE(mvstack[n - 1], mvstack[n]);
508
93.3k
                last = n;
509
93.3k
            }
510
196k
        }
511
404k
        len = last;
512
404k
    }
513
376k
    len = *cnt;
514
606k
    while (len > nearest_cnt) {
515
230k
        int last = nearest_cnt;
516
327k
        for (int n = nearest_cnt + 1; n < len; n++) {
517
97.3k
            if (mvstack[n - 1].weight < mvstack[n].weight) {
518
5.42k
                EXCHANGE(mvstack[n - 1], mvstack[n]);
519
5.42k
#undef EXCHANGE
520
5.42k
                last = n;
521
5.42k
            }
522
97.3k
        }
523
230k
        len = last;
524
230k
    }
525
526
376k
    if (ref.ref[1] > 0) {
527
32.8k
        if (*cnt < 2) {
528
25.3k
            const int sign0 = rf->sign_bias[ref.ref[0] - 1];
529
25.3k
            const int sign1 = rf->sign_bias[ref.ref[1] - 1];
530
25.3k
            const int sz4 = imin(w4, h4);
531
25.3k
            refmvs_candidate *const same = &mvstack[*cnt];
532
25.3k
            int same_count[4] = { 0 };
533
534
            // non-self references in top
535
37.0k
            if (n_rows != ~0U) for (int x = 0; x < sz4;) {
536
19.4k
                const refmvs_block *const cand_b = &b_top[x];
537
19.4k
                add_compound_extended_candidate(same, same_count, cand_b,
538
19.4k
                                                sign0, sign1, ref, rf->sign_bias);
539
19.4k
                x += dav1d_block_dimensions[cand_b->bs][0];
540
19.4k
            }
541
542
            // non-self references in left
543
37.0k
            if (n_cols != ~0U) for (int y = 0; y < sz4;) {
544
19.2k
                const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
545
19.2k
                add_compound_extended_candidate(same, same_count, cand_b,
546
19.2k
                                                sign0, sign1, ref, rf->sign_bias);
547
19.2k
                y += dav1d_block_dimensions[cand_b->bs][1];
548
19.2k
            }
549
550
25.3k
            refmvs_candidate *const diff = &same[2];
551
25.3k
            const int *const diff_count = &same_count[2];
552
553
            // merge together
554
75.9k
            for (int n = 0; n < 2; n++) {
555
50.6k
                int m = same_count[n];
556
557
50.6k
                if (m >= 2) continue;
558
559
40.5k
                const int l = diff_count[n];
560
40.5k
                if (l) {
561
31.9k
                    same[m].mv.mv[n] = diff[0].mv.mv[n];
562
31.9k
                    if (++m == 2) continue;
563
11.4k
                    if (l == 2) {
564
8.32k
                        same[1].mv.mv[n] = diff[1].mv.mv[n];
565
8.32k
                        continue;
566
8.32k
                    }
567
11.4k
                }
568
18.4k
                do {
569
18.4k
                    same[m].mv.mv[n] = tgmv[n];
570
18.4k
                } while (++m < 2);
571
11.7k
            }
572
573
            // if the first extended was the same as the non-extended one,
574
            // then replace it with the second extended one
575
25.3k
            int n = *cnt;
576
25.3k
            if (n == 1 && mvstack[0].mv.n == same[0].mv.n)
577
8.86k
                mvstack[1].mv = mvstack[2].mv;
578
38.7k
            do {
579
38.7k
                mvstack[n].weight = 2;
580
38.7k
            } while (++n < 2);
581
25.3k
            *cnt = 2;
582
25.3k
        }
583
584
        // clamping
585
32.8k
        const int left = -(bx4 + bw4 + 4) * 4 * 8;
586
32.8k
        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
587
32.8k
        const int top = -(by4 + bh4 + 4) * 4 * 8;
588
32.8k
        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
589
590
32.8k
        const int n_refmvs = *cnt;
591
32.8k
        int n = 0;
592
70.2k
        do {
593
70.2k
            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
594
70.2k
            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
595
70.2k
            mvstack[n].mv.mv[1].x = iclip(mvstack[n].mv.mv[1].x, left, right);
596
70.2k
            mvstack[n].mv.mv[1].y = iclip(mvstack[n].mv.mv[1].y, top, bottom);
597
70.2k
        } while (++n < n_refmvs);
598
599
32.8k
        switch (refmv_ctx >> 1) {
600
15.1k
        case 0:
601
15.1k
            *ctx = imin(newmv_ctx, 1);
602
15.1k
            break;
603
11.6k
        case 1:
604
11.6k
            *ctx = 1 + imin(newmv_ctx, 3);
605
11.6k
            break;
606
6.02k
        case 2:
607
6.02k
            *ctx = iclip(3 + newmv_ctx, 4, 7);
608
6.02k
            break;
609
32.8k
        }
610
611
32.8k
        return;
612
343k
    } else if (*cnt < 2 && ref.ref[0] > 0) {
613
82.0k
        const int sign = rf->sign_bias[ref.ref[0] - 1];
614
82.0k
        const int sz4 = imin(w4, h4);
615
616
        // non-self references in top
617
107k
        if (n_rows != ~0U) for (int x = 0; x < sz4 && *cnt < 2;) {
618
54.9k
            const refmvs_block *const cand_b = &b_top[x];
619
54.9k
            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
620
54.9k
            x += dav1d_block_dimensions[cand_b->bs][0];
621
54.9k
        }
622
623
        // non-self references in left
624
102k
        if (n_cols != ~0U) for (int y = 0; y < sz4 && *cnt < 2;) {
625
49.4k
            const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
626
49.4k
            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
627
49.4k
            y += dav1d_block_dimensions[cand_b->bs][1];
628
49.4k
        }
629
82.0k
    }
630
343k
    assert(*cnt <= 8);
631
632
    // clamping
633
343k
    int n_refmvs = *cnt;
634
343k
    if (n_refmvs) {
635
316k
        const int left = -(bx4 + bw4 + 4) * 4 * 8;
636
316k
        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
637
316k
        const int top = -(by4 + bh4 + 4) * 4 * 8;
638
316k
        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
639
640
316k
        int n = 0;
641
785k
        do {
642
785k
            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
643
785k
            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
644
785k
        } while (++n < n_refmvs);
645
316k
    }
646
647
471k
    for (int n = *cnt; n < 2; n++)
648
128k
        mvstack[n].mv.mv[0] = tgmv[0];
649
650
343k
    *ctx = (refmv_ctx << 4) | (globalmv_ctx << 3) | newmv_ctx;
651
343k
}
652
653
/*
 * Prepare the tile state `rt` for decoding superblock row `sby` of a tile.
 *
 * Sets up rt->rp_proj and the row pointer table rt->r[]: starting at `off`
 * (0 or 16, from (sbsz * sby) & 16), entries off+5 .. off+5+sbsz-1 point at
 * consecutive rows of the frame-level buffer rf->r, entries off+0/2/4 at the
 * three rows stored after them, and entries off+1/3 are NULL. On odd sbrows
 * the off+0/2/4 entries are exchanged with the entries sbsz positions
 * further down.
 *
 * tile_row_idx is forced to 0 when rf->n_tile_threads == 1. With more than
 * one frame thread, pass 2 uses a second region of rf->r located
 * 35 * 2 * rf->n_blocks entries further in. The tile bounds are stored in
 * 4x4 units, with the end bounds clipped to the frame size (rf->iw4/ih4).
 */
void dav1d_refmvs_tile_sbrow_init(refmvs_tile *const rt, const refmvs_frame *const rf,
                                  const int tile_col_start4, const int tile_col_end4,
                                  const int tile_row_start4, const int tile_row_end4,
                                  const int sby, int tile_row_idx, const int pass)
{
    if (rf->n_tile_threads == 1)
        tile_row_idx = 0;

    rt->rp_proj = &rf->rp_proj[16 * rf->rp_stride * tile_row_idx];

    const ptrdiff_t r_stride = rf->rp_stride * 2;
    ptrdiff_t pass_off = 0;
    if (rf->n_frame_threads > 1 && pass == 2)
        pass_off = 35 * 2 * rf->n_blocks;

    refmvs_block *row = &rf->r[35 * r_stride * tile_row_idx + pass_off];
    const int sbsz = rf->sbsz;
    const int off = (sbsz * sby) & 16;

    // rows belonging to the current superblock row
    for (int i = 0; i < sbsz; i++) {
        rt->r[off + 5 + i] = row;
        row += r_stride;
    }

    // three extra rows, at the even slots of the five leading entries
    rt->r[off + 0] = row;
    rt->r[off + 1] = NULL;
    rt->r[off + 2] = row + r_stride;
    rt->r[off + 3] = NULL;
    rt->r[off + 4] = row + 2 * r_stride;

    if (sby & 1) {
        for (int k = 0; k <= 4; k += 2) {
            void *const tmp = rt->r[off + k];
            rt->r[off + k] = rt->r[off + sbsz + k];
            rt->r[off + sbsz + k] = tmp;
        }
    }

    rt->rf = rf;
    rt->tile_row.start = tile_row_start4;
    rt->tile_row.end = imin(tile_row_end4, rf->ih4);
    rt->tile_col.start = tile_col_start4;
    rt->tile_col.end = imin(tile_col_end4, rf->iw4);
}
689
690
static void load_tmvs_c(const refmvs_frame *const rf, int tile_row_idx,
691
                        const int col_start8, const int col_end8,
692
                        const int row_start8, int row_end8)
693
2.74k
{
694
2.74k
    if (rf->n_tile_threads == 1) tile_row_idx = 0;
695
2.74k
    assert(row_start8 >= 0);
696
2.74k
    assert((unsigned) (row_end8 - row_start8) <= 16U);
697
2.74k
    row_end8 = imin(row_end8, rf->ih8);
698
2.74k
    const int col_start8i = imax(col_start8 - 8, 0);
699
2.74k
    const int col_end8i = imin(col_end8 + 8, rf->iw8);
700
701
2.74k
    const ptrdiff_t stride = rf->rp_stride;
702
2.74k
    refmvs_temporal_block *rp_proj =
703
2.74k
        &rf->rp_proj[16 * stride * tile_row_idx + (row_start8 & 15) * stride];
704
18.0k
    for (int y = row_start8; y < row_end8; y++) {
705
53.8k
        for (int x = col_start8; x < col_end8; x++)
706
38.5k
            rp_proj[x].mv.n = INVALID_MV;
707
15.3k
        rp_proj += stride;
708
15.3k
    }
709
710
2.74k
    rp_proj = &rf->rp_proj[16 * stride * tile_row_idx];
711
4.70k
    for (int n = 0; n < rf->n_mfmvs; n++) {
712
1.95k
        const int ref2cur = rf->mfmv_ref2cur[n];
713
1.95k
        if (ref2cur == INVALID_REF2CUR) continue;
714
715
1.60k
        const int ref = rf->mfmv_ref[n];
716
1.60k
        const int ref_sign = ref - 4;
717
1.60k
        const refmvs_temporal_block *r = &rf->rp_ref[ref][row_start8 * stride];
718
11.1k
        for (int y = row_start8; y < row_end8; y++) {
719
9.54k
            const int y_sb_align = y & ~7;
720
9.54k
            const int y_proj_start = imax(y_sb_align, row_start8);
721
9.54k
            const int y_proj_end = imin(y_sb_align + 8, row_end8);
722
26.3k
            for (int x = col_start8i; x < col_end8i; x++) {
723
16.8k
                const refmvs_temporal_block *rb = &r[x];
724
16.8k
                const int b_ref = rb->ref;
725
16.8k
                if (!b_ref) continue;
726
9.52k
                const int ref2ref = rf->mfmv_ref2ref[n][b_ref - 1];
727
9.52k
                if (!ref2ref) continue;
728
4.86k
                const mv b_mv = rb->mv;
729
4.86k
                const mv offset = mv_projection(b_mv, ref2cur, ref2ref);
730
4.86k
                int pos_x = x + apply_sign(abs(offset.x) >> 6,
731
4.86k
                                           offset.x ^ ref_sign);
732
4.86k
                const int pos_y = y + apply_sign(abs(offset.y) >> 6,
733
4.86k
                                                 offset.y ^ ref_sign);
734
4.86k
                if (pos_y >= y_proj_start && pos_y < y_proj_end) {
735
4.39k
                    const ptrdiff_t pos = (pos_y & 15) * stride;
736
23.9k
                    for (;;) {
737
23.9k
                        const int x_sb_align = x & ~7;
738
23.9k
                        if (pos_x >= imax(x_sb_align - 8, col_start8) &&
739
23.7k
                            pos_x < imin(x_sb_align + 16, col_end8))
740
23.4k
                        {
741
23.4k
                            rp_proj[pos + pos_x].mv = rb->mv;
742
23.4k
                            rp_proj[pos + pos_x].ref = ref2ref;
743
23.4k
                        }
744
23.9k
                        if (++x >= col_end8i) break;
745
20.7k
                        rb++;
746
20.7k
                        if (rb->ref != b_ref || rb->mv.n != b_mv.n) break;
747
19.5k
                        pos_x++;
748
19.5k
                    }
749
4.39k
                } else {
750
1.16k
                    for (;;) {
751
1.16k
                        if (++x >= col_end8i) break;
752
852
                        rb++;
753
852
                        if (rb->ref != b_ref || rb->mv.n != b_mv.n) break;
754
852
                    }
755
471
                }
756
4.86k
                x--;
757
4.86k
            }
758
9.54k
            r += stride;
759
9.54k
        }
760
1.60k
    }
761
2.74k
}
762
763
static void save_tmvs_c(refmvs_temporal_block *rp, const ptrdiff_t stride,
764
                        refmvs_block *const *const rr,
765
                        const uint8_t *const ref_sign,
766
                        const int col_end8, const int row_end8,
767
                        const int col_start8, const int row_start8)
768
9.15k
{
769
63.4k
    for (int y = row_start8; y < row_end8; y++) {
770
54.2k
        const refmvs_block *const b = rr[(y & 15) * 2];
771
772
127k
        for (int x = col_start8; x < col_end8;) {
773
73.1k
            const refmvs_block *const cand_b = &b[x * 2 + 1];
774
73.1k
            const int bw8 = (dav1d_block_dimensions[cand_b->bs][0] + 1) >> 1;
775
776
73.1k
            if (cand_b->ref.ref[1] > 0 && ref_sign[cand_b->ref.ref[1] - 1] &&
777
7.88k
                (abs(cand_b->mv.mv[1].y) | abs(cand_b->mv.mv[1].x)) < 4096)
778
7.03k
            {
779
19.3k
                for (int n = 0; n < bw8; n++, x++)
780
12.3k
                    rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[1],
781
12.3k
                                                      .ref = cand_b->ref.ref[1] };
782
66.1k
            } else if (cand_b->ref.ref[0] > 0 && ref_sign[cand_b->ref.ref[0] - 1] &&
783
26.7k
                       (abs(cand_b->mv.mv[0].y) | abs(cand_b->mv.mv[0].x)) < 4096)
784
25.5k
            {
785
96.2k
                for (int n = 0; n < bw8; n++, x++)
786
70.7k
                    rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[0],
787
70.7k
                                                      .ref = cand_b->ref.ref[0] };
788
40.6k
            } else {
789
136k
                for (int n = 0; n < bw8; n++, x++) {
790
95.9k
                    rp[x].mv.n = 0;
791
95.9k
                    rp[x].ref = 0; // "invalid"
792
95.9k
                }
793
40.6k
            }
794
73.1k
        }
795
54.2k
        rp += stride;
796
54.2k
    }
797
9.15k
}
798
799
int dav1d_refmvs_init_frame(refmvs_frame *const rf,
800
                            const Dav1dSequenceHeader *const seq_hdr,
801
                            const Dav1dFrameHeader *const frm_hdr,
802
                            const uint8_t ref_poc[7],
803
                            refmvs_temporal_block *const rp,
804
                            const uint8_t ref_ref_poc[7][7],
805
                            /*const*/ refmvs_temporal_block *const rp_ref[7],
806
                            const int n_tile_threads, const int n_frame_threads)
807
19.4k
{
808
19.4k
    const int rp_stride = ((frm_hdr->width[0] + 127) & ~127) >> 3;
809
19.4k
    const int n_tile_rows = n_tile_threads > 1 ? frm_hdr->tiling.rows : 1;
810
19.4k
    const int n_blocks = rp_stride * n_tile_rows;
811
812
19.4k
    rf->sbsz = 16 << seq_hdr->sb128;
813
19.4k
    rf->frm_hdr = frm_hdr;
814
19.4k
    rf->iw8 = (frm_hdr->width[0] + 7) >> 3;
815
19.4k
    rf->ih8 = (frm_hdr->height + 7) >> 3;
816
19.4k
    rf->iw4 = rf->iw8 << 1;
817
19.4k
    rf->ih4 = rf->ih8 << 1;
818
19.4k
    rf->rp = rp;
819
19.4k
    rf->rp_stride = rp_stride;
820
19.4k
    rf->n_tile_threads = n_tile_threads;
821
19.4k
    rf->n_frame_threads = n_frame_threads;
822
823
19.4k
    if (n_blocks != rf->n_blocks) {
824
19.3k
        const size_t r_sz = sizeof(*rf->r) * 35 * 2 * n_blocks * (1 + (n_frame_threads > 1));
825
19.3k
        const size_t rp_proj_sz = sizeof(*rf->rp_proj) * 16 * n_blocks;
826
        /* Note that sizeof(*rf->r) == 12, but it's accessed using 16-byte unaligned
827
         * loads in save_tmvs() asm which can overread 4 bytes into rp_proj. */
828
19.3k
        dav1d_free_aligned(rf->r);
829
19.3k
        rf->r = dav1d_alloc_aligned(ALLOC_REFMVS, r_sz + rp_proj_sz, 64);
830
19.3k
        if (!rf->r) {
831
0
            rf->n_blocks = 0;
832
0
            return DAV1D_ERR(ENOMEM);
833
0
        }
834
835
19.3k
        rf->rp_proj = (refmvs_temporal_block*)((uintptr_t)rf->r + r_sz);
836
19.3k
        rf->n_blocks = n_blocks;
837
19.3k
    }
838
839
19.4k
    const int poc = frm_hdr->frame_offset;
840
155k
    for (int i = 0; i < 7; i++) {
841
136k
        const int poc_diff = get_poc_diff(seq_hdr->order_hint_n_bits,
842
136k
                                          ref_poc[i], poc);
843
136k
        rf->sign_bias[i] = poc_diff > 0;
844
136k
        rf->mfmv_sign[i] = poc_diff < 0;
845
136k
        rf->pocdiff[i] = iclip(get_poc_diff(seq_hdr->order_hint_n_bits,
846
136k
                                            poc, ref_poc[i]), -31, 31);
847
136k
    }
848
849
    // temporal MV setup
850
19.4k
    rf->n_mfmvs = 0;
851
19.4k
    rf->rp_ref = rp_ref;
852
19.4k
    if (frm_hdr->use_ref_frame_mvs && seq_hdr->order_hint_n_bits) {
853
2.28k
        int total = 2;
854
2.28k
        if (rp_ref[0] && ref_ref_poc[0][6] != ref_poc[3] /* alt-of-last != gold */) {
855
291
            rf->mfmv_ref[rf->n_mfmvs++] = 0; // last
856
291
            total = 3;
857
291
        }
858
2.28k
        if (rp_ref[4] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[4],
859
468
                                      frm_hdr->frame_offset) > 0)
860
287
        {
861
287
            rf->mfmv_ref[rf->n_mfmvs++] = 4; // bwd
862
287
        }
863
2.28k
        if (rp_ref[5] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[5],
864
478
                                      frm_hdr->frame_offset) > 0)
865
298
        {
866
298
            rf->mfmv_ref[rf->n_mfmvs++] = 5; // altref2
867
298
        }
868
2.28k
        if (rf->n_mfmvs < total && rp_ref[6] &&
869
293
            get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[6],
870
293
                         frm_hdr->frame_offset) > 0)
871
131
        {
872
131
            rf->mfmv_ref[rf->n_mfmvs++] = 6; // altref
873
131
        }
874
2.28k
        if (rf->n_mfmvs < total && rp_ref[1])
875
578
            rf->mfmv_ref[rf->n_mfmvs++] = 1; // last2
876
877
3.87k
        for (int n = 0; n < rf->n_mfmvs; n++) {
878
1.58k
            const int rpoc = ref_poc[rf->mfmv_ref[n]];
879
1.58k
            const int diff1 = get_poc_diff(seq_hdr->order_hint_n_bits,
880
1.58k
                                           rpoc, frm_hdr->frame_offset);
881
1.58k
            if (abs(diff1) > 31) {
882
271
                rf->mfmv_ref2cur[n] = INVALID_REF2CUR;
883
1.31k
            } else {
884
1.31k
                rf->mfmv_ref2cur[n] = rf->mfmv_ref[n] < 4 ? -diff1 : diff1;
885
10.5k
                for (int m = 0; m < 7; m++) {
886
9.19k
                    const int rrpoc = ref_ref_poc[rf->mfmv_ref[n]][m];
887
9.19k
                    const int diff2 = get_poc_diff(seq_hdr->order_hint_n_bits,
888
9.19k
                                                   rpoc, rrpoc);
889
                    // unsigned comparison also catches the < 0 case
890
9.19k
                    rf->mfmv_ref2ref[n][m] = (unsigned) diff2 > 31U ? 0 : diff2;
891
9.19k
                }
892
1.31k
            }
893
1.58k
        }
894
2.28k
    }
895
19.4k
    rf->use_ref_frame_mvs = rf->n_mfmvs > 0;
896
897
19.4k
    return 0;
898
19.4k
}
899
900
/*
 * Replicate one block descriptor over a rectangle of the block-row table.
 *
 *   rr    array of row pointers; bh4 consecutive entries are consumed
 *   rmv   descriptor to copy into every covered position
 *   bx4   column at which the rectangle starts within each row
 *   bw4   rectangle width, in entries
 *   bh4   rectangle height, in rows; the first row is always processed
 *         before the count is checked
 */
static void splat_mv_c(refmvs_block **rr, const refmvs_block *const rmv,
                       const int bx4, const int bw4, int bh4)
{
    do {
        refmvs_block *dst = *rr + bx4;
        for (int left = bw4; left > 0; left--)
            *dst++ = *rmv;
        rr++;
    } while (--bh4);
}
909
910
#if HAVE_ASM
911
#if ARCH_AARCH64 || ARCH_ARM
912
#include "src/arm/refmvs.h"
913
#elif ARCH_LOONGARCH64
914
#include "src/loongarch/refmvs.h"
915
#elif ARCH_X86
916
#include "src/x86/refmvs.h"
917
#endif
918
#endif
919
920
/*
 * Populate the refmvs DSP function table.
 *
 * The C implementations are installed first. When assembly is enabled, the
 * init routine for the target architecture is then called with the table so
 * it can make its own changes.
 */
COLD void dav1d_refmvs_dsp_init(Dav1dRefmvsDSPContext *const c)
{
    c->splat_mv = splat_mv_c;
    c->save_tmvs = save_tmvs_c;
    c->load_tmvs = load_tmvs_c;

#if HAVE_ASM && (ARCH_AARCH64 || ARCH_ARM)
    refmvs_dsp_init_arm(c);
#elif HAVE_ASM && ARCH_LOONGARCH64
    refmvs_dsp_init_loongarch(c);
#elif HAVE_ASM && ARCH_X86
    refmvs_dsp_init_x86(c);
#endif
}