Coverage Report

Created: 2026-05-30 06:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/dav1d/src/refmvs.c
Line
Count
Source
1
/*
2
 * Copyright © 2020, VideoLAN and dav1d authors
3
 * Copyright © 2020, Two Orioles, LLC
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * 1. Redistributions of source code must retain the above copyright notice, this
10
 *    list of conditions and the following disclaimer.
11
 *
12
 * 2. Redistributions in binary form must reproduce the above copyright notice,
13
 *    this list of conditions and the following disclaimer in the documentation
14
 *    and/or other materials provided with the distribution.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
 */
27
28
#include "config.h"
29
30
#include <limits.h>
31
#include <stdlib.h>
32
33
#include "dav1d/common.h"
34
35
#include "common/intops.h"
36
37
#include "src/env.h"
38
#include "src/mem.h"
39
#include "src/refmvs.h"
40
41
/*
 * Offer the motion vector(s) of spatial neighbour `b` to the candidate stack.
 *
 * mvstack/cnt:      candidate list and its current length (at most 8 entries
 *                   are ever stored).
 * weight:           weight added to a matching existing entry, or assigned to
 *                   a newly appended one.
 * ref:              reference (pair) being searched for; ref.ref[1] == -1
 *                   selects the single-reference path.
 * gmv:              global MVs; gmv[i] replaces the block's own MV when bit 0
 *                   of b->mf is set and gmv[i] is not INVALID_MV.
 * have_newmv_match: OR-ed with (b->mf >> 1) when the reference matches.
 * have_refmv_match: set to 1 when the reference matches.
 */
static void add_spatial_candidate(refmvs_candidate *const mvstack, int *const cnt,
                                  const int weight, const refmvs_block *const b,
                                  const union refmvs_refpair ref, const mv gmv[2],
                                  int *const have_newmv_match,
                                  int *const have_refmv_match)
{
    // intra block, no intrabc
    if (b->mv.mv[0].n == INVALID_MV)
        return;

    const int use_global = b->mf & 1;

    if (ref.ref[1] == -1) {
        // Single reference: take the first of the block's two refs that matches.
        int idx = 0;
        while (idx < 2 && b->ref.ref[idx] != ref.ref[0])
            idx++;
        if (idx == 2)
            return;

        mv cand = b->mv.mv[idx];
        if (use_global && gmv[0].n != INVALID_MV)
            cand = gmv[0];

        *have_refmv_match = 1;
        *have_newmv_match |= b->mf >> 1;

        const int n_cand = *cnt;
        int slot = 0;
        while (slot < n_cand && mvstack[slot].mv.mv[0].n != cand.n)
            slot++;

        if (slot < n_cand) {
            // Already present: only accumulate the weight.
            mvstack[slot].weight += weight;
        } else if (n_cand < 8) {
            mvstack[n_cand].mv.mv[0] = cand;
            mvstack[n_cand].weight = weight;
            *cnt = n_cand + 1;
        }
        return;
    }

    // Compound reference: both refs have to match as a pair.
    if (b->ref.pair != ref.pair)
        return;

    refmvs_mvpair cand;
    cand.mv[0] = (use_global && gmv[0].n != INVALID_MV) ? gmv[0] : b->mv.mv[0];
    cand.mv[1] = (use_global && gmv[1].n != INVALID_MV) ? gmv[1] : b->mv.mv[1];

    *have_refmv_match = 1;
    *have_newmv_match |= b->mf >> 1;

    const int n_cand = *cnt;
    int slot = 0;
    while (slot < n_cand && mvstack[slot].mv.n != cand.n)
        slot++;

    if (slot < n_cand) {
        mvstack[slot].weight += weight;
    } else if (n_cand < 8) {
        mvstack[n_cand].mv = cand;
        mvstack[n_cand].weight = weight;
        *cnt = n_cand + 1;
    }
}
96
97
static int scan_row(refmvs_candidate *const mvstack, int *const cnt,
98
                    const union refmvs_refpair ref, const mv gmv[2],
99
                    const refmvs_block *b, const int bw4, const int w4,
100
                    const int max_rows, const int step,
101
                    int *const have_newmv_match, int *const have_refmv_match)
102
293k
{
103
293k
    const refmvs_block *cand_b = b;
104
293k
    const enum BlockSize first_cand_bs = cand_b->bs;
105
293k
    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
106
293k
    int cand_bw4 = first_cand_b_dim[0];
107
293k
    int len = imax(step, imin(bw4, cand_bw4));
108
109
293k
    if (bw4 <= cand_bw4) {
110
        // FIXME weight can be higher for odd blocks (bx4 & 1), but then the
111
        // position of the first block has to be odd already, i.e. not just
112
        // for row_offset=-3/-5
113
        // FIXME why can this not be cand_bw4?
114
254k
        const int weight = bw4 == 1 ? 2 :
115
254k
                           imax(2, imin(2 * max_rows, first_cand_b_dim[1]));
116
254k
        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
117
254k
                              have_newmv_match, have_refmv_match);
118
254k
        return weight >> 1;
119
254k
    }
120
121
80.6k
    for (int x = 0;;) {
122
        // FIXME if we overhang above, we could fill a bitmask so we don't have
123
        // to repeat the add_spatial_candidate() for the next row, but just increase
124
        // the weight here
125
80.6k
        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
126
80.6k
                              have_newmv_match, have_refmv_match);
127
80.6k
        x += len;
128
80.6k
        if (x >= w4) return 1;
129
41.6k
        cand_b = &b[x];
130
41.6k
        cand_bw4 = dav1d_block_dimensions[cand_b->bs][0];
131
41.6k
        assert(cand_bw4 < bw4);
132
41.6k
        len = imax(step, cand_bw4);
133
41.6k
    }
134
38.9k
}
135
136
static int scan_col(refmvs_candidate *const mvstack, int *const cnt,
137
                    const union refmvs_refpair ref, const mv gmv[2],
138
                    /*const*/ refmvs_block *const *b, const int bh4, const int h4,
139
                    const int bx4, const int max_cols, const int step,
140
                    int *const have_newmv_match, int *const have_refmv_match)
141
316k
{
142
316k
    const refmvs_block *cand_b = &b[0][bx4];
143
316k
    const enum BlockSize first_cand_bs = cand_b->bs;
144
316k
    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
145
316k
    int cand_bh4 = first_cand_b_dim[1];
146
316k
    int len = imax(step, imin(bh4, cand_bh4));
147
148
316k
    if (bh4 <= cand_bh4) {
149
        // FIXME weight can be higher for odd blocks (by4 & 1), but then the
150
        // position of the first block has to be odd already, i.e. not just
151
        // for col_offset=-3/-5
152
        // FIXME why can this not be cand_bh4?
153
268k
        const int weight = bh4 == 1 ? 2 :
154
268k
                           imax(2, imin(2 * max_cols, first_cand_b_dim[0]));
155
268k
        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
156
268k
                            have_newmv_match, have_refmv_match);
157
268k
        return weight >> 1;
158
268k
    }
159
160
95.1k
    for (int y = 0;;) {
161
        // FIXME if we overhang above, we could fill a bitmask so we don't have
162
        // to repeat the add_spatial_candidate() for the next row, but just increase
163
        // the weight here
164
95.1k
        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
165
95.1k
                              have_newmv_match, have_refmv_match);
166
95.1k
        y += len;
167
95.1k
        if (y >= h4) return 1;
168
47.5k
        cand_b = &b[y][bx4];
169
47.5k
        cand_bh4 = dav1d_block_dimensions[cand_b->bs][1];
170
47.5k
        assert(cand_bh4 < bh4);
171
47.5k
        len = imax(step, cand_bh4);
172
47.5k
    }
173
47.6k
}
174
175
7.67k
static inline union mv mv_projection(const union mv mv, const int num, const int den) {
176
7.67k
    static const uint16_t div_mult[32] = {
177
7.67k
           0, 16384, 8192, 5461, 4096, 3276, 2730, 2340,
178
7.67k
        2048,  1820, 1638, 1489, 1365, 1260, 1170, 1092,
179
7.67k
        1024,   963,  910,  862,  819,  780,  744,  712,
180
7.67k
         682,   655,  630,  606,  585,  564,  546,  528
181
7.67k
    };
182
7.67k
    assert(den > 0 && den < 32);
183
7.67k
    assert(num > -32 && num < 32);
184
7.67k
    const int frac = num * div_mult[den];
185
7.67k
    const int y = mv.y * frac, x = mv.x * frac;
186
    // Round and clip according to AV1 spec section 7.9.3
187
7.67k
    return (union mv) { // 0x3fff == (1 << 14) - 1
188
7.67k
        .y = iclip((y + 8192 + (y >> 31)) >> 14, -0x3fff, 0x3fff),
189
7.67k
        .x = iclip((x + 8192 + (x >> 31)) >> 14, -0x3fff, 0x3fff)
190
7.67k
    };
191
7.67k
}
192
193
static void add_temporal_candidate(const refmvs_frame *const rf,
194
                                   refmvs_candidate *const mvstack, int *const cnt,
195
                                   const refmvs_temporal_block *const rb,
196
                                   const union refmvs_refpair ref, int *const globalmv_ctx,
197
                                   const union mv gmv[])
198
11.9k
{
199
11.9k
    if (rb->mv.n == INVALID_MV) return;
200
201
4.09k
    union mv mv = mv_projection(rb->mv, rf->pocdiff[ref.ref[0] - 1], rb->ref);
202
4.09k
    fix_mv_precision(rf->frm_hdr, &mv);
203
204
4.09k
    const int last = *cnt;
205
4.09k
    if (ref.ref[1] == -1) {
206
3.46k
        if (globalmv_ctx)
207
974
            *globalmv_ctx = (abs(mv.x - gmv[0].x) | abs(mv.y - gmv[0].y)) >= 16;
208
209
3.89k
        for (int n = 0; n < last; n++)
210
3.05k
            if (mvstack[n].mv.mv[0].n == mv.n) {
211
2.62k
                mvstack[n].weight += 2;
212
2.62k
                return;
213
2.62k
            }
214
840
        if (last < 8) {
215
840
            mvstack[last].mv.mv[0] = mv;
216
840
            mvstack[last].weight = 2;
217
840
            *cnt = last + 1;
218
840
        }
219
840
    } else {
220
623
        refmvs_mvpair mvp = { .mv = {
221
623
            [0] = mv,
222
623
            [1] = mv_projection(rb->mv, rf->pocdiff[ref.ref[1] - 1], rb->ref),
223
623
        }};
224
623
        fix_mv_precision(rf->frm_hdr, &mvp.mv[1]);
225
226
662
        for (int n = 0; n < last; n++)
227
401
            if (mvstack[n].mv.n == mvp.n) {
228
362
                mvstack[n].weight += 2;
229
362
                return;
230
362
            }
231
261
        if (last < 8) {
232
261
            mvstack[last].mv = mvp;
233
261
            mvstack[last].weight = 2;
234
261
            *cnt = last + 1;
235
261
        }
236
261
    }
237
4.09k
}
238
239
static void add_compound_extended_candidate(refmvs_candidate *const same,
240
                                            int *const same_count,
241
                                            const refmvs_block *const cand_b,
242
                                            const int sign0, const int sign1,
243
                                            const union refmvs_refpair ref,
244
                                            const uint8_t *const sign_bias)
245
4.02k
{
246
4.02k
    refmvs_candidate *const diff = &same[2];
247
4.02k
    int *const diff_count = &same_count[2];
248
249
10.5k
    for (int n = 0; n < 2; n++) {
250
8.00k
        const int cand_ref = cand_b->ref.ref[n];
251
252
8.00k
        if (cand_ref <= 0) break;
253
254
6.53k
        mv cand_mv = cand_b->mv.mv[n];
255
6.53k
        if (cand_ref == ref.ref[0]) {
256
2.46k
            if (same_count[0] < 2)
257
2.43k
                same[same_count[0]++].mv.mv[0] = cand_mv;
258
2.46k
            if (diff_count[1] < 2) {
259
2.25k
                if (sign1 ^ sign_bias[cand_ref - 1]) {
260
85
                    cand_mv.y = -cand_mv.y;
261
85
                    cand_mv.x = -cand_mv.x;
262
85
                }
263
2.25k
                diff[diff_count[1]++].mv.mv[1] = cand_mv;
264
2.25k
            }
265
4.07k
        } else if (cand_ref == ref.ref[1]) {
266
2.51k
            if (same_count[1] < 2)
267
2.48k
                same[same_count[1]++].mv.mv[1] = cand_mv;
268
2.51k
            if (diff_count[0] < 2) {
269
2.26k
                if (sign0 ^ sign_bias[cand_ref - 1]) {
270
94
                    cand_mv.y = -cand_mv.y;
271
94
                    cand_mv.x = -cand_mv.x;
272
94
                }
273
2.26k
                diff[diff_count[0]++].mv.mv[0] = cand_mv;
274
2.26k
            }
275
2.51k
        } else {
276
1.55k
            mv i_cand_mv = (union mv) {
277
1.55k
                .x = -cand_mv.x,
278
1.55k
                .y = -cand_mv.y
279
1.55k
            };
280
281
1.55k
            if (diff_count[0] < 2) {
282
1.33k
                diff[diff_count[0]++].mv.mv[0] =
283
1.33k
                    sign0 ^ sign_bias[cand_ref - 1] ?
284
1.31k
                    i_cand_mv : cand_mv;
285
1.33k
            }
286
287
1.55k
            if (diff_count[1] < 2) {
288
1.27k
                diff[diff_count[1]++].mv.mv[1] =
289
1.27k
                    sign1 ^ sign_bias[cand_ref - 1] ?
290
1.24k
                    i_cand_mv : cand_mv;
291
1.27k
            }
292
1.55k
        }
293
6.53k
    }
294
4.02k
}
295
296
static void add_single_extended_candidate(refmvs_candidate mvstack[8], int *const cnt,
297
                                          const refmvs_block *const cand_b,
298
                                          const int sign, const uint8_t *const sign_bias)
299
107k
{
300
216k
    for (int n = 0; n < 2; n++) {
301
214k
        const int cand_ref = cand_b->ref.ref[n];
302
303
214k
        if (cand_ref <= 0) break;
304
        // we need to continue even if cand_ref == ref.ref[0], since
305
        // the candidate could have been added as a globalmv variant,
306
        // which changes the value
307
        // FIXME if scan_{row,col}() returned a mask for the nearest
308
        // edge, we could skip the appropriate ones here
309
310
108k
        mv cand_mv = cand_b->mv.mv[n];
311
108k
        if (sign ^ sign_bias[cand_ref - 1]) {
312
132
            cand_mv.y = -cand_mv.y;
313
132
            cand_mv.x = -cand_mv.x;
314
132
        }
315
316
108k
        int m;
317
108k
        const int last = *cnt;
318
110k
        for (m = 0; m < last; m++)
319
106k
            if (cand_mv.n == mvstack[m].mv.mv[0].n)
320
103k
                break;
321
108k
        if (m == last) {
322
4.52k
            mvstack[m].mv.mv[0] = cand_mv;
323
4.52k
            mvstack[m].weight = 2; // "minimal"
324
4.52k
            *cnt = last + 1;
325
4.52k
        }
326
108k
    }
327
107k
}
328
329
/*
330
 * refmvs_frame allocates memory for one sbrow (32 blocks high, whole frame
331
 * wide) of 4x4-resolution refmvs_block entries for spatial MV referencing.
332
 * mvrefs_tile[] keeps a list of 35 (32 + 3 above) pointers into this memory,
333
 * and each sbrow, the bottom entries (y=27/29/31) are exchanged with the top
334
 * (-5/-3/-1) pointers by calling dav1d_refmvs_tile_sbrow_init() at the start
335
 * of each tile/sbrow.
336
 *
337
 * For temporal MV referencing, we call dav1d_refmvs_save_tmvs() at the end of
338
 * each tile/sbrow (when tile column threading is enabled), or at the start of
339
 * each interleaved sbrow (i.e. once for all tile columns together, when tile
340
 * column threading is disabled). This will copy the 4x4-resolution spatial MVs
341
 * into 8x8-resolution refmvs_temporal_block structures. Then, for subsequent
342
 * frames, at the start of each tile/sbrow (when tile column threading is
343
 * enabled) or at the start of each interleaved sbrow (when tile column
344
 * threading is disabled), we call load_tmvs(), which will project the MVs to
345
 * their respective position in the current frame.
346
 */
347
348
void dav1d_refmvs_find(const refmvs_tile *const rt,
349
                       refmvs_candidate mvstack[8], int *const cnt,
350
                       int *const ctx,
351
                       const union refmvs_refpair ref, const enum BlockSize bs,
352
                       const enum EdgeFlags edge_flags,
353
                       const int by4, const int bx4)
354
206k
{
355
206k
    const refmvs_frame *const rf = rt->rf;
356
206k
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
357
206k
    const int bw4 = b_dim[0], w4 = imin(imin(bw4, 16), rt->tile_col.end - bx4);
358
206k
    const int bh4 = b_dim[1], h4 = imin(imin(bh4, 16), rt->tile_row.end - by4);
359
206k
    mv gmv[2], tgmv[2];
360
361
206k
    *cnt = 0;
362
206k
    assert(ref.ref[0] >=  0 && ref.ref[0] <= 8 &&
363
206k
           ref.ref[1] >= -1 && ref.ref[1] <= 8);
364
206k
    if (ref.ref[0] > 0) {
365
121k
        tgmv[0] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[0] - 1],
366
121k
                             bx4, by4, bw4, bh4, rf->frm_hdr);
367
121k
        gmv[0] = rf->frm_hdr->gmv[ref.ref[0] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
368
98.6k
                 tgmv[0] : (mv) { .n = INVALID_MV };
369
121k
    } else {
370
85.0k
        tgmv[0] = (mv) { .n = 0 };
371
85.0k
        gmv[0] = (mv) { .n = INVALID_MV };
372
85.0k
    }
373
206k
    if (ref.ref[1] > 0) {
374
13.8k
        tgmv[1] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[1] - 1],
375
13.8k
                             bx4, by4, bw4, bh4, rf->frm_hdr);
376
13.8k
        gmv[1] = rf->frm_hdr->gmv[ref.ref[1] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
377
8.16k
                 tgmv[1] : (mv) { .n = INVALID_MV };
378
13.8k
    }
379
380
    // top
381
206k
    int have_newmv = 0, have_col_mvs = 0, have_row_mvs = 0;
382
206k
    unsigned max_rows = 0, n_rows = ~0;
383
206k
    const refmvs_block *b_top;
384
206k
    if (by4 > rt->tile_row.start) {
385
145k
        max_rows = imin((by4 - rt->tile_row.start + 1) >> 1, 2 + (bh4 > 1));
386
145k
        b_top = &rt->r[(by4 & 31) - 1 + 5][bx4];
387
145k
        n_rows = scan_row(mvstack, cnt, ref, gmv, b_top,
388
145k
                          bw4, w4, max_rows, bw4 >= 16 ? 4 : 1,
389
145k
                          &have_newmv, &have_row_mvs);
390
145k
    }
391
392
    // left
393
206k
    unsigned max_cols = 0, n_cols = ~0U;
394
206k
    refmvs_block *const *b_left;
395
206k
    if (bx4 > rt->tile_col.start) {
396
150k
        max_cols = imin((bx4 - rt->tile_col.start + 1) >> 1, 2 + (bw4 > 1));
397
150k
        b_left = &rt->r[(by4 & 31) + 5];
398
150k
        n_cols = scan_col(mvstack, cnt, ref, gmv, b_left,
399
150k
                          bh4, h4, bx4 - 1, max_cols, bh4 >= 16 ? 4 : 1,
400
150k
                          &have_newmv, &have_col_mvs);
401
150k
    }
402
403
    // top/right
404
206k
    if (n_rows != ~0U && edge_flags & EDGE_I444_TOP_HAS_RIGHT &&
405
90.1k
        imax(bw4, bh4) <= 16 && bw4 + bx4 < rt->tile_col.end)
406
77.5k
    {
407
77.5k
        add_spatial_candidate(mvstack, cnt, 4, &b_top[bw4], ref, gmv,
408
77.5k
                              &have_newmv, &have_row_mvs);
409
77.5k
    }
410
411
206k
    const int nearest_match = have_col_mvs + have_row_mvs;
412
206k
    const int nearest_cnt = *cnt;
413
409k
    for (int n = 0; n < nearest_cnt; n++)
414
202k
        mvstack[n].weight += 640;
415
416
    // temporal
417
206k
    int globalmv_ctx = rf->frm_hdr->use_ref_frame_mvs;
418
206k
    if (rf->use_ref_frame_mvs) {
419
3.85k
        const ptrdiff_t stride = rf->rp_stride;
420
3.85k
        const int by8 = by4 >> 1, bx8 = bx4 >> 1;
421
3.85k
        const refmvs_temporal_block *const rbi = &rt->rp_proj[(by8 & 15) * stride + bx8];
422
3.85k
        const refmvs_temporal_block *rb = rbi;
423
3.85k
        const int step_h = bw4 >= 16 ? 2 : 1, step_v = bh4 >= 16 ? 2 : 1;
424
3.85k
        const int w8 = imin((w4 + 1) >> 1, 8), h8 = imin((h4 + 1) >> 1, 8);
425
11.5k
        for (int y = 0; y < h8; y += step_v) {
426
19.4k
            for (int x = 0; x < w8; x+= step_h) {
427
11.7k
                add_temporal_candidate(rf, mvstack, cnt, &rb[x], ref,
428
11.7k
                                       !(x | y) ? &globalmv_ctx : NULL, tgmv);
429
11.7k
            }
430
7.70k
            rb += stride * step_v;
431
7.70k
        }
432
3.85k
        if (imin(bw4, bh4) >= 2 && imax(bw4, bh4) < 16) {
433
1.99k
            const int bh8 = bh4 >> 1, bw8 = bw4 >> 1;
434
1.99k
            rb = &rbi[bh8 * stride];
435
1.99k
            const int has_bottom = by8 + bh8 < imin(rt->tile_row.end >> 1,
436
1.99k
                                                    (by8 & ~7) + 8);
437
1.99k
            if (has_bottom && bx8 - 1 >= imax(rt->tile_col.start >> 1, bx8 & ~7)) {
438
18
                add_temporal_candidate(rf, mvstack, cnt, &rb[-1], ref,
439
18
                                       NULL, NULL);
440
18
            }
441
1.99k
            if (bx8 + bw8 < imin(rt->tile_col.end >> 1, (bx8 & ~7) + 8)) {
442
190
                if (has_bottom) {
443
18
                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8], ref,
444
18
                                           NULL, NULL);
445
18
                }
446
190
                if (by8 + bh8 - 1 < imin(rt->tile_row.end >> 1, (by8 & ~7) + 8)) {
447
103
                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8 - stride],
448
103
                                           ref, NULL, NULL);
449
103
                }
450
190
            }
451
1.99k
        }
452
3.85k
    }
453
206k
    assert(*cnt <= 8);
454
455
    // top/left (which, confusingly, is part of "secondary" references)
456
206k
    int have_dummy_newmv_match;
457
206k
    if ((n_rows | n_cols) != ~0U) {
458
130k
        add_spatial_candidate(mvstack, cnt, 4, &b_top[-1], ref, gmv,
459
130k
                              &have_dummy_newmv_match, &have_row_mvs);
460
130k
    }
461
462
    // "secondary" (non-direct neighbour) top & left edges
463
    // what is different about secondary is that everything is now in 8x8 resolution
464
620k
    for (int n = 2; n <= 3; n++) {
465
413k
        if ((unsigned) n > n_rows && (unsigned) n <= max_rows) {
466
148k
            n_rows += scan_row(mvstack, cnt, ref, gmv,
467
148k
                               &rt->r[(((by4 & 31) - 2 * n + 1) | 1) + 5][bx4 | 1],
468
148k
                               bw4, w4, 1 + max_rows - n, bw4 >= 16 ? 4 : 2,
469
148k
                               &have_dummy_newmv_match, &have_row_mvs);
470
148k
        }
471
472
413k
        if ((unsigned) n > n_cols && (unsigned) n <= max_cols) {
473
166k
            n_cols += scan_col(mvstack, cnt, ref, gmv, &rt->r[((by4 & 31) | 1) + 5],
474
166k
                               bh4, h4, (bx4 - n * 2 + 1) | 1,
475
166k
                               1 + max_cols - n, bh4 >= 16 ? 4 : 2,
476
166k
                               &have_dummy_newmv_match, &have_col_mvs);
477
166k
        }
478
413k
    }
479
206k
    assert(*cnt <= 8);
480
481
206k
    const int ref_match_count = have_col_mvs + have_row_mvs;
482
483
    // context build-up
484
206k
    int refmv_ctx, newmv_ctx;
485
206k
    switch (nearest_match) {
486
65.8k
    case 0:
487
65.8k
        refmv_ctx = imin(2, ref_match_count);
488
65.8k
        newmv_ctx = ref_match_count > 0;
489
65.8k
        break;
490
60.4k
    case 1:
491
60.4k
        refmv_ctx = imin(ref_match_count * 3, 4);
492
60.4k
        newmv_ctx = 3 - have_newmv;
493
60.4k
        break;
494
80.4k
    case 2:
495
80.4k
        refmv_ctx = 5;
496
80.4k
        newmv_ctx = 5 - have_newmv;
497
80.4k
        break;
498
206k
    }
499
500
    // sorting (nearest, then "secondary")
501
206k
    int len = nearest_cnt;
502
371k
    while (len) {
503
164k
        int last = 0;
504
234k
        for (int n = 1; n < len; n++) {
505
70.3k
            if (mvstack[n - 1].weight < mvstack[n].weight) {
506
47.7k
#define EXCHANGE(a, b) do { refmvs_candidate tmp = a; a = b; b = tmp; } while (0)
507
26.6k
                EXCHANGE(mvstack[n - 1], mvstack[n]);
508
26.6k
                last = n;
509
26.6k
            }
510
70.3k
        }
511
164k
        len = last;
512
164k
    }
513
206k
    len = *cnt;
514
297k
    while (len > nearest_cnt) {
515
90.8k
        int last = nearest_cnt;
516
156k
        for (int n = nearest_cnt + 1; n < len; n++) {
517
66.0k
            if (mvstack[n - 1].weight < mvstack[n].weight) {
518
21.1k
                EXCHANGE(mvstack[n - 1], mvstack[n]);
519
21.1k
#undef EXCHANGE
520
21.1k
                last = n;
521
21.1k
            }
522
66.0k
        }
523
90.8k
        len = last;
524
90.8k
    }
525
526
206k
    if (ref.ref[1] > 0) {
527
13.8k
        if (*cnt < 2) {
528
13.2k
            const int sign0 = rf->sign_bias[ref.ref[0] - 1];
529
13.2k
            const int sign1 = rf->sign_bias[ref.ref[1] - 1];
530
13.2k
            const int sz4 = imin(w4, h4);
531
13.2k
            refmvs_candidate *const same = &mvstack[*cnt];
532
13.2k
            int same_count[4] = { 0 };
533
534
            // non-self references in top
535
13.2k
            if (n_rows != ~0U) for (int x = 0; x < sz4;) {
536
2.37k
                const refmvs_block *const cand_b = &b_top[x];
537
2.37k
                add_compound_extended_candidate(same, same_count, cand_b,
538
2.37k
                                                sign0, sign1, ref, rf->sign_bias);
539
2.37k
                x += dav1d_block_dimensions[cand_b->bs][0];
540
2.37k
            }
541
542
            // non-self references in left
543
13.2k
            if (n_cols != ~0U) for (int y = 0; y < sz4;) {
544
1.65k
                const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
545
1.65k
                add_compound_extended_candidate(same, same_count, cand_b,
546
1.65k
                                                sign0, sign1, ref, rf->sign_bias);
547
1.65k
                y += dav1d_block_dimensions[cand_b->bs][1];
548
1.65k
            }
549
550
13.2k
            refmvs_candidate *const diff = &same[2];
551
13.2k
            const int *const diff_count = &same_count[2];
552
553
            // merge together
554
39.8k
            for (int n = 0; n < 2; n++) {
555
26.5k
                int m = same_count[n];
556
557
26.5k
                if (m >= 2) continue;
558
559
25.6k
                const int l = diff_count[n];
560
25.6k
                if (l) {
561
4.25k
                    same[m].mv.mv[n] = diff[0].mv.mv[n];
562
4.25k
                    if (++m == 2) continue;
563
1.37k
                    if (l == 2) {
564
918
                        same[1].mv.mv[n] = diff[1].mv.mv[n];
565
918
                        continue;
566
918
                    }
567
1.37k
                }
568
42.9k
                do {
569
42.9k
                    same[m].mv.mv[n] = tgmv[n];
570
42.9k
                } while (++m < 2);
571
21.8k
            }
572
573
            // if the first extended was the same as the non-extended one,
574
            // then replace it with the second extended one
575
13.2k
            int n = *cnt;
576
13.2k
            if (n == 1 && mvstack[0].mv.n == same[0].mv.n)
577
1.45k
                mvstack[1].mv = mvstack[2].mv;
578
24.8k
            do {
579
24.8k
                mvstack[n].weight = 2;
580
24.8k
            } while (++n < 2);
581
13.2k
            *cnt = 2;
582
13.2k
        }
583
584
        // clamping
585
13.8k
        const int left = -(bx4 + bw4 + 4) * 4 * 8;
586
13.8k
        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
587
13.8k
        const int top = -(by4 + bh4 + 4) * 4 * 8;
588
13.8k
        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
589
590
13.8k
        const int n_refmvs = *cnt;
591
13.8k
        int n = 0;
592
28.0k
        do {
593
28.0k
            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
594
28.0k
            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
595
28.0k
            mvstack[n].mv.mv[1].x = iclip(mvstack[n].mv.mv[1].x, left, right);
596
28.0k
            mvstack[n].mv.mv[1].y = iclip(mvstack[n].mv.mv[1].y, top, bottom);
597
28.0k
        } while (++n < n_refmvs);
598
599
13.8k
        switch (refmv_ctx >> 1) {
600
11.8k
        case 0:
601
11.8k
            *ctx = imin(newmv_ctx, 1);
602
11.8k
            break;
603
1.54k
        case 1:
604
1.54k
            *ctx = 1 + imin(newmv_ctx, 3);
605
1.54k
            break;
606
457
        case 2:
607
457
            *ctx = iclip(3 + newmv_ctx, 4, 7);
608
457
            break;
609
13.8k
        }
610
611
13.8k
        return;
612
192k
    } else if (*cnt < 2 && ref.ref[0] > 0) {
613
94.1k
        const int sign = rf->sign_bias[ref.ref[0] - 1];
614
94.1k
        const int sz4 = imin(w4, h4);
615
616
        // non-self references in top
617
100k
        if (n_rows != ~0U) for (int x = 0; x < sz4 && *cnt < 2;) {
618
51.4k
            const refmvs_block *const cand_b = &b_top[x];
619
51.4k
            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
620
51.4k
            x += dav1d_block_dimensions[cand_b->bs][0];
621
51.4k
        }
622
623
        // non-self references in left
624
109k
        if (n_cols != ~0U) for (int y = 0; y < sz4 && *cnt < 2;) {
625
56.2k
            const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
626
56.2k
            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
627
56.2k
            y += dav1d_block_dimensions[cand_b->bs][1];
628
56.2k
        }
629
94.1k
    }
630
192k
    assert(*cnt <= 8);
631
632
    // clamping
633
192k
    int n_refmvs = *cnt;
634
192k
    if (n_refmvs) {
635
151k
        const int left = -(bx4 + bw4 + 4) * 4 * 8;
636
151k
        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
637
151k
        const int top = -(by4 + bh4 + 4) * 4 * 8;
638
151k
        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
639
640
151k
        int n = 0;
641
334k
        do {
642
334k
            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
643
334k
            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
644
334k
        } while (++n < n_refmvs);
645
151k
    }
646
647
351k
    for (int n = *cnt; n < 2; n++)
648
158k
        mvstack[n].mv.mv[0] = tgmv[0];
649
650
192k
    *ctx = (refmv_ctx << 4) | (globalmv_ctx << 3) | newmv_ctx;
651
192k
}
652
653
/*
 * Prepare the per-tile state `rt` for decoding one superblock row.
 *
 * Sets rt->rp_proj and the rt->r[] row-pointer table from the frame-level
 * buffers of `rf`, then records the owning frame and the tile bounds
 * (end bounds are limited to rf->ih4 / rf->iw4).
 *
 * tile_row_idx is treated as 0 when rf->n_tile_threads == 1. When
 * rf->n_frame_threads > 1 and pass == 2, a second region of rf->r is used
 * (offset 35 * 2 * rf->n_blocks).
 */
void dav1d_refmvs_tile_sbrow_init(refmvs_tile *const rt, const refmvs_frame *const rf,
                                  const int tile_col_start4, const int tile_col_end4,
                                  const int tile_row_start4, const int tile_row_end4,
                                  const int sby, int tile_row_idx, const int pass)
{
    if (rf->n_tile_threads == 1)
        tile_row_idx = 0;

    rt->rp_proj = &rf->rp_proj[16 * rf->rp_stride * tile_row_idx];

    const ptrdiff_t r_stride = rf->rp_stride * 2;
    ptrdiff_t pass_off = 0;
    if (rf->n_frame_threads > 1 && pass == 2)
        pass_off = 35 * 2 * rf->n_blocks;

    refmvs_block *const base = &rf->r[35 * r_stride * tile_row_idx + pass_off];
    const int sbsz = rf->sbsz;
    const int off = (sbsz * sby) & 16;
    refmvs_block **const rows = &rt->r[off];

    // Rows of the current superblock row live at rows[5 .. 5 + sbsz - 1].
    for (int i = 0; i < sbsz; i++)
        rows[5 + i] = base + i * r_stride;

    // The three buffer rows following them back the even "above" slots;
    // the odd slots are left NULL.
    refmvs_block *const extra = base + sbsz * r_stride;
    rows[0] = extra;
    rows[1] = NULL;
    rows[2] = extra + r_stride;
    rows[3] = NULL;
    rows[4] = extra + 2 * r_stride;

    if (sby & 1) {
        // On odd superblock rows, exchange the above slots with the slots
        // sbsz entries further down.
        for (int k = 0; k <= 4; k += 2) {
            refmvs_block *const tmp = rows[k];
            rows[k] = rows[sbsz + k];
            rows[sbsz + k] = tmp;
        }
    }

    rt->rf = rf;
    rt->tile_row.start = tile_row_start4;
    rt->tile_row.end = imin(tile_row_end4, rf->ih4);
    rt->tile_col.start = tile_col_start4;
    rt->tile_col.end = imin(tile_col_end4, rf->iw4);
}
689
690
static void load_tmvs_c(const refmvs_frame *const rf, int tile_row_idx,
691
                        const int col_start8, const int col_end8,
692
                        const int row_start8, int row_end8)
693
3.62k
{
694
3.62k
    if (rf->n_tile_threads == 1) tile_row_idx = 0;
695
3.62k
    assert(row_start8 >= 0);
696
3.62k
    assert((unsigned) (row_end8 - row_start8) <= 16U);
697
3.62k
    row_end8 = imin(row_end8, rf->ih8);
698
3.62k
    const int col_start8i = imax(col_start8 - 8, 0);
699
3.62k
    const int col_end8i = imin(col_end8 + 8, rf->iw8);
700
701
3.62k
    const ptrdiff_t stride = rf->rp_stride;
702
3.62k
    refmvs_temporal_block *rp_proj =
703
3.62k
        &rf->rp_proj[16 * stride * tile_row_idx + (row_start8 & 15) * stride];
704
19.4k
    for (int y = row_start8; y < row_end8; y++) {
705
37.8k
        for (int x = col_start8; x < col_end8; x++)
706
22.1k
            rp_proj[x].mv.n = INVALID_MV;
707
15.7k
        rp_proj += stride;
708
15.7k
    }
709
710
3.62k
    rp_proj = &rf->rp_proj[16 * stride * tile_row_idx];
711
7.80k
    for (int n = 0; n < rf->n_mfmvs; n++) {
712
4.17k
        const int ref2cur = rf->mfmv_ref2cur[n];
713
4.17k
        if (ref2cur == INVALID_REF2CUR) continue;
714
715
3.00k
        const int ref = rf->mfmv_ref[n];
716
3.00k
        const int ref_sign = ref - 4;
717
3.00k
        const refmvs_temporal_block *r = &rf->rp_ref[ref][row_start8 * stride];
718
18.5k
        for (int y = row_start8; y < row_end8; y++) {
719
15.5k
            const int y_sb_align = y & ~7;
720
15.5k
            const int y_proj_start = imax(y_sb_align, row_start8);
721
15.5k
            const int y_proj_end = imin(y_sb_align + 8, row_end8);
722
33.5k
            for (int x = col_start8i; x < col_end8i; x++) {
723
17.9k
                const refmvs_temporal_block *rb = &r[x];
724
17.9k
                const int b_ref = rb->ref;
725
17.9k
                if (!b_ref) continue;
726
5.84k
                const int ref2ref = rf->mfmv_ref2ref[n][b_ref - 1];
727
5.84k
                if (!ref2ref) continue;
728
2.96k
                const mv b_mv = rb->mv;
729
2.96k
                const mv offset = mv_projection(b_mv, ref2cur, ref2ref);
730
2.96k
                int pos_x = x + apply_sign(abs(offset.x) >> 6,
731
2.96k
                                           offset.x ^ ref_sign);
732
2.96k
                const int pos_y = y + apply_sign(abs(offset.y) >> 6,
733
2.96k
                                                 offset.y ^ ref_sign);
734
2.96k
                if (pos_y >= y_proj_start && pos_y < y_proj_end) {
735
2.79k
                    const ptrdiff_t pos = (pos_y & 15) * stride;
736
4.69k
                    for (;;) {
737
4.69k
                        const int x_sb_align = x & ~7;
738
4.69k
                        if (pos_x >= imax(x_sb_align - 8, col_start8) &&
739
4.63k
                            pos_x < imin(x_sb_align + 16, col_end8))
740
4.55k
                        {
741
4.55k
                            rp_proj[pos + pos_x].mv = rb->mv;
742
4.55k
                            rp_proj[pos + pos_x].ref = ref2ref;
743
4.55k
                        }
744
4.69k
                        if (++x >= col_end8i) break;
745
2.71k
                        rb++;
746
2.71k
                        if (rb->ref != b_ref || rb->mv.n != b_mv.n) break;
747
1.90k
                        pos_x++;
748
1.90k
                    }
749
2.79k
                } else {
750
214
                    for (;;) {
751
214
                        if (++x >= col_end8i) break;
752
158
                        rb++;
753
158
                        if (rb->ref != b_ref || rb->mv.n != b_mv.n) break;
754
158
                    }
755
172
                }
756
2.96k
                x--;
757
2.96k
            }
758
15.5k
            r += stride;
759
15.5k
        }
760
3.00k
    }
761
3.62k
}
762
763
static void save_tmvs_c(refmvs_temporal_block *rp, const ptrdiff_t stride,
764
                        refmvs_block *const *const rr,
765
                        const uint8_t *const ref_sign,
766
                        const int col_end8, const int row_end8,
767
                        const int col_start8, const int row_start8)
768
10.9k
{
769
80.0k
    for (int y = row_start8; y < row_end8; y++) {
770
69.1k
        const refmvs_block *const b = rr[(y & 15) * 2];
771
772
138k
        for (int x = col_start8; x < col_end8;) {
773
69.5k
            const refmvs_block *const cand_b = &b[x * 2 + 1];
774
69.5k
            const int bw8 = (dav1d_block_dimensions[cand_b->bs][0] + 1) >> 1;
775
776
69.5k
            if (cand_b->ref.ref[1] > 0 && ref_sign[cand_b->ref.ref[1] - 1] &&
777
1.53k
                (abs(cand_b->mv.mv[1].y) | abs(cand_b->mv.mv[1].x)) < 4096)
778
1.53k
            {
779
13.1k
                for (int n = 0; n < bw8; n++, x++)
780
11.6k
                    rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[1],
781
11.6k
                                                      .ref = cand_b->ref.ref[1] };
782
68.0k
            } else if (cand_b->ref.ref[0] > 0 && ref_sign[cand_b->ref.ref[0] - 1] &&
783
28.7k
                       (abs(cand_b->mv.mv[0].y) | abs(cand_b->mv.mv[0].x)) < 4096)
784
28.7k
            {
785
236k
                for (int n = 0; n < bw8; n++, x++)
786
207k
                    rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[0],
787
207k
                                                      .ref = cand_b->ref.ref[0] };
788
39.2k
            } else {
789
325k
                for (int n = 0; n < bw8; n++, x++) {
790
286k
                    rp[x].mv.n = 0;
791
286k
                    rp[x].ref = 0; // "invalid"
792
286k
                }
793
39.2k
            }
794
69.5k
        }
795
69.1k
        rp += stride;
796
69.1k
    }
797
10.9k
}
798
799
int dav1d_refmvs_init_frame(refmvs_frame *const rf,
800
                            const Dav1dSequenceHeader *const seq_hdr,
801
                            const Dav1dFrameHeader *const frm_hdr,
802
                            const uint8_t ref_poc[7],
803
                            refmvs_temporal_block *const rp,
804
                            const uint8_t ref_ref_poc[7][7],
805
                            /*const*/ refmvs_temporal_block *const rp_ref[7],
806
                            const int n_tile_threads, const int n_frame_threads)
807
50.1k
{
808
50.1k
    const int rp_stride = ((frm_hdr->width[0] + 127) & ~127) >> 3;
809
50.1k
    const int n_tile_rows = n_tile_threads > 1 ? frm_hdr->tiling.rows : 1;
810
50.1k
    const int n_blocks = rp_stride * n_tile_rows;
811
812
50.1k
    rf->sbsz = 16 << seq_hdr->sb128;
813
50.1k
    rf->frm_hdr = frm_hdr;
814
50.1k
    rf->iw8 = (frm_hdr->width[0] + 7) >> 3;
815
50.1k
    rf->ih8 = (frm_hdr->height + 7) >> 3;
816
50.1k
    rf->iw4 = rf->iw8 << 1;
817
50.1k
    rf->ih4 = rf->ih8 << 1;
818
50.1k
    rf->rp = rp;
819
50.1k
    rf->rp_stride = rp_stride;
820
50.1k
    rf->n_tile_threads = n_tile_threads;
821
50.1k
    rf->n_frame_threads = n_frame_threads;
822
823
50.1k
    if (n_blocks != rf->n_blocks) {
824
14.3k
        const size_t r_sz = sizeof(*rf->r) * 35 * 2 * n_blocks * (1 + (n_frame_threads > 1));
825
14.3k
        const size_t rp_proj_sz = sizeof(*rf->rp_proj) * 16 * n_blocks;
826
        /* Note that sizeof(*rf->r) == 12, but it's accessed using 16-byte unaligned
827
         * loads in save_tmvs() asm which can overread 4 bytes into rp_proj. */
828
14.3k
        dav1d_free_aligned(rf->r);
829
14.3k
        rf->r = dav1d_alloc_aligned(ALLOC_REFMVS, r_sz + rp_proj_sz, 64);
830
14.3k
        if (!rf->r) {
831
0
            rf->n_blocks = 0;
832
0
            return DAV1D_ERR(ENOMEM);
833
0
        }
834
835
14.3k
        rf->rp_proj = (refmvs_temporal_block*)((uintptr_t)rf->r + r_sz);
836
14.3k
        rf->n_blocks = n_blocks;
837
14.3k
    }
838
839
50.1k
    const int poc = frm_hdr->frame_offset;
840
401k
    for (int i = 0; i < 7; i++) {
841
351k
        const int poc_diff = get_poc_diff(seq_hdr->order_hint_n_bits,
842
351k
                                          ref_poc[i], poc);
843
351k
        rf->sign_bias[i] = poc_diff > 0;
844
351k
        rf->mfmv_sign[i] = poc_diff < 0;
845
351k
        rf->pocdiff[i] = iclip(get_poc_diff(seq_hdr->order_hint_n_bits,
846
351k
                                            poc, ref_poc[i]), -31, 31);
847
351k
    }
848
849
    // temporal MV setup
850
50.1k
    rf->n_mfmvs = 0;
851
50.1k
    rf->rp_ref = rp_ref;
852
50.1k
    if (frm_hdr->use_ref_frame_mvs && seq_hdr->order_hint_n_bits) {
853
3.56k
        int total = 2;
854
3.56k
        if (rp_ref[0] && ref_ref_poc[0][6] != ref_poc[3] /* alt-of-last != gold */) {
855
1.18k
            rf->mfmv_ref[rf->n_mfmvs++] = 0; // last
856
1.18k
            total = 3;
857
1.18k
        }
858
3.56k
        if (rp_ref[4] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[4],
859
2.62k
                                      frm_hdr->frame_offset) > 0)
860
257
        {
861
257
            rf->mfmv_ref[rf->n_mfmvs++] = 4; // bwd
862
257
        }
863
3.56k
        if (rp_ref[5] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[5],
864
2.35k
                                      frm_hdr->frame_offset) > 0)
865
123
        {
866
123
            rf->mfmv_ref[rf->n_mfmvs++] = 5; // altref2
867
123
        }
868
3.56k
        if (rf->n_mfmvs < total && rp_ref[6] &&
869
2.37k
            get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[6],
870
2.37k
                         frm_hdr->frame_offset) > 0)
871
1.10k
        {
872
1.10k
            rf->mfmv_ref[rf->n_mfmvs++] = 6; // altref
873
1.10k
        }
874
3.56k
        if (rf->n_mfmvs < total && rp_ref[1])
875
1.54k
            rf->mfmv_ref[rf->n_mfmvs++] = 1; // last2
876
877
7.77k
        for (int n = 0; n < rf->n_mfmvs; n++) {
878
4.21k
            const int rpoc = ref_poc[rf->mfmv_ref[n]];
879
4.21k
            const int diff1 = get_poc_diff(seq_hdr->order_hint_n_bits,
880
4.21k
                                           rpoc, frm_hdr->frame_offset);
881
4.21k
            if (abs(diff1) > 31) {
882
1.12k
                rf->mfmv_ref2cur[n] = INVALID_REF2CUR;
883
3.08k
            } else {
884
3.08k
                rf->mfmv_ref2cur[n] = rf->mfmv_ref[n] < 4 ? -diff1 : diff1;
885
24.6k
                for (int m = 0; m < 7; m++) {
886
21.5k
                    const int rrpoc = ref_ref_poc[rf->mfmv_ref[n]][m];
887
21.5k
                    const int diff2 = get_poc_diff(seq_hdr->order_hint_n_bits,
888
21.5k
                                                   rpoc, rrpoc);
889
                    // unsigned comparison also catches the < 0 case
890
21.5k
                    rf->mfmv_ref2ref[n][m] = (unsigned) diff2 > 31U ? 0 : diff2;
891
21.5k
                }
892
3.08k
            }
893
4.21k
        }
894
3.56k
    }
895
50.1k
    rf->use_ref_frame_mvs = rf->n_mfmvs > 0;
896
897
50.1k
    return 0;
898
50.1k
}
899
900
/*
 * Copy the block descriptor `*rmv` into a bw4 x bh4 rectangle.
 *
 * rr    array of row pointers; bh4 consecutive entries are consumed
 * rmv   value replicated into every cell
 * bx4   column offset applied to each row pointer
 * bw4   number of cells written per row
 * bh4   number of rows; the loop body runs before the count is tested,
 *       so at least one row is always written
 */
static void splat_mv_c(refmvs_block **rr, const refmvs_block *const rmv,
                       const int bx4, const int bw4, int bh4)
{
    do {
        refmvs_block *dst = *rr + bx4;
        rr++;
        for (int left = bw4; left > 0; left--)
            *dst++ = *rmv;
    } while (--bh4);
}
909
910
#if HAVE_ASM
911
#if ARCH_AARCH64 || ARCH_ARM
912
#include "src/arm/refmvs.h"
913
#elif ARCH_LOONGARCH64
914
#include "src/loongarch/refmvs.h"
915
#elif ARCH_X86
916
#include "src/x86/refmvs.h"
917
#endif
918
#endif
919
920
/*
 * Populate the refmvs DSP function table `c` with the C implementations,
 * then let the architecture-specific initialiser (when assembly support is
 * compiled in) adjust the table.
 */
COLD void dav1d_refmvs_dsp_init(Dav1dRefmvsDSPContext *const c)
{
    // Portable C implementations, installed unconditionally.
    c->splat_mv = splat_mv_c;
    c->save_tmvs = save_tmvs_c;
    c->load_tmvs = load_tmvs_c;

    // Architecture hook; runs after the C pointers have been set.
#if HAVE_ASM
#if ARCH_AARCH64 || ARCH_ARM
    refmvs_dsp_init_arm(c);
#elif ARCH_LOONGARCH64
    refmvs_dsp_init_loongarch(c);
#elif ARCH_X86
    refmvs_dsp_init_x86(c);
#endif
#endif
}