Coverage Report

Created: 2026-06-15 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/dav1d/src/refmvs.c
Line
Count
Source
1
/*
2
 * Copyright © 2020, VideoLAN and dav1d authors
3
 * Copyright © 2020, Two Orioles, LLC
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * 1. Redistributions of source code must retain the above copyright notice, this
10
 *    list of conditions and the following disclaimer.
11
 *
12
 * 2. Redistributions in binary form must reproduce the above copyright notice,
13
 *    this list of conditions and the following disclaimer in the documentation
14
 *    and/or other materials provided with the distribution.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
 */
27
28
#include "config.h"
29
30
#include <limits.h>
31
#include <stdlib.h>
32
33
#include "dav1d/common.h"
34
35
#include "common/intops.h"
36
37
#include "src/env.h"
38
#include "src/mem.h"
39
#include "src/refmvs.h"
40
41
static void add_spatial_candidate(refmvs_candidate *const mvstack, int *const cnt,
42
                                  const int weight, const refmvs_block *const b,
43
                                  const union refmvs_refpair ref, const mv gmv[2],
44
                                  int *const have_newmv_match,
45
                                  int *const have_refmv_match)
46
4.04M
{
47
4.04M
    if (b->mv.mv[0].n == INVALID_MV) return; // intra block, no intrabc
48
49
3.45M
    if (ref.ref[1] == -1) {
50
3.83M
        for (int n = 0; n < 2; n++) {
51
3.49M
            if (b->ref.ref[n] == ref.ref[0]) {
52
2.77M
                const mv cand_mv = ((b->mf & 1) && gmv[0].n != INVALID_MV) ?
53
2.70M
                                   gmv[0] : b->mv.mv[n];
54
55
2.77M
                *have_refmv_match = 1;
56
2.77M
                *have_newmv_match |= b->mf >> 1;
57
58
2.77M
                const int last = *cnt;
59
5.30M
                for (int m = 0; m < last; m++)
60
3.37M
                    if (mvstack[m].mv.mv[0].n == cand_mv.n) {
61
852k
                        mvstack[m].weight += weight;
62
852k
                        return;
63
852k
                    }
64
65
1.92M
                if (last < 8) {
66
1.92M
                    mvstack[last].mv.mv[0] = cand_mv;
67
1.92M
                    mvstack[last].weight = weight;
68
1.92M
                    *cnt = last + 1;
69
1.92M
                }
70
1.92M
                return;
71
2.77M
            }
72
3.49M
        }
73
3.11M
    } else if (b->ref.pair == ref.pair) {
74
113k
        const refmvs_mvpair cand_mv = { .mv = {
75
113k
            [0] = ((b->mf & 1) && gmv[0].n != INVALID_MV) ? gmv[0] : b->mv.mv[0],
76
113k
            [1] = ((b->mf & 1) && gmv[1].n != INVALID_MV) ? gmv[1] : b->mv.mv[1],
77
113k
        }};
78
79
113k
        *have_refmv_match = 1;
80
113k
        *have_newmv_match |= b->mf >> 1;
81
82
113k
        const int last = *cnt;
83
171k
        for (int n = 0; n < last; n++)
84
97.5k
            if (mvstack[n].mv.n == cand_mv.n) {
85
40.0k
                mvstack[n].weight += weight;
86
40.0k
                return;
87
40.0k
            }
88
89
73.9k
        if (last < 8) {
90
73.8k
            mvstack[last].mv = cand_mv;
91
73.8k
            mvstack[last].weight = weight;
92
73.8k
            *cnt = last + 1;
93
73.8k
        }
94
73.9k
    }
95
3.45M
}
96
97
static int scan_row(refmvs_candidate *const mvstack, int *const cnt,
98
                    const union refmvs_refpair ref, const mv gmv[2],
99
                    const refmvs_block *b, const int bw4, const int w4,
100
                    const int max_rows, const int step,
101
                    int *const have_newmv_match, int *const have_refmv_match)
102
1.18M
{
103
1.18M
    const refmvs_block *cand_b = b;
104
1.18M
    const enum BlockSize first_cand_bs = cand_b->bs;
105
1.18M
    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
106
1.18M
    int cand_bw4 = first_cand_b_dim[0];
107
1.18M
    int len = imax(step, imin(bw4, cand_bw4));
108
109
1.18M
    if (bw4 <= cand_bw4) {
110
        // FIXME weight can be higher for odd blocks (bx4 & 1), but then the
111
        // position of the first block has to be odd already, i.e. not just
112
        // for row_offset=-3/-5
113
        // FIXME why can this not be cand_bw4?
114
1.04M
        const int weight = bw4 == 1 ? 2 :
115
1.04M
                           imax(2, imin(2 * max_rows, first_cand_b_dim[1]));
116
1.04M
        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
117
1.04M
                              have_newmv_match, have_refmv_match);
118
1.04M
        return weight >> 1;
119
1.04M
    }
120
121
269k
    for (int x = 0;;) {
122
        // FIXME if we overhang above, we could fill a bitmask so we don't have
123
        // to repeat the add_spatial_candidate() for the next row, but just increase
124
        // the weight here
125
269k
        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
126
269k
                              have_newmv_match, have_refmv_match);
127
269k
        x += len;
128
269k
        if (x >= w4) return 1;
129
128k
        cand_b = &b[x];
130
128k
        cand_bw4 = dav1d_block_dimensions[cand_b->bs][0];
131
128k
        assert(cand_bw4 < bw4);
132
128k
        len = imax(step, cand_bw4);
133
128k
    }
134
141k
}
135
136
static int scan_col(refmvs_candidate *const mvstack, int *const cnt,
137
                    const union refmvs_refpair ref, const mv gmv[2],
138
                    /*const*/ refmvs_block *const *b, const int bh4, const int h4,
139
                    const int bx4, const int max_cols, const int step,
140
                    int *const have_newmv_match, int *const have_refmv_match)
141
1.68M
{
142
1.68M
    const refmvs_block *cand_b = &b[0][bx4];
143
1.68M
    const enum BlockSize first_cand_bs = cand_b->bs;
144
1.68M
    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
145
1.68M
    int cand_bh4 = first_cand_b_dim[1];
146
1.68M
    int len = imax(step, imin(bh4, cand_bh4));
147
148
1.68M
    if (bh4 <= cand_bh4) {
149
        // FIXME weight can be higher for odd blocks (by4 & 1), but then the
150
        // position of the first block has to be odd already, i.e. not just
151
        // for col_offset=-3/-5
152
        // FIXME why can this not be cand_bh4?
153
1.51M
        const int weight = bh4 == 1 ? 2 :
154
1.51M
                           imax(2, imin(2 * max_cols, first_cand_b_dim[0]));
155
1.51M
        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
156
1.51M
                            have_newmv_match, have_refmv_match);
157
1.51M
        return weight >> 1;
158
1.51M
    }
159
160
321k
    for (int y = 0;;) {
161
        // FIXME if we overhang above, we could fill a bitmask so we don't have
162
        // to repeat the add_spatial_candidate() for the next row, but just increase
163
        // the weight here
164
321k
        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
165
321k
                              have_newmv_match, have_refmv_match);
166
321k
        y += len;
167
321k
        if (y >= h4) return 1;
168
154k
        cand_b = &b[y][bx4];
169
154k
        cand_bh4 = dav1d_block_dimensions[cand_b->bs][1];
170
154k
        assert(cand_bh4 < bh4);
171
154k
        len = imax(step, cand_bh4);
172
154k
    }
173
168k
}
174
175
62.3k
static inline union mv mv_projection(const union mv mv, const int num, const int den) {
176
62.3k
    static const uint16_t div_mult[32] = {
177
62.3k
           0, 16384, 8192, 5461, 4096, 3276, 2730, 2340,
178
62.3k
        2048,  1820, 1638, 1489, 1365, 1260, 1170, 1092,
179
62.3k
        1024,   963,  910,  862,  819,  780,  744,  712,
180
62.3k
         682,   655,  630,  606,  585,  564,  546,  528
181
62.3k
    };
182
62.3k
    assert(den > 0 && den < 32);
183
62.3k
    assert(num > -32 && num < 32);
184
62.3k
    const int frac = num * div_mult[den];
185
62.3k
    const int y = mv.y * frac, x = mv.x * frac;
186
    // Round and clip according to AV1 spec section 7.9.3
187
62.3k
    return (union mv) { // 0x3fff == (1 << 14) - 1
188
62.3k
        .y = iclip((y + 8192 + (y >> 31)) >> 14, -0x3fff, 0x3fff),
189
62.3k
        .x = iclip((x + 8192 + (x >> 31)) >> 14, -0x3fff, 0x3fff)
190
62.3k
    };
191
62.3k
}
192
193
static void add_temporal_candidate(const refmvs_frame *const rf,
194
                                   refmvs_candidate *const mvstack, int *const cnt,
195
                                   const refmvs_temporal_block *const rb,
196
                                   const union refmvs_refpair ref, int *const globalmv_ctx,
197
                                   const union mv gmv[])
198
65.7k
{
199
65.7k
    if (rb->mv.n == INVALID_MV) return;
200
201
31.5k
    union mv mv = mv_projection(rb->mv, rf->pocdiff[ref.ref[0] - 1], rb->ref);
202
31.5k
    fix_mv_precision(rf->frm_hdr, &mv);
203
204
31.5k
    const int last = *cnt;
205
31.5k
    if (ref.ref[1] == -1) {
206
20.4k
        if (globalmv_ctx)
207
5.42k
            *globalmv_ctx = (abs(mv.x - gmv[0].x) | abs(mv.y - gmv[0].y)) >= 16;
208
209
29.7k
        for (int n = 0; n < last; n++)
210
25.3k
            if (mvstack[n].mv.mv[0].n == mv.n) {
211
16.0k
                mvstack[n].weight += 2;
212
16.0k
                return;
213
16.0k
            }
214
4.39k
        if (last < 8) {
215
4.37k
            mvstack[last].mv.mv[0] = mv;
216
4.37k
            mvstack[last].weight = 2;
217
4.37k
            *cnt = last + 1;
218
4.37k
        }
219
11.1k
    } else {
220
11.1k
        refmvs_mvpair mvp = { .mv = {
221
11.1k
            [0] = mv,
222
11.1k
            [1] = mv_projection(rb->mv, rf->pocdiff[ref.ref[1] - 1], rb->ref),
223
11.1k
        }};
224
11.1k
        fix_mv_precision(rf->frm_hdr, &mvp.mv[1]);
225
226
15.4k
        for (int n = 0; n < last; n++)
227
12.7k
            if (mvstack[n].mv.n == mvp.n) {
228
8.37k
                mvstack[n].weight += 2;
229
8.37k
                return;
230
8.37k
            }
231
2.77k
        if (last < 8) {
232
2.75k
            mvstack[last].mv = mvp;
233
2.75k
            mvstack[last].weight = 2;
234
2.75k
            *cnt = last + 1;
235
2.75k
        }
236
2.77k
    }
237
31.5k
}
238
239
static void add_compound_extended_candidate(refmvs_candidate *const same,
240
                                            int *const same_count,
241
                                            const refmvs_block *const cand_b,
242
                                            const int sign0, const int sign1,
243
                                            const union refmvs_refpair ref,
244
                                            const uint8_t *const sign_bias)
245
92.8k
{
246
92.8k
    refmvs_candidate *const diff = &same[2];
247
92.8k
    int *const diff_count = &same_count[2];
248
249
239k
    for (int n = 0; n < 2; n++) {
250
182k
        const int cand_ref = cand_b->ref.ref[n];
251
252
182k
        if (cand_ref <= 0) break;
253
254
146k
        mv cand_mv = cand_b->mv.mv[n];
255
146k
        if (cand_ref == ref.ref[0]) {
256
50.6k
            if (same_count[0] < 2)
257
49.0k
                same[same_count[0]++].mv.mv[0] = cand_mv;
258
50.6k
            if (diff_count[1] < 2) {
259
43.8k
                if (sign1 ^ sign_bias[cand_ref - 1]) {
260
1.72k
                    cand_mv.y = -cand_mv.y;
261
1.72k
                    cand_mv.x = -cand_mv.x;
262
1.72k
                }
263
43.8k
                diff[diff_count[1]++].mv.mv[1] = cand_mv;
264
43.8k
            }
265
95.6k
        } else if (cand_ref == ref.ref[1]) {
266
51.8k
            if (same_count[1] < 2)
267
50.7k
                same[same_count[1]++].mv.mv[1] = cand_mv;
268
51.8k
            if (diff_count[0] < 2) {
269
42.9k
                if (sign0 ^ sign_bias[cand_ref - 1]) {
270
1.80k
                    cand_mv.y = -cand_mv.y;
271
1.80k
                    cand_mv.x = -cand_mv.x;
272
1.80k
                }
273
42.9k
                diff[diff_count[0]++].mv.mv[0] = cand_mv;
274
42.9k
            }
275
51.8k
        } else {
276
43.8k
            mv i_cand_mv = (union mv) {
277
43.8k
                .x = -cand_mv.x,
278
43.8k
                .y = -cand_mv.y
279
43.8k
            };
280
281
43.8k
            if (diff_count[0] < 2) {
282
34.8k
                diff[diff_count[0]++].mv.mv[0] =
283
34.8k
                    sign0 ^ sign_bias[cand_ref - 1] ?
284
34.1k
                    i_cand_mv : cand_mv;
285
34.8k
            }
286
287
43.8k
            if (diff_count[1] < 2) {
288
33.1k
                diff[diff_count[1]++].mv.mv[1] =
289
33.1k
                    sign1 ^ sign_bias[cand_ref - 1] ?
290
32.4k
                    i_cand_mv : cand_mv;
291
33.1k
            }
292
43.8k
        }
293
146k
    }
294
92.8k
}
295
296
static void add_single_extended_candidate(refmvs_candidate mvstack[8], int *const cnt,
297
                                          const refmvs_block *const cand_b,
298
                                          const int sign, const uint8_t *const sign_bias)
299
360k
{
300
723k
    for (int n = 0; n < 2; n++) {
301
707k
        const int cand_ref = cand_b->ref.ref[n];
302
303
707k
        if (cand_ref <= 0) break;
304
        // we need to continue even if cand_ref == ref.ref[0], since
305
        // the candidate could have been added as a globalmv variant,
306
        // which changes the value
307
        // FIXME if scan_{row,col}() returned a mask for the nearest
308
        // edge, we could skip the appropriate ones here
309
310
363k
        mv cand_mv = cand_b->mv.mv[n];
311
363k
        if (sign ^ sign_bias[cand_ref - 1]) {
312
2.08k
            cand_mv.y = -cand_mv.y;
313
2.08k
            cand_mv.x = -cand_mv.x;
314
2.08k
        }
315
316
363k
        int m;
317
363k
        const int last = *cnt;
318
404k
        for (m = 0; m < last; m++)
319
338k
            if (cand_mv.n == mvstack[m].mv.mv[0].n)
320
297k
                break;
321
363k
        if (m == last) {
322
66.0k
            mvstack[m].mv.mv[0] = cand_mv;
323
66.0k
            mvstack[m].weight = 2; // "minimal"
324
66.0k
            *cnt = last + 1;
325
66.0k
        }
326
363k
    }
327
360k
}
328
329
/*
330
 * refmvs_frame allocates memory for one sbrow (32 blocks high, whole frame
331
 * wide) of 4x4-resolution refmvs_block entries for spatial MV referencing.
332
 * mvrefs_tile[] keeps a list of 35 (32 + 3 above) pointers into this memory,
333
 * and each sbrow, the bottom entries (y=27/29/31) are exchanged with the top
334
 * (-5/-3/-1) pointers by calling dav1d_refmvs_tile_sbrow_init() at the start
335
 * of each tile/sbrow.
336
 *
337
 * For temporal MV referencing, we call dav1d_refmvs_save_tmvs() at the end of
338
 * each tile/sbrow (when tile column threading is enabled), or at the start of
339
 * each interleaved sbrow (i.e. once for all tile columns together, when tile
340
 * column threading is disabled). This will copy the 4x4-resolution spatial MVs
341
 * into 8x8-resolution refmvs_temporal_block structures. Then, for subsequent
342
 * frames, at the start of each tile/sbrow (when tile column threading is
343
 * enabled) or at the start of each interleaved sbrow (when tile column
344
 * threading is disabled), we call load_tmvs(), which will project the MVs to
345
 * their respective position in the current frame.
346
 */
347
348
void dav1d_refmvs_find(const refmvs_tile *const rt,
349
                       refmvs_candidate mvstack[8], int *const cnt,
350
                       int *const ctx,
351
                       const union refmvs_refpair ref, const enum BlockSize bs,
352
                       const enum EdgeFlags edge_flags,
353
                       const int by4, const int bx4)
354
981k
{
355
981k
    const refmvs_frame *const rf = rt->rf;
356
981k
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
357
981k
    const int bw4 = b_dim[0], w4 = imin(imin(bw4, 16), rt->tile_col.end - bx4);
358
981k
    const int bh4 = b_dim[1], h4 = imin(imin(bh4, 16), rt->tile_row.end - by4);
359
981k
    mv gmv[2], tgmv[2];
360
361
981k
    *cnt = 0;
362
981k
    assert(ref.ref[0] >=  0 && ref.ref[0] <= 8 &&
363
981k
           ref.ref[1] >= -1 && ref.ref[1] <= 8);
364
981k
    if (ref.ref[0] > 0) {
365
461k
        tgmv[0] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[0] - 1],
366
461k
                             bx4, by4, bw4, bh4, rf->frm_hdr);
367
461k
        gmv[0] = rf->frm_hdr->gmv[ref.ref[0] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
368
344k
                 tgmv[0] : (mv) { .n = INVALID_MV };
369
520k
    } else {
370
520k
        tgmv[0] = (mv) { .n = 0 };
371
520k
        gmv[0] = (mv) { .n = INVALID_MV };
372
520k
    }
373
981k
    if (ref.ref[1] > 0) {
374
88.5k
        tgmv[1] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[1] - 1],
375
88.5k
                             bx4, by4, bw4, bh4, rf->frm_hdr);
376
88.5k
        gmv[1] = rf->frm_hdr->gmv[ref.ref[1] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
377
61.1k
                 tgmv[1] : (mv) { .n = INVALID_MV };
378
88.5k
    }
379
380
    // top
381
981k
    int have_newmv = 0, have_col_mvs = 0, have_row_mvs = 0;
382
981k
    unsigned max_rows = 0, n_rows = ~0;
383
981k
    const refmvs_block *b_top;
384
981k
    if (by4 > rt->tile_row.start) {
385
657k
        max_rows = imin((by4 - rt->tile_row.start + 1) >> 1, 2 + (bh4 > 1));
386
657k
        b_top = &rt->r[(by4 & 31) - 1 + 5][bx4];
387
657k
        n_rows = scan_row(mvstack, cnt, ref, gmv, b_top,
388
657k
                          bw4, w4, max_rows, bw4 >= 16 ? 4 : 1,
389
657k
                          &have_newmv, &have_row_mvs);
390
657k
    }
391
392
    // left
393
981k
    unsigned max_cols = 0, n_cols = ~0U;
394
981k
    refmvs_block *const *b_left;
395
981k
    if (bx4 > rt->tile_col.start) {
396
823k
        max_cols = imin((bx4 - rt->tile_col.start + 1) >> 1, 2 + (bw4 > 1));
397
823k
        b_left = &rt->r[(by4 & 31) + 5];
398
823k
        n_cols = scan_col(mvstack, cnt, ref, gmv, b_left,
399
823k
                          bh4, h4, bx4 - 1, max_cols, bh4 >= 16 ? 4 : 1,
400
823k
                          &have_newmv, &have_col_mvs);
401
823k
    }
402
403
    // top/right
404
981k
    if (n_rows != ~0U && edge_flags & EDGE_I444_TOP_HAS_RIGHT &&
405
400k
        imax(bw4, bh4) <= 16 && bw4 + bx4 < rt->tile_col.end)
406
328k
    {
407
328k
        add_spatial_candidate(mvstack, cnt, 4, &b_top[bw4], ref, gmv,
408
328k
                              &have_newmv, &have_row_mvs);
409
328k
    }
410
411
981k
    const int nearest_match = have_col_mvs + have_row_mvs;
412
981k
    const int nearest_cnt = *cnt;
413
2.18M
    for (int n = 0; n < nearest_cnt; n++)
414
1.20M
        mvstack[n].weight += 640;
415
416
    // temporal
417
981k
    int globalmv_ctx = rf->frm_hdr->use_ref_frame_mvs;
418
981k
    if (rf->use_ref_frame_mvs) {
419
20.5k
        const ptrdiff_t stride = rf->rp_stride;
420
20.5k
        const int by8 = by4 >> 1, bx8 = bx4 >> 1;
421
20.5k
        const refmvs_temporal_block *const rbi = &rt->rp_proj[(by8 & 15) * stride + bx8];
422
20.5k
        const refmvs_temporal_block *rb = rbi;
423
20.5k
        const int step_h = bw4 >= 16 ? 2 : 1, step_v = bh4 >= 16 ? 2 : 1;
424
20.5k
        const int w8 = imin((w4 + 1) >> 1, 8), h8 = imin((h4 + 1) >> 1, 8);
425
60.5k
        for (int y = 0; y < h8; y += step_v) {
426
97.3k
            for (int x = 0; x < w8; x+= step_h) {
427
57.3k
                add_temporal_candidate(rf, mvstack, cnt, &rb[x], ref,
428
57.3k
                                       !(x | y) ? &globalmv_ctx : NULL, tgmv);
429
57.3k
            }
430
39.9k
            rb += stride * step_v;
431
39.9k
        }
432
20.5k
        if (imin(bw4, bh4) >= 2 && imax(bw4, bh4) < 16) {
433
11.2k
            const int bh8 = bh4 >> 1, bw8 = bw4 >> 1;
434
11.2k
            rb = &rbi[bh8 * stride];
435
11.2k
            const int has_bottom = by8 + bh8 < imin(rt->tile_row.end >> 1,
436
11.2k
                                                    (by8 & ~7) + 8);
437
11.2k
            if (has_bottom && bx8 - 1 >= imax(rt->tile_col.start >> 1, bx8 & ~7)) {
438
2.43k
                add_temporal_candidate(rf, mvstack, cnt, &rb[-1], ref,
439
2.43k
                                       NULL, NULL);
440
2.43k
            }
441
11.2k
            if (bx8 + bw8 < imin(rt->tile_col.end >> 1, (bx8 & ~7) + 8)) {
442
4.02k
                if (has_bottom) {
443
2.34k
                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8], ref,
444
2.34k
                                           NULL, NULL);
445
2.34k
                }
446
4.02k
                if (by8 + bh8 - 1 < imin(rt->tile_row.end >> 1, (by8 & ~7) + 8)) {
447
3.58k
                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8 - stride],
448
3.58k
                                           ref, NULL, NULL);
449
3.58k
                }
450
4.02k
            }
451
11.2k
        }
452
20.5k
    }
453
981k
    assert(*cnt <= 8);
454
455
    // top/left (which, confusingly, is part of "secondary" references)
456
981k
    int have_dummy_newmv_match;
457
981k
    if ((n_rows | n_cols) != ~0U) {
458
562k
        add_spatial_candidate(mvstack, cnt, 4, &b_top[-1], ref, gmv,
459
562k
                              &have_dummy_newmv_match, &have_row_mvs);
460
562k
    }
461
462
    // "secondary" (non-direct neighbour) top & left edges
463
    // what is different about secondary is that everything is now in 8x8 resolution
464
2.94M
    for (int n = 2; n <= 3; n++) {
465
1.96M
        if ((unsigned) n > n_rows && (unsigned) n <= max_rows) {
466
527k
            n_rows += scan_row(mvstack, cnt, ref, gmv,
467
527k
                               &rt->r[(((by4 & 31) - 2 * n + 1) | 1) + 5][bx4 | 1],
468
527k
                               bw4, w4, 1 + max_rows - n, bw4 >= 16 ? 4 : 2,
469
527k
                               &have_dummy_newmv_match, &have_row_mvs);
470
527k
        }
471
472
1.96M
        if ((unsigned) n > n_cols && (unsigned) n <= max_cols) {
473
865k
            n_cols += scan_col(mvstack, cnt, ref, gmv, &rt->r[((by4 & 31) | 1) + 5],
474
865k
                               bh4, h4, (bx4 - n * 2 + 1) | 1,
475
865k
                               1 + max_cols - n, bh4 >= 16 ? 4 : 2,
476
865k
                               &have_dummy_newmv_match, &have_col_mvs);
477
865k
        }
478
1.96M
    }
479
981k
    assert(*cnt <= 8);
480
481
981k
    const int ref_match_count = have_col_mvs + have_row_mvs;
482
483
    // context build-up
484
981k
    int refmv_ctx, newmv_ctx;
485
981k
    switch (nearest_match) {
486
195k
    case 0:
487
195k
        refmv_ctx = imin(2, ref_match_count);
488
195k
        newmv_ctx = ref_match_count > 0;
489
195k
        break;
490
411k
    case 1:
491
411k
        refmv_ctx = imin(ref_match_count * 3, 4);
492
411k
        newmv_ctx = 3 - have_newmv;
493
411k
        break;
494
373k
    case 2:
495
373k
        refmv_ctx = 5;
496
373k
        newmv_ctx = 5 - have_newmv;
497
373k
        break;
498
981k
    }
499
500
    // sorting (nearest, then "secondary")
501
980k
    int len = nearest_cnt;
502
1.97M
    while (len) {
503
995k
        int last = 0;
504
1.49M
        for (int n = 1; n < len; n++) {
505
499k
            if (mvstack[n - 1].weight < mvstack[n].weight) {
506
285k
#define EXCHANGE(a, b) do { refmvs_candidate tmp = a; a = b; b = tmp; } while (0)
507
221k
                EXCHANGE(mvstack[n - 1], mvstack[n]);
508
221k
                last = n;
509
221k
            }
510
499k
        }
511
995k
        len = last;
512
995k
    }
513
980k
    len = *cnt;
514
1.55M
    while (len > nearest_cnt) {
515
577k
        int last = nearest_cnt;
516
878k
        for (int n = nearest_cnt + 1; n < len; n++) {
517
301k
            if (mvstack[n - 1].weight < mvstack[n].weight) {
518
63.8k
                EXCHANGE(mvstack[n - 1], mvstack[n]);
519
63.8k
#undef EXCHANGE
520
63.8k
                last = n;
521
63.8k
            }
522
301k
        }
523
577k
        len = last;
524
577k
    }
525
526
980k
    if (ref.ref[1] > 0) {
527
88.5k
        if (*cnt < 2) {
528
69.9k
            const int sign0 = rf->sign_bias[ref.ref[0] - 1];
529
69.9k
            const int sign1 = rf->sign_bias[ref.ref[1] - 1];
530
69.9k
            const int sz4 = imin(w4, h4);
531
69.9k
            refmvs_candidate *const same = &mvstack[*cnt];
532
69.9k
            int same_count[4] = { 0 };
533
534
            // non-self references in top
535
82.8k
            if (n_rows != ~0U) for (int x = 0; x < sz4;) {
536
43.2k
                const refmvs_block *const cand_b = &b_top[x];
537
43.2k
                add_compound_extended_candidate(same, same_count, cand_b,
538
43.2k
                                                sign0, sign1, ref, rf->sign_bias);
539
43.2k
                x += dav1d_block_dimensions[cand_b->bs][0];
540
43.2k
            }
541
542
            // non-self references in left
543
94.1k
            if (n_cols != ~0U) for (int y = 0; y < sz4;) {
544
49.5k
                const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
545
49.5k
                add_compound_extended_candidate(same, same_count, cand_b,
546
49.5k
                                                sign0, sign1, ref, rf->sign_bias);
547
49.5k
                y += dav1d_block_dimensions[cand_b->bs][1];
548
49.5k
            }
549
550
69.9k
            refmvs_candidate *const diff = &same[2];
551
69.9k
            const int *const diff_count = &same_count[2];
552
553
            // merge together
554
209k
            for (int n = 0; n < 2; n++) {
555
139k
                int m = same_count[n];
556
557
139k
                if (m >= 2) continue;
558
559
116k
                const int l = diff_count[n];
560
116k
                if (l) {
561
74.6k
                    same[m].mv.mv[n] = diff[0].mv.mv[n];
562
74.6k
                    if (++m == 2) continue;
563
27.0k
                    if (l == 2) {
564
20.1k
                        same[1].mv.mv[n] = diff[1].mv.mv[n];
565
20.1k
                        continue;
566
20.1k
                    }
567
27.0k
                }
568
85.2k
                do {
569
85.2k
                    same[m].mv.mv[n] = tgmv[n];
570
85.2k
                } while (++m < 2);
571
48.3k
            }
572
573
            // if the first extended was the same as the non-extended one,
574
            // then replace it with the second extended one
575
69.9k
            int n = *cnt;
576
69.9k
            if (n == 1 && mvstack[0].mv.n == same[0].mv.n)
577
20.7k
                mvstack[1].mv = mvstack[2].mv;
578
112k
            do {
579
112k
                mvstack[n].weight = 2;
580
112k
            } while (++n < 2);
581
69.9k
            *cnt = 2;
582
69.9k
        }
583
584
        // clamping
585
88.5k
        const int left = -(bx4 + bw4 + 4) * 4 * 8;
586
88.5k
        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
587
88.5k
        const int top = -(by4 + bh4 + 4) * 4 * 8;
588
88.5k
        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
589
590
88.5k
        const int n_refmvs = *cnt;
591
88.5k
        int n = 0;
592
188k
        do {
593
188k
            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
594
188k
            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
595
188k
            mvstack[n].mv.mv[1].x = iclip(mvstack[n].mv.mv[1].x, left, right);
596
188k
            mvstack[n].mv.mv[1].y = iclip(mvstack[n].mv.mv[1].y, top, bottom);
597
188k
        } while (++n < n_refmvs);
598
599
88.5k
        switch (refmv_ctx >> 1) {
600
46.5k
        case 0:
601
46.5k
            *ctx = imin(newmv_ctx, 1);
602
46.5k
            break;
603
27.2k
        case 1:
604
27.2k
            *ctx = 1 + imin(newmv_ctx, 3);
605
27.2k
            break;
606
14.7k
        case 2:
607
14.7k
            *ctx = iclip(3 + newmv_ctx, 4, 7);
608
14.7k
            break;
609
88.5k
        }
610
611
88.5k
        return;
612
892k
    } else if (*cnt < 2 && ref.ref[0] > 0) {
613
269k
        const int sign = rf->sign_bias[ref.ref[0] - 1];
614
269k
        const int sz4 = imin(w4, h4);
615
616
        // non-self references in top
617
349k
        if (n_rows != ~0U) for (int x = 0; x < sz4 && *cnt < 2;) {
618
178k
            const refmvs_block *const cand_b = &b_top[x];
619
178k
            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
620
178k
            x += dav1d_block_dimensions[cand_b->bs][0];
621
178k
        }
622
623
        // non-self references in left
624
367k
        if (n_cols != ~0U) for (int y = 0; y < sz4 && *cnt < 2;) {
625
181k
            const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
626
181k
            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
627
181k
            y += dav1d_block_dimensions[cand_b->bs][1];
628
181k
        }
629
269k
    }
630
892k
    assert(*cnt <= 8);
631
632
    // clamping
633
892k
    int n_refmvs = *cnt;
634
892k
    if (n_refmvs) {
635
800k
        const int left = -(bx4 + bw4 + 4) * 4 * 8;
636
800k
        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
637
800k
        const int top = -(by4 + bh4 + 4) * 4 * 8;
638
800k
        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
639
640
800k
        int n = 0;
641
1.99M
        do {
642
1.99M
            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
643
1.99M
            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
644
1.99M
        } while (++n < n_refmvs);
645
800k
    }
646
647
1.31M
    for (int n = *cnt; n < 2; n++)
648
421k
        mvstack[n].mv.mv[0] = tgmv[0];
649
650
892k
    *ctx = (refmv_ctx << 4) | (globalmv_ctx << 3) | newmv_ctx;
651
892k
}
652
653
/*
 * Prepare the per-tile state `rt` for one superblock row of frame `rf`.
 *
 * Fills in:
 *  - rt->rp_proj: base of this tile row's 16-row slice of rf->rp_proj,
 *  - rt->r[]:     a table of row pointers into rf->r (see comments below),
 *  - rt->rf and the tile bounds, with the end positions clamped to the
 *    frame size (rf->ih4 / rf->iw4).
 *
 * tile_row_idx is forced to 0 when rf->n_tile_threads == 1. `sby` is used
 * both to select the region of rt->r[] that is written and to decide
 * whether three of the row pointers are exchanged (odd sby).
 */
void dav1d_refmvs_tile_sbrow_init(refmvs_tile *const rt, const refmvs_frame *const rf,
654
                                  const int tile_col_start4, const int tile_col_end4,
655
                                  const int tile_row_start4, const int tile_row_end4,
656
                                  const int sby, int tile_row_idx, const int pass)
657
251k
{
658
251k
    if (rf->n_tile_threads == 1) tile_row_idx = 0;
659
251k
    rt->rp_proj = &rf->rp_proj[16 * rf->rp_stride * tile_row_idx];
660
251k
    // One row of rf->r holds twice as many entries as one row of rp_proj.
    const ptrdiff_t r_stride = rf->rp_stride * 2;
661
251k
    // With frame threading, pass 2 uses the second half of the rf->r
    // allocation (dav1d_refmvs_init_frame() doubles it in that case).
    const ptrdiff_t pass_off = (rf->n_frame_threads > 1 && pass == 2) ?
662
139k
        35 * 2 * rf->n_blocks : 0;
663
251k
    refmvs_block *r = &rf->r[35 * r_stride * tile_row_idx + pass_off];
664
251k
    const int sbsz = rf->sbsz;
665
251k
    // Either 0 or 16: selects which region of rt->r[] this call writes.
    const int off = (sbsz * sby) & 16;
666
5.67M
    // Slots off+5 .. off+5+sbsz-1 get sbsz consecutive rows.
    for (int i = 0; i < sbsz; i++, r += r_stride)
667
5.42M
        rt->r[off + 5 + i] = r;
668
251k
    // The next three rows go into slots 0, 2 and 4; slots 1 and 3 are
    // explicitly left without backing storage.
    rt->r[off + 0] = r;
669
251k
    r += r_stride;
670
251k
    rt->r[off + 1] = NULL;
671
251k
    rt->r[off + 2] = r;
672
251k
    r += r_stride;
673
251k
    rt->r[off + 3] = NULL;
674
251k
    rt->r[off + 4] = r;
675
251k
    // On odd superblock rows, swap slots 0/2/4 with the slots sbsz entries
    // further on in the table.
    if (sby & 1) {
676
173k
#define EXCHANGE(a, b) do { void *const tmp = a; a = b; b = tmp; } while (0)
677
57.9k
        EXCHANGE(rt->r[off + 0], rt->r[off + sbsz + 0]);
678
57.9k
        EXCHANGE(rt->r[off + 2], rt->r[off + sbsz + 2]);
679
57.9k
        EXCHANGE(rt->r[off + 4], rt->r[off + sbsz + 4]);
680
57.9k
#undef EXCHANGE
681
57.9k
    }
682
683
251k
    rt->rf = rf;
684
251k
    rt->tile_row.start = tile_row_start4;
685
251k
    // Tile ends are clamped so they never exceed the frame dimensions.
    rt->tile_row.end = imin(tile_row_end4, rf->ih4);
686
251k
    rt->tile_col.start = tile_col_start4;
687
251k
    rt->tile_col.end = imin(tile_col_end4, rf->iw4);
688
251k
}
689
690
/*
 * C implementation of the load_tmvs entry of Dav1dRefmvsDSPContext.
 *
 * Step 1: marks rows [row_start8, row_end8) x columns [col_start8, col_end8)
 *         of this tile row's rp_proj buffer as INVALID_MV.
 * Step 2: for each of the rf->n_mfmvs selected references, walks the stored
 *         temporal blocks of rf->rp_ref[ref] and writes their motion vector
 *         and ref2ref distance at the projected position in rp_proj.
 *
 * row_end8 is clamped to rf->ih8 and at most 16 rows are handled per call
 * (asserted). The columns read from the reference are widened by 8 on each
 * side and clamped to [0, rf->iw8). tile_row_idx is forced to 0 when
 * rf->n_tile_threads == 1.
 */
static void load_tmvs_c(const refmvs_frame *const rf, int tile_row_idx,
691
                        const int col_start8, const int col_end8,
692
                        const int row_start8, int row_end8)
693
12.7k
{
694
12.7k
    if (rf->n_tile_threads == 1) tile_row_idx = 0;
695
12.7k
    assert(row_start8 >= 0);
696
12.7k
    assert((unsigned) (row_end8 - row_start8) <= 16U);
697
12.7k
    row_end8 = imin(row_end8, rf->ih8);
698
12.7k
    // Source columns scanned in the reference: 8 extra on each side.
    const int col_start8i = imax(col_start8 - 8, 0);
699
12.7k
    const int col_end8i = imin(col_end8 + 8, rf->iw8);
700
701
12.7k
    const ptrdiff_t stride = rf->rp_stride;
702
12.7k
    // rp_proj rows are addressed modulo 16 within the tile row's slice.
    refmvs_temporal_block *rp_proj =
703
12.7k
        &rf->rp_proj[16 * stride * tile_row_idx + (row_start8 & 15) * stride];
704
82.6k
    for (int y = row_start8; y < row_end8; y++) {
705
236k
        for (int x = col_start8; x < col_end8; x++)
706
167k
            rp_proj[x].mv.n = INVALID_MV;
707
69.9k
        rp_proj += stride;
708
69.9k
    }
709
710
12.7k
    // Rewind to the start of the slice; rows are selected via (pos_y & 15).
    rp_proj = &rf->rp_proj[16 * stride * tile_row_idx];
711
25.3k
    for (int n = 0; n < rf->n_mfmvs; n++) {
712
12.6k
        const int ref2cur = rf->mfmv_ref2cur[n];
713
12.6k
        // References flagged as unusable by dav1d_refmvs_init_frame().
        if (ref2cur == INVALID_REF2CUR) continue;
714
715
10.6k
        const int ref = rf->mfmv_ref[n];
716
10.6k
        // Negative for ref < 4; XORed into the sign argument of
        // apply_sign() below (apply_sign/mv_projection are defined
        // outside this chunk).
        const int ref_sign = ref - 4;
717
10.6k
        const refmvs_temporal_block *r = &rf->rp_ref[ref][row_start8 * stride];
718
73.0k
        for (int y = row_start8; y < row_end8; y++) {
719
62.3k
            // A projected row is only accepted inside the 8-row aligned
            // group that contains y, intersected with the requested rows.
            const int y_sb_align = y & ~7;
720
62.3k
            const int y_proj_start = imax(y_sb_align, row_start8);
721
62.3k
            const int y_proj_end = imin(y_sb_align + 8, row_end8);
722
142k
            for (int x = col_start8i; x < col_end8i; x++) {
723
79.6k
                const refmvs_temporal_block *rb = &r[x];
724
79.6k
                const int b_ref = rb->ref;
725
79.6k
                // ref == 0 marks a block with no usable MV (see save_tmvs_c).
                if (!b_ref) continue;
726
32.9k
                const int ref2ref = rf->mfmv_ref2ref[n][b_ref - 1];
727
32.9k
                if (!ref2ref) continue;
728
19.6k
                const mv b_mv = rb->mv;
729
19.6k
                const mv offset = mv_projection(b_mv, ref2cur, ref2ref);
730
19.6k
                // NOTE(review): ">> 6" presumably converts the projected MV
                // into units of 8x8 blocks -- confirm against the MV units.
                int pos_x = x + apply_sign(abs(offset.x) >> 6,
731
19.6k
                                           offset.x ^ ref_sign);
732
19.6k
                const int pos_y = y + apply_sign(abs(offset.y) >> 6,
733
19.6k
                                                 offset.y ^ ref_sign);
734
19.6k
                if (pos_y >= y_proj_start && pos_y < y_proj_end) {
735
17.9k
                    const ptrdiff_t pos = (pos_y & 15) * stride;
736
48.0k
                    // Handle a run of consecutive blocks sharing the same
                    // ref and mv in one go: the projection is computed once
                    // and pos_x advances together with x.
                    for (;;) {
737
48.0k
                        const int x_sb_align = x & ~7;
738
48.0k
                        // Destination column must lie in
                        // [x_sb_align - 8, x_sb_align + 16) and inside
                        // [col_start8, col_end8).
                        if (pos_x >= imax(x_sb_align - 8, col_start8) &&
739
47.2k
                            pos_x < imin(x_sb_align + 16, col_end8))
740
46.2k
                        {
741
46.2k
                            rp_proj[pos + pos_x].mv = rb->mv;
742
46.2k
                            rp_proj[pos + pos_x].ref = ref2ref;
743
46.2k
                        }
744
48.0k
                        if (++x >= col_end8i) break;
745
33.8k
                        rb++;
746
33.8k
                        if (rb->ref != b_ref || rb->mv.n != b_mv.n) break;
747
30.0k
                        pos_x++;
748
30.0k
                    }
749
17.9k
                } else {
750
3.15k
                    // Projected row is out of range: skip the whole run of
                    // identical blocks without writing anything.
                    for (;;) {
751
3.15k
                        if (++x >= col_end8i) break;
752
2.16k
                        rb++;
753
2.16k
                        if (rb->ref != b_ref || rb->mv.n != b_mv.n) break;
754
2.16k
                    }
755
1.72k
                }
756
19.6k
                // Both inner loops leave x one past the last handled block;
                // undo one step so the enclosing for's x++ lands on it.
                x--;
757
19.6k
            }
758
62.3k
            r += stride;
759
62.3k
        }
760
10.6k
    }
761
12.7k
}
762
763
/*
 * C implementation of the save_tmvs entry of Dav1dRefmvsDSPContext.
 *
 * For every row y in [row_start8, row_end8) and column x starting at
 * col_start8, stores one refmvs_temporal_block into rp[x], derived from the
 * block at rr[(y & 15) * 2][x * 2 + 1]. rp advances by `stride` per row.
 *
 * Selection per block, in priority order:
 *  1. second reference (ref.ref[1], mv.mv[1]),
 *  2. first reference  (ref.ref[0], mv.mv[0]),
 * each accepted only if the reference index is > 0, ref_sign[ref - 1] is
 * non-zero and both MV components have magnitude < 4096;
 *  3. otherwise the entry is written as mv.n = 0, ref = 0 ("invalid").
 */
static void save_tmvs_c(refmvs_temporal_block *rp, const ptrdiff_t stride,
764
                        refmvs_block *const *const rr,
765
                        const uint8_t *const ref_sign,
766
                        const int col_end8, const int row_end8,
767
                        const int col_start8, const int row_start8)
768
26.1k
{
769
179k
    for (int y = row_start8; y < row_end8; y++) {
770
153k
        const refmvs_block *const b = rr[(y & 15) * 2];
771
772
356k
        // x is advanced inside the branches below, one block width at a time.
        // NOTE(review): the inner loops write bw8 entries without rechecking
        // col_end8 -- presumably blocks never straddle col_end8; confirm.
        for (int x = col_start8; x < col_end8;) {
773
202k
            const refmvs_block *const cand_b = &b[x * 2 + 1];
774
202k
            // Block width in rp columns, rounded up: (dim + 1) / 2.
            const int bw8 = (dav1d_block_dimensions[cand_b->bs][0] + 1) >> 1;
775
776
202k
            // OR of the two non-negative magnitudes is < 4096 (a power of
            // two) exactly when both of them are < 4096.
            if (cand_b->ref.ref[1] > 0 && ref_sign[cand_b->ref.ref[1] - 1] &&
777
20.8k
                (abs(cand_b->mv.mv[1].y) | abs(cand_b->mv.mv[1].x)) < 4096)
778
16.2k
            {
779
73.0k
                for (int n = 0; n < bw8; n++, x++)
780
56.8k
                    rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[1],
781
56.8k
                                                      .ref = cand_b->ref.ref[1] };
782
186k
            } else if (cand_b->ref.ref[0] > 0 && ref_sign[cand_b->ref.ref[0] - 1] &&
783
75.1k
                       (abs(cand_b->mv.mv[0].y) | abs(cand_b->mv.mv[0].x)) < 4096)
784
70.1k
            {
785
337k
                for (int n = 0; n < bw8; n++, x++)
786
267k
                    rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[0],
787
267k
                                                      .ref = cand_b->ref.ref[0] };
788
116k
            } else {
789
621k
                for (int n = 0; n < bw8; n++, x++) {
790
504k
                    rp[x].mv.n = 0;
791
504k
                    rp[x].ref = 0; // "invalid"
792
504k
                }
793
116k
            }
794
202k
        }
795
153k
        rp += stride;
796
153k
    }
797
26.1k
}
798
799
/*
 * Initialise the frame-level refmvs state `rf` for a new frame.
 *
 * - Derives frame dimensions (iw8/ih8/iw4/ih4), rp_stride and the thread
 *   counts from the headers and arguments.
 * - (Re)allocates the combined rf->r / rf->rp_proj buffer whenever the
 *   required block count differs from rf->n_blocks.
 * - Computes sign_bias, mfmv_sign and pocdiff (clipped to [-31, 31]) for
 *   each of the 7 references.
 * - When frm_hdr->use_ref_frame_mvs and seq_hdr->order_hint_n_bits are both
 *   set, selects the references used for temporal MV projection and fills
 *   mfmv_ref / mfmv_ref2cur / mfmv_ref2ref.
 *
 * Returns 0 on success, or DAV1D_ERR(ENOMEM) if the allocation fails (in
 * which case rf->n_blocks is reset to 0).
 */
int dav1d_refmvs_init_frame(refmvs_frame *const rf,
800
                            const Dav1dSequenceHeader *const seq_hdr,
801
                            const Dav1dFrameHeader *const frm_hdr,
802
                            const uint8_t ref_poc[7],
803
                            refmvs_temporal_block *const rp,
804
                            const uint8_t ref_ref_poc[7][7],
805
                            /*const*/ refmvs_temporal_block *const rp_ref[7],
806
                            const int n_tile_threads, const int n_frame_threads)
807
84.5k
{
808
84.5k
    // Width rounded up to a multiple of 128, expressed in units of 8.
    const int rp_stride = ((frm_hdr->width[0] + 127) & ~127) >> 3;
809
84.5k
    // Per-tile-row storage is only needed when tile threading is enabled.
    const int n_tile_rows = n_tile_threads > 1 ? frm_hdr->tiling.rows : 1;
810
84.5k
    const int n_blocks = rp_stride * n_tile_rows;
811
812
84.5k
    // 16 or 32, depending on seq_hdr->sb128.
    rf->sbsz = 16 << seq_hdr->sb128;
813
84.5k
    rf->frm_hdr = frm_hdr;
814
84.5k
    // Dimensions in units of 8 (rounded up) and, doubled, in units of 4.
    rf->iw8 = (frm_hdr->width[0] + 7) >> 3;
815
84.5k
    rf->ih8 = (frm_hdr->height + 7) >> 3;
816
84.5k
    rf->iw4 = rf->iw8 << 1;
817
84.5k
    rf->ih4 = rf->ih8 << 1;
818
84.5k
    rf->rp = rp;
819
84.5k
    rf->rp_stride = rp_stride;
820
84.5k
    rf->n_tile_threads = n_tile_threads;
821
84.5k
    rf->n_frame_threads = n_frame_threads;
822
823
84.5k
    // Reallocate only when the required size changed since the last frame.
    if (n_blocks != rf->n_blocks) {
824
45.4k
        // r is doubled with frame threading (one half per pass, see
        // pass_off in dav1d_refmvs_tile_sbrow_init()).
        const size_t r_sz = sizeof(*rf->r) * 35 * 2 * n_blocks * (1 + (n_frame_threads > 1));
825
45.4k
        const size_t rp_proj_sz = sizeof(*rf->rp_proj) * 16 * n_blocks;
826
        /* Note that sizeof(*rf->r) == 12, but it's accessed using 16-byte unaligned
827
         * loads in save_tmvs() asm which can overread 4 bytes into rp_proj. */
828
45.4k
        dav1d_free_aligned(rf->r);
829
45.4k
        // Single allocation: r first, rp_proj placed directly after it.
        rf->r = dav1d_alloc_aligned(ALLOC_REFMVS, r_sz + rp_proj_sz, 64);
830
45.4k
        if (!rf->r) {
831
0
            // Forces a fresh allocation attempt on the next call.
            rf->n_blocks = 0;
832
0
            return DAV1D_ERR(ENOMEM);
833
0
        }
834
835
45.4k
        rf->rp_proj = (refmvs_temporal_block*)((uintptr_t)rf->r + r_sz);
836
45.4k
        rf->n_blocks = n_blocks;
837
45.4k
    }
838
839
84.5k
    const int poc = frm_hdr->frame_offset;
840
676k
    for (int i = 0; i < 7; i++) {
841
591k
        // poc_diff is (reference - current); pocdiff below stores the
        // opposite direction (current - reference), clipped.
        const int poc_diff = get_poc_diff(seq_hdr->order_hint_n_bits,
842
591k
                                          ref_poc[i], poc);
843
591k
        rf->sign_bias[i] = poc_diff > 0;
844
591k
        rf->mfmv_sign[i] = poc_diff < 0;
845
591k
        rf->pocdiff[i] = iclip(get_poc_diff(seq_hdr->order_hint_n_bits,
846
591k
                                            poc, ref_poc[i]), -31, 31);
847
591k
    }
848
849
    // temporal MV setup
850
84.5k
    rf->n_mfmvs = 0;
851
84.5k
    rf->rp_ref = rp_ref;
852
84.5k
    if (frm_hdr->use_ref_frame_mvs && seq_hdr->order_hint_n_bits) {
853
11.6k
        // Cap on the number of references selected before altref/last2 are
        // considered; raised to 3 when "last" is selected.
        int total = 2;
854
11.6k
        if (rp_ref[0] && ref_ref_poc[0][6] != ref_poc[3] /* alt-of-last != gold */) {
855
2.50k
            rf->mfmv_ref[rf->n_mfmvs++] = 0; // last
856
2.50k
            total = 3;
857
2.50k
        }
858
11.6k
        // bwd/altref2/altref are only used when their order hint is ahead
        // of the current frame (poc diff > 0).
        if (rp_ref[4] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[4],
859
7.56k
                                      frm_hdr->frame_offset) > 0)
860
816
        {
861
816
            rf->mfmv_ref[rf->n_mfmvs++] = 4; // bwd
862
816
        }
863
11.6k
        if (rp_ref[5] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[5],
864
6.19k
                                      frm_hdr->frame_offset) > 0)
865
558
        {
866
558
            rf->mfmv_ref[rf->n_mfmvs++] = 5; // altref2
867
558
        }
868
11.6k
        if (rf->n_mfmvs < total && rp_ref[6] &&
869
4.59k
            get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[6],
870
4.59k
                         frm_hdr->frame_offset) > 0)
871
1.83k
        {
872
1.83k
            rf->mfmv_ref[rf->n_mfmvs++] = 6; // altref
873
1.83k
        }
874
11.6k
        if (rf->n_mfmvs < total && rp_ref[1])
875
6.04k
            rf->mfmv_ref[rf->n_mfmvs++] = 1; // last2
876
877
23.4k
        // Precompute the distances used by load_tmvs for each selection.
        for (int n = 0; n < rf->n_mfmvs; n++) {
878
11.7k
            const int rpoc = ref_poc[rf->mfmv_ref[n]];
879
11.7k
            const int diff1 = get_poc_diff(seq_hdr->order_hint_n_bits,
880
11.7k
                                           rpoc, frm_hdr->frame_offset);
881
11.7k
            if (abs(diff1) > 31) {
882
1.68k
                rf->mfmv_ref2cur[n] = INVALID_REF2CUR;
883
10.0k
            } else {
884
10.0k
                // Sign is flipped for reference indices below 4.
                rf->mfmv_ref2cur[n] = rf->mfmv_ref[n] < 4 ? -diff1 : diff1;
885
80.5k
                for (int m = 0; m < 7; m++) {
886
70.4k
                    const int rrpoc = ref_ref_poc[rf->mfmv_ref[n]][m];
887
70.4k
                    const int diff2 = get_poc_diff(seq_hdr->order_hint_n_bits,
888
70.4k
                                                   rpoc, rrpoc);
889
                    // unsigned comparison also catches the < 0 case
890
70.4k
                    rf->mfmv_ref2ref[n][m] = (unsigned) diff2 > 31U ? 0 : diff2;
891
70.4k
                }
892
10.0k
            }
893
11.7k
        }
894
11.6k
    }
895
84.5k
    rf->use_ref_frame_mvs = rf->n_mfmvs > 0;
896
897
84.5k
    return 0;
898
84.5k
}
899
900
/*
 * C implementation of the splat_mv entry of Dav1dRefmvsDSPContext.
 *
 * Copies *rmv into bw4 consecutive entries, starting at column bx4, of each
 * of the next bh4 rows referenced by the row-pointer array rr.
 *
 * NOTE(review): the do/while body runs before bh4 is tested, so bh4 == 0 is
 * not handled -- callers presumably always pass bh4 >= 1; confirm.
 */
static void splat_mv_c(refmvs_block **rr, const refmvs_block *const rmv,
901
                       const int bx4, const int bw4, int bh4)
902
1.86M
{
903
8.28M
    do {
904
8.28M
        // Take the current row pointer, then step rr to the next row.
        refmvs_block *const r = *rr++ + bx4;
905
81.9M
        for (int x = 0; x < bw4; x++)
906
73.6M
            r[x] = *rmv;
907
8.28M
    } while (--bh4);
908
1.86M
}
909
910
#if HAVE_ASM
911
#if ARCH_AARCH64 || ARCH_ARM
912
#include "src/arm/refmvs.h"
913
#elif ARCH_LOONGARCH64
914
#include "src/loongarch/refmvs.h"
915
#elif ARCH_X86
916
#include "src/x86/refmvs.h"
917
#endif
918
#endif
919
920
/*
 * Populate the refmvs DSP function table `c`.
 *
 * The portable C implementations are installed first. When HAVE_ASM is set,
 * the init function of the matching architecture (ARM/AArch64, LoongArch64
 * or x86) is then called with the same table; presumably it replaces
 * entries with optimised versions where available (those functions are
 * defined in the per-architecture headers, outside this chunk).
 */
COLD void dav1d_refmvs_dsp_init(Dav1dRefmvsDSPContext *const c)
921
76.9k
{
922
76.9k
    c->load_tmvs = load_tmvs_c;
923
76.9k
    c->save_tmvs = save_tmvs_c;
924
76.9k
    c->splat_mv = splat_mv_c;
925
926
#if HAVE_ASM
927
#if ARCH_AARCH64 || ARCH_ARM
928
    refmvs_dsp_init_arm(c);
929
#elif ARCH_LOONGARCH64
930
    refmvs_dsp_init_loongarch(c);
931
#elif ARCH_X86
932
    refmvs_dsp_init_x86(c);
933
#endif
934
#endif
935
76.9k
}