Coverage Report

Created: 2026-05-16 06:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/dav1d/src/refmvs.c
Line
Count
Source
1
/*
2
 * Copyright © 2020, VideoLAN and dav1d authors
3
 * Copyright © 2020, Two Orioles, LLC
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * 1. Redistributions of source code must retain the above copyright notice, this
10
 *    list of conditions and the following disclaimer.
11
 *
12
 * 2. Redistributions in binary form must reproduce the above copyright notice,
13
 *    this list of conditions and the following disclaimer in the documentation
14
 *    and/or other materials provided with the distribution.
15
 *
16
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
 */
27
28
#include "config.h"
29
30
#include <limits.h>
31
#include <stdlib.h>
32
33
#include "dav1d/common.h"
34
35
#include "common/intops.h"
36
37
#include "src/env.h"
38
#include "src/mem.h"
39
#include "src/refmvs.h"
40
41
/*
 * Offer the motion vector(s) of the spatial neighbour `b` to the candidate
 * stack `mvstack` (currently holding `*cnt` entries, at most 8).
 *
 * Single-reference case (ref.ref[1] == -1): the first of b's two references
 * that equals ref.ref[0] supplies the candidate. Compound case: b must use
 * exactly the same reference pair. In both cases, when bit 0 of b->mf is set
 * and the corresponding gmv[] entry is valid, gmv[] replaces the block's own
 * vector. A candidate already on the stack only has `weight` added to it;
 * otherwise it is appended if there is room.
 *
 * On a reference match, *have_refmv_match is set to 1 and b->mf >> 1 is
 * OR-ed into *have_newmv_match.
 */
static void add_spatial_candidate(refmvs_candidate *const mvstack, int *const cnt,
                                  const int weight, const refmvs_block *const b,
                                  const union refmvs_refpair ref, const mv gmv[2],
                                  int *const have_newmv_match,
                                  int *const have_refmv_match)
{
    // intra block, no intrabc
    if (b->mv.mv[0].n == INVALID_MV)
        return;

    if (ref.ref[1] != -1) {
        // compound: the neighbour has to use the very same reference pair
        if (b->ref.pair != ref.pair)
            return;

        const refmvs_mvpair pair_cand = { .mv = {
            [0] = ((b->mf & 1) && gmv[0].n != INVALID_MV) ? gmv[0] : b->mv.mv[0],
            [1] = ((b->mf & 1) && gmv[1].n != INVALID_MV) ? gmv[1] : b->mv.mv[1],
        }};

        *have_refmv_match = 1;
        *have_newmv_match |= b->mf >> 1;

        const int n_cands = *cnt;
        int i = 0;
        while (i < n_cands) {
            if (mvstack[i].mv.n == pair_cand.n) {
                // duplicate: just strengthen the existing entry
                mvstack[i].weight += weight;
                return;
            }
            i++;
        }

        if (n_cands >= 8)
            return;
        mvstack[n_cands].mv = pair_cand;
        mvstack[n_cands].weight = weight;
        *cnt = n_cands + 1;
        return;
    }

    // single reference: pick the first of the neighbour's refs that matches
    int slot;
    if (b->ref.ref[0] == ref.ref[0])
        slot = 0;
    else if (b->ref.ref[1] == ref.ref[0])
        slot = 1;
    else
        return;

    const mv single_cand = ((b->mf & 1) && gmv[0].n != INVALID_MV) ?
                           gmv[0] : b->mv.mv[slot];

    *have_refmv_match = 1;
    *have_newmv_match |= b->mf >> 1;

    const int n_cands = *cnt;
    int i = 0;
    while (i < n_cands) {
        if (mvstack[i].mv.mv[0].n == single_cand.n) {
            // duplicate: just strengthen the existing entry
            mvstack[i].weight += weight;
            return;
        }
        i++;
    }

    if (n_cands >= 8)
        return;
    mvstack[n_cands].mv.mv[0] = single_cand;
    mvstack[n_cands].weight = weight;
    *cnt = n_cands + 1;
}
96
97
static int scan_row(refmvs_candidate *const mvstack, int *const cnt,
98
                    const union refmvs_refpair ref, const mv gmv[2],
99
                    const refmvs_block *b, const int bw4, const int w4,
100
                    const int max_rows, const int step,
101
                    int *const have_newmv_match, int *const have_refmv_match)
102
0
{
103
0
    const refmvs_block *cand_b = b;
104
0
    const enum BlockSize first_cand_bs = cand_b->bs;
105
0
    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
106
0
    int cand_bw4 = first_cand_b_dim[0];
107
0
    int len = imax(step, imin(bw4, cand_bw4));
108
109
0
    if (bw4 <= cand_bw4) {
110
        // FIXME weight can be higher for odd blocks (bx4 & 1), but then the
111
        // position of the first block has to be odd already, i.e. not just
112
        // for row_offset=-3/-5
113
        // FIXME why can this not be cand_bw4?
114
0
        const int weight = bw4 == 1 ? 2 :
115
0
                           imax(2, imin(2 * max_rows, first_cand_b_dim[1]));
116
0
        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
117
0
                              have_newmv_match, have_refmv_match);
118
0
        return weight >> 1;
119
0
    }
120
121
0
    for (int x = 0;;) {
122
        // FIXME if we overhang above, we could fill a bitmask so we don't have
123
        // to repeat the add_spatial_candidate() for the next row, but just increase
124
        // the weight here
125
0
        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
126
0
                              have_newmv_match, have_refmv_match);
127
0
        x += len;
128
0
        if (x >= w4) return 1;
129
0
        cand_b = &b[x];
130
0
        cand_bw4 = dav1d_block_dimensions[cand_b->bs][0];
131
0
        assert(cand_bw4 < bw4);
132
0
        len = imax(step, cand_bw4);
133
0
    }
134
0
}
135
136
static int scan_col(refmvs_candidate *const mvstack, int *const cnt,
137
                    const union refmvs_refpair ref, const mv gmv[2],
138
                    /*const*/ refmvs_block *const *b, const int bh4, const int h4,
139
                    const int bx4, const int max_cols, const int step,
140
                    int *const have_newmv_match, int *const have_refmv_match)
141
0
{
142
0
    const refmvs_block *cand_b = &b[0][bx4];
143
0
    const enum BlockSize first_cand_bs = cand_b->bs;
144
0
    const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs];
145
0
    int cand_bh4 = first_cand_b_dim[1];
146
0
    int len = imax(step, imin(bh4, cand_bh4));
147
148
0
    if (bh4 <= cand_bh4) {
149
        // FIXME weight can be higher for odd blocks (by4 & 1), but then the
150
        // position of the first block has to be odd already, i.e. not just
151
        // for col_offset=-3/-5
152
        // FIXME why can this not be cand_bh4?
153
0
        const int weight = bh4 == 1 ? 2 :
154
0
                           imax(2, imin(2 * max_cols, first_cand_b_dim[0]));
155
0
        add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv,
156
0
                            have_newmv_match, have_refmv_match);
157
0
        return weight >> 1;
158
0
    }
159
160
0
    for (int y = 0;;) {
161
        // FIXME if we overhang above, we could fill a bitmask so we don't have
162
        // to repeat the add_spatial_candidate() for the next row, but just increase
163
        // the weight here
164
0
        add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv,
165
0
                              have_newmv_match, have_refmv_match);
166
0
        y += len;
167
0
        if (y >= h4) return 1;
168
0
        cand_b = &b[y][bx4];
169
0
        cand_bh4 = dav1d_block_dimensions[cand_b->bs][1];
170
0
        assert(cand_bh4 < bh4);
171
0
        len = imax(step, cand_bh4);
172
0
    }
173
0
}
174
175
0
static inline union mv mv_projection(const union mv mv, const int num, const int den) {
176
0
    static const uint16_t div_mult[32] = {
177
0
           0, 16384, 8192, 5461, 4096, 3276, 2730, 2340,
178
0
        2048,  1820, 1638, 1489, 1365, 1260, 1170, 1092,
179
0
        1024,   963,  910,  862,  819,  780,  744,  712,
180
0
         682,   655,  630,  606,  585,  564,  546,  528
181
0
    };
182
0
    assert(den > 0 && den < 32);
183
0
    assert(num > -32 && num < 32);
184
0
    const int frac = num * div_mult[den];
185
0
    const int y = mv.y * frac, x = mv.x * frac;
186
    // Round and clip according to AV1 spec section 7.9.3
187
0
    return (union mv) { // 0x3fff == (1 << 14) - 1
188
0
        .y = iclip((y + 8192 + (y >> 31)) >> 14, -0x3fff, 0x3fff),
189
0
        .x = iclip((x + 8192 + (x >> 31)) >> 14, -0x3fff, 0x3fff)
190
0
    };
191
0
}
192
193
static void add_temporal_candidate(const refmvs_frame *const rf,
194
                                   refmvs_candidate *const mvstack, int *const cnt,
195
                                   const refmvs_temporal_block *const rb,
196
                                   const union refmvs_refpair ref, int *const globalmv_ctx,
197
                                   const union mv gmv[])
198
0
{
199
0
    if (rb->mv.n == INVALID_MV) return;
200
201
0
    union mv mv = mv_projection(rb->mv, rf->pocdiff[ref.ref[0] - 1], rb->ref);
202
0
    fix_mv_precision(rf->frm_hdr, &mv);
203
204
0
    const int last = *cnt;
205
0
    if (ref.ref[1] == -1) {
206
0
        if (globalmv_ctx)
207
0
            *globalmv_ctx = (abs(mv.x - gmv[0].x) | abs(mv.y - gmv[0].y)) >= 16;
208
209
0
        for (int n = 0; n < last; n++)
210
0
            if (mvstack[n].mv.mv[0].n == mv.n) {
211
0
                mvstack[n].weight += 2;
212
0
                return;
213
0
            }
214
0
        if (last < 8) {
215
0
            mvstack[last].mv.mv[0] = mv;
216
0
            mvstack[last].weight = 2;
217
0
            *cnt = last + 1;
218
0
        }
219
0
    } else {
220
0
        refmvs_mvpair mvp = { .mv = {
221
0
            [0] = mv,
222
0
            [1] = mv_projection(rb->mv, rf->pocdiff[ref.ref[1] - 1], rb->ref),
223
0
        }};
224
0
        fix_mv_precision(rf->frm_hdr, &mvp.mv[1]);
225
226
0
        for (int n = 0; n < last; n++)
227
0
            if (mvstack[n].mv.n == mvp.n) {
228
0
                mvstack[n].weight += 2;
229
0
                return;
230
0
            }
231
0
        if (last < 8) {
232
0
            mvstack[last].mv = mvp;
233
0
            mvstack[last].weight = 2;
234
0
            *cnt = last + 1;
235
0
        }
236
0
    }
237
0
}
238
239
static void add_compound_extended_candidate(refmvs_candidate *const same,
240
                                            int *const same_count,
241
                                            const refmvs_block *const cand_b,
242
                                            const int sign0, const int sign1,
243
                                            const union refmvs_refpair ref,
244
                                            const uint8_t *const sign_bias)
245
0
{
246
0
    refmvs_candidate *const diff = &same[2];
247
0
    int *const diff_count = &same_count[2];
248
249
0
    for (int n = 0; n < 2; n++) {
250
0
        const int cand_ref = cand_b->ref.ref[n];
251
252
0
        if (cand_ref <= 0) break;
253
254
0
        mv cand_mv = cand_b->mv.mv[n];
255
0
        if (cand_ref == ref.ref[0]) {
256
0
            if (same_count[0] < 2)
257
0
                same[same_count[0]++].mv.mv[0] = cand_mv;
258
0
            if (diff_count[1] < 2) {
259
0
                if (sign1 ^ sign_bias[cand_ref - 1]) {
260
0
                    cand_mv.y = -cand_mv.y;
261
0
                    cand_mv.x = -cand_mv.x;
262
0
                }
263
0
                diff[diff_count[1]++].mv.mv[1] = cand_mv;
264
0
            }
265
0
        } else if (cand_ref == ref.ref[1]) {
266
0
            if (same_count[1] < 2)
267
0
                same[same_count[1]++].mv.mv[1] = cand_mv;
268
0
            if (diff_count[0] < 2) {
269
0
                if (sign0 ^ sign_bias[cand_ref - 1]) {
270
0
                    cand_mv.y = -cand_mv.y;
271
0
                    cand_mv.x = -cand_mv.x;
272
0
                }
273
0
                diff[diff_count[0]++].mv.mv[0] = cand_mv;
274
0
            }
275
0
        } else {
276
0
            mv i_cand_mv = (union mv) {
277
0
                .x = -cand_mv.x,
278
0
                .y = -cand_mv.y
279
0
            };
280
281
0
            if (diff_count[0] < 2) {
282
0
                diff[diff_count[0]++].mv.mv[0] =
283
0
                    sign0 ^ sign_bias[cand_ref - 1] ?
284
0
                    i_cand_mv : cand_mv;
285
0
            }
286
287
0
            if (diff_count[1] < 2) {
288
0
                diff[diff_count[1]++].mv.mv[1] =
289
0
                    sign1 ^ sign_bias[cand_ref - 1] ?
290
0
                    i_cand_mv : cand_mv;
291
0
            }
292
0
        }
293
0
    }
294
0
}
295
296
static void add_single_extended_candidate(refmvs_candidate mvstack[8], int *const cnt,
297
                                          const refmvs_block *const cand_b,
298
                                          const int sign, const uint8_t *const sign_bias)
299
0
{
300
0
    for (int n = 0; n < 2; n++) {
301
0
        const int cand_ref = cand_b->ref.ref[n];
302
303
0
        if (cand_ref <= 0) break;
304
        // we need to continue even if cand_ref == ref.ref[0], since
305
        // the candidate could have been added as a globalmv variant,
306
        // which changes the value
307
        // FIXME if scan_{row,col}() returned a mask for the nearest
308
        // edge, we could skip the appropriate ones here
309
310
0
        mv cand_mv = cand_b->mv.mv[n];
311
0
        if (sign ^ sign_bias[cand_ref - 1]) {
312
0
            cand_mv.y = -cand_mv.y;
313
0
            cand_mv.x = -cand_mv.x;
314
0
        }
315
316
0
        int m;
317
0
        const int last = *cnt;
318
0
        for (m = 0; m < last; m++)
319
0
            if (cand_mv.n == mvstack[m].mv.mv[0].n)
320
0
                break;
321
0
        if (m == last) {
322
0
            mvstack[m].mv.mv[0] = cand_mv;
323
0
            mvstack[m].weight = 2; // "minimal"
324
0
            *cnt = last + 1;
325
0
        }
326
0
    }
327
0
}
328
329
/*
330
 * refmvs_frame allocates memory for one sbrow (32 blocks high, whole frame
331
 * wide) of 4x4-resolution refmvs_block entries for spatial MV referencing.
332
 * mvrefs_tile[] keeps a list of 35 (32 + 3 above) pointers into this memory,
333
 * and each sbrow, the bottom entries (y=27/29/31) are exchanged with the top
334
 * (-5/-3/-1) pointers by calling dav1d_refmvs_tile_sbrow_init() at the start
335
 * of each tile/sbrow.
336
 *
337
 * For temporal MV referencing, we call dav1d_refmvs_save_tmvs() at the end of
338
 * each tile/sbrow (when tile column threading is enabled), or at the start of
339
 * each interleaved sbrow (i.e. once for all tile columns together, when tile
340
 * column threading is disabled). This will copy the 4x4-resolution spatial MVs
341
 * into 8x8-resolution refmvs_temporal_block structures. Then, for subsequent
342
 * frames, at the start of each tile/sbrow (when tile column threading is
343
 * enabled) or at the start of each interleaved sbrow (when tile column
344
 * threading is disabled), we call load_tmvs(), which will project the MVs to
345
 * their respective position in the current frame.
346
 */
347
348
void dav1d_refmvs_find(const refmvs_tile *const rt,
349
                       refmvs_candidate mvstack[8], int *const cnt,
350
                       int *const ctx,
351
                       const union refmvs_refpair ref, const enum BlockSize bs,
352
                       const enum EdgeFlags edge_flags,
353
                       const int by4, const int bx4)
354
0
{
355
0
    const refmvs_frame *const rf = rt->rf;
356
0
    const uint8_t *const b_dim = dav1d_block_dimensions[bs];
357
0
    const int bw4 = b_dim[0], w4 = imin(imin(bw4, 16), rt->tile_col.end - bx4);
358
0
    const int bh4 = b_dim[1], h4 = imin(imin(bh4, 16), rt->tile_row.end - by4);
359
0
    mv gmv[2], tgmv[2];
360
361
0
    *cnt = 0;
362
0
    assert(ref.ref[0] >=  0 && ref.ref[0] <= 8 &&
363
0
           ref.ref[1] >= -1 && ref.ref[1] <= 8);
364
0
    if (ref.ref[0] > 0) {
365
0
        tgmv[0] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[0] - 1],
366
0
                             bx4, by4, bw4, bh4, rf->frm_hdr);
367
0
        gmv[0] = rf->frm_hdr->gmv[ref.ref[0] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
368
0
                 tgmv[0] : (mv) { .n = INVALID_MV };
369
0
    } else {
370
0
        tgmv[0] = (mv) { .n = 0 };
371
0
        gmv[0] = (mv) { .n = INVALID_MV };
372
0
    }
373
0
    if (ref.ref[1] > 0) {
374
0
        tgmv[1] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[1] - 1],
375
0
                             bx4, by4, bw4, bh4, rf->frm_hdr);
376
0
        gmv[1] = rf->frm_hdr->gmv[ref.ref[1] - 1].type > DAV1D_WM_TYPE_TRANSLATION ?
377
0
                 tgmv[1] : (mv) { .n = INVALID_MV };
378
0
    }
379
380
    // top
381
0
    int have_newmv = 0, have_col_mvs = 0, have_row_mvs = 0;
382
0
    unsigned max_rows = 0, n_rows = ~0;
383
0
    const refmvs_block *b_top;
384
0
    if (by4 > rt->tile_row.start) {
385
0
        max_rows = imin((by4 - rt->tile_row.start + 1) >> 1, 2 + (bh4 > 1));
386
0
        b_top = &rt->r[(by4 & 31) - 1 + 5][bx4];
387
0
        n_rows = scan_row(mvstack, cnt, ref, gmv, b_top,
388
0
                          bw4, w4, max_rows, bw4 >= 16 ? 4 : 1,
389
0
                          &have_newmv, &have_row_mvs);
390
0
    }
391
392
    // left
393
0
    unsigned max_cols = 0, n_cols = ~0U;
394
0
    refmvs_block *const *b_left;
395
0
    if (bx4 > rt->tile_col.start) {
396
0
        max_cols = imin((bx4 - rt->tile_col.start + 1) >> 1, 2 + (bw4 > 1));
397
0
        b_left = &rt->r[(by4 & 31) + 5];
398
0
        n_cols = scan_col(mvstack, cnt, ref, gmv, b_left,
399
0
                          bh4, h4, bx4 - 1, max_cols, bh4 >= 16 ? 4 : 1,
400
0
                          &have_newmv, &have_col_mvs);
401
0
    }
402
403
    // top/right
404
0
    if (n_rows != ~0U && edge_flags & EDGE_I444_TOP_HAS_RIGHT &&
405
0
        imax(bw4, bh4) <= 16 && bw4 + bx4 < rt->tile_col.end)
406
0
    {
407
0
        add_spatial_candidate(mvstack, cnt, 4, &b_top[bw4], ref, gmv,
408
0
                              &have_newmv, &have_row_mvs);
409
0
    }
410
411
0
    const int nearest_match = have_col_mvs + have_row_mvs;
412
0
    const int nearest_cnt = *cnt;
413
0
    for (int n = 0; n < nearest_cnt; n++)
414
0
        mvstack[n].weight += 640;
415
416
    // temporal
417
0
    int globalmv_ctx = rf->frm_hdr->use_ref_frame_mvs;
418
0
    if (rf->use_ref_frame_mvs) {
419
0
        const ptrdiff_t stride = rf->rp_stride;
420
0
        const int by8 = by4 >> 1, bx8 = bx4 >> 1;
421
0
        const refmvs_temporal_block *const rbi = &rt->rp_proj[(by8 & 15) * stride + bx8];
422
0
        const refmvs_temporal_block *rb = rbi;
423
0
        const int step_h = bw4 >= 16 ? 2 : 1, step_v = bh4 >= 16 ? 2 : 1;
424
0
        const int w8 = imin((w4 + 1) >> 1, 8), h8 = imin((h4 + 1) >> 1, 8);
425
0
        for (int y = 0; y < h8; y += step_v) {
426
0
            for (int x = 0; x < w8; x+= step_h) {
427
0
                add_temporal_candidate(rf, mvstack, cnt, &rb[x], ref,
428
0
                                       !(x | y) ? &globalmv_ctx : NULL, tgmv);
429
0
            }
430
0
            rb += stride * step_v;
431
0
        }
432
0
        if (imin(bw4, bh4) >= 2 && imax(bw4, bh4) < 16) {
433
0
            const int bh8 = bh4 >> 1, bw8 = bw4 >> 1;
434
0
            rb = &rbi[bh8 * stride];
435
0
            const int has_bottom = by8 + bh8 < imin(rt->tile_row.end >> 1,
436
0
                                                    (by8 & ~7) + 8);
437
0
            if (has_bottom && bx8 - 1 >= imax(rt->tile_col.start >> 1, bx8 & ~7)) {
438
0
                add_temporal_candidate(rf, mvstack, cnt, &rb[-1], ref,
439
0
                                       NULL, NULL);
440
0
            }
441
0
            if (bx8 + bw8 < imin(rt->tile_col.end >> 1, (bx8 & ~7) + 8)) {
442
0
                if (has_bottom) {
443
0
                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8], ref,
444
0
                                           NULL, NULL);
445
0
                }
446
0
                if (by8 + bh8 - 1 < imin(rt->tile_row.end >> 1, (by8 & ~7) + 8)) {
447
0
                    add_temporal_candidate(rf, mvstack, cnt, &rb[bw8 - stride],
448
0
                                           ref, NULL, NULL);
449
0
                }
450
0
            }
451
0
        }
452
0
    }
453
0
    assert(*cnt <= 8);
454
455
    // top/left (which, confusingly, is part of "secondary" references)
456
0
    int have_dummy_newmv_match;
457
0
    if ((n_rows | n_cols) != ~0U) {
458
0
        add_spatial_candidate(mvstack, cnt, 4, &b_top[-1], ref, gmv,
459
0
                              &have_dummy_newmv_match, &have_row_mvs);
460
0
    }
461
462
    // "secondary" (non-direct neighbour) top & left edges
463
    // what is different about secondary is that everything is now in 8x8 resolution
464
0
    for (int n = 2; n <= 3; n++) {
465
0
        if ((unsigned) n > n_rows && (unsigned) n <= max_rows) {
466
0
            n_rows += scan_row(mvstack, cnt, ref, gmv,
467
0
                               &rt->r[(((by4 & 31) - 2 * n + 1) | 1) + 5][bx4 | 1],
468
0
                               bw4, w4, 1 + max_rows - n, bw4 >= 16 ? 4 : 2,
469
0
                               &have_dummy_newmv_match, &have_row_mvs);
470
0
        }
471
472
0
        if ((unsigned) n > n_cols && (unsigned) n <= max_cols) {
473
0
            n_cols += scan_col(mvstack, cnt, ref, gmv, &rt->r[((by4 & 31) | 1) + 5],
474
0
                               bh4, h4, (bx4 - n * 2 + 1) | 1,
475
0
                               1 + max_cols - n, bh4 >= 16 ? 4 : 2,
476
0
                               &have_dummy_newmv_match, &have_col_mvs);
477
0
        }
478
0
    }
479
0
    assert(*cnt <= 8);
480
481
0
    const int ref_match_count = have_col_mvs + have_row_mvs;
482
483
    // context build-up
484
0
    int refmv_ctx, newmv_ctx;
485
0
    switch (nearest_match) {
486
0
    case 0:
487
0
        refmv_ctx = imin(2, ref_match_count);
488
0
        newmv_ctx = ref_match_count > 0;
489
0
        break;
490
0
    case 1:
491
0
        refmv_ctx = imin(ref_match_count * 3, 4);
492
0
        newmv_ctx = 3 - have_newmv;
493
0
        break;
494
0
    case 2:
495
0
        refmv_ctx = 5;
496
0
        newmv_ctx = 5 - have_newmv;
497
0
        break;
498
0
    }
499
500
    // sorting (nearest, then "secondary")
501
0
    int len = nearest_cnt;
502
0
    while (len) {
503
0
        int last = 0;
504
0
        for (int n = 1; n < len; n++) {
505
0
            if (mvstack[n - 1].weight < mvstack[n].weight) {
506
0
#define EXCHANGE(a, b) do { refmvs_candidate tmp = a; a = b; b = tmp; } while (0)
507
0
                EXCHANGE(mvstack[n - 1], mvstack[n]);
508
0
                last = n;
509
0
            }
510
0
        }
511
0
        len = last;
512
0
    }
513
0
    len = *cnt;
514
0
    while (len > nearest_cnt) {
515
0
        int last = nearest_cnt;
516
0
        for (int n = nearest_cnt + 1; n < len; n++) {
517
0
            if (mvstack[n - 1].weight < mvstack[n].weight) {
518
0
                EXCHANGE(mvstack[n - 1], mvstack[n]);
519
0
#undef EXCHANGE
520
0
                last = n;
521
0
            }
522
0
        }
523
0
        len = last;
524
0
    }
525
526
0
    if (ref.ref[1] > 0) {
527
0
        if (*cnt < 2) {
528
0
            const int sign0 = rf->sign_bias[ref.ref[0] - 1];
529
0
            const int sign1 = rf->sign_bias[ref.ref[1] - 1];
530
0
            const int sz4 = imin(w4, h4);
531
0
            refmvs_candidate *const same = &mvstack[*cnt];
532
0
            int same_count[4] = { 0 };
533
534
            // non-self references in top
535
0
            if (n_rows != ~0U) for (int x = 0; x < sz4;) {
536
0
                const refmvs_block *const cand_b = &b_top[x];
537
0
                add_compound_extended_candidate(same, same_count, cand_b,
538
0
                                                sign0, sign1, ref, rf->sign_bias);
539
0
                x += dav1d_block_dimensions[cand_b->bs][0];
540
0
            }
541
542
            // non-self references in left
543
0
            if (n_cols != ~0U) for (int y = 0; y < sz4;) {
544
0
                const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
545
0
                add_compound_extended_candidate(same, same_count, cand_b,
546
0
                                                sign0, sign1, ref, rf->sign_bias);
547
0
                y += dav1d_block_dimensions[cand_b->bs][1];
548
0
            }
549
550
0
            refmvs_candidate *const diff = &same[2];
551
0
            const int *const diff_count = &same_count[2];
552
553
            // merge together
554
0
            for (int n = 0; n < 2; n++) {
555
0
                int m = same_count[n];
556
557
0
                if (m >= 2) continue;
558
559
0
                const int l = diff_count[n];
560
0
                if (l) {
561
0
                    same[m].mv.mv[n] = diff[0].mv.mv[n];
562
0
                    if (++m == 2) continue;
563
0
                    if (l == 2) {
564
0
                        same[1].mv.mv[n] = diff[1].mv.mv[n];
565
0
                        continue;
566
0
                    }
567
0
                }
568
0
                do {
569
0
                    same[m].mv.mv[n] = tgmv[n];
570
0
                } while (++m < 2);
571
0
            }
572
573
            // if the first extended was the same as the non-extended one,
574
            // then replace it with the second extended one
575
0
            int n = *cnt;
576
0
            if (n == 1 && mvstack[0].mv.n == same[0].mv.n)
577
0
                mvstack[1].mv = mvstack[2].mv;
578
0
            do {
579
0
                mvstack[n].weight = 2;
580
0
            } while (++n < 2);
581
0
            *cnt = 2;
582
0
        }
583
584
        // clamping
585
0
        const int left = -(bx4 + bw4 + 4) * 4 * 8;
586
0
        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
587
0
        const int top = -(by4 + bh4 + 4) * 4 * 8;
588
0
        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
589
590
0
        const int n_refmvs = *cnt;
591
0
        int n = 0;
592
0
        do {
593
0
            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
594
0
            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
595
0
            mvstack[n].mv.mv[1].x = iclip(mvstack[n].mv.mv[1].x, left, right);
596
0
            mvstack[n].mv.mv[1].y = iclip(mvstack[n].mv.mv[1].y, top, bottom);
597
0
        } while (++n < n_refmvs);
598
599
0
        switch (refmv_ctx >> 1) {
600
0
        case 0:
601
0
            *ctx = imin(newmv_ctx, 1);
602
0
            break;
603
0
        case 1:
604
0
            *ctx = 1 + imin(newmv_ctx, 3);
605
0
            break;
606
0
        case 2:
607
0
            *ctx = iclip(3 + newmv_ctx, 4, 7);
608
0
            break;
609
0
        }
610
611
0
        return;
612
0
    } else if (*cnt < 2 && ref.ref[0] > 0) {
613
0
        const int sign = rf->sign_bias[ref.ref[0] - 1];
614
0
        const int sz4 = imin(w4, h4);
615
616
        // non-self references in top
617
0
        if (n_rows != ~0U) for (int x = 0; x < sz4 && *cnt < 2;) {
618
0
            const refmvs_block *const cand_b = &b_top[x];
619
0
            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
620
0
            x += dav1d_block_dimensions[cand_b->bs][0];
621
0
        }
622
623
        // non-self references in left
624
0
        if (n_cols != ~0U) for (int y = 0; y < sz4 && *cnt < 2;) {
625
0
            const refmvs_block *const cand_b = &b_left[y][bx4 - 1];
626
0
            add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias);
627
0
            y += dav1d_block_dimensions[cand_b->bs][1];
628
0
        }
629
0
    }
630
0
    assert(*cnt <= 8);
631
632
    // clamping
633
0
    int n_refmvs = *cnt;
634
0
    if (n_refmvs) {
635
0
        const int left = -(bx4 + bw4 + 4) * 4 * 8;
636
0
        const int right = (rf->iw4 - bx4 + 4) * 4 * 8;
637
0
        const int top = -(by4 + bh4 + 4) * 4 * 8;
638
0
        const int bottom = (rf->ih4 - by4 + 4) * 4 * 8;
639
640
0
        int n = 0;
641
0
        do {
642
0
            mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right);
643
0
            mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom);
644
0
        } while (++n < n_refmvs);
645
0
    }
646
647
0
    for (int n = *cnt; n < 2; n++)
648
0
        mvstack[n].mv.mv[0] = tgmv[0];
649
650
0
    *ctx = (refmv_ctx << 4) | (globalmv_ctx << 3) | newmv_ctx;
651
0
}
652
653
/*
 * Prepare the per-tile state `rt` for decoding superblock row `sby`.
 *
 * Sets up rt->rp_proj and the row-pointer table rt->r[]: starting at
 * off = (sbsz * sby) & 16, entries off+5 .. off+5+sbsz-1 point at
 * consecutive rows of rf->r, entries off+0, off+2 and off+4 point at the
 * three rows following them, and entries off+1 and off+3 are NULL. For odd
 * `sby`, entries off+{0,2,4} are exchanged with entries off+sbsz+{0,2,4}.
 * The tile bounds are stored clipped to the frame size (rf->ih4 / rf->iw4).
 *
 * tile_row_idx is treated as 0 when rf->n_tile_threads == 1. When
 * rf->n_frame_threads > 1 and pass == 2, a second region of rf->r, offset by
 * 35 * 2 * rf->n_blocks entries, is used.
 */
void dav1d_refmvs_tile_sbrow_init(refmvs_tile *const rt, const refmvs_frame *const rf,
                                  const int tile_col_start4, const int tile_col_end4,
                                  const int tile_row_start4, const int tile_row_end4,
                                  const int sby, int tile_row_idx, const int pass)
{
    if (rf->n_tile_threads == 1)
        tile_row_idx = 0;

    // tile bounds and back-pointer
    rt->rf = rf;
    rt->tile_row.start = tile_row_start4;
    rt->tile_row.end = imin(tile_row_end4, rf->ih4);
    rt->tile_col.start = tile_col_start4;
    rt->tile_col.end = imin(tile_col_end4, rf->iw4);

    rt->rp_proj = &rf->rp_proj[16 * rf->rp_stride * tile_row_idx];

    const ptrdiff_t r_stride = rf->rp_stride * 2;
    ptrdiff_t pass_off = 0;
    if (rf->n_frame_threads > 1 && pass == 2)
        pass_off = 35 * 2 * rf->n_blocks;

    const int sbsz = rf->sbsz;
    const int off = (sbsz * sby) & 16;
    refmvs_block **const rows = &rt->r[off];
    refmvs_block *row = &rf->r[35 * r_stride * tile_row_idx + pass_off];

    // rows belonging to the superblock row itself
    int i = 0;
    while (i < sbsz) {
        rows[5 + i] = row;
        row += r_stride;
        i++;
    }

    // three additional rows, stored in the even slots 0, 2 and 4
    rows[0] = row;
    rows[1] = NULL;
    rows[2] = row + r_stride;
    rows[3] = NULL;
    rows[4] = row + 2 * r_stride;

    if (sby & 1) {
        // swap each of those slots with its counterpart sbsz entries further on
        for (int k = 0; k <= 4; k += 2) {
            refmvs_block *const held = rows[k];
            rows[k] = rows[sbsz + k];
            rows[sbsz + k] = held;
        }
    }
}
689
690
static void load_tmvs_c(const refmvs_frame *const rf, int tile_row_idx,
691
                        const int col_start8, const int col_end8,
692
                        const int row_start8, int row_end8)
693
0
{
694
0
    if (rf->n_tile_threads == 1) tile_row_idx = 0;
695
0
    assert(row_start8 >= 0);
696
0
    assert((unsigned) (row_end8 - row_start8) <= 16U);
697
0
    row_end8 = imin(row_end8, rf->ih8);
698
0
    const int col_start8i = imax(col_start8 - 8, 0);
699
0
    const int col_end8i = imin(col_end8 + 8, rf->iw8);
700
701
0
    const ptrdiff_t stride = rf->rp_stride;
702
0
    refmvs_temporal_block *rp_proj =
703
0
        &rf->rp_proj[16 * stride * tile_row_idx + (row_start8 & 15) * stride];
704
0
    for (int y = row_start8; y < row_end8; y++) {
705
0
        for (int x = col_start8; x < col_end8; x++)
706
0
            rp_proj[x].mv.n = INVALID_MV;
707
0
        rp_proj += stride;
708
0
    }
709
710
0
    rp_proj = &rf->rp_proj[16 * stride * tile_row_idx];
711
0
    for (int n = 0; n < rf->n_mfmvs; n++) {
712
0
        const int ref2cur = rf->mfmv_ref2cur[n];
713
0
        if (ref2cur == INVALID_REF2CUR) continue;
714
715
0
        const int ref = rf->mfmv_ref[n];
716
0
        const int ref_sign = ref - 4;
717
0
        const refmvs_temporal_block *r = &rf->rp_ref[ref][row_start8 * stride];
718
0
        for (int y = row_start8; y < row_end8; y++) {
719
0
            const int y_sb_align = y & ~7;
720
0
            const int y_proj_start = imax(y_sb_align, row_start8);
721
0
            const int y_proj_end = imin(y_sb_align + 8, row_end8);
722
0
            for (int x = col_start8i; x < col_end8i; x++) {
723
0
                const refmvs_temporal_block *rb = &r[x];
724
0
                const int b_ref = rb->ref;
725
0
                if (!b_ref) continue;
726
0
                const int ref2ref = rf->mfmv_ref2ref[n][b_ref - 1];
727
0
                if (!ref2ref) continue;
728
0
                const mv b_mv = rb->mv;
729
0
                const mv offset = mv_projection(b_mv, ref2cur, ref2ref);
730
0
                int pos_x = x + apply_sign(abs(offset.x) >> 6,
731
0
                                           offset.x ^ ref_sign);
732
0
                const int pos_y = y + apply_sign(abs(offset.y) >> 6,
733
0
                                                 offset.y ^ ref_sign);
734
0
                if (pos_y >= y_proj_start && pos_y < y_proj_end) {
735
0
                    const ptrdiff_t pos = (pos_y & 15) * stride;
736
0
                    for (;;) {
737
0
                        const int x_sb_align = x & ~7;
738
0
                        if (pos_x >= imax(x_sb_align - 8, col_start8) &&
739
0
                            pos_x < imin(x_sb_align + 16, col_end8))
740
0
                        {
741
0
                            rp_proj[pos + pos_x].mv = rb->mv;
742
0
                            rp_proj[pos + pos_x].ref = ref2ref;
743
0
                        }
744
0
                        if (++x >= col_end8i) break;
745
0
                        rb++;
746
0
                        if (rb->ref != b_ref || rb->mv.n != b_mv.n) break;
747
0
                        pos_x++;
748
0
                    }
749
0
                } else {
750
0
                    for (;;) {
751
0
                        if (++x >= col_end8i) break;
752
0
                        rb++;
753
0
                        if (rb->ref != b_ref || rb->mv.n != b_mv.n) break;
754
0
                    }
755
0
                }
756
0
                x--;
757
0
            }
758
0
            r += stride;
759
0
        }
760
0
    }
761
0
}
762
763
static void save_tmvs_c(refmvs_temporal_block *rp, const ptrdiff_t stride,
764
                        refmvs_block *const *const rr,
765
                        const uint8_t *const ref_sign,
766
                        const int col_end8, const int row_end8,
767
                        const int col_start8, const int row_start8)
768
0
{
769
0
    for (int y = row_start8; y < row_end8; y++) {
770
0
        const refmvs_block *const b = rr[(y & 15) * 2];
771
772
0
        for (int x = col_start8; x < col_end8;) {
773
0
            const refmvs_block *const cand_b = &b[x * 2 + 1];
774
0
            const int bw8 = (dav1d_block_dimensions[cand_b->bs][0] + 1) >> 1;
775
776
0
            if (cand_b->ref.ref[1] > 0 && ref_sign[cand_b->ref.ref[1] - 1] &&
777
0
                (abs(cand_b->mv.mv[1].y) | abs(cand_b->mv.mv[1].x)) < 4096)
778
0
            {
779
0
                for (int n = 0; n < bw8; n++, x++)
780
0
                    rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[1],
781
0
                                                      .ref = cand_b->ref.ref[1] };
782
0
            } else if (cand_b->ref.ref[0] > 0 && ref_sign[cand_b->ref.ref[0] - 1] &&
783
0
                       (abs(cand_b->mv.mv[0].y) | abs(cand_b->mv.mv[0].x)) < 4096)
784
0
            {
785
0
                for (int n = 0; n < bw8; n++, x++)
786
0
                    rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[0],
787
0
                                                      .ref = cand_b->ref.ref[0] };
788
0
            } else {
789
0
                for (int n = 0; n < bw8; n++, x++) {
790
0
                    rp[x].mv.n = 0;
791
0
                    rp[x].ref = 0; // "invalid"
792
0
                }
793
0
            }
794
0
        }
795
0
        rp += stride;
796
0
    }
797
0
}
798
799
int dav1d_refmvs_init_frame(refmvs_frame *const rf,
800
                            const Dav1dSequenceHeader *const seq_hdr,
801
                            const Dav1dFrameHeader *const frm_hdr,
802
                            const uint8_t ref_poc[7],
803
                            refmvs_temporal_block *const rp,
804
                            const uint8_t ref_ref_poc[7][7],
805
                            /*const*/ refmvs_temporal_block *const rp_ref[7],
806
                            const int n_tile_threads, const int n_frame_threads)
807
0
{
808
0
    const int rp_stride = ((frm_hdr->width[0] + 127) & ~127) >> 3;
809
0
    const int n_tile_rows = n_tile_threads > 1 ? frm_hdr->tiling.rows : 1;
810
0
    const int n_blocks = rp_stride * n_tile_rows;
811
812
0
    rf->sbsz = 16 << seq_hdr->sb128;
813
0
    rf->frm_hdr = frm_hdr;
814
0
    rf->iw8 = (frm_hdr->width[0] + 7) >> 3;
815
0
    rf->ih8 = (frm_hdr->height + 7) >> 3;
816
0
    rf->iw4 = rf->iw8 << 1;
817
0
    rf->ih4 = rf->ih8 << 1;
818
0
    rf->rp = rp;
819
0
    rf->rp_stride = rp_stride;
820
0
    rf->n_tile_threads = n_tile_threads;
821
0
    rf->n_frame_threads = n_frame_threads;
822
823
0
    if (n_blocks != rf->n_blocks) {
824
0
        const size_t r_sz = sizeof(*rf->r) * 35 * 2 * n_blocks * (1 + (n_frame_threads > 1));
825
0
        const size_t rp_proj_sz = sizeof(*rf->rp_proj) * 16 * n_blocks;
826
        /* Note that sizeof(*rf->r) == 12, but it's accessed using 16-byte unaligned
827
         * loads in save_tmvs() asm which can overread 4 bytes into rp_proj. */
828
0
        dav1d_free_aligned(rf->r);
829
0
        rf->r = dav1d_alloc_aligned(ALLOC_REFMVS, r_sz + rp_proj_sz, 64);
830
0
        if (!rf->r) {
831
0
            rf->n_blocks = 0;
832
0
            return DAV1D_ERR(ENOMEM);
833
0
        }
834
835
0
        rf->rp_proj = (refmvs_temporal_block*)((uintptr_t)rf->r + r_sz);
836
0
        rf->n_blocks = n_blocks;
837
0
    }
838
839
0
    const int poc = frm_hdr->frame_offset;
840
0
    for (int i = 0; i < 7; i++) {
841
0
        const int poc_diff = get_poc_diff(seq_hdr->order_hint_n_bits,
842
0
                                          ref_poc[i], poc);
843
0
        rf->sign_bias[i] = poc_diff > 0;
844
0
        rf->mfmv_sign[i] = poc_diff < 0;
845
0
        rf->pocdiff[i] = iclip(get_poc_diff(seq_hdr->order_hint_n_bits,
846
0
                                            poc, ref_poc[i]), -31, 31);
847
0
    }
848
849
    // temporal MV setup
850
0
    rf->n_mfmvs = 0;
851
0
    rf->rp_ref = rp_ref;
852
0
    if (frm_hdr->use_ref_frame_mvs && seq_hdr->order_hint_n_bits) {
853
0
        int total = 2;
854
0
        if (rp_ref[0] && ref_ref_poc[0][6] != ref_poc[3] /* alt-of-last != gold */) {
855
0
            rf->mfmv_ref[rf->n_mfmvs++] = 0; // last
856
0
            total = 3;
857
0
        }
858
0
        if (rp_ref[4] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[4],
859
0
                                      frm_hdr->frame_offset) > 0)
860
0
        {
861
0
            rf->mfmv_ref[rf->n_mfmvs++] = 4; // bwd
862
0
        }
863
0
        if (rp_ref[5] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[5],
864
0
                                      frm_hdr->frame_offset) > 0)
865
0
        {
866
0
            rf->mfmv_ref[rf->n_mfmvs++] = 5; // altref2
867
0
        }
868
0
        if (rf->n_mfmvs < total && rp_ref[6] &&
869
0
            get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[6],
870
0
                         frm_hdr->frame_offset) > 0)
871
0
        {
872
0
            rf->mfmv_ref[rf->n_mfmvs++] = 6; // altref
873
0
        }
874
0
        if (rf->n_mfmvs < total && rp_ref[1])
875
0
            rf->mfmv_ref[rf->n_mfmvs++] = 1; // last2
876
877
0
        for (int n = 0; n < rf->n_mfmvs; n++) {
878
0
            const int rpoc = ref_poc[rf->mfmv_ref[n]];
879
0
            const int diff1 = get_poc_diff(seq_hdr->order_hint_n_bits,
880
0
                                           rpoc, frm_hdr->frame_offset);
881
0
            if (abs(diff1) > 31) {
882
0
                rf->mfmv_ref2cur[n] = INVALID_REF2CUR;
883
0
            } else {
884
0
                rf->mfmv_ref2cur[n] = rf->mfmv_ref[n] < 4 ? -diff1 : diff1;
885
0
                for (int m = 0; m < 7; m++) {
886
0
                    const int rrpoc = ref_ref_poc[rf->mfmv_ref[n]][m];
887
0
                    const int diff2 = get_poc_diff(seq_hdr->order_hint_n_bits,
888
0
                                                   rpoc, rrpoc);
889
                    // unsigned comparison also catches the < 0 case
890
0
                    rf->mfmv_ref2ref[n][m] = (unsigned) diff2 > 31U ? 0 : diff2;
891
0
                }
892
0
            }
893
0
        }
894
0
    }
895
0
    rf->use_ref_frame_mvs = rf->n_mfmvs > 0;
896
897
0
    return 0;
898
0
}
899
900
/*
 * Copy the block `*rmv` into bw4 consecutive entries, starting at column
 * bx4, of each of bh4 successive row pointers taken from `rr`.
 *
 * The row loop is a do/while, so at least one row is always written;
 * bh4 is expected to be >= 1.
 */
static void splat_mv_c(refmvs_block **rr, const refmvs_block *const rmv,
                       const int bx4, const int bw4, int bh4)
{
    do {
        refmvs_block *dst = *rr + bx4;
        for (int left = bw4; left > 0; left--)
            *dst++ = *rmv;
        rr++;
    } while (--bh4);
}
909
910
#if HAVE_ASM
911
#if ARCH_AARCH64 || ARCH_ARM
912
#include "src/arm/refmvs.h"
913
#elif ARCH_LOONGARCH64
914
#include "src/loongarch/refmvs.h"
915
#elif ARCH_X86
916
#include "src/x86/refmvs.h"
917
#endif
918
#endif
919
920
/*
 * Populate the refmvs DSP function table `c` with the portable C
 * implementations, then, when assembly is enabled (HAVE_ASM), let the
 * architecture-specific initialiser for the build target adjust it.
 */
COLD void dav1d_refmvs_dsp_init(Dav1dRefmvsDSPContext *const c)
{
    /* C fallbacks are always installed first. */
    c->splat_mv = splat_mv_c;
    c->save_tmvs = save_tmvs_c;
    c->load_tmvs = load_tmvs_c;

#if HAVE_ASM && (ARCH_AARCH64 || ARCH_ARM)
    refmvs_dsp_init_arm(c);
#elif HAVE_ASM && ARCH_LOONGARCH64
    refmvs_dsp_init_loongarch(c);
#elif HAVE_ASM && ARCH_X86
    refmvs_dsp_init_x86(c);
#endif
}