Line | Count | Source |
1 | | /* |
2 | | * Copyright © 2020, VideoLAN and dav1d authors |
3 | | * Copyright © 2020, Two Orioles, LLC |
4 | | * All rights reserved. |
5 | | * |
6 | | * Redistribution and use in source and binary forms, with or without |
7 | | * modification, are permitted provided that the following conditions are met: |
8 | | * |
9 | | * 1. Redistributions of source code must retain the above copyright notice, this |
10 | | * list of conditions and the following disclaimer. |
11 | | * |
12 | | * 2. Redistributions in binary form must reproduce the above copyright notice, |
13 | | * this list of conditions and the following disclaimer in the documentation |
14 | | * and/or other materials provided with the distribution. |
15 | | * |
16 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
17 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
18 | | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
19 | | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR |
20 | | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
21 | | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
22 | | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
23 | | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
24 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
25 | | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
26 | | */ |
27 | | |
28 | | #include "config.h" |
29 | | |
30 | | #include <limits.h> |
31 | | #include <stdlib.h> |
32 | | |
33 | | #include "dav1d/common.h" |
34 | | |
35 | | #include "common/intops.h" |
36 | | |
37 | | #include "src/env.h" |
38 | | #include "src/mem.h" |
39 | | #include "src/refmvs.h" |
40 | | |
41 | | static void add_spatial_candidate(refmvs_candidate *const mvstack, int *const cnt, |
42 | | const int weight, const refmvs_block *const b, |
43 | | const union refmvs_refpair ref, const mv gmv[2], |
44 | | int *const have_newmv_match, |
45 | | int *const have_refmv_match) |
46 | 0 | { |
47 | 0 | if (b->mv.mv[0].n == INVALID_MV) return; // intra block, no intrabc |
48 | | |
49 | 0 | if (ref.ref[1] == -1) { |
50 | 0 | for (int n = 0; n < 2; n++) { |
51 | 0 | if (b->ref.ref[n] == ref.ref[0]) { |
52 | 0 | const mv cand_mv = ((b->mf & 1) && gmv[0].n != INVALID_MV) ? |
53 | 0 | gmv[0] : b->mv.mv[n]; |
54 | |
|
55 | 0 | *have_refmv_match = 1; |
56 | 0 | *have_newmv_match |= b->mf >> 1; |
57 | |
|
58 | 0 | const int last = *cnt; |
59 | 0 | for (int m = 0; m < last; m++) |
60 | 0 | if (mvstack[m].mv.mv[0].n == cand_mv.n) { |
61 | 0 | mvstack[m].weight += weight; |
62 | 0 | return; |
63 | 0 | } |
64 | | |
65 | 0 | if (last < 8) { |
66 | 0 | mvstack[last].mv.mv[0] = cand_mv; |
67 | 0 | mvstack[last].weight = weight; |
68 | 0 | *cnt = last + 1; |
69 | 0 | } |
70 | 0 | return; |
71 | 0 | } |
72 | 0 | } |
73 | 0 | } else if (b->ref.pair == ref.pair) { |
74 | 0 | const refmvs_mvpair cand_mv = { .mv = { |
75 | 0 | [0] = ((b->mf & 1) && gmv[0].n != INVALID_MV) ? gmv[0] : b->mv.mv[0], |
76 | 0 | [1] = ((b->mf & 1) && gmv[1].n != INVALID_MV) ? gmv[1] : b->mv.mv[1], |
77 | 0 | }}; |
78 | |
|
79 | 0 | *have_refmv_match = 1; |
80 | 0 | *have_newmv_match |= b->mf >> 1; |
81 | |
|
82 | 0 | const int last = *cnt; |
83 | 0 | for (int n = 0; n < last; n++) |
84 | 0 | if (mvstack[n].mv.n == cand_mv.n) { |
85 | 0 | mvstack[n].weight += weight; |
86 | 0 | return; |
87 | 0 | } |
88 | | |
89 | 0 | if (last < 8) { |
90 | 0 | mvstack[last].mv = cand_mv; |
91 | 0 | mvstack[last].weight = weight; |
92 | 0 | *cnt = last + 1; |
93 | 0 | } |
94 | 0 | } |
95 | 0 | } |
96 | | |
97 | | static int scan_row(refmvs_candidate *const mvstack, int *const cnt, |
98 | | const union refmvs_refpair ref, const mv gmv[2], |
99 | | const refmvs_block *b, const int bw4, const int w4, |
100 | | const int max_rows, const int step, |
101 | | int *const have_newmv_match, int *const have_refmv_match) |
102 | 0 | { |
103 | 0 | const refmvs_block *cand_b = b; |
104 | 0 | const enum BlockSize first_cand_bs = cand_b->bs; |
105 | 0 | const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs]; |
106 | 0 | int cand_bw4 = first_cand_b_dim[0]; |
107 | 0 | int len = imax(step, imin(bw4, cand_bw4)); |
108 | |
|
109 | 0 | if (bw4 <= cand_bw4) { |
110 | | // FIXME weight can be higher for odd blocks (bx4 & 1), but then the |
111 | | // position of the first block has to be odd already, i.e. not just |
112 | | // for row_offset=-3/-5 |
113 | | // FIXME why can this not be cand_bw4? |
114 | 0 | const int weight = bw4 == 1 ? 2 : |
115 | 0 | imax(2, imin(2 * max_rows, first_cand_b_dim[1])); |
116 | 0 | add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv, |
117 | 0 | have_newmv_match, have_refmv_match); |
118 | 0 | return weight >> 1; |
119 | 0 | } |
120 | | |
121 | 0 | for (int x = 0;;) { |
122 | | // FIXME if we overhang above, we could fill a bitmask so we don't have |
123 | | // to repeat the add_spatial_candidate() for the next row, but just increase |
124 | | // the weight here |
125 | 0 | add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv, |
126 | 0 | have_newmv_match, have_refmv_match); |
127 | 0 | x += len; |
128 | 0 | if (x >= w4) return 1; |
129 | 0 | cand_b = &b[x]; |
130 | 0 | cand_bw4 = dav1d_block_dimensions[cand_b->bs][0]; |
131 | 0 | assert(cand_bw4 < bw4); |
132 | 0 | len = imax(step, cand_bw4); |
133 | 0 | } |
134 | 0 | } |
135 | | |
136 | | static int scan_col(refmvs_candidate *const mvstack, int *const cnt, |
137 | | const union refmvs_refpair ref, const mv gmv[2], |
138 | | /*const*/ refmvs_block *const *b, const int bh4, const int h4, |
139 | | const int bx4, const int max_cols, const int step, |
140 | | int *const have_newmv_match, int *const have_refmv_match) |
141 | 0 | { |
142 | 0 | const refmvs_block *cand_b = &b[0][bx4]; |
143 | 0 | const enum BlockSize first_cand_bs = cand_b->bs; |
144 | 0 | const uint8_t *const first_cand_b_dim = dav1d_block_dimensions[first_cand_bs]; |
145 | 0 | int cand_bh4 = first_cand_b_dim[1]; |
146 | 0 | int len = imax(step, imin(bh4, cand_bh4)); |
147 | |
|
148 | 0 | if (bh4 <= cand_bh4) { |
149 | | // FIXME weight can be higher for odd blocks (by4 & 1), but then the |
150 | | // position of the first block has to be odd already, i.e. not just |
151 | | // for col_offset=-3/-5 |
152 | | // FIXME why can this not be cand_bh4? |
153 | 0 | const int weight = bh4 == 1 ? 2 : |
154 | 0 | imax(2, imin(2 * max_cols, first_cand_b_dim[0])); |
155 | 0 | add_spatial_candidate(mvstack, cnt, len * weight, cand_b, ref, gmv, |
156 | 0 | have_newmv_match, have_refmv_match); |
157 | 0 | return weight >> 1; |
158 | 0 | } |
159 | | |
160 | 0 | for (int y = 0;;) { |
161 | | // FIXME if we overhang above, we could fill a bitmask so we don't have |
162 | | // to repeat the add_spatial_candidate() for the next row, but just increase |
163 | | // the weight here |
164 | 0 | add_spatial_candidate(mvstack, cnt, len * 2, cand_b, ref, gmv, |
165 | 0 | have_newmv_match, have_refmv_match); |
166 | 0 | y += len; |
167 | 0 | if (y >= h4) return 1; |
168 | 0 | cand_b = &b[y][bx4]; |
169 | 0 | cand_bh4 = dav1d_block_dimensions[cand_b->bs][1]; |
170 | 0 | assert(cand_bh4 < bh4); |
171 | 0 | len = imax(step, cand_bh4); |
172 | 0 | } |
173 | 0 | } |
174 | | |
175 | 0 | static inline union mv mv_projection(const union mv mv, const int num, const int den) { |
176 | 0 | static const uint16_t div_mult[32] = { |
177 | 0 | 0, 16384, 8192, 5461, 4096, 3276, 2730, 2340, |
178 | 0 | 2048, 1820, 1638, 1489, 1365, 1260, 1170, 1092, |
179 | 0 | 1024, 963, 910, 862, 819, 780, 744, 712, |
180 | 0 | 682, 655, 630, 606, 585, 564, 546, 528 |
181 | 0 | }; |
182 | 0 | assert(den > 0 && den < 32); |
183 | 0 | assert(num > -32 && num < 32); |
184 | 0 | const int frac = num * div_mult[den]; |
185 | 0 | const int y = mv.y * frac, x = mv.x * frac; |
186 | | // Round and clip according to AV1 spec section 7.9.3 |
187 | 0 | return (union mv) { // 0x3fff == (1 << 14) - 1 |
188 | 0 | .y = iclip((y + 8192 + (y >> 31)) >> 14, -0x3fff, 0x3fff), |
189 | 0 | .x = iclip((x + 8192 + (x >> 31)) >> 14, -0x3fff, 0x3fff) |
190 | 0 | }; |
191 | 0 | } |
192 | | |
193 | | static void add_temporal_candidate(const refmvs_frame *const rf, |
194 | | refmvs_candidate *const mvstack, int *const cnt, |
195 | | const refmvs_temporal_block *const rb, |
196 | | const union refmvs_refpair ref, int *const globalmv_ctx, |
197 | | const union mv gmv[]) |
198 | 0 | { |
199 | 0 | if (rb->mv.n == INVALID_MV) return; |
200 | | |
201 | 0 | union mv mv = mv_projection(rb->mv, rf->pocdiff[ref.ref[0] - 1], rb->ref); |
202 | 0 | fix_mv_precision(rf->frm_hdr, &mv); |
203 | |
|
204 | 0 | const int last = *cnt; |
205 | 0 | if (ref.ref[1] == -1) { |
206 | 0 | if (globalmv_ctx) |
207 | 0 | *globalmv_ctx = (abs(mv.x - gmv[0].x) | abs(mv.y - gmv[0].y)) >= 16; |
208 | |
|
209 | 0 | for (int n = 0; n < last; n++) |
210 | 0 | if (mvstack[n].mv.mv[0].n == mv.n) { |
211 | 0 | mvstack[n].weight += 2; |
212 | 0 | return; |
213 | 0 | } |
214 | 0 | if (last < 8) { |
215 | 0 | mvstack[last].mv.mv[0] = mv; |
216 | 0 | mvstack[last].weight = 2; |
217 | 0 | *cnt = last + 1; |
218 | 0 | } |
219 | 0 | } else { |
220 | 0 | refmvs_mvpair mvp = { .mv = { |
221 | 0 | [0] = mv, |
222 | 0 | [1] = mv_projection(rb->mv, rf->pocdiff[ref.ref[1] - 1], rb->ref), |
223 | 0 | }}; |
224 | 0 | fix_mv_precision(rf->frm_hdr, &mvp.mv[1]); |
225 | |
|
226 | 0 | for (int n = 0; n < last; n++) |
227 | 0 | if (mvstack[n].mv.n == mvp.n) { |
228 | 0 | mvstack[n].weight += 2; |
229 | 0 | return; |
230 | 0 | } |
231 | 0 | if (last < 8) { |
232 | 0 | mvstack[last].mv = mvp; |
233 | 0 | mvstack[last].weight = 2; |
234 | 0 | *cnt = last + 1; |
235 | 0 | } |
236 | 0 | } |
237 | 0 | } |
238 | | |
239 | | static void add_compound_extended_candidate(refmvs_candidate *const same, |
240 | | int *const same_count, |
241 | | const refmvs_block *const cand_b, |
242 | | const int sign0, const int sign1, |
243 | | const union refmvs_refpair ref, |
244 | | const uint8_t *const sign_bias) |
245 | 0 | { |
246 | 0 | refmvs_candidate *const diff = &same[2]; |
247 | 0 | int *const diff_count = &same_count[2]; |
248 | |
|
249 | 0 | for (int n = 0; n < 2; n++) { |
250 | 0 | const int cand_ref = cand_b->ref.ref[n]; |
251 | |
|
252 | 0 | if (cand_ref <= 0) break; |
253 | | |
254 | 0 | mv cand_mv = cand_b->mv.mv[n]; |
255 | 0 | if (cand_ref == ref.ref[0]) { |
256 | 0 | if (same_count[0] < 2) |
257 | 0 | same[same_count[0]++].mv.mv[0] = cand_mv; |
258 | 0 | if (diff_count[1] < 2) { |
259 | 0 | if (sign1 ^ sign_bias[cand_ref - 1]) { |
260 | 0 | cand_mv.y = -cand_mv.y; |
261 | 0 | cand_mv.x = -cand_mv.x; |
262 | 0 | } |
263 | 0 | diff[diff_count[1]++].mv.mv[1] = cand_mv; |
264 | 0 | } |
265 | 0 | } else if (cand_ref == ref.ref[1]) { |
266 | 0 | if (same_count[1] < 2) |
267 | 0 | same[same_count[1]++].mv.mv[1] = cand_mv; |
268 | 0 | if (diff_count[0] < 2) { |
269 | 0 | if (sign0 ^ sign_bias[cand_ref - 1]) { |
270 | 0 | cand_mv.y = -cand_mv.y; |
271 | 0 | cand_mv.x = -cand_mv.x; |
272 | 0 | } |
273 | 0 | diff[diff_count[0]++].mv.mv[0] = cand_mv; |
274 | 0 | } |
275 | 0 | } else { |
276 | 0 | mv i_cand_mv = (union mv) { |
277 | 0 | .x = -cand_mv.x, |
278 | 0 | .y = -cand_mv.y |
279 | 0 | }; |
280 | |
|
281 | 0 | if (diff_count[0] < 2) { |
282 | 0 | diff[diff_count[0]++].mv.mv[0] = |
283 | 0 | sign0 ^ sign_bias[cand_ref - 1] ? |
284 | 0 | i_cand_mv : cand_mv; |
285 | 0 | } |
286 | |
|
287 | 0 | if (diff_count[1] < 2) { |
288 | 0 | diff[diff_count[1]++].mv.mv[1] = |
289 | 0 | sign1 ^ sign_bias[cand_ref - 1] ? |
290 | 0 | i_cand_mv : cand_mv; |
291 | 0 | } |
292 | 0 | } |
293 | 0 | } |
294 | 0 | } |
295 | | |
296 | | static void add_single_extended_candidate(refmvs_candidate mvstack[8], int *const cnt, |
297 | | const refmvs_block *const cand_b, |
298 | | const int sign, const uint8_t *const sign_bias) |
299 | 0 | { |
300 | 0 | for (int n = 0; n < 2; n++) { |
301 | 0 | const int cand_ref = cand_b->ref.ref[n]; |
302 | |
|
303 | 0 | if (cand_ref <= 0) break; |
304 | | // we need to continue even if cand_ref == ref.ref[0], since |
305 | | // the candidate could have been added as a globalmv variant, |
306 | | // which changes the value |
307 | | // FIXME if scan_{row,col}() returned a mask for the nearest |
308 | | // edge, we could skip the appropriate ones here |
309 | | |
310 | 0 | mv cand_mv = cand_b->mv.mv[n]; |
311 | 0 | if (sign ^ sign_bias[cand_ref - 1]) { |
312 | 0 | cand_mv.y = -cand_mv.y; |
313 | 0 | cand_mv.x = -cand_mv.x; |
314 | 0 | } |
315 | |
|
316 | 0 | int m; |
317 | 0 | const int last = *cnt; |
318 | 0 | for (m = 0; m < last; m++) |
319 | 0 | if (cand_mv.n == mvstack[m].mv.mv[0].n) |
320 | 0 | break; |
321 | 0 | if (m == last) { |
322 | 0 | mvstack[m].mv.mv[0] = cand_mv; |
323 | 0 | mvstack[m].weight = 2; // "minimal" |
324 | 0 | *cnt = last + 1; |
325 | 0 | } |
326 | 0 | } |
327 | 0 | } |
328 | | |
329 | | /* |
330 | | * refmvs_frame allocates memory for one sbrow (32 blocks high, whole frame |
331 | | * wide) of 4x4-resolution refmvs_block entries for spatial MV referencing. |
332 | | * mvrefs_tile[] keeps a list of 35 (32 + 3 above) pointers into this memory, |
333 | | * and each sbrow, the bottom entries (y=27/29/31) are exchanged with the top |
334 | | * (-5/-3/-1) pointers by calling dav1d_refmvs_tile_sbrow_init() at the start |
335 | | * of each tile/sbrow. |
336 | | * |
337 | | * For temporal MV referencing, we call dav1d_refmvs_save_tmvs() at the end of |
338 | | * each tile/sbrow (when tile column threading is enabled), or at the start of |
339 | | * each interleaved sbrow (i.e. once for all tile columns together, when tile |
340 | | * column threading is disabled). This will copy the 4x4-resolution spatial MVs |
341 | | * into 8x8-resolution refmvs_temporal_block structures. Then, for subsequent |
342 | | * frames, at the start of each tile/sbrow (when tile column threading is |
343 | | * enabled) or at the start of each interleaved sbrow (when tile column |
344 | | * threading is disabled), we call load_tmvs(), which will project the MVs to |
345 | | * their respective position in the current frame. |
346 | | */ |
347 | | |
348 | | void dav1d_refmvs_find(const refmvs_tile *const rt, |
349 | | refmvs_candidate mvstack[8], int *const cnt, |
350 | | int *const ctx, |
351 | | const union refmvs_refpair ref, const enum BlockSize bs, |
352 | | const enum EdgeFlags edge_flags, |
353 | | const int by4, const int bx4) |
354 | 0 | { |
355 | 0 | const refmvs_frame *const rf = rt->rf; |
356 | 0 | const uint8_t *const b_dim = dav1d_block_dimensions[bs]; |
357 | 0 | const int bw4 = b_dim[0], w4 = imin(imin(bw4, 16), rt->tile_col.end - bx4); |
358 | 0 | const int bh4 = b_dim[1], h4 = imin(imin(bh4, 16), rt->tile_row.end - by4); |
359 | 0 | mv gmv[2], tgmv[2]; |
360 | |
|
361 | 0 | *cnt = 0; |
362 | 0 | assert(ref.ref[0] >= 0 && ref.ref[0] <= 8 && |
363 | 0 | ref.ref[1] >= -1 && ref.ref[1] <= 8); |
364 | 0 | if (ref.ref[0] > 0) { |
365 | 0 | tgmv[0] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[0] - 1], |
366 | 0 | bx4, by4, bw4, bh4, rf->frm_hdr); |
367 | 0 | gmv[0] = rf->frm_hdr->gmv[ref.ref[0] - 1].type > DAV1D_WM_TYPE_TRANSLATION ? |
368 | 0 | tgmv[0] : (mv) { .n = INVALID_MV }; |
369 | 0 | } else { |
370 | 0 | tgmv[0] = (mv) { .n = 0 }; |
371 | 0 | gmv[0] = (mv) { .n = INVALID_MV }; |
372 | 0 | } |
373 | 0 | if (ref.ref[1] > 0) { |
374 | 0 | tgmv[1] = get_gmv_2d(&rf->frm_hdr->gmv[ref.ref[1] - 1], |
375 | 0 | bx4, by4, bw4, bh4, rf->frm_hdr); |
376 | 0 | gmv[1] = rf->frm_hdr->gmv[ref.ref[1] - 1].type > DAV1D_WM_TYPE_TRANSLATION ? |
377 | 0 | tgmv[1] : (mv) { .n = INVALID_MV }; |
378 | 0 | } |
379 | | |
380 | | // top |
381 | 0 | int have_newmv = 0, have_col_mvs = 0, have_row_mvs = 0; |
382 | 0 | unsigned max_rows = 0, n_rows = ~0; |
383 | 0 | const refmvs_block *b_top; |
384 | 0 | if (by4 > rt->tile_row.start) { |
385 | 0 | max_rows = imin((by4 - rt->tile_row.start + 1) >> 1, 2 + (bh4 > 1)); |
386 | 0 | b_top = &rt->r[(by4 & 31) - 1 + 5][bx4]; |
387 | 0 | n_rows = scan_row(mvstack, cnt, ref, gmv, b_top, |
388 | 0 | bw4, w4, max_rows, bw4 >= 16 ? 4 : 1, |
389 | 0 | &have_newmv, &have_row_mvs); |
390 | 0 | } |
391 | | |
392 | | // left |
393 | 0 | unsigned max_cols = 0, n_cols = ~0U; |
394 | 0 | refmvs_block *const *b_left; |
395 | 0 | if (bx4 > rt->tile_col.start) { |
396 | 0 | max_cols = imin((bx4 - rt->tile_col.start + 1) >> 1, 2 + (bw4 > 1)); |
397 | 0 | b_left = &rt->r[(by4 & 31) + 5]; |
398 | 0 | n_cols = scan_col(mvstack, cnt, ref, gmv, b_left, |
399 | 0 | bh4, h4, bx4 - 1, max_cols, bh4 >= 16 ? 4 : 1, |
400 | 0 | &have_newmv, &have_col_mvs); |
401 | 0 | } |
402 | | |
403 | | // top/right |
404 | 0 | if (n_rows != ~0U && edge_flags & EDGE_I444_TOP_HAS_RIGHT && |
405 | 0 | imax(bw4, bh4) <= 16 && bw4 + bx4 < rt->tile_col.end) |
406 | 0 | { |
407 | 0 | add_spatial_candidate(mvstack, cnt, 4, &b_top[bw4], ref, gmv, |
408 | 0 | &have_newmv, &have_row_mvs); |
409 | 0 | } |
410 | |
|
411 | 0 | const int nearest_match = have_col_mvs + have_row_mvs; |
412 | 0 | const int nearest_cnt = *cnt; |
413 | 0 | for (int n = 0; n < nearest_cnt; n++) |
414 | 0 | mvstack[n].weight += 640; |
415 | | |
416 | | // temporal |
417 | 0 | int globalmv_ctx = rf->frm_hdr->use_ref_frame_mvs; |
418 | 0 | if (rf->use_ref_frame_mvs) { |
419 | 0 | const ptrdiff_t stride = rf->rp_stride; |
420 | 0 | const int by8 = by4 >> 1, bx8 = bx4 >> 1; |
421 | 0 | const refmvs_temporal_block *const rbi = &rt->rp_proj[(by8 & 15) * stride + bx8]; |
422 | 0 | const refmvs_temporal_block *rb = rbi; |
423 | 0 | const int step_h = bw4 >= 16 ? 2 : 1, step_v = bh4 >= 16 ? 2 : 1; |
424 | 0 | const int w8 = imin((w4 + 1) >> 1, 8), h8 = imin((h4 + 1) >> 1, 8); |
425 | 0 | for (int y = 0; y < h8; y += step_v) { |
426 | 0 | for (int x = 0; x < w8; x+= step_h) { |
427 | 0 | add_temporal_candidate(rf, mvstack, cnt, &rb[x], ref, |
428 | 0 | !(x | y) ? &globalmv_ctx : NULL, tgmv); |
429 | 0 | } |
430 | 0 | rb += stride * step_v; |
431 | 0 | } |
432 | 0 | if (imin(bw4, bh4) >= 2 && imax(bw4, bh4) < 16) { |
433 | 0 | const int bh8 = bh4 >> 1, bw8 = bw4 >> 1; |
434 | 0 | rb = &rbi[bh8 * stride]; |
435 | 0 | const int has_bottom = by8 + bh8 < imin(rt->tile_row.end >> 1, |
436 | 0 | (by8 & ~7) + 8); |
437 | 0 | if (has_bottom && bx8 - 1 >= imax(rt->tile_col.start >> 1, bx8 & ~7)) { |
438 | 0 | add_temporal_candidate(rf, mvstack, cnt, &rb[-1], ref, |
439 | 0 | NULL, NULL); |
440 | 0 | } |
441 | 0 | if (bx8 + bw8 < imin(rt->tile_col.end >> 1, (bx8 & ~7) + 8)) { |
442 | 0 | if (has_bottom) { |
443 | 0 | add_temporal_candidate(rf, mvstack, cnt, &rb[bw8], ref, |
444 | 0 | NULL, NULL); |
445 | 0 | } |
446 | 0 | if (by8 + bh8 - 1 < imin(rt->tile_row.end >> 1, (by8 & ~7) + 8)) { |
447 | 0 | add_temporal_candidate(rf, mvstack, cnt, &rb[bw8 - stride], |
448 | 0 | ref, NULL, NULL); |
449 | 0 | } |
450 | 0 | } |
451 | 0 | } |
452 | 0 | } |
453 | 0 | assert(*cnt <= 8); |
454 | | |
455 | | // top/left (which, confusingly, is part of "secondary" references) |
456 | 0 | int have_dummy_newmv_match; |
457 | 0 | if ((n_rows | n_cols) != ~0U) { |
458 | 0 | add_spatial_candidate(mvstack, cnt, 4, &b_top[-1], ref, gmv, |
459 | 0 | &have_dummy_newmv_match, &have_row_mvs); |
460 | 0 | } |
461 | | |
462 | | // "secondary" (non-direct neighbour) top & left edges |
463 | | // what is different about secondary is that everything is now in 8x8 resolution |
464 | 0 | for (int n = 2; n <= 3; n++) { |
465 | 0 | if ((unsigned) n > n_rows && (unsigned) n <= max_rows) { |
466 | 0 | n_rows += scan_row(mvstack, cnt, ref, gmv, |
467 | 0 | &rt->r[(((by4 & 31) - 2 * n + 1) | 1) + 5][bx4 | 1], |
468 | 0 | bw4, w4, 1 + max_rows - n, bw4 >= 16 ? 4 : 2, |
469 | 0 | &have_dummy_newmv_match, &have_row_mvs); |
470 | 0 | } |
471 | |
|
472 | 0 | if ((unsigned) n > n_cols && (unsigned) n <= max_cols) { |
473 | 0 | n_cols += scan_col(mvstack, cnt, ref, gmv, &rt->r[((by4 & 31) | 1) + 5], |
474 | 0 | bh4, h4, (bx4 - n * 2 + 1) | 1, |
475 | 0 | 1 + max_cols - n, bh4 >= 16 ? 4 : 2, |
476 | 0 | &have_dummy_newmv_match, &have_col_mvs); |
477 | 0 | } |
478 | 0 | } |
479 | 0 | assert(*cnt <= 8); |
480 | | |
481 | 0 | const int ref_match_count = have_col_mvs + have_row_mvs; |
482 | | |
483 | | // context build-up |
484 | 0 | int refmv_ctx, newmv_ctx; |
485 | 0 | switch (nearest_match) { |
486 | 0 | case 0: |
487 | 0 | refmv_ctx = imin(2, ref_match_count); |
488 | 0 | newmv_ctx = ref_match_count > 0; |
489 | 0 | break; |
490 | 0 | case 1: |
491 | 0 | refmv_ctx = imin(ref_match_count * 3, 4); |
492 | 0 | newmv_ctx = 3 - have_newmv; |
493 | 0 | break; |
494 | 0 | case 2: |
495 | 0 | refmv_ctx = 5; |
496 | 0 | newmv_ctx = 5 - have_newmv; |
497 | 0 | break; |
498 | 0 | } |
499 | | |
500 | | // sorting (nearest, then "secondary") |
501 | 0 | int len = nearest_cnt; |
502 | 0 | while (len) { |
503 | 0 | int last = 0; |
504 | 0 | for (int n = 1; n < len; n++) { |
505 | 0 | if (mvstack[n - 1].weight < mvstack[n].weight) { |
506 | 0 | #define EXCHANGE(a, b) do { refmvs_candidate tmp = a; a = b; b = tmp; } while (0) |
507 | 0 | EXCHANGE(mvstack[n - 1], mvstack[n]); |
508 | 0 | last = n; |
509 | 0 | } |
510 | 0 | } |
511 | 0 | len = last; |
512 | 0 | } |
513 | 0 | len = *cnt; |
514 | 0 | while (len > nearest_cnt) { |
515 | 0 | int last = nearest_cnt; |
516 | 0 | for (int n = nearest_cnt + 1; n < len; n++) { |
517 | 0 | if (mvstack[n - 1].weight < mvstack[n].weight) { |
518 | 0 | EXCHANGE(mvstack[n - 1], mvstack[n]); |
519 | 0 | #undef EXCHANGE |
520 | 0 | last = n; |
521 | 0 | } |
522 | 0 | } |
523 | 0 | len = last; |
524 | 0 | } |
525 | |
|
526 | 0 | if (ref.ref[1] > 0) { |
527 | 0 | if (*cnt < 2) { |
528 | 0 | const int sign0 = rf->sign_bias[ref.ref[0] - 1]; |
529 | 0 | const int sign1 = rf->sign_bias[ref.ref[1] - 1]; |
530 | 0 | const int sz4 = imin(w4, h4); |
531 | 0 | refmvs_candidate *const same = &mvstack[*cnt]; |
532 | 0 | int same_count[4] = { 0 }; |
533 | | |
534 | | // non-self references in top |
535 | 0 | if (n_rows != ~0U) for (int x = 0; x < sz4;) { |
536 | 0 | const refmvs_block *const cand_b = &b_top[x]; |
537 | 0 | add_compound_extended_candidate(same, same_count, cand_b, |
538 | 0 | sign0, sign1, ref, rf->sign_bias); |
539 | 0 | x += dav1d_block_dimensions[cand_b->bs][0]; |
540 | 0 | } |
541 | | |
542 | | // non-self references in left |
543 | 0 | if (n_cols != ~0U) for (int y = 0; y < sz4;) { |
544 | 0 | const refmvs_block *const cand_b = &b_left[y][bx4 - 1]; |
545 | 0 | add_compound_extended_candidate(same, same_count, cand_b, |
546 | 0 | sign0, sign1, ref, rf->sign_bias); |
547 | 0 | y += dav1d_block_dimensions[cand_b->bs][1]; |
548 | 0 | } |
549 | |
|
550 | 0 | refmvs_candidate *const diff = &same[2]; |
551 | 0 | const int *const diff_count = &same_count[2]; |
552 | | |
553 | | // merge together |
554 | 0 | for (int n = 0; n < 2; n++) { |
555 | 0 | int m = same_count[n]; |
556 | |
|
557 | 0 | if (m >= 2) continue; |
558 | | |
559 | 0 | const int l = diff_count[n]; |
560 | 0 | if (l) { |
561 | 0 | same[m].mv.mv[n] = diff[0].mv.mv[n]; |
562 | 0 | if (++m == 2) continue; |
563 | 0 | if (l == 2) { |
564 | 0 | same[1].mv.mv[n] = diff[1].mv.mv[n]; |
565 | 0 | continue; |
566 | 0 | } |
567 | 0 | } |
568 | 0 | do { |
569 | 0 | same[m].mv.mv[n] = tgmv[n]; |
570 | 0 | } while (++m < 2); |
571 | 0 | } |
572 | | |
573 | | // if the first extended was the same as the non-extended one, |
574 | | // then replace it with the second extended one |
575 | 0 | int n = *cnt; |
576 | 0 | if (n == 1 && mvstack[0].mv.n == same[0].mv.n) |
577 | 0 | mvstack[1].mv = mvstack[2].mv; |
578 | 0 | do { |
579 | 0 | mvstack[n].weight = 2; |
580 | 0 | } while (++n < 2); |
581 | 0 | *cnt = 2; |
582 | 0 | } |
583 | | |
584 | | // clamping |
585 | 0 | const int left = -(bx4 + bw4 + 4) * 4 * 8; |
586 | 0 | const int right = (rf->iw4 - bx4 + 4) * 4 * 8; |
587 | 0 | const int top = -(by4 + bh4 + 4) * 4 * 8; |
588 | 0 | const int bottom = (rf->ih4 - by4 + 4) * 4 * 8; |
589 | |
|
590 | 0 | const int n_refmvs = *cnt; |
591 | 0 | int n = 0; |
592 | 0 | do { |
593 | 0 | mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right); |
594 | 0 | mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom); |
595 | 0 | mvstack[n].mv.mv[1].x = iclip(mvstack[n].mv.mv[1].x, left, right); |
596 | 0 | mvstack[n].mv.mv[1].y = iclip(mvstack[n].mv.mv[1].y, top, bottom); |
597 | 0 | } while (++n < n_refmvs); |
598 | |
|
599 | 0 | switch (refmv_ctx >> 1) { |
600 | 0 | case 0: |
601 | 0 | *ctx = imin(newmv_ctx, 1); |
602 | 0 | break; |
603 | 0 | case 1: |
604 | 0 | *ctx = 1 + imin(newmv_ctx, 3); |
605 | 0 | break; |
606 | 0 | case 2: |
607 | 0 | *ctx = iclip(3 + newmv_ctx, 4, 7); |
608 | 0 | break; |
609 | 0 | } |
610 | | |
611 | 0 | return; |
612 | 0 | } else if (*cnt < 2 && ref.ref[0] > 0) { |
613 | 0 | const int sign = rf->sign_bias[ref.ref[0] - 1]; |
614 | 0 | const int sz4 = imin(w4, h4); |
615 | | |
616 | | // non-self references in top |
617 | 0 | if (n_rows != ~0U) for (int x = 0; x < sz4 && *cnt < 2;) { |
618 | 0 | const refmvs_block *const cand_b = &b_top[x]; |
619 | 0 | add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias); |
620 | 0 | x += dav1d_block_dimensions[cand_b->bs][0]; |
621 | 0 | } |
622 | | |
623 | | // non-self references in left |
624 | 0 | if (n_cols != ~0U) for (int y = 0; y < sz4 && *cnt < 2;) { |
625 | 0 | const refmvs_block *const cand_b = &b_left[y][bx4 - 1]; |
626 | 0 | add_single_extended_candidate(mvstack, cnt, cand_b, sign, rf->sign_bias); |
627 | 0 | y += dav1d_block_dimensions[cand_b->bs][1]; |
628 | 0 | } |
629 | 0 | } |
630 | 0 | assert(*cnt <= 8); |
631 | | |
632 | | // clamping |
633 | 0 | int n_refmvs = *cnt; |
634 | 0 | if (n_refmvs) { |
635 | 0 | const int left = -(bx4 + bw4 + 4) * 4 * 8; |
636 | 0 | const int right = (rf->iw4 - bx4 + 4) * 4 * 8; |
637 | 0 | const int top = -(by4 + bh4 + 4) * 4 * 8; |
638 | 0 | const int bottom = (rf->ih4 - by4 + 4) * 4 * 8; |
639 | |
|
640 | 0 | int n = 0; |
641 | 0 | do { |
642 | 0 | mvstack[n].mv.mv[0].x = iclip(mvstack[n].mv.mv[0].x, left, right); |
643 | 0 | mvstack[n].mv.mv[0].y = iclip(mvstack[n].mv.mv[0].y, top, bottom); |
644 | 0 | } while (++n < n_refmvs); |
645 | 0 | } |
646 | |
|
647 | 0 | for (int n = *cnt; n < 2; n++) |
648 | 0 | mvstack[n].mv.mv[0] = tgmv[0]; |
649 | |
|
650 | 0 | *ctx = (refmv_ctx << 4) | (globalmv_ctx << 3) | newmv_ctx; |
651 | 0 | } |
652 | | |
653 | | void dav1d_refmvs_tile_sbrow_init(refmvs_tile *const rt, const refmvs_frame *const rf, |
654 | | const int tile_col_start4, const int tile_col_end4, |
655 | | const int tile_row_start4, const int tile_row_end4, |
656 | | const int sby, int tile_row_idx, const int pass) |
657 | 0 | { |
658 | 0 | if (rf->n_tile_threads == 1) tile_row_idx = 0; |
659 | 0 | rt->rp_proj = &rf->rp_proj[16 * rf->rp_stride * tile_row_idx]; |
660 | 0 | const ptrdiff_t r_stride = rf->rp_stride * 2; |
661 | 0 | const ptrdiff_t pass_off = (rf->n_frame_threads > 1 && pass == 2) ? |
662 | 0 | 35 * 2 * rf->n_blocks : 0; |
663 | 0 | refmvs_block *r = &rf->r[35 * r_stride * tile_row_idx + pass_off]; |
664 | 0 | const int sbsz = rf->sbsz; |
665 | 0 | const int off = (sbsz * sby) & 16; |
666 | 0 | for (int i = 0; i < sbsz; i++, r += r_stride) |
667 | 0 | rt->r[off + 5 + i] = r; |
668 | 0 | rt->r[off + 0] = r; |
669 | 0 | r += r_stride; |
670 | 0 | rt->r[off + 1] = NULL; |
671 | 0 | rt->r[off + 2] = r; |
672 | 0 | r += r_stride; |
673 | 0 | rt->r[off + 3] = NULL; |
674 | 0 | rt->r[off + 4] = r; |
675 | 0 | if (sby & 1) { |
676 | 0 | #define EXCHANGE(a, b) do { void *const tmp = a; a = b; b = tmp; } while (0) |
677 | 0 | EXCHANGE(rt->r[off + 0], rt->r[off + sbsz + 0]); |
678 | 0 | EXCHANGE(rt->r[off + 2], rt->r[off + sbsz + 2]); |
679 | 0 | EXCHANGE(rt->r[off + 4], rt->r[off + sbsz + 4]); |
680 | 0 | #undef EXCHANGE |
681 | 0 | } |
682 | |
|
683 | 0 | rt->rf = rf; |
684 | 0 | rt->tile_row.start = tile_row_start4; |
685 | 0 | rt->tile_row.end = imin(tile_row_end4, rf->ih4); |
686 | 0 | rt->tile_col.start = tile_col_start4; |
687 | 0 | rt->tile_col.end = imin(tile_col_end4, rf->iw4); |
688 | 0 | } |
689 | | |
690 | | static void load_tmvs_c(const refmvs_frame *const rf, int tile_row_idx, |
691 | | const int col_start8, const int col_end8, |
692 | | const int row_start8, int row_end8) |
693 | 0 | { |
694 | 0 | if (rf->n_tile_threads == 1) tile_row_idx = 0; |
695 | 0 | assert(row_start8 >= 0); |
696 | 0 | assert((unsigned) (row_end8 - row_start8) <= 16U); |
697 | 0 | row_end8 = imin(row_end8, rf->ih8); |
698 | 0 | const int col_start8i = imax(col_start8 - 8, 0); |
699 | 0 | const int col_end8i = imin(col_end8 + 8, rf->iw8); |
700 | |
|
701 | 0 | const ptrdiff_t stride = rf->rp_stride; |
702 | 0 | refmvs_temporal_block *rp_proj = |
703 | 0 | &rf->rp_proj[16 * stride * tile_row_idx + (row_start8 & 15) * stride]; |
704 | 0 | for (int y = row_start8; y < row_end8; y++) { |
705 | 0 | for (int x = col_start8; x < col_end8; x++) |
706 | 0 | rp_proj[x].mv.n = INVALID_MV; |
707 | 0 | rp_proj += stride; |
708 | 0 | } |
709 | |
|
710 | 0 | rp_proj = &rf->rp_proj[16 * stride * tile_row_idx]; |
711 | 0 | for (int n = 0; n < rf->n_mfmvs; n++) { |
712 | 0 | const int ref2cur = rf->mfmv_ref2cur[n]; |
713 | 0 | if (ref2cur == INVALID_REF2CUR) continue; |
714 | | |
715 | 0 | const int ref = rf->mfmv_ref[n]; |
716 | 0 | const int ref_sign = ref - 4; |
717 | 0 | const refmvs_temporal_block *r = &rf->rp_ref[ref][row_start8 * stride]; |
718 | 0 | for (int y = row_start8; y < row_end8; y++) { |
719 | 0 | const int y_sb_align = y & ~7; |
720 | 0 | const int y_proj_start = imax(y_sb_align, row_start8); |
721 | 0 | const int y_proj_end = imin(y_sb_align + 8, row_end8); |
722 | 0 | for (int x = col_start8i; x < col_end8i; x++) { |
723 | 0 | const refmvs_temporal_block *rb = &r[x]; |
724 | 0 | const int b_ref = rb->ref; |
725 | 0 | if (!b_ref) continue; |
726 | 0 | const int ref2ref = rf->mfmv_ref2ref[n][b_ref - 1]; |
727 | 0 | if (!ref2ref) continue; |
728 | 0 | const mv b_mv = rb->mv; |
729 | 0 | const mv offset = mv_projection(b_mv, ref2cur, ref2ref); |
730 | 0 | int pos_x = x + apply_sign(abs(offset.x) >> 6, |
731 | 0 | offset.x ^ ref_sign); |
732 | 0 | const int pos_y = y + apply_sign(abs(offset.y) >> 6, |
733 | 0 | offset.y ^ ref_sign); |
734 | 0 | if (pos_y >= y_proj_start && pos_y < y_proj_end) { |
735 | 0 | const ptrdiff_t pos = (pos_y & 15) * stride; |
736 | 0 | for (;;) { |
737 | 0 | const int x_sb_align = x & ~7; |
738 | 0 | if (pos_x >= imax(x_sb_align - 8, col_start8) && |
739 | 0 | pos_x < imin(x_sb_align + 16, col_end8)) |
740 | 0 | { |
741 | 0 | rp_proj[pos + pos_x].mv = rb->mv; |
742 | 0 | rp_proj[pos + pos_x].ref = ref2ref; |
743 | 0 | } |
744 | 0 | if (++x >= col_end8i) break; |
745 | 0 | rb++; |
746 | 0 | if (rb->ref != b_ref || rb->mv.n != b_mv.n) break; |
747 | 0 | pos_x++; |
748 | 0 | } |
749 | 0 | } else { |
750 | 0 | for (;;) { |
751 | 0 | if (++x >= col_end8i) break; |
752 | 0 | rb++; |
753 | 0 | if (rb->ref != b_ref || rb->mv.n != b_mv.n) break; |
754 | 0 | } |
755 | 0 | } |
756 | 0 | x--; |
757 | 0 | } |
758 | 0 | r += stride; |
759 | 0 | } |
760 | 0 | } |
761 | 0 | } |
762 | | |
763 | | static void save_tmvs_c(refmvs_temporal_block *rp, const ptrdiff_t stride, |
764 | | refmvs_block *const *const rr, |
765 | | const uint8_t *const ref_sign, |
766 | | const int col_end8, const int row_end8, |
767 | | const int col_start8, const int row_start8) |
768 | 0 | { |
769 | 0 | for (int y = row_start8; y < row_end8; y++) { |
770 | 0 | const refmvs_block *const b = rr[(y & 15) * 2]; |
771 | |
|
772 | 0 | for (int x = col_start8; x < col_end8;) { |
773 | 0 | const refmvs_block *const cand_b = &b[x * 2 + 1]; |
774 | 0 | const int bw8 = (dav1d_block_dimensions[cand_b->bs][0] + 1) >> 1; |
775 | |
|
776 | 0 | if (cand_b->ref.ref[1] > 0 && ref_sign[cand_b->ref.ref[1] - 1] && |
777 | 0 | (abs(cand_b->mv.mv[1].y) | abs(cand_b->mv.mv[1].x)) < 4096) |
778 | 0 | { |
779 | 0 | for (int n = 0; n < bw8; n++, x++) |
780 | 0 | rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[1], |
781 | 0 | .ref = cand_b->ref.ref[1] }; |
782 | 0 | } else if (cand_b->ref.ref[0] > 0 && ref_sign[cand_b->ref.ref[0] - 1] && |
783 | 0 | (abs(cand_b->mv.mv[0].y) | abs(cand_b->mv.mv[0].x)) < 4096) |
784 | 0 | { |
785 | 0 | for (int n = 0; n < bw8; n++, x++) |
786 | 0 | rp[x] = (refmvs_temporal_block) { .mv = cand_b->mv.mv[0], |
787 | 0 | .ref = cand_b->ref.ref[0] }; |
788 | 0 | } else { |
789 | 0 | for (int n = 0; n < bw8; n++, x++) { |
790 | 0 | rp[x].mv.n = 0; |
791 | 0 | rp[x].ref = 0; // "invalid" |
792 | 0 | } |
793 | 0 | } |
794 | 0 | } |
795 | 0 | rp += stride; |
796 | 0 | } |
797 | 0 | } |
798 | | |
799 | | int dav1d_refmvs_init_frame(refmvs_frame *const rf, |
800 | | const Dav1dSequenceHeader *const seq_hdr, |
801 | | const Dav1dFrameHeader *const frm_hdr, |
802 | | const uint8_t ref_poc[7], |
803 | | refmvs_temporal_block *const rp, |
804 | | const uint8_t ref_ref_poc[7][7], |
805 | | /*const*/ refmvs_temporal_block *const rp_ref[7], |
806 | | const int n_tile_threads, const int n_frame_threads) |
807 | 0 | { |
808 | 0 | const int rp_stride = ((frm_hdr->width[0] + 127) & ~127) >> 3; |
809 | 0 | const int n_tile_rows = n_tile_threads > 1 ? frm_hdr->tiling.rows : 1; |
810 | 0 | const int n_blocks = rp_stride * n_tile_rows; |
811 | |
|
812 | 0 | rf->sbsz = 16 << seq_hdr->sb128; |
813 | 0 | rf->frm_hdr = frm_hdr; |
814 | 0 | rf->iw8 = (frm_hdr->width[0] + 7) >> 3; |
815 | 0 | rf->ih8 = (frm_hdr->height + 7) >> 3; |
816 | 0 | rf->iw4 = rf->iw8 << 1; |
817 | 0 | rf->ih4 = rf->ih8 << 1; |
818 | 0 | rf->rp = rp; |
819 | 0 | rf->rp_stride = rp_stride; |
820 | 0 | rf->n_tile_threads = n_tile_threads; |
821 | 0 | rf->n_frame_threads = n_frame_threads; |
822 | |
|
823 | 0 | if (n_blocks != rf->n_blocks) { |
824 | 0 | const size_t r_sz = sizeof(*rf->r) * 35 * 2 * n_blocks * (1 + (n_frame_threads > 1)); |
825 | 0 | const size_t rp_proj_sz = sizeof(*rf->rp_proj) * 16 * n_blocks; |
826 | | /* Note that sizeof(*rf->r) == 12, but it's accessed using 16-byte unaligned |
827 | | * loads in save_tmvs() asm which can overread 4 bytes into rp_proj. */ |
828 | 0 | dav1d_free_aligned(rf->r); |
829 | 0 | rf->r = dav1d_alloc_aligned(ALLOC_REFMVS, r_sz + rp_proj_sz, 64); |
830 | 0 | if (!rf->r) { |
831 | 0 | rf->n_blocks = 0; |
832 | 0 | return DAV1D_ERR(ENOMEM); |
833 | 0 | } |
834 | | |
835 | 0 | rf->rp_proj = (refmvs_temporal_block*)((uintptr_t)rf->r + r_sz); |
836 | 0 | rf->n_blocks = n_blocks; |
837 | 0 | } |
838 | | |
839 | 0 | const int poc = frm_hdr->frame_offset; |
840 | 0 | for (int i = 0; i < 7; i++) { |
841 | 0 | const int poc_diff = get_poc_diff(seq_hdr->order_hint_n_bits, |
842 | 0 | ref_poc[i], poc); |
843 | 0 | rf->sign_bias[i] = poc_diff > 0; |
844 | 0 | rf->mfmv_sign[i] = poc_diff < 0; |
845 | 0 | rf->pocdiff[i] = iclip(get_poc_diff(seq_hdr->order_hint_n_bits, |
846 | 0 | poc, ref_poc[i]), -31, 31); |
847 | 0 | } |
848 | | |
849 | | // temporal MV setup |
850 | 0 | rf->n_mfmvs = 0; |
851 | 0 | rf->rp_ref = rp_ref; |
852 | 0 | if (frm_hdr->use_ref_frame_mvs && seq_hdr->order_hint_n_bits) { |
853 | 0 | int total = 2; |
854 | 0 | if (rp_ref[0] && ref_ref_poc[0][6] != ref_poc[3] /* alt-of-last != gold */) { |
855 | 0 | rf->mfmv_ref[rf->n_mfmvs++] = 0; // last |
856 | 0 | total = 3; |
857 | 0 | } |
858 | 0 | if (rp_ref[4] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[4], |
859 | 0 | frm_hdr->frame_offset) > 0) |
860 | 0 | { |
861 | 0 | rf->mfmv_ref[rf->n_mfmvs++] = 4; // bwd |
862 | 0 | } |
863 | 0 | if (rp_ref[5] && get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[5], |
864 | 0 | frm_hdr->frame_offset) > 0) |
865 | 0 | { |
866 | 0 | rf->mfmv_ref[rf->n_mfmvs++] = 5; // altref2 |
867 | 0 | } |
868 | 0 | if (rf->n_mfmvs < total && rp_ref[6] && |
869 | 0 | get_poc_diff(seq_hdr->order_hint_n_bits, ref_poc[6], |
870 | 0 | frm_hdr->frame_offset) > 0) |
871 | 0 | { |
872 | 0 | rf->mfmv_ref[rf->n_mfmvs++] = 6; // altref |
873 | 0 | } |
874 | 0 | if (rf->n_mfmvs < total && rp_ref[1]) |
875 | 0 | rf->mfmv_ref[rf->n_mfmvs++] = 1; // last2 |
876 | |
|
877 | 0 | for (int n = 0; n < rf->n_mfmvs; n++) { |
878 | 0 | const int rpoc = ref_poc[rf->mfmv_ref[n]]; |
879 | 0 | const int diff1 = get_poc_diff(seq_hdr->order_hint_n_bits, |
880 | 0 | rpoc, frm_hdr->frame_offset); |
881 | 0 | if (abs(diff1) > 31) { |
882 | 0 | rf->mfmv_ref2cur[n] = INVALID_REF2CUR; |
883 | 0 | } else { |
884 | 0 | rf->mfmv_ref2cur[n] = rf->mfmv_ref[n] < 4 ? -diff1 : diff1; |
885 | 0 | for (int m = 0; m < 7; m++) { |
886 | 0 | const int rrpoc = ref_ref_poc[rf->mfmv_ref[n]][m]; |
887 | 0 | const int diff2 = get_poc_diff(seq_hdr->order_hint_n_bits, |
888 | 0 | rpoc, rrpoc); |
889 | | // unsigned comparison also catches the < 0 case |
890 | 0 | rf->mfmv_ref2ref[n][m] = (unsigned) diff2 > 31U ? 0 : diff2; |
891 | 0 | } |
892 | 0 | } |
893 | 0 | } |
894 | 0 | } |
895 | 0 | rf->use_ref_frame_mvs = rf->n_mfmvs > 0; |
896 | |
|
897 | 0 | return 0; |
898 | 0 | } |
899 | | |
900 | | static void splat_mv_c(refmvs_block **rr, const refmvs_block *const rmv, |
901 | | const int bx4, const int bw4, int bh4) |
902 | 0 | { |
903 | 0 | do { |
904 | 0 | refmvs_block *const r = *rr++ + bx4; |
905 | 0 | for (int x = 0; x < bw4; x++) |
906 | 0 | r[x] = *rmv; |
907 | 0 | } while (--bh4); |
908 | 0 | } |
909 | | |
910 | | #if HAVE_ASM |
911 | | #if ARCH_AARCH64 || ARCH_ARM |
912 | | #include "src/arm/refmvs.h" |
913 | | #elif ARCH_LOONGARCH64 |
914 | | #include "src/loongarch/refmvs.h" |
915 | | #elif ARCH_X86 |
916 | | #include "src/x86/refmvs.h" |
917 | | #endif |
918 | | #endif |
919 | | |
920 | | COLD void dav1d_refmvs_dsp_init(Dav1dRefmvsDSPContext *const c) |
921 | 0 | { |
922 | 0 | c->load_tmvs = load_tmvs_c; |
923 | 0 | c->save_tmvs = save_tmvs_c; |
924 | 0 | c->splat_mv = splat_mv_c; |
925 | |
|
926 | | #if HAVE_ASM |
927 | | #if ARCH_AARCH64 || ARCH_ARM |
928 | | refmvs_dsp_init_arm(c); |
929 | | #elif ARCH_LOONGARCH64 |
930 | | refmvs_dsp_init_loongarch(c); |
931 | | #elif ARCH_X86 |
932 | | refmvs_dsp_init_x86(c); |
933 | | #endif |
934 | | #endif |
935 | 0 | } |