/work/svt-av1/Source/Lib/Codec/av1me.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license. |
10 | | */ |
11 | | |
12 | | #include <limits.h> |
13 | | #include <math.h> |
14 | | #include <stdio.h> |
15 | | #include "av1me.h" |
16 | | #include "mcomp.h" |
17 | | #include "utility.h" |
18 | | #include "pcs.h" |
19 | | #include "sequence_control_set.h" |
20 | | #include "aom_dsp_rtcd.h" |
21 | | #include "md_process.h" |
22 | | #include "adaptive_mv_pred.h" |
23 | | /* Per-block-size table of SAD/variance function pointers; the entries are assigned in init_fn_ptr(). */ |
24 | | AomVarianceFnPtr svt_aom_mefn_ptr[BLOCK_SIZES_ALL]; |
25 | | /* Fills svt_aom_mefn_ptr. BFP0(w, h) assigns the sdf, vf, svf and sdx4df members (plus vf_hbd_10 when CONFIG_ENABLE_HIGH_BIT_DEPTH is set) of the BLOCK_wXh entry; OBFP(w, h) assigns osdf, ovf and osvf and is only compiled when CONFIG_ENABLE_OBMC is set. Both helper macros are defined inside the function body and are not #undef'd afterwards. */ |
26 | 1 | void init_fn_ptr(void) { |
27 | 1 | #if CONFIG_ENABLE_HIGH_BIT_DEPTH |
28 | 1 | #define BFP0(w, h) \ |
29 | 22 | svt_aom_mefn_ptr[BLOCK_##w##X##h].sdf = svt_aom_sad##w##x##h; \ |
30 | 22 | svt_aom_mefn_ptr[BLOCK_##w##X##h].vf = svt_aom_variance##w##x##h; \ |
31 | 22 | svt_aom_mefn_ptr[BLOCK_##w##X##h].vf_hbd_10 = svt_aom_highbd_10_variance##w##x##h; \ |
32 | 22 | svt_aom_mefn_ptr[BLOCK_##w##X##h].svf = svt_aom_sub_pixel_variance##w##x##h; \ |
33 | 22 | svt_aom_mefn_ptr[BLOCK_##w##X##h].sdx4df = svt_aom_sad##w##x##h##x4d; |
34 | | #else |
35 | | #define BFP0(w, h) \ |
36 | | svt_aom_mefn_ptr[BLOCK_##w##X##h].sdf = svt_aom_sad##w##x##h; \ |
37 | | svt_aom_mefn_ptr[BLOCK_##w##X##h].vf = svt_aom_variance##w##x##h; \ |
38 | | svt_aom_mefn_ptr[BLOCK_##w##X##h].svf = svt_aom_sub_pixel_variance##w##x##h; \ |
39 | | svt_aom_mefn_ptr[BLOCK_##w##X##h].sdx4df = svt_aom_sad##w##x##h##x4d; |
40 | | #endif |
41 | 1 | BFP0(4, 16) |
42 | 1 | BFP0(16, 4) |
43 | 1 | BFP0(8, 32) |
44 | 1 | BFP0(32, 8) |
45 | 1 | BFP0(16, 64) |
46 | 1 | BFP0(64, 16) |
47 | 1 | BFP0(128, 128) |
48 | 1 | BFP0(128, 64) |
49 | 1 | BFP0(64, 128) |
50 | 1 | BFP0(32, 16) |
51 | 1 | BFP0(16, 32) |
52 | 1 | BFP0(64, 32) |
53 | 1 | BFP0(32, 64) |
54 | 1 | BFP0(32, 32) |
55 | 1 | BFP0(64, 64) |
56 | 1 | BFP0(16, 16) |
57 | 1 | BFP0(16, 8) |
58 | 1 | BFP0(8, 16) |
59 | 1 | BFP0(8, 8) |
60 | 1 | BFP0(8, 4) |
61 | 1 | BFP0(4, 8) |
62 | 1 | BFP0(4, 4) |
63 | 1 | #if CONFIG_ENABLE_OBMC |
64 | 1 | #define OBFP(w, h) \ |
65 | 22 | svt_aom_mefn_ptr[BLOCK_##w##X##h].osdf = svt_aom_obmc_sad##w##x##h; \ |
66 | 22 | svt_aom_mefn_ptr[BLOCK_##w##X##h].ovf = svt_aom_obmc_variance##w##x##h; \ |
67 | 22 | svt_aom_mefn_ptr[BLOCK_##w##X##h].osvf = svt_aom_obmc_sub_pixel_variance##w##x##h; |
68 | 1 | OBFP(128, 128) |
69 | 1 | OBFP(128, 64) |
70 | 1 | OBFP(64, 128) |
71 | 1 | OBFP(64, 64) |
72 | 1 | OBFP(64, 32) |
73 | 1 | OBFP(32, 64) |
74 | 1 | OBFP(32, 32) |
75 | 1 | OBFP(32, 16) |
76 | 1 | OBFP(16, 32) |
77 | 1 | OBFP(16, 16) |
78 | 1 | OBFP(16, 8) |
79 | 1 | OBFP(8, 16) |
80 | 1 | OBFP(8, 8) |
81 | 1 | OBFP(4, 8) |
82 | 1 | OBFP(8, 4) |
83 | 1 | OBFP(4, 4) |
84 | 1 | OBFP(4, 16) |
85 | 1 | OBFP(16, 4) |
86 | 1 | OBFP(8, 32) |
87 | 1 | OBFP(32, 8) |
88 | 1 | OBFP(16, 64) |
89 | 1 | OBFP(64, 16) |
90 | 1 | #endif |
91 | 1 | } |
92 | | /* Returns the address of the sample at column mv->x, row mv->y inside buf, i.e. &buf->buf[mv->y * buf->stride + mv->x]. No bounds check is performed. */ |
93 | 0 | static INLINE const uint8_t* get_buf_from_mv(const Buf2D* buf, const Mv* mv) { |
94 | 0 | return &buf->buf[mv->y * buf->stride + mv->x]; |
95 | 0 | } |
96 | | /* Tightens *mv_limits so that it also lies within MAX_FULL_PEL_VAL of mv (components shifted right by 3; the minimum is rounded up when any of the low 3 bits are set) and within (MV_LOW >> 3) + 1 .. (MV_UPP >> 3) - 1. Existing limits are only ever narrowed, never widened. */ |
97 | 0 | void svt_av1_set_mv_search_range(MvLimits* mv_limits, const Mv* mv) { |
98 | 0 | int col_min = (mv->x >> 3) - MAX_FULL_PEL_VAL + !!(mv->x & 7); |
99 | 0 | int row_min = (mv->y >> 3) - MAX_FULL_PEL_VAL + !!(mv->y & 7); |
100 | 0 | int col_max = (mv->x >> 3) + MAX_FULL_PEL_VAL; |
101 | 0 | int row_max = (mv->y >> 3) + MAX_FULL_PEL_VAL; |
102 | 

103 | 0 | col_min = AOMMAX(col_min, (MV_LOW >> 3) + 1); |
104 | 0 | row_min = AOMMAX(row_min, (MV_LOW >> 3) + 1); |
105 | 0 | col_max = AOMMIN(col_max, (MV_UPP >> 3) - 1); |
106 | 0 | row_max = AOMMIN(row_max, (MV_UPP >> 3) - 1); |
107 | | |
108 | | // Get intersection of UMV window and valid MV window to reduce # of checks |
109 | | // in diamond search. |
110 | 0 | if (mv_limits->col_min < col_min) { |
111 | 0 | mv_limits->col_min = col_min; |
112 | 0 | } |
113 | 0 | if (mv_limits->col_max > col_max) { |
114 | 0 | mv_limits->col_max = col_max; |
115 | 0 | } |
116 | 0 | if (mv_limits->row_min < row_min) { |
117 | 0 | mv_limits->row_min = row_min; |
118 | 0 | } |
119 | 0 | if (mv_limits->row_max > row_max) { |
120 | 0 | mv_limits->row_max = row_max; |
121 | 0 | } |
122 | 0 | } |
123 | | /* Extra term added to the rounding shift used by svt_aom_mv_err_cost(). */ |
124 | | #define PIXEL_TRANSFORM_ERROR_SCALE 4 |
125 | | /* Table-free MV cost approximation: returns 1296 + 50 * (|mv->x - ref->x| + |mv->y - ref->y|). The sum is formed in uint32_t and returned as int. */ |
126 | 0 | int svt_aom_mv_err_cost_light(const Mv* mv, const Mv* ref) { |
127 | 0 | const uint32_t factor = 50; |
128 | 0 | const uint32_t absmvdiffx = ABS(mv->x - ref->x); |
129 | 0 | const uint32_t absmvdiffy = ABS(mv->y - ref->y); |
130 | 0 | const uint32_t mv_rate = 1296 + (factor * (absmvdiffx + absmvdiffy)); |
131 | 0 | return mv_rate; |
132 | 0 | } |
133 | | /* Same linear approximation as svt_aom_mv_err_cost_light(), except that each absolute component difference is multiplied by 8 before the factor is applied (presumably because the callers pass full-pel MVs - confirm against callers). */ |
134 | 0 | static int mvsad_err_cost_light(const Mv* mv, const Mv* ref) { |
135 | 0 | const uint32_t factor = 50; |
136 | 0 | const uint32_t absmvdiffx = ABS(mv->x - ref->x) * 8; |
137 | 0 | const uint32_t absmvdiffy = ABS(mv->y - ref->y) * 8; |
138 | 0 | const uint32_t mv_rate = 1296 + (factor * (absmvdiffx + absmvdiffy)); |
139 | 0 | return mv_rate; |
140 | 0 | } |
141 | | /* Returns svt_mv_cost() of the difference (mv - ref), multiplied by error_per_bit in 64-bit arithmetic and rounded down by RDDIV_BITS + AV1_PROB_COST_SHIFT - RD_EPB_SHIFT + PIXEL_TRANSFORM_ERROR_SCALE bits. Returns 0 when mvcost is NULL. */ |
142 | 0 | int svt_aom_mv_err_cost(const Mv* mv, const Mv* ref, const int* mvjcost, const int* mvcost[2], int error_per_bit) { |
143 | 0 | if (mvcost) { |
144 | 0 | const Mv diff = (Mv){{mv->x - ref->x, mv->y - ref->y}}; |
145 | 0 | return (int)ROUND_POWER_OF_TWO_64( |
146 | 0 | (int64_t)svt_mv_cost(&diff, mvjcost, mvcost) * error_per_bit, |
147 | 0 | RDDIV_BITS + AV1_PROB_COST_SHIFT - RD_EPB_SHIFT + PIXEL_TRANSFORM_ERROR_SCALE); |
148 | 0 | } |
149 | 0 | return 0; |
150 | 0 | } |
151 | | /* MV cost used while comparing SADs. When x->approx_inter_rate is set it defers to mvsad_err_cost_light(); otherwise it multiplies (mv - ref) by 8, prices it with svt_mv_cost() using x->nmv_vec_cost / x->mv_cost_stack, scales by sad_per_bit and rounds off AV1_PROB_COST_SHIFT bits. */ |
152 | 0 | static int mvsad_err_cost(const IntraBcContext* x, const Mv* mv, const Mv* ref, int sad_per_bit) { |
153 | 0 | if (x->approx_inter_rate) { |
154 | 0 | return mvsad_err_cost_light(mv, ref); |
155 | 0 | } |
156 | 0 | const Mv diff = (Mv){{(mv->x - ref->x) * 8, (mv->y - ref->y) * 8}}; |
157 | 0 | return ROUND_POWER_OF_TWO( |
158 | 0 | (unsigned)svt_mv_cost(&diff, x->nmv_vec_cost, (const int* const*)x->mv_cost_stack) * sad_per_bit, |
159 | 0 | AV1_PROB_COST_SHIFT); |
160 | 0 | } |
161 | | /* Builds the search-site list in cfg->ss: entry 0 is the zero MV with offset 0, followed by 8 sites per step (4 axis-aligned, then 4 diagonal) whose length starts at MAX_FIRST_STEP and is halved until it reaches 0. Each site's offset is mv.y * stride + mv.x. Also sets cfg->ss_count and cfg->searches_per_step (8). */ |
162 | 0 | void svt_av1_init3smotion_compensation(SearchSiteConfig* cfg, int stride) { |
163 | 0 | int len, ss_count = 1; |
164 | 

165 | 0 | cfg->ss[0].mv.as_int = 0; |
166 | 0 | cfg->ss[0].offset = 0; |
167 | 

168 | 0 | for (len = MAX_FIRST_STEP; len > 0; len /= 2) { |
169 | | // Generate offsets for 8 search sites per step. |
170 | 0 | const Mv ss_mvs[8] = {{{0, -len}}, |
171 | 0 | {{0, len}}, |
172 | 0 | {{-len, 0}}, |
173 | 0 | {{len, 0}}, |
174 | 0 | {{-len, -len}}, |
175 | 0 | {{len, -len}}, |
176 | 0 | {{-len, len}}, |
177 | 0 | {{len, len}}}; |
178 | 0 | int i; |
179 | 0 | for (i = 0; i < 8; ++i) { |
180 | 0 | SearchSite* const ss = &cfg->ss[ss_count++]; |
181 | 0 | ss->mv = ss_mvs[i]; |
182 | 0 | ss->offset = ss->mv.y * stride + ss->mv.x; |
183 | 0 | } |
184 | 0 | } |
185 | 

186 | 0 | cfg->ss_count = ss_count; |
187 | 0 | cfg->searches_per_step = 8; |
188 | 0 | } |
189 | | /* Returns nonzero when mv lies inside mv_limits, with all four bounds treated as inclusive. */ |
190 | 0 | static INLINE int is_mv_in(const MvLimits* mv_limits, const Mv* mv) { |
191 | 0 | return (mv->x >= mv_limits->col_min) && (mv->x <= mv_limits->col_max) && (mv->y >= mv_limits->row_min) && |
192 | 0 | (mv->y <= mv_limits->row_max); |
193 | 0 | } |
194 | | /* Returns vfp->vf() between the source block (x->plane[0].src) and the reference block at best_mv in x->xdplane[0].pre[0]. When use_mvcost is nonzero an MV cost for best_mv * 8 relative to center_mv is added: svt_aom_mv_err_cost_light() if x->approx_inter_rate is set, svt_aom_mv_err_cost() otherwise. The SSE written by vf() is discarded. */ |
195 | | int svt_av1_get_mvpred_var(const IntraBcContext* x, const Mv* best_mv, const Mv* center_mv, const AomVarianceFnPtr* vfp, |
196 | 0 | int use_mvcost) { |
197 | 0 | const Buf2D* const what = &x->plane[0].src; |
198 | 0 | const Buf2D* const in_what = &x->xdplane[0].pre[0]; |
199 | 0 | const Mv mv = {{best_mv->x * 8, best_mv->y * 8}}; |
200 | 0 | unsigned int unused; |
201 | 0 | if (x->approx_inter_rate) { |
202 | 0 | return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) + |
203 | 0 | (use_mvcost ? svt_aom_mv_err_cost_light(&mv, center_mv) : 0); |
204 | 0 | } else { |
205 | 0 | return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) + |
206 | 0 | (use_mvcost ? svt_aom_mv_err_cost(&mv, center_mv, x->nmv_vec_cost, x->mv_cost_stack, x->errorperbit) : 0); |
207 | 0 | } |
208 | 0 | } |
209 | | |
210 | | // Exhaustive motion search around a given centre position with a given |
211 | | // step size. center_mv is first clamped to x->mv_limits; offsets in [-range, range] (clipped to the limits) are then visited, rows advancing by step. Returns the best SAD + MV cost (costed against ref_mv), stores the winner in *best_mv and the previous winner in x->second_best_mv. |
212 | | static int exhaustive_mesh_search(IntraBcContext* x, Mv* ref_mv, Mv* best_mv, int range, int step, int sad_per_bit, |
213 | 0 | const AomVarianceFnPtr* fn_ptr, const Mv* center_mv) { |
214 | 0 | const Buf2D* const what = &x->plane[0].src; |
215 | 0 | const Buf2D* const in_what = &x->xdplane[0].pre[0]; |
216 | 0 | Mv fcenter_mv = {.as_int = center_mv->as_int}; |
217 | 0 | unsigned int best_sad = INT_MAX; |
218 | 0 | int r, c, i; |
219 | 0 | int start_col, end_col, start_row, end_row; |
220 | 0 | int col_step = (step > 1) ? step : 4; /* step == 1 advances 4 columns per iteration to match the 4-wide sdx4df call below */ |
221 | 

222 | 0 | assert(step >= 1); |
223 | 

224 | 0 | clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max, x->mv_limits.row_min, x->mv_limits.row_max); |
225 | 0 | *best_mv = fcenter_mv; |
226 | 0 | best_sad = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) + |
227 | 0 | mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit); |
228 | 0 | start_row = AOMMAX(-range, x->mv_limits.row_min - fcenter_mv.y); |
229 | 0 | start_col = AOMMAX(-range, x->mv_limits.col_min - fcenter_mv.x); |
230 | 0 | end_row = AOMMIN(range, x->mv_limits.row_max - fcenter_mv.y); |
231 | 0 | end_col = AOMMIN(range, x->mv_limits.col_max - fcenter_mv.x); |
232 | 

233 | 0 | for (r = start_row; r <= end_row; r += step) { |
234 | 0 | for (c = start_col; c <= end_col; c += col_step) { |
235 | | // Step > 1 means we are not checking every location in this pass. |
236 | 0 | if (step > 1) { |
237 | 0 | const Mv mv = {{fcenter_mv.x + c, fcenter_mv.y + r}}; |
238 | 0 | unsigned int sad = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), in_what->stride); |
239 | 0 | if (sad < best_sad) { /* cheap reject on raw SAD before paying for the MV cost */ |
240 | 0 | sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); |
241 | 0 | if (sad < best_sad) { |
242 | 0 | best_sad = sad; |
243 | 0 | x->second_best_mv = *best_mv; |
244 | 0 | *best_mv = mv; |
245 | 0 | } |
246 | 0 | } |
247 | 0 | } else { |
248 | | // 4 sads in a single call if we are checking every location |
249 | 0 | if (c + 3 <= end_col) { |
250 | 0 | unsigned int sads[4]; |
251 | 0 | const uint8_t* addrs[4]; |
252 | 0 | for (i = 0; i < 4; ++i) { |
253 | 0 | const Mv mv = {{fcenter_mv.x + c + i, fcenter_mv.y + r}}; |
254 | 0 | addrs[i] = get_buf_from_mv(in_what, &mv); |
255 | 0 | } |
256 | 0 | fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads); |
257 | 

258 | 0 | for (i = 0; i < 4; ++i) { |
259 | 0 | if (sads[i] < best_sad) { |
260 | 0 | const Mv mv = {{fcenter_mv.x + c + i, fcenter_mv.y + r}}; |
261 | 0 | const unsigned int sad = sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); |
262 | 0 | if (sad < best_sad) { |
263 | 0 | best_sad = sad; |
264 | 0 | x->second_best_mv = *best_mv; |
265 | 0 | *best_mv = mv; |
266 | 0 | } |
267 | 0 | } |
268 | 0 | } |
269 | 0 | } else { |
270 | 0 | for (i = 0; i < end_col - c; ++i) { /* NOTE(review): this tail covers columns c .. end_col - 1 only; end_col itself is never evaluated on this path - confirm that is intended */ |
271 | 0 | const Mv mv = {{fcenter_mv.x + c + i, fcenter_mv.y + r}}; |
272 | 0 | unsigned int sad = fn_ptr->sdf( |
273 | 0 | what->buf, what->stride, get_buf_from_mv(in_what, &mv), in_what->stride); |
274 | 0 | if (sad < best_sad) { |
275 | 0 | sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); |
276 | 0 | if (sad < best_sad) { |
277 | 0 | best_sad = sad; |
278 | 0 | x->second_best_mv = *best_mv; |
279 | 0 | *best_mv = mv; |
280 | 0 | } |
281 | 0 | } |
282 | 0 | } |
283 | 0 | } |
284 | 0 | } |
285 | 0 | } |
286 | 0 | } |
287 | 

288 | 0 | return best_sad; |
289 | 0 | } |
290 | | /* Diamond-pattern SAD search. *ref_mv is clamped to x->mv_limits (in place) and used as the starting point; for each remaining step the cfg->searches_per_step sites of that step are evaluated around the current best position, and *best_mv moves to the best site found. Costs are SAD plus mvsad_err_cost() relative to center_mv >> 3. Returns the best cost; *num00 counts the steps in which no better site was found while the best position was still the starting address. */ |
291 | | int svt_av1_diamond_search_sad_c(IntraBcContext* x, const SearchSiteConfig* cfg, Mv* ref_mv, Mv* best_mv, |
292 | | int search_param, int sad_per_bit, int* num00, const AomVarianceFnPtr* fn_ptr, |
293 | 0 | const Mv* center_mv) { |
294 | 0 | int i, j, step; |
295 | 

296 | 0 | uint8_t* what = x->plane[0].src.buf; |
297 | 0 | const int what_stride = x->plane[0].src.stride; |
298 | 0 | const uint8_t* in_what; |
299 | 0 | const int in_what_stride = x->xdplane[0].pre[0].stride; |
300 | 0 | const uint8_t* best_address; |
301 | 

302 | 0 | unsigned int bestsad; |
303 | 0 | int best_site = 0; |
304 | 0 | int last_site = 0; |
305 | 

306 | 0 | int ref_row; |
307 | 0 | int ref_col; |
308 | | |
309 | | // search_param determines the length of the initial step and hence the number |
310 | | // of iterations. |
311 | | // 0 = initial step (MAX_FIRST_STEP) pel |
312 | | // 1 = (MAX_FIRST_STEP/2) pel, |
313 | | // 2 = (MAX_FIRST_STEP/4) pel... |
314 | 0 | const SearchSite* ss = &cfg->ss[search_param * cfg->searches_per_step]; |
315 | 0 | const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param; |
316 | 

317 | 0 | const Mv fcenter_mv = {{center_mv->x >> 3, center_mv->y >> 3}}; |
318 | 0 | clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max, x->mv_limits.row_min, x->mv_limits.row_max); |
319 | 0 | ref_row = ref_mv->y; |
320 | 0 | ref_col = ref_mv->x; |
321 | 0 | *num00 = 0; |
322 | 0 | best_mv->y = ref_row; |
323 | 0 | best_mv->x = ref_col; |
324 | | |
325 | | // Work out the start point for the search |
326 | 0 | in_what = x->xdplane[0].pre[0].buf + ref_row * in_what_stride + ref_col; |
327 | 0 | best_address = in_what; |
328 | | |
329 | | // Check the starting position |
330 | 0 | bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) + |
331 | 0 | mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); |
332 | 

333 | 0 | i = 1; /* ss[0] of the selected window is skipped; site indices of each step start at 1 */ |
334 | 

335 | 0 | for (step = 0; step < tot_steps; step++) { |
336 | 0 | int all_in = 1; |
337 | | |
338 | | // All_in is true if every one of the points we are checking is within |
339 | | // the bounds of the image. |
340 | 0 | all_in &= ((best_mv->y + ss[i].mv.y) > x->mv_limits.row_min); |
341 | 0 | all_in &= ((best_mv->y + ss[i + 1].mv.y) < x->mv_limits.row_max); |
342 | 0 | all_in &= ((best_mv->x + ss[i + 2].mv.x) > x->mv_limits.col_min); |
343 | 0 | all_in &= ((best_mv->x + ss[i + 3].mv.x) < x->mv_limits.col_max); |
344 | | |
345 | | // If all the pixels are within the bounds we don't check whether the |
346 | | // search point is valid in this loop, otherwise we check each point |
347 | | // for validity. |
348 | 0 | if (all_in) { |
349 | 0 | unsigned int sad_array[4]; |
350 | 

351 | 0 | for (j = 0; j < cfg->searches_per_step; j += 4) { |
352 | 0 | unsigned char const* block_offset[4]; |
353 | 

354 | 0 | for (int t = 0; t < 4; t++) { |
355 | 0 | block_offset[t] = ss[i + t].offset + best_address; |
356 | 0 | } |
357 | 

358 | 0 | fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array); |
359 | 

360 | 0 | for (int t = 0; t < 4; t++, i++) { |
361 | 0 | if (sad_array[t] < bestsad) { |
362 | 0 | const Mv this_mv = {{best_mv->x + ss[i].mv.x, best_mv->y + ss[i].mv.y}}; |
363 | 0 | sad_array[t] += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); |
364 | 0 | if (sad_array[t] < bestsad) { |
365 | 0 | bestsad = sad_array[t]; |
366 | 0 | best_site = i; |
367 | 0 | } |
368 | 0 | } |
369 | 0 | } |
370 | 0 | } |
371 | 0 | } else { |
372 | 0 | for (j = 0; j < cfg->searches_per_step; j++) { |
373 | | // Trap illegal vectors |
374 | 0 | const Mv this_mv = {{best_mv->x + ss[i].mv.x, best_mv->y + ss[i].mv.y}}; |
375 | 

376 | 0 | if (is_mv_in(&x->mv_limits, &this_mv)) { |
377 | 0 | const uint8_t* const check_here = ss[i].offset + best_address; |
378 | 0 | unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); |
379 | 

380 | 0 | if (thissad < bestsad) { |
381 | 0 | thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); |
382 | 0 | if (thissad < bestsad) { |
383 | 0 | bestsad = thissad; |
384 | 0 | best_site = i; |
385 | 0 | } |
386 | 0 | } |
387 | 0 | } |
388 | 0 | i++; |
389 | 0 | } |
390 | 0 | } |
391 | 0 | if (best_site != last_site) { |
392 | 0 | x->second_best_mv = *best_mv; |
393 | 0 | best_mv->y += ss[best_site].mv.y; |
394 | 0 | best_mv->x += ss[best_site].mv.x; |
395 | 0 | best_address += ss[best_site].offset; |
396 | 0 | last_site = best_site; |
397 | | #if defined(NEW_DIAMOND_SEARCH) |
398 | | while (1) { |
399 | | const Mv this_mv = {{best_mv->x + ss[best_site].mv.x, best_mv->y + ss[best_site].mv.y}}; |
400 | | if (is_mv_in(&x->mv_limits, &this_mv)) { |
401 | | const uint8_t* const check_here = ss[best_site].offset + best_address; |
402 | | unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); |
403 | | if (thissad < bestsad) { |
404 | | thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); |
405 | | if (thissad < bestsad) { |
406 | | bestsad = thissad; |
407 | | best_mv->y += ss[best_site].mv.y; |
408 | | best_mv->x += ss[best_site].mv.x; |
409 | | best_address += ss[best_site].offset; |
410 | | continue; |
411 | | } |
412 | | } |
413 | | } |
414 | | break; |
415 | | } |
416 | | #endif |
417 | 0 | } else if (best_address == in_what) { |
418 | 0 | (*num00)++; |
419 | 0 | } |
420 | 0 | } |
421 | 0 | return bestsad; |
422 | 0 | } |
423 | | /* Local refinement: for at most search_range iterations, evaluates the 4 direct neighbours of *ref_mv (up, left, right, down) and moves *ref_mv to the one with the lowest SAD + MV cost, stopping as soon as no neighbour improves. Costs are taken relative to center_mv >> 3. Returns the best cost; the position before each move is saved in x->second_best_mv. */ |
424 | | static int svt_av1_refining_search_sad(IntraBcContext* x, Mv* ref_mv, int error_per_bit, int search_range, |
425 | 0 | const AomVarianceFnPtr* fn_ptr, const Mv* center_mv) { |
426 | 0 | const Mv neighbors[4] = {{{0, -1}}, {{-1, 0}}, {{1, 0}}, {{0, 1}}}; |
427 | 0 | const Buf2D* const what = &x->plane[0].src; |
428 | 0 | const Buf2D* const in_what = &x->xdplane[0].pre[0]; |
429 | 0 | const Mv fcenter_mv = {{center_mv->x >> 3, center_mv->y >> 3}}; |
430 | 0 | const uint8_t* best_address = get_buf_from_mv(in_what, ref_mv); |
431 | 0 | unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) + |
432 | 0 | mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); |
433 | 0 | for (int i = 0; i < search_range; i++) { |
434 | 0 | int best_site = -1; |
435 | 0 | const int all_in = (ref_mv->y - 1) > x->mv_limits.row_min && (ref_mv->y + 1) < x->mv_limits.row_max && |
436 | 0 | (ref_mv->x - 1) > x->mv_limits.col_min && (ref_mv->x + 1) < x->mv_limits.col_max; |
437 | 

438 | 0 | if (all_in) { |
439 | 0 | unsigned int sads[4]; |
440 | 0 | const uint8_t* const positions[4] = { /* same order as neighbors[]: up, left, right, down */ |
441 | 0 | best_address - in_what->stride, best_address - 1, best_address + 1, best_address + in_what->stride}; |
442 | 

443 | 0 | fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads); |
444 | 

445 | 0 | for (int j = 0; j < 4; ++j) { |
446 | 0 | if (sads[j] < best_sad) { |
447 | 0 | const Mv mv = {{ref_mv->x + neighbors[j].x, ref_mv->y + neighbors[j].y}}; |
448 | 0 | sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); |
449 | 0 | if (sads[j] < best_sad) { |
450 | 0 | best_sad = sads[j]; |
451 | 0 | best_site = j; |
452 | 0 | } |
453 | 0 | } |
454 | 0 | } |
455 | 0 | } else { |
456 | 0 | for (int j = 0; j < 4; ++j) { |
457 | 0 | const Mv mv = {{ref_mv->x + neighbors[j].x, ref_mv->y + neighbors[j].y}}; |
458 | 

459 | 0 | if (is_mv_in(&x->mv_limits, &mv)) { |
460 | 0 | unsigned int sad = fn_ptr->sdf( |
461 | 0 | what->buf, what->stride, get_buf_from_mv(in_what, &mv), in_what->stride); |
462 | 0 | if (sad < best_sad) { |
463 | 0 | sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); |
464 | 0 | if (sad < best_sad) { |
465 | 0 | best_sad = sad; |
466 | 0 | best_site = j; |
467 | 0 | } |
468 | 0 | } |
469 | 0 | } |
470 | 0 | } |
471 | 0 | } |
472 | 

473 | 0 | if (best_site == -1) { |
474 | 0 | break; |
475 | 0 | } else { |
476 | 0 | x->second_best_mv = *ref_mv; |
477 | 0 | ref_mv->y += neighbors[best_site].y; |
478 | 0 | ref_mv->x += neighbors[best_site].x; |
479 | 0 | best_address = get_buf_from_mv(in_what, ref_mv); |
480 | 0 | } |
481 | 0 | } |
482 | 

483 | 0 | return best_sad; |
484 | 0 | } |
485 | | /* Runs svt_av1_diamond_search_sad_c() from mvp_full at step_param, then repeats it at step_param + n for n up to further_steps (skipping iterations while num00 is nonzero), and optionally finishes with svt_av1_refining_search_sad(). Every candidate is re-scored with svt_av1_get_mvpred_var(); the best MV is left in x->best_mv and its score is returned. cost_list is unused. */ |
486 | | /* do_refine: If last step (1-away) of n-step search doesn't pick the center |
487 | | point as the best match, we will do a final 1-away diamond |
488 | | refining search */ |
489 | | static int full_pixel_diamond(PictureControlSet* pcs, IntraBcContext /*MACROBLOCK*/* x, Mv* mvp_full, int step_param, |
490 | | int sadpb, int further_steps, int do_refine, int* cost_list, |
491 | 0 | const AomVarianceFnPtr* fn_ptr, const Mv* ref_mv) { |
492 | 0 | Mv temp_mv; |
493 | 0 | int thissme, n, num00 = 0; |
494 | 0 | (void)cost_list; |
495 | | /*int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv, |
496 | | step_param, sadpb, &n, fn_ptr, ref_mv);*/ |
497 | 0 | int bestsme = svt_av1_diamond_search_sad_c( |
498 | 0 | x, &pcs->ss_cfg, mvp_full, &temp_mv, step_param, sadpb, &n, fn_ptr, ref_mv); /* the search's num00 output is written into n here, which also initialises n */ |
499 | 

500 | 0 | if (bestsme < INT_MAX) { |
501 | 0 | bestsme = svt_av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); |
502 | 0 | } |
503 | 0 | x->best_mv = temp_mv; |
504 | | |
505 | | // If there won't be more n-step search, check to see if refining search is |
506 | | // needed. |
507 | 0 | if (n > further_steps) { |
508 | 0 | do_refine = 0; |
509 | 0 | } |
510 | 

511 | 0 | while (n < further_steps) { |
512 | 0 | ++n; |
513 | 

514 | 0 | if (num00) { |
515 | 0 | num00--; |
516 | 0 | } else { |
517 | | /*thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv, |
518 | | step_param + n, sadpb, &num00, fn_ptr, |
519 | | ref_mv);*/ |
520 | 0 | thissme = svt_av1_diamond_search_sad_c( |
521 | 0 | x, &pcs->ss_cfg, mvp_full, &temp_mv, step_param + n, sadpb, &num00, fn_ptr, ref_mv); |
522 | 

523 | 0 | if (thissme < INT_MAX) { |
524 | 0 | thissme = svt_av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); |
525 | 0 | } |
526 | | |
527 | | // check to see if refining search is needed. |
528 | 0 | if (num00 > further_steps - n) { |
529 | 0 | do_refine = 0; |
530 | 0 | } |
531 | 

532 | 0 | if (thissme < bestsme) { |
533 | 0 | bestsme = thissme; |
534 | 0 | x->best_mv = temp_mv; |
535 | 0 | } |
536 | 0 | } |
537 | 0 | } |
538 | | |
539 | | // final 1-away diamond refining search |
540 | 0 | if (do_refine) { |
541 | 0 | const int search_range = 8; |
542 | 0 | Mv best_mv = x->best_mv; |
543 | 0 | thissme = svt_av1_refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr, ref_mv); |
544 | 0 | if (thissme < INT_MAX) { |
545 | 0 | thissme = svt_av1_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1); |
546 | 0 | } |
547 | 0 | if (thissme < bestsme) { |
548 | 0 | bestsme = thissme; |
549 | 0 | x->best_mv = best_mv; |
550 | 0 | } |
551 | 0 | } |
552 | | |
553 | | // Return cost list. |
554 | | /* if (cost_list) { |
555 | | calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, &x->best_mv.as_mv, cost_list); |
556 | | }*/ |
557 | 0 | return bestsme; |
558 | 0 | } |
559 | | /* Bounds used by intrabc_full_pixel_exhaustive() to validate the first mesh pattern; MAX_RANGE also caps the adapted search range. */ |
560 | 0 | #define MIN_RANGE 7 |
561 | 0 | #define MAX_RANGE 256 |
562 | 0 | #define MIN_INTERVAL 1 |
563 | | /* Returns INT_MAX without searching (and without writing *dst_mv) when mesh_patterns[0] fails validation. Otherwise the range is widened to at least 5/4 of the larger |center_mv| component (capped at MAX_RANGE), the interval is scaled to keep the original range/interval ratio, and exhaustive_mesh_search() is run; further patterns are then applied until one has range 0 or interval 1. The winner is re-scored with svt_av1_get_mvpred_var() and stored in *dst_mv. */ |
564 | | // Runs a limited-range exhaustive mesh search using a pattern set |
565 | | // according to the encode speed profile. |
566 | | static int intrabc_full_pixel_exhaustive(PictureControlSet* pcs, IntraBcContext* x, const Mv* center_mv, int sadpb, |
567 | 0 | const AomVarianceFnPtr* fn_ptr, const Mv* ref_mv, Mv* dst_mv) { |
568 | 0 | const IntrabcCtrls* ctrls = &pcs->ppcs->intrabc_ctrls; |
569 | 

570 | 0 | Mv search_mv = *center_mv; |
571 | 0 | Mv ref_mv_fp = {{ref_mv->x >> 3, ref_mv->y >> 3}}; |
572 | 

573 | 0 | int range = ctrls->mesh_patterns[0].range; |
574 | 0 | int interval = ctrls->mesh_patterns[0].interval; |
575 | 0 | int best_cost = INT_MAX; |
576 | | |
577 | | // Validate parameters |
578 | 0 | if (range < MIN_RANGE || range > MAX_RANGE || interval < MIN_INTERVAL || interval > range) { |
579 | 0 | return INT_MAX; |
580 | 0 | } |
581 | | |
582 | 0 | const int base_interval_div = range / interval; /* at least 1, since interval <= range was checked above */ |
583 | | |
584 | | // Adapt search range based on center MV magnitude |
585 | 0 | int mv_mag = AOMMAX(abs(search_mv.x), abs(search_mv.y)); |
586 | 0 | range = AOMMAX(range, (5 * mv_mag) / 4); |
587 | 0 | range = AOMMIN(range, MAX_RANGE); |
588 | 0 | interval = AOMMAX(interval, range / base_interval_div); |
589 | | |
590 | | // Initial coarse search |
591 | 0 | best_cost = exhaustive_mesh_search(x, &ref_mv_fp, &search_mv, range, interval, sadpb, fn_ptr, &search_mv); /* search_mv is both the centre and the output */ |
592 | | |
593 | | // Progressive refinement |
594 | 0 | if (interval > MIN_INTERVAL && range > MIN_RANGE) { |
595 | 0 | for (int i = 1; i < MAX_MESH_STEP; i++) { |
596 | 0 | const MeshPattern* pattern = &ctrls->mesh_patterns[i]; |
597 | 

598 | 0 | if (pattern->range == 0) { |
599 | 0 | break; |
600 | 0 | } |
601 | | |
602 | 0 | best_cost = exhaustive_mesh_search( |
603 | 0 | x, &ref_mv_fp, &search_mv, pattern->range, pattern->interval, sadpb, fn_ptr, &search_mv); |
604 | 

605 | 0 | if (pattern->interval == 1) { |
606 | 0 | break; |
607 | 0 | } |
608 | 0 | } |
609 | 0 | } |
610 | | |
611 | | // Final cost evaluation |
612 | 0 | if (best_cost < INT_MAX) { |
613 | 0 | best_cost = svt_av1_get_mvpred_var(x, &search_mv, ref_mv, fn_ptr, 1); |
614 | 0 | } |
615 | 

616 | 0 | *dst_mv = search_mv; |
617 | 

618 | 0 | return best_cost; |
619 | 0 | } |
620 | | #if CONFIG_ENABLE_OBMC |
621 | | /* Returns vfp->ovf() for the reference block at best_mv in x->xdplane[0].pre[is_second], evaluated against wsrc and mask, plus (when use_mvcost is nonzero) an MV cost for best_mv * 8 relative to center_mv: svt_aom_mv_err_cost_light() if x->approx_inter_rate is set, svt_aom_mv_err_cost() otherwise. */ static int get_obmc_mvpred_var(const IntraBcContext* x, const int32_t* wsrc, const int32_t* mask, const Mv* best_mv, |
622 | 0 | const Mv* center_mv, const AomVarianceFnPtr* vfp, int use_mvcost, int is_second) { |
623 | 0 | const Buf2D* in_what = (const Buf2D*)(&x->xdplane[0].pre[is_second]); |
624 | 0 | const Mv mv = {{best_mv->x * 8, best_mv->y * 8}}; |
625 | 0 | unsigned int unused; |
626 | 0 | if (x->approx_inter_rate) { |
627 | 0 | return vfp->ovf(get_buf_from_mv((const Buf2D*)in_what, best_mv), in_what->stride, wsrc, mask, &unused) + |
628 | 0 | (use_mvcost ? svt_aom_mv_err_cost_light(&mv, center_mv) : 0); |
629 | 0 | } else { |
630 | 0 | return vfp->ovf(get_buf_from_mv((const Buf2D*)in_what, best_mv), in_what->stride, wsrc, mask, &unused) + |
631 | 0 | (use_mvcost ? svt_aom_mv_err_cost(&mv, center_mv, x->nmv_vec_cost, x->mv_cost_stack, x->errorperbit) : 0); |
632 | 0 | } |
633 | 0 | } |
634 | | /* For at most search_range iterations, moves *ref_mv to the in-limits neighbour with the lowest fn_ptr->osdf() + MV cost: the 4 direct neighbours, or all 8 including diagonals when search_diag is nonzero. Stops early when no neighbour improves. Costs are taken relative to center_mv >> 3. Returns the best cost found. */ |
635 | | static int obmc_refining_search_sad(const IntraBcContext* x, const int32_t* wsrc, const int32_t* mask, Mv* ref_mv, |
636 | | int error_per_bit, int search_range, const AomVarianceFnPtr* fn_ptr, |
637 | 0 | const Mv* center_mv, int is_second, uint8_t search_diag) { |
638 | 0 | const Mv neighbors[8] = {{{0, -1}}, {{-1, 0}}, {{1, 0}}, {{0, 1}}, {{1, -1}}, {{1, 1}}, {{-1, 1}}, {{-1, -1}}}; |
639 | 0 | const Buf2D* in_what = (const Buf2D*)(&x->xdplane[0].pre[is_second]); |
640 | 0 | const Mv fcenter_mv = {{center_mv->x >> 3, center_mv->y >> 3}}; |
641 | 0 | unsigned int best_sad = fn_ptr->osdf(get_buf_from_mv((const Buf2D*)in_what, ref_mv), in_what->stride, wsrc, mask) + |
642 | 0 | mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); |
643 | 0 | int i, j; |
644 | 

645 | 0 | for (i = 0; i < search_range; i++) { |
646 | 0 | int best_site = -1; |
647 | 

648 | 0 | for (j = 0; j < (search_diag ? 8 : 4); j++) { |
649 | 0 | const Mv mv = {{ref_mv->x + neighbors[j].x, ref_mv->y + neighbors[j].y}}; |
650 | 0 | if (is_mv_in(&x->mv_limits, &mv)) { |
651 | 0 | unsigned int sad = fn_ptr->osdf( |
652 | 0 | get_buf_from_mv((const Buf2D*)in_what, &mv), in_what->stride, wsrc, mask); |
653 | 0 | if (sad < best_sad) { |
654 | 0 | sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); |
655 | 0 | if (sad < best_sad) { |
656 | 0 | best_sad = sad; |
657 | 0 | best_site = j; |
658 | 0 | } |
659 | 0 | } |
660 | 0 | } |
661 | 0 | } |
662 | 

663 | 0 | if (best_site == -1) { |
664 | 0 | break; |
665 | 0 | } else { |
666 | 0 | ref_mv->y += neighbors[best_site].y; |
667 | 0 | ref_mv->x += neighbors[best_site].x; |
668 | 0 | } |
669 | 0 | } |
670 | 0 | return best_sad; |
671 | 0 | } |
672 | | /* Copies *mvp_full into *dst_mv, clamps it to x->mv_limits, refines it with obmc_refining_search_sad() using ctx->obmc_ctrls.fpel_search_range / fpel_search_diag, and re-scores the result with get_obmc_mvpred_var(). Side effect: x->approx_inter_rate is overwritten with ctx->approx_inter_rate. Returns the final score. */ |
673 | | int svt_av1_obmc_full_pixel_search(ModeDecisionContext* ctx, IntraBcContext* x, const Mv* mvp_full, int sadpb, |
674 | 0 | const AomVarianceFnPtr* fn_ptr, const Mv* ref_mv, Mv* dst_mv, int is_second) { |
675 | | // obmc_full_pixel_diamond does not provide BDR gain on 360p |
676 | 0 | const int32_t* wsrc = ctx->wsrc_buf; |
677 | 0 | const int32_t* mask = ctx->mask_buf; |
678 | 0 | const int search_range = ctx->obmc_ctrls.fpel_search_range; |
679 | 0 | *dst_mv = *mvp_full; |
680 | 0 | x->approx_inter_rate = ctx->approx_inter_rate; |
681 | 0 | clamp_mv(dst_mv, x->mv_limits.col_min, x->mv_limits.col_max, x->mv_limits.row_min, x->mv_limits.row_max); |
682 | 0 | clamp_mv(dst_mv, x->mv_limits.col_min, x->mv_limits.col_max, x->mv_limits.row_min, x->mv_limits.row_max); /* NOTE(review): identical to the call on the previous line; looks redundant - confirm and drop in the source */ |
683 | 0 | int thissme = obmc_refining_search_sad( |
684 | 0 | x, wsrc, mask, dst_mv, sadpb, search_range, fn_ptr, ref_mv, is_second, ctx->obmc_ctrls.fpel_search_diag); |
685 | 0 | if (thissme < INT_MAX) { |
686 | 0 | thissme = get_obmc_mvpred_var(x, wsrc, mask, dst_mv, ref_mv, fn_ptr, 1, is_second); |
687 | 0 | } |
688 | 

689 | 0 | return thissme; |
690 | 0 | } |
691 | | #endif |
692 | | |
693 | | #if CONFIG_ENABLE_OBMC |
694 | | /* Writes to *col_min / *col_max / *row_min / *row_max the intersection of three windows: mv_limits scaled by 8, ref_mv +/- MAX_FULL_PEL_VAL * 8, and MV_LOW + 1 .. MV_UPP - 1. */ static INLINE void set_subpel_mv_search_range(const MvLimits* mv_limits, int* col_min, int* col_max, int* row_min, |
695 | 0 | int* row_max, const Mv* ref_mv) { |
696 | 0 | const int max_mv = MAX_FULL_PEL_VAL * 8; |
697 | 0 | const int minc = AOMMAX(mv_limits->col_min * 8, ref_mv->x - max_mv); |
698 | 0 | const int maxc = AOMMIN(mv_limits->col_max * 8, ref_mv->x + max_mv); |
699 | 0 | const int minr = AOMMAX(mv_limits->row_min * 8, ref_mv->y - max_mv); |
700 | 0 | const int maxr = AOMMIN(mv_limits->row_max * 8, ref_mv->y + max_mv); |
701 | 

702 | 0 | *col_min = AOMMAX(MV_LOW + 1, minc); |
703 | 0 | *col_max = AOMMIN(MV_UPP - 1, maxc); |
704 | 0 | *row_min = AOMMAX(MV_LOW + 1, minr); |
705 | 0 | *row_max = AOMMIN(MV_UPP - 1, maxr); |
706 | 0 | } |
707 | | /* Twelve MV offsets: the left/right/up/down pattern repeated at magnitudes 4, 2 and 1. */ |
708 | | static const Mv search_step_table[12] = { |
709 | | // left, right, up, down |
710 | | {{-4, 0}}, |
711 | | {{4, 0}}, |
712 | | {{0, -4}}, |
713 | | {{0, 4}}, |
714 | | {{-2, 0}}, |
715 | | {{2, 0}}, |
716 | | {{0, -2}}, |
717 | | {{0, 2}}, |
718 | | {{-1, 0}}, |
719 | | {{1, 0}}, |
720 | | {{0, -1}}, |
721 | | {{0, 1}}}; |
722 | | /* Evaluates vfp->ovf() at y + offset against wsrc/mask (SSE goes to *sse1), stores that value in *distortion, and returns it plus the MV cost of bestmv relative to ref_mv: svt_aom_mv_err_cost_light() when use_low_precision_cost_estimation is nonzero, svt_aom_mv_err_cost() otherwise. */ |
723 | | static unsigned int setup_obmc_center_error(const int32_t* mask, const Mv* bestmv, const Mv* ref_mv, int error_per_bit, |
724 | | const AomVarianceFnPtr* vfp, const int32_t* const wsrc, |
725 | | const uint8_t* const y, int y_stride, int offset, int* mvjcost, |
726 | | const int* mvcost[2], unsigned int* sse1, |
727 | 0 | uint8_t use_low_precision_cost_estimation, int* distortion) { |
728 | 0 | unsigned int besterr; |
729 | 0 | besterr = vfp->ovf(y + offset, y_stride, wsrc, mask, sse1); |
730 | 0 | *distortion = besterr; |
731 | 0 | if (use_low_precision_cost_estimation) { |
732 | 0 | besterr += svt_aom_mv_err_cost_light(bestmv, ref_mv); |
733 | 0 | } else { |
734 | 0 | besterr += svt_aom_mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); |
735 | 0 | } |
736 | 0 | return besterr; |
737 | 0 | } |
738 | | |
/*
 * Helper macros for the second-level OBMC sub-pel checks. They are expanded
 * inside svt_av1_find_best_obmc_sub_pixel_tree_up() and intentionally capture
 * that function's locals (vfp, y, y_stride, z, mask, sse, thismse, besterr,
 * br, bc, minc/maxc/minr/maxr, ref_mv, mvjcost, mvcost, error_per_bit,
 * distortion, sse1).
 *
 * Every macro parameter is parenthesized at each use because callers pass
 * expressions such as `br0 + kr`.
 */

/* OBMC sub-pixel variance at 1/8-pel position (r, c); writes the local `sse`. */
#define DIST(r, c) vfp->osvf(pre(y, y_stride, (r), (c)), y_stride, sp(c), sp(r), z, mask, &sse)

/*
 * Evaluates position (r, c) if it lies inside the search window:
 *   - v receives the MV rate cost (light estimate when lp is non-zero),
 *   - when v + distortion beats besterr, the best position/cost and the
 *     caller's *distortion / *sse1 outputs are updated.
 * Outside the window v is set to INT_MAX and nothing else changes.
 */
#define CHECK_BETTER(v, r, c, lp)                                                                           \
    do {                                                                                                    \
        if ((c) >= minc && (c) <= maxc && (r) >= minr && (r) <= maxr) {                                     \
            thismse = (DIST(r, c));                                                                         \
                                                                                                            \
            if (lp) {                                                                                       \
                (v) = svt_aom_mv_err_cost_light(&(const Mv){{(c), (r)}}, ref_mv);                           \
            } else {                                                                                        \
                (v) = svt_aom_mv_err_cost(&(const Mv){{(c), (r)}}, ref_mv, mvjcost, mvcost, error_per_bit); \
            }                                                                                               \
            if (((v) + thismse) < besterr) {                                                                \
                besterr     = (v) + thismse;                                                                \
                br          = (r);                                                                          \
                bc          = (c);                                                                          \
                *distortion = thismse;                                                                      \
                *sse1       = sse;                                                                          \
            }                                                                                               \
        } else {                                                                                            \
            (v) = INT_MAX;                                                                                  \
        }                                                                                                   \
    } while (0)

/* Suffix-0 alias so SECOND_LEVEL_CHECKS_BEST(k) can token-paste the variant. */
#define CHECK_BETTER0(v, r, c, lp) CHECK_BETTER(v, r, c, lp)
/*
 * Upsampled-prediction variant of CHECK_BETTER: the distortion at 1/8-pel
 * position (r, c) comes from upsampled_obmc_pref_error() instead of the
 * sub-pixel variance function. Expanded inside
 * svt_av1_find_best_obmc_sub_pixel_tree_up() and captures its locals
 * (xd, cm, mi_row, mi_col, mask, vfp, z, y, y_stride, w, h, sse, thismse,
 * besterr, br, bc, the min/max window, ref_mv, mvjcost, mvcost,
 * error_per_bit, distortion, sse1, use_accurate_subpel_search).
 *
 * v receives the MV rate cost (light estimate when lp is non-zero), or
 * INT_MAX when (r, c) is outside the search window. Macro parameters are
 * parenthesized at each use because callers pass expressions such as
 * `br0 + kr`.
 */
#define CHECK_BETTER1(v, r, c, lp)                                                            \
    do {                                                                                      \
        if ((c) >= minc && (c) <= maxc && (r) >= minr && (r) <= maxr) {                       \
            Mv this_mv = {{(c), (r)}};                                                        \
            thismse    = upsampled_obmc_pref_error(xd,                                        \
                                                cm,                                        \
                                                mi_row,                                    \
                                                mi_col,                                    \
                                                &this_mv,                                  \
                                                mask,                                      \
                                                vfp,                                       \
                                                z,                                         \
                                                pre(y, y_stride, (r), (c)),                \
                                                y_stride,                                  \
                                                sp(c),                                     \
                                                sp(r),                                     \
                                                w,                                         \
                                                h,                                         \
                                                &sse,                                      \
                                                use_accurate_subpel_search);               \
            if (lp) {                                                                         \
                (v) = svt_aom_mv_err_cost_light(&this_mv, ref_mv);                            \
            } else {                                                                          \
                (v) = svt_aom_mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);  \
            }                                                                                 \
            if (((v) + thismse) < besterr) {                                                  \
                besterr     = (v) + thismse;                                                  \
                br          = (r);                                                            \
                bc          = (c);                                                            \
                *distortion = thismse;                                                        \
                *sse1       = sse;                                                            \
            }                                                                                 \
        } else {                                                                              \
            (v) = INT_MAX;                                                                    \
        }                                                                                     \
    } while (0)
/*
 * Second-level refinement around the first-level winner.
 *
 * On entry (br, bc) is the current best position and (tr, tc) the diagonal
 * candidate that was tested last. If the winner shares only its row with the
 * diagonal, kc is re-derived from the column difference; if it shares only
 * its column, kr is re-derived from the row difference. Three further
 * positions are then probed with CHECK_BETTER<k>: the vertical neighbour, the
 * horizontal neighbour and, only if one of those improved the best, their
 * diagonal.
 *
 * k selects the probe macro (0: sub-pixel variance, 1: upsampled prediction).
 * Captures br, bc, tr, tc, kr, kc and lp from the enclosing function.
 */
#define SECOND_LEVEL_CHECKS_BEST(k)                                   \
    do {                                                              \
        unsigned int second;                                          \
        const int    base_r = br;                                     \
        const int    base_c = bc;                                     \
        assert(tr == br || tc == bc);                                 \
        if (tr == br && tc != bc) {                                   \
            kc = bc - tc;                                             \
        } else if (tr != br && tc == bc) {                            \
            kr = br - tr;                                             \
        }                                                             \
        CHECK_BETTER##k(second, base_r + kr, base_c, lp);             \
        CHECK_BETTER##k(second, base_r, base_c + kc, lp);             \
        if (base_r != br || base_c != bc) {                           \
            CHECK_BETTER##k(second, base_r + kr, base_c + kc, lp);    \
        }                                                             \
    } while (0)
810 | | |
811 | | static int upsampled_obmc_pref_error(MacroBlockD* xd, const Av1Common* const cm, int mi_row, int mi_col, |
812 | | const Mv* const mv, const int32_t* mask, const AomVarianceFnPtr* vfp, |
813 | | const int32_t* const wsrc, const uint8_t* const y, int y_stride, int subpel_x_q3, |
814 | 0 | int subpel_y_q3, int w, int h, unsigned int* sse, int subpel_search) { |
815 | 0 | unsigned int besterr; |
816 | |
|
817 | 0 | DECLARE_ALIGNED(16, uint8_t, pred[2 * MAX_SB_SQUARE]); |
818 | | #if CONFIG_AV1_HIGHBITDEPTH |
819 | | if (is_cur_buf_hbd(xd)) { |
820 | | uint8_t* pred8 = CONVERT_TO_BYTEPTR(pred); |
821 | | aom_highbd_upsampled_pred( |
822 | | xd, cm, mi_row, mi_col, mv, pred8, w, h, subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd, subpel_search); |
823 | | besterr = vfp->ovf(pred8, w, wsrc, mask, sse); |
824 | | } else { |
825 | | svt_aom_upsampled_pred( |
826 | | xd, cm, mi_row, mi_col, mv, pred, w, h, subpel_x_q3, subpel_y_q3, y, y_stride, subpel_search); |
827 | | |
828 | | besterr = vfp->ovf(pred, w, wsrc, mask, sse); |
829 | | } |
830 | | #else |
831 | 0 | svt_aom_upsampled_pred(xd, |
832 | 0 | (const struct AV1Common* const)cm, |
833 | 0 | mi_row, |
834 | 0 | mi_col, |
835 | 0 | mv, |
836 | 0 | pred, |
837 | 0 | w, |
838 | 0 | h, |
839 | 0 | subpel_x_q3, |
840 | 0 | subpel_y_q3, |
841 | 0 | y, |
842 | 0 | y_stride, |
843 | 0 | subpel_search); |
844 | |
|
845 | 0 | besterr = vfp->ovf(pred, w, wsrc, mask, sse); |
846 | 0 | #endif |
847 | 0 | return besterr; |
848 | 0 | } |
849 | | |
850 | | static unsigned int upsampled_setup_obmc_center_error(MacroBlockD* xd, const Av1Common* const cm, int mi_row, |
851 | | int mi_col, const int32_t* mask, const Mv* bestmv, |
852 | | const Mv* ref_mv, int error_per_bit, const AomVarianceFnPtr* vfp, |
853 | | const int32_t* const wsrc, const uint8_t* const y, int y_stride, |
854 | | int w, int h, int offset, int* mvjcost, const int* mvcost[2], |
855 | | unsigned int* sse1, int* distortion, |
856 | 0 | uint8_t use_low_precision_cost_estimation, int subpel_search) { |
857 | 0 | unsigned int besterr = upsampled_obmc_pref_error( |
858 | 0 | xd, cm, mi_row, mi_col, bestmv, mask, vfp, wsrc, y + offset, y_stride, 0, 0, w, h, sse1, subpel_search); |
859 | 0 | *distortion = besterr; |
860 | 0 | if (use_low_precision_cost_estimation) { |
861 | 0 | besterr += svt_aom_mv_err_cost_light(bestmv, ref_mv); |
862 | 0 | } else { |
863 | 0 | besterr += svt_aom_mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); |
864 | 0 | } |
865 | 0 | return besterr; |
866 | 0 | } |
867 | | |
// Sub-pel phase of a 1/8-pel motion vector component: its low three bits,
// as passed to the sub-pixel variance functions.
static INLINE int sp(int x) {
    const int subpel_mask = 7;
    return x & subpel_mask;
}
872 | | |
// Address of the full-pel sample addressed by 1/8-pel position (r, c):
// both components are shifted down by 3 and applied as row/column offsets
// into `buf` with the given stride.
static INLINE const uint8_t* pre(const uint8_t* buf, int stride, int r, int c) {
    const int full_row = r >> 3;
    const int full_col = c >> 3;
    return buf + (full_row * stride + full_col);
}
877 | | |
878 | | int svt_av1_find_best_obmc_sub_pixel_tree_up(ModeDecisionContext* ctx, IntraBcContext* x, |
879 | | const struct Av1Common* const cm, int mi_row, int mi_col, Mv* bestmv, |
880 | | const Mv* ref_mv, int allow_hp, int error_per_bit, |
881 | | const AomVarianceFnPtr* vfp, int forced_stop, int iters_per_step, |
882 | | int* mvjcost, const int* mvcost[2], int* distortion, unsigned int* sse1, |
883 | 0 | int is_second, int use_accurate_subpel_search) { |
884 | 0 | const int32_t* wsrc = ctx->wsrc_buf; |
885 | 0 | const int32_t* mask = ctx->mask_buf; |
886 | 0 | const int* const z = wsrc; |
887 | 0 | const int* const src_address = z; |
888 | 0 | MacroBlockD* xd = x->xd; |
889 | 0 | struct MacroBlockDPlane* const pd = &x->xdplane[0]; |
890 | 0 | unsigned int besterr = INT_MAX; |
891 | 0 | unsigned int sse; |
892 | 0 | unsigned int thismse; |
893 | 0 | int br = bestmv->y * 8; |
894 | 0 | int bc = bestmv->x * 8; |
895 | 0 | int hstep = 4; |
896 | 0 | int round = 3 - forced_stop; |
897 | 0 | int tr; |
898 | 0 | int tc; |
899 | 0 | const Mv* search_step = search_step_table; |
900 | 0 | int best_idx = -1; |
901 | 0 | unsigned int cost_array[5]; |
902 | 0 | const int w = block_size_wide[ctx->blk_geom->bsize]; |
903 | 0 | const int h = block_size_high[ctx->blk_geom->bsize]; |
904 | 0 | const uint8_t lp = ctx->approx_inter_rate; |
905 | 0 | int minc, maxc, minr, maxr; |
906 | |
|
907 | 0 | set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, ref_mv); |
908 | |
|
909 | 0 | const uint8_t* y = pd->pre[is_second].buf; |
910 | 0 | int y_stride = pd->pre[is_second].stride; |
911 | 0 | int offset = bestmv->y * y_stride + bestmv->x; |
912 | |
|
913 | 0 | if (!allow_hp && round == 3) { |
914 | 0 | round = 2; |
915 | 0 | } |
916 | |
|
917 | 0 | bestmv->y *= 8; |
918 | 0 | bestmv->x *= 8; |
919 | | // use_accurate_subpel_search can be 0 or 1 or 2 |
920 | 0 | besterr = use_accurate_subpel_search |
921 | 0 | ? upsampled_setup_obmc_center_error(xd, |
922 | 0 | cm, |
923 | 0 | mi_row, |
924 | 0 | mi_col, |
925 | 0 | mask, |
926 | 0 | bestmv, |
927 | 0 | ref_mv, |
928 | 0 | error_per_bit, |
929 | 0 | vfp, |
930 | 0 | z, |
931 | 0 | y, |
932 | 0 | y_stride, |
933 | 0 | w, |
934 | 0 | h, |
935 | 0 | offset, |
936 | 0 | mvjcost, |
937 | 0 | mvcost, |
938 | 0 | sse1, |
939 | 0 | distortion, |
940 | 0 | lp, |
941 | 0 | use_accurate_subpel_search) |
942 | 0 | : setup_obmc_center_error( |
943 | 0 | mask, bestmv, ref_mv, error_per_bit, vfp, z, y, y_stride, offset, mvjcost, mvcost, sse1, lp, distortion); |
944 | |
|
945 | 0 | for (int iter = 0; iter < round; ++iter) { |
946 | | // Check vertical and horizontal sub-pixel positions. |
947 | 0 | int idx = 0; |
948 | 0 | for (; idx < 4; ++idx) { |
949 | 0 | tr = br + search_step[idx].y; |
950 | 0 | tc = bc + search_step[idx].x; |
951 | 0 | if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { |
952 | 0 | Mv this_mv = {{tc, tr}}; |
953 | 0 | thismse = use_accurate_subpel_search |
954 | 0 | ? (unsigned)upsampled_obmc_pref_error(xd, |
955 | 0 | cm, |
956 | 0 | mi_row, |
957 | 0 | mi_col, |
958 | 0 | &this_mv, |
959 | 0 | mask, |
960 | 0 | vfp, |
961 | 0 | src_address, |
962 | 0 | pre(y, y_stride, tr, tc), |
963 | 0 | y_stride, |
964 | 0 | sp(tc), |
965 | 0 | sp(tr), |
966 | 0 | w, |
967 | 0 | h, |
968 | 0 | &sse, |
969 | 0 | use_accurate_subpel_search) |
970 | 0 | : vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), src_address, mask, &sse); |
971 | 0 | if (lp) { |
972 | 0 | cost_array[idx] = thismse + svt_aom_mv_err_cost_light(&this_mv, ref_mv); |
973 | 0 | } else { |
974 | 0 | cost_array[idx] = thismse + svt_aom_mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); |
975 | 0 | } |
976 | 0 | if (cost_array[idx] < besterr) { |
977 | 0 | best_idx = idx; |
978 | 0 | besterr = cost_array[idx]; |
979 | 0 | *distortion = thismse; |
980 | 0 | *sse1 = sse; |
981 | 0 | } |
982 | 0 | } else { |
983 | 0 | cost_array[idx] = INT_MAX; |
984 | 0 | } |
985 | 0 | } |
986 | | |
987 | | // Check diagonal sub-pixel position |
988 | 0 | int kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep); |
989 | 0 | int kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep); |
990 | |
|
991 | 0 | tc = bc + kc; |
992 | 0 | tr = br + kr; |
993 | 0 | if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { |
994 | 0 | Mv this_mv = {{tc, tr}}; |
995 | 0 | thismse = use_accurate_subpel_search |
996 | 0 | ? (unsigned)upsampled_obmc_pref_error(xd, |
997 | 0 | cm, |
998 | 0 | mi_row, |
999 | 0 | mi_col, |
1000 | 0 | &this_mv, |
1001 | 0 | mask, |
1002 | 0 | vfp, |
1003 | 0 | src_address, |
1004 | 0 | pre(y, y_stride, tr, tc), |
1005 | 0 | y_stride, |
1006 | 0 | sp(tc), |
1007 | 0 | sp(tr), |
1008 | 0 | w, |
1009 | 0 | h, |
1010 | 0 | &sse, |
1011 | 0 | use_accurate_subpel_search) |
1012 | 0 | : vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), src_address, mask, &sse); |
1013 | 0 | if (lp) { |
1014 | 0 | cost_array[4] = thismse + svt_aom_mv_err_cost_light(&this_mv, ref_mv); |
1015 | 0 | } else { |
1016 | 0 | cost_array[4] = thismse + svt_aom_mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); |
1017 | 0 | } |
1018 | |
|
1019 | 0 | if (cost_array[4] < besterr) { |
1020 | 0 | best_idx = 4; |
1021 | 0 | besterr = cost_array[4]; |
1022 | 0 | *distortion = thismse; |
1023 | 0 | *sse1 = sse; |
1024 | 0 | } |
1025 | 0 | } else { |
1026 | 0 | cost_array[idx] = INT_MAX; |
1027 | 0 | } |
1028 | |
|
1029 | 0 | if (best_idx < 4 && best_idx >= 0) { |
1030 | 0 | br += search_step[best_idx].y; |
1031 | 0 | bc += search_step[best_idx].x; |
1032 | 0 | } else if (best_idx == 4) { |
1033 | 0 | br = tr; |
1034 | 0 | bc = tc; |
1035 | 0 | } |
1036 | |
|
1037 | 0 | if (iters_per_step > 1 && best_idx != -1) { |
1038 | 0 | if (use_accurate_subpel_search) { |
1039 | 0 | SECOND_LEVEL_CHECKS_BEST(1); |
1040 | 0 | } else { |
1041 | 0 | SECOND_LEVEL_CHECKS_BEST(0); |
1042 | 0 | } |
1043 | 0 | } |
1044 | |
|
1045 | 0 | search_step += 4; |
1046 | 0 | hstep >>= 1; |
1047 | 0 | best_idx = -1; |
1048 | 0 | } |
1049 | |
|
1050 | 0 | bestmv->y = br; |
1051 | 0 | bestmv->x = bc; |
1052 | |
|
1053 | 0 | return besterr; |
1054 | 0 | } |
1055 | | #endif |
1056 | | void svt_av1_intrabc_hash_search(PictureControlSet* pcs, IntraBcContext* x, BlockSize bsize, int x_pos, int y_pos, |
1057 | | const Mv* ref_mv, int intra, const AomVarianceFnPtr* fn_ptr, int* best_hash_cost, |
1058 | 0 | Mv* best_hash_mv) { |
1059 | 0 | const int block_width = block_size_wide[bsize]; |
1060 | 0 | const int block_height = block_size_high[bsize]; |
1061 | |
|
1062 | 0 | if (block_width != block_height || block_width > pcs->ppcs->intrabc_ctrls.max_block_size_hash) { |
1063 | 0 | return; |
1064 | 0 | } |
1065 | | |
1066 | 0 | uint8_t* src_buf = x->plane[0].src.buf; |
1067 | 0 | int src_stride = x->plane[0].src.stride; |
1068 | |
|
1069 | 0 | uint32_t hash_value1, hash_value2; |
1070 | |
|
1071 | 0 | svt_av1_get_block_hash_value(src_buf, src_stride, block_width, &hash_value1, &hash_value2, 0, pcs, x); |
1072 | |
|
1073 | 0 | HashTable* ref_frame_hash = &pcs->hash_table; |
1074 | 0 | int count = svt_av1_hash_table_count(ref_frame_hash, hash_value1); |
1075 | |
|
1076 | 0 | if (count <= (intra ? 1 : 0)) { |
1077 | 0 | return; |
1078 | 0 | } |
1079 | | |
1080 | 0 | Iterator iterator = svt_av1_hash_get_first_iterator(ref_frame_hash, hash_value1); |
1081 | |
|
1082 | 0 | const int mi_col = x_pos / MI_SIZE; |
1083 | 0 | const int mi_row = y_pos / MI_SIZE; |
1084 | |
|
1085 | 0 | for (int i = 0; i < count; i++, svt_aom_iterator_increment(&iterator)) { |
1086 | 0 | BlockHash ref_block_hash = *(BlockHash*)(svt_aom_iterator_get(&iterator)); |
1087 | |
|
1088 | 0 | if (hash_value2 != ref_block_hash.hash_value2) { |
1089 | 0 | continue; |
1090 | 0 | } |
1091 | | |
1092 | 0 | if (intra) { |
1093 | 0 | Mv dv = {{8 * (ref_block_hash.x - x_pos), 8 * (ref_block_hash.y - y_pos)}}; |
1094 | |
|
1095 | 0 | if (!svt_aom_is_dv_valid(dv, x->xd, mi_row, mi_col, bsize, pcs->ppcs->scs->seq_header.sb_size_log2)) { |
1096 | 0 | continue; |
1097 | 0 | } |
1098 | 0 | } |
1099 | | |
1100 | 0 | Mv hash_mv = {{ref_block_hash.x - x_pos, ref_block_hash.y - y_pos}}; |
1101 | |
|
1102 | 0 | if (!is_mv_in(&x->mv_limits, &hash_mv)) { |
1103 | 0 | continue; |
1104 | 0 | } |
1105 | | |
1106 | 0 | int ref_cost = svt_av1_get_mvpred_var(x, &hash_mv, ref_mv, fn_ptr, 1); |
1107 | |
|
1108 | 0 | if (ref_cost < *best_hash_cost) { |
1109 | 0 | *best_hash_cost = ref_cost; |
1110 | 0 | *best_hash_mv = hash_mv; |
1111 | 0 | } |
1112 | 0 | } |
1113 | 0 | } |
1114 | | |
1115 | | int svt_av1_full_pixel_search(PictureControlSet* pcs, IntraBcContext* x, BlockSize bsize, Mv* mvp_full, int step_param, |
1116 | 0 | int error_per_bit, int* cost_list, const Mv* ref_mv) { |
1117 | 0 | const AomVarianceFnPtr* fn_ptr = &svt_aom_mefn_ptr[bsize]; |
1118 | 0 | int var = 0; |
1119 | | |
1120 | | // Initialize cost list if requested |
1121 | 0 | if (cost_list) { |
1122 | 0 | for (int i = 0; i < 5; i++) { |
1123 | 0 | cost_list[i] = INT_MAX; |
1124 | 0 | } |
1125 | 0 | } |
1126 | | |
1127 | | // Primary diamond search |
1128 | 0 | var = full_pixel_diamond( |
1129 | 0 | pcs, x, mvp_full, step_param, error_per_bit, MAX_MVSEARCH_STEPS - 1 - step_param, 1, cost_list, fn_ptr, ref_mv); |
1130 | | |
1131 | | // Decide whether to run exhaustive refinement |
1132 | 0 | bool run_mesh_search = 0; |
1133 | |
|
1134 | 0 | int exhaustive_mesh_thresh = (int)pcs->ppcs->intrabc_ctrls.exhaustive_mesh_thresh; |
1135 | | |
1136 | | // Scale threshold by block size |
1137 | 0 | exhaustive_mesh_thresh >>= 10 - (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]); |
1138 | |
|
1139 | 0 | if (var > exhaustive_mesh_thresh) { |
1140 | 0 | run_mesh_search = 1; |
1141 | 0 | } |
1142 | 0 | const int32_t full_pel_mv_diff = MAX(abs(mvp_full->x - x->best_mv.x), abs(mvp_full->y - x->best_mv.y)); |
1143 | 0 | if (full_pel_mv_diff <= pcs->ppcs->intrabc_ctrls.mesh_search_mv_diff_threshold) { |
1144 | 0 | run_mesh_search = 0; |
1145 | 0 | } |
1146 | | // Exhaustive (Mesh) Search |
1147 | 0 | if (run_mesh_search) { |
1148 | 0 | int var_ex; |
1149 | 0 | Mv mv_ex; |
1150 | |
|
1151 | 0 | var_ex = intrabc_full_pixel_exhaustive(pcs, x, &x->best_mv, error_per_bit, fn_ptr, ref_mv, &mv_ex); |
1152 | |
|
1153 | 0 | if (var_ex < var) { |
1154 | 0 | x->best_mv = mv_ex; |
1155 | 0 | } |
1156 | 0 | } |
1157 | |
|
1158 | 0 | return 0; |
1159 | 0 | } |