/work/svt-av1/Source/Lib/Codec/mcomp.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #include <limits.h> |
13 | | #include <math.h> |
14 | | #include <stdio.h> |
15 | | #include "mcomp.h" |
16 | | #include "mv.h" |
17 | | #include "av1_common.h" |
18 | | #include "coding_unit.h" |
19 | | #include "block_structures.h" |
20 | | #include "av1me.h" |
21 | | #include "aom_dsp_rtcd.h" |
22 | | #include "rd_cost.h" |
23 | | // ============================================================================ |
24 | | // Cost of motion vectors |
25 | | // ============================================================================ |
26 | | // TODO(any): Adaptively adjust the regularization strength based on image size |
27 | | // and motion activity instead of using hard-coded values. It seems like we |
28 | | // roughly halve the lambda for each increase in resolution. |
29 | | // These are the multipliers used to perform regularization in motion compensation |
30 | | // when x->mv_cost_type is set to MV_COST_L1. |
31 | | // LOWRES |
32 | 0 | #define SSE_LAMBDA_LOWRES 2 // Used by mv_cost_err_fn |
33 | | // MIDRES |
34 | 0 | #define SSE_LAMBDA_MIDRES 0 // Used by mv_cost_err_fn |
35 | | // HDRES |
36 | 0 | #define SSE_LAMBDA_HDRES 1 // Used by mv_cost_err_fn |
37 | | |
38 | | // Returns the cost of using the current mv during the motion search. This is |
39 | | // used when var is used as the error metric. |
40 | | #define PIXEL_TRANSFORM_ERROR_SCALE 4 |
41 | | |
42 | | static INLINE int svt_mv_err_cost(const Mv* mv, const Mv* ref_mv, const int* mvjcost, const int* const mvcost[2], |
43 | 0 | int error_per_bit, MV_COST_TYPE mv_cost_type) { |
44 | 0 | const Mv diff = {{mv->x - ref_mv->x, mv->y - ref_mv->y}}; |
45 | 0 | const Mv abs_diff = {{abs(diff.x), abs(diff.y)}}; |
46 | |
|
47 | 0 | switch (mv_cost_type) { |
48 | 0 | case MV_COST_ENTROPY: |
49 | 0 | if (mvcost) { |
50 | 0 | return (int)ROUND_POWER_OF_TWO_64( |
51 | 0 | (int64_t)svt_mv_cost(&diff, mvjcost, mvcost) * error_per_bit, |
52 | 0 | RDDIV_BITS + AV1_PROB_COST_SHIFT - RD_EPB_SHIFT + PIXEL_TRANSFORM_ERROR_SCALE); |
53 | 0 | } |
54 | 0 | return 0; |
55 | 0 | case MV_COST_L1_LOWRES: |
56 | 0 | return (SSE_LAMBDA_LOWRES * (abs_diff.y + abs_diff.x)) >> 3; |
57 | 0 | case MV_COST_L1_MIDRES: |
58 | 0 | return (SSE_LAMBDA_MIDRES * (abs_diff.y + abs_diff.x)) >> 3; |
59 | 0 | case MV_COST_L1_HDRES: |
60 | 0 | return (SSE_LAMBDA_HDRES * (abs_diff.y + abs_diff.x)) >> 3; |
61 | 0 | case MV_COST_OPT: { |
62 | 0 | return (int)ROUND_POWER_OF_TWO_64( |
63 | 0 | (int64_t)((abs_diff.y + abs_diff.x) << 8) * error_per_bit, |
64 | 0 | RDDIV_BITS + AV1_PROB_COST_SHIFT - RD_EPB_SHIFT + PIXEL_TRANSFORM_ERROR_SCALE); |
65 | 0 | } |
66 | 0 | case MV_COST_NONE: |
67 | 0 | return 0; |
68 | 0 | default: |
69 | 0 | assert(0 && "Invalid rd_cost_type"); |
70 | 0 | return 0; |
71 | 0 | } |
72 | 0 | } |
73 | | |
74 | 0 | static INLINE int svt_mv_err_cost_(const Mv* mv, const svt_mv_cost_param* mv_cost_params) { |
75 | 0 | return svt_mv_err_cost(mv, |
76 | 0 | mv_cost_params->ref_mv, |
77 | 0 | mv_cost_params->mvjcost, |
78 | 0 | mv_cost_params->mvcost, |
79 | 0 | mv_cost_params->error_per_bit, |
80 | 0 | mv_cost_params->mv_cost_type); |
81 | 0 | } |
82 | | |
83 | | // ============================================================================= |
84 | | // Subpixel Motion Search: Translational |
85 | | // ============================================================================= |
86 | 0 | #define INIT_SUBPEL_STEP_SIZE (4) |
87 | | |
88 | | /* |
89 | | * To avoid the penalty for crossing cache-line read, preload the reference |
90 | | * area in a small buffer, which is aligned to make sure there won't be crossing |
91 | | * cache-line read while reading from this buffer. This reduced the cpu |
92 | | * cycles spent on reading ref data in sub-pixel filter functions. |
93 | | * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x |
94 | | * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we |
95 | | * could reduce the area. |
96 | | */ |
97 | | |
98 | | // Returns the subpel offset used by various subpel variance functions [m]sv[a]f |
99 | 0 | static INLINE int svt_get_subpel_part(int x) { |
100 | 0 | return x & 7; |
101 | 0 | } |
102 | | |
103 | | // Gets the address of the ref buffer at subpel location (r, c), rounded to the |
104 | | // nearest fullpel precision toward - \infty |
105 | | |
106 | 0 | static INLINE const uint8_t* svt_get_buf_from_mv(const struct svt_buf_2d* buf, const Mv mv) { |
107 | 0 | const int offset = (mv.y >> 3) * buf->stride + (mv.x >> 3); |
108 | 0 | return &buf->buf[offset]; |
109 | 0 | } |
110 | | |
111 | | // Calculates the variance of prediction residue. |
112 | | static int svt_upsampled_pref_error(MacroBlockD* xd, const struct AV1Common* const cm, const Mv* this_mv, |
113 | 0 | const SUBPEL_SEARCH_VAR_PARAMS* var_params, unsigned int* sse) { |
114 | 0 | const AomVarianceFnPtr* vfp = var_params->vfp; |
115 | 0 | const SUBPEL_SEARCH_TYPE subpel_search_type = var_params->subpel_search_type; |
116 | |
|
117 | 0 | const MSBuffers* ms_buffers = &var_params->ms_buffers; |
118 | 0 | const uint8_t* src = ms_buffers->src->buf; |
119 | 0 | const uint8_t* ref = svt_get_buf_from_mv(ms_buffers->ref, *this_mv); |
120 | 0 | const int src_stride = ms_buffers->src->stride; |
121 | 0 | const int ref_stride = ms_buffers->ref->stride; |
122 | 0 | const int w = var_params->w; |
123 | 0 | const int h = var_params->h; |
124 | 0 | const int mi_row = xd->mi_row; |
125 | 0 | const int mi_col = xd->mi_col; |
126 | 0 | const int subpel_x_q3 = svt_get_subpel_part(this_mv->x); |
127 | 0 | const int subpel_y_q3 = svt_get_subpel_part(this_mv->y); |
128 | |
|
129 | 0 | unsigned int besterr; |
130 | 0 | { |
131 | 0 | DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]); |
132 | |
|
133 | 0 | { |
134 | 0 | svt_aom_upsampled_pred(xd, |
135 | 0 | cm, |
136 | 0 | mi_row, |
137 | 0 | mi_col, |
138 | 0 | this_mv, |
139 | 0 | pred, |
140 | 0 | w, |
141 | 0 | h, |
142 | 0 | subpel_x_q3, |
143 | 0 | subpel_y_q3, |
144 | 0 | ref, |
145 | 0 | ref_stride, |
146 | 0 | subpel_search_type); |
147 | 0 | } |
148 | 0 | besterr = vfp->vf(pred, w, src, src_stride, sse); |
149 | 0 | } |
150 | |
|
151 | 0 | return besterr; |
152 | 0 | } |
153 | | |
154 | | // Estimates the variance of prediction residue using bilinear filter for fast |
155 | | // search. |
156 | | static INLINE int svt_estimated_pref_error(const Mv* this_mv, const SUBPEL_SEARCH_VAR_PARAMS* var_params, |
157 | 0 | unsigned int* sse) { |
158 | 0 | const AomVarianceFnPtr* vfp = var_params->vfp; |
159 | |
|
160 | 0 | const MSBuffers* ms_buffers = &var_params->ms_buffers; |
161 | 0 | const uint8_t* src = ms_buffers->src->buf; |
162 | 0 | const uint8_t* ref = svt_get_buf_from_mv(ms_buffers->ref, *this_mv); |
163 | 0 | const int src_stride = ms_buffers->src->stride; |
164 | 0 | const int ref_stride = ms_buffers->ref->stride; |
165 | |
|
166 | 0 | const int subpel_x_q3 = svt_get_subpel_part(this_mv->x); |
167 | 0 | const int subpel_y_q3 = svt_get_subpel_part(this_mv->y); |
168 | | |
169 | | // TODO: port other variance-related functions |
170 | 0 | return vfp->svf(ref, ref_stride, subpel_x_q3, subpel_y_q3, src, src_stride, sse); |
171 | 0 | } |
172 | | |
173 | | // Estimates whether this_mv is better than best_mv. This function incorporates |
174 | | // both prediction error and residue into account. It is suffixed "fast" because |
175 | | // it uses bilinear filter to estimate the prediction. |
176 | | static INLINE unsigned int svt_check_better_fast(MacroBlockD* xd, const struct AV1Common* const cm, const Mv* this_mv, |
177 | | Mv* best_mv, const SubpelMvLimits* mv_limits, |
178 | | const SUBPEL_SEARCH_VAR_PARAMS* var_params, |
179 | | const svt_mv_cost_param* mv_cost_params, unsigned int* besterr, |
180 | | unsigned int* sse1, int* distortion, int* has_better_mv, |
181 | 0 | int is_scaled) { |
182 | 0 | unsigned int cost; |
183 | 0 | if (svt_av1_is_subpelmv_in_range(mv_limits, *this_mv)) { |
184 | 0 | unsigned int sse; |
185 | 0 | int thismse; |
186 | 0 | cost = svt_mv_err_cost_(this_mv, mv_cost_params); |
187 | 0 | if (mv_cost_params->mv_cost_type == MV_COST_OPT) { |
188 | 0 | int64_t bestcost = *distortion + cost; |
189 | 0 | if (bestcost > (((int64_t)*besterr * (int64_t)mv_cost_params->early_exit_th) / 1000)) { |
190 | 0 | return (uint32_t)bestcost; |
191 | 0 | } |
192 | 0 | } |
193 | | // TODO: add estimated func |
194 | 0 | if (is_scaled) { |
195 | 0 | thismse = svt_upsampled_pref_error(xd, cm, this_mv, var_params, &sse); |
196 | 0 | } else { |
197 | 0 | thismse = svt_estimated_pref_error(this_mv, var_params, &sse); |
198 | 0 | } |
199 | 0 | cost += thismse; |
200 | 0 | int weight = 100; |
201 | 0 | if (var_params->bias_fp && (*best_mv).x % 8 == 0 && (*best_mv).y % 8 == 0) { |
202 | 0 | weight = var_params->bias_fp; |
203 | 0 | } |
204 | 0 | if ((((uint64_t)cost * weight) / 100) < *besterr) { |
205 | 0 | *besterr = cost; |
206 | 0 | *best_mv = *this_mv; |
207 | 0 | *distortion = thismse; |
208 | 0 | *sse1 = sse; |
209 | 0 | *has_better_mv |= 1; |
210 | 0 | } |
211 | 0 | } else { |
212 | 0 | cost = INT_MAX; |
213 | 0 | } |
214 | 0 | return cost; |
215 | 0 | } |
216 | | |
217 | | // Checks whether this_mv is better than best_mv. This function incorporates |
218 | | // both prediction error and residue into account. |
219 | | static AOM_FORCE_INLINE unsigned int svt_check_better(MacroBlockD* xd, const struct AV1Common* const cm, |
220 | | const Mv* this_mv, Mv* best_mv, const SubpelMvLimits* mv_limits, |
221 | | const SUBPEL_SEARCH_VAR_PARAMS* var_params, |
222 | | const svt_mv_cost_param* mv_cost_params, unsigned int* besterr, |
223 | 0 | unsigned int* sse1, int* distortion, int* is_better) { |
224 | 0 | unsigned int cost; |
225 | 0 | if (svt_av1_is_subpelmv_in_range(mv_limits, *this_mv)) { |
226 | 0 | unsigned int sse; |
227 | 0 | int thismse; |
228 | 0 | thismse = svt_upsampled_pref_error(xd, cm, this_mv, var_params, &sse); |
229 | 0 | cost = svt_mv_err_cost_(this_mv, mv_cost_params); |
230 | 0 | cost += thismse; |
231 | 0 | int weight = 100; |
232 | 0 | if (var_params->bias_fp && (*best_mv).x % 8 == 0 && (*best_mv).y % 8 == 0) { |
233 | 0 | weight = var_params->bias_fp; |
234 | 0 | } |
235 | 0 | if ((((uint64_t)cost * weight) / 100) < *besterr) { |
236 | 0 | *besterr = cost; |
237 | 0 | *best_mv = *this_mv; |
238 | 0 | *distortion = thismse; |
239 | 0 | *sse1 = sse; |
240 | 0 | *is_better |= 1; |
241 | 0 | } |
242 | 0 | } else { |
243 | 0 | cost = INT_MAX; |
244 | 0 | } |
245 | 0 | return cost; |
246 | 0 | } |
247 | | |
248 | | static INLINE Mv get_best_diag_step(int step_size, unsigned int left_cost, unsigned int right_cost, |
249 | | unsigned int up_cost, unsigned int down_cost) { |
250 | | const Mv diag_step = { |
251 | | {left_cost <= right_cost ? -step_size : step_size, up_cost <= down_cost ? -step_size : step_size}}; |
252 | | |
253 | | return diag_step; |
254 | | } |
255 | | |
256 | | static AOM_FORCE_INLINE Mv svt_first_level_check(MacroBlockD* xd, const struct AV1Common* const cm, const Mv this_mv, |
257 | | Mv* best_mv, const int hstep, const SubpelMvLimits* mv_limits, |
258 | | const SUBPEL_SEARCH_VAR_PARAMS* var_params, |
259 | | const svt_mv_cost_param* mv_cost_params, unsigned int* besterr, |
260 | 0 | unsigned int* sse1, int* distortion) { |
261 | 0 | int dummy = 0; |
262 | 0 | const Mv left_mv = {{this_mv.x - hstep, this_mv.y}}; |
263 | 0 | const Mv right_mv = {{this_mv.x + hstep, this_mv.y}}; |
264 | 0 | const Mv top_mv = {{this_mv.x, this_mv.y - hstep}}; |
265 | 0 | const Mv bottom_mv = {{this_mv.x, this_mv.y + hstep}}; |
266 | |
|
267 | 0 | const unsigned int left = svt_check_better( |
268 | 0 | xd, cm, &left_mv, best_mv, mv_limits, var_params, mv_cost_params, besterr, sse1, distortion, &dummy); |
269 | 0 | const unsigned int right = svt_check_better( |
270 | 0 | xd, cm, &right_mv, best_mv, mv_limits, var_params, mv_cost_params, besterr, sse1, distortion, &dummy); |
271 | 0 | const unsigned int up = svt_check_better( |
272 | 0 | xd, cm, &top_mv, best_mv, mv_limits, var_params, mv_cost_params, besterr, sse1, distortion, &dummy); |
273 | 0 | const unsigned int down = svt_check_better( |
274 | 0 | xd, cm, &bottom_mv, best_mv, mv_limits, var_params, mv_cost_params, besterr, sse1, distortion, &dummy); |
275 | |
|
276 | 0 | const Mv diag_step = get_best_diag_step(hstep, left, right, up, down); |
277 | 0 | const Mv diag_mv = {{this_mv.x + diag_step.x, this_mv.y + diag_step.y}}; |
278 | | |
279 | | // Check the diagonal direction with the best mv |
280 | 0 | svt_check_better( |
281 | 0 | xd, cm, &diag_mv, best_mv, mv_limits, var_params, mv_cost_params, besterr, sse1, distortion, &dummy); |
282 | |
|
283 | 0 | return diag_step; |
284 | 0 | } |
285 | | |
286 | | // A newer version of second level check that gives better quality. |
287 | | // TODO(chiyotsai@google.com): evaluate this on subpel_search_types different |
288 | | // from av1_find_best_sub_pixel_tree |
289 | | static AOM_FORCE_INLINE void svt_second_level_check_v2(MacroBlockD* xd, const struct AV1Common* const cm, |
290 | | const Mv this_mv, Mv diag_step, Mv* best_mv, |
291 | | const SubpelMvLimits* mv_limits, |
292 | | const SUBPEL_SEARCH_VAR_PARAMS* var_params, |
293 | | const svt_mv_cost_param* mv_cost_params, unsigned int* besterr, |
294 | 0 | unsigned int* sse1, int* distortion, int is_scaled) { |
295 | 0 | assert(best_mv->y == this_mv.y + diag_step.y || best_mv->x == this_mv.x + diag_step.x); |
296 | 0 | if (CHECK_MV_EQUAL(this_mv, *best_mv)) { |
297 | 0 | return; |
298 | 0 | } else if (this_mv.y == best_mv->y) { |
299 | | // Search away from diagonal step since diagonal search did not provide any |
300 | | // improvement |
301 | 0 | diag_step.y *= -1; |
302 | 0 | } else if (this_mv.x == best_mv->x) { |
303 | 0 | diag_step.x *= -1; |
304 | 0 | } |
305 | | |
306 | 0 | const Mv row_bias_mv = {{best_mv->x, best_mv->y + diag_step.y}}; |
307 | 0 | const Mv col_bias_mv = {{best_mv->x + diag_step.x, best_mv->y}}; |
308 | 0 | const Mv diag_bias_mv = {{best_mv->x + diag_step.x, best_mv->y + diag_step.y}}; |
309 | 0 | int has_better_mv = 0; |
310 | 0 | svt_check_better(xd, |
311 | 0 | cm, |
312 | 0 | &row_bias_mv, |
313 | 0 | best_mv, |
314 | 0 | mv_limits, |
315 | 0 | var_params, |
316 | 0 | mv_cost_params, |
317 | 0 | besterr, |
318 | 0 | sse1, |
319 | 0 | distortion, |
320 | 0 | &has_better_mv); |
321 | 0 | svt_check_better(xd, |
322 | 0 | cm, |
323 | 0 | &col_bias_mv, |
324 | 0 | best_mv, |
325 | 0 | mv_limits, |
326 | 0 | var_params, |
327 | 0 | mv_cost_params, |
328 | 0 | besterr, |
329 | 0 | sse1, |
330 | 0 | distortion, |
331 | 0 | &has_better_mv); |
332 | | |
333 | | // Do an additional search if the second iteration gives a better mv |
334 | 0 | if (has_better_mv) { |
335 | 0 | svt_check_better(xd, |
336 | 0 | cm, |
337 | 0 | &diag_bias_mv, |
338 | 0 | best_mv, |
339 | 0 | mv_limits, |
340 | 0 | var_params, |
341 | 0 | mv_cost_params, |
342 | 0 | besterr, |
343 | 0 | sse1, |
344 | 0 | distortion, |
345 | 0 | &has_better_mv); |
346 | 0 | } |
347 | 0 | (void)is_scaled; |
348 | 0 | } |
349 | | |
350 | | // Gets the error at the beginning when the mv has fullpel precision |
351 | | static unsigned int svt_upsampled_setup_center_error(const Mv* bestmv, const SUBPEL_SEARCH_VAR_PARAMS* var_params, |
352 | | const svt_mv_cost_param* mv_cost_params, |
353 | 0 | unsigned int* distortion) { |
354 | 0 | const MSBuffers* ms_buffers = &var_params->ms_buffers; |
355 | 0 | const uint8_t* ref = svt_get_buf_from_mv(ms_buffers->ref, *bestmv); |
356 | 0 | *distortion = var_params->vfp->vf( |
357 | 0 | ref, ms_buffers->ref->stride, ms_buffers->src->buf, ms_buffers->src->stride, distortion); |
358 | 0 | return *distortion + svt_mv_err_cost_(bestmv, mv_cost_params); |
359 | 0 | } |
360 | | |
361 | | // Searches the four cardinal direction for a better mv, then follows up with a |
362 | | // search in the best quadrant. This uses bilinear filter to speed up the |
363 | | // calculation. |
364 | | static AOM_FORCE_INLINE Mv first_level_check_fast(MacroBlockD* xd, const struct AV1Common* const cm, const Mv this_mv, |
365 | | Mv* best_mv, int hstep, const SubpelMvLimits* mv_limits, |
366 | | const SUBPEL_SEARCH_VAR_PARAMS* var_params, |
367 | | const svt_mv_cost_param* mv_cost_params, unsigned int* besterr, |
368 | | unsigned int orgerr, unsigned int* sse1, int* distortion, |
369 | | int is_scaled) { |
370 | | // Check the four cardinal directions |
371 | | const Mv left_mv = {{this_mv.x - hstep, this_mv.y}}; |
372 | | int dummy = 0; |
373 | | const unsigned int left = svt_check_better_fast( |
374 | | xd, cm, &left_mv, best_mv, mv_limits, var_params, mv_cost_params, besterr, sse1, distortion, &dummy, is_scaled); |
375 | | |
376 | | const Mv right_mv = {{this_mv.x + hstep, this_mv.y}}; |
377 | | const unsigned int right = svt_check_better_fast(xd, |
378 | | cm, |
379 | | &right_mv, |
380 | | best_mv, |
381 | | mv_limits, |
382 | | var_params, |
383 | | mv_cost_params, |
384 | | besterr, |
385 | | sse1, |
386 | | distortion, |
387 | | &dummy, |
388 | | is_scaled); |
389 | | |
390 | | const Mv top_mv = {{this_mv.x, this_mv.y - hstep}}; |
391 | | const unsigned int up = svt_check_better_fast( |
392 | | xd, cm, &top_mv, best_mv, mv_limits, var_params, mv_cost_params, besterr, sse1, distortion, &dummy, is_scaled); |
393 | | |
394 | | const Mv bottom_mv = {{this_mv.x, this_mv.y + hstep}}; |
395 | | const unsigned int down = svt_check_better_fast(xd, |
396 | | cm, |
397 | | &bottom_mv, |
398 | | best_mv, |
399 | | mv_limits, |
400 | | var_params, |
401 | | mv_cost_params, |
402 | | besterr, |
403 | | sse1, |
404 | | distortion, |
405 | | &dummy, |
406 | | is_scaled); |
407 | | |
408 | | const Mv diag_step = get_best_diag_step(hstep, left, right, up, down); |
409 | | const Mv diag_mv = {{this_mv.x + diag_step.x, this_mv.y + diag_step.y}}; |
410 | | if (*besterr >= orgerr) { |
411 | | return diag_step; |
412 | | } |
413 | | // Check the diagonal direction with the best mv |
414 | | svt_check_better_fast( |
415 | | xd, cm, &diag_mv, best_mv, mv_limits, var_params, mv_cost_params, besterr, sse1, distortion, &dummy, is_scaled); |
416 | | |
417 | | return diag_step; |
418 | | } |
419 | | |
420 | | // Performs a following up search after first_level_check_fast is called. This |
421 | | // performs two extra chess pattern searches in the best quadrant. |
422 | | static AOM_FORCE_INLINE void second_level_check_fast(MacroBlockD* xd, const struct AV1Common* const cm, |
423 | | const Mv this_mv, const Mv diag_step, Mv* best_mv, int hstep, |
424 | | const SubpelMvLimits* mv_limits, |
425 | | const SUBPEL_SEARCH_VAR_PARAMS* var_params, |
426 | | const svt_mv_cost_param* mv_cost_params, unsigned int* besterr, |
427 | | unsigned int* sse1, int* distortion, int is_scaled) { |
428 | | assert(diag_step.y == hstep || diag_step.y == -hstep); |
429 | | assert(diag_step.x == hstep || diag_step.x == -hstep); |
430 | | const int tr = this_mv.y; |
431 | | const int tc = this_mv.x; |
432 | | const int br = best_mv->y; |
433 | | const int bc = best_mv->x; |
434 | | int dummy = 0; |
435 | | if (tr != br && tc != bc) { |
436 | | assert(diag_step.x == bc - tc); |
437 | | assert(diag_step.y == br - tr); |
438 | | const Mv chess_mv_1 = {{bc + diag_step.x, br}}; |
439 | | const Mv chess_mv_2 = {{bc, br + diag_step.y}}; |
440 | | svt_check_better_fast(xd, |
441 | | cm, |
442 | | &chess_mv_1, |
443 | | best_mv, |
444 | | mv_limits, |
445 | | var_params, |
446 | | mv_cost_params, |
447 | | besterr, |
448 | | sse1, |
449 | | distortion, |
450 | | &dummy, |
451 | | is_scaled); |
452 | | |
453 | | svt_check_better_fast(xd, |
454 | | cm, |
455 | | &chess_mv_2, |
456 | | best_mv, |
457 | | mv_limits, |
458 | | var_params, |
459 | | mv_cost_params, |
460 | | besterr, |
461 | | sse1, |
462 | | distortion, |
463 | | &dummy, |
464 | | is_scaled); |
465 | | } else if (tr == br && tc != bc) { |
466 | | assert(diag_step.x == bc - tc); |
467 | | // Continue searching in the best direction |
468 | | const Mv bottom_long_mv = {{bc + diag_step.x, br + hstep}}; |
469 | | const Mv top_long_mv = {{bc + diag_step.x, br - hstep}}; |
470 | | svt_check_better_fast(xd, |
471 | | cm, |
472 | | &bottom_long_mv, |
473 | | best_mv, |
474 | | mv_limits, |
475 | | var_params, |
476 | | mv_cost_params, |
477 | | besterr, |
478 | | sse1, |
479 | | distortion, |
480 | | &dummy, |
481 | | is_scaled); |
482 | | svt_check_better_fast(xd, |
483 | | cm, |
484 | | &top_long_mv, |
485 | | best_mv, |
486 | | mv_limits, |
487 | | var_params, |
488 | | mv_cost_params, |
489 | | besterr, |
490 | | sse1, |
491 | | distortion, |
492 | | &dummy, |
493 | | is_scaled); |
494 | | |
495 | | // Search in the direction opposite of the best quadrant |
496 | | const Mv rev_mv = {{bc, br - diag_step.y}}; |
497 | | svt_check_better_fast(xd, |
498 | | cm, |
499 | | &rev_mv, |
500 | | best_mv, |
501 | | mv_limits, |
502 | | var_params, |
503 | | mv_cost_params, |
504 | | besterr, |
505 | | sse1, |
506 | | distortion, |
507 | | &dummy, |
508 | | is_scaled); |
509 | | } else if (tr != br && tc == bc) { |
510 | | assert(diag_step.y == br - tr); |
511 | | // Continue searching in the best direction |
512 | | const Mv right_long_mv = {{bc + hstep, br + diag_step.y}}; |
513 | | const Mv left_long_mv = {{bc - hstep, br + diag_step.y}}; |
514 | | svt_check_better_fast(xd, |
515 | | cm, |
516 | | &right_long_mv, |
517 | | best_mv, |
518 | | mv_limits, |
519 | | var_params, |
520 | | mv_cost_params, |
521 | | besterr, |
522 | | sse1, |
523 | | distortion, |
524 | | &dummy, |
525 | | is_scaled); |
526 | | svt_check_better_fast(xd, |
527 | | cm, |
528 | | &left_long_mv, |
529 | | best_mv, |
530 | | mv_limits, |
531 | | var_params, |
532 | | mv_cost_params, |
533 | | besterr, |
534 | | sse1, |
535 | | distortion, |
536 | | &dummy, |
537 | | is_scaled); |
538 | | |
539 | | // Search in the direction opposite of the best quadrant |
540 | | const Mv rev_mv = {{bc - diag_step.x, br}}; |
541 | | svt_check_better_fast(xd, |
542 | | cm, |
543 | | &rev_mv, |
544 | | best_mv, |
545 | | mv_limits, |
546 | | var_params, |
547 | | mv_cost_params, |
548 | | besterr, |
549 | | sse1, |
550 | | distortion, |
551 | | &dummy, |
552 | | is_scaled); |
553 | | } |
554 | | } |
555 | | |
556 | | // Combines first level check and second level check when applicable. This first |
557 | | // searches the four cardinal directions, and perform several |
558 | | // diagonal/chess-pattern searches in the best quadrant. |
559 | | static AOM_FORCE_INLINE void two_level_checks_fast(MacroBlockD* xd, const struct AV1Common* const cm, const Mv this_mv, |
560 | | Mv* best_mv, int hstep, const SubpelMvLimits* mv_limits, |
561 | | const SUBPEL_SEARCH_VAR_PARAMS* var_params, |
562 | | const svt_mv_cost_param* mv_cost_params, unsigned int* besterr, |
563 | | unsigned int orgerr, unsigned int* sse1, int* distortion, int iters, |
564 | | int is_scaled) { |
565 | | const Mv diag_step = first_level_check_fast(xd, |
566 | | cm, |
567 | | this_mv, |
568 | | best_mv, |
569 | | hstep, |
570 | | mv_limits, |
571 | | var_params, |
572 | | mv_cost_params, |
573 | | besterr, |
574 | | orgerr, |
575 | | sse1, |
576 | | distortion, |
577 | | is_scaled); |
578 | | if (*besterr < orgerr) { |
579 | | if (iters > 1) { |
580 | | second_level_check_fast(xd, |
581 | | cm, |
582 | | this_mv, |
583 | | diag_step, |
584 | | best_mv, |
585 | | hstep, |
586 | | mv_limits, |
587 | | var_params, |
588 | | mv_cost_params, |
589 | | besterr, |
590 | | sse1, |
591 | | distortion, |
592 | | is_scaled); |
593 | | } |
594 | | } |
595 | | } |
596 | | |
597 | | extern const uint8_t svt_aom_eb_av1_var_offs[MAX_SB_SIZE]; |
598 | | |
599 | | int svt_av1_find_best_sub_pixel_tree_pruned(void* ictx, MacroBlockD* xd, const struct AV1Common* const cm, |
600 | | SUBPEL_MOTION_SEARCH_PARAMS* ms_params, Mv start_mv, Mv* bestmv, |
601 | | int* distortion, unsigned int* sse1, int qp, BlockSize bsize, |
602 | 0 | uint8_t early_neigh_check_exit) { |
603 | 0 | (void)ictx; |
604 | 0 | (void)cm; |
605 | 0 | const int allow_hp = ms_params->allow_hp; |
606 | 0 | const int forced_stop = ms_params->forced_stop; |
607 | 0 | const int iters_per_step = ms_params->iters_per_step; |
608 | 0 | const SubpelMvLimits* mv_limits = &ms_params->mv_limits; |
609 | 0 | const svt_mv_cost_param* mv_cost_params = &ms_params->mv_cost_params; |
610 | 0 | const SUBPEL_SEARCH_VAR_PARAMS* var_params = &ms_params->var_params; |
611 | 0 | int hstep = INIT_SUBPEL_STEP_SIZE; // Step size, initialized to 4/8=1/2 pel |
612 | 0 | unsigned int besterr; |
613 | 0 | unsigned int org_error; |
614 | 0 | *bestmv = start_mv; |
615 | |
|
616 | 0 | const int is_scaled = 0; |
617 | 0 | besterr = svt_upsampled_setup_center_error(bestmv, var_params, mv_cost_params, (unsigned int*)distortion); |
618 | |
|
619 | 0 | if (ictx != NULL && ms_params->search_stage == SPEL_ME) { |
620 | 0 | ModeDecisionContext* ctx = (ModeDecisionContext*)ictx; |
621 | 0 | ctx->fp_me_dist[ms_params->list_idx][ms_params->ref_idx] = besterr; |
622 | 0 | } |
623 | |
|
624 | 0 | if (early_neigh_check_exit) { |
625 | 0 | return besterr; |
626 | 0 | } |
627 | 0 | const uint64_t th_normalizer = (uint64_t)(((var_params->w * var_params->h) << 5) * |
628 | 0 | (uint64_t)ms_params->abs_th_mult); |
629 | 0 | if ((uint64_t)qp * besterr < th_normalizer) { |
630 | 0 | return besterr; |
631 | 0 | } |
632 | | // How many steps to take. A round of 0 means fullpel search only, 1 means |
633 | | // half-pel, and so on. |
634 | 0 | const int round = AOMMIN(FULL_PEL - forced_stop, 3 - !allow_hp); |
635 | | |
636 | | // If forced_stop is FULL_PEL, return. |
637 | 0 | if (!round) { |
638 | 0 | return besterr; |
639 | 0 | } |
640 | | // Exit subpel search if the variance of the full-pel predicted samples is low (i.e. where likely interpolation will not modify the integer samples) |
641 | 0 | if (ms_params->pred_variance_th) { |
642 | 0 | const MSBuffers* ms_buffers = &var_params->ms_buffers; |
643 | 0 | const uint8_t* ref = svt_get_buf_from_mv(ms_buffers->ref, *bestmv); |
644 | 0 | unsigned int sse; |
645 | 0 | const unsigned int var = var_params->vfp->vf(ref, ms_buffers->ref->stride, svt_aom_eb_av1_var_offs, 0, &sse); |
646 | 0 | int block_var = ROUND_POWER_OF_TWO(var, eb_num_pels_log2_lookup[bsize]); |
647 | |
|
648 | 0 | if (block_var < ms_params->pred_variance_th) { |
649 | 0 | return besterr; |
650 | 0 | } |
651 | 0 | } |
652 | 0 | if (ms_params->skip_diag_refinement >= 4) { |
653 | 0 | org_error = 0; |
654 | 0 | } else { |
655 | 0 | unsigned int demo = ms_params->skip_diag_refinement >= 2 |
656 | 0 | ? ((var_params->w >= 64 || var_params->h >= 64) ? 2 : 1) |
657 | 0 | : 1; |
658 | 0 | org_error = ms_params->skip_diag_refinement ? besterr / demo : INT_MAX; |
659 | 0 | } |
660 | 0 | for (int iter = 0; iter < round; ++iter) { |
661 | 0 | unsigned int prev_besterr = besterr; |
662 | 0 | two_level_checks_fast(xd, |
663 | 0 | cm, |
664 | 0 | start_mv, |
665 | 0 | bestmv, |
666 | 0 | hstep, |
667 | 0 | mv_limits, |
668 | 0 | var_params, |
669 | 0 | mv_cost_params, |
670 | 0 | &besterr, |
671 | 0 | org_error, |
672 | 0 | sse1, |
673 | 0 | distortion, |
674 | 0 | iters_per_step, |
675 | 0 | is_scaled); |
676 | 0 | hstep >>= 1; |
677 | 0 | start_mv = *bestmv; |
678 | 0 | if (ms_params->skip_diag_refinement && iter < QUARTER_PEL) { |
679 | 0 | org_error = MIN(org_error, besterr); |
680 | 0 | } |
681 | 0 | int32_t deviation = (((int64_t)MAX(besterr, 1) - (int64_t)MAX(prev_besterr, 1)) * 100) / |
682 | 0 | (int64_t)MAX(prev_besterr, 1); |
683 | 0 | if (deviation >= ms_params->round_dev_th) { |
684 | 0 | return besterr; |
685 | 0 | } |
686 | 0 | } |
687 | 0 | return besterr; |
688 | 0 | } |
689 | | |
690 | | int svt_av1_find_best_sub_pixel_tree(void* ictx, MacroBlockD* xd, const struct AV1Common* const cm, |
691 | | SUBPEL_MOTION_SEARCH_PARAMS* ms_params, Mv start_mv, Mv* bestmv, int* distortion, |
692 | 0 | unsigned int* sse1, int qp, BlockSize bsize, uint8_t early_neigh_check_exit) { |
693 | 0 | ModeDecisionContext* ctx = (ModeDecisionContext*)ictx; |
694 | 0 | const int allow_hp = ms_params->allow_hp; |
695 | 0 | const int forced_stop = ms_params->forced_stop; |
696 | 0 | const int iters_per_step = ms_params->iters_per_step; |
697 | |
|
698 | 0 | svt_mv_cost_param* mv_cost_params = &ms_params->mv_cost_params; |
699 | 0 | const SUBPEL_SEARCH_VAR_PARAMS* var_params = &ms_params->var_params; |
700 | 0 | const SubpelMvLimits* mv_limits = &ms_params->mv_limits; |
701 | | |
702 | | // How many steps to take. A round of 0 means fullpel search only, 1 means |
703 | | // half-pel, and so on. |
704 | 0 | int round = AOMMIN(FULL_PEL - forced_stop, 3 - !allow_hp); |
705 | 0 | int hstep = INIT_SUBPEL_STEP_SIZE; // Step size, initialized to 4/8=1/2 pel |
706 | |
|
707 | 0 | unsigned int besterr; |
708 | |
|
709 | 0 | *bestmv = start_mv; |
710 | 0 | const int is_scaled = 0; |
711 | 0 | besterr = svt_upsampled_setup_center_error(bestmv, var_params, mv_cost_params, (unsigned int*)distortion); |
712 | 0 | if (ctx != NULL && ms_params->search_stage == SPEL_ME) { |
713 | 0 | ctx->fp_me_dist[ms_params->list_idx][ms_params->ref_idx] = besterr; |
714 | 0 | if (ctx->pd_pass == PD_PASS_1 && ctx->md_subpel_me_ctrls.mvp_th > 0) { |
715 | 0 | unsigned int best_mvperr = ctx->best_fp_mvp_dist[ms_params->list_idx][ms_params->ref_idx]; |
716 | 0 | int best_mvp_idx = ctx->best_fp_mvp_idx[ms_params->list_idx][ms_params->ref_idx]; |
717 | 0 | const int mvp_err = best_mvperr + 1; |
718 | 0 | const int me_err = besterr + 1; |
719 | 0 | const int32_t deviation = ((me_err - mvp_err) * 100) / me_err; |
720 | 0 | if (deviation >= ctx->md_subpel_me_ctrls.mvp_th) { |
721 | 0 | round = 1; |
722 | 0 | } else if (ABS(bestmv->x - ctx->mvp_array[ms_params->list_idx][ms_params->ref_idx][best_mvp_idx].x) > |
723 | 0 | ctx->md_subpel_me_ctrls.hp_mv_th || |
724 | 0 | ABS(bestmv->y - ctx->mvp_array[ms_params->list_idx][ms_params->ref_idx][best_mvp_idx].y) > |
725 | 0 | ctx->md_subpel_me_ctrls.hp_mv_th) { |
726 | 0 | round = MIN(round, 2); |
727 | 0 | } |
728 | 0 | } |
729 | 0 | } |
730 | 0 | if (early_neigh_check_exit) { |
731 | 0 | return besterr; |
732 | 0 | } |
733 | 0 | const uint64_t th_normalizer = (uint64_t)(((var_params->w * var_params->h) << 5) * |
734 | 0 | (uint64_t)ms_params->abs_th_mult); |
735 | 0 | if ((uint64_t)qp * besterr < th_normalizer) { |
736 | 0 | return besterr; |
737 | 0 | } |
738 | | |
739 | | // If forced_stop is FULL_PEL, return. |
740 | 0 | if (!round) { |
741 | 0 | return besterr; |
742 | 0 | } |
743 | | // Exit subpel search if the variance of the full-pel predicted samples is low (i.e. where likely interpolation will not modify the integer samples) |
744 | 0 | if (ms_params->pred_variance_th) { |
745 | 0 | const MSBuffers* ms_buffers = &var_params->ms_buffers; |
746 | 0 | const uint8_t* ref = svt_get_buf_from_mv(ms_buffers->ref, *bestmv); |
747 | 0 | unsigned int sse; |
748 | 0 | const unsigned int var = var_params->vfp->vf(ref, ms_buffers->ref->stride, svt_aom_eb_av1_var_offs, 0, &sse); |
749 | 0 | int block_var = ROUND_POWER_OF_TWO(var, eb_num_pels_log2_lookup[bsize]); |
750 | |
|
751 | 0 | if (block_var < ms_params->pred_variance_th) { |
752 | 0 | return besterr; |
753 | 0 | } |
754 | 0 | } |
755 | 0 | for (int iter = 0; iter < round; ++iter) { |
756 | 0 | Mv iter_center_mv = *bestmv; |
757 | 0 | Mv diag_step; |
758 | 0 | diag_step = svt_first_level_check( |
759 | 0 | xd, cm, iter_center_mv, bestmv, hstep, mv_limits, var_params, mv_cost_params, &besterr, sse1, distortion); |
760 | | |
761 | | // Check diagonal sub-pixel position |
762 | 0 | if (!CHECK_MV_EQUAL(iter_center_mv, *bestmv) && iters_per_step > 1) { |
763 | 0 | svt_second_level_check_v2(xd, |
764 | 0 | cm, |
765 | 0 | iter_center_mv, |
766 | 0 | diag_step, |
767 | 0 | bestmv, |
768 | 0 | mv_limits, |
769 | 0 | var_params, |
770 | 0 | mv_cost_params, |
771 | 0 | &besterr, |
772 | 0 | sse1, |
773 | 0 | distortion, |
774 | 0 | is_scaled); |
775 | 0 | } |
776 | |
|
777 | 0 | hstep >>= 1; |
778 | 0 | } |
779 | |
|
780 | 0 | return besterr; |
781 | 0 | } |
782 | | |
783 | | // ============================================================================= |
784 | | // SVT Functions |
785 | | // ============================================================================= |
786 | 0 | int svt_aom_fp_mv_err_cost(const Mv* mv, const svt_mv_cost_param* mv_cost_params) { |
787 | 0 | return svt_mv_err_cost_(mv, mv_cost_params); |
788 | 0 | } |