/work/svt-av1/Source/Lib/Codec/mode_decision.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright(c) 2019 Intel Corporation |
3 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
4 | | * |
5 | | * This source code is subject to the terms of the BSD 3-Clause Clear License and |
6 | | * the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear License |
7 | | * was not distributed with this source code in the LICENSE file, you can |
8 | | * obtain it at https://www.aomedia.org/license. If the Alliance for Open |
9 | | * Media Patent License 1.0 was not distributed with this source code in the |
10 | | * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license. |
11 | | */ |
12 | | |
13 | | /*************************************** |
14 | | * Includes |
15 | | ***************************************/ |
16 | | #include <stdbool.h> |
17 | | #include <stdio.h> |
18 | | #include <stdlib.h> |
19 | | #include <limits.h> |
20 | | |
21 | | #include "common_utils.h" |
22 | | #include "definitions.h" |
23 | | #include "sequence_control_set.h" |
24 | | #include "mode_decision.h" |
25 | | #include "md_process.h" |
26 | | #include "motion_estimation.h" |
27 | | |
28 | | #include "av1me.h" |
29 | | #include "hash.h" |
30 | | #include "enc_inter_prediction.h" |
31 | | #include "rd_cost.h" |
32 | | #include "aom_dsp_rtcd.h" |
33 | | #include "svt_log.h" |
34 | | #include "resize.h" |
35 | | #include "mcomp.h" |
36 | | #include "ac_bias.h" |
37 | | #include "src_ops_process.h" |
38 | | #include "utility.h" |
39 | | #include "adaptive_mv_pred.h" |
40 | | #include "av1me.h" |
41 | | static const uint32_t intra_luma_to_chroma[INTRA_MODES] = { /* UV_* chroma mode for each of the INTRA_MODES luma intra modes, in the order listed below (presumably indexed by the luma mode value - confirm against callers) */ |
42 | | UV_DC_PRED, // Average of above and left pixels |
43 | | UV_V_PRED, // Vertical |
44 | | UV_H_PRED, // Horizontal |
45 | | UV_D45_PRED, // Directional 45 degree |
46 | | UV_D135_PRED, // Directional 135 degree |
47 | | UV_D113_PRED, // Directional 113 degree |
48 | | UV_D157_PRED, // Directional 157 degree |
49 | | UV_D203_PRED, // Directional 203 degree |
50 | | UV_D67_PRED, // Directional 67 degree |
51 | | UV_SMOOTH_PRED, // Combination of horizontal and vertical interpolation |
52 | | UV_SMOOTH_V_PRED, // Vertical interpolation |
53 | | UV_SMOOTH_H_PRED, // Horizontal interpolation |
54 | | UV_PAETH_PRED, // Predict from the direction of smallest gradient |
55 | | }; |
56 | | |
57 | | void calc_target_weighted_pred(PictureControlSet* pcs, ModeDecisionContext* ctx, const Av1Common* cm, /* forward declaration only; the definition is not in this part of the file */ |
58 | | const MacroBlockD* xd, int mi_row, int mi_col, const uint8_t* above, int above_stride, |
59 | | const uint8_t* left, int left_stride); |
60 | | #define INC_MD_CAND_CNT(cnt, max_can_count) \ |
61 | 253k | MULTI_LINE_MACRO_BEGIN \ |
62 | 253k | if (cnt + 1 < max_can_count) \ |
63 | 255k | cnt++; \ |
64 | 253k | else \ |
65 | 18.4E | SVT_ERROR("Mode decision candidate count exceeded"); \ |
66 | 253k | MULTI_LINE_MACRO_END |
67 | | |
68 | 0 | #define SUPERRES_INVALID_STATE 0x7fffffff /* sentinel value (largest positive 32-bit signed integer); its users are not in this part of the file */ |
69 | | |
70 | 3.71M | bool svt_av1_is_lossless_segment(PictureControlSet* pcs, int8_t segment_id) { /* Returns pcs->lossless[segment_id] when segmentation is enabled in the frame header, otherwise pcs->lossless[0]. segment_id is used as an index without a range check. */ |
71 | 3.71M | FrameHeader* frm_hdr = &pcs->ppcs->frm_hdr; |
72 | 3.71M | if (frm_hdr->segmentation_params.segmentation_enabled) { |
73 | 0 | return pcs->lossless[segment_id]; |
74 | 3.71M | } else { |
75 | 3.71M | return pcs->lossless[0]; /* segmentation off: segment_id is ignored */ |
76 | 3.71M | } |
77 | 3.71M | } |
78 | | |
79 | 0 | static bool check_mv_validity(int16_t x_mv, int16_t y_mv, uint8_t need_shift) { /* Scales the MV components by 2^need_shift and returns true only if is_mv_valid() accepts the scaled MV */ |
80 | 0 | Mv mv; |
81 | | //go to 1/8th if input is 1/4pel; multiply rather than "<<" because left-shifting a negative value is undefined in C |
82 | 0 | mv.y = y_mv * (1 << need_shift); |
83 | 0 | mv.x = x_mv * (1 << need_shift); |
84 | | /* AV1 limits |
85 | | -16384 < MV_x_in_1/8 or MV_y_in_1/8 < 16384 |
86 | | which means in full pel: |
87 | | -2048 < MV_x_in_full_pel or MV_y_in_full_pel < 2048 |
88 | | */ |
89 | 0 | if (!is_mv_valid(&mv)) { |
90 | 0 | return false; |
91 | 0 | } |
92 | 0 | return true; |
93 | 0 | } |
94 | | |
95 | | int svt_is_interintra_allowed(uint8_t enable_inter_intra, BlockSize bsize, PredictionMode mode, /* Non-zero only when the feature flag is set and the block size, prediction mode and reference frames all pass their svt_aom_is_interintra_allowed_* checks */ |
96 | 0 | const MvReferenceFrame ref_frame[2]) { |
97 | 0 | return enable_inter_intra && svt_aom_is_interintra_allowed_bsize((const BlockSize)bsize) && |
98 | 0 | svt_aom_is_interintra_allowed_mode(mode) && svt_aom_is_interintra_allowed_ref(ref_frame); |
99 | 0 | } |
100 | | |
101 | 0 | int svt_aom_filter_intra_allowed_bsize(BlockSize bs) { /* Returns 1 when both the width and the height of the block size are at most 32, else 0 */ |
102 | 0 | return block_size_wide[bs] <= 32 && block_size_high[bs] <= 32; |
103 | 0 | } |
104 | | |
105 | 256k | int svt_aom_filter_intra_allowed(uint8_t enable_filter_intra, BlockSize bsize, uint8_t palette_size, uint32_t mode) { /* Non-zero only when filter intra is enabled, mode is DC_PRED, no palette is in use (palette_size == 0) and the block size passes svt_aom_filter_intra_allowed_bsize() */ |
106 | 256k | return enable_filter_intra && mode == DC_PRED && palette_size == 0 && svt_aom_filter_intra_allowed_bsize(bsize); |
107 | 256k | } |
108 | | |
109 | | // Returns the max inter-inter compound type based on settings and block size: when the block size has no wedge |
110 | 0 | static MD_COMP_TYPE get_tot_comp_types_bsize(MD_COMP_TYPE tot_comp_types, BlockSize bsize) { /* parameter bits (svt_aom_get_wedge_params_bits() == 0) the result is capped at MD_COMP_WEDGE; otherwise tot_comp_types is returned unchanged */ |
111 | 0 | return (svt_aom_get_wedge_params_bits(bsize) == 0) ? MIN(tot_comp_types, MD_COMP_WEDGE) : tot_comp_types; |
112 | 0 | } |
113 | | |
114 | | /* |
115 | | Get the ME offset for a given block (the offset used to locate the PA MVs from the parent PCS). |
 | | The larger of the block width and height selects the switch entry (4/8, 16 or 32); the cases fall through, and |
 | | bits 3, 4 and 5 of org_x / org_y are folded into an index that me_idx_85[] converts to the offset. |
 | | Any other size keeps index 0. |
 | | When enable_me_8x8 is 0, offsets >= MAX_SB64_PU_COUNT_NO_8X8 are remapped through |
 | | me_idx_85_8x8_to_16x16_conversion[]; when enable_me_16x16 is also 0, offsets >= MAX_SB64_PU_COUNT_WO_16X16 are |
 | | remapped again through me_idx_16x16_to_parent_32x32_conversion[]. |
116 | | */ |
117 | | uint32_t svt_aom_get_me_block_offset(const uint32_t org_x, const uint32_t org_y, const BlockSize bsize, |
118 | 246k | const uint8_t enable_me_8x8, const uint8_t enable_me_16x16) { |
119 | 246k | const int bwidth = block_size_wide[bsize]; |
120 | 246k | const int bheight = block_size_high[bsize]; |
121 | 246k | const uint32_t max_length = MAX(bwidth, bheight); |
122 | | |
123 | 246k | uint32_t me_idx = 0; |
124 | 246k | switch (max_length) { |
125 | 0 | case 4: |
126 | 241k | case 8: |
127 | 241k | me_idx++; |
128 | 241k | if (org_x & 8) { // (org_x % 16) / 8 |
129 | 117k | me_idx += 1; |
130 | 117k | } |
131 | 241k | if (org_y & 8) { // (org_y % 16) / 8 |
132 | 117k | me_idx += 2; |
133 | 117k | } |
134 | 241k | AOM_FALLTHROUGH_INTENDED; |
135 | 243k | case 16: |
136 | 243k | me_idx++; |
137 | 243k | if (org_x & 16) { // (org_x % 32) / 16 |
138 | 116k | me_idx += 5; |
139 | 116k | } |
140 | 243k | if (org_y & 16) { // (org_y % 32) / 16 |
141 | 115k | me_idx += 10; |
142 | 115k | } |
143 | 243k | AOM_FALLTHROUGH_INTENDED; |
144 | 243k | case 32: |
145 | 243k | me_idx++; |
146 | 243k | if (org_x & 32) { // (org_x % 64) / 32 |
147 | 113k | me_idx += 21; |
148 | 113k | } |
149 | 243k | if (org_y & 32) { // (org_y % 64) / 32 |
150 | 111k | me_idx += 42; |
151 | 111k | } |
152 | 243k | break; |
153 | 2.77k | default: |
154 | | // me_idx = 0; |
155 | 2.77k | break; |
156 | 246k | } |
157 | | |
158 | 246k | uint32_t me_block_offset = me_idx_85[me_idx]; // convert idx to me_idx |
159 | | |
160 | 246k | if (!enable_me_8x8) { |
161 | 246k | if (me_block_offset >= MAX_SB64_PU_COUNT_NO_8X8) { |
162 | 241k | me_block_offset = me_idx_85_8x8_to_16x16_conversion[me_block_offset - MAX_SB64_PU_COUNT_NO_8X8]; |
163 | 241k | } |
164 | 246k | assert(me_block_offset < 21); /* NOTE(review): 21 presumably equals MAX_SB64_PU_COUNT_NO_8X8 - confirm and use the named constant */ |
165 | 246k | if (!enable_me_16x16) { |
166 | 0 | if (me_block_offset >= MAX_SB64_PU_COUNT_WO_16X16) { |
167 | 0 | assert(me_block_offset < 21); /* repeats the assertion made just above; the value has not changed in between */ |
168 | 0 | me_block_offset = me_idx_16x16_to_parent_32x32_conversion[me_block_offset - MAX_SB64_PU_COUNT_WO_16X16]; |
169 | 0 | } |
170 | 0 | } |
171 | 246k | } |
172 | | |
173 | 246k | return me_block_offset; |
174 | 246k | } |
175 | | |
176 | | // Given one reference frame identified by the pair (list_idx, ref_idx), scan the ME candidates of a block and |
177 | | // return 1 if any candidate uses that reference (as ref0 for direction 0 or 2, as ref1 for direction 1 or 2), else 0. |
178 | | uint8_t svt_aom_is_me_data_present(uint32_t me_block_offset, uint32_t me_cand_offset, const MeSbResults* me_results, |
179 | 0 | uint8_t list_idx, uint8_t ref_idx) { |
180 | 0 | uint8_t total_me_cnt = me_results->total_me_candidate_index[me_block_offset]; /* number of candidates to scan */ |
181 | 0 | const MeCandidate* me_block_results = &me_results->me_candidate_array[me_cand_offset]; /* first candidate of the block */ |
182 | 0 | for (uint32_t me_cand_i = 0; me_cand_i < total_me_cnt; ++me_cand_i) { |
183 | 0 | const MeCandidate* me_cand = &me_block_results[me_cand_i]; |
184 | 0 | assert(me_cand->direction <= 2); |
185 | 0 | if (me_cand->direction == 0 || me_cand->direction == 2) { |
186 | 0 | if (list_idx == me_cand->ref0_list && ref_idx == me_cand->ref_idx_l0) { |
187 | 0 | return 1; |
188 | 0 | } |
189 | 0 | } |
190 | 0 | if (me_cand->direction == 1 || me_cand->direction == 2) { |
191 | 0 | if (list_idx == me_cand->ref1_list && ref_idx == me_cand->ref_idx_l1) { |
192 | 0 | return 1; |
193 | 0 | } |
194 | 0 | } |
195 | 0 | } |
196 | 0 | return 0; |
197 | 0 | } |
198 | | |
199 | | /******************************************** |
200 | | * Constants |
201 | | ********************************************/ |
202 | | // 1 - Regular uni-pred , |
203 | | // 2 - Regular uni-pred + Wedge compound Inter Intra |
204 | | // 3 - Regular uni-pred + Wedge compound Inter Intra + Smooth compound Inter Intra |
205 | | |
206 | | #if CONFIG_ENABLE_OBMC |
207 | 0 | static bool warped_motion_mode_allowed(PictureControlSet* pcs, ModeDecisionContext* ctx) { /* True only when the frame header allows warped motion, the block has overlappable candidates, both block dimensions are >= 8 and ctx->wm_ctrls.enabled is set */ |
208 | 0 | FrameHeader* frm_hdr = &pcs->ppcs->frm_hdr; |
209 | 0 | return frm_hdr->allow_warped_motion && has_overlappable_candidates(ctx->blk_ptr) && ctx->blk_geom->bwidth >= 8 && |
210 | 0 | ctx->blk_geom->bheight >= 8 && ctx->wm_ctrls.enabled; |
211 | 0 | } |
212 | | #endif |
213 | | MotionMode svt_aom_obmc_motion_mode_allowed( /* Returns OBMC_CAUSAL only when every check below passes; every other path returns SIMPLE_TRANSLATION */ |
214 | | const PictureControlSet* pcs, ModeDecisionContext* ctx, const BlockSize bsize, |
215 | | uint8_t situation, // 0: candidate(s) preparation, 1: data preparation, 2: simple translation face-off |
216 | 0 | MvReferenceFrame rf0, MvReferenceFrame rf1, PredictionMode mode) { |
217 | 0 | if (ctx->obmc_ctrls.trans_face_off && !situation) { /* face-off setting: never propose OBMC while preparing candidates (situation 0) */ |
218 | 0 | return SIMPLE_TRANSLATION; |
219 | 0 | } |
220 | | // check if should cap the max block size for obmc |
221 | | |
222 | 0 | if (block_size_wide[bsize] > ctx->obmc_ctrls.max_blk_size || |
223 | 0 | block_size_high[bsize] > ctx->obmc_ctrls.max_blk_size) { |
224 | 0 | return SIMPLE_TRANSLATION; |
225 | 0 | } |
226 | 0 | if (!ctx->obmc_ctrls.enabled) { |
227 | 0 | return SIMPLE_TRANSLATION; |
228 | 0 | } |
229 | 0 | FrameHeader* frm_hdr = &pcs->ppcs->frm_hdr; |
230 | |
|
231 | 0 | if (!frm_hdr->is_motion_mode_switchable) { |
232 | 0 | return SIMPLE_TRANSLATION; |
233 | 0 | } |
234 | | |
235 | 0 | if (frm_hdr->force_integer_mv == 0) { |
236 | 0 | const TransformationType gm_type = pcs->ppcs->global_motion[rf0].wmtype; /* global-motion type of the first reference */ |
237 | 0 | if (is_global_mv_block(mode, bsize, gm_type)) { |
238 | 0 | return SIMPLE_TRANSLATION; |
239 | 0 | } |
240 | 0 | } |
241 | 0 | if (is_motion_variation_allowed_bsize(bsize) && is_inter_singleref_mode(mode) && rf1 != INTRA_FRAME && /* the two rf1 tests together mean rf1 < INTRA_FRAME */ |
242 | 0 | !(rf1 > INTRA_FRAME)) // is_motion_variation_allowed_compound |
243 | 0 | { |
244 | 0 | if (!has_overlappable_candidates(ctx->blk_ptr)) { // check_num_overlappable_neighbors |
245 | 0 | return SIMPLE_TRANSLATION; |
246 | 0 | } |
247 | | |
248 | 0 | return OBMC_CAUSAL; |
249 | 0 | } else { |
250 | 0 | return SIMPLE_TRANSLATION; |
251 | 0 | } |
252 | 0 | } |
253 | | |
254 | | //static uint32_t AntiContouringIntraMode[11] = { EB_INTRA_PLANAR, EB_INTRA_DC, EB_INTRA_HORIZONTAL, EB_INTRA_VERTICAL, |
255 | | //EB_INTRA_MODE_2, EB_INTRA_MODE_6, EB_INTRA_MODE_14, EB_INTRA_MODE_18, EB_INTRA_MODE_22, EB_INTRA_MODE_30, EB_INTRA_MODE_34 }; |
256 | 0 | int32_t svt_aom_have_newmv_in_inter_mode(PredictionMode mode) { /* Returns 1 when mode is one of the six listed modes whose name contains NEW (NEWMV, NEW_NEWMV, NEAREST_NEWMV, NEW_NEARESTMV, NEAR_NEWMV, NEW_NEARMV), else 0 */ |
257 | 0 | return (mode == NEWMV || mode == NEW_NEWMV || mode == NEAREST_NEWMV || mode == NEW_NEARESTMV || |
258 | 0 | mode == NEAR_NEWMV || mode == NEW_NEARMV); |
259 | 0 | } |
260 | | |
261 | | static const MvReferenceFrame to_ref_frame[2][4] = {{LAST_FRAME, LAST2_FRAME, LAST3_FRAME, GOLDEN_FRAME}, /* read-only lookup: [list][ref_idx] -> reference frame; the unused last slot of the second row holds INVALID_REF */ |
262 | | {BWDREF_FRAME, ALTREF2_FRAME, ALTREF_FRAME, INVALID_REF}}; |
263 | | |
264 | 0 | MvReferenceFrame svt_get_ref_frame_type(uint8_t list, uint8_t ref_idx) { /* Returns to_ref_frame[list][ref_idx]. There is no bounds check: callers must pass list < 2 and ref_idx < 4 (the table dimensions). */ |
265 | 0 | return to_ref_frame[list][ref_idx]; |
266 | 0 | } |
267 | | |
268 | 0 | uint8_t svt_aom_get_max_drl_index(uint8_t refmvCnt, PredictionMode mode) { /* Returns 1, 2 or 3 depending on refmvCnt for the NEW* and NEAR* modes tested below; returns 0 for any other mode */ |
269 | 0 | uint8_t max_drl = 0; |
270 | |
|
271 | 0 | if (mode == NEWMV || mode == NEW_NEWMV) { /* thresholds: refmvCnt < 2 -> 1, == 2 -> 2, > 2 -> 3 */ |
272 | 0 | if (refmvCnt < 2) { |
273 | 0 | max_drl = 1; |
274 | 0 | } else if (refmvCnt == 2) { |
275 | 0 | max_drl = 2; |
276 | 0 | } else { |
277 | 0 | max_drl = 3; |
278 | 0 | } |
279 | 0 | } |
280 | |
|
281 | 0 | if (mode == NEARMV || mode == NEAR_NEARMV || mode == NEAR_NEWMV || mode == NEW_NEARMV) { /* same ladder, shifted by one: refmvCnt < 3 -> 1, == 3 -> 2, > 3 -> 3 */ |
282 | 0 | if (refmvCnt < 3) { |
283 | 0 | max_drl = 1; |
284 | 0 | } else if (refmvCnt == 3) { |
285 | 0 | max_drl = 2; |
286 | 0 | } else { |
287 | 0 | max_drl = 3; |
288 | 0 | } |
289 | 0 | } |
290 | |
|
291 | 0 | return max_drl; |
292 | 0 | } |
293 | | |
294 | 0 | #define MV_COST_WEIGHT 108 /* weight argument passed to svt_av1_mv_bit_cost() in svt_aom_choose_best_av1_mv_pred() */ |
295 | | |
296 | | static int64_t pick_interintra_wedge(PictureControlSet* pcs, ModeDecisionContext* ctx, const BlockSize bsize, /* Builds the residual (src - p1) and the difference (p1 - p0), runs pick_wedge_fixed_sign() on them with sign 0, stores the chosen wedge index in *wedge_index_out and returns the value pick_wedge_fixed_sign() returned */ |
297 | | const uint8_t* const p0, const uint8_t* const p1, uint8_t* src_buf, |
298 | 0 | uint32_t src_stride, int8_t* wedge_index_out) { |
299 | 0 | assert(svt_aom_is_interintra_wedge_used(bsize)); |
300 | | // assert(cpi->common.seq_params.enable_interintra_compound); |
301 | |
|
302 | 0 | const int bw = block_size_wide[bsize]; |
303 | 0 | const int bh = block_size_high[bsize]; |
304 | 0 | DECLARE_ALIGNED(32, int16_t, residual1[MAX_INTERINTRA_SB_SQUARE]); // src - pred1 |
305 | 0 | DECLARE_ALIGNED(32, int16_t, diff10[MAX_INTERINTRA_SB_SQUARE]); // pred1 - pred0 |
306 | 0 | #if CONFIG_ENABLE_HIGH_BIT_DEPTH |
307 | 0 | if (ctx->hbd_md) { |
308 | 0 | svt_aom_highbd_subtract_block(bh, bw, residual1, bw, src_buf, src_stride, p1, bw, EB_TEN_BIT); |
309 | 0 | svt_aom_highbd_subtract_block(bh, bw, diff10, bw, p1, bw, p0, bw, EB_TEN_BIT); |
310 | |
|
311 | 0 | } else |
312 | 0 | #endif |
313 | 0 | { |
314 | 0 | svt_aom_subtract_block(bh, bw, residual1, bw, src_buf, src_stride, p1, bw); /* p0 and p1 are read with a stride of bw */ |
315 | 0 | svt_aom_subtract_block(bh, bw, diff10, bw, p1, bw, p0, bw); |
316 | 0 | } |
317 | |
|
318 | 0 | int8_t wedge_index = -1; /* -1 is what the caller receives if pick_wedge_fixed_sign() does not write an index */ |
319 | 0 | int64_t rd = pick_wedge_fixed_sign(pcs, ctx, bsize, residual1, diff10, 0, &wedge_index); |
320 | 0 | *wedge_index_out = wedge_index; |
321 | |
|
322 | 0 | return rd; |
323 | 0 | } |
324 | | |
325 | 0 | static void inter_intra_search(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidate* cand) { /* Picks the inter-intra mode with the lowest cost for cand, optionally runs the wedge search, and writes interintra_mode, interintra_wedge_index and use_wedge_interintra into cand->block_mi. Also resets cand->block_mi.interp_filters and is_interintra_used to 0. */ |
326 | 0 | SequenceControlSet* scs = pcs->scs; |
327 | 0 | DECLARE_ALIGNED(16, uint8_t, tmp_buf[2 * MAX_INTERINTRA_SB_SQUARE]); |
328 | 0 | DECLARE_ALIGNED(16, uint8_t, ii_pred_buf[2 * MAX_INTERINTRA_SB_SQUARE]); |
329 | | // get inter pred for ref0 |
330 | 0 | EbPictureBufferDesc* src_pic = ctx->hbd_md ? pcs->input_frame16bit : pcs->ppcs->enhanced_pic; |
331 | 0 | uint16_t* src_buf_hbd = (uint16_t*)src_pic->y_buffer + (ctx->blk_org_x) + (ctx->blk_org_y) * src_pic->y_stride; |
332 | 0 | uint8_t* src_buf = src_pic->y_buffer + (ctx->blk_org_x) + (ctx->blk_org_y) * src_pic->y_stride; |
333 | |
|
334 | 0 | uint8_t bit_depth = ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT; |
335 | 0 | uint32_t full_lambda = ctx->hbd_md ? ctx->full_lambda_md[EB_10_BIT_MD] : ctx->full_lambda_md[EB_8_BIT_MD]; |
336 | |
|
337 | 0 | uint32_t bwidth = ctx->blk_geom->bwidth; |
338 | 0 | uint32_t bheight = ctx->blk_geom->bheight; |
339 | 0 | EbPictureBufferDesc pred_desc; /* only border, y_stride and y_buffer are set here; the other fields stay uninitialised */ |
340 | 0 | pred_desc.border = 0; |
341 | 0 | pred_desc.y_stride = bwidth; |
342 | |
|
343 | 0 | EbPictureBufferDesc* ref_pic_list0 = svt_aom_get_ref_pic_buffer(pcs, cand->block_mi.ref_frame[0]); |
344 | 0 | EbPictureBufferDesc* ref_pic_list1 = NULL; |
345 | | |
346 | | // Use scaled references if resolution of the reference is different from that of the input |
347 | | // Only have one ref |
348 | 0 | if (ref_pic_list0 != NULL) { |
349 | 0 | uint8_t list_idx0 = get_list_idx(cand->block_mi.ref_frame[0]); |
350 | 0 | int8_t ref_idx_l0 = get_ref_frame_idx(cand->block_mi.ref_frame[0]); |
351 | 0 | svt_aom_use_scaled_rec_refs_if_needed( |
352 | 0 | pcs, |
353 | 0 | pcs->ppcs->enhanced_pic, |
354 | 0 | (EbReferenceObject*)pcs->ref_pic_ptr_array[list_idx0][ref_idx_l0]->object_ptr, |
355 | 0 | &ref_pic_list0, |
356 | 0 | ctx->hbd_md); |
357 | 0 | } |
358 | 0 | pred_desc.y_buffer = tmp_buf; /* the inter prediction below is written into tmp_buf */ |
359 | | |
360 | | //we call the regular inter prediction path here (no compound) |
361 | 0 | cand->block_mi.interp_filters = 0; |
362 | 0 | cand->block_mi.is_interintra_used = 0; |
363 | 0 | svt_aom_inter_prediction(scs, |
364 | 0 | pcs, |
365 | 0 | &cand->block_mi, |
366 | 0 | &cand->wm_params_l0, |
367 | 0 | &cand->wm_params_l1, |
368 | 0 | ctx->blk_ptr, |
369 | 0 | ctx->blk_geom->bsize, |
370 | 0 | ctx->shape, |
371 | 0 | false, // use_precomputed_obmc |
372 | 0 | false, // use_precomputed_ii - ii not performed here |
373 | 0 | ctx, |
374 | 0 | NULL, |
375 | 0 | NULL, |
376 | 0 | NULL, |
377 | 0 | ref_pic_list0, |
378 | 0 | ref_pic_list1, |
379 | 0 | ctx->blk_org_x, |
380 | 0 | ctx->blk_org_y, |
381 | 0 | &pred_desc, //output |
382 | 0 | 0, //output org_x, |
383 | 0 | 0, //output org_y, |
384 | 0 | PICTURE_BUFFER_DESC_LUMA_MASK, |
385 | 0 | ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT, |
386 | 0 | 0); // is_16bit_pipeline |
387 | |
|
388 | 0 | assert(svt_aom_is_interintra_wedge_used(ctx->blk_geom->bsize)); //if not I need to add nowedge path!! |
389 | |
|
390 | 0 | int64_t best_interintra_rd = INT64_MAX; |
391 | 0 | InterIntraMode best_interintra_mode = INTERINTRA_MODES; /* out-of-range start value; replaced by the first mode whose cost is below INT64_MAX */ |
392 | 0 | for (int j = 0; j < INTERINTRA_MODES; ++j) { |
393 | | // if ((!cpi->oxcf.enable_smooth_intra || cpi->sf.disable_smooth_intra) && |
394 | | // (InterIntraMode)j == II_SMOOTH_PRED) |
395 | | // continue; |
396 | 0 | InterIntraMode interintra_mode = (InterIntraMode)j; |
397 | | // rmode = interintra_mode_cost[mbmi->interintra_mode]; |
398 | 0 | const int bsize_group = eb_size_group_lookup[ctx->blk_geom->bsize]; |
399 | 0 | const int rmode = ctx->md_rate_est_ctx->inter_intra_mode_fac_bits[bsize_group][interintra_mode]; |
400 | | // av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw); |
401 | 0 | if (ctx->hbd_md) { /* NOTE(review): unlike the other hbd_md branches in this function, this one is not wrapped in CONFIG_ENABLE_HIGH_BIT_DEPTH - confirm this is intended */ |
402 | 0 | svt_aom_combine_interintra_highbd(interintra_mode, // mode, |
403 | 0 | 0, // use_wedge_interintra, |
404 | 0 | 0, // cand->interintra_wedge_index, |
405 | 0 | 0, // int wedge_sign, |
406 | 0 | ctx->blk_geom->bsize, |
407 | 0 | ctx->blk_geom->bsize, // plane_bsize, |
408 | 0 | ii_pred_buf, |
409 | 0 | bwidth, /*uint8_t *comppred, int compstride,*/ |
410 | 0 | tmp_buf, |
411 | 0 | bwidth, /*const uint8_t *interpred, int interstride,*/ |
412 | 0 | ctx->intrapred_buf[j], |
413 | 0 | bwidth /*const uint8_t *intrapred, int intrastride*/, |
414 | 0 | bit_depth); |
415 | 0 | } else { |
416 | 0 | svt_aom_combine_interintra(interintra_mode, //mode, |
417 | 0 | 0, //use_wedge_interintra, |
418 | 0 | 0, //cand->interintra_wedge_index, |
419 | 0 | 0, //int wedge_sign, |
420 | 0 | ctx->blk_geom->bsize, |
421 | 0 | ctx->blk_geom->bsize, // plane_bsize, |
422 | 0 | ii_pred_buf, |
423 | 0 | bwidth, /*uint8_t *comppred, int compstride,*/ |
424 | 0 | tmp_buf, |
425 | 0 | bwidth, /*const uint8_t *interpred, int interstride,*/ |
426 | 0 | ctx->intrapred_buf[j], |
427 | 0 | bwidth /*const uint8_t *intrapred, int intrastride*/); |
428 | 0 | } |
429 | 0 | int64_t rd; |
430 | 0 | if (ctx->inter_intra_comp_ctrls.use_rd_model) { /* modelled rate/distortion plus the mode rate; otherwise plain SSE against the source */ |
431 | 0 | int rate_sum; |
432 | 0 | int64_t dist_sum; |
433 | 0 | model_rd_for_sb_with_curvfit(pcs, |
434 | 0 | ctx, |
435 | 0 | ctx->blk_geom->bsize, |
436 | 0 | bwidth, |
437 | 0 | bheight, |
438 | 0 | ctx->hbd_md ? (uint8_t*)src_buf_hbd : src_buf, |
439 | 0 | src_pic->y_stride, |
440 | 0 | ii_pred_buf, |
441 | 0 | bwidth, |
442 | 0 | 0, |
443 | 0 | 0, |
444 | 0 | 0, |
445 | 0 | 0, |
446 | 0 | &rate_sum, |
447 | 0 | &dist_sum, |
448 | 0 | NULL, |
449 | 0 | NULL, |
450 | 0 | NULL); |
451 | |
|
452 | 0 | rd = RDCOST(full_lambda, rate_sum + rmode, dist_sum); |
453 | 0 | } else { |
454 | 0 | #if CONFIG_ENABLE_HIGH_BIT_DEPTH |
455 | 0 | if (ctx->hbd_md) { |
456 | 0 | rd = svt_aom_highbd_sse((uint8_t*)src_buf_hbd, src_pic->y_stride, ii_pred_buf, bwidth, bwidth, bheight); |
457 | 0 | } else |
458 | 0 | #endif |
459 | 0 | { |
460 | 0 | rd = svt_aom_sse(src_buf, src_pic->y_stride, ii_pred_buf, bwidth, bwidth, bheight); |
461 | 0 | } |
462 | 0 | } |
463 | 0 | if (rd < best_interintra_rd) { |
464 | 0 | best_interintra_rd = rd; |
465 | 0 | cand->block_mi.interintra_mode = best_interintra_mode = interintra_mode; |
466 | 0 | } |
467 | 0 | } |
468 | | // To test: Enable wedge search if source variance and edge strength are above the thresholds. |
469 | | //CHKN need to re-do intra pred using the winner, or have a separate intra search for wedge |
470 | 0 | int64_t best_interintra_rd_wedge = INT64_MAX; |
471 | 0 | const uint8_t ii_wedge_mode = ctx->shape == PART_N ? ctx->inter_intra_comp_ctrls.wedge_mode_sq |
472 | 0 | : ctx->inter_intra_comp_ctrls.wedge_mode_nsq; |
473 | 0 | if (ii_wedge_mode) { |
474 | 0 | best_interintra_rd_wedge = pick_interintra_wedge(pcs, |
475 | 0 | ctx, |
476 | 0 | ctx->blk_geom->bsize, |
477 | 0 | ctx->intrapred_buf[best_interintra_mode], |
478 | 0 | tmp_buf, |
479 | 0 | ctx->hbd_md ? (uint8_t*)src_buf_hbd : src_buf, |
480 | 0 | src_pic->y_stride, |
481 | 0 | &cand->block_mi.interintra_wedge_index); |
482 | 0 | } |
483 | | |
484 | | // for ii_wedge_mode 1, always inject wedge as a separate candidate; for wedge mode 2 only inject |
485 | | // if wedge is better than non-wedge |
486 | 0 | if (ii_wedge_mode == 1 || best_interintra_rd_wedge < best_interintra_rd) { |
487 | 0 | cand->block_mi.use_wedge_interintra = 1; |
488 | 0 | } else { |
489 | 0 | cand->block_mi.use_wedge_interintra = 0; |
490 | 0 | } |
491 | 0 | } |
492 | | |
493 | | static const COMPOUND_TYPE to_av1_compound_lut[] = {COMPOUND_AVERAGE, COMPOUND_DISTWTD, COMPOUND_DIFFWTD, COMPOUND_WEDGE}; /* read-only lookup, indexed by an MD_COMP_TYPE value in determine_compound_mode() */ |
494 | | |
495 | | static void determine_compound_mode(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidate* cand, /* Sets interinter_comp.type, comp_group_idx and compound_idx of cand->block_mi for cur_type; the DIFF0 and WEDGE types additionally run svt_aom_search_compound_diff_wedge() */ |
496 | 0 | MD_COMP_TYPE cur_type) { |
497 | 0 | BlockModeInfo* block_mi = &cand->block_mi; |
498 | 0 | block_mi->interinter_comp.type = to_av1_compound_lut[cur_type]; /* cur_type is used as an index before the switch validates it */ |
499 | 0 | switch (cur_type) { |
500 | 0 | case MD_COMP_AVG: |
501 | 0 | block_mi->comp_group_idx = 0; |
502 | 0 | block_mi->compound_idx = 1; |
503 | 0 | break; |
504 | 0 | case MD_COMP_DIST: |
505 | 0 | block_mi->comp_group_idx = 0; |
506 | 0 | block_mi->compound_idx = 0; |
507 | 0 | break; |
508 | 0 | case MD_COMP_DIFF0: |
509 | 0 | block_mi->comp_group_idx = 1; |
510 | 0 | block_mi->compound_idx = 1; |
511 | 0 | block_mi->interinter_comp.mask_type = 55; /* NOTE(review): the meaning of 55 is not visible in this part of the file - document or name it */ |
512 | 0 | svt_aom_search_compound_diff_wedge(pcs, ctx, cand); |
513 | 0 | break; |
514 | 0 | case MD_COMP_WEDGE: |
515 | 0 | block_mi->comp_group_idx = 1; |
516 | 0 | block_mi->compound_idx = 1; |
517 | 0 | svt_aom_search_compound_diff_wedge(pcs, ctx, cand); |
518 | 0 | break; |
519 | 0 | default: |
520 | 0 | SVT_ERROR("not used comp type\n"); |
521 | 0 | assert(0); |
522 | 0 | break; |
523 | 0 | } |
524 | 0 | } |
525 | | |
526 | | void svt_aom_choose_best_av1_mv_pred(ModeDecisionContext* ctx, MvReferenceFrame ref_frame, /* Chooses the DRL index whose predictor gives the lowest MV rate for mv0 (and mv1 for compound modes), and writes that index and its predictor MVs to the two outputs */ |
527 | | PredictionMode mode, // NEW or NEW_NEW |
528 | | Mv mv0, Mv mv1, |
529 | | uint8_t* bestDrlIndex, // output |
530 | | Mv best_pred_mv[2] // output |
531 | 0 | ) { |
532 | 0 | if (ctx->shut_fast_rate) { /* NOTE(review): this early return leaves *bestDrlIndex and best_pred_mv unwritten - confirm callers initialise them */ |
533 | 0 | return; |
534 | 0 | } |
535 | 0 | if (ctx->approx_inter_rate > 1) { /* cheapest setting: take entry 0 of the reference MV stack without any rate comparison */ |
536 | 0 | *bestDrlIndex = 0; |
537 | 0 | best_pred_mv[0] = ctx->ref_mv_stack[ref_frame][0].this_mv; |
538 | 0 | best_pred_mv[1] = ctx->ref_mv_stack[ref_frame][0].comp_mv; |
539 | 0 | return; |
540 | 0 | } |
541 | 0 | int16_t mv0x = mv0.x; |
542 | 0 | int16_t mv0y = mv0.y; |
543 | 0 | int16_t mv1x = mv1.x; |
544 | 0 | int16_t mv1y = mv1.y; |
545 | |
|
546 | 0 | uint8_t is_compound = is_inter_compound_mode(mode); |
547 | |
|
548 | 0 | struct MdRateEstimationContext* md_rate_est_ctx = ctx->md_rate_est_ctx; |
549 | 0 | BlkStruct* blk_ptr = ctx->blk_ptr; |
550 | 0 | uint8_t max_drl_index; |
551 | 0 | Mv nearestmv[2] = {{{0}}, {{0}}}; |
552 | 0 | Mv nearmv[2]; |
553 | 0 | Mv ref_mv[2]; |
554 | 0 | Mv mv; |
555 | |
|
556 | 0 | max_drl_index = svt_aom_get_max_drl_index(blk_ptr->av1xd->ref_mv_count[ref_frame], mode); |
557 | | // max_drl_index = 1; |
558 | |
|
559 | 0 | if (max_drl_index == 1) { /* a single choice: no search needed */ |
560 | 0 | *bestDrlIndex = 0; |
561 | |
|
562 | 0 | best_pred_mv[0] = ctx->ref_mv_stack[ref_frame][0].this_mv; |
563 | 0 | best_pred_mv[1] = ctx->ref_mv_stack[ref_frame][0].comp_mv; |
564 | 0 | } else { |
565 | 0 | uint8_t drli; |
566 | 0 | uint32_t best_mv_cost = 0xFFFFFFFF; |
567 | 0 | for (drli = 0; drli < max_drl_index; drli++) { /* does not iterate when max_drl_index is 0, so the outputs stay unwritten in that case */ |
568 | 0 | svt_aom_get_av1_mv_pred_drl(ctx, blk_ptr, ref_frame, is_compound, mode, drli, nearestmv, nearmv, ref_mv); |
569 | | |
570 | | //compute the rate for this drli Cand |
571 | 0 | mv.y = mv0y; |
572 | 0 | mv.x = mv0x; |
573 | 0 | uint32_t mv_rate = 0; |
574 | 0 | if (ctx->approx_inter_rate) { |
575 | 0 | mv_rate = (uint32_t)svt_av1_mv_bit_cost_light(&mv, &(ref_mv[0])); |
576 | 0 | } else { |
577 | 0 | mv_rate = (uint32_t)svt_av1_mv_bit_cost( |
578 | 0 | &mv, &(ref_mv[0]), md_rate_est_ctx->nmv_vec_cost, md_rate_est_ctx->nmvcoststack, MV_COST_WEIGHT); |
579 | 0 | } |
580 | |
|
581 | 0 | if (is_compound) { /* add the rate of the second MV against the second predictor */ |
582 | 0 | mv.y = mv1y; |
583 | 0 | mv.x = mv1x; |
584 | 0 | if (ctx->approx_inter_rate) { |
585 | 0 | mv_rate += (uint32_t)svt_av1_mv_bit_cost_light(&mv, &(ref_mv[1])); |
586 | 0 | } else { |
587 | 0 | mv_rate += (uint32_t)svt_av1_mv_bit_cost(&mv, |
588 | 0 | &(ref_mv[1]), |
589 | 0 | md_rate_est_ctx->nmv_vec_cost, |
590 | 0 | md_rate_est_ctx->nmvcoststack, |
591 | 0 | MV_COST_WEIGHT); |
592 | 0 | } |
593 | 0 | } |
594 | |
|
595 | 0 | const int32_t new_mv = (mode == NEWMV || mode == NEW_NEWMV); |
596 | 0 | if (new_mv) { /* add the bits for signalling drli itself */ |
597 | 0 | int32_t idx; |
598 | 0 | for (idx = 0; idx < 2; ++idx) { |
599 | 0 | if (blk_ptr->av1xd->ref_mv_count[ref_frame] > idx + 1) { |
600 | 0 | uint8_t drl_1_ctx = av1_drl_ctx(&(ctx->ref_mv_stack[ref_frame][0]), idx); |
601 | 0 | mv_rate += ctx->md_rate_est_ctx->drl_mode_fac_bits[drl_1_ctx][drli != idx]; |
602 | 0 | if (drli == idx) { |
603 | 0 | break; |
604 | 0 | } |
605 | 0 | } |
606 | 0 | } |
607 | 0 | } |
608 | |
|
609 | 0 | if (mv_rate < best_mv_cost) { /* strict comparison: on a tie the lower drli wins */ |
610 | 0 | best_mv_cost = mv_rate; |
611 | 0 | *bestDrlIndex = drli; |
612 | 0 | best_pred_mv[0] = ref_mv[0]; |
613 | 0 | best_pred_mv[1] = ref_mv[1]; |
614 | 0 | } |
615 | 0 | } |
616 | 0 | } |
617 | 0 | } |
618 | | |
619 | 14.8k | static void mode_decision_cand_bf_dctor(EbPtr p) { /* Destructor paired with svt_aom_mode_decision_cand_bf_ctor(): deletes pred, rec_coeff and quant only; residual and recon are not deleted here because that constructor stores caller-supplied pointers for them */ |
620 | 14.8k | ModeDecisionCandidateBuffer* obj = (ModeDecisionCandidateBuffer*)p; |
621 | 14.8k | EB_DELETE(obj->pred); |
622 | 14.8k | EB_DELETE(obj->rec_coeff); |
623 | 14.8k | EB_DELETE(obj->quant); |
624 | 14.8k | } |
625 | | |
626 | 2.96k | static void mode_decision_scratch_cand_bf_dctor(EbPtr p) { /* Destructor paired with svt_aom_mode_decision_scratch_cand_bf_ctor(): deletes all five picture buffers that constructor creates */ |
627 | 2.96k | ModeDecisionCandidateBuffer* obj = (ModeDecisionCandidateBuffer*)p; |
628 | 2.96k | EB_DELETE(obj->pred); |
629 | 2.96k | EB_DELETE(obj->residual); |
630 | 2.96k | EB_DELETE(obj->rec_coeff); |
631 | 2.96k | EB_DELETE(obj->recon); |
632 | 2.96k | EB_DELETE(obj->quant); |
633 | 2.96k | } |
634 | | |
635 | | /*************************************** |
636 | | * Mode Decision Candidate Ctor |
 | | * Creates the pred, rec_coeff and quant picture buffers (sb_size x sb_size, EB_YUV420, no border), |
 | | * stores the caller-supplied temp_residual / temp_recon_ptr and the three cost pointers without copying, |
 | | * installs mode_decision_cand_bf_dctor and returns EB_ErrorNone once every step has run. |
 | | * NOTE(review): EB_NEW presumably returns early on allocation failure - confirm in its definition. |
637 | | ***************************************/ |
638 | | EbErrorType svt_aom_mode_decision_cand_bf_ctor(ModeDecisionCandidateBuffer* buffer_ptr, EbBitDepth max_bitdepth, |
639 | | uint8_t sb_size, uint32_t buffer_desc_mask, |
640 | | EbPictureBufferDesc* temp_residual, EbPictureBufferDesc* temp_recon_ptr, |
641 | 14.8k | uint64_t* fast_cost, uint64_t* full_cost, uint64_t* full_cost_ssim) { |
642 | 14.8k | EbPictureBufferDescInitData picture_buffer_desc_init_data; |
643 | | |
644 | 14.8k | EbPictureBufferDescInitData thirty_two_width_picture_buffer_desc_init_data; |
645 | | |
646 | 14.8k | buffer_ptr->dctor = mode_decision_cand_bf_dctor; |
647 | | |
648 | | // Init Picture Data |
649 | 14.8k | picture_buffer_desc_init_data.max_width = sb_size; |
650 | 14.8k | picture_buffer_desc_init_data.max_height = sb_size; |
651 | 14.8k | picture_buffer_desc_init_data.bit_depth = max_bitdepth; |
652 | 14.8k | picture_buffer_desc_init_data.color_format = EB_YUV420; |
653 | 14.8k | picture_buffer_desc_init_data.buffer_enable_mask = buffer_desc_mask; |
654 | 14.8k | picture_buffer_desc_init_data.border = 0; |
655 | 14.8k | picture_buffer_desc_init_data.split_mode = false; |
656 | 14.8k | picture_buffer_desc_init_data.is_16bit_pipeline = max_bitdepth > EB_EIGHT_BIT; |
657 | | |
658 | 14.8k | thirty_two_width_picture_buffer_desc_init_data.max_width = sb_size; |
659 | 14.8k | thirty_two_width_picture_buffer_desc_init_data.max_height = sb_size; |
660 | 14.8k | thirty_two_width_picture_buffer_desc_init_data.bit_depth = EB_THIRTYTWO_BIT; |
661 | 14.8k | thirty_two_width_picture_buffer_desc_init_data.color_format = EB_YUV420; |
662 | 14.8k | thirty_two_width_picture_buffer_desc_init_data.buffer_enable_mask = buffer_desc_mask; |
663 | 14.8k | thirty_two_width_picture_buffer_desc_init_data.border = 0; |
664 | 14.8k | thirty_two_width_picture_buffer_desc_init_data.split_mode = false; |
665 | 14.8k | thirty_two_width_picture_buffer_desc_init_data.is_16bit_pipeline = true; |
666 | | |
667 | | // Candidate Ptr |
668 | 14.8k | buffer_ptr->cand = NULL; |
669 | | |
670 | | // Video Buffers |
671 | 14.8k | EB_NEW(buffer_ptr->pred, svt_picture_buffer_desc_ctor, (EbPtr)&picture_buffer_desc_init_data); |
672 | | // Reuse the residual_ptr memory in MD context |
673 | 14.8k | buffer_ptr->residual = temp_residual; |
674 | 14.8k | EB_NEW(buffer_ptr->rec_coeff, svt_picture_buffer_desc_ctor, (EbPtr)&thirty_two_width_picture_buffer_desc_init_data); |
675 | 14.8k | EB_NEW(buffer_ptr->quant, svt_picture_buffer_desc_ctor, (EbPtr)&thirty_two_width_picture_buffer_desc_init_data); |
676 | | // Reuse the recon_ptr memory in MD context |
677 | 14.8k | buffer_ptr->recon = temp_recon_ptr; |
678 | | |
679 | | // Costs |
680 | 14.8k | buffer_ptr->fast_cost = fast_cost; |
681 | 14.8k | buffer_ptr->full_cost = full_cost; |
682 | 14.8k | buffer_ptr->full_cost_ssim = full_cost_ssim; |
683 | 14.8k | return EB_ErrorNone; |
684 | 14.8k | } |
685 | | |
686 | | EbErrorType svt_aom_mode_decision_scratch_cand_bf_ctor(ModeDecisionCandidateBuffer* buffer_ptr, uint8_t sb_size, /* Scratch variant of the candidate-buffer constructor: creates all five picture buffers itself (pred and recon at max_bitdepth, residual at 16 bits, rec_coeff and quant at 32 bits), always with PICTURE_BUFFER_DESC_FULL_MASK, and installs mode_decision_scratch_cand_bf_dctor. The cost pointers are not set here. */ |
687 | 2.96k | EbBitDepth max_bitdepth) { |
688 | 2.96k | EbPictureBufferDescInitData picture_buffer_desc_init_data; |
689 | 2.96k | EbPictureBufferDescInitData double_width_picture_buffer_desc_init_data; |
690 | 2.96k | EbPictureBufferDescInitData thirty_two_width_picture_buffer_desc_init_data; |
691 | | |
692 | 2.96k | buffer_ptr->dctor = mode_decision_scratch_cand_bf_dctor; |
693 | | |
694 | | // Init Picture Data |
695 | 2.96k | picture_buffer_desc_init_data.max_width = sb_size; |
696 | 2.96k | picture_buffer_desc_init_data.max_height = sb_size; |
697 | 2.96k | picture_buffer_desc_init_data.bit_depth = max_bitdepth; |
698 | 2.96k | picture_buffer_desc_init_data.color_format = EB_YUV420; |
699 | 2.96k | picture_buffer_desc_init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK; |
700 | 2.96k | picture_buffer_desc_init_data.border = 0; |
701 | 2.96k | picture_buffer_desc_init_data.split_mode = false; |
702 | 2.96k | picture_buffer_desc_init_data.is_16bit_pipeline = max_bitdepth > EB_EIGHT_BIT; |
703 | 2.96k | double_width_picture_buffer_desc_init_data.max_width = sb_size; /* 16-bit descriptor, used for the residual buffer */ |
704 | 2.96k | double_width_picture_buffer_desc_init_data.max_height = sb_size; |
705 | 2.96k | double_width_picture_buffer_desc_init_data.bit_depth = EB_SIXTEEN_BIT; |
706 | 2.96k | double_width_picture_buffer_desc_init_data.color_format = EB_YUV420; |
707 | 2.96k | double_width_picture_buffer_desc_init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK; |
708 | 2.96k | double_width_picture_buffer_desc_init_data.border = 0; |
709 | 2.96k | double_width_picture_buffer_desc_init_data.split_mode = false; |
710 | 2.96k | double_width_picture_buffer_desc_init_data.is_16bit_pipeline = true; |
711 | 2.96k | thirty_two_width_picture_buffer_desc_init_data.max_width = sb_size; /* 32-bit descriptor, used for rec_coeff and quant */ |
712 | 2.96k | thirty_two_width_picture_buffer_desc_init_data.max_height = sb_size; |
713 | 2.96k | thirty_two_width_picture_buffer_desc_init_data.bit_depth = EB_THIRTYTWO_BIT; |
714 | 2.96k | thirty_two_width_picture_buffer_desc_init_data.color_format = EB_YUV420; |
715 | 2.96k | thirty_two_width_picture_buffer_desc_init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK; |
716 | 2.96k | thirty_two_width_picture_buffer_desc_init_data.border = 0; |
717 | 2.96k | thirty_two_width_picture_buffer_desc_init_data.split_mode = false; |
718 | 2.96k | thirty_two_width_picture_buffer_desc_init_data.is_16bit_pipeline = true; |
719 | | |
720 | | // Candidate Ptr |
721 | 2.96k | buffer_ptr->cand = NULL; |
722 | | |
723 | | // Video Buffers |
724 | 2.96k | EB_NEW(buffer_ptr->pred, svt_picture_buffer_desc_ctor, (EbPtr)&picture_buffer_desc_init_data); |
725 | 2.96k | EB_NEW(buffer_ptr->residual, svt_picture_buffer_desc_ctor, (EbPtr)&double_width_picture_buffer_desc_init_data); |
726 | 2.96k | EB_NEW(buffer_ptr->rec_coeff, svt_picture_buffer_desc_ctor, (EbPtr)&thirty_two_width_picture_buffer_desc_init_data); |
727 | 2.96k | EB_NEW(buffer_ptr->quant, svt_picture_buffer_desc_ctor, (EbPtr)&thirty_two_width_picture_buffer_desc_init_data); |
728 | | |
729 | 2.96k | EB_NEW(buffer_ptr->recon, svt_picture_buffer_desc_ctor, (EbPtr)&picture_buffer_desc_init_data); |
730 | 2.96k | return EB_ErrorNone; |
731 | 2.96k | } |
732 | | |
733 | | /*************************************** |
734 | | * return true if the MV candidate is already injected |
735 | | ***************************************/ |
736 | 0 | static bool mv_is_already_injected(ModeDecisionContext* ctx, Mv mv0, Mv mv1, uint8_t ref_type) { |
737 | 0 | MvReferenceFrame rf[2]; |
738 | 0 | av1_set_ref_frame(rf, ref_type); |
739 | | |
740 | | // Unipred Candidate |
741 | 0 | if (rf[1] <= INTRA_FRAME) { |
742 | | // First check the validity of the candidate MV, and exit if invalid MV |
743 | 0 | if (ctx->corrupted_mv_check && !check_mv_validity(mv0.x, mv0.y, 0)) { |
744 | 0 | return true; |
745 | 0 | } |
746 | | |
747 | 0 | for (int cand_idx = 0; cand_idx < ctx->injected_mv_count; cand_idx++) { |
748 | 0 | if (ctx->injected_ref_types[cand_idx] == ref_type && ctx->injected_mvs[cand_idx][0].as_int == mv0.as_int) { |
749 | 0 | return true; |
750 | 0 | } |
751 | 0 | } |
752 | 0 | } else { // Bipred Candidate |
753 | | // First check the validity of the candidate MV, and exit if invalid MV |
754 | 0 | if (ctx->corrupted_mv_check && (!check_mv_validity(mv0.x, mv0.y, 0) || !check_mv_validity(mv1.x, mv1.y, 0))) { |
755 | 0 | return true; |
756 | 0 | } |
757 | | |
758 | 0 | RedundantCandCtrls* redund_ctrls = &ctx->cand_reduction_ctrls.redundant_cand_ctrls; |
759 | 0 | if (redund_ctrls->score_th) { |
760 | 0 | uint8_t is_high_mag = (ABS(mv0.x) > redund_ctrls->mag_th) && (ABS(mv0.y) > redund_ctrls->mag_th) && |
761 | 0 | (ABS(mv1.x) > redund_ctrls->mag_th) && (ABS(mv1.y) > redund_ctrls->mag_th); |
762 | 0 | for (int cand_idx = 0; cand_idx < ctx->injected_mv_count; cand_idx++) { |
763 | 0 | if (ctx->injected_ref_types[cand_idx] == ref_type) { |
764 | 0 | int score = ABS(ctx->injected_mvs[cand_idx][0].x - mv0.x) + |
765 | 0 | ABS(ctx->injected_mvs[cand_idx][0].y - mv0.y) + ABS(ctx->injected_mvs[cand_idx][1].x - mv1.x) + |
766 | 0 | ABS(ctx->injected_mvs[cand_idx][1].y - mv1.y); |
767 | |
|
768 | 0 | if (score == 0 || (score < redund_ctrls->score_th && is_high_mag)) { |
769 | 0 | return true; |
770 | 0 | } |
771 | 0 | } |
772 | 0 | } |
773 | 0 | } else { |
774 | 0 | for (int cand_idx = 0; cand_idx < ctx->injected_mv_count; cand_idx++) { |
775 | 0 | if (ctx->injected_ref_types[cand_idx] == ref_type && |
776 | 0 | ctx->injected_mvs[cand_idx][0].as_int == mv0.as_int && |
777 | 0 | ctx->injected_mvs[cand_idx][1].as_int == mv1.as_int) { |
778 | 0 | return true; |
779 | 0 | } |
780 | 0 | } |
781 | 0 | } |
782 | 0 | } |
783 | 0 | return false; |
784 | 0 | } |
785 | | |
786 | | bool svt_aom_is_valid_unipred_ref(ModeDecisionContext* ctx, uint8_t inter_cand_group, uint8_t list_idx, |
787 | 0 | uint8_t ref_idx) { |
788 | 0 | if (!ctx->ref_pruning_ctrls.enabled) { |
789 | 0 | return true; |
790 | 0 | } |
791 | 0 | if (!ctx->ref_filtering_res[inter_cand_group][list_idx][ref_idx].do_ref && |
792 | 0 | (ref_idx || !ctx->ref_pruning_ctrls.closest_refs[inter_cand_group])) { |
793 | 0 | return false; |
794 | 0 | } else { |
795 | 0 | return true; |
796 | 0 | } |
797 | 0 | } |
798 | | |
799 | | // Determine if the MV-to-MVP difference satisfies the mv_diff restriction |
800 | 0 | static bool is_valid_mv_diff(Mv best_pred_mv[2], Mv mv0, Mv mv1, uint8_t is_compound) { |
801 | 0 | const uint8_t mv_diff_max_bit = MV_IN_USE_BITS; |
802 | |
|
803 | 0 | if (abs(mv0.x - best_pred_mv[0].x) > (1 << mv_diff_max_bit) || |
804 | 0 | abs(mv0.y - best_pred_mv[0].y) > (1 << mv_diff_max_bit)) { |
805 | 0 | return false; |
806 | 0 | } |
807 | | |
808 | 0 | if (is_compound) { |
809 | 0 | if (abs(mv1.x - best_pred_mv[1].x) > (1 << mv_diff_max_bit) || |
810 | 0 | abs(mv1.y - best_pred_mv[1].y) > (1 << mv_diff_max_bit)) { |
811 | 0 | return false; |
812 | 0 | } |
813 | 0 | } |
814 | 0 | return true; |
815 | 0 | } |
816 | | |
817 | | static bool is_valid_bipred_ref(ModeDecisionContext* ctx, uint8_t inter_cand_group, uint8_t list_idx_0, |
818 | 0 | uint8_t ref_idx_0, uint8_t list_idx_1, uint8_t ref_idx_1) { |
819 | 0 | if (!ctx->ref_pruning_ctrls.enabled) { |
820 | 0 | return true; |
821 | 0 | } |
822 | | // Both ref should be 1 for bipred refs to be valid: if 1 is not best_refs then there is a chance to exit the injection |
823 | 0 | if (!ctx->ref_filtering_res[inter_cand_group][list_idx_0][ref_idx_0].do_ref || |
824 | 0 | !ctx->ref_filtering_res[inter_cand_group][list_idx_1][ref_idx_1].do_ref) { |
825 | | // Check whether we should check the closest, if no then there no need to move forward and return false |
826 | 0 | if (!ctx->ref_pruning_ctrls.closest_refs[inter_cand_group]) { |
827 | 0 | return false; |
828 | 0 | } |
829 | | |
830 | | // Else check if ref are LAST and BWD, if not then return false |
831 | 0 | if (ref_idx_0 || ref_idx_1) { |
832 | 0 | return false; |
833 | 0 | } |
834 | 0 | } |
835 | 0 | return true; |
836 | 0 | } |
837 | | |
// Number of positions surrounding the centre of a 3x3 MV refinement window
#define BIPRED_3x3_REFINMENT_POSITIONS 8

// Read-only lookup tables describing the 8 neighbours of the 3x3 refinement window; index i of each table
// refers to the same position.
//   allow_refinement_flag : 1 for the horizontal/vertical neighbours, 0 for the diagonal ones (used to
//                           restrict the search to non-diagonal positions)
//   bipred_3x3_x_pos      : x offset of the position (in search steps)
//   bipred_3x3_y_pos      : y offset of the position (in search steps)
static const int8_t allow_refinement_flag[BIPRED_3x3_REFINMENT_POSITIONS] = {1, 0, 1, 0, 1, 0, 1, 0};
static const int8_t bipred_3x3_x_pos[BIPRED_3x3_REFINMENT_POSITIONS]      = {-1, -1, 0, 1, 1, 1, 0, -1};
static const int8_t bipred_3x3_y_pos[BIPRED_3x3_REFINMENT_POSITIONS]      = {0, 1, 1, 1, 0, -1, -1, -1};
843 | | |
844 | 127k | static INLINE uint8_t is_dc_only_safe(PictureControlSet* pcs, ModeDecisionContext* ctx) { |
845 | | // Early exit if pruning not enabled, SB-128, NSQ, or 4x4 (no variance available) |
846 | 127k | if (!ctx->intra_ctrls.prune_using_edge_info || pcs->scs->super_block_size == 128 || ctx->shape != PART_N || |
847 | 127k | ctx->blk_geom->sq_size == 4) { |
848 | 0 | return 0; |
849 | 0 | } |
850 | | |
851 | | // Block variance lookup |
852 | 127k | int blk_idx; |
853 | 127k | int sub_idx[4]; |
854 | 127k | const Position blk_org = {.x = ctx->blk_org_x - ctx->sb_origin_x, .y = ctx->blk_org_y - ctx->sb_origin_y}; |
855 | 127k | svt_aom_get_blk_var_map(ctx->blk_geom->sq_size, blk_org.x, blk_org.y, &blk_idx, sub_idx); |
856 | | |
857 | 127k | uint16_t* sb_var = pcs->ppcs->variance[ctx->sb_index]; |
858 | 127k | uint32_t blk_var = sb_var[blk_idx]; |
859 | | |
860 | | // For 8x8, we do not have 4x4 sub-variance, skip spread check |
861 | 127k | if (ctx->blk_geom->sq_size == 8) { |
862 | 123k | return (blk_var < 2000); |
863 | 123k | } |
864 | | |
865 | | // For 16x16 and above, compute spread from sub-blocks |
866 | 3.81k | uint32_t min_var = UINT32_MAX; |
867 | 3.81k | uint32_t max_var = 0; |
868 | | |
869 | 22.7k | for (int i = 0; i < 4; i++) { |
870 | 18.9k | uint32_t v = sb_var[sub_idx[i]]; |
871 | 18.9k | min_var = MIN(min_var, v); |
872 | 18.9k | max_var = MAX(max_var, v); |
873 | 18.9k | } |
874 | | |
875 | 3.81k | uint32_t spread_var = max_var - min_var; |
876 | | |
877 | 4.72k | return (blk_var < 2000 && spread_var < 4000); |
878 | 127k | } |
879 | | |
880 | | // Inject inter-intra, WM, OBMC for unipred simple-trans candidate |
881 | | // |
882 | | // total_cand_count is the index to ctx->fast_cand_array for the next candidate injected (which is the |
883 | | // same as the number of candidates injected so far). It is assumed the simple-trans candidate to base |
884 | | // the other candidtes on is the previously injected candidate (at index total_cand_count - 1). |
885 | | // |
886 | | // enable_ii, enable_wm, and enable_obmc allow the caller to disable some modes explicitly; if enabled, the |
887 | | // mode will be injected if the block size/candidate type supports the mode. The enable signals are left as |
888 | | // arguments because some candidates do not inject all modes (e.g. unipred does not inject WM/OBMC). |
889 | | static void inj_non_simple_modes(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t* total_cand_count, |
890 | 0 | const bool enable_ii, const bool enable_wm, const bool enable_obmc) { |
891 | | // index of simple translation candidate (to be used to copy cand info for other modes) |
892 | | // assumes the simple trans cand is the previously injected candidate |
893 | 0 | const uint32_t simple_trans_cand_idx = *total_cand_count - 1; |
894 | 0 | const ModeDecisionCandidate* const simple_trans_cand = &ctx->fast_cand_array[simple_trans_cand_idx]; |
895 | | |
896 | | // The candidate count to be used to track number of inj cands, and the index of fast_cand_array for new candidates |
897 | 0 | uint32_t cand_count = *total_cand_count; |
898 | |
|
899 | 0 | assert(simple_trans_cand->block_mi.ref_frame[1] == NONE_FRAME); |
900 | 0 | const uint8_t list_idx = get_list_idx(simple_trans_cand->block_mi.ref_frame[0]); |
901 | 0 | const uint8_t ref_idx = get_ref_frame_idx(simple_trans_cand->block_mi.ref_frame[0]); |
902 | | |
903 | | // INJECT INTER-INTRA |
904 | 0 | const uint8_t is_ii_allowed = svt_aom_is_valid_unipred_ref(ctx, INTER_INTRA_GROUP, list_idx, ref_idx) && |
905 | 0 | svt_is_interintra_allowed(ctx->inter_intra_comp_ctrls.enabled, |
906 | 0 | ctx->blk_geom->bsize, |
907 | 0 | simple_trans_cand->block_mi.mode, |
908 | 0 | simple_trans_cand->block_mi.ref_frame); |
909 | 0 | if (enable_ii && is_ii_allowed) { |
910 | 0 | ModeDecisionCandidate* cand = &ctx->fast_cand_array[cand_count]; |
911 | 0 | svt_memcpy(cand, simple_trans_cand, sizeof(ModeDecisionCandidate)); |
912 | |
|
913 | 0 | inter_intra_search(pcs, ctx, cand); |
914 | 0 | cand->block_mi.is_interintra_used = 1; |
915 | 0 | cand->block_mi.ref_frame[1] = INTRA_FRAME; |
916 | 0 | const InterIntraMode ii_mode = cand->block_mi.interintra_mode; |
917 | 0 | INC_MD_CAND_CNT(cand_count, pcs->ppcs->max_can_count); |
918 | | |
919 | | // if ii_wedge_mode is 1, then inject wedge/non-wedge as separate candidates; OW, only inject the best (above) |
920 | 0 | const uint8_t ii_wedge_mode = ctx->shape == PART_N ? ctx->inter_intra_comp_ctrls.wedge_mode_sq |
921 | 0 | : ctx->inter_intra_comp_ctrls.wedge_mode_nsq; |
922 | 0 | if (ii_wedge_mode == 1) { |
923 | 0 | cand = &ctx->fast_cand_array[cand_count]; |
924 | 0 | svt_memcpy(cand, simple_trans_cand, sizeof(ModeDecisionCandidate)); |
925 | |
|
926 | 0 | cand->block_mi.is_interintra_used = 1; |
927 | 0 | cand->block_mi.ref_frame[1] = INTRA_FRAME; |
928 | 0 | cand->block_mi.interintra_mode = ii_mode; |
929 | 0 | cand->block_mi.use_wedge_interintra = 0; |
930 | 0 | INC_MD_CAND_CNT(cand_count, pcs->ppcs->max_can_count); |
931 | 0 | } |
932 | 0 | } |
933 | |
|
934 | 0 | #if CONFIG_ENABLE_OBMC |
935 | | // INJECT WARP |
936 | 0 | const uint8_t is_warp_allowed = warped_motion_mode_allowed(pcs, ctx) && |
937 | 0 | svt_aom_is_valid_unipred_ref(ctx, WARP_GROUP, list_idx, ref_idx); |
938 | 0 | if (enable_wm && is_warp_allowed) { |
939 | 0 | ModeDecisionCandidate* cand = &ctx->fast_cand_array[cand_count]; |
940 | 0 | svt_memcpy(cand, simple_trans_cand, sizeof(ModeDecisionCandidate)); |
941 | |
|
942 | 0 | cand->block_mi.is_interintra_used = 0; |
943 | 0 | cand->block_mi.motion_mode = WARPED_CAUSAL; |
944 | 0 | cand->wm_params_l0.wmtype = AFFINE; |
945 | |
|
946 | 0 | uint8_t motion_mode_valid = 1; |
947 | 0 | if (cand->block_mi.mode == NEWMV && ctx->wm_ctrls.refinement_iterations && ctx->wm_ctrls.refine_level == 0) { |
948 | | // Perform refinement; if refinement is off, then MV is valid, since it's been checked above |
949 | 0 | motion_mode_valid = svt_aom_wm_motion_refinement(pcs, ctx, cand, 0); |
950 | 0 | } |
951 | |
|
952 | 0 | if (motion_mode_valid) { |
953 | 0 | motion_mode_valid = svt_aom_warped_motion_parameters(ctx, |
954 | 0 | cand->block_mi.mv[0], |
955 | 0 | ctx->blk_geom, |
956 | 0 | cand->block_mi.ref_frame[0], |
957 | 0 | &cand->wm_params_l0, |
958 | 0 | &cand->block_mi.num_proj_ref, |
959 | 0 | ctx->wm_ctrls.lower_band_th, |
960 | 0 | ctx->wm_ctrls.upper_band_th, |
961 | 0 | 0); |
962 | 0 | } |
963 | |
|
964 | 0 | if (motion_mode_valid) { |
965 | 0 | INC_MD_CAND_CNT(cand_count, pcs->ppcs->max_can_count); |
966 | 0 | } |
967 | 0 | } |
968 | | |
969 | | // INJECT OBMC |
970 | 0 | const uint8_t is_obmc_allowed = svt_aom_is_valid_unipred_ref(ctx, OBMC_GROUP, list_idx, ref_idx) && |
971 | 0 | (svt_aom_obmc_motion_mode_allowed(pcs, |
972 | 0 | ctx, |
973 | 0 | ctx->blk_geom->bsize, |
974 | 0 | 0, |
975 | 0 | simple_trans_cand->block_mi.ref_frame[0], |
976 | 0 | simple_trans_cand->block_mi.ref_frame[1], |
977 | 0 | simple_trans_cand->block_mi.mode) == OBMC_CAUSAL); |
978 | 0 | if (enable_obmc && is_obmc_allowed) { |
979 | 0 | ModeDecisionCandidate* cand = &ctx->fast_cand_array[cand_count]; |
980 | 0 | svt_memcpy(cand, simple_trans_cand, sizeof(ModeDecisionCandidate)); |
981 | |
|
982 | 0 | cand->block_mi.is_interintra_used = 0; |
983 | 0 | cand->block_mi.motion_mode = OBMC_CAUSAL; |
984 | |
|
985 | 0 | uint8_t motion_mode_valid = 1; |
986 | 0 | if (cand->block_mi.mode == NEWMV && ctx->obmc_ctrls.refine_level == 0) { |
987 | 0 | assert(cand->block_mi.ref_frame[1] == NONE_FRAME); |
988 | 0 | motion_mode_valid = svt_aom_obmc_motion_refinement(pcs, ctx, cand, ctx->obmc_ctrls.refine_level); |
989 | 0 | } |
990 | |
|
991 | 0 | if (motion_mode_valid) { |
992 | 0 | INC_MD_CAND_CNT(cand_count, pcs->ppcs->max_can_count); |
993 | 0 | } |
994 | 0 | } |
995 | | #else |
996 | | UNUSED(enable_wm); |
997 | | UNUSED(enable_obmc); |
998 | | #endif // CONFIG_ENABLE_OBMC |
999 | |
|
1000 | 0 | *total_cand_count = cand_count; |
1001 | 0 | } |
1002 | | |
1003 | | // Determines if inter MVP compound modes should be skipped based on info from neighbouring blocks/ref frame types. |
1004 | 0 | static bool skip_compound_on_ref_types(ModeDecisionContext* ctx, MvReferenceFrame rf[2]) { |
1005 | 0 | if (!ctx->inter_comp_ctrls.skip_on_ref_info) { |
1006 | 0 | return false; |
1007 | 0 | } |
1008 | | |
1009 | 0 | MacroBlockD* xd = ctx->blk_ptr->av1xd; |
1010 | | |
1011 | | // If both references are from the same list, skip compound |
1012 | 0 | const uint8_t list_idx_0 = get_list_idx(rf[0]); |
1013 | 0 | const uint8_t list_idx_1 = get_list_idx(rf[1]); |
1014 | 0 | if (list_idx_0 == list_idx_1) { |
1015 | 0 | return true; |
1016 | 0 | } |
1017 | | |
1018 | | // Skip compound unless neighbours selected the ref frames |
1019 | 0 | bool skip_comp = true; |
1020 | 0 | if (!xd->left_available && !xd->up_available) { |
1021 | 0 | return false; |
1022 | 0 | } |
1023 | | |
1024 | 0 | if (xd->left_available) { |
1025 | 0 | const BlockModeInfo* const left_mi = &xd->left_mbmi->block_mi; |
1026 | 0 | if ((is_inter_singleref_mode(left_mi->mode) && |
1027 | 0 | (left_mi->ref_frame[0] == rf[0] || left_mi->ref_frame[0] == rf[1])) || |
1028 | 0 | (is_inter_compound_mode(left_mi->mode) && |
1029 | 0 | (left_mi->ref_frame[0] == rf[0] && left_mi->ref_frame[1] == rf[1]))) { |
1030 | 0 | return false; |
1031 | 0 | } |
1032 | 0 | } |
1033 | 0 | if (xd->up_available) { |
1034 | 0 | const BlockModeInfo* const above_mi = &xd->above_mbmi->block_mi; |
1035 | 0 | if ((is_inter_singleref_mode(above_mi->mode) && |
1036 | 0 | (above_mi->ref_frame[0] == rf[0] || above_mi->ref_frame[0] == rf[1])) || |
1037 | 0 | (is_inter_compound_mode(above_mi->mode) && |
1038 | 0 | (above_mi->ref_frame[0] == rf[0] && above_mi->ref_frame[1] == rf[1]))) { |
1039 | 0 | return false; |
1040 | 0 | } |
1041 | 0 | } |
1042 | | |
1043 | 0 | return skip_comp; |
1044 | 0 | } |
1045 | | |
1046 | | // Inject inter-inter compound types (DIST, DIFF, WEDGE) for a bipred AVG candidate |
1047 | | // |
1048 | | // total_cand_count is the index to ctx->fast_cand_array for the next candidate injected (which is the |
1049 | | // same as the number of candidates injected so far). It is assumed the AVG candidate to base |
1050 | | // the other candidtes on is the previously injected candidate (at index total_cand_count - 1). |
1051 | 0 | static void inj_comp_modes(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t* total_cand_count) { |
1052 | | // index of MD_COMP_AVG candidate (to be used to copy cand info for other modes) |
1053 | | // assumes the avg cand is the previously injected candidate |
1054 | 0 | const uint32_t avg_cand_idx = *total_cand_count - 1; |
1055 | 0 | ModeDecisionCandidate* avg_cand = &ctx->fast_cand_array[avg_cand_idx]; |
1056 | | |
1057 | | // Get allowable compound types based on settings and block size |
1058 | 0 | MD_COMP_TYPE tot_comp_types = get_tot_comp_types_bsize(ctx->inter_comp_ctrls.tot_comp_types, ctx->blk_geom->bsize); |
1059 | 0 | if (tot_comp_types == MD_COMP_DIST) { |
1060 | 0 | return; |
1061 | 0 | } |
1062 | | |
1063 | | // Distortion-based ref pruning for compound types |
1064 | 0 | const uint8_t ref_idx_0 = get_ref_frame_idx(avg_cand->block_mi.ref_frame[0]); |
1065 | 0 | const uint8_t ref_idx_1 = get_ref_frame_idx(avg_cand->block_mi.ref_frame[1]); |
1066 | 0 | const uint8_t list_idx_0 = get_list_idx(avg_cand->block_mi.ref_frame[0]); |
1067 | 0 | const uint8_t list_idx_1 = get_list_idx(avg_cand->block_mi.ref_frame[1]); |
1068 | 0 | if (!is_valid_bipred_ref(ctx, INTER_COMP_GROUP, list_idx_0, ref_idx_0, list_idx_1, ref_idx_1)) { |
1069 | 0 | return; |
1070 | 0 | } |
1071 | | |
1072 | | // Skip compound on neighbour info |
1073 | 0 | if (skip_compound_on_ref_types(ctx, avg_cand->block_mi.ref_frame)) { |
1074 | 0 | return; |
1075 | 0 | } |
1076 | | |
1077 | | // Skip compound on MV length |
1078 | 0 | if (ctx->inter_comp_ctrls.max_mv_length) { |
1079 | 0 | const uint16_t max_mv_length = ctx->inter_comp_ctrls.max_mv_length; |
1080 | 0 | if (abs(avg_cand->block_mi.mv[0].x) > max_mv_length || abs(avg_cand->block_mi.mv[0].y) > max_mv_length || |
1081 | 0 | abs(avg_cand->block_mi.mv[1].x) > max_mv_length || abs(avg_cand->block_mi.mv[1].y) > max_mv_length) { |
1082 | 0 | return; |
1083 | 0 | } |
1084 | 0 | } |
1085 | | // If compound modes are to be tested for this block, generate the buffers that will be used in the DIFF/WEDGE search. |
1086 | | // Even if DIFF/WEDGE are not used, still call the function because it is needed for pred0_to_pred1_mult to work. |
1087 | 0 | if (tot_comp_types > MD_COMP_DIST) { |
1088 | 0 | if (svt_aom_calc_pred_masked_compound(pcs, ctx, avg_cand)) { |
1089 | 0 | return; |
1090 | 0 | } |
1091 | 0 | } |
1092 | | |
1093 | | // The candidate count to be used to track number of inj cands, and the index of fast_cand_array for new candidates |
1094 | 0 | uint32_t cand_count = *total_cand_count; |
1095 | 0 | for (MD_COMP_TYPE cur_type = MD_COMP_DIST; cur_type < tot_comp_types; cur_type++) { |
1096 | 0 | if (ctx->inter_comp_ctrls.no_sym_dist && cur_type == MD_COMP_DIST && ref_idx_0 == 0 && ref_idx_1 == 0) { |
1097 | 0 | continue; |
1098 | 0 | } |
1099 | 0 | ModeDecisionCandidate* cand = &ctx->fast_cand_array[cand_count]; |
1100 | 0 | svt_memcpy(cand, &ctx->fast_cand_array[avg_cand_idx], sizeof(ModeDecisionCandidate)); |
1101 | 0 | cand->skip_mode_allowed = false; |
1102 | 0 | determine_compound_mode(pcs, ctx, cand, cur_type); |
1103 | 0 | INC_MD_CAND_CNT(cand_count, pcs->ppcs->max_can_count); |
1104 | 0 | } |
1105 | 0 | *total_cand_count = cand_count; |
1106 | 0 | } |
1107 | | |
1108 | | static void unipred_3x3_candidates_injection(PictureControlSet* pcs, ModeDecisionContext* ctx, |
1109 | 0 | uint32_t* candidate_total_cnt) { |
1110 | 0 | uint32_t cand_total_cnt = (*candidate_total_cnt); |
1111 | 0 | const uint8_t allow_high_precision_mv = pcs->ppcs->frm_hdr.allow_high_precision_mv; |
1112 | 0 | MeSbResults* me_results = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr]; |
1113 | 0 | const uint8_t total_me_cnt = me_results->total_me_candidate_index[ctx->me_block_offset]; |
1114 | 0 | const MeCandidate* me_block_results = &me_results->me_candidate_array[ctx->me_cand_offset]; |
1115 | 0 | ModeDecisionCandidate* cand_array = ctx->fast_cand_array; |
1116 | | |
1117 | | // (8 Best_L0 neighbors) |
1118 | 0 | for (uint8_t me_candidate_index = 0; me_candidate_index < total_me_cnt; ++me_candidate_index) { |
1119 | 0 | const MeCandidate* me_block_results_ptr = &me_block_results[me_candidate_index]; |
1120 | 0 | const uint8_t inter_direction = me_block_results_ptr->direction; |
1121 | 0 | const uint8_t list0_ref_index = me_block_results_ptr->ref_idx_l0; |
1122 | 0 | const uint8_t list1_ref_index = me_block_results_ptr->ref_idx_l1; |
1123 | 0 | if (inter_direction == BI_PRED) { |
1124 | 0 | continue; |
1125 | 0 | } |
1126 | 0 | assert(inter_direction == 0 || inter_direction == 1); |
1127 | 0 | const uint8_t list_idx = inter_direction; |
1128 | 0 | const uint8_t ref_idx = list_idx == REF_LIST_0 ? list0_ref_index : list1_ref_index; |
1129 | 0 | if (!svt_aom_is_valid_unipred_ref(ctx, MIN(TOT_INTER_GROUP - 1, UNI_3x3_GROUP), list_idx, ref_idx)) { |
1130 | 0 | continue; |
1131 | 0 | } |
1132 | 0 | for (int unipred_index = 0; unipred_index < BIPRED_3x3_REFINMENT_POSITIONS; ++unipred_index) { |
1133 | | /************** |
1134 | | NEWMV L0 |
1135 | | ************* */ |
1136 | 0 | if (ctx->unipred3x3_injection >= 2) { |
1137 | 0 | if (allow_refinement_flag[unipred_index] == 0) { |
1138 | 0 | continue; |
1139 | 0 | } |
1140 | 0 | } |
1141 | 0 | Mv to_inj_mv = ctx->sb_me_mv[list_idx][ref_idx]; |
1142 | 0 | to_inj_mv.x += (bipred_3x3_x_pos[unipred_index] << !allow_high_precision_mv); |
1143 | 0 | to_inj_mv.y += (bipred_3x3_y_pos[unipred_index] << !allow_high_precision_mv); |
1144 | 0 | const uint8_t to_inject_ref_type = svt_get_ref_frame_type(list_idx, ref_idx); |
1145 | 0 | MvReferenceFrame rf[2] = {to_inject_ref_type, NONE_FRAME}; |
1146 | 0 | if ((ctx->injected_mv_count == 0 || |
1147 | 0 | mv_is_already_injected(ctx, to_inj_mv, to_inj_mv, to_inject_ref_type) == false)) { |
1148 | 0 | uint8_t drl_index = 0; |
1149 | 0 | Mv best_pred_mv[2] = {{{0}}, {{0}}}; |
1150 | 0 | svt_aom_choose_best_av1_mv_pred( |
1151 | 0 | ctx, to_inject_ref_type, NEWMV, to_inj_mv, (Mv){{0}}, &drl_index, best_pred_mv); |
1152 | 0 | if (!ctx->corrupted_mv_check || is_valid_mv_diff(best_pred_mv, to_inj_mv, to_inj_mv, 0)) { |
1153 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_total_cnt]; |
1154 | 0 | cand->block_mi.use_intrabc = 0; |
1155 | 0 | cand->skip_mode_allowed = false; |
1156 | 0 | cand->block_mi.mode = NEWMV; |
1157 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
1158 | 0 | cand->block_mi.is_interintra_used = 0; |
1159 | 0 | cand->drl_index = drl_index; |
1160 | 0 | cand->block_mi.mv[0].as_int = to_inj_mv.as_int; |
1161 | 0 | cand->block_mi.ref_frame[0] = rf[0]; |
1162 | 0 | cand->block_mi.ref_frame[1] = rf[1]; |
1163 | 0 | cand->pred_mv[0].as_int = best_pred_mv[0].as_int; |
1164 | 0 | cand->block_mi.num_proj_ref = ctx->wm_sample_info[to_inject_ref_type].num; |
1165 | |
|
1166 | 0 | INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count); |
1167 | |
|
1168 | 0 | const bool enable_ii = true; |
1169 | | // OBMC and WM perform a refinement search around the ME MV, so they are not injected as unipred3x3 candidates, |
1170 | | // since this is effectively a refinement search |
1171 | 0 | const bool enable_obmc = false; |
1172 | 0 | const bool enable_warp = false; |
1173 | 0 | inj_non_simple_modes(pcs, ctx, &cand_total_cnt, enable_ii, enable_warp, enable_obmc); |
1174 | |
|
1175 | 0 | ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv.as_int; |
1176 | 0 | ctx->injected_ref_types[ctx->injected_mv_count] = to_inject_ref_type; |
1177 | 0 | ++ctx->injected_mv_count; |
1178 | 0 | } |
1179 | 0 | } |
1180 | 0 | } |
1181 | 0 | } |
1182 | | |
1183 | | // update the total number of candidates injected |
1184 | 0 | (*candidate_total_cnt) = cand_total_cnt; |
1185 | |
|
1186 | 0 | return; |
1187 | 0 | } |
1188 | | |
1189 | | static void bipred_3x3_candidates_injection(PictureControlSet* pcs, ModeDecisionContext* ctx, |
1190 | 0 | uint32_t* candidate_total_cnt) { |
1191 | 0 | uint32_t cand_total_cnt = (*candidate_total_cnt); |
1192 | 0 | const uint8_t allow_high_precision_mv = pcs->ppcs->frm_hdr.allow_high_precision_mv; |
1193 | 0 | const MeSbResults* me_results = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr]; |
1194 | 0 | const uint8_t total_me_cnt = me_results->total_me_candidate_index[ctx->me_block_offset]; |
1195 | 0 | const MeCandidate* me_block_results = &me_results->me_candidate_array[ctx->me_cand_offset]; |
1196 | 0 | ModeDecisionCandidate* cand_array = ctx->fast_cand_array; |
1197 | 0 | Mv best_pred_mv[2] = {{{0}}, {{0}}}; |
1198 | | |
1199 | | /************** |
1200 | | NEW_NEWMV |
1201 | | ************* */ |
1202 | 0 | for (uint8_t me_candidate_index = 0; me_candidate_index < total_me_cnt; ++me_candidate_index) { |
1203 | 0 | const MeCandidate* me_block_results_ptr = &me_block_results[me_candidate_index]; |
1204 | 0 | const uint8_t inter_direction = me_block_results_ptr->direction; |
1205 | 0 | const uint8_t list0_ref_index = me_block_results_ptr->ref_idx_l0; |
1206 | 0 | const uint8_t list1_ref_index = me_block_results_ptr->ref_idx_l1; |
1207 | 0 | if (inter_direction < BI_PRED) { |
1208 | 0 | continue; |
1209 | 0 | } |
1210 | 0 | assert(inter_direction == BI_PRED); |
1211 | |
|
1212 | 0 | const uint8_t ref0_list = me_block_results_ptr->ref0_list; |
1213 | 0 | const uint8_t ref1_list = me_block_results_ptr->ref1_list; |
1214 | 0 | if (!is_valid_bipred_ref(ctx, BI_3x3_GROUP, ref0_list, list0_ref_index, ref1_list, list1_ref_index)) { |
1215 | 0 | continue; |
1216 | 0 | } |
1217 | | |
1218 | 0 | int8_t best_list = -1; |
1219 | 0 | int diff = ((int)ctx->post_subpel_me_mv_cost[ref0_list][list0_ref_index] - |
1220 | 0 | (int)ctx->post_subpel_me_mv_cost[ref1_list][list1_ref_index]) * |
1221 | 0 | 100; |
1222 | |
|
1223 | 0 | if (ctx->bipred3x3_ctrls.use_l0_l1_dev != (uint8_t)~0) { |
1224 | 0 | if (abs(diff) > |
1225 | 0 | (ctx->bipred3x3_ctrls.use_l0_l1_dev * (int)ctx->post_subpel_me_mv_cost[ref0_list][list0_ref_index])) { |
1226 | 0 | return; |
1227 | 0 | } |
1228 | 0 | } |
1229 | | |
1230 | | // Best list in terms of distortion reduction |
1231 | 0 | if (ctx->bipred3x3_ctrls.use_best_list) { |
1232 | 0 | best_list = ref0_list; |
1233 | 0 | if (diff > 0) { |
1234 | 0 | best_list = ref1_list; |
1235 | 0 | } |
1236 | 0 | } |
1237 | |
|
1238 | 0 | MvReferenceFrame rf[2] = {svt_get_ref_frame_type(ref0_list, list0_ref_index), |
1239 | 0 | svt_get_ref_frame_type(ref1_list, list1_ref_index)}; |
1240 | 0 | const uint8_t to_inject_ref_type = av1_ref_frame_type(rf); |
1241 | 0 | if (best_list == -1 || best_list == ref0_list) { |
1242 | | // (Best_L0, 8 Best_L1 neighbors) |
1243 | 0 | for (uint32_t bipred_index = 0; bipred_index < BIPRED_3x3_REFINMENT_POSITIONS; ++bipred_index) { |
1244 | 0 | if (!ctx->bipred3x3_ctrls.search_diag) { |
1245 | 0 | if (allow_refinement_flag[bipred_index] == 0) { |
1246 | 0 | continue; |
1247 | 0 | } |
1248 | 0 | } |
1249 | 0 | Mv to_inj_mv0 = ctx->sb_me_mv[ref0_list][list0_ref_index]; |
1250 | 0 | Mv to_inj_mv1 = ctx->sb_me_mv[ref1_list][list1_ref_index]; |
1251 | 0 | to_inj_mv1.x += (bipred_3x3_x_pos[bipred_index] << !allow_high_precision_mv); |
1252 | 0 | to_inj_mv1.y += (bipred_3x3_y_pos[bipred_index] << !allow_high_precision_mv); |
1253 | 0 | if ((ctx->injected_mv_count == 0 || |
1254 | 0 | mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, to_inject_ref_type) == false)) { |
1255 | 0 | uint8_t drl_index = 0; |
1256 | 0 | svt_aom_choose_best_av1_mv_pred( |
1257 | 0 | ctx, to_inject_ref_type, NEW_NEWMV, to_inj_mv0, to_inj_mv1, &drl_index, best_pred_mv); |
1258 | 0 | if (!ctx->corrupted_mv_check || is_valid_mv_diff(best_pred_mv, to_inj_mv0, to_inj_mv1, 1)) { |
1259 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_total_cnt]; |
1260 | 0 | cand->block_mi.use_intrabc = 0; |
1261 | 0 | cand->skip_mode_allowed = false; |
1262 | 0 | cand->drl_index = drl_index; |
1263 | 0 | cand->block_mi.mv[0].as_int = to_inj_mv0.as_int; |
1264 | 0 | cand->block_mi.mv[1].as_int = to_inj_mv1.as_int; |
1265 | 0 | cand->block_mi.mode = NEW_NEWMV; |
1266 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
1267 | 0 | cand->block_mi.is_interintra_used = 0; |
1268 | 0 | cand->block_mi.ref_frame[0] = rf[0]; |
1269 | 0 | cand->block_mi.ref_frame[1] = rf[1]; |
1270 | 0 | cand->pred_mv[0].as_int = best_pred_mv[0].as_int; |
1271 | 0 | cand->pred_mv[1].as_int = best_pred_mv[1].as_int; |
1272 | 0 | determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG); |
1273 | 0 | INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count); |
1274 | |
|
1275 | 0 | if (ctx->inter_comp_ctrls.do_3x3_bi) { |
1276 | 0 | ctx->cmp_store.pred0_cnt = 0; |
1277 | 0 | ctx->cmp_store.pred1_cnt = 0; |
1278 | 0 | inj_comp_modes(pcs, ctx, &cand_total_cnt); |
1279 | 0 | } |
1280 | 0 | ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int; |
1281 | 0 | ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int; |
1282 | 0 | ctx->injected_ref_types[ctx->injected_mv_count] = to_inject_ref_type; |
1283 | 0 | ++ctx->injected_mv_count; |
1284 | 0 | } |
1285 | 0 | } |
1286 | 0 | } |
1287 | 0 | } |
1288 | 0 | if (best_list == -1 || best_list == ref1_list) { |
1289 | | // (8 Best_L0 neighbors, Best_L1) : |
1290 | 0 | for (uint32_t bipred_index = 0; bipred_index < BIPRED_3x3_REFINMENT_POSITIONS; ++bipred_index) { |
1291 | 0 | if (!ctx->bipred3x3_ctrls.search_diag) { |
1292 | 0 | if (allow_refinement_flag[bipred_index] == 0) { |
1293 | 0 | continue; |
1294 | 0 | } |
1295 | 0 | } |
1296 | 0 | Mv to_inj_mv0 = ctx->sb_me_mv[ref0_list][list0_ref_index]; |
1297 | 0 | to_inj_mv0.x += (bipred_3x3_x_pos[bipred_index] << !allow_high_precision_mv); |
1298 | 0 | to_inj_mv0.y += (bipred_3x3_y_pos[bipred_index] << !allow_high_precision_mv); |
1299 | 0 | Mv to_inj_mv1 = ctx->sb_me_mv[ref1_list][list1_ref_index]; |
1300 | 0 | if ((ctx->injected_mv_count == 0 || |
1301 | 0 | mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, to_inject_ref_type) == false)) { |
1302 | 0 | uint8_t drl_index = 0; |
1303 | 0 | svt_aom_choose_best_av1_mv_pred( |
1304 | 0 | ctx, to_inject_ref_type, NEW_NEWMV, to_inj_mv0, to_inj_mv1, &drl_index, best_pred_mv); |
1305 | 0 | if (!ctx->corrupted_mv_check || is_valid_mv_diff(best_pred_mv, to_inj_mv0, to_inj_mv1, 1)) { |
1306 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_total_cnt]; |
1307 | 0 | cand->block_mi.use_intrabc = 0; |
1308 | 0 | cand->skip_mode_allowed = false; |
1309 | 0 | cand->drl_index = drl_index; |
1310 | 0 | cand->block_mi.mv[0].as_int = to_inj_mv0.as_int; |
1311 | 0 | cand->block_mi.mv[1].as_int = to_inj_mv1.as_int; |
1312 | 0 | cand->block_mi.mode = NEW_NEWMV; |
1313 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
1314 | 0 | cand->block_mi.is_interintra_used = 0; |
1315 | 0 | cand->block_mi.ref_frame[0] = rf[0]; |
1316 | 0 | cand->block_mi.ref_frame[1] = rf[1]; |
1317 | 0 | cand->pred_mv[0].as_int = best_pred_mv[0].as_int; |
1318 | 0 | cand->pred_mv[1].as_int = best_pred_mv[1].as_int; |
1319 | 0 | determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG); |
1320 | 0 | INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count); |
1321 | |
|
1322 | 0 | if (ctx->inter_comp_ctrls.do_3x3_bi) { |
1323 | 0 | ctx->cmp_store.pred0_cnt = 0; |
1324 | 0 | ctx->cmp_store.pred1_cnt = 0; |
1325 | 0 | inj_comp_modes(pcs, ctx, &cand_total_cnt); |
1326 | 0 | } |
1327 | 0 | ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int; |
1328 | 0 | ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int; |
1329 | 0 | ctx->injected_ref_types[ctx->injected_mv_count] = to_inject_ref_type; |
1330 | 0 | ++ctx->injected_mv_count; |
1331 | 0 | } |
1332 | 0 | } |
1333 | 0 | } |
1334 | 0 | } |
1335 | 0 | } |
1336 | | |
1337 | | // update the total number of candidates injected |
1338 | 0 | (*candidate_total_cnt) = cand_total_cnt; |
1339 | |
|
1340 | 0 | return; |
1341 | 0 | } |
1342 | | |
1343 | | /********************************************************************* |
1344 | | ********************************************************************** |
1345 | | Up to 12 inter candidates injected |
1346 | | Min 6 inter candidates injected |
1347 | | UniPred L0 : NEAREST + up to 3x NEAR |
1348 | | UniPred L1 : NEAREST + up to 3x NEAR |
1349 | | BiPred : NEAREST_NEAREST + up to 3x NEAR_NEAR |
     | | |
     | | Light-PD1 variant of MVP candidate injection. Walks ctx->ref_frame_type_arr |
     | | and, for each reference type, writes SIMPLE_TRANSLATION candidates built from |
     | | ctx->ref_mv_stack into ctx->fast_cand_array. Every injected MV is also logged |
     | | in ctx->injected_mvs / ctx->injected_ref_types. |
     | | |
     | | pcs          : picture control set (ME results, frame header, max_can_count) |
     | | ctx          : mode-decision context of the current block |
     | | candTotCnt   : in/out, index of the next free slot in ctx->fast_cand_array |
     | | allow_bipred : when false, compound reference types are skipped entirely |
     | | |
     | | NOTE(review): the candidate counts quoted above come from the original |
     | | banner; in the code below the NEAR / NEAR_NEAR loops run zero times unless |
     | | ctx->cand_reduction_ctrls.near_count_ctrls.enabled is set. |
1350 | | ********************************************************************** |
1351 | | **********************************************************************/ |
1352 | | static void inject_mvp_candidates_ii_light_pd1(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t* candTotCnt, |
1353 | 0 | const bool allow_bipred) { |
1354 | 0 | FrameHeader* frm_hdr = &pcs->ppcs->frm_hdr; /* read for skip_mode_params in the compound branch */ |
1355 | 0 | uint32_t cand_idx = *candTotCnt; /* running write index; stored back before returning */ |
1356 | 0 | ModeDecisionCandidate* cand_array = ctx->fast_cand_array; |
1357 | 0 | MacroBlockD* xd = ctx->blk_ptr->av1xd; |
1358 | | |
1359 | | // Visit every reference type: (1) single-ref List0 (2) single-ref List1 (3) compound Bi-Dir List0-List1 |
1360 | 0 | for (uint32_t ref_it = 0; ref_it < ctx->tot_ref_frame_types; ++ref_it) { |
1361 | 0 | MvReferenceFrame ref_pair = ctx->ref_frame_type_arr[ref_it]; |
1362 | 0 | MvReferenceFrame rf[2]; |
1363 | 0 | av1_set_ref_frame(rf, ref_pair); /* fills rf[]; rf[1] == NONE_FRAME is the single-ref test below */ |
1364 | | |
1365 | | // Single reference / single list |
1366 | 0 | if (rf[1] == NONE_FRAME) { |
1367 | 0 | MvReferenceFrame frame_type = rf[0]; |
1368 | 0 | uint8_t list_idx = get_list_idx(rf[0]); |
1369 | 0 | if (ctx->cand_reduction_ctrls.lpd1_mvp_best_me_list) { /* optional pruning against the first ME candidate */ |
1370 | 0 | const MeSbResults* me_results = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr]; |
1371 | 0 | const uint8_t total_me_cnt = me_results->total_me_candidate_index[ctx->me_block_offset]; |
1372 | 0 | const MeCandidate* me_block_results = &me_results->me_candidate_array[ctx->me_cand_offset]; |
1373 | 0 | const MeCandidate* me_block_results_ptr = &me_block_results[0]; |
1374 | 0 | const uint8_t inter_direction = me_block_results_ptr->direction; /* read even when total_me_cnt is 0 */ |
1375 | 0 | if (total_me_cnt && list_idx != inter_direction) { /* skip this ref type when its list differs from that direction */ |
1376 | 0 | continue; |
1377 | 0 | } |
1378 | 0 | } |
1379 | | // NEAREST |
1380 | | // Don't check if MV is already injected b/c NEAREST is the first INTER MV injected |
1381 | 0 | Mv to_inj_mv = {.as_int = ctx->ref_mv_stack[frame_type][0].this_mv.as_int}; |
1382 | |
|
1383 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_idx]; |
1384 | 0 | cand->block_mi.mode = NEARESTMV; |
1385 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
1386 | 0 | cand->skip_mode_allowed = false; |
1387 | 0 | cand->drl_index = 0; |
1388 | 0 | cand->block_mi.ref_frame[0] = rf[0]; |
1389 | 0 | cand->block_mi.ref_frame[1] = rf[1]; |
1390 | 0 | cand->block_mi.mv[0].as_int = to_inj_mv.as_int; |
1391 | 0 | cand->block_mi.num_proj_ref = ctx->wm_sample_info[frame_type].num; |
1392 | 0 | cand->block_mi.use_intrabc = 0; |
1393 | 0 | cand->block_mi.is_interintra_used = 0; |
1394 | 0 | INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count); |
1395 | |
|
1396 | 0 | ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv.as_int; /* unipred: only MV slot 0 is logged */ |
1397 | 0 | ctx->injected_ref_types[ctx->injected_mv_count] = frame_type; |
1398 | 0 | ++ctx->injected_mv_count; |
1399 | | // NEAR |
1400 | 0 | const uint8_t max_drl_index = svt_aom_get_max_drl_index(xd->ref_mv_count[frame_type], NEARMV); |
1401 | 0 | uint8_t cap_max_drl_index = 0; /* stays 0 (no NEAR candidates) unless near_count_ctrls is enabled */ |
1402 | 0 | if (ctx->cand_reduction_ctrls.near_count_ctrls.enabled) { |
1403 | 0 | cap_max_drl_index = MIN(ctx->cand_reduction_ctrls.near_count_ctrls.near_count, max_drl_index); |
1404 | 0 | } |
1405 | 0 | for (uint8_t drli = 0; drli < cap_max_drl_index; drli++) { |
1406 | 0 | to_inj_mv.as_int = ctx->ref_mv_stack[frame_type][1 + drli].this_mv.as_int; /* stack entry 0 was used for NEAREST */ |
1407 | |
|
1408 | 0 | if ((ctx->injected_mv_count == 0 || |
1409 | 0 | mv_is_already_injected(ctx, to_inj_mv, to_inj_mv, frame_type) == false)) { /* same MV passed for both MV arguments */ |
1410 | 0 | cand = &cand_array[cand_idx]; |
1411 | 0 | cand->block_mi.mode = NEARMV; |
1412 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
1413 | 0 | cand->skip_mode_allowed = false; |
1414 | 0 | cand->drl_index = drli; |
1415 | 0 | cand->block_mi.use_intrabc = 0; |
1416 | 0 | cand->block_mi.is_interintra_used = 0; |
1417 | 0 | cand->block_mi.ref_frame[0] = rf[0]; |
1418 | 0 | cand->block_mi.ref_frame[1] = rf[1]; |
1419 | 0 | cand->block_mi.mv[0].as_int = to_inj_mv.as_int; |
1420 | 0 | cand->block_mi.num_proj_ref = ctx->wm_sample_info[frame_type].num; |
1421 | 0 | INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count); |
1422 | |
|
1423 | 0 | ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv.as_int; |
1424 | 0 | ctx->injected_ref_types[ctx->injected_mv_count] = frame_type; |
1425 | 0 | ++ctx->injected_mv_count; |
1426 | 0 | } |
1427 | 0 | } |
1428 | 0 | } else if (allow_bipred) { /* compound reference type; nothing is injected for it when allow_bipred is false */ |
1429 | | // NEAREST_NEAREST |
1430 | | // Don't check if MV is already injected b/c NEAREST_NEAREST is the first bipred INTER candidate injected |
1431 | 0 | Mv to_inj_mv0 = {.as_int = ctx->ref_mv_stack[ref_pair][0].this_mv.as_int}; |
1432 | 0 | Mv to_inj_mv1 = {.as_int = ctx->ref_mv_stack[ref_pair][0].comp_mv.as_int}; |
1433 | 0 | const bool is_skip_mode = !svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id) && /* true only for the frame's skip-mode reference pair */ |
1434 | 0 | frm_hdr->skip_mode_params.skip_mode_flag && (rf[0] == frm_hdr->skip_mode_params.ref_frame_idx_0) && |
1435 | 0 | (rf[1] == frm_hdr->skip_mode_params.ref_frame_idx_1); |
1436 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_idx]; |
1437 | 0 | cand->block_mi.mode = NEAREST_NEARESTMV; |
1438 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
1439 | 0 | cand->skip_mode_allowed = is_skip_mode; |
1440 | 0 | cand->block_mi.mv[0].as_int = to_inj_mv0.as_int; |
1441 | 0 | cand->block_mi.mv[1].as_int = to_inj_mv1.as_int; |
1442 | 0 | cand->drl_index = 0; |
1443 | 0 | cand->block_mi.use_intrabc = 0; |
1444 | 0 | cand->block_mi.is_interintra_used = 0; |
1445 | 0 | cand->block_mi.ref_frame[0] = rf[0]; |
1446 | 0 | cand->block_mi.ref_frame[1] = rf[1]; |
1447 | 0 | cand->block_mi.comp_group_idx = 0; /* compound fields are set directly here */ |
1448 | 0 | cand->block_mi.compound_idx = 1; |
1449 | 0 | cand->block_mi.interinter_comp.type = COMPOUND_AVERAGE; |
1450 | |
|
1451 | 0 | INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count); |
1452 | |
|
1453 | 0 | ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int; /* compound: both MV slots are logged */ |
1454 | 0 | ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int; |
1455 | 0 | ctx->injected_ref_types[ctx->injected_mv_count] = ref_pair; |
1456 | 0 | ++ctx->injected_mv_count; |
1457 | | |
1458 | | // NEAR_NEAR |
1459 | 0 | const uint8_t max_drl_index = svt_aom_get_max_drl_index(xd->ref_mv_count[ref_pair], NEAR_NEARMV); |
1460 | 0 | uint8_t cap_max_drl_index = 0; /* stays 0 (no NEAR_NEAR candidates) unless near_count_ctrls is enabled */ |
1461 | 0 | if (ctx->cand_reduction_ctrls.near_count_ctrls.enabled) { |
1462 | 0 | cap_max_drl_index = MIN(ctx->cand_reduction_ctrls.near_count_ctrls.near_near_count, max_drl_index); |
1463 | 0 | } |
1464 | 0 | for (uint8_t drli = 0; drli < cap_max_drl_index; drli++) { |
1465 | 0 | to_inj_mv0.as_int = ctx->ref_mv_stack[ref_pair][1 + drli].this_mv.as_int; |
1466 | 0 | to_inj_mv1.as_int = ctx->ref_mv_stack[ref_pair][1 + drli].comp_mv.as_int; |
1467 | 0 | if ((ctx->injected_mv_count == 0 || |
1468 | 0 | mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, ref_pair) == false)) { |
1469 | 0 | cand = &cand_array[cand_idx]; |
1470 | 0 | cand->block_mi.mode = NEAR_NEARMV; |
1471 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
1472 | 0 | cand->skip_mode_allowed = false; |
1473 | 0 | cand->block_mi.use_intrabc = 0; |
1474 | 0 | cand->block_mi.is_interintra_used = 0; |
1475 | 0 | cand->block_mi.mv[0].as_int = to_inj_mv0.as_int; |
1476 | 0 | cand->block_mi.mv[1].as_int = to_inj_mv1.as_int; |
1477 | 0 | cand->drl_index = drli; |
1478 | 0 | cand->block_mi.ref_frame[0] = rf[0]; |
1479 | 0 | cand->block_mi.ref_frame[1] = rf[1]; |
1480 | 0 | cand->block_mi.comp_group_idx = 0; |
1481 | 0 | cand->block_mi.compound_idx = 1; |
1482 | 0 | cand->block_mi.interinter_comp.type = COMPOUND_AVERAGE; |
1483 | |
|
1484 | 0 | INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count); |
1485 | 0 | ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int; |
1486 | 0 | ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int; |
1487 | 0 | ctx->injected_ref_types[ctx->injected_mv_count] = ref_pair; |
1488 | 0 | ++ctx->injected_mv_count; |
1489 | 0 | } |
1490 | 0 | } |
1491 | 0 | } |
1492 | 0 | } |
1493 | | // Publish the updated candidate count to the caller |
1494 | 0 | *candTotCnt = cand_idx; |
1495 | 0 | } |
1496 | | |
1497 | | /********************************************************************* |
1498 | | ********************************************************************** |
1499 | | Up to 12 inter candidates injected |
1500 | | Min 6 inter candidates injected |
1501 | | UniPred L0 : NEAREST + up to 3x NEAR |
1502 | | UniPred L1 : NEAREST + up to 3x NEAR |
1503 | | BiPred : NEAREST_NEAREST + up to 3x NEAR_NEAR |
     | | |
     | | Full MVP candidate injection. For every reference type in |
     | | ctx->ref_frame_type_arr that passes the NRST_NEAR_GROUP validity check, |
     | | writes NEAREST/NEAR (single-ref) or NEAREST_NEAREST/NEAR_NEAR (compound) |
     | | candidates into ctx->fast_cand_array, skipping MVs that |
     | | mv_is_already_injected() reports, and logs each injected MV in |
     | | ctx->injected_mvs / ctx->injected_ref_types. After each base candidate it |
     | | calls inj_non_simple_modes() (single-ref) or, when the matching |
     | | inter_comp_ctrls flag is set, inj_comp_modes() (compound); both receive the |
     | | candidate index by pointer. |
     | | |
     | | pcs            : picture control set (frame header, max_can_count) |
     | | ctx            : mode-decision context of the current block |
     | | cand_total_cnt : in/out, index of the next free slot in ctx->fast_cand_array |
     | | allow_bipred   : when false, compound reference types are skipped entirely |
     | | |
     | | NOTE(review): the candidate counts quoted above come from the original |
     | | banner; in the code below the NEAR / NEAR_NEAR loops run zero times unless |
     | | ctx->cand_reduction_ctrls.near_count_ctrls.enabled is set. |
1504 | | ********************************************************************** |
1505 | | **********************************************************************/ |
1506 | | static void inject_mvp_candidates_ii(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t* cand_total_cnt, |
1507 | 0 | const bool allow_bipred) { |
1508 | 0 | BlkStruct* blk_ptr = ctx->blk_ptr; |
1509 | 0 | FrameHeader* frm_hdr = &pcs->ppcs->frm_hdr; /* read for skip_mode_params in the compound branch */ |
1510 | 0 | uint32_t cand_idx = *cand_total_cnt; /* running write index; stored back before returning */ |
1511 | 0 | ModeDecisionCandidate* cand_array = ctx->fast_cand_array; |
1512 | 0 | MacroBlockD* xd = blk_ptr->av1xd; |
1513 | 0 | Mv nearestmv[2], nearmv[2], ref_mv[2]; /* outputs of svt_aom_get_av1_mv_pred_drl(); only nearmv is read here */ |
1514 | | |
1515 | | // Visit every reference type: (1) single-ref List0 (2) single-ref List1 (3) compound Bi-Dir List0-List1 (4) compound Uni-Dir List0-List0 (5) compound Uni-Dir List1-List1 |
1516 | 0 | for (uint32_t ref_it = 0; ref_it < ctx->tot_ref_frame_types; ++ref_it) { |
1517 | 0 | MvReferenceFrame ref_pair = ctx->ref_frame_type_arr[ref_it]; |
1518 | 0 | MvReferenceFrame rf[2]; |
1519 | 0 | av1_set_ref_frame(rf, ref_pair); /* fills rf[]; rf[1] == NONE_FRAME is the single-ref test below */ |
1520 | | // Single reference / single list |
1521 | 0 | if (rf[1] == NONE_FRAME) { |
1522 | 0 | MvReferenceFrame frame_type = rf[0]; |
1523 | 0 | uint8_t list_idx = get_list_idx(rf[0]); |
1524 | 0 | uint8_t ref_idx = get_ref_frame_idx(rf[0]); |
1525 | | // Always consider the 2 closest ref frames (i.e. ref_idx=0) @ MVP cand generation |
1526 | 0 | if (!svt_aom_is_valid_unipred_ref(ctx, MIN(TOT_INTER_GROUP - 1, NRST_NEAR_GROUP), list_idx, ref_idx)) { |
1527 | 0 | continue; |
1528 | 0 | } |
1529 | | // NEAREST |
1530 | 0 | Mv to_inj_mv = {.as_int = ctx->ref_mv_stack[frame_type][0].this_mv.as_int}; |
1531 | 0 | if ((ctx->injected_mv_count == 0 || |
1532 | 0 | mv_is_already_injected(ctx, to_inj_mv, to_inj_mv, frame_type) == false)) { /* same MV passed for both MV arguments */ |
1533 | 0 | assert(list_idx == 0 || list_idx == 1); |
1534 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_idx]; |
1535 | 0 | cand->block_mi.mode = NEARESTMV; |
1536 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
1537 | 0 | cand->block_mi.use_intrabc = 0; |
1538 | 0 | cand->skip_mode_allowed = false; |
1539 | 0 | cand->drl_index = 0; |
1540 | 0 | cand->block_mi.ref_frame[0] = rf[0]; |
1541 | 0 | cand->block_mi.ref_frame[1] = rf[1]; |
1542 | 0 | cand->block_mi.mv[0].as_int = to_inj_mv.as_int; |
1543 | 0 | cand->block_mi.is_interintra_used = 0; |
1544 | 0 | cand->block_mi.num_proj_ref = ctx->wm_sample_info[frame_type].num; |
1545 | 0 | INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count); |
1546 | |
|
1547 | 0 | const bool enable_ii = true; |
1548 | 0 | const bool enable_obmc = true; |
1549 | 0 | const bool enable_warp = ctx->wm_ctrls.use_wm_for_mvp ? true : false; /* warp follows the MVP-specific WM control */ |
1550 | 0 | inj_non_simple_modes(pcs, ctx, &cand_idx, enable_ii, enable_warp, enable_obmc); /* gets &cand_idx, so presumably may append candidates - TODO confirm */ |
1551 | 0 | ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv.as_int; /* unipred: only MV slot 0 is logged */ |
1552 | 0 | ctx->injected_ref_types[ctx->injected_mv_count] = frame_type; |
1553 | 0 | ++ctx->injected_mv_count; |
1554 | 0 | } |
1555 | | |
1556 | | // NEAR |
1557 | 0 | const uint8_t max_drl_index = svt_aom_get_max_drl_index(xd->ref_mv_count[frame_type], NEARMV); |
1558 | 0 | uint8_t cap_max_drl_index = 0; /* stays 0 (no NEAR candidates) unless near_count_ctrls is enabled */ |
1559 | 0 | if (ctx->cand_reduction_ctrls.near_count_ctrls.enabled) { |
1560 | 0 | cap_max_drl_index = MIN(ctx->cand_reduction_ctrls.near_count_ctrls.near_count, max_drl_index); |
1561 | 0 | } |
1562 | 0 | for (uint8_t drli = 0; drli < cap_max_drl_index; drli++) { |
1563 | 0 | svt_aom_get_av1_mv_pred_drl(ctx, blk_ptr, frame_type, 0, NEARMV, drli, nearestmv, nearmv, ref_mv); |
1564 | |
|
1565 | 0 | to_inj_mv.as_int = nearmv[0].as_int; |
1566 | 0 | if ((ctx->injected_mv_count == 0 || |
1567 | 0 | mv_is_already_injected(ctx, to_inj_mv, to_inj_mv, frame_type) == false)) { |
1568 | 0 | assert(list_idx == 0 || list_idx == 1); |
1569 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_idx]; |
1570 | 0 | cand->block_mi.mode = NEARMV; |
1571 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
1572 | 0 | cand->block_mi.use_intrabc = 0; |
1573 | 0 | cand->skip_mode_allowed = false; |
1574 | 0 | cand->drl_index = drli; |
1575 | 0 | cand->block_mi.ref_frame[0] = rf[0]; |
1576 | 0 | cand->block_mi.ref_frame[1] = rf[1]; |
1577 | 0 | cand->block_mi.mv[0].as_int = to_inj_mv.as_int; |
1578 | 0 | cand->block_mi.is_interintra_used = 0; |
1579 | 0 | cand->block_mi.num_proj_ref = ctx->wm_sample_info[frame_type].num; |
1580 | 0 | INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count); |
1581 | |
|
1582 | 0 | const bool enable_ii = true; |
1583 | 0 | const bool enable_obmc = true; |
1584 | 0 | const bool enable_warp = ctx->wm_ctrls.use_wm_for_mvp ? true : false; |
1585 | 0 | inj_non_simple_modes(pcs, ctx, &cand_idx, enable_ii, enable_warp, enable_obmc); |
1586 | 0 | ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv.as_int; |
1587 | 0 | ctx->injected_ref_types[ctx->injected_mv_count] = frame_type; |
1588 | 0 | ++ctx->injected_mv_count; |
1589 | 0 | } |
1590 | 0 | } |
1591 | 0 | } else if (allow_bipred) { /* compound reference type; nothing is injected for it when allow_bipred is false */ |
1592 | 0 | const uint8_t ref_idx_0 = get_ref_frame_idx(rf[0]); |
1593 | 0 | const uint8_t ref_idx_1 = get_ref_frame_idx(rf[1]); |
1594 | |
|
1595 | 0 | const uint8_t list_idx_0 = get_list_idx(rf[0]); |
1596 | 0 | const uint8_t list_idx_1 = get_list_idx(rf[1]); |
1597 | |
|
1598 | 0 | ctx->cmp_store.pred0_cnt = 0; /* reset once per compound reference type, before the validity check */ |
1599 | 0 | ctx->cmp_store.pred1_cnt = 0; |
1600 | | |
1601 | | // Always consider the 2 closest ref frames (i.e. ref_idx=0) @ MVP cand generation |
1602 | 0 | if (!is_valid_bipred_ref(ctx, NRST_NEAR_GROUP, list_idx_0, ref_idx_0, list_idx_1, ref_idx_1)) { |
1603 | 0 | continue; |
1604 | 0 | } |
1605 | | |
1606 | | // NEAREST_NEAREST |
1607 | 0 | Mv to_inj_mv0 = {.as_int = ctx->ref_mv_stack[ref_pair][0].this_mv.as_int}; |
1608 | 0 | Mv to_inj_mv1 = {.as_int = ctx->ref_mv_stack[ref_pair][0].comp_mv.as_int}; |
1609 | 0 | if ((ctx->injected_mv_count == 0 || |
1610 | 0 | mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, ref_pair) == false)) { |
1611 | 0 | const bool is_skip_mode = !svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id) && /* true only for the frame's skip-mode reference pair */ |
1612 | 0 | frm_hdr->skip_mode_params.skip_mode_flag && (rf[0] == frm_hdr->skip_mode_params.ref_frame_idx_0) && |
1613 | 0 | (rf[1] == frm_hdr->skip_mode_params.ref_frame_idx_1); |
1614 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_idx]; |
1615 | 0 | cand->block_mi.mode = NEAREST_NEARESTMV; |
1616 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
1617 | 0 | cand->block_mi.is_interintra_used = 0; |
1618 | 0 | cand->block_mi.use_intrabc = 0; |
1619 | 0 | cand->skip_mode_allowed = /*cur_type == MD_COMP_AVG &&*/ is_skip_mode ? true : false; |
1620 | 0 | cand->block_mi.mv[0].as_int = to_inj_mv0.as_int; |
1621 | 0 | cand->block_mi.mv[1].as_int = to_inj_mv1.as_int; |
1622 | 0 | cand->drl_index = 0; |
1623 | 0 | cand->block_mi.ref_frame[0] = rf[0]; |
1624 | 0 | cand->block_mi.ref_frame[1] = rf[1]; |
1625 | 0 | determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG); |
1626 | 0 | INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count); |
1627 | |
|
1628 | 0 | if (ctx->inter_comp_ctrls.do_nearest_nearest) { |
1629 | | // Don't reset ctx->cmp_store.pred0_cnt for MVP |
1630 | 0 | inj_comp_modes(pcs, ctx, &cand_idx); |
1631 | 0 | } |
1632 | 0 | ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int; /* compound: both MV slots are logged */ |
1633 | 0 | ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int; |
1634 | 0 | ctx->injected_ref_types[ctx->injected_mv_count] = ref_pair; |
1635 | 0 | ++ctx->injected_mv_count; |
1636 | 0 | } |
1637 | | |
1638 | | // NEAR_NEAR |
1639 | 0 | const uint8_t max_drl_index = svt_aom_get_max_drl_index(xd->ref_mv_count[ref_pair], NEAR_NEARMV); |
1640 | 0 | uint8_t cap_max_drl_index = 0; /* stays 0 (no NEAR_NEAR candidates) unless near_count_ctrls is enabled */ |
1641 | 0 | if (ctx->cand_reduction_ctrls.near_count_ctrls.enabled) { |
1642 | 0 | cap_max_drl_index = MIN(ctx->cand_reduction_ctrls.near_count_ctrls.near_near_count, max_drl_index); |
1643 | 0 | } |
1644 | 0 | for (uint8_t drli = 0; drli < cap_max_drl_index; drli++) { |
1645 | 0 | svt_aom_get_av1_mv_pred_drl(ctx, blk_ptr, ref_pair, 1, NEAR_NEARMV, drli, nearestmv, nearmv, ref_mv); |
1646 | |
|
1647 | 0 | to_inj_mv0.as_int = nearmv[0].as_int; |
1648 | 0 | to_inj_mv1.as_int = nearmv[1].as_int; |
1649 | 0 | if ((ctx->injected_mv_count == 0 || |
1650 | 0 | mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, ref_pair) == false)) { |
1651 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_idx]; |
1652 | 0 | cand->block_mi.mode = NEAR_NEARMV; |
1653 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
1654 | 0 | cand->block_mi.is_interintra_used = 0; |
1655 | 0 | cand->block_mi.use_intrabc = 0; |
1656 | 0 | cand->skip_mode_allowed = false; |
1657 | 0 | cand->block_mi.mv[0].as_int = to_inj_mv0.as_int; |
1658 | 0 | cand->block_mi.mv[1].as_int = to_inj_mv1.as_int; |
1659 | 0 | cand->drl_index = drli; |
1660 | 0 | cand->block_mi.ref_frame[0] = rf[0]; |
1661 | 0 | cand->block_mi.ref_frame[1] = rf[1]; |
1662 | 0 | determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG); |
1663 | 0 | INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count); |
1664 | |
|
1665 | 0 | if (ctx->inter_comp_ctrls.do_near_near) { |
1666 | | // Don't reset ctx->cmp_store.pred0_cnt for MVP |
1667 | 0 | inj_comp_modes(pcs, ctx, &cand_idx); |
1668 | 0 | } |
1669 | 0 | ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int; |
1670 | 0 | ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int; |
1671 | 0 | ctx->injected_ref_types[ctx->injected_mv_count] = ref_pair; |
1672 | 0 | ++ctx->injected_mv_count; |
1673 | 0 | } |
1674 | 0 | } |
1675 | 0 | } |
1676 | 0 | } |
1677 | | // Publish the updated candidate count to the caller |
1678 | 0 | *cand_total_cnt = cand_idx; |
1679 | 0 | } |
1680 | | |
1681 | | static void inject_new_nearest_new_comb_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx, |
1682 | 0 | uint32_t* cand_tot_cnt) { |
     | | // Injects compound candidates that pair one ME-derived (NEW) MV from ctx->sb_me_mv with one |
     | | // MV-stack (NEAREST/NEAR) MV: NEAREST_NEWMV and NEW_NEARESTMV always, NEW_NEARMV and NEAR_NEWMV |
     | | // only when ctx->new_nearest_near_comb_injection < 2. Single-reference types are ignored. |
     | | //   pcs          : picture control set (ME results, max_can_count) |
     | | //   ctx          : mode-decision context of the current block |
     | | //   cand_tot_cnt : in/out, index of the next free slot in ctx->fast_cand_array |
     | | // A candidate is injected only if its MV pair is not already logged in ctx->injected_mvs and ME |
     | | // data is present for the list/reference the NEW MV is read from. |
1683 | 0 | uint32_t cand_idx = *cand_tot_cnt; /* running write index; stored back before returning */ |
1684 | 0 | ModeDecisionCandidate* cand_array = ctx->fast_cand_array; |
1685 | 0 | MacroBlockD* xd = ctx->blk_ptr->av1xd; |
1686 | 0 | Mv nearestmv[2], nearmv[2], ref_mv[2]; /* outputs of svt_aom_get_av1_mv_pred_drl() */ |
1687 | | |
1688 | | // Visit every reference type: (1) single-ref List0 (2) single-ref List1 (3) compound Bi-Dir List0-List1 (4) compound Uni-Dir List0-List0 (5) compound Uni-Dir List1-List1 |
1689 | 0 | for (uint32_t ref_it = 0; ref_it < ctx->tot_ref_frame_types; ++ref_it) { |
1690 | 0 | MvReferenceFrame ref_pair = ctx->ref_frame_type_arr[ref_it]; |
1691 | 0 | MvReferenceFrame rf[2]; |
1692 | 0 | av1_set_ref_frame(rf, ref_pair); |
1693 | 0 | if (rf[1] != NONE_FRAME) { /* compound reference types only */ |
1694 | 0 | const uint8_t ref_idx_0 = get_ref_frame_idx(rf[0]); |
1695 | 0 | const uint8_t ref_idx_1 = get_ref_frame_idx(rf[1]); |
1696 | 0 | const uint8_t list_idx_0 = get_list_idx(rf[0]); |
1697 | 0 | const uint8_t list_idx_1 = get_list_idx(rf[1]); |
1698 | 0 | if (!svt_aom_is_valid_unipred_ref( |
1699 | 0 | ctx, MIN(TOT_INTER_GROUP - 1, NRST_NEW_NEAR_GROUP), list_idx_0, ref_idx_0) || |
1700 | 0 | !svt_aom_is_valid_unipred_ref( |
1701 | 0 | ctx, MIN(TOT_INTER_GROUP - 1, NRST_NEW_NEAR_GROUP), list_idx_1, ref_idx_1)) { |
1702 | 0 | continue; |
1703 | 0 | } |
1704 | | |
1705 | 0 | { |
1706 | | // NEAREST_NEWMV: MV0 from the MV stack, MV1 from ME |
1707 | 0 | const MeSbResults* me_results = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr]; |
1708 | 0 | Mv to_inj_mv0 = {.as_int = ctx->ref_mv_stack[ref_pair][0].this_mv.as_int}; |
1709 | 0 | Mv to_inj_mv1 = ctx->sb_me_mv[list_idx_1][ref_idx_1]; |
1710 | 0 | bool inj_mv = |
1711 | 0 | (ctx->injected_mv_count == 0 || !mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, ref_pair)) && |
1712 | 0 | svt_aom_is_me_data_present( |
1713 | 0 | ctx->me_block_offset, ctx->me_cand_offset, me_results, list_idx_1, ref_idx_1); |
1714 | 0 | if (inj_mv) { |
1715 | 0 | svt_aom_get_av1_mv_pred_drl(ctx, |
1716 | 0 | ctx->blk_ptr, |
1717 | 0 | ref_pair, |
1718 | 0 | 1, // is_compound |
1719 | 0 | NEAREST_NEWMV, |
1720 | 0 | 0, //not needed drli, |
1721 | 0 | nearestmv, |
1722 | 0 | nearmv, |
1723 | 0 | ref_mv); |
1724 | |
|
1725 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_idx]; |
1726 | 0 | cand->block_mi.mode = NEAREST_NEWMV; |
1727 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
1728 | 0 | cand->block_mi.is_interintra_used = 0; |
1729 | 0 | cand->block_mi.use_intrabc = 0; |
1730 | 0 | cand->skip_mode_allowed = false; |
1731 | 0 | cand->block_mi.mv[0].as_int = to_inj_mv0.as_int; |
1732 | 0 | cand->block_mi.mv[1].as_int = to_inj_mv1.as_int; |
1733 | 0 | cand->drl_index = 0; |
1734 | 0 | cand->block_mi.ref_frame[0] = rf[0]; |
1735 | 0 | cand->block_mi.ref_frame[1] = rf[1]; |
1736 | 0 | cand->pred_mv[1].as_int = ref_mv[1].as_int; /* predictor stored only for the NEW (second) MV */ |
1737 | 0 | determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG); |
1738 | 0 | INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count); |
1739 | |
|
1740 | 0 | if (ctx->inter_comp_ctrls.do_nearest_near_new) { |
1741 | 0 | ctx->cmp_store.pred0_cnt = 0; /* counters are reset before each inj_comp_modes() call in this function */ |
1742 | 0 | ctx->cmp_store.pred1_cnt = 0; |
1743 | 0 | inj_comp_modes(pcs, ctx, &cand_idx); |
1744 | 0 | } |
1745 | 0 | ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int; |
1746 | 0 | ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int; |
1747 | 0 | ctx->injected_ref_types[ctx->injected_mv_count] = ref_pair; |
1748 | 0 | ++ctx->injected_mv_count; |
1749 | 0 | } |
1750 | 0 | } |
1751 | |
|
1752 | 0 | { |
1753 | | // NEW_NEARESTMV: MV0 from ME, MV1 from the MV stack |
1754 | 0 | const MeSbResults* me_results = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr]; |
1755 | 0 | Mv to_inj_mv0 = ctx->sb_me_mv[list_idx_0][ref_idx_0]; |
1756 | 0 | Mv to_inj_mv1 = {.as_int = ctx->ref_mv_stack[ref_pair][0].comp_mv.as_int}; |
1757 | 0 | bool inj_mv = (ctx->injected_mv_count == 0 || |
1758 | 0 | !mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, ref_pair)) && |
1759 | 0 | svt_aom_is_me_data_present(ctx->me_block_offset, ctx->me_cand_offset, me_results, list_idx_0, ref_idx_0); /* check the list the NEW MV is read from, not a fixed list 0 */ |
1760 | 0 | if (inj_mv) { |
1761 | 0 | svt_aom_get_av1_mv_pred_drl(ctx, |
1762 | 0 | ctx->blk_ptr, |
1763 | 0 | ref_pair, |
1764 | 0 | 1, // is_compound |
1765 | 0 | NEW_NEARESTMV, |
1766 | 0 | 0, //not needed drli, |
1767 | 0 | nearestmv, |
1768 | 0 | nearmv, |
1769 | 0 | ref_mv); |
1770 | |
|
1771 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_idx]; |
1772 | 0 | cand->block_mi.mode = NEW_NEARESTMV; |
1773 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
1774 | 0 | cand->block_mi.is_interintra_used = 0; |
1775 | 0 | cand->block_mi.use_intrabc = 0; |
1776 | 0 | cand->skip_mode_allowed = false; |
1777 | 0 | cand->block_mi.mv[0].as_int = to_inj_mv0.as_int; |
1778 | 0 | cand->block_mi.mv[1].as_int = to_inj_mv1.as_int; |
1779 | 0 | cand->drl_index = 0; |
1780 | 0 | cand->block_mi.ref_frame[0] = rf[0]; |
1781 | 0 | cand->block_mi.ref_frame[1] = rf[1]; |
1782 | 0 | cand->pred_mv[0].as_int = ref_mv[0].as_int; /* predictor stored only for the NEW (first) MV */ |
1783 | 0 | determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG); |
1784 | 0 | INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count); |
1785 | |
|
1786 | 0 | if (ctx->inter_comp_ctrls.do_nearest_near_new) { |
1787 | 0 | ctx->cmp_store.pred0_cnt = 0; |
1788 | 0 | ctx->cmp_store.pred1_cnt = 0; |
1789 | 0 | inj_comp_modes(pcs, ctx, &cand_idx); |
1790 | 0 | } |
1791 | 0 | ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int; |
1792 | 0 | ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int; |
1793 | 0 | ctx->injected_ref_types[ctx->injected_mv_count] = ref_pair; |
1794 | 0 | ++ctx->injected_mv_count; |
1795 | 0 | } |
1796 | 0 | } |
1797 | | // For level 2, only inject NEAREST_NEW/NEW_NEAREST candidates |
1798 | 0 | if (ctx->new_nearest_near_comb_injection >= 2) { |
1799 | 0 | continue; |
1800 | 0 | } |
1801 | | |
1802 | | // NEW_NEARMV: MV0 from ME, MV1 from each NEAR entry of the DRL |
1803 | 0 | { |
1804 | 0 | const uint8_t max_drl_index = svt_aom_get_max_drl_index(xd->ref_mv_count[ref_pair], NEW_NEARMV); |
1805 | |
|
1806 | 0 | for (uint8_t drli = 0; drli < max_drl_index; drli++) { |
1807 | 0 | svt_aom_get_av1_mv_pred_drl( |
1808 | 0 | ctx, ctx->blk_ptr, ref_pair, 1, NEW_NEARMV, drli, nearestmv, nearmv, ref_mv); |
1809 | | |
1810 | | //NEW_NEARMV |
1811 | 0 | const MeSbResults* me_results = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr]; |
1812 | 0 | Mv to_inj_mv0 = ctx->sb_me_mv[list_idx_0][ref_idx_0]; |
1813 | 0 | Mv to_inj_mv1 = {.as_int = nearmv[1].as_int}; |
1814 | 0 | bool inj_mv = (ctx->injected_mv_count == 0 || |
1815 | 0 | !mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, ref_pair)) && |
1816 | 0 | svt_aom_is_me_data_present(ctx->me_block_offset, ctx->me_cand_offset, me_results, list_idx_0, ref_idx_0); /* check the list the NEW MV is read from, not a fixed list 0 */ |
1817 | 0 | if (inj_mv) { |
1818 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_idx]; |
1819 | 0 | cand->block_mi.mode = NEW_NEARMV; |
1820 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
1821 | 0 | cand->block_mi.is_interintra_used = 0; |
1822 | 0 | cand->block_mi.use_intrabc = 0; |
1823 | 0 | cand->skip_mode_allowed = false; |
1824 | 0 | cand->block_mi.mv[0].as_int = to_inj_mv0.as_int; |
1825 | 0 | cand->block_mi.mv[1].as_int = to_inj_mv1.as_int; |
1826 | 0 | cand->drl_index = drli; |
1827 | 0 | cand->block_mi.ref_frame[0] = rf[0]; |
1828 | 0 | cand->block_mi.ref_frame[1] = rf[1]; |
1829 | 0 | cand->pred_mv[0].as_int = ref_mv[0].as_int; |
1830 | 0 | determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG); |
1831 | 0 | INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count); |
1832 | |
|
1833 | 0 | if (ctx->inter_comp_ctrls.do_nearest_near_new) { |
1834 | 0 | ctx->cmp_store.pred0_cnt = 0; |
1835 | 0 | ctx->cmp_store.pred1_cnt = 0; |
1836 | 0 | inj_comp_modes(pcs, ctx, &cand_idx); |
1837 | 0 | } |
1838 | 0 | ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int; |
1839 | 0 | ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int; |
1840 | 0 | ctx->injected_ref_types[ctx->injected_mv_count] = ref_pair; |
1841 | 0 | ++ctx->injected_mv_count; |
1842 | 0 | } |
1843 | 0 | } |
1844 | 0 | } |
1845 | | // NEAR_NEWMV: MV0 from each NEAR entry of the DRL, MV1 from ME |
1846 | 0 | { |
1847 | 0 | uint8_t max_drl_index = svt_aom_get_max_drl_index(xd->ref_mv_count[ref_pair], NEAR_NEWMV); |
1848 | |
|
1849 | 0 | for (uint8_t drli = 0; drli < max_drl_index; drli++) { |
1850 | 0 | svt_aom_get_av1_mv_pred_drl( |
1851 | 0 | ctx, ctx->blk_ptr, ref_pair, 1, NEAR_NEWMV, drli, nearestmv, nearmv, ref_mv); |
1852 | | |
1853 | | //NEAR_NEWMV |
1854 | 0 | const MeSbResults* me_results = pcs->ppcs->pa_me_data->me_results[ctx->me_sb_addr]; |
1855 | 0 | Mv to_inj_mv0 = {.as_int = nearmv[0].as_int}; |
1856 | 0 | Mv to_inj_mv1 = ctx->sb_me_mv[list_idx_1][ref_idx_1]; |
1857 | 0 | bool inj_mv = (ctx->injected_mv_count == 0 || |
1858 | 0 | !mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, ref_pair)) && |
1859 | 0 | svt_aom_is_me_data_present( |
1860 | 0 | ctx->me_block_offset, ctx->me_cand_offset, me_results, list_idx_1, ref_idx_1); |
1861 | |
|
1862 | 0 | if (inj_mv) { |
1863 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_idx]; |
1864 | 0 | cand->block_mi.mode = NEAR_NEWMV; |
1865 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
1866 | 0 | cand->block_mi.is_interintra_used = 0; |
1867 | 0 | cand->block_mi.use_intrabc = 0; |
1868 | 0 | cand->skip_mode_allowed = false; |
1869 | 0 | cand->block_mi.mv[0].as_int = to_inj_mv0.as_int; |
1870 | 0 | cand->block_mi.mv[1].as_int = to_inj_mv1.as_int; |
1871 | 0 | cand->drl_index = drli; |
1872 | 0 | cand->block_mi.ref_frame[0] = rf[0]; |
1873 | 0 | cand->block_mi.ref_frame[1] = rf[1]; |
1874 | 0 | cand->pred_mv[1].as_int = ref_mv[1].as_int; |
1875 | 0 | determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG); |
1876 | 0 | INC_MD_CAND_CNT(cand_idx, pcs->ppcs->max_can_count); |
1877 | |
|
1878 | 0 | if (ctx->inter_comp_ctrls.do_nearest_near_new) { |
1879 | 0 | ctx->cmp_store.pred0_cnt = 0; |
1880 | 0 | ctx->cmp_store.pred1_cnt = 0; |
1881 | 0 | inj_comp_modes(pcs, ctx, &cand_idx); |
1882 | 0 | } |
1883 | 0 | ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int; |
1884 | 0 | ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int; |
1885 | 0 | ctx->injected_ref_types[ctx->injected_mv_count] = ref_pair; |
1886 | 0 | ++ctx->injected_mv_count; |
1887 | 0 | } |
1888 | 0 | } |
1889 | 0 | } |
1890 | 0 | } |
1891 | 0 | } |
1892 | | // Publish the updated candidate count to the caller |
1893 | 0 | *cand_tot_cnt = cand_idx; |
1894 | 0 | } |
1895 | | |
1896 | | // Refine the WM MV (8 bit search). Return true if search found a valid MV; false otherwise |
1897 | | uint8_t svt_aom_wm_motion_refinement(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidate* cand, |
1898 | 0 | const bool shut_approx) { |
1899 | 0 | PictureParentControlSet* ppcs = pcs->ppcs; |
1900 | 0 | const Mv neighbors[9] = { |
1901 | 0 | {{0, 0}}, {{-1, 0}}, {{0, 1}}, {{1, 0}}, {{0, -1}}, {{1, -1}}, {{1, 1}}, {{-1, 1}}, {{-1, -1}}}; |
1902 | | |
1903 | | // Set info used to get MV cost |
1904 | 0 | int* mvjcost = ctx->md_rate_est_ctx->nmv_vec_cost; |
1905 | 0 | const int** mvcost = ctx->md_rate_est_ctx->nmvcoststack; |
1906 | 0 | uint32_t full_lambda = ctx->full_lambda_md[EB_8_BIT_MD]; // 8bit only |
1907 | 0 | int error_per_bit = full_lambda >> RD_EPB_SHIFT; |
1908 | 0 | error_per_bit += (error_per_bit == 0); |
1909 | 0 | EbPictureBufferDesc* input_pic = ppcs->enhanced_pic; // 10BIT not supported |
1910 | 0 | uint32_t input_origin_index = (ctx->blk_org_y) * input_pic->y_stride + (ctx->blk_org_x); |
1911 | 0 | const AomVarianceFnPtr* fn_ptr = &svt_aom_mefn_ptr[ctx->blk_geom->bsize]; |
1912 | 0 | unsigned int sse; |
1913 | 0 | uint8_t* src_y = input_pic->y_buffer + input_origin_index; |
1914 | |
|
1915 | 0 | int mv_prec_shift = ppcs->frm_hdr.allow_high_precision_mv ? 0 : 1; |
1916 | 0 | int best_cost = INT_MAX; |
1917 | | // local WM always uses one ref - MV for ref0 stored in idx0 |
1918 | 0 | assert(cand->block_mi.ref_frame[1] == NONE_FRAME); |
1919 | 0 | Mv search_centre_mv = {.as_int = cand->block_mi.mv[0].as_int}; |
1920 | 0 | Mv best_mv = {.as_int = cand->block_mi.mv[0].as_int}; |
1921 | 0 | Mv prev_mv = {.as_int = cand->block_mi.mv[0].as_int}; |
1922 | 0 | const Mv ref_mv = {.as_int = cand->pred_mv[0].as_int}; |
1923 | |
|
1924 | 0 | int max_iterations = ctx->wm_ctrls.refinement_iterations; |
1925 | 0 | int tot_checked_pos = 0; |
1926 | 0 | uint32_t mv_record[256]; |
1927 | 0 | for (int iter = 0; iter < max_iterations; iter++) { |
1928 | | // search the (0,0) offset position only for the first search iteration |
1929 | 0 | for (int i = (iter ? 1 : 0); i < (ctx->wm_ctrls.refine_diag ? 9 : 5); i++) { |
1930 | 0 | const Mv test_mv = (Mv){{search_centre_mv.x + (neighbors[i].x << mv_prec_shift), |
1931 | 0 | search_centre_mv.y + (neighbors[i].y << mv_prec_shift)}}; |
1932 | | |
1933 | | // Don't re-test previously tested positions |
1934 | 0 | if (iter) { |
1935 | 0 | if (prev_mv.as_int == test_mv.as_int) { |
1936 | 0 | continue; |
1937 | 0 | } |
1938 | 0 | int match_found = 0; |
1939 | 0 | for (int j = 0; j < tot_checked_pos; j++) { |
1940 | 0 | if (test_mv.as_int == mv_record[j]) { |
1941 | 0 | match_found = 1; |
1942 | 0 | } |
1943 | 0 | } |
1944 | 0 | if (match_found) { |
1945 | 0 | continue; |
1946 | 0 | } |
1947 | 0 | } |
1948 | 0 | mv_record[tot_checked_pos++] = test_mv.as_int; |
1949 | 0 | uint8_t local_warp_valid = svt_aom_warped_motion_parameters(ctx, |
1950 | 0 | test_mv, |
1951 | 0 | ctx->blk_geom, |
1952 | 0 | cand->block_mi.ref_frame[0], |
1953 | 0 | &cand->wm_params_l0, |
1954 | 0 | &cand->block_mi.num_proj_ref, |
1955 | 0 | ctx->wm_ctrls.lower_band_th, |
1956 | 0 | ctx->wm_ctrls.upper_band_th, |
1957 | 0 | shut_approx); |
1958 | 0 | if (!local_warp_valid) { |
1959 | 0 | continue; |
1960 | 0 | } |
1961 | 0 | assert(cand->block_mi.ref_frame[1] == NONE_FRAME); |
1962 | 0 | EbPictureBufferDesc* ref_pic_0 = svt_aom_get_ref_pic_buffer(pcs, cand->block_mi.ref_frame[0]); |
1963 | 0 | EbPictureBufferDesc* ref_pic_1 = NULL; // will stay NULL b/c this is unipred candidate |
1964 | | |
1965 | | // update MV to be testing MV before calling prediction function |
1966 | 0 | cand->block_mi.mv[0].as_int = test_mv.as_int; |
1967 | 0 | svt_aom_inter_prediction(pcs->scs, |
1968 | 0 | pcs, |
1969 | 0 | &cand->block_mi, |
1970 | 0 | &cand->wm_params_l0, |
1971 | 0 | &cand->wm_params_l1, |
1972 | 0 | ctx->blk_ptr, |
1973 | 0 | ctx->blk_geom->bsize, |
1974 | 0 | ctx->shape, |
1975 | | // If using 8bit MD for HBD content, can't use pre-computed OBMC/II to |
1976 | | // generate conformant recon |
1977 | 0 | true, //use_precomputed_obmc - not used here |
1978 | 0 | true, //use_precomputed_ii - not used here |
1979 | 0 | ctx, |
1980 | 0 | ctx->recon_neigh_y, |
1981 | 0 | ctx->recon_neigh_cb, |
1982 | 0 | ctx->recon_neigh_cr, |
1983 | 0 | ref_pic_0, |
1984 | 0 | ref_pic_1, // this is NULL |
1985 | 0 | ctx->blk_org_x, |
1986 | 0 | ctx->blk_org_y, |
1987 | 0 | ctx->scratch_prediction_ptr, |
1988 | 0 | 0, |
1989 | 0 | 0, |
1990 | 0 | PICTURE_BUFFER_DESC_LUMA_MASK, |
1991 | 0 | EB_EIGHT_BIT, |
1992 | 0 | 0); // is_16bit_pipeline |
1993 | |
|
1994 | 0 | int var = fn_ptr->vf(ctx->scratch_prediction_ptr->y_buffer, |
1995 | 0 | ctx->scratch_prediction_ptr->y_stride, |
1996 | 0 | src_y, |
1997 | 0 | input_pic->y_stride, |
1998 | 0 | &sse); |
1999 | 0 | if (ctx->approx_inter_rate) { |
2000 | 0 | var += svt_aom_mv_err_cost_light(&test_mv, &ref_mv); |
2001 | 0 | } else { |
2002 | 0 | var += svt_aom_mv_err_cost(&test_mv, &ref_mv, mvjcost, mvcost, error_per_bit); |
2003 | 0 | } |
2004 | |
|
2005 | 0 | if (var < best_cost) { |
2006 | 0 | best_mv.as_int = test_mv.as_int; |
2007 | 0 | best_cost = var; |
2008 | 0 | } |
2009 | 0 | } |
2010 | 0 | prev_mv.as_int = search_centre_mv.as_int; |
2011 | 0 | search_centre_mv.as_int = best_mv.as_int; |
2012 | 0 | if (prev_mv.as_int == best_mv.as_int) { |
2013 | 0 | break; |
2014 | 0 | } |
2015 | 0 | } |
2016 | 0 | cand->block_mi.mv[0].as_int = best_mv.as_int; |
2017 | | |
2018 | | // Derive pred MV for best WM position |
2019 | 0 | Mv best_pred_mv[2] = {{{0}}, {{0}}}; |
2020 | 0 | svt_aom_choose_best_av1_mv_pred(ctx, |
2021 | 0 | cand->block_mi.ref_frame[0], // WM only allowed for unipred cands |
2022 | 0 | cand->block_mi.mode, |
2023 | 0 | cand->block_mi.mv[0], |
2024 | 0 | (Mv){{0}}, |
2025 | 0 | &cand->drl_index, |
2026 | 0 | best_pred_mv); |
2027 | 0 | cand->pred_mv[0].as_int = best_pred_mv[0].as_int; |
2028 | | |
2029 | | // Check that final chosen MV is valid |
2030 | 0 | if (!ctx->corrupted_mv_check || is_valid_mv_diff(best_pred_mv, best_mv, best_mv, 0)) { |
2031 | 0 | return 1; |
2032 | 0 | } |
2033 | | |
2034 | 0 | return 0; |
2035 | 0 | } |
2036 | | |
2037 | | static INLINE void setup_pred_plane(Buf2D* dst, BlockSize bsize, uint8_t* src, int width, int height, int stride, |
2038 | 0 | int mi_row, int mi_col, int subsampling_x, int subsampling_y) { |
2039 | | // Offset the buffer pointer |
2040 | 0 | if (subsampling_y && (mi_row & 0x01) && (mi_size_high[bsize] == 1)) { |
2041 | 0 | mi_row -= 1; |
2042 | 0 | } |
2043 | 0 | if (subsampling_x && (mi_col & 0x01) && (mi_size_wide[bsize] == 1)) { |
2044 | 0 | mi_col -= 1; |
2045 | 0 | } |
2046 | |
|
2047 | 0 | const int x = (MI_SIZE * mi_col) >> subsampling_x; |
2048 | 0 | const int y = (MI_SIZE * mi_row) >> subsampling_y; |
2049 | 0 | dst->buf = src + (y * stride + x); // scaled_buffer_offset(x, y, stride, scale); |
2050 | 0 | dst->buf0 = src; |
2051 | 0 | dst->width = width; |
2052 | 0 | dst->height = height; |
2053 | 0 | dst->stride = stride; |
2054 | 0 | } |
2055 | | |
2056 | | void svt_av1_setup_pred_block(BlockSize bsize, Buf2D dst[MAX_PLANES], const Yv12BufferConfig* src, int mi_row, |
2057 | 0 | int mi_col) { |
2058 | 0 | dst[0].buf = src->y_buffer; |
2059 | 0 | dst[0].stride = src->y_stride; |
2060 | 0 | dst[1].buf = src->u_buffer; |
2061 | 0 | dst[2].buf = src->v_buffer; |
2062 | 0 | dst[1].stride = dst[2].stride = src->uv_stride; |
2063 | |
|
2064 | 0 | setup_pred_plane( |
2065 | 0 | dst, bsize, dst[0].buf, src->y_crop_width, src->y_crop_height, dst[0].stride, mi_row, mi_col, 0, 0); |
2066 | 0 | } |
2067 | | |
2068 | | static int sad_per_bit_lut_8[QINDEX_RANGE]; |
2069 | | static int sad_per_bit_lut_10[QINDEX_RANGE]; |
2070 | | |
2071 | | // Get the sad per bit for the relevant qindex and bit depth |
2072 | 0 | int svt_aom_get_sad_per_bit(int qidx, EbBitDepth is_hbd) { |
2073 | 0 | return is_hbd ? sad_per_bit_lut_10[qidx] : sad_per_bit_lut_8[qidx]; |
2074 | 0 | } |
2075 | | |
2076 | 2 | static void init_me_luts_bd(int* bit16lut, int range, EbBitDepth bit_depth) { |
2077 | 2 | int i; |
2078 | | // Initialize the sad lut tables using a formulaic calculation for now. |
2079 | | // This is to make it easier to resolve the impact of experimental changes |
2080 | | // to the quantizer tables. |
2081 | 514 | for (i = 0; i < range; i++) { |
2082 | 512 | const double q = svt_av1_convert_qindex_to_q(i, bit_depth); |
2083 | 512 | bit16lut[i] = (int)(0.0418 * q + 2.4107); |
2084 | 512 | } |
2085 | 2 | } |
2086 | | |
2087 | 1 | void svt_av1_init_me_luts(void) { |
2088 | 1 | init_me_luts_bd(sad_per_bit_lut_8, QINDEX_RANGE, EB_EIGHT_BIT); |
2089 | 1 | init_me_luts_bd(sad_per_bit_lut_10, QINDEX_RANGE, EB_TEN_BIT); |
2090 | 1 | } |
2091 | | |
2092 | | #if CONFIG_ENABLE_OBMC |
2093 | | static void single_motion_search(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidate* cand, |
2094 | | Mv best_pred_mv, IntraBcContext* x, BlockSize bsize, Mv* ref_mv, int* rate_mv, |
2095 | 0 | int refine_level) { |
2096 | 0 | bool do_full_refine = 0; |
2097 | 0 | bool do_frac_refine = 0; |
2098 | 0 | switch (refine_level) { |
2099 | 0 | case 0: |
2100 | 0 | case 1: |
2101 | 0 | case 3: |
2102 | 0 | do_full_refine = 1; |
2103 | 0 | do_frac_refine = 1; |
2104 | 0 | break; |
2105 | 0 | case 2: |
2106 | 0 | case 4: |
2107 | 0 | do_full_refine = 0; |
2108 | 0 | do_frac_refine = 1; |
2109 | 0 | break; |
2110 | 0 | default: |
2111 | 0 | break; |
2112 | 0 | } |
2113 | 0 | const Av1Common* const cm = pcs->ppcs->av1_cm; |
2114 | 0 | FrameHeader* frm_hdr = &pcs->ppcs->frm_hdr; |
2115 | | // single_motion_search supports 8bit path only |
2116 | 0 | uint32_t full_lambda = ctx->full_lambda_md[EB_8_BIT_MD]; |
2117 | |
|
2118 | 0 | x->xd = ctx->blk_ptr->av1xd; |
2119 | 0 | const int mi_row = -x->xd->mb_to_top_edge / (8 * MI_SIZE); |
2120 | 0 | const int mi_col = -x->xd->mb_to_left_edge / (8 * MI_SIZE); |
2121 | |
|
2122 | 0 | x->nmv_vec_cost = ctx->md_rate_est_ctx->nmv_vec_cost; |
2123 | 0 | x->mv_cost_stack = ctx->md_rate_est_ctx->nmvcoststack; |
2124 | | // Set up limit values for MV components. |
2125 | | // Mv beyond the range do not produce new/different prediction block. |
2126 | 0 | const int mi_width = mi_size_wide[bsize]; |
2127 | 0 | const int mi_height = mi_size_high[bsize]; |
2128 | 0 | x->mv_limits.row_min = -(((mi_row + mi_height) * MI_SIZE) + AOM_INTERP_EXTEND); |
2129 | 0 | x->mv_limits.col_min = -(((mi_col + mi_width) * MI_SIZE) + AOM_INTERP_EXTEND); |
2130 | 0 | x->mv_limits.row_max = (cm->mi_rows - mi_row) * MI_SIZE + AOM_INTERP_EXTEND; |
2131 | 0 | x->mv_limits.col_max = (cm->mi_cols - mi_col) * MI_SIZE + AOM_INTERP_EXTEND; |
2132 | | //set search paramters |
2133 | 0 | x->sadperbit16 = svt_aom_get_sad_per_bit(frm_hdr->quantization_params.base_q_idx, 0); |
2134 | 0 | x->errorperbit = full_lambda >> RD_EPB_SHIFT; |
2135 | 0 | x->errorperbit += (x->errorperbit == 0); |
2136 | 0 | if (do_full_refine) { |
2137 | 0 | int sadpb = x->sadperbit16; |
2138 | 0 | MvLimits tmp_mv_limits = x->mv_limits; |
2139 | | |
2140 | | // Note: MV limits are modified here. Always restore the original values |
2141 | | // after full-pixel motion search. |
2142 | 0 | svt_av1_set_mv_search_range(&x->mv_limits, ref_mv); |
2143 | |
|
2144 | 0 | Mv mvp_full = best_pred_mv; // mbmi->mv[0].as_mv; |
2145 | | |
2146 | | // TODO: should use get_fullmv_from_mv instead of shifting |
2147 | 0 | mvp_full.x >>= 3; |
2148 | 0 | mvp_full.y >>= 3; |
2149 | |
|
2150 | 0 | x->best_mv.as_int = x->second_best_mv.as_int = INVALID_MV; //D |
2151 | |
|
2152 | 0 | switch (cand->block_mi.motion_mode) { |
2153 | 0 | case OBMC_CAUSAL: |
2154 | 0 | svt_av1_obmc_full_pixel_search( |
2155 | 0 | ctx, x, &mvp_full, sadpb, &svt_aom_mefn_ptr[bsize], ref_mv, &(x->best_mv), 0); |
2156 | 0 | break; |
2157 | 0 | default: |
2158 | 0 | assert(0 && "Invalid motion mode!\n"); |
2159 | 0 | } |
2160 | | |
2161 | 0 | x->mv_limits = tmp_mv_limits; |
2162 | 0 | } else { // round-up the default |
2163 | 0 | x->best_mv.x = best_pred_mv.x >> 3; |
2164 | 0 | x->best_mv.y = best_pred_mv.y >> 3; |
2165 | 0 | } |
2166 | | |
2167 | 0 | if (do_frac_refine) { |
2168 | 0 | int dis; /* TODO: use dis in distortion calculation later. */ |
2169 | 0 | unsigned int sse1; //unused |
2170 | 0 | switch (cand->block_mi.motion_mode) { |
2171 | 0 | case OBMC_CAUSAL: |
2172 | 0 | svt_av1_find_best_obmc_sub_pixel_tree_up(ctx, |
2173 | 0 | x, |
2174 | 0 | cm, |
2175 | 0 | mi_row, |
2176 | 0 | mi_col, |
2177 | 0 | &x->best_mv, |
2178 | 0 | ref_mv, |
2179 | 0 | frm_hdr->allow_high_precision_mv, |
2180 | 0 | x->errorperbit, |
2181 | 0 | &svt_aom_mefn_ptr[bsize], |
2182 | 0 | 0, // mv.subpel_force_stop |
2183 | 0 | 2, // mv.subpel_iters_per_step |
2184 | 0 | x->nmv_vec_cost, |
2185 | 0 | x->mv_cost_stack, |
2186 | 0 | &dis, |
2187 | 0 | &sse1, |
2188 | 0 | 0, |
2189 | 0 | USE_8_TAPS); |
2190 | |
|
2191 | 0 | break; |
2192 | 0 | default: |
2193 | 0 | assert(0 && "Invalid motion mode!\n"); |
2194 | 0 | } |
2195 | 0 | } else { |
2196 | 0 | x->best_mv.x *= 8; |
2197 | 0 | x->best_mv.y *= 8; |
2198 | 0 | } |
2199 | 0 | if (ctx->approx_inter_rate) { |
2200 | 0 | *rate_mv = svt_av1_mv_bit_cost_light(&x->best_mv, ref_mv); |
2201 | 0 | } else { |
2202 | 0 | *rate_mv = svt_av1_mv_bit_cost(&x->best_mv, ref_mv, x->nmv_vec_cost, x->mv_cost_stack, MV_COST_WEIGHT); |
2203 | 0 | } |
2204 | 0 | } |
2205 | | |
2206 | | // Refine the OBMC MV (8 bit search). Return true if search found a valid MV; false otherwise |
2207 | | uint8_t svt_aom_obmc_motion_refinement(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidate* cand, |
2208 | 0 | int refine_level) { |
2209 | 0 | if (block_size_wide[ctx->blk_geom->bsize] > ctx->obmc_ctrls.max_blk_size_to_refine || |
2210 | 0 | block_size_high[ctx->blk_geom->bsize] > ctx->obmc_ctrls.max_blk_size_to_refine) { |
2211 | 0 | return 1; |
2212 | 0 | } |
2213 | | |
2214 | 0 | if (ctx->obmc_weighted_pred_ready == false) { |
2215 | 0 | int mi_row = ctx->blk_org_y >> 2; |
2216 | 0 | int mi_col = ctx->blk_org_x >> 2; |
2217 | |
|
2218 | 0 | DECLARE_ALIGNED(16, uint8_t, dst_buf1_8b[4 * MAX_PLANES * MAX_SB_SQUARE]); |
2219 | |
|
2220 | 0 | uint8_t* dst_buf2_8b = dst_buf1_8b + 2 * MAX_PLANES * MAX_SB_SQUARE; |
2221 | 0 | if (ctx->obmc_is_luma_neigh_10bit) { |
2222 | 0 | svt_aom_un_pack2d((uint16_t*)ctx->obmc_buff_0, |
2223 | 0 | ctx->blk_geom->bwidth, |
2224 | 0 | dst_buf1_8b, |
2225 | 0 | ctx->blk_geom->bwidth, |
2226 | 0 | NULL, |
2227 | 0 | ctx->blk_geom->bwidth, |
2228 | 0 | ctx->blk_geom->bwidth, |
2229 | 0 | ctx->blk_geom->bheight); |
2230 | |
|
2231 | 0 | svt_aom_un_pack2d((uint16_t*)ctx->obmc_buff_1, |
2232 | 0 | ctx->blk_geom->bwidth, |
2233 | 0 | dst_buf2_8b, |
2234 | 0 | ctx->blk_geom->bwidth, |
2235 | 0 | NULL, |
2236 | 0 | ctx->blk_geom->bwidth, |
2237 | 0 | ctx->blk_geom->bwidth, |
2238 | 0 | ctx->blk_geom->bheight); |
2239 | 0 | } |
2240 | |
|
2241 | 0 | calc_target_weighted_pred(pcs, |
2242 | 0 | ctx, |
2243 | 0 | pcs->ppcs->av1_cm, |
2244 | 0 | ctx->blk_ptr->av1xd, |
2245 | 0 | mi_row, |
2246 | 0 | mi_col, |
2247 | 0 | ctx->obmc_is_luma_neigh_10bit ? dst_buf1_8b : ctx->obmc_buff_0, |
2248 | 0 | ctx->blk_geom->bwidth, |
2249 | 0 | ctx->obmc_is_luma_neigh_10bit ? dst_buf2_8b : ctx->obmc_buff_1, |
2250 | 0 | ctx->blk_geom->bwidth); |
2251 | |
|
2252 | 0 | ctx->obmc_weighted_pred_ready = true; |
2253 | 0 | } |
2254 | 0 | Mv best_pred_mv[2] = {{{0}}, {{0}}}; |
2255 | 0 | IntraBcContext x_st; |
2256 | 0 | IntraBcContext* x = &x_st; |
2257 | |
|
2258 | 0 | MacroBlockD* xd; |
2259 | 0 | xd = x->xd = ctx->blk_ptr->av1xd; |
2260 | 0 | const int mi_row = -xd->mb_to_top_edge / (8 * MI_SIZE); |
2261 | 0 | const int mi_col = -xd->mb_to_left_edge / (8 * MI_SIZE); |
2262 | |
|
2263 | 0 | { |
2264 | 0 | assert(cand->block_mi.ref_frame[1] == NONE_FRAME); // OBMC only allowed for unipred cands |
2265 | 0 | uint8_t ref_idx = get_ref_frame_idx(cand->block_mi.ref_frame[0]); |
2266 | 0 | uint8_t list_idx = get_list_idx(cand->block_mi.ref_frame[0]); |
2267 | |
|
2268 | 0 | assert(list_idx < MAX_NUM_OF_REF_PIC_LIST); |
2269 | 0 | EbPictureBufferDesc* reference_picture = |
2270 | 0 | ((EbReferenceObject*)pcs->ref_pic_ptr_array[list_idx][ref_idx]->object_ptr)->reference_picture; |
2271 | |
|
2272 | 0 | svt_aom_use_scaled_rec_refs_if_needed(pcs, |
2273 | 0 | pcs->ppcs->enhanced_pic, |
2274 | 0 | (EbReferenceObject*)pcs->ref_pic_ptr_array[list_idx][ref_idx]->object_ptr, |
2275 | 0 | &reference_picture, |
2276 | 0 | EB_8_BIT_MD); |
2277 | 0 | Yv12BufferConfig ref_buf; |
2278 | 0 | svt_aom_link_eb_to_aom_buffer_desc_8bit(reference_picture, &ref_buf); |
2279 | |
|
2280 | 0 | Buf2D yv12_mb[MAX_PLANES]; |
2281 | 0 | svt_av1_setup_pred_block(ctx->blk_geom->bsize, yv12_mb, &ref_buf, mi_row, mi_col); |
2282 | 0 | for (int i = 0; i < 1; ++i) { |
2283 | 0 | x->xdplane[i].pre[0] = yv12_mb[i]; //ref in ME |
2284 | 0 | } |
2285 | |
|
2286 | 0 | x->plane[0].src.buf = 0; // x->xdplane[0].pre[0]; |
2287 | 0 | x->plane[0].src.buf0 = 0; |
2288 | 0 | } |
2289 | |
|
2290 | 0 | Mv best_mv = {.as_int = cand->block_mi.mv[0].as_int}; |
2291 | 0 | int tmp_rate_mv; |
2292 | |
|
2293 | 0 | Mv ref_mv = {.as_int = cand->pred_mv[0].as_int}; |
2294 | |
|
2295 | 0 | single_motion_search(pcs, ctx, cand, best_mv, x, ctx->blk_geom->bsize, &ref_mv, &tmp_rate_mv, refine_level); |
2296 | 0 | cand->block_mi.mv[0].as_int = x->best_mv.as_int; |
2297 | 0 | svt_aom_choose_best_av1_mv_pred(ctx, |
2298 | 0 | cand->block_mi.ref_frame[0], // OBMC only allowed for unipred candidtes |
2299 | 0 | cand->block_mi.mode, |
2300 | 0 | cand->block_mi.mv[0], |
2301 | 0 | (Mv){{0}}, |
2302 | 0 | &cand->drl_index, |
2303 | 0 | best_pred_mv); |
2304 | 0 | cand->pred_mv[0].as_int = best_pred_mv[0].as_int; |
2305 | | // Check that final chosen MV is valid |
2306 | 0 | if (!ctx->corrupted_mv_check || is_valid_mv_diff(best_pred_mv, cand->block_mi.mv[0], cand->block_mi.mv[0], 0)) { |
2307 | 0 | return 1; |
2308 | 0 | } |
2309 | | |
2310 | 0 | return 0; |
2311 | 0 | } |
2312 | | #endif // CONFIG_ENABLE_OBMC |
2313 | | |
2314 | | /* |
2315 | | inject ME candidates for Light PD0 |
2316 | | */ |
2317 | | static void inject_new_candidates_light_pd0(PictureControlSet* pcs, ModeDecisionContext* ctx, |
2318 | 0 | uint32_t* candidate_total_cnt, const bool allow_bipred) { |
2319 | 0 | const uint32_t me_sb_addr = ctx->me_sb_addr; |
2320 | 0 | const uint32_t me_block_offset = ctx->me_block_offset; |
2321 | 0 | ModeDecisionCandidate* cand_array = ctx->fast_cand_array; |
2322 | 0 | uint32_t cand_total_cnt = (*candidate_total_cnt); |
2323 | 0 | const MeSbResults* me_results = pcs->ppcs->pa_me_data->me_results[me_sb_addr]; |
2324 | 0 | const uint8_t total_me_cnt = me_results->total_me_candidate_index[me_block_offset]; |
2325 | 0 | const MeCandidate* me_block_results = &me_results->me_candidate_array[ctx->me_cand_offset]; |
2326 | |
|
2327 | 0 | const uint8_t max_refs = pcs->ppcs->pa_me_data->max_refs; |
2328 | 0 | const uint8_t max_l0 = pcs->ppcs->pa_me_data->max_l0; |
2329 | |
|
2330 | 0 | for (uint8_t me_candidate_index = 0; me_candidate_index < total_me_cnt; ++me_candidate_index) { |
2331 | 0 | const MeCandidate* me_block_results_ptr = &me_block_results[me_candidate_index]; |
2332 | 0 | const uint8_t inter_direction = me_block_results_ptr->direction; |
2333 | 0 | const uint8_t list0_ref_index = me_block_results_ptr->ref_idx_l0; |
2334 | 0 | const uint8_t list1_ref_index = me_block_results_ptr->ref_idx_l1; |
2335 | |
|
2336 | 0 | if (ctx->lpd0_ctrls.pd0_level == VERY_LIGHT_PD0 && inter_direction == BI_PRED) { |
2337 | 0 | continue; |
2338 | 0 | } |
2339 | | |
2340 | | /************** |
2341 | | NEWMV |
2342 | | ************* */ |
2343 | 0 | if (inter_direction < BI_PRED) { |
2344 | 0 | const uint8_t list_idx = inter_direction; |
2345 | 0 | const uint8_t ref_idx = inter_direction ? list1_ref_index : list0_ref_index; |
2346 | 0 | const int16_t to_inject_mv_x = |
2347 | 0 | (me_results->me_mv_array[me_block_offset * max_refs + (inter_direction ? max_l0 : 0) + ref_idx].x) * 8; |
2348 | 0 | const int16_t to_inject_mv_y = |
2349 | 0 | (me_results->me_mv_array[me_block_offset * max_refs + (inter_direction ? max_l0 : 0) + ref_idx].y) * 8; |
2350 | 0 | const uint8_t to_inject_ref_type = svt_get_ref_frame_type(list_idx, ref_idx); |
2351 | |
|
2352 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_total_cnt]; |
2353 | 0 | cand->block_mi.mode = NEWMV; |
2354 | 0 | cand->block_mi.mv[0] = (Mv){{to_inject_mv_x, to_inject_mv_y}}; |
2355 | 0 | cand->block_mi.ref_frame[0] = to_inject_ref_type; |
2356 | 0 | cand->block_mi.ref_frame[1] = NONE_FRAME; |
2357 | 0 | INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count); |
2358 | 0 | if (cand_total_cnt > 2) { |
2359 | 0 | break; |
2360 | 0 | } |
2361 | 0 | } else if (allow_bipred) { |
2362 | 0 | assert(inter_direction == BI_PRED); |
2363 | | /************** |
2364 | | NEW_NEWMV |
2365 | | ************* */ |
2366 | 0 | const uint32_t ref0_offset = me_block_offset * max_refs + |
2367 | 0 | (me_block_results_ptr->ref0_list > 0 ? max_l0 : 0) + list0_ref_index; |
2368 | 0 | const uint32_t ref1_offset = me_block_offset * max_refs + |
2369 | 0 | (me_block_results_ptr->ref1_list > 0 ? max_l0 : 0) + list1_ref_index; |
2370 | 0 | const int16_t to_inject_mv_x_l0 = (me_results->me_mv_array[ref0_offset].x) * 8; |
2371 | 0 | const int16_t to_inject_mv_y_l0 = (me_results->me_mv_array[ref0_offset].y) * 8; |
2372 | 0 | const int16_t to_inject_mv_x_l1 = (me_results->me_mv_array[ref1_offset].x) * 8; |
2373 | 0 | const int16_t to_inject_mv_y_l1 = (me_results->me_mv_array[ref1_offset].y) * 8; |
2374 | |
|
2375 | 0 | MvReferenceFrame rf[2] = {svt_get_ref_frame_type(me_block_results_ptr->ref0_list, list0_ref_index), |
2376 | 0 | svt_get_ref_frame_type(me_block_results_ptr->ref1_list, list1_ref_index)}; |
2377 | | |
2378 | | // Inject AVG candidate only |
2379 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_total_cnt]; |
2380 | 0 | cand->block_mi.mv[REF_LIST_0] = (Mv){{to_inject_mv_x_l0, to_inject_mv_y_l0}}; |
2381 | 0 | cand->block_mi.mv[REF_LIST_1] = (Mv){{to_inject_mv_x_l1, to_inject_mv_y_l1}}; |
2382 | 0 | cand->block_mi.mode = NEW_NEWMV; |
2383 | 0 | cand->block_mi.ref_frame[0] = rf[0]; |
2384 | 0 | cand->block_mi.ref_frame[1] = rf[1]; |
2385 | 0 | determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG); |
2386 | 0 | INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count); |
2387 | 0 | if (cand_total_cnt > 2) { |
2388 | 0 | break; |
2389 | 0 | } |
2390 | 0 | } |
2391 | 0 | } |
2392 | | // update the total number of candidates injected |
2393 | 0 | (*candidate_total_cnt) = cand_total_cnt; |
2394 | 0 | } |
2395 | | |
2396 | | static void inject_new_candidates_light_pd1(PictureControlSet* pcs, ModeDecisionContext* ctx, |
2397 | 0 | uint32_t* candidate_total_cnt, const bool allow_bipred) { |
2398 | 0 | const uint32_t me_sb_addr = ctx->me_sb_addr; |
2399 | 0 | const uint32_t me_block_offset = ctx->me_block_offset; |
2400 | 0 | ModeDecisionCandidate* cand_array = ctx->fast_cand_array; |
2401 | 0 | Mv best_pred_mv[2] = {{{0}}, {{0}}}; |
2402 | 0 | uint32_t cand_total_cnt = (*candidate_total_cnt); |
2403 | 0 | const MeSbResults* me_results = pcs->ppcs->pa_me_data->me_results[me_sb_addr]; |
2404 | 0 | const uint8_t total_me_cnt = me_results->total_me_candidate_index[me_block_offset]; |
2405 | 0 | const MeCandidate* me_block_results = &me_results->me_candidate_array[ctx->me_cand_offset]; |
2406 | |
|
2407 | 0 | for (uint8_t me_candidate_index = 0; me_candidate_index < total_me_cnt; ++me_candidate_index) { |
2408 | 0 | const MeCandidate* me_block_results_ptr = &me_block_results[me_candidate_index]; |
2409 | 0 | const uint8_t inter_direction = me_block_results_ptr->direction; |
2410 | 0 | const uint8_t list0_ref_index = me_block_results_ptr->ref_idx_l0; |
2411 | 0 | const uint8_t list1_ref_index = me_block_results_ptr->ref_idx_l1; |
2412 | |
|
2413 | 0 | if (ctx->cand_reduction_ctrls.reduce_unipred_candidates >= 2) { |
2414 | 0 | if ((total_me_cnt > 1) && (inter_direction != 2)) { |
2415 | 0 | continue; |
2416 | 0 | } |
2417 | 0 | } else if (ctx->cand_reduction_ctrls.reduce_unipred_candidates) { |
2418 | 0 | if ((total_me_cnt > 3) && (inter_direction != 2)) { |
2419 | 0 | continue; |
2420 | 0 | } |
2421 | 0 | } |
2422 | | |
2423 | | /************** |
2424 | | NEWMV |
2425 | | ************* */ |
2426 | 0 | if (inter_direction < BI_PRED) { |
2427 | 0 | const uint8_t list_idx = inter_direction; |
2428 | 0 | const uint8_t ref_idx = inter_direction ? list1_ref_index : list0_ref_index; |
2429 | 0 | Mv to_inj_mv = ctx->sb_me_mv[list_idx][ref_idx]; |
2430 | 0 | const uint8_t to_inject_ref_type = svt_get_ref_frame_type(list_idx, ref_idx); |
2431 | 0 | if (ctx->injected_mv_count == 0 || |
2432 | 0 | mv_is_already_injected(ctx, to_inj_mv, to_inj_mv, to_inject_ref_type) == false) { |
2433 | 0 | uint8_t drl_index = 0; |
2434 | 0 | svt_aom_choose_best_av1_mv_pred( |
2435 | 0 | ctx, to_inject_ref_type, NEWMV, to_inj_mv, (Mv){{0}}, &drl_index, best_pred_mv); |
2436 | 0 | if (!ctx->corrupted_mv_check || is_valid_mv_diff(best_pred_mv, to_inj_mv, to_inj_mv, 0)) { |
2437 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_total_cnt]; |
2438 | 0 | cand->block_mi.use_intrabc = 0; |
2439 | 0 | cand->block_mi.is_interintra_used = 0; |
2440 | 0 | cand->skip_mode_allowed = false; |
2441 | 0 | cand->block_mi.mode = NEWMV; |
2442 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
2443 | 0 | cand->drl_index = drl_index; |
2444 | 0 | cand->block_mi.mv[0].as_int = to_inj_mv.as_int; |
2445 | 0 | cand->block_mi.ref_frame[0] = to_inject_ref_type; |
2446 | 0 | cand->block_mi.ref_frame[1] = NONE_FRAME; |
2447 | 0 | cand->pred_mv[0].as_int = best_pred_mv[0].as_int; |
2448 | 0 | cand->block_mi.num_proj_ref = ctx->wm_sample_info[to_inject_ref_type].num; |
2449 | 0 | INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count); |
2450 | | // Add the injected MV to the list of injected MVs |
2451 | 0 | ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv.as_int; |
2452 | 0 | ctx->injected_ref_types[ctx->injected_mv_count] = to_inject_ref_type; |
2453 | 0 | ++ctx->injected_mv_count; |
2454 | 0 | } |
2455 | 0 | } |
2456 | 0 | } else if (allow_bipred && inter_direction == 2 && |
2457 | 0 | !(ctx->is_intra_bordered && ctx->cand_reduction_ctrls.use_neighbouring_mode_ctrls.enabled)) { |
2458 | | /************** |
2459 | | NEW_NEWMV |
2460 | | ************* */ |
2461 | 0 | Mv to_inj_mv0 = ctx->sb_me_mv[me_block_results_ptr->ref0_list][list0_ref_index]; |
2462 | 0 | Mv to_inj_mv1 = ctx->sb_me_mv[me_block_results_ptr->ref1_list][list1_ref_index]; |
2463 | 0 | MvReferenceFrame rf[2] = {svt_get_ref_frame_type(me_block_results_ptr->ref0_list, list0_ref_index), |
2464 | 0 | svt_get_ref_frame_type(me_block_results_ptr->ref1_list, list1_ref_index)}; |
2465 | 0 | uint8_t to_inject_ref_type = av1_ref_frame_type(rf); |
2466 | 0 | if ((ctx->injected_mv_count == 0 || |
2467 | 0 | mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, to_inject_ref_type) == false)) { |
2468 | 0 | uint8_t drl_index = 0; |
2469 | 0 | svt_aom_choose_best_av1_mv_pred( |
2470 | 0 | ctx, to_inject_ref_type, NEW_NEWMV, to_inj_mv0, to_inj_mv1, &drl_index, best_pred_mv); |
2471 | 0 | if (!ctx->corrupted_mv_check || is_valid_mv_diff(best_pred_mv, to_inj_mv0, to_inj_mv1, 1)) { |
2472 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_total_cnt]; |
2473 | 0 | cand->block_mi.use_intrabc = 0; |
2474 | 0 | cand->block_mi.is_interintra_used = 0; |
2475 | 0 | cand->skip_mode_allowed = false; |
2476 | 0 | cand->drl_index = drl_index; |
2477 | 0 | cand->block_mi.mv[0].as_int = to_inj_mv0.as_int; |
2478 | 0 | cand->block_mi.mv[1].as_int = to_inj_mv1.as_int; |
2479 | 0 | cand->block_mi.mode = NEW_NEWMV; |
2480 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
2481 | 0 | cand->block_mi.ref_frame[0] = rf[0]; |
2482 | 0 | cand->block_mi.ref_frame[1] = rf[1]; |
2483 | 0 | cand->pred_mv[0].as_int = best_pred_mv[0].as_int; |
2484 | 0 | cand->pred_mv[1].as_int = best_pred_mv[1].as_int; |
2485 | 0 | cand->block_mi.comp_group_idx = 0; |
2486 | 0 | cand->block_mi.compound_idx = 1; |
2487 | 0 | cand->block_mi.interinter_comp.type = COMPOUND_AVERAGE; |
2488 | 0 | INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count); |
2489 | | |
2490 | | // Add the injected MV to the list of injected MVs |
2491 | 0 | ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int; |
2492 | 0 | ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int; |
2493 | 0 | ctx->injected_ref_types[ctx->injected_mv_count] = to_inject_ref_type; |
2494 | 0 | ++ctx->injected_mv_count; |
2495 | 0 | } |
2496 | 0 | } |
2497 | 0 | } |
2498 | 0 | } |
2499 | | // update the total number of candidates injected |
2500 | 0 | (*candidate_total_cnt) = cand_total_cnt; |
2501 | 0 | } |
2502 | | |
2503 | | static void inject_new_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t* candidate_total_cnt, |
2504 | 0 | const bool allow_bipred) { |
2505 | 0 | const uint32_t me_sb_addr = ctx->me_sb_addr; |
2506 | 0 | const uint32_t me_block_offset = ctx->me_block_offset; |
2507 | 0 | ModeDecisionCandidate* cand_array = ctx->fast_cand_array; |
2508 | 0 | Mv best_pred_mv[2] = {{{0}}, {{0}}}; |
2509 | 0 | uint32_t cand_total_cnt = (*candidate_total_cnt); |
2510 | 0 | const MeSbResults* me_results = pcs->ppcs->pa_me_data->me_results[me_sb_addr]; |
2511 | 0 | const uint8_t total_me_cnt = me_results->total_me_candidate_index[me_block_offset]; |
2512 | 0 | const MeCandidate* me_block_results = &me_results->me_candidate_array[ctx->me_cand_offset]; |
2513 | |
|
2514 | 0 | for (uint8_t me_candidate_index = 0; me_candidate_index < total_me_cnt; ++me_candidate_index) { |
2515 | 0 | const MeCandidate* me_block_results_ptr = &me_block_results[me_candidate_index]; |
2516 | 0 | const uint8_t inter_direction = me_block_results_ptr->direction; |
2517 | 0 | const uint8_t list0_ref_index = me_block_results_ptr->ref_idx_l0; |
2518 | 0 | const uint8_t list1_ref_index = me_block_results_ptr->ref_idx_l1; |
2519 | |
|
2520 | 0 | if (ctx->cand_reduction_ctrls.reduce_unipred_candidates) { |
2521 | 0 | if ((total_me_cnt > 3) && (inter_direction != 2)) { |
2522 | 0 | continue; |
2523 | 0 | } |
2524 | 0 | } |
2525 | | |
2526 | | /************** |
2527 | | NEWMV unipred |
2528 | | ************* */ |
2529 | 0 | if (inter_direction < BI_PRED) { |
2530 | 0 | const uint8_t list_idx = inter_direction; |
2531 | 0 | const uint8_t ref_idx = list_idx == REF_LIST_0 ? list0_ref_index : list1_ref_index; |
2532 | 0 | if (!svt_aom_is_valid_unipred_ref(ctx, MIN(TOT_INTER_GROUP - 1, PA_ME_GROUP), list_idx, ref_idx)) { |
2533 | 0 | continue; |
2534 | 0 | } |
2535 | 0 | Mv to_inj_mv = ctx->sb_me_mv[list_idx][ref_idx]; |
2536 | 0 | uint8_t to_inject_ref_type = svt_get_ref_frame_type(list_idx, ref_idx); |
2537 | 0 | if ((ctx->injected_mv_count == 0 || |
2538 | 0 | mv_is_already_injected(ctx, to_inj_mv, to_inj_mv, to_inject_ref_type) == false)) { |
2539 | 0 | uint8_t drl_index = 0; |
2540 | 0 | svt_aom_choose_best_av1_mv_pred( |
2541 | 0 | ctx, to_inject_ref_type, NEWMV, to_inj_mv, (Mv){{0}}, &drl_index, best_pred_mv); |
2542 | 0 | if (!ctx->corrupted_mv_check || is_valid_mv_diff(best_pred_mv, to_inj_mv, to_inj_mv, 0)) { |
2543 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_total_cnt]; |
2544 | 0 | cand->block_mi.use_intrabc = 0; |
2545 | 0 | cand->skip_mode_allowed = false; |
2546 | 0 | cand->block_mi.mode = NEWMV; |
2547 | 0 | cand->drl_index = drl_index; |
2548 | 0 | cand->block_mi.mv[0].as_int = to_inj_mv.as_int; |
2549 | 0 | cand->block_mi.ref_frame[0] = to_inject_ref_type; |
2550 | 0 | cand->block_mi.ref_frame[1] = NONE_FRAME; |
2551 | 0 | cand->pred_mv[0].as_int = best_pred_mv[0].as_int; |
2552 | 0 | cand->block_mi.is_interintra_used = 0; |
2553 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
2554 | 0 | cand->block_mi.num_proj_ref = ctx->wm_sample_info[to_inject_ref_type].num; |
2555 | 0 | INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count); |
2556 | |
|
2557 | 0 | const bool enable_ii = true; |
2558 | 0 | const bool enable_obmc = true; |
2559 | 0 | const bool enable_warp = true; |
2560 | 0 | inj_non_simple_modes(pcs, ctx, &cand_total_cnt, enable_ii, enable_warp, enable_obmc); |
2561 | |
|
2562 | 0 | ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv.as_int; |
2563 | 0 | ctx->injected_ref_types[ctx->injected_mv_count] = to_inject_ref_type; |
2564 | 0 | ++ctx->injected_mv_count; |
2565 | 0 | } |
2566 | 0 | } |
2567 | 0 | } else if (allow_bipred && |
2568 | 0 | !(ctx->is_intra_bordered && ctx->cand_reduction_ctrls.use_neighbouring_mode_ctrls.enabled)) { |
2569 | 0 | assert(inter_direction == BI_PRED); |
2570 | | /************** |
2571 | | NEW_NEWMV |
2572 | | ************* */ |
2573 | 0 | if (!is_valid_bipred_ref(ctx, |
2574 | 0 | PA_ME_GROUP, |
2575 | 0 | me_block_results_ptr->ref0_list, |
2576 | 0 | list0_ref_index, |
2577 | 0 | me_block_results_ptr->ref1_list, |
2578 | 0 | list1_ref_index)) { |
2579 | 0 | continue; |
2580 | 0 | } |
2581 | 0 | Mv to_inj_mv0 = ctx->sb_me_mv[me_block_results_ptr->ref0_list][list0_ref_index]; |
2582 | 0 | Mv to_inj_mv1 = ctx->sb_me_mv[me_block_results_ptr->ref1_list][list1_ref_index]; |
2583 | 0 | uint8_t to_inject_ref_type = av1_ref_frame_type( |
2584 | 0 | (const MvReferenceFrame[]){svt_get_ref_frame_type(me_block_results_ptr->ref0_list, list0_ref_index), |
2585 | 0 | svt_get_ref_frame_type(me_block_results_ptr->ref1_list, list1_ref_index)}); |
2586 | 0 | if ((ctx->injected_mv_count == 0 || |
2587 | 0 | mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, to_inject_ref_type) == false)) { |
2588 | 0 | uint8_t drl_index = 0; |
2589 | 0 | svt_aom_choose_best_av1_mv_pred( |
2590 | 0 | ctx, to_inject_ref_type, NEW_NEWMV, to_inj_mv0, to_inj_mv1, &drl_index, best_pred_mv); |
2591 | 0 | if (!ctx->corrupted_mv_check || is_valid_mv_diff(best_pred_mv, to_inj_mv0, to_inj_mv1, 1)) { |
2592 | 0 | MvReferenceFrame rf[2] = {svt_get_ref_frame_type(me_block_results_ptr->ref0_list, list0_ref_index), |
2593 | 0 | svt_get_ref_frame_type(me_block_results_ptr->ref1_list, list1_ref_index)}; |
2594 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_total_cnt]; |
2595 | 0 | cand->block_mi.use_intrabc = 0; |
2596 | 0 | cand->skip_mode_allowed = false; |
2597 | 0 | cand->drl_index = drl_index; |
2598 | 0 | cand->block_mi.mv[0].as_int = to_inj_mv0.as_int; |
2599 | 0 | cand->block_mi.mv[1].as_int = to_inj_mv1.as_int; |
2600 | 0 | cand->block_mi.mode = NEW_NEWMV; |
2601 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
2602 | 0 | cand->block_mi.is_interintra_used = 0; |
2603 | 0 | cand->block_mi.ref_frame[0] = rf[0]; |
2604 | 0 | cand->block_mi.ref_frame[1] = rf[1]; |
2605 | 0 | cand->pred_mv[0].as_int = best_pred_mv[0].as_int; |
2606 | 0 | cand->pred_mv[1].as_int = best_pred_mv[1].as_int; |
2607 | 0 | determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG); |
2608 | 0 | INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count); |
2609 | |
|
2610 | 0 | if (ctx->inter_comp_ctrls.do_me) { |
2611 | 0 | ctx->cmp_store.pred0_cnt = 0; |
2612 | 0 | ctx->cmp_store.pred1_cnt = 0; |
2613 | 0 | inj_comp_modes(pcs, ctx, &cand_total_cnt); |
2614 | 0 | } |
2615 | 0 | ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int; |
2616 | 0 | ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int; |
2617 | 0 | ctx->injected_ref_types[ctx->injected_mv_count] = to_inject_ref_type; |
2618 | 0 | ++ctx->injected_mv_count; |
2619 | 0 | } |
2620 | 0 | } |
2621 | 0 | } |
2622 | 0 | } |
2623 | | // update the total number of candidates injected |
2624 | 0 | (*candidate_total_cnt) = cand_total_cnt; |
2625 | 0 | } |
2626 | | |
2627 | | static void inject_global_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t* candidate_total_cnt, |
2628 | 0 | const bool allow_bipred) { |
2629 | 0 | ModeDecisionCandidate* cand_array = ctx->fast_cand_array; |
2630 | 0 | uint32_t cand_total_cnt = (*candidate_total_cnt); |
2631 | 0 | uint32_t mi_row = ctx->blk_org_y >> MI_SIZE_LOG2; |
2632 | 0 | uint32_t mi_col = ctx->blk_org_x >> MI_SIZE_LOG2; |
2633 | |
|
2634 | 0 | for (uint32_t ref_it = 0; ref_it < ctx->tot_ref_frame_types; ++ref_it) { |
2635 | 0 | MvReferenceFrame ref_pair = ctx->ref_frame_type_arr[ref_it]; |
2636 | 0 | MvReferenceFrame rf[2]; |
2637 | 0 | av1_set_ref_frame(rf, ref_pair); |
2638 | | |
2639 | | //single ref/list |
2640 | 0 | if (rf[1] == NONE_FRAME) { |
2641 | 0 | MvReferenceFrame frame_type = rf[0]; |
2642 | 0 | uint8_t list_idx = get_list_idx(rf[0]); |
2643 | 0 | uint8_t ref_idx = get_ref_frame_idx(rf[0]); |
2644 | |
|
2645 | 0 | if (!svt_aom_is_valid_unipred_ref(ctx, GLOBAL_GROUP, list_idx, ref_idx)) { |
2646 | 0 | continue; |
2647 | 0 | } |
2648 | | // Get gm params |
2649 | 0 | WarpedMotionParams* gm_params = &pcs->ppcs->global_motion[frame_type]; |
2650 | 0 | if (pcs->ppcs->gm_ctrls.skip_identity && gm_params->wmtype == IDENTITY) { |
2651 | 0 | continue; |
2652 | 0 | } |
2653 | 0 | Mv to_inj_mv = svt_aom_gm_get_motion_vector_enc(gm_params, |
2654 | 0 | pcs->ppcs->frm_hdr.allow_high_precision_mv, |
2655 | 0 | ctx->blk_geom->bsize, |
2656 | 0 | mi_col, |
2657 | 0 | mi_row, |
2658 | 0 | 0 /* force_integer_mv */); |
2659 | |
|
2660 | 0 | assert(list_idx == 0 || list_idx == 1); |
2661 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_total_cnt]; |
2662 | 0 | cand->block_mi.mode = GLOBALMV; |
2663 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
2664 | 0 | cand->block_mi.is_interintra_used = 0; |
2665 | 0 | cand->wm_params_l0 = *gm_params; |
2666 | 0 | cand->wm_params_l1 = *gm_params; |
2667 | 0 | cand->block_mi.use_intrabc = 0; |
2668 | 0 | cand->skip_mode_allowed = false; |
2669 | 0 | cand->block_mi.mv[0].as_int = to_inj_mv.as_int; |
2670 | 0 | cand->drl_index = 0; |
2671 | 0 | cand->block_mi.ref_frame[0] = rf[0]; |
2672 | 0 | cand->block_mi.ref_frame[1] = rf[1]; |
2673 | 0 | cand->block_mi.num_proj_ref = ctx->wm_sample_info[frame_type].num; |
2674 | 0 | INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count); |
2675 | |
|
2676 | 0 | const bool enable_ii = true; |
2677 | 0 | const bool enable_obmc = false; |
2678 | 0 | const bool enable_warp = false; |
2679 | 0 | inj_non_simple_modes(pcs, ctx, &cand_total_cnt, enable_ii, enable_warp, enable_obmc); |
2680 | 0 | ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv.as_int; |
2681 | 0 | ctx->injected_ref_types[ctx->injected_mv_count] = frame_type; |
2682 | 0 | ++ctx->injected_mv_count; |
2683 | 0 | } else if (allow_bipred) { |
2684 | 0 | uint8_t ref_idx_0 = get_ref_frame_idx(rf[0]); |
2685 | 0 | uint8_t ref_idx_1 = get_ref_frame_idx(rf[1]); |
2686 | 0 | uint8_t list_idx_0 = get_list_idx(rf[0]); |
2687 | 0 | uint8_t list_idx_1 = get_list_idx(rf[1]); |
2688 | |
|
2689 | 0 | if (!is_valid_bipred_ref(ctx, GLOBAL_GROUP, list_idx_0, ref_idx_0, list_idx_1, ref_idx_1)) { |
2690 | 0 | return; |
2691 | 0 | } |
2692 | | // Get gm params |
2693 | 0 | WarpedMotionParams* gm_params_0 = &pcs->ppcs->global_motion[svt_get_ref_frame_type(list_idx_0, ref_idx_0)]; |
2694 | |
|
2695 | 0 | WarpedMotionParams* gm_params_1 = &pcs->ppcs->global_motion[svt_get_ref_frame_type(list_idx_1, ref_idx_1)]; |
2696 | |
|
2697 | 0 | if (pcs->ppcs->gm_ctrls.skip_identity && |
2698 | 0 | (gm_params_0->wmtype == IDENTITY || gm_params_1->wmtype == IDENTITY)) { |
2699 | 0 | continue; |
2700 | 0 | } |
2701 | 0 | Mv to_inj_mv0 = svt_aom_gm_get_motion_vector_enc(gm_params_0, |
2702 | 0 | pcs->ppcs->frm_hdr.allow_high_precision_mv, |
2703 | 0 | ctx->blk_geom->bsize, |
2704 | 0 | mi_col, |
2705 | 0 | mi_row, |
2706 | 0 | 0 /* force_integer_mv */); |
2707 | |
|
2708 | 0 | Mv to_inj_mv1 = svt_aom_gm_get_motion_vector_enc(gm_params_1, |
2709 | 0 | pcs->ppcs->frm_hdr.allow_high_precision_mv, |
2710 | 0 | ctx->blk_geom->bsize, |
2711 | 0 | mi_col, |
2712 | 0 | mi_row, |
2713 | 0 | 0 /* force_integer_mv */); |
2714 | 0 | uint8_t to_inject_ref_type = av1_ref_frame_type(rf); |
2715 | |
|
2716 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_total_cnt]; |
2717 | 0 | cand->block_mi.use_intrabc = 0; |
2718 | 0 | cand->skip_mode_allowed = false; |
2719 | 0 | cand->block_mi.mode = GLOBAL_GLOBALMV; |
2720 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
2721 | 0 | cand->wm_params_l0 = *gm_params_0; |
2722 | 0 | cand->wm_params_l1 = *gm_params_1; |
2723 | 0 | cand->block_mi.is_interintra_used = 0; |
2724 | 0 | cand->drl_index = 0; |
2725 | 0 | cand->block_mi.ref_frame[0] = rf[0]; |
2726 | 0 | cand->block_mi.ref_frame[1] = rf[1]; |
2727 | 0 | cand->block_mi.mv[0].as_int = to_inj_mv0.as_int; |
2728 | 0 | cand->block_mi.mv[1].as_int = to_inj_mv1.as_int; |
2729 | 0 | determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG); |
2730 | 0 | INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count); |
2731 | |
|
2732 | 0 | if (ctx->inter_comp_ctrls.do_global) { |
2733 | 0 | ctx->cmp_store.pred0_cnt = 0; |
2734 | 0 | ctx->cmp_store.pred1_cnt = 0; |
2735 | 0 | inj_comp_modes(pcs, ctx, &cand_total_cnt); |
2736 | 0 | } |
2737 | 0 | ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int; |
2738 | 0 | ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int; |
2739 | 0 | ctx->injected_ref_types[ctx->injected_mv_count] = to_inject_ref_type; |
2740 | 0 | ++ctx->injected_mv_count; |
2741 | 0 | } |
2742 | 0 | } |
2743 | | // update the total number of candidates injected |
2744 | 0 | (*candidate_total_cnt) = cand_total_cnt; |
2745 | 0 | } |
2746 | | |
2747 | | static void inject_pme_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t* candidate_total_cnt, |
2748 | 0 | const bool allow_bipred) { |
2749 | 0 | ModeDecisionCandidate* cand_array = ctx->fast_cand_array; |
2750 | 0 | Mv best_pred_mv[2] = {{{0}}, {{0}}}; |
2751 | 0 | uint32_t cand_total_cnt = (*candidate_total_cnt); |
2752 | 0 | for (uint32_t ref_it = 0; ref_it < ctx->tot_ref_frame_types; ++ref_it) { |
2753 | 0 | MvReferenceFrame ref_pair = ctx->ref_frame_type_arr[ref_it]; |
2754 | 0 | MvReferenceFrame rf[2]; |
2755 | 0 | av1_set_ref_frame(rf, ref_pair); |
2756 | | |
2757 | | //single ref/list |
2758 | 0 | if (rf[1] == NONE_FRAME) { |
2759 | 0 | MvReferenceFrame frame_type = rf[0]; |
2760 | 0 | uint8_t list_idx = get_list_idx(rf[0]); |
2761 | 0 | uint8_t ref_idx = get_ref_frame_idx(rf[0]); |
2762 | |
|
2763 | 0 | if (ctx->valid_pme_mv[list_idx][ref_idx]) { |
2764 | 0 | Mv to_inj_mv = ctx->best_pme_mv[list_idx][ref_idx]; |
2765 | 0 | if ((ctx->injected_mv_count == 0 || |
2766 | 0 | mv_is_already_injected(ctx, to_inj_mv, to_inj_mv, frame_type) == false)) { |
2767 | 0 | uint8_t drl_index = 0; |
2768 | 0 | svt_aom_choose_best_av1_mv_pred( |
2769 | 0 | ctx, frame_type, NEWMV, to_inj_mv, (Mv){{0}}, &drl_index, best_pred_mv); |
2770 | 0 | if (!ctx->corrupted_mv_check || is_valid_mv_diff(best_pred_mv, to_inj_mv, to_inj_mv, 0)) { |
2771 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_total_cnt]; |
2772 | 0 | cand->block_mi.use_intrabc = 0; |
2773 | 0 | cand->skip_mode_allowed = false; |
2774 | 0 | cand->block_mi.mode = NEWMV; |
2775 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
2776 | 0 | cand->block_mi.is_interintra_used = 0; |
2777 | 0 | cand->drl_index = drl_index; |
2778 | 0 | cand->block_mi.mv[0].as_int = to_inj_mv.as_int; |
2779 | 0 | cand->block_mi.ref_frame[0] = rf[0]; |
2780 | 0 | cand->block_mi.ref_frame[1] = rf[1]; |
2781 | 0 | cand->pred_mv[0].as_int = best_pred_mv[0].as_int; |
2782 | 0 | cand->block_mi.num_proj_ref = ctx->wm_sample_info[frame_type].num; |
2783 | 0 | INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count); |
2784 | |
|
2785 | 0 | const bool enable_ii = true; |
2786 | 0 | const bool enable_obmc = true; |
2787 | 0 | const bool enable_warp = true; |
2788 | 0 | inj_non_simple_modes(pcs, ctx, &cand_total_cnt, enable_ii, enable_warp, enable_obmc); |
2789 | 0 | ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv.as_int; |
2790 | 0 | ctx->injected_ref_types[ctx->injected_mv_count] = frame_type; |
2791 | 0 | ++ctx->injected_mv_count; |
2792 | 0 | } |
2793 | 0 | } |
2794 | 0 | } |
2795 | 0 | } else if (allow_bipred) { |
2796 | 0 | uint8_t ref_idx_0 = get_ref_frame_idx(rf[0]); |
2797 | 0 | uint8_t ref_idx_1 = get_ref_frame_idx(rf[1]); |
2798 | 0 | uint8_t list_idx_0 = get_list_idx(rf[0]); |
2799 | 0 | uint8_t list_idx_1 = get_list_idx(rf[1]); |
2800 | |
|
2801 | 0 | if (ctx->valid_pme_mv[list_idx_0][ref_idx_0] && ctx->valid_pme_mv[list_idx_1][ref_idx_1]) { |
2802 | 0 | Mv to_inj_mv0 = ctx->best_pme_mv[list_idx_0][ref_idx_0]; |
2803 | 0 | Mv to_inj_mv1 = ctx->best_pme_mv[list_idx_1][ref_idx_1]; |
2804 | 0 | const uint8_t to_inject_ref_type = av1_ref_frame_type((const MvReferenceFrame[]){ |
2805 | 0 | svt_get_ref_frame_type(list_idx_0, ref_idx_0), |
2806 | 0 | svt_get_ref_frame_type(list_idx_1, ref_idx_1), |
2807 | 0 | }); |
2808 | 0 | if ((ctx->injected_mv_count == 0 || |
2809 | 0 | mv_is_already_injected(ctx, to_inj_mv0, to_inj_mv1, to_inject_ref_type) == false)) { |
2810 | 0 | uint8_t drl_index = 0; |
2811 | 0 | svt_aom_choose_best_av1_mv_pred( |
2812 | 0 | ctx, to_inject_ref_type, NEW_NEWMV, to_inj_mv0, to_inj_mv1, &drl_index, best_pred_mv); |
2813 | 0 | if (!ctx->corrupted_mv_check || is_valid_mv_diff(best_pred_mv, to_inj_mv0, to_inj_mv1, 1)) { |
2814 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_total_cnt]; |
2815 | 0 | cand->block_mi.use_intrabc = 0; |
2816 | 0 | cand->skip_mode_allowed = false; |
2817 | 0 | cand->drl_index = drl_index; |
2818 | 0 | cand->block_mi.mv[0].as_int = to_inj_mv0.as_int; |
2819 | 0 | cand->block_mi.mv[1].as_int = to_inj_mv1.as_int; |
2820 | 0 | cand->block_mi.mode = NEW_NEWMV; |
2821 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
2822 | 0 | cand->block_mi.is_interintra_used = 0; |
2823 | 0 | cand->block_mi.ref_frame[0] = rf[0]; |
2824 | 0 | cand->block_mi.ref_frame[1] = rf[1]; |
2825 | 0 | cand->pred_mv[0].as_int = best_pred_mv[0].as_int; |
2826 | 0 | cand->pred_mv[1].as_int = best_pred_mv[1].as_int; |
2827 | 0 | determine_compound_mode(pcs, ctx, cand, MD_COMP_AVG); |
2828 | 0 | INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count); |
2829 | |
|
2830 | 0 | if (ctx->inter_comp_ctrls.do_pme) { |
2831 | 0 | ctx->cmp_store.pred0_cnt = 0; |
2832 | 0 | ctx->cmp_store.pred1_cnt = 0; |
2833 | 0 | inj_comp_modes(pcs, ctx, &cand_total_cnt); |
2834 | 0 | } |
2835 | 0 | ctx->injected_mvs[ctx->injected_mv_count][0].as_int = to_inj_mv0.as_int; |
2836 | 0 | ctx->injected_mvs[ctx->injected_mv_count][1].as_int = to_inj_mv1.as_int; |
2837 | 0 | ctx->injected_ref_types[ctx->injected_mv_count] = to_inject_ref_type; |
2838 | 0 | ++ctx->injected_mv_count; |
2839 | 0 | } |
2840 | 0 | } |
2841 | 0 | } |
2842 | 0 | } |
2843 | 0 | } |
2844 | 0 | (*candidate_total_cnt) = cand_total_cnt; |
2845 | 0 | } |
2846 | | |
2847 | | static void inject_inter_candidates_light_pd0(PictureControlSet* pcs, ModeDecisionContext* ctx, |
2848 | 0 | uint32_t* candidate_total_cnt) { |
2849 | 0 | FrameHeader* frm_hdr = &pcs->ppcs->frm_hdr; |
2850 | | // Bipred prediction is only allowed when both dimensions are > 4 and the frame-header reference mode allows it. |
2851 | | // See AV1 spec 5.11.25 |
2852 | 0 | const bool allow_bipred = (frm_hdr->reference_mode == SINGLE_REFERENCE || ctx->blk_geom->bwidth == 4 || |
2853 | 0 | ctx->blk_geom->bheight == 4) |
2854 | 0 | ? false |
2855 | 0 | : true; |
2856 | |
|
2857 | 0 | inject_new_candidates_light_pd0(pcs, ctx, candidate_total_cnt, allow_bipred); |
2858 | 0 | } |
2859 | | |
2860 | | static void inject_inter_candidates_light_pd1(PictureControlSet* pcs, ModeDecisionContext* ctx, |
2861 | 0 | uint32_t* cand_total_cnt) { |
2862 | 0 | FrameHeader* frm_hdr = &pcs->ppcs->frm_hdr; |
2863 | | // Bipred prediction is only allowed when both dimensions are > 4 and the frame-header reference mode allows it. |
2864 | | // See AV1 spec 5.11.25 |
2865 | 0 | const bool allow_bipred = (frm_hdr->reference_mode == SINGLE_REFERENCE || ctx->blk_geom->bwidth == 4 || |
2866 | 0 | ctx->blk_geom->bheight == 4) |
2867 | 0 | ? false |
2868 | 0 | : true; |
2869 | | // Needed in case WM/OBMC is on at the frame level (even though not used in light-PD1 path) |
2870 | 0 | if (frm_hdr->is_motion_mode_switchable) { |
2871 | 0 | const uint16_t mi_row = ctx->blk_org_y >> MI_SIZE_LOG2; |
2872 | 0 | const uint16_t mi_col = ctx->blk_org_x >> MI_SIZE_LOG2; |
2873 | 0 | svt_av1_count_overlappable_neighbors(pcs, ctx->blk_ptr, ctx->blk_geom->bsize, mi_row, mi_col); |
2874 | 0 | } else { |
2875 | | // Overlappable neighbours only needed for non-"SIMPLE_TRANSLATION" candidates |
2876 | 0 | ctx->blk_ptr->overlappable_neighbors = 0; |
2877 | 0 | } |
2878 | 0 | svt_aom_init_wm_samples(pcs, ctx); |
2879 | | // Inject MVP candidates |
2880 | 0 | if (ctx->new_nearest_injection && |
2881 | 0 | !(ctx->is_intra_bordered && ctx->cand_reduction_ctrls.use_neighbouring_mode_ctrls.enabled)) { |
2882 | 0 | inject_mvp_candidates_ii_light_pd1(pcs, ctx, cand_total_cnt, allow_bipred); |
2883 | 0 | } |
2884 | | |
2885 | | // Inject ME candidates |
2886 | 0 | if (ctx->inject_new_me) { |
2887 | 0 | inject_new_candidates_light_pd1(pcs, ctx, cand_total_cnt, allow_bipred); |
2888 | 0 | } |
2889 | 0 | } |
2890 | | |
2891 | | static void svt_aom_inject_inter_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx, |
2892 | 0 | uint32_t* cand_total_cnt) { |
2893 | 0 | FrameHeader* frm_hdr = &pcs->ppcs->frm_hdr; |
2894 | | // Bipred prediction is only allowed when both dimensions are > 4 and the frame-header reference mode allows it. |
2895 | | // See AV1 spec 5.11.25 |
2896 | 0 | const bool allow_bipred = (frm_hdr->reference_mode == SINGLE_REFERENCE || ctx->blk_geom->bwidth == 4 || |
2897 | 0 | ctx->blk_geom->bheight == 4) |
2898 | 0 | ? false |
2899 | 0 | : true; |
2900 | |
|
2901 | 0 | const uint32_t mi_row = ctx->blk_org_y >> MI_SIZE_LOG2; |
2902 | 0 | const uint32_t mi_col = ctx->blk_org_x >> MI_SIZE_LOG2; |
2903 | |
|
2904 | 0 | svt_av1_count_overlappable_neighbors(pcs, ctx->blk_ptr, ctx->blk_geom->bsize, mi_row, mi_col); |
2905 | 0 | svt_aom_init_wm_samples(pcs, ctx); |
2906 | 0 | #if CONFIG_ENABLE_OBMC |
2907 | 0 | if (ctx->obmc_ctrls.enabled && ctx->obmc_ctrls.refine_level == 0) { |
2908 | 0 | const uint8_t is_obmc_allowed = svt_aom_obmc_motion_mode_allowed( |
2909 | 0 | pcs, ctx, ctx->blk_geom->bsize, 1, LAST_FRAME, -1, NEWMV) == OBMC_CAUSAL; |
2910 | 0 | if (is_obmc_allowed) { |
2911 | 0 | svt_aom_precompute_obmc_data(pcs, ctx, PICTURE_BUFFER_DESC_LUMA_MASK); |
2912 | 0 | } |
2913 | 0 | } |
2914 | 0 | #endif |
2915 | | /************** |
2916 | | MVP |
2917 | | ************* */ |
2918 | 0 | if (ctx->new_nearest_injection && |
2919 | 0 | !(ctx->is_intra_bordered && ctx->cand_reduction_ctrls.use_neighbouring_mode_ctrls.enabled)) { |
2920 | 0 | inject_mvp_candidates_ii(pcs, ctx, cand_total_cnt, allow_bipred); |
2921 | 0 | } |
2922 | | //---------------------- |
2923 | | // NEAREST_NEWMV, NEW_NEARESTMV, NEAR_NEWMV, NEW_NEARMV. |
2924 | | //---------------------- |
2925 | 0 | if (ctx->new_nearest_near_comb_injection && allow_bipred) { |
2926 | 0 | inject_new_nearest_new_comb_candidates(pcs, ctx, cand_total_cnt); |
2927 | 0 | } |
2928 | 0 | if (ctx->inject_new_me) { |
2929 | 0 | inject_new_candidates(pcs, ctx, cand_total_cnt, allow_bipred); |
2930 | 0 | } |
2931 | 0 | if (ctx->global_mv_injection) { |
2932 | 0 | inject_global_candidates(pcs, ctx, cand_total_cnt, allow_bipred); |
2933 | 0 | } |
2934 | 0 | if (ctx->bipred3x3_ctrls.enabled && allow_bipred) { |
2935 | 0 | bipred_3x3_candidates_injection(pcs, ctx, cand_total_cnt); |
2936 | 0 | } |
2937 | |
|
2938 | 0 | if (ctx->unipred3x3_injection) { |
2939 | 0 | unipred_3x3_candidates_injection(pcs, ctx, cand_total_cnt); |
2940 | 0 | } |
2941 | | |
2942 | | // determine when to inject pme candidates based on size and resolution of block |
2943 | 0 | if (ctx->inject_new_pme && ctx->updated_enable_pme) { |
2944 | 0 | inject_pme_candidates(pcs, ctx, cand_total_cnt, allow_bipred); |
2945 | 0 | } |
2946 | 0 | } |
2947 | | |
2948 | | static const TxType g_intra_mode_to_tx_type[INTRA_MODES] = { |
2949 | | DCT_DCT, // DC |
2950 | | ADST_DCT, // V |
2951 | | DCT_ADST, // H |
2952 | | DCT_DCT, // D45 |
2953 | | ADST_ADST, // D135 |
2954 | | ADST_DCT, // D117 |
2955 | | DCT_ADST, // D153 |
2956 | | DCT_ADST, // D207 |
2957 | | ADST_DCT, // D63 |
2958 | | ADST_ADST, // SMOOTH |
2959 | | ADST_DCT, // SMOOTH_V |
2960 | | DCT_ADST, // SMOOTH_H |
2961 | | ADST_ADST, // PAETH |
2962 | | }; |
2963 | | |
2964 | | static INLINE TxType intra_mode_to_tx_type(PredictionMode pred_mode, UvPredictionMode pred_mode_uv, |
2965 | 246k | PlaneType plane_type) { |
2966 | 246k | const PredictionMode mode = (plane_type == PLANE_TYPE_Y) ? pred_mode : get_uv_mode(pred_mode_uv); |
2967 | 246k | assert(mode < INTRA_MODES); |
2968 | 246k | return g_intra_mode_to_tx_type[mode]; |
2969 | 246k | } |
2970 | | |
2971 | | /* For intra prediction, the chroma transform type may not follow the luma type. |
2972 | | This function will return the intra chroma TX type to be used, which is based on TX size and chroma mode. |
2973 | | Refer to section 5.11.40 of the AV1 spec (compute_tx_type). */ |
2974 | 246k | TxType svt_aom_get_intra_uv_tx_type(UvPredictionMode pred_mode_uv, TxSize tx_size, int32_t reduced_tx_set) { |
2975 | 246k | if (txsize_sqr_up_map[tx_size] > TX_32X32) { |
2976 | 0 | return DCT_DCT; |
2977 | 0 | } |
2978 | | |
2979 | | // In intra mode, uv planes don't share the same prediction mode as y |
2980 | | // plane, so the tx_type should not be shared. Pass DC_PRED as luma mode because the argument |
2981 | | // will not be used. |
2982 | 246k | TxType tx_type = intra_mode_to_tx_type(DC_PRED, pred_mode_uv, PLANE_TYPE_UV); |
2983 | 246k | assert(tx_type < TX_TYPES); |
2984 | 246k | const TxSetType tx_set_type = get_ext_tx_set_type(tx_size, /*is_inter*/ 0, reduced_tx_set); |
2985 | 246k | return !av1_ext_tx_used[tx_set_type][tx_type] ? DCT_DCT : tx_type; |
2986 | 246k | } |
2987 | | |
2988 | | // Values are now correlated to quantizer. |
2989 | 0 | static INLINE int mv_check_bounds(const MvLimits* mv_limits, const Mv* mv) { |
2990 | 0 | return (mv->y >> 3) < mv_limits->row_min || (mv->y >> 3) > mv_limits->row_max || |
2991 | 0 | (mv->x >> 3) < mv_limits->col_min || (mv->x >> 3) > mv_limits->col_max; |
2992 | 0 | } |
2993 | | |
// Soft assertion that stays active in release builds: logs a message when the condition is
// false (zero) and lets execution continue.
static void assert_release(int statement) {
    if (statement != 0) {
        return;
    }
    SVT_LOG("ASSERT_ERRRR\n");
}
2999 | | |
/*
 * Search displacement vectors (DVs) for intra block copy on the current block.
 *
 * For each search direction (area above, then area to the left, subject to
 * pcs->ppcs->intrabc_ctrls.search_dir) the MV limits are restricted to that area, a hash
 * search is run, and a full-pixel search is used as a fallback when the hash search returns
 * no result. At most one DV per direction is appended to dv_cand.
 *
 *   pcs         - picture control set (source picture, CRC calculator, intra-BC controls)
 *   ctx         - mode-decision context; ctx->ref_mv_stack[INTRA_FRAME][0].this_mv is
 *                 overwritten with the reference DV used for the search
 *   scs         - sequence control set (superblock size information)
 *   blk_ptr     - current block; its av1xd supplies the tile and block position
 *   dv_cand     - out: DVs found, stored in 1/8-pel units (full-pel result * 8)
 *   num_dv_cand - in/out: number of entries in dv_cand; incremented for every DV appended
 *
 * NOTE(review): dv_cand's capacity is not passed in; the visible caller provides 2 entries,
 * which matches the at-most-two directions searched here.
 */
static void intra_bc_search(PictureControlSet* pcs, ModeDecisionContext* ctx, const SequenceControlSet* scs,
                            BlkStruct* blk_ptr, Mv* dv_cand, uint8_t* num_dv_cand) {
    IntraBcContext x_st;
    IntraBcContext* x = &x_st;
    // Lambda matching the bit depth used by MD
    uint32_t full_lambda = ctx->hbd_md ? ctx->full_lambda_md[EB_10_BIT_MD] : ctx->full_lambda_md[EB_8_BIT_MD];

    // Populate the local search context from the picture- and block-level state
    svt_memcpy(&x->crc_calculator, &pcs->crc_calculator, sizeof(pcs->crc_calculator));
    x->approx_inter_rate = ctx->approx_inter_rate;
    x->xd = blk_ptr->av1xd;
    x->nmv_vec_cost = ctx->md_rate_est_ctx->nmv_vec_cost;
    x->mv_cost_stack = ctx->md_rate_est_ctx->nmvcoststack;
    BlockSize bsize = ctx->blk_geom->bsize;
    assert(bsize < BLOCK_SIZES_ALL);
    FrameHeader* frm_hdr = &pcs->ppcs->frm_hdr;
    const Av1Common* const cm = pcs->ppcs->av1_cm;
    MvReferenceFrame ref_frame = INTRA_FRAME;
    const int num_planes = 3;
    MacroBlockD* xd = blk_ptr->av1xd;
    const TileInfo* tile = &xd->tile;
    // Block position in MI units, recovered from the (negative) distance to the frame edges
    const int mi_row = -xd->mb_to_top_edge / (8 * MI_SIZE);
    const int mi_col = -xd->mb_to_left_edge / (8 * MI_SIZE);
    const int w = block_size_wide[bsize];
    const int h = block_size_high[bsize];
    const int sb_row = mi_row >> scs->seq_header.sb_size_log2;
    const int sb_col = mi_col >> scs->seq_header.sb_size_log2;

    // Set up limit values for MV components.
    // Mv beyond the range do not produce new/different prediction block.
    const int mi_width = mi_size_wide[bsize];
    const int mi_height = mi_size_high[bsize];
    x->mv_limits.row_min = -(((mi_row + mi_height) * MI_SIZE) + AOM_INTERP_EXTEND);
    x->mv_limits.col_min = -(((mi_col + mi_width) * MI_SIZE) + AOM_INTERP_EXTEND);
    x->mv_limits.row_max = (cm->mi_rows - mi_row) * MI_SIZE + AOM_INTERP_EXTEND;
    x->mv_limits.col_max = (cm->mi_cols - mi_col) * MI_SIZE + AOM_INTERP_EXTEND;
    // set search parameters
    x->sadperbit16 = svt_aom_get_sad_per_bit(frm_hdr->quantization_params.base_q_idx, 0);
    x->errorperbit = full_lambda >> RD_EPB_SHIFT;
    // Keep errorperbit non-zero
    x->errorperbit += (x->errorperbit == 0);
    // temp buffer for hash me
    // NOTE(review): the allocation result is not checked here ("NO_CHECK"); confirm how the
    // hash search behaves if either buffer could not be allocated.
    for (int i = 0; i < 2; i++) {
        EB_MALLOC_ARRAY_NO_CHECK(x->hash_value_buffer[i], AOM_BUFFER_SIZE_FOR_BLOCK_HASH);
    }

    // Reference DV: nearest if non-zero, else near, else the default from svt_aom_find_ref_dv()
    Mv nearestmv, nearmv;
    svt_av1_find_best_ref_mvs_from_stack(0, ctx->ref_mv_stack /*mbmi_ext*/, xd, ref_frame, &nearestmv, &nearmv, 0);
    if (nearestmv.as_int == INVALID_MV) {
        nearestmv.as_int = 0;
    }
    if (nearmv.as_int == INVALID_MV) {
        nearmv.as_int = 0;
    }
    Mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
    if (dv_ref.as_int == 0) {
        svt_aom_find_ref_dv(&dv_ref, tile, scs->seq_header.sb_mi_size, mi_row, mi_col);
    }
    // Ref DV should not have sub-pel.
    assert((dv_ref.x & 7) == 0);
    assert((dv_ref.y & 7) == 0);
    // Publish the reference DV; the intra-BC candidate injection reads it back as the predicted MV
    ctx->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;

    /* pointer to current frame */
    Yv12BufferConfig cur_buf;
    svt_aom_link_eb_to_aom_buffer_desc_8bit(pcs->ppcs->enhanced_pic, &cur_buf);
    struct Buf2D yv12_mb[MAX_PLANES];
    svt_av1_setup_pred_block(bsize, yv12_mb, &cur_buf, mi_row, mi_col);
    for (int i = 0; i < num_planes; ++i) {
        x->xdplane[i].pre[0] = yv12_mb[i]; // ref in ME
    }
    // setup src for DV search same as ref
    x->plane[0].src = x->xdplane[0].pre[0];

    // A non-zero search_dir ends the loop before IBC_MOTION_LEFT
    // (presumably leaving only the "above" search; confirm against the enum order)
    enum IntrabcMotionDirection max_dir = pcs->ppcs->intrabc_ctrls.search_dir ? IBC_MOTION_LEFT : IBC_MOTION_DIRECTIONS;

    for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE; dir < max_dir; ++dir) {
        // Saved so the frame-level limits can be restored at the end of every iteration
        const MvLimits tmp_mv_limits = x->mv_limits;

        switch (dir) {
        case IBC_MOTION_ABOVE:
            // Tile columns, rows from the tile top down to the top of the current SB row
            x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
            x->mv_limits.col_max = (tile->mi_col_end - mi_col) * MI_SIZE - w;
            x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
            x->mv_limits.row_max = (sb_row * scs->seq_header.sb_mi_size - mi_row) * MI_SIZE - h;
            break;
        case IBC_MOTION_LEFT:
            // Columns from the tile left edge to the left of the current SB column
            x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE;
            x->mv_limits.col_max = (sb_col * scs->seq_header.sb_mi_size - mi_col) * MI_SIZE - w;
            // TODO: Minimize the overlap between above and
            // left areas.
            x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE;
            int bottom_coded_mi_edge = AOMMIN((sb_row + 1) * scs->seq_header.sb_mi_size, tile->mi_row_end);
            x->mv_limits.row_max = (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
            break;
        default:
            assert(0);
        }
        // The per-direction window must stay inside the frame-level limits (logged, not fatal)
        assert_release(x->mv_limits.col_min >= tmp_mv_limits.col_min);
        assert_release(x->mv_limits.col_max <= tmp_mv_limits.col_max);
        assert_release(x->mv_limits.row_min >= tmp_mv_limits.row_min);
        assert_release(x->mv_limits.row_max <= tmp_mv_limits.row_max);

        svt_av1_set_mv_search_range(&x->mv_limits, &dv_ref);

        // Empty search window: nothing to search in this direction
        if (x->mv_limits.col_max < x->mv_limits.col_min || x->mv_limits.row_max < x->mv_limits.row_min) {
            x->mv_limits = tmp_mv_limits;
            continue;
        }
        // Full-pel version of the reference DV, used as the full-pixel search start point
        Mv mvp_full = dv_ref;
        mvp_full.x >>= 3;
        mvp_full.y >>= 3;
        x->best_mv.as_int = 0;

        // Hash Search
        const AomVarianceFnPtr* fn_ptr = &svt_aom_mefn_ptr[bsize];

        int best_hash_cost = INT_MAX;
        Mv best_hash_mv = {{0, 0}};

        svt_av1_intrabc_hash_search(
            pcs, x, bsize, mi_col * MI_SIZE, mi_row * MI_SIZE, &dv_ref, 1, fn_ptr, &best_hash_cost, &best_hash_mv);

        // Hash produced a candidate
        // NOTE(review): unlike the full-pixel path below, this DV is appended without the
        // mv_check_bounds() / svt_aom_is_dv_valid() checks; confirm the hash search validates it.
        if (best_hash_cost < INT_MAX) {
            Mv dv;
            dv.x = best_hash_mv.x * 8;
            dv.y = best_hash_mv.y * 8;

            dv_cand[*num_dv_cand] = dv;
            (*num_dv_cand)++;

            x->best_mv = best_hash_mv;
        }
        // Full-pixel fallback if hash didn't produce a candidate
        else {
            svt_av1_full_pixel_search(pcs, x, bsize, &mvp_full, 0, x->sadperbit16, NULL, &dv_ref);

            Mv dv = {{x->best_mv.x * 8, x->best_mv.y * 8}};

            // Keep the result only if it is inside the search window and is a valid DV
            if (!mv_check_bounds(&x->mv_limits, &dv) &&
                svt_aom_is_dv_valid(dv, xd, mi_row, mi_col, bsize, scs->seq_header.sb_size_log2)) {
                dv_cand[*num_dv_cand] = dv;
                (*num_dv_cand)++;
            }
        }

        x->mv_limits = tmp_mv_limits;
    }

    // Release the temporary hash buffers
    for (int i = 0; i < 2; i++) {
        EB_FREE_ARRAY(x->hash_value_buffer[i]);
    }
}
3151 | | |
3152 | | static void inject_intra_bc_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx, const SequenceControlSet* scs, |
3153 | 0 | BlkStruct* blk_ptr, uint32_t* cand_cnt) { |
3154 | 0 | Mv dv_cand[2]; |
3155 | 0 | uint8_t num_dv_cand = 0; |
3156 | | |
3157 | | //perform dv-pred + search up to 2 dv(s) |
3158 | 0 | intra_bc_search(pcs, ctx, scs, blk_ptr, dv_cand, &num_dv_cand); |
3159 | |
|
3160 | 0 | ModeDecisionCandidate* cand_array = ctx->fast_cand_array; |
3161 | |
|
3162 | 0 | for (uint32_t dv_i = 0; dv_i < num_dv_cand; dv_i++) { |
3163 | 0 | ModeDecisionCandidate* cand = &cand_array[*cand_cnt]; |
3164 | 0 | cand->palette_info = NULL; |
3165 | 0 | cand->block_mi.use_intrabc = 1; |
3166 | 0 | cand->block_mi.angle_delta[PLANE_TYPE_Y] = 0; |
3167 | 0 | cand->block_mi.angle_delta[PLANE_TYPE_UV] = 0; |
3168 | 0 | cand->block_mi.uv_mode = UV_DC_PRED; |
3169 | 0 | cand->block_mi.cfl_alpha_signs = 0; |
3170 | 0 | cand->block_mi.cfl_alpha_idx = 0; |
3171 | 0 | cand->transform_type[0] = DCT_DCT; |
3172 | 0 | cand->transform_type_uv = DCT_DCT; |
3173 | 0 | cand->block_mi.ref_frame[0] = INTRA_FRAME; |
3174 | 0 | cand->block_mi.ref_frame[1] = NONE_FRAME; |
3175 | 0 | cand->block_mi.mode = DC_PRED; |
3176 | 0 | cand->block_mi.filter_intra_mode = FILTER_INTRA_MODES; |
3177 | | //inter ralated |
3178 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
3179 | 0 | cand->block_mi.is_interintra_used = 0; |
3180 | 0 | cand->skip_mode_allowed = false; |
3181 | 0 | cand->block_mi.mv[0].as_int = dv_cand[dv_i].as_int; |
3182 | 0 | cand->pred_mv[0].as_int = ctx->ref_mv_stack[INTRA_FRAME][0].this_mv.as_int; |
3183 | 0 | cand->drl_index = 0; |
3184 | 0 | cand->block_mi.interp_filters = av1_broadcast_interp_filter(BILINEAR); |
3185 | 0 | INC_MD_CAND_CNT((*cand_cnt), pcs->ppcs->max_can_count); |
3186 | 0 | } |
3187 | 0 | } |
3188 | | |
3189 | | static void inject_intra_candidates_light_pd0(PictureControlSet* pcs, ModeDecisionContext* ctx, |
3190 | 7.53k | uint32_t* candidate_total_cnt) { |
3191 | 7.53k | uint32_t cand_total_cnt = 0; |
3192 | 7.53k | ModeDecisionCandidate* cand = &ctx->fast_cand_array[cand_total_cnt]; |
3193 | 7.53k | cand->skip_mode_allowed = false; |
3194 | 7.53k | cand->palette_info = NULL; |
3195 | 7.53k | cand->block_mi.use_intrabc = 0; |
3196 | 7.53k | cand->block_mi.filter_intra_mode = FILTER_INTRA_MODES; |
3197 | 7.53k | cand->block_mi.angle_delta[PLANE_TYPE_Y] = 0; |
3198 | 7.53k | cand->block_mi.uv_mode = UV_DC_PRED; |
3199 | 7.53k | cand->block_mi.angle_delta[PLANE_TYPE_UV] = 0; |
3200 | 7.53k | cand->block_mi.cfl_alpha_signs = 0; |
3201 | 7.53k | cand->block_mi.cfl_alpha_idx = 0; |
3202 | 7.53k | cand->transform_type[0] = DCT_DCT; |
3203 | 7.53k | cand->transform_type_uv = DCT_DCT; |
3204 | 7.53k | cand->block_mi.ref_frame[0] = INTRA_FRAME; |
3205 | 7.53k | cand->block_mi.ref_frame[1] = NONE_FRAME; |
3206 | 7.53k | cand->block_mi.mode = DC_PRED; |
3207 | 7.53k | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
3208 | 7.53k | cand->block_mi.is_interintra_used = 0; |
3209 | 7.53k | INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count); |
3210 | | // update the total number of candidates injected |
3211 | 7.53k | (*candidate_total_cnt) = cand_total_cnt; |
3212 | 7.53k | return; |
3213 | 7.53k | } |
3214 | | |
3215 | | static void inject_intra_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx, const bool dc_cand_only_flag, |
3216 | 246k | uint32_t* candidate_total_cnt) { |
3217 | 246k | FrameHeader* frm_hdr = &pcs->ppcs->frm_hdr; |
3218 | 246k | PredictionMode intra_mode_start = DC_PRED; |
3219 | 246k | PredictionMode intra_mode_end = dc_cand_only_flag ? DC_PRED : ctx->intra_ctrls.intra_mode_end; |
3220 | 246k | uint32_t cand_total_cnt = *candidate_total_cnt; |
3221 | 246k | ModeDecisionCandidate* cand_array = ctx->fast_cand_array; |
3222 | 246k | const bool use_angle_delta = ctx->intra_ctrls.angular_pred_level ? av1_use_angle_delta(ctx->blk_geom->bsize) : 0; |
3223 | 246k | const uint8_t disable_angle_prediction = (ctx->intra_ctrls.angular_pred_level == 0); |
3224 | 246k | uint8_t directional_mode_skip_mask[INTRA_MODES] = {0}; |
3225 | 246k | if (ctx->intra_ctrls.angular_pred_level >= 4) { |
3226 | 1.38M | for (uint8_t i = D45_PRED; i < INTRA_MODE_END; i++) { |
3227 | 1.26M | directional_mode_skip_mask[i] = 1; |
3228 | 1.26M | } |
3229 | 126k | } |
3230 | 246k | const TxSize tx_size_uv = av1_get_max_uv_txsize(ctx->blk_geom->bsize, 1, 1); |
3231 | | |
3232 | 492k | for (PredictionMode intra_mode = intra_mode_start; intra_mode <= intra_mode_end; ++intra_mode) { |
3233 | 245k | if (av1_is_directional_mode(intra_mode) && |
3234 | 0 | (disable_angle_prediction || directional_mode_skip_mask[intra_mode])) { |
3235 | 0 | continue; |
3236 | 0 | } |
3237 | | |
3238 | 245k | const uint8_t angle_delta_count = av1_is_directional_mode(intra_mode) && |
3239 | 0 | ctx->intra_ctrls.angular_pred_level <= 2 && use_angle_delta |
3240 | 245k | ? 7 |
3241 | 245k | : 1; |
3242 | | |
3243 | 491k | for (uint8_t angle_delta_counter = 0; angle_delta_counter < angle_delta_count; ++angle_delta_counter) { |
3244 | 245k | int32_t angle_delta = CLIP((angle_delta_count == 1 ? 0 : angle_delta_counter - MAX_ANGLE_DELTA), |
3245 | 245k | -MAX_ANGLE_DELTA, |
3246 | 245k | MAX_ANGLE_DELTA); |
3247 | 245k | if ((ctx->intra_ctrls.angular_pred_level >= 2 && |
3248 | 126k | (angle_delta == -1 || angle_delta == 1 || angle_delta == -2 || angle_delta == 2)) || |
3249 | 245k | (ctx->intra_ctrls.angular_pred_level >= 3 && angle_delta != 0)) { |
3250 | 0 | continue; |
3251 | 0 | } |
3252 | 245k | ModeDecisionCandidate* cand = &cand_array[cand_total_cnt]; |
3253 | 245k | cand->skip_mode_allowed = false; |
3254 | 245k | cand->palette_info = NULL; |
3255 | 245k | cand->block_mi.mode = intra_mode; |
3256 | 245k | cand->block_mi.use_intrabc = 0; |
3257 | 245k | cand->block_mi.filter_intra_mode = FILTER_INTRA_MODES; |
3258 | 245k | cand->block_mi.angle_delta[PLANE_TYPE_Y] = angle_delta; |
3259 | 245k | cand->block_mi.uv_mode = ctx->ind_uv_avail ? ctx->best_uv_mode[intra_mode] |
3260 | 245k | : intra_luma_to_chroma[intra_mode]; |
3261 | 245k | cand->block_mi.angle_delta[PLANE_TYPE_UV] = ctx->ind_uv_avail ? ctx->best_uv_angle[intra_mode] |
3262 | 245k | : cand->block_mi.angle_delta[PLANE_TYPE_Y]; |
3263 | 245k | cand->block_mi.cfl_alpha_signs = 0; |
3264 | 245k | cand->block_mi.cfl_alpha_idx = 0; |
3265 | 245k | cand->transform_type[0] = DCT_DCT; |
3266 | 245k | cand->transform_type_uv = svt_aom_get_intra_uv_tx_type( |
3267 | 245k | cand->block_mi.uv_mode, tx_size_uv, frm_hdr->reduced_tx_set); |
3268 | | |
3269 | 245k | if (svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id) && cand->transform_type_uv != DCT_DCT) { |
3270 | 0 | continue; |
3271 | 0 | } |
3272 | 245k | cand->block_mi.ref_frame[0] = INTRA_FRAME; |
3273 | 245k | cand->block_mi.ref_frame[1] = NONE_FRAME; |
3274 | 245k | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
3275 | 245k | cand->block_mi.is_interintra_used = 0; |
3276 | 245k | INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count); |
3277 | 245k | } |
3278 | 245k | } |
3279 | | |
3280 | | // update the total number of candidates injected |
3281 | 246k | (*candidate_total_cnt) = cand_total_cnt; |
3282 | | |
3283 | 246k | return; |
3284 | 246k | } |
3285 | | |
3286 | | static void inject_filter_intra_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx, |
3287 | 0 | uint32_t* candidate_total_cnt) { |
3288 | 0 | FilterIntraMode intra_mode_start = FILTER_DC_PRED; |
3289 | 0 | FilterIntraMode intra_mode_end = ctx->intra_ctrls.intra_mode_end == PAETH_PRED ? FILTER_PAETH_PRED |
3290 | 0 | : ctx->intra_ctrls.intra_mode_end >= D157_PRED ? FILTER_D157_PRED |
3291 | 0 | : ctx->intra_ctrls.intra_mode_end >= H_PRED ? FILTER_H_PRED |
3292 | 0 | : ctx->intra_ctrls.intra_mode_end >= V_PRED ? FILTER_V_PRED |
3293 | 0 | : FILTER_DC_PRED; |
3294 | 0 | intra_mode_end = MIN(intra_mode_end, ctx->filter_intra_ctrls.max_filter_intra_mode); |
3295 | |
|
3296 | 0 | const TxSize tx_size_uv = av1_get_max_uv_txsize(ctx->blk_geom->bsize, 1, 1); |
3297 | 0 | uint32_t cand_total_cnt = *candidate_total_cnt; |
3298 | 0 | ModeDecisionCandidate* cand_array = ctx->fast_cand_array; |
3299 | 0 | FrameHeader* frm_hdr = &pcs->ppcs->frm_hdr; |
3300 | |
|
3301 | 0 | for (FilterIntraMode filter_intra_mode = intra_mode_start; filter_intra_mode <= intra_mode_end; |
3302 | 0 | filter_intra_mode++) { |
3303 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_total_cnt]; |
3304 | 0 | cand->skip_mode_allowed = false; |
3305 | 0 | cand->block_mi.mode = DC_PRED; |
3306 | 0 | cand->block_mi.use_intrabc = 0; |
3307 | 0 | cand->block_mi.filter_intra_mode = filter_intra_mode; |
3308 | 0 | cand->palette_info = NULL; |
3309 | 0 | cand->block_mi.angle_delta[PLANE_TYPE_Y] = 0; |
3310 | |
|
3311 | 0 | cand->block_mi.uv_mode = ctx->ind_uv_avail ? ctx->best_uv_mode[fimode_to_intramode[filter_intra_mode]] |
3312 | 0 | : intra_luma_to_chroma[fimode_to_intramode[filter_intra_mode]]; |
3313 | 0 | cand->block_mi.angle_delta[PLANE_TYPE_UV] = ctx->ind_uv_avail |
3314 | 0 | ? ctx->best_uv_angle[fimode_to_intramode[filter_intra_mode]] |
3315 | 0 | : cand->block_mi.angle_delta[PLANE_TYPE_Y]; |
3316 | |
|
3317 | 0 | cand->block_mi.cfl_alpha_signs = 0; |
3318 | 0 | cand->block_mi.cfl_alpha_idx = 0; |
3319 | 0 | cand->transform_type[0] = DCT_DCT; |
3320 | 0 | cand->transform_type_uv = svt_aom_get_intra_uv_tx_type( |
3321 | 0 | cand->block_mi.uv_mode, tx_size_uv, frm_hdr->reduced_tx_set); |
3322 | 0 | if (svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id) && cand->transform_type_uv != DCT_DCT) { |
3323 | 0 | continue; |
3324 | 0 | } |
3325 | 0 | cand->block_mi.ref_frame[0] = INTRA_FRAME; |
3326 | 0 | cand->block_mi.ref_frame[1] = NONE_FRAME; |
3327 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
3328 | 0 | cand->block_mi.is_interintra_used = 0; |
3329 | 0 | INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count); |
3330 | 0 | } |
3331 | | |
3332 | | // update the total number of candidates injected |
3333 | 0 | (*candidate_total_cnt) = cand_total_cnt; |
3334 | |
|
3335 | 0 | return; |
3336 | 0 | } |
3337 | | |
3338 | | static void inject_zz_backup_candidate(PictureControlSet* pcs, ModeDecisionContext* ctx, |
3339 | 0 | uint32_t* candidate_total_cnt) { |
3340 | 0 | ModeDecisionCandidate* cand_array = ctx->fast_cand_array; |
3341 | 0 | Mv best_pred_mv[2] = {{{0}}, {{0}}}; |
3342 | 0 | uint32_t cand_total_cnt = (*candidate_total_cnt); |
3343 | 0 | cand_array[cand_total_cnt].drl_index = 0; |
3344 | 0 | svt_aom_choose_best_av1_mv_pred(ctx, |
3345 | 0 | svt_get_ref_frame_type(REF_LIST_0, 0), |
3346 | 0 | NEWMV, |
3347 | 0 | (Mv){{0}}, |
3348 | 0 | (Mv){{0}}, |
3349 | 0 | &cand_array[cand_total_cnt].drl_index, |
3350 | 0 | best_pred_mv); |
3351 | 0 | if (!ctx->corrupted_mv_check || is_valid_mv_diff(best_pred_mv, (Mv){{0, 0}}, (Mv){{0, 0}}, 0)) { |
3352 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_total_cnt]; |
3353 | 0 | cand->block_mi.use_intrabc = 0; |
3354 | 0 | cand->skip_mode_allowed = false; |
3355 | 0 | cand->block_mi.mode = NEWMV; |
3356 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
3357 | 0 | cand->block_mi.mv[0] = (Mv){{0, 0}}; |
3358 | 0 | cand->block_mi.ref_frame[0] = svt_get_ref_frame_type(REF_LIST_0, 0); |
3359 | 0 | cand->block_mi.ref_frame[1] = NONE_FRAME; |
3360 | 0 | cand->transform_type[0] = DCT_DCT; |
3361 | 0 | cand->transform_type_uv = DCT_DCT; |
3362 | 0 | cand->pred_mv[0].as_int = best_pred_mv[0].as_int; |
3363 | 0 | cand->block_mi.is_interintra_used = 0; |
3364 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
3365 | 0 | cand->block_mi.num_proj_ref = ctx->wm_sample_info[svt_get_ref_frame_type(REF_LIST_0, 0)].num; |
3366 | 0 | INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count); |
3367 | | // update the total number of candidates injected |
3368 | 0 | (*candidate_total_cnt) = cand_total_cnt; |
3369 | 0 | } |
3370 | 0 | } |
3371 | | |
3372 | 1.01M | int svt_av1_allow_palette(int allow_palette, BlockSize bsize) { |
3373 | 1.01M | assert(bsize < BLOCK_SIZES_ALL); |
3374 | 1.01M | return allow_palette && block_size_wide[bsize] <= 64 && block_size_high[bsize] <= 64 && bsize >= BLOCK_8X8; |
3375 | 1.01M | } |
3376 | | |
3377 | | void search_palette_luma(PictureControlSet* pcs, ModeDecisionContext* ctx, PaletteInfo* palette_cand, |
3378 | | uint8_t* palette_size_array, uint32_t* tot_palette_cands); |
3379 | | |
3380 | 0 | static void inject_palette_candidates(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t* candidate_total_cnt) { |
3381 | 0 | uint32_t can_total_cnt = *candidate_total_cnt; |
3382 | 0 | ModeDecisionCandidate* cand_array = ctx->fast_cand_array; |
3383 | 0 | const TxSize tx_size_uv = av1_get_max_uv_txsize(ctx->blk_geom->bsize, 1, 1); |
3384 | 0 | uint32_t tot_palette_cands = 0; |
3385 | 0 | PaletteInfo* palette_cand_array = ctx->palette_cand_array; |
3386 | | // MD palette search |
3387 | 0 | uint8_t* palette_size_array_0 = ctx->palette_size_array_0; |
3388 | |
|
3389 | 0 | search_palette_luma(pcs, ctx, palette_cand_array, palette_size_array_0, &tot_palette_cands); |
3390 | |
|
3391 | 0 | for (uint32_t cand_i = 0; cand_i < tot_palette_cands; ++cand_i) { |
3392 | 0 | ModeDecisionCandidate* cand = &cand_array[can_total_cnt]; |
3393 | 0 | cand->block_mi.is_interintra_used = 0; |
3394 | 0 | cand->palette_size[0] = palette_size_array_0[cand_i]; |
3395 | | // Palette is not supported for chroma |
3396 | 0 | cand->palette_size[1] = 0; |
3397 | 0 | cand->palette_info = &palette_cand_array[cand_i]; |
3398 | 0 | assert(palette_size_array_0[cand_i] < 9); |
3399 | | //to re check these fields |
3400 | 0 | cand->skip_mode_allowed = false; |
3401 | 0 | cand->block_mi.mode = DC_PRED; |
3402 | 0 | cand->block_mi.use_intrabc = 0; |
3403 | |
|
3404 | 0 | cand->block_mi.filter_intra_mode = FILTER_INTRA_MODES; |
3405 | 0 | cand->block_mi.angle_delta[PLANE_TYPE_Y] = 0; |
3406 | | // Palette is not supported for chroma mode, so we can set the intra chroma mode to anything. To use palette |
3407 | | // for chroma, we must force DC_PRED to be used for the intra chroma mode |
3408 | 0 | assert(cand_array[can_total_cnt].palette_size[1] == 0); |
3409 | 0 | cand->block_mi.uv_mode = ctx->ind_uv_avail ? ctx->best_uv_mode[DC_PRED] : intra_luma_to_chroma[DC_PRED]; |
3410 | 0 | cand->block_mi.angle_delta[PLANE_TYPE_UV] = ctx->ind_uv_avail ? ctx->best_uv_angle[DC_PRED] |
3411 | 0 | : cand->block_mi.angle_delta[PLANE_TYPE_Y]; |
3412 | 0 | cand->block_mi.cfl_alpha_signs = 0; |
3413 | 0 | cand->block_mi.cfl_alpha_idx = 0; |
3414 | 0 | cand->transform_type[0] = DCT_DCT; |
3415 | 0 | cand->transform_type_uv = svt_aom_get_intra_uv_tx_type( |
3416 | 0 | cand->block_mi.uv_mode, tx_size_uv, pcs->ppcs->frm_hdr.reduced_tx_set); |
3417 | 0 | if (svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id) && cand->transform_type_uv != DCT_DCT) { |
3418 | 0 | continue; |
3419 | 0 | } |
3420 | 0 | cand->block_mi.ref_frame[0] = INTRA_FRAME; |
3421 | 0 | cand->block_mi.ref_frame[1] = NONE_FRAME; |
3422 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
3423 | 0 | INC_MD_CAND_CNT(can_total_cnt, pcs->ppcs->max_can_count); |
3424 | 0 | } |
3425 | | |
3426 | | // update the total number of candidates injected |
3427 | 0 | (*candidate_total_cnt) = can_total_cnt; |
3428 | |
|
3429 | 0 | return; |
3430 | 0 | } |
3431 | | |
3432 | 0 | static INLINE void eliminate_candidate_based_on_pme_me_results(ModeDecisionContext* ctx, uint8_t* dc_cand_only_flag) { |
3433 | 0 | if (ctx->md_pme_dist != (uint32_t)~0 || ctx->md_me_dist != (uint32_t)~0) { |
3434 | 0 | uint32_t th = ctx->cand_reduction_ctrls.cand_elimination_ctrls.dc_only_th; |
3435 | 0 | th *= ctx->blk_geom->bheight * ctx->blk_geom->bwidth; |
3436 | 0 | const uint32_t best_me_distotion = MIN(ctx->md_pme_dist, ctx->md_me_dist); |
3437 | 0 | if (best_me_distotion < th) { |
3438 | 0 | *dc_cand_only_flag = 1; |
3439 | 0 | } |
3440 | 0 | } |
3441 | 0 | } |
3442 | | |
3443 | | static bool valid_ref_frame_type(MvReferenceFrame rf[2], const MvReferenceFrame ref_frame_type_arr[], |
3444 | 0 | uint8_t tot_ref_frame_types) { |
3445 | | // INTRA_FRAME is added in candidates sometimes, skip validation |
3446 | 0 | if (rf[0] == INTRA_FRAME) { |
3447 | 0 | return true; |
3448 | 0 | } |
3449 | | |
3450 | 0 | for (uint8_t i = 0; i < tot_ref_frame_types; i++) { |
3451 | 0 | MvReferenceFrame rf_in_arr[2]; |
3452 | 0 | av1_set_ref_frame(rf_in_arr, ref_frame_type_arr[i]); |
3453 | 0 | if (rf[0] == rf_in_arr[0] && rf[1] == rf_in_arr[1]) { |
3454 | 0 | return true; |
3455 | 0 | } |
3456 | 0 | } |
3457 | 0 | return false; |
3458 | 0 | } |
3459 | | |
3460 | | // refer to inject_zz_backup_candidate, but use BWD ref instead of LAST |
3461 | | static void inject_sframe_backup_candidate(PictureControlSet* pcs, ModeDecisionContext* ctx, |
3462 | 0 | uint32_t* candidate_total_cnt) { |
3463 | 0 | ModeDecisionCandidate* cand_array = ctx->fast_cand_array; |
3464 | 0 | Mv best_pred_mv[2] = {{{0}}, {{0}}}; |
3465 | 0 | uint32_t cand_total_cnt = (*candidate_total_cnt); |
3466 | 0 | cand_array[cand_total_cnt].drl_index = 0; |
3467 | 0 | svt_aom_choose_best_av1_mv_pred(ctx, |
3468 | 0 | svt_get_ref_frame_type(REF_LIST_1, 0), |
3469 | 0 | NEWMV, |
3470 | 0 | (Mv){{0}}, |
3471 | 0 | (Mv){{0}}, |
3472 | 0 | &cand_array[cand_total_cnt].drl_index, |
3473 | 0 | best_pred_mv); |
3474 | 0 | if (!ctx->corrupted_mv_check || is_valid_mv_diff(best_pred_mv, (Mv){{0, 0}}, (Mv){{0, 0}}, 0)) { |
3475 | 0 | ModeDecisionCandidate* cand = &cand_array[cand_total_cnt]; |
3476 | 0 | cand->block_mi.use_intrabc = 0; |
3477 | 0 | cand->skip_mode_allowed = false; |
3478 | 0 | cand->block_mi.mode = NEWMV; |
3479 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
3480 | 0 | cand->block_mi.mv[0] = (Mv){{0, 0}}; |
3481 | 0 | cand->block_mi.ref_frame[0] = svt_get_ref_frame_type(REF_LIST_1, 0); |
3482 | 0 | cand->block_mi.ref_frame[1] = NONE_FRAME; |
3483 | 0 | cand->transform_type[0] = DCT_DCT; |
3484 | 0 | cand->transform_type_uv = DCT_DCT; |
3485 | 0 | cand->pred_mv[0].as_int = best_pred_mv[0].as_int; |
3486 | 0 | cand->block_mi.is_interintra_used = 0; |
3487 | 0 | cand->block_mi.motion_mode = SIMPLE_TRANSLATION; |
3488 | 0 | cand->block_mi.num_proj_ref = ctx->wm_sample_info[svt_get_ref_frame_type(REF_LIST_1, 0)].num; |
3489 | 0 | INC_MD_CAND_CNT(cand_total_cnt, pcs->ppcs->max_can_count); |
3490 | | // update the total number of candidates injected |
3491 | 0 | (*candidate_total_cnt) = cand_total_cnt; |
3492 | 0 | } |
3493 | 0 | } |
3494 | | |
3495 | | // in MD stage 0, candidates are injected by different tools, but for S-Frame in RA mode |
3496 | | // the ref frame types in ref_list0 has be pruned in PD for the reversed direction of ref MVs |
3497 | | // here to check and reject the candidates if mismatches the available frame types array |
3498 | 0 | static uint32_t reject_candidate_sframe(PictureControlSet* pcs, ModeDecisionContext* ctx, uint32_t cand_total_cnt) { |
3499 | 0 | for (uint32_t i = 0; i < cand_total_cnt;) { |
3500 | 0 | if (!valid_ref_frame_type( |
3501 | 0 | ctx->fast_cand_array[i].block_mi.ref_frame, ctx->ref_frame_type_arr, ctx->tot_ref_frame_types)) { |
3502 | 0 | for (uint32_t j = i; j < cand_total_cnt; j++) { |
3503 | 0 | memcpy(&ctx->fast_cand_array[j], &ctx->fast_cand_array[j + 1], sizeof(ModeDecisionCandidate)); |
3504 | 0 | } |
3505 | 0 | cand_total_cnt--; |
3506 | 0 | continue; |
3507 | 0 | } |
3508 | 0 | i++; |
3509 | 0 | } |
3510 | | // zero candidate in fast cand array risks in md stage 0, add a candidate from ref list1 as backup |
3511 | 0 | if (cand_total_cnt == 0) { |
3512 | 0 | inject_sframe_backup_candidate(pcs, ctx, &cand_total_cnt); |
3513 | 0 | } |
3514 | 0 | assert(cand_total_cnt > 0); |
3515 | 0 | return cand_total_cnt; |
3516 | 0 | } |
3517 | | |
3518 | | EbErrorType generate_md_stage_0_cand_light_pd0(ModeDecisionContext* ctx, uint32_t* candidate_total_count_ptr, |
3519 | 7.53k | PictureControlSet* pcs) { |
3520 | 7.53k | const SliceType slice_type = pcs->slice_type; |
3521 | 7.53k | uint32_t cand_total_cnt = 0; |
3522 | | //---------------------- |
3523 | | // Intra |
3524 | 7.53k | if (ctx->blk_geom->sq_size < 128 && ctx->intra_ctrls.enable_intra) { |
3525 | 7.53k | inject_intra_candidates_light_pd0(pcs, ctx, &cand_total_cnt); |
3526 | 7.53k | } |
3527 | | |
3528 | 7.53k | if (slice_type != I_SLICE) { |
3529 | 0 | inject_inter_candidates_light_pd0(pcs, ctx, &cand_total_cnt); |
3530 | 0 | } |
3531 | | |
3532 | | // For I_SLICE, DC is always injected, and therefore there is no a risk of no candidates @ md_stage_0() |
3533 | | // For non I_SLICE, there is a risk of no candidates @ md_stage_0() because of the INTER candidates pruning techniques |
3534 | 7.53k | if (slice_type != I_SLICE && cand_total_cnt == 0) { |
3535 | 0 | inject_zz_backup_candidate(pcs, ctx, &cand_total_cnt); |
3536 | 0 | } |
3537 | | |
3538 | 7.53k | if (pcs->ppcs->sframe_ref_pruned) { |
3539 | 0 | cand_total_cnt = reject_candidate_sframe(pcs, ctx, cand_total_cnt); |
3540 | 0 | } |
3541 | | |
3542 | 7.53k | *candidate_total_count_ptr = cand_total_cnt; |
3543 | | |
3544 | 7.53k | return EB_ErrorNone; |
3545 | 7.53k | } |
3546 | | |
3547 | | /* |
3548 | | generate candidates for light pd1 |
3549 | | */ |
3550 | | void generate_md_stage_0_cand_light_pd1(ModeDecisionContext* ctx, uint32_t* candidate_total_count_ptr, |
3551 | 0 | PictureControlSet* pcs) { |
3552 | 0 | const SliceType slice_type = pcs->slice_type; |
3553 | 0 | uint32_t cand_total_cnt = 0; |
3554 | | // Reset duplicates variables |
3555 | 0 | ctx->injected_mv_count = 0; |
3556 | 0 | ctx->inject_new_me = 1; |
3557 | 0 | if (slice_type != I_SLICE) { |
3558 | 0 | inject_inter_candidates_light_pd1(pcs, ctx, &cand_total_cnt); |
3559 | 0 | } |
3560 | | //---------------------- |
3561 | | // Intra |
3562 | 0 | if (ctx->intra_ctrls.enable_intra && ctx->blk_geom->sq_size < 128) { |
3563 | 0 | uint8_t dc_cand_only_flag = ctx->intra_ctrls.intra_mode_end == DC_PRED || is_dc_only_safe(pcs, ctx); |
3564 | 0 | if (ctx->cand_reduction_ctrls.cand_elimination_ctrls.enabled && !dc_cand_only_flag && |
3565 | 0 | ctx->md_me_dist != (uint32_t)~0) { |
3566 | 0 | uint32_t th = ctx->cand_reduction_ctrls.cand_elimination_ctrls.dc_only_th; |
3567 | 0 | th *= (ctx->blk_geom->bheight * ctx->blk_geom->bwidth); |
3568 | 0 | if (ctx->md_me_dist < th) { |
3569 | 0 | dc_cand_only_flag = 1; |
3570 | 0 | } |
3571 | 0 | } |
3572 | 0 | inject_intra_candidates(pcs, ctx, dc_cand_only_flag, &cand_total_cnt); |
3573 | 0 | } |
3574 | | |
3575 | | // For I_SLICE, DC is always injected, and therefore there is no a risk of no candidates @ md_syage_0() |
3576 | | // For non I_SLICE, there is a risk of no candidates @ md_stage_0() because of the INTER candidates pruning techniques |
3577 | 0 | if (slice_type != I_SLICE && cand_total_cnt == 0) { |
3578 | 0 | inject_zz_backup_candidate(pcs, ctx, &cand_total_cnt); |
3579 | 0 | } |
3580 | |
|
3581 | 0 | if (pcs->ppcs->sframe_ref_pruned) { |
3582 | 0 | cand_total_cnt = reject_candidate_sframe(pcs, ctx, cand_total_cnt); |
3583 | 0 | } |
3584 | |
|
3585 | 0 | *candidate_total_count_ptr = cand_total_cnt; |
3586 | 0 | } |
3587 | | |
3588 | | EbErrorType generate_md_stage_0_cand(PictureControlSet* pcs, ModeDecisionContext* ctx, const PC_TREE* const pc_tree, |
3589 | 245k | uint32_t* candidate_total_count_ptr) { |
3590 | 245k | const SequenceControlSet* scs = pcs->scs; |
3591 | 245k | const SliceType slice_type = pcs->slice_type; |
3592 | 245k | uint32_t cand_total_cnt = 0; |
3593 | | // Reset duplicates variables |
3594 | 245k | ctx->injected_mv_count = 0; |
3595 | 245k | ctx->inject_new_me = 1; |
3596 | 245k | ctx->inject_new_pme = 1; |
3597 | | //---------------------- |
3598 | | // Intra |
3599 | 245k | if (ctx->intra_ctrls.enable_intra) { |
3600 | 245k | uint8_t dc_cand_only_flag = ctx->intra_ctrls.intra_mode_end == DC_PRED || is_dc_only_safe(pcs, ctx); |
3601 | 245k | if (ctx->cand_reduction_ctrls.cand_elimination_ctrls.enabled) { |
3602 | 0 | eliminate_candidate_based_on_pme_me_results(ctx, &dc_cand_only_flag); |
3603 | 0 | } |
3604 | 246k | if (ctx->blk_geom->sq_size < 128) { |
3605 | 246k | inject_intra_candidates(pcs, ctx, dc_cand_only_flag, &cand_total_cnt); |
3606 | 246k | } |
3607 | 245k | if (ctx->filter_intra_ctrls.enabled && svt_aom_filter_intra_allowed_bsize(ctx->blk_geom->bsize)) { |
3608 | 0 | inject_filter_intra_candidates(pcs, ctx, &cand_total_cnt); |
3609 | 0 | } |
3610 | | |
3611 | 245k | bool eval_intrabc = true; |
3612 | | |
3613 | 245k | if (svt_av1_allow_palette(ctx->md_palette_level, ctx->blk_geom->bsize)) { |
3614 | 0 | uint32_t palette_start_cnt = cand_total_cnt; |
3615 | |
|
3616 | 0 | inject_palette_candidates(pcs, ctx, &cand_total_cnt); |
3617 | |
|
3618 | 0 | eval_intrabc = cand_total_cnt > palette_start_cnt; |
3619 | 0 | } |
3620 | | |
3621 | 245k | if (ctx->md_allow_intrabc) { |
3622 | 0 | if (!pcs->ppcs->intrabc_ctrls.palette_hint || eval_intrabc) { |
3623 | 0 | bool do_intra_bc = true; |
3624 | |
|
3625 | 0 | if (ctx->shape == PART_N) { |
3626 | 0 | if (pcs->ppcs->intrabc_ctrls.b4_parent_gating && ctx->blk_geom->sq_size == 4 && |
3627 | 0 | pc_tree->parent->tested_blk[PART_N][0]) { |
3628 | 0 | if (pc_tree->parent->block_data[PART_N][0]->block_mi.use_intrabc == 0) { |
3629 | 0 | do_intra_bc = false; |
3630 | 0 | } |
3631 | 0 | } |
3632 | 0 | } else { |
3633 | 0 | if (pcs->ppcs->intrabc_ctrls.nsq_parent_gating && pc_tree->tested_blk[PART_N][0]) { |
3634 | 0 | if (pc_tree->block_data[PART_N][0]->block_mi.use_intrabc == 0) { |
3635 | 0 | do_intra_bc = false; |
3636 | 0 | } |
3637 | 0 | } |
3638 | 0 | } |
3639 | |
|
3640 | 0 | if (do_intra_bc) { |
3641 | 0 | inject_intra_bc_candidates(pcs, ctx, scs, ctx->blk_ptr, &cand_total_cnt); |
3642 | 0 | } |
3643 | 0 | } |
3644 | 0 | } |
3645 | 245k | } |
3646 | 245k | if (slice_type != I_SLICE) { |
3647 | 0 | svt_aom_inject_inter_candidates(pcs, ctx, &cand_total_cnt); |
3648 | 0 | } |
3649 | | // For I_SLICE, DC is always injected, and therefore there is no a risk of no candidates @ md_syage_0() |
3650 | | // For non I_SLICE, there is a risk of no candidates @ md_stage_0() because of the INTER candidates pruning techniques |
3651 | 245k | if (slice_type != I_SLICE && cand_total_cnt == 0) { |
3652 | 0 | inject_zz_backup_candidate(pcs, ctx, &cand_total_cnt); |
3653 | 0 | } |
3654 | | |
3655 | 245k | if (pcs->ppcs->sframe_ref_pruned) { |
3656 | 0 | cand_total_cnt = reject_candidate_sframe(pcs, ctx, cand_total_cnt); |
3657 | 0 | } |
3658 | | |
3659 | 245k | *candidate_total_count_ptr = cand_total_cnt; |
3660 | | |
3661 | 245k | memset(ctx->md_stage_0_count, 0, CAND_CLASS_TOTAL * sizeof(uint32_t)); |
3662 | 245k | bool merge_inter_cands = 0; |
3663 | 246k | if (ctx->nic_ctrls.pruning_ctrls.merge_inter_cands_mult != (uint8_t)~0) { |
3664 | 246k | uint16_t th = (ctx->nic_ctrls.pruning_ctrls.merge_inter_cands_mult * (63 - pcs->scs->static_config.qp)) >> 1; |
3665 | 246k | if ((MIN(ctx->md_me_dist, ctx->md_pme_dist) / (ctx->blk_geom->bwidth * ctx->blk_geom->bheight)) < th) { |
3666 | 231k | merge_inter_cands = 1; |
3667 | 231k | } |
3668 | 246k | } |
3669 | | |
3670 | 492k | for (uint32_t cand_i = 0; cand_i < cand_total_cnt; cand_i++) { |
3671 | 246k | ModeDecisionCandidate* cand = &ctx->fast_cand_array[cand_i]; |
3672 | 246k | if (is_intra_mode(cand->block_mi.mode)) { |
3673 | | // Intra prediction |
3674 | 246k | if ((cand->palette_info == NULL || cand->palette_size[0] == 0) && cand->block_mi.use_intrabc == 0) { |
3675 | 246k | cand->cand_class = CAND_CLASS_0; |
3676 | 246k | ctx->md_stage_0_count[CAND_CLASS_0]++; |
3677 | 18.4E | } else if (cand->block_mi.use_intrabc == 0) { |
3678 | | // Palette Prediction |
3679 | 0 | cand->cand_class = CAND_CLASS_3; |
3680 | 0 | ctx->md_stage_0_count[CAND_CLASS_3]++; |
3681 | 18.4E | } else { |
3682 | | // Intra-BC Prediction |
3683 | 18.4E | cand->cand_class = CAND_CLASS_4; |
3684 | 18.4E | ctx->md_stage_0_count[CAND_CLASS_4]++; |
3685 | 18.4E | } |
3686 | 18.4E | } else { // INTER |
3687 | 18.4E | if (cand->block_mi.mode == NEWMV || cand->block_mi.mode == NEW_NEWMV || merge_inter_cands) { |
3688 | | // MV Prediction |
3689 | 0 | cand->cand_class = CAND_CLASS_2; |
3690 | 0 | ctx->md_stage_0_count[CAND_CLASS_2]++; |
3691 | 18.4E | } else { |
3692 | | //MVP Prediction |
3693 | 18.4E | cand->cand_class = CAND_CLASS_1; |
3694 | 18.4E | ctx->md_stage_0_count[CAND_CLASS_1]++; |
3695 | 18.4E | } |
3696 | 18.4E | } |
3697 | 246k | } |
3698 | 245k | return EB_ErrorNone; |
3699 | 245k | } |
3700 | | |
3701 | | uint8_t av1_drl_ctx(const CandidateMv* ref_mv_stack, int32_t ref_idx); |
3702 | | |
3703 | | /*************************************** |
3704 | | * Update symbols for light-PD1 path |
3705 | | ***************************************/ |
3706 | | void svt_aom_product_full_mode_decision_light_pd1(PictureControlSet* pcs, ModeDecisionContext* ctx, |
3707 | 0 | ModeDecisionCandidateBuffer* cand_bf) { |
3708 | 0 | BlkStruct* blk_ptr = ctx->blk_ptr; |
3709 | 0 | ModeDecisionCandidate* cand = cand_bf->cand; |
3710 | 0 | blk_ptr->total_rate = cand_bf->total_rate; |
3711 | | |
3712 | | // Set common signals (INTER/INTRA) |
3713 | 0 | svt_memcpy(&blk_ptr->block_mi, &cand->block_mi, sizeof(BlockModeInfo)); |
3714 | 0 | blk_ptr->palette_size[0] = blk_ptr->palette_size[1] = 0; |
3715 | | |
3716 | | // Set INTER mode signals |
3717 | 0 | if (is_inter_mode(cand->block_mi.mode)) { |
3718 | 0 | blk_ptr->drl_index = cand->drl_index; |
3719 | 0 | assert(IMPLIES( |
3720 | 0 | is_inter_compound_mode(cand->block_mi.mode) && blk_ptr->block_mi.interinter_comp.type == COMPOUND_AVERAGE, |
3721 | 0 | (blk_ptr->block_mi.comp_group_idx == 0 && blk_ptr->block_mi.compound_idx == 1))); |
3722 | | |
3723 | | // Set MVs |
3724 | 0 | blk_ptr->predmv[0].as_int = cand->pred_mv[0].as_int; |
3725 | 0 | if (has_second_ref(&blk_ptr->block_mi)) { |
3726 | 0 | blk_ptr->predmv[1].as_int = cand->pred_mv[1].as_int; |
3727 | 0 | } |
3728 | |
|
3729 | 0 | const int8_t ref_frame_type = av1_ref_frame_type(blk_ptr->block_mi.ref_frame); |
3730 | | // Store winning inter_mode_ctx in blk to avoid storing for all ref frames for EC |
3731 | 0 | blk_ptr->inter_mode_ctx = ctx->inter_mode_ctx[ref_frame_type]; |
3732 | | // Store drl_ctx in blk to avoid storing final_ref_mv_stack for EC |
3733 | 0 | if (blk_ptr->block_mi.mode == NEWMV || blk_ptr->block_mi.mode == NEW_NEWMV) { |
3734 | 0 | for (uint8_t idx = 0; idx < 2; ++idx) { |
3735 | 0 | if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) { |
3736 | 0 | blk_ptr->drl_ctx[idx] = av1_drl_ctx(ctx->ref_mv_stack[ref_frame_type], idx); |
3737 | 0 | } else { |
3738 | 0 | blk_ptr->drl_ctx[idx] = -1; |
3739 | 0 | } |
3740 | 0 | } |
3741 | 0 | } |
3742 | |
|
3743 | 0 | if (have_nearmv_in_inter_mode(blk_ptr->block_mi.mode)) { |
3744 | | // TODO(jingning): Temporary solution to compensate the NEARESTMV offset. |
3745 | 0 | for (uint8_t idx = 1; idx < 3; ++idx) { |
3746 | 0 | if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) { |
3747 | 0 | blk_ptr->drl_ctx_near[idx - 1] = av1_drl_ctx(ctx->ref_mv_stack[ref_frame_type], idx); |
3748 | 0 | } else { |
3749 | 0 | blk_ptr->drl_ctx_near[idx - 1] = -1; |
3750 | 0 | } |
3751 | 0 | } |
3752 | 0 | } |
3753 | 0 | } else { // Set INTRA mode signals |
3754 | 0 | cand->skip_mode_allowed = false; |
3755 | 0 | } |
3756 | | // Set TX and coeff-related data |
3757 | 0 | blk_ptr->block_has_coeff = ((cand_bf->block_has_coeff) > 0) ? true : false; |
3758 | 0 | ctx->blk_ptr->cnt_nz_coeff = cand_bf->cnt_nz_coeff; |
3759 | | |
3760 | | // If skip_mode is allowed, and block has no coeffs, use skip_mode |
3761 | 0 | if (cand->skip_mode_allowed == true) { |
3762 | 0 | blk_ptr->block_mi.skip_mode |= !blk_ptr->block_has_coeff; |
3763 | 0 | } |
3764 | |
|
3765 | 0 | assert(IMPLIES(pcs->ppcs->frm_hdr.interpolation_filter == SWITCHABLE && blk_ptr->block_mi.skip_mode, |
3766 | 0 | cand->block_mi.interp_filters == 0)); |
3767 | 0 | if (blk_ptr->block_mi.skip_mode) { |
3768 | 0 | blk_ptr->block_has_coeff = 0; |
3769 | 0 | cand_bf->y_has_coeff = 0; |
3770 | 0 | cand_bf->u_has_coeff = 0; |
3771 | 0 | cand_bf->v_has_coeff = 0; |
3772 | 0 | } |
3773 | 0 | blk_ptr->block_mi.skip = !blk_ptr->block_has_coeff; |
3774 | |
|
3775 | 0 | const uint16_t txb_itr = 0; |
3776 | 0 | const int32_t txb_1d_offset = 0, txb_1d_offset_uv = 0; |
3777 | 0 | blk_ptr->y_has_coeff = cand_bf->y_has_coeff; |
3778 | 0 | blk_ptr->u_has_coeff = cand_bf->u_has_coeff; |
3779 | 0 | blk_ptr->v_has_coeff = cand_bf->v_has_coeff; |
3780 | 0 | blk_ptr->tx_type[txb_itr] = cand->transform_type[txb_itr]; |
3781 | 0 | blk_ptr->tx_type_uv = cand->transform_type_uv; |
3782 | 0 | blk_ptr->quant_dc.y[txb_itr] = cand_bf->quant_dc.y[txb_itr]; |
3783 | 0 | blk_ptr->quant_dc.u[txb_itr] = cand_bf->quant_dc.u[txb_itr]; |
3784 | 0 | blk_ptr->quant_dc.v[txb_itr] = cand_bf->quant_dc.v[txb_itr]; |
3785 | |
|
3786 | 0 | if (ctx->bypass_encdec) { |
3787 | 0 | blk_ptr->eob.y[txb_itr] = cand_bf->eob.y[txb_itr]; |
3788 | 0 | blk_ptr->eob.u[txb_itr] = cand_bf->eob.u[txb_itr]; |
3789 | 0 | blk_ptr->eob.v[txb_itr] = cand_bf->eob.v[txb_itr]; |
3790 | 0 | int32_t* src_ptr; |
3791 | 0 | int32_t* dst_ptr; |
3792 | |
|
3793 | 0 | const TxSize tx_size = tx_depth_to_tx_size[blk_ptr->block_mi.tx_depth][ctx->blk_geom->bsize]; |
3794 | 0 | const int tx_width = tx_size_wide[tx_size]; |
3795 | 0 | const int tx_height = tx_size_high[tx_size]; |
3796 | | |
3797 | | // only one TX unit, so no need to bitmask |
3798 | 0 | if (blk_ptr->y_has_coeff) { |
3799 | 0 | src_ptr = &(((int32_t*)cand_bf->quant->y_buffer)[txb_1d_offset]); |
3800 | 0 | dst_ptr = ((int32_t*)pcs->ppcs->enc_dec_ptr->quantized_coeff[ctx->sb_index]->y_buffer) + ctx->coded_area_sb; |
3801 | 0 | svt_memcpy(dst_ptr, src_ptr, tx_width * tx_height * sizeof(int32_t)); |
3802 | 0 | } |
3803 | 0 | ctx->coded_area_sb += tx_width * tx_height; |
3804 | |
|
3805 | 0 | const TxSize tx_size_uv = av1_get_max_uv_txsize(ctx->blk_geom->bsize, 1, 1); |
3806 | 0 | const int tx_width_uv = tx_size_wide[tx_size_uv]; |
3807 | 0 | const int tx_height_uv = tx_size_high[tx_size_uv]; |
3808 | | // Cb |
3809 | | // only one TX unit, so no need to bitmask |
3810 | 0 | if (blk_ptr->u_has_coeff) { |
3811 | 0 | src_ptr = &(((int32_t*)cand_bf->quant->u_buffer)[txb_1d_offset_uv]); |
3812 | 0 | dst_ptr = ((int32_t*)pcs->ppcs->enc_dec_ptr->quantized_coeff[ctx->sb_index]->u_buffer) + |
3813 | 0 | ctx->coded_area_sb_uv; |
3814 | 0 | svt_memcpy(dst_ptr, src_ptr, tx_width_uv * tx_height_uv * sizeof(int32_t)); |
3815 | 0 | } |
3816 | | |
3817 | | // Cr |
3818 | | // only one TX unit, so no need to bitmask |
3819 | 0 | if (blk_ptr->v_has_coeff) { |
3820 | 0 | src_ptr = &(((int32_t*)cand_bf->quant->v_buffer)[txb_1d_offset_uv]); |
3821 | 0 | dst_ptr = ((int32_t*)pcs->ppcs->enc_dec_ptr->quantized_coeff[ctx->sb_index]->v_buffer) + |
3822 | 0 | ctx->coded_area_sb_uv; |
3823 | 0 | svt_memcpy(dst_ptr, src_ptr, tx_width_uv * tx_height_uv * sizeof(int32_t)); |
3824 | 0 | } |
3825 | 0 | ctx->coded_area_sb_uv += tx_width_uv * tx_height_uv; |
3826 | 0 | } |
3827 | 0 | } |
3828 | | |
3829 | 0 | static INLINE double derive_ssim_threshold_factor_for_full_md(SequenceControlSet* scs) { |
3830 | 0 | return scs->input_resolution >= INPUT_SIZE_1080p_RANGE ? 1.02 : 1.03; |
3831 | 0 | } |
3832 | | |
/***************************************
 * Full Mode Decision
 *
 * Selects the winning candidate among those listed in best_candidate_index_array and copies its
 * decisions into ctx->blk_ptr.
 *
 * Selection is only performed when ctx->md_stage_3_total_count > 1; otherwise
 * best_candidate_index_array[0] is used as-is.
 *   - ctx->tune_ssim_level > SSIM_LVL_0: two passes. The lowest full_cost (SSD based) is found first;
 *     then, among candidates whose full_cost does not exceed threshold_factor * that minimum, the one
 *     with the lowest full_cost_ssim is chosen. Equal SSIM costs are broken by the lower SSD cost.
 *   - otherwise: the lowest full_cost wins, where the cost of single-reference inter candidates is first
 *     scaled by uni_psy_bias[picture_qp] / 100 when unipred_bias and is_noise_level are both set.
 *
 * After selection, the function writes rate, cost and distortion (cost/distortion are skipped for
 * PD_PASS_1 with a fixed partition), mode info, palette data, coefficient flags, TX types, quant DC and
 * EOB data to the block. When EncDec is bypassed in PD_PASS_1 it also copies the quantized coefficients,
 * either to blk_ptr->coeff_tmp or, for a fixed partition, to the SB-level quantized_coeff buffer.
 *
 * Note: the selected candidate itself may be modified (cand->skip_mode_allowed, cand_bf->*_has_coeff).
 *
 * Returns the index into buffer_ptr_array of the selected candidate.
 ***************************************/
uint32_t svt_aom_product_full_mode_decision(PictureControlSet* pcs, ModeDecisionContext* ctx,
                                            ModeDecisionCandidateBuffer** buffer_ptr_array,
                                            uint32_t candidate_total_count, uint32_t* best_candidate_index_array) {
    SequenceControlSet* scs = pcs->scs;
    BlkStruct* blk_ptr = ctx->blk_ptr;
    uint32_t lowest_cost_index = best_candidate_index_array[0];
    const bool use_ssim_full_cost = ctx->tune_ssim_level > SSIM_LVL_0 ? true : false;

    // Find the candidate with the lowest cost
    // Only need to sort if have multiple candidates
    if (ctx->md_stage_3_total_count > 1) {
        if (use_ssim_full_cost) {
            // Pass one: find candidate with the lowest SSD cost
            uint64_t ssd_lowest_cost = 0xFFFFFFFFFFFFFFFFull;
            for (uint32_t i = 0; i < candidate_total_count; ++i) {
                uint32_t cand_index = best_candidate_index_array[i];
                uint64_t cost = *(buffer_ptr_array[cand_index]->full_cost);
                if (cost < ssd_lowest_cost) {
                    lowest_cost_index = cand_index;
                    ssd_lowest_cost = cost;
                }
            }

            // Pass two: among the candidates with SSD cost not greater than the threshold, find the one with the lowest SSIM cost
            // From here on ssd_lowest_cost tracks the SSD cost of the currently selected candidate and is
            // only used for the equal-SSIM tie-break below.
            const double threshold_factor = derive_ssim_threshold_factor_for_full_md(scs);
            const uint64_t ssd_cost_threshold = (uint64_t)(threshold_factor * ssd_lowest_cost);
            uint64_t ssim_lowest_cost = 0xFFFFFFFFFFFFFFFFull;
            for (uint32_t i = 0; i < candidate_total_count; ++i) {
                uint32_t cand_index = best_candidate_index_array[i];

                uint64_t ssim_cost = *(buffer_ptr_array[cand_index]->full_cost_ssim);
                uint64_t ssd_cost = *(buffer_ptr_array[cand_index]->full_cost);
                if (ssim_cost < ssim_lowest_cost) {
                    if (ssd_cost <= ssd_cost_threshold) {
                        lowest_cost_index = cand_index;
                        ssim_lowest_cost = ssim_cost;
                        ssd_lowest_cost = ssd_cost;
                    }
                } else if (ssim_cost == ssim_lowest_cost) {
                    // if two candidates have the same ssim cost, choose the one with lower ssd cost
                    if (ssd_cost < ssd_lowest_cost) {
                        lowest_cost_index = cand_index;
                        ssd_lowest_cost = ssd_cost;
                    }
                }
            }
        } else { // fallback to SSD based RD cost
            uint64_t lowest_cost = 0xFFFFFFFFFFFFFFFFull;
            for (uint32_t i = 0; i < candidate_total_count; ++i) {
                uint32_t cand_index = best_candidate_index_array[i];

                uint64_t cost = *(buffer_ptr_array[cand_index]->full_cost);
                // Only the local comparison value is scaled; the candidate's stored full_cost is not modified.
                if (scs->vq_ctrls.sharpness_ctrls.unipred_bias && pcs->ppcs->is_noise_level &&
                    is_inter_singleref_mode(buffer_ptr_array[cand_index]->cand->block_mi.mode)) {
                    cost = (cost * uni_psy_bias[pcs->ppcs->picture_qp]) / 100;
                }

                if (cost < lowest_cost) {
                    lowest_cost_index = cand_index;
                    lowest_cost = cost;
                }
            }
        }
    }
    ModeDecisionCandidateBuffer* cand_bf = buffer_ptr_array[lowest_cost_index];
    ModeDecisionCandidate* cand = cand_bf->cand;
    blk_ptr->total_rate = cand_bf->total_rate;
    // Block cost and distortion are not written for PD_PASS_1 when the partition is fixed.
    if (!(ctx->pd_pass == PD_PASS_1 && ctx->fixed_partition)) {
        // When lambda tuning is on, lambda of each block is set separately, however at interdepth decision the sb lambda is used
        uint32_t full_lambda = ctx->hbd_md ? ctx->full_sb_lambda_md[EB_10_BIT_MD] : ctx->full_sb_lambda_md[EB_8_BIT_MD];
        ctx->blk_ptr->cost = RDCOST(full_lambda, cand_bf->total_rate, cand_bf->full_dist);
        ctx->blk_ptr->full_dist = cand_bf->full_dist;
    }

    // Set common signals (INTER/INTRA)
    svt_memcpy(&blk_ptr->block_mi, &cand->block_mi, sizeof(BlockModeInfo));
    // Set INTER mode signals
    // INTER signals set first b/c INTER shuts Palette, so INTRA must overwrite if Palette + intrabc is used
    if (is_inter_block(&blk_ptr->block_mi)) {
        blk_ptr->drl_index = cand->drl_index;
        assert(IMPLIES(
            is_inter_compound_mode(cand->block_mi.mode) && blk_ptr->block_mi.interinter_comp.type == COMPOUND_AVERAGE,
            (blk_ptr->block_mi.comp_group_idx == 0 && blk_ptr->block_mi.compound_idx == 1)));

        blk_ptr->palette_size[0] = blk_ptr->palette_size[1] = 0;
        // Set MVs
        blk_ptr->predmv[0].as_int = cand->pred_mv[0].as_int;
        if (has_second_ref(&blk_ptr->block_mi)) {
            blk_ptr->predmv[1].as_int = cand->pred_mv[1].as_int;
        }
        // Warp parameters are copied only for warped-causal motion or the global-motion modes.
        if (blk_ptr->block_mi.motion_mode == WARPED_CAUSAL ||
            (cand->block_mi.mode == GLOBALMV || cand->block_mi.mode == GLOBAL_GLOBALMV)) {
            svt_memcpy(&ctx->blk_ptr->wm_params_l0, &cand->wm_params_l0, sizeof(WarpedMotionParams));
            svt_memcpy(&ctx->blk_ptr->wm_params_l1, &cand->wm_params_l1, sizeof(WarpedMotionParams));
        }

        if (ctx->pd_pass == PD_PASS_1) {
            const int8_t ref_frame_type = av1_ref_frame_type(blk_ptr->block_mi.ref_frame);
            // Store winning inter_mode_ctx in blk to avoid storing for all ref frames for EC
            blk_ptr->inter_mode_ctx = ctx->inter_mode_ctx[ref_frame_type];
            // Store drl_ctx in blk to avoid storing final_ref_mv_stack for EC
            // A context of -1 marks a DRL position for which ref_mv_count is too small.
            if (blk_ptr->block_mi.mode == NEWMV || blk_ptr->block_mi.mode == NEW_NEWMV) {
                for (uint8_t idx = 0; idx < 2; ++idx) {
                    if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) {
                        blk_ptr->drl_ctx[idx] = av1_drl_ctx(ctx->ref_mv_stack[ref_frame_type], idx);
                    } else {
                        blk_ptr->drl_ctx[idx] = -1;
                    }
                }
            }

            if (have_nearmv_in_inter_mode(blk_ptr->block_mi.mode)) {
                // TODO(jingning): Temporary solution to compensate the NEARESTMV offset.
                for (uint8_t idx = 1; idx < 3; ++idx) {
                    if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) {
                        blk_ptr->drl_ctx_near[idx - 1] = av1_drl_ctx(ctx->ref_mv_stack[ref_frame_type], idx);
                    } else {
                        blk_ptr->drl_ctx_near[idx - 1] = -1;
                    }
                }
            }
        }
    }

    // Set INTRA mode signals
    if (is_intra_mode(blk_ptr->block_mi.mode)) {
        if (!cand->palette_info) {
            blk_ptr->palette_size[0] = blk_ptr->palette_size[1] = 0;
        } else if (svt_av1_allow_palette(ctx->md_palette_level, ctx->blk_geom->bsize)) {
            memcpy(&blk_ptr->palette_info->pmi, &cand->palette_info->pmi, sizeof(PaletteModeInfo));
            memcpy(blk_ptr->palette_info->color_idx_map, cand->palette_info->color_idx_map, MAX_PALETTE_SQUARE);
            blk_ptr->palette_size[0] = cand->palette_size[0];
            blk_ptr->palette_size[1] = cand->palette_size[1];
        }

        // Intra-mode blocks that do not use intrabc cannot use skip_mode; note this writes to the candidate.
        if (blk_ptr->block_mi.use_intrabc == 0) {
            cand->skip_mode_allowed = false;
        }
    }

    // Set TX and coeff-related data
    blk_ptr->block_has_coeff = ((cand_bf->block_has_coeff) > 0) ? true : false;
    ctx->blk_ptr->cnt_nz_coeff = cand_bf->cnt_nz_coeff;

    // If skip_mode is allowed, and block has no coeffs, use skip_mode
    if (cand->skip_mode_allowed == true) {
        blk_ptr->block_mi.skip_mode |= !blk_ptr->block_has_coeff;
    }

    assert(IMPLIES(pcs->ppcs->frm_hdr.interpolation_filter == SWITCHABLE && blk_ptr->block_mi.skip_mode,
                   cand->block_mi.interp_filters == 0));
    // skip_mode forces all coefficient flags off, both on the block and on the candidate buffer.
    if (blk_ptr->block_mi.skip_mode) {
        blk_ptr->block_has_coeff = 0;
        cand_bf->y_has_coeff = 0;
        cand_bf->u_has_coeff = 0;
        cand_bf->v_has_coeff = 0;
    }

    blk_ptr->block_mi.skip = !blk_ptr->block_has_coeff;
    blk_ptr->y_has_coeff = cand_bf->y_has_coeff;
    blk_ptr->u_has_coeff = cand_bf->u_has_coeff;
    blk_ptr->v_has_coeff = cand_bf->v_has_coeff;
    svt_memcpy(blk_ptr->tx_type, cand->transform_type, sizeof(TxType) * MAX_TXB_COUNT);
    blk_ptr->tx_type_uv = cand->transform_type_uv;
    svt_memcpy(&blk_ptr->quant_dc, &cand_bf->quant_dc, sizeof(QuantDcData));
    svt_memcpy(&blk_ptr->eob, &cand_bf->eob, sizeof(EobData));

    // If bypassing EncDec, save recon/coeff
    if (ctx->bypass_encdec && ctx->pd_pass == PD_PASS_1) {
        const uint16_t tu_total_count = tx_blocks_per_depth[ctx->blk_geom->bsize][blk_ptr->block_mi.tx_depth];
        int32_t txb_1d_offset = 0, txb_1d_offset_uv = 0;
        const TxSize tx_size = tx_depth_to_tx_size[blk_ptr->block_mi.tx_depth][ctx->blk_geom->bsize];
        const int tx_width = tx_size_wide[tx_size];
        const int tx_height = tx_size_high[tx_size];
        const TxSize tx_size_uv = av1_get_max_uv_txsize(ctx->blk_geom->bsize, 1, 1);
        const int tx_width_uv = tx_size_wide[tx_size_uv];
        const int tx_height_uv = tx_size_high[tx_size_uv];
        for (uint16_t txb_itr = 0; txb_itr < tu_total_count; txb_itr++) {
            // Chroma is handled on every iteration when tx_depth is 0, otherwise only on the first TX unit.
            const bool uv_pass = (blk_ptr->block_mi.tx_depth == 0 || txb_itr == 0);

            int32_t* src_ptr = &(((int32_t*)cand_bf->quant->y_buffer)[txb_1d_offset]);
            int32_t* dst_ptr = &(((int32_t*)ctx->blk_ptr->coeff_tmp->y_buffer)[txb_1d_offset]);

            // With a fixed partition the destination is the SB-level buffer at the running coded-area offset;
            // the offset advances whether or not this TX unit has coefficients.
            if (ctx->fixed_partition) {
                dst_ptr = ((int32_t*)pcs->ppcs->enc_dec_ptr->quantized_coeff[ctx->sb_index]->y_buffer) +
                    ctx->coded_area_sb;
                ctx->coded_area_sb += tx_width * tx_height;
            }

            // y_has_coeff is tested as a bitmask with one bit per TX unit.
            if (blk_ptr->y_has_coeff & (1 << txb_itr)) {
                svt_memcpy(dst_ptr, src_ptr, tx_width * tx_height * sizeof(int32_t));
            }

            txb_1d_offset += tx_width * tx_height;

            if (ctx->has_uv && uv_pass) {
                // Cb
                src_ptr = &(((int32_t*)cand_bf->quant->u_buffer)[txb_1d_offset_uv]);
                dst_ptr = &(((int32_t*)ctx->blk_ptr->coeff_tmp->u_buffer)[txb_1d_offset_uv]);

                if (ctx->fixed_partition) {
                    dst_ptr = ((int32_t*)pcs->ppcs->enc_dec_ptr->quantized_coeff[ctx->sb_index]->u_buffer) +
                        ctx->coded_area_sb_uv;
                }

                if (blk_ptr->u_has_coeff & (1 << txb_itr)) {
                    svt_memcpy(dst_ptr, src_ptr, tx_width_uv * tx_height_uv * sizeof(int32_t));
                }

                // Cr
                src_ptr = &(((int32_t*)cand_bf->quant->v_buffer)[txb_1d_offset_uv]);
                dst_ptr = &(((int32_t*)ctx->blk_ptr->coeff_tmp->v_buffer)[txb_1d_offset_uv]);

                // The shared chroma offset is advanced only here, after both Cb and Cr destinations were derived.
                if (ctx->fixed_partition) {
                    dst_ptr = ((int32_t*)pcs->ppcs->enc_dec_ptr->quantized_coeff[ctx->sb_index]->v_buffer) +
                        ctx->coded_area_sb_uv;
                    ctx->coded_area_sb_uv += tx_width_uv * tx_height_uv;
                }

                if (blk_ptr->v_has_coeff & (1 << txb_itr)) {
                    svt_memcpy(dst_ptr, src_ptr, tx_width_uv * tx_height_uv * sizeof(int32_t));
                }

                txb_1d_offset_uv += tx_width_uv * tx_height_uv;
            }
        }
    }

    return lowest_cost_index;
}
4066 | | |
4067 | | // Return the end column for the current superblock, in unit of TPL blocks. |
4068 | 0 | static int get_superblock_tpl_column_end(PictureParentControlSet* ppcs, int mi_col, int num_mi_w) { |
4069 | 0 | const int mib_size_log2 = ppcs->scs->seq_header.sb_size == BLOCK_128X128 ? 5 : 4; |
4070 | | // Find the start column of this superblock. |
4071 | 0 | const int sb_mi_col_start = (mi_col >> mib_size_log2) << mib_size_log2; |
4072 | | // Same but in superres upscaled dimension. |
4073 | 0 | const int sb_mi_col_start_sr = coded_to_superres_mi(sb_mi_col_start, ppcs->superres_denom); |
4074 | | // Width of this superblock in mi units. |
4075 | 0 | const int sb_mi_width = mi_size_wide[ppcs->scs->seq_header.sb_size]; |
4076 | | // Same but in superres upscaled dimension. |
4077 | 0 | const int sb_mi_width_sr = coded_to_superres_mi(sb_mi_width, ppcs->superres_denom); |
4078 | | // Superblock end in mi units. |
4079 | 0 | const int sb_mi_end = sb_mi_col_start_sr + sb_mi_width_sr; |
4080 | | // Superblock end in TPL units. |
4081 | 0 | return (sb_mi_end + num_mi_w - 1) / num_mi_w; |
4082 | 0 | } |
4083 | | |
4084 | 0 | void aom_av1_set_ssim_rdmult(ModeDecisionContext* ctx, PictureControlSet* pcs, const int mi_row, const int mi_col) { |
4085 | 0 | const Av1Common* const cm = pcs->ppcs->av1_cm; |
4086 | 0 | BlockSize bsize = ctx->blk_geom->bsize; |
4087 | |
|
4088 | 0 | const int bsize_base = BLOCK_16X16; |
4089 | 0 | const int num_mi_w = mi_size_wide[bsize_base]; |
4090 | 0 | const int num_mi_h = mi_size_high[bsize_base]; |
4091 | 0 | const int num_cols = (cm->mi_cols + num_mi_w - 1) / num_mi_w; |
4092 | 0 | const int num_rows = (cm->mi_rows + num_mi_h - 1) / num_mi_h; |
4093 | 0 | const int num_bcols = (mi_size_wide[bsize] + num_mi_w - 1) / num_mi_w; |
4094 | 0 | const int num_brows = (mi_size_high[bsize] + num_mi_h - 1) / num_mi_h; |
4095 | 0 | int row, col; |
4096 | 0 | double num_of_mi = 0.0; |
4097 | 0 | double geom_mean_of_scale = 1.0; |
4098 | 0 | for (row = mi_row / num_mi_w; row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) { |
4099 | 0 | for (col = mi_col / num_mi_h; col < num_cols && col < mi_col / num_mi_h + num_bcols; ++col) { |
4100 | 0 | const int index = row * num_cols + col; |
4101 | 0 | geom_mean_of_scale *= pcs->ppcs->pa_me_data->ssim_rdmult_scaling_factors[index]; |
4102 | 0 | num_of_mi += 1.0; |
4103 | 0 | } |
4104 | 0 | } |
4105 | 0 | geom_mean_of_scale = pow(geom_mean_of_scale, (1.0 / num_of_mi)); |
4106 | 0 | if (!pcs->ppcs->blk_lambda_tuning) { |
4107 | 0 | ctx->full_lambda_md[EB_8_BIT_MD] = |
4108 | 0 | (uint32_t)((double)ctx->ed_ctx->pic_full_lambda[EB_8_BIT_MD] * geom_mean_of_scale + 0.5); |
4109 | 0 | ctx->full_lambda_md[EB_10_BIT_MD] = |
4110 | 0 | (uint32_t)((double)ctx->ed_ctx->pic_full_lambda[EB_10_BIT_MD] * geom_mean_of_scale + 0.5); |
4111 | |
|
4112 | 0 | ctx->fast_lambda_md[EB_8_BIT_MD] = |
4113 | 0 | (uint32_t)((double)ctx->ed_ctx->pic_fast_lambda[EB_8_BIT_MD] * geom_mean_of_scale + 0.5); |
4114 | 0 | ctx->fast_lambda_md[EB_10_BIT_MD] = |
4115 | 0 | (uint32_t)((double)ctx->ed_ctx->pic_fast_lambda[EB_10_BIT_MD] * geom_mean_of_scale + 0.5); |
4116 | 0 | } else { |
4117 | 0 | ctx->full_lambda_md[EB_8_BIT_MD] = (uint32_t)((double)ctx->full_lambda_md[EB_8_BIT_MD] * geom_mean_of_scale + |
4118 | 0 | 0.5); |
4119 | 0 | ctx->full_lambda_md[EB_10_BIT_MD] = (uint32_t)((double)ctx->full_lambda_md[EB_10_BIT_MD] * geom_mean_of_scale + |
4120 | 0 | 0.5); |
4121 | |
|
4122 | 0 | ctx->fast_lambda_md[EB_8_BIT_MD] = (uint32_t)((double)ctx->fast_lambda_md[EB_8_BIT_MD] * geom_mean_of_scale + |
4123 | 0 | 0.5); |
4124 | 0 | ctx->fast_lambda_md[EB_10_BIT_MD] = (uint32_t)((double)ctx->fast_lambda_md[EB_10_BIT_MD] * geom_mean_of_scale + |
4125 | 0 | 0.5); |
4126 | 0 | } |
4127 | 0 | } |
4128 | | |
4129 | 0 | void svt_aom_set_tuned_blk_lambda(ModeDecisionContext* ctx, PictureControlSet* pcs) { |
4130 | 0 | PictureParentControlSet* ppcs = pcs->ppcs; |
4131 | 0 | Av1Common* cm = ppcs->av1_cm; |
4132 | |
|
4133 | 0 | BlockSize bsize = ctx->blk_geom->bsize; |
4134 | 0 | int mi_row = ctx->blk_org_y / 4; |
4135 | 0 | int mi_col = ctx->blk_org_x / 4; |
4136 | |
|
4137 | 0 | const int mi_col_sr = coded_to_superres_mi(mi_col, ppcs->superres_denom); |
4138 | 0 | const int mi_cols_sr = ((ppcs->enhanced_unscaled_pic->width + 15) / 16) << 2; // picture column boundary |
4139 | 0 | const int block_mi_width_sr = coded_to_superres_mi(mi_size_wide[bsize], ppcs->superres_denom); |
4140 | 0 | const int bsize_base = ppcs->tpl_ctrls.synth_blk_size == 32 ? BLOCK_32X32 : BLOCK_16X16; |
4141 | 0 | const int num_mi_w = mi_size_wide[bsize_base]; |
4142 | 0 | const int num_mi_h = mi_size_high[bsize_base]; |
4143 | 0 | const int num_cols = (mi_cols_sr + num_mi_w - 1) / num_mi_w; |
4144 | 0 | const int num_rows = (cm->mi_rows + num_mi_h - 1) / num_mi_h; |
4145 | 0 | const int num_bcols = (block_mi_width_sr + num_mi_w - 1) / num_mi_w; |
4146 | 0 | const int num_brows = (mi_size_high[bsize] + num_mi_h - 1) / num_mi_h; |
4147 | | |
4148 | | // This is required because the end col of superblock may be off by 1 in case |
4149 | | // of superres. |
4150 | 0 | const int sb_bcol_end = get_superblock_tpl_column_end(ppcs, mi_col, num_mi_w); |
4151 | 0 | int row, col; |
4152 | 0 | int32_t base_block_count = 0; |
4153 | 0 | double geom_mean_of_scale = 0.0; |
4154 | 0 | for (row = mi_row / num_mi_w; row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) { |
4155 | 0 | for (col = mi_col_sr / num_mi_h; col < num_cols && col < mi_col_sr / num_mi_h + num_bcols && col < sb_bcol_end; |
4156 | 0 | ++col) { |
4157 | 0 | const int index = row * num_cols + col; |
4158 | 0 | geom_mean_of_scale += log(ppcs->pa_me_data->tpl_sb_rdmult_scaling_factors[index]); |
4159 | 0 | ++base_block_count; |
4160 | 0 | } |
4161 | 0 | } |
4162 | | // When superres is on, base_block_count could be zero. |
4163 | | // This function's counterpart in AOM, av1_get_hier_tpl_rdmult, will encounter division by zero |
4164 | 0 | if (base_block_count == 0) { |
4165 | | // return a large number to indicate invalid state |
4166 | 0 | ctx->full_lambda_md[EB_8_BIT_MD] = SUPERRES_INVALID_STATE; |
4167 | 0 | ctx->full_lambda_md[EB_10_BIT_MD] = SUPERRES_INVALID_STATE; |
4168 | |
|
4169 | 0 | ctx->fast_lambda_md[EB_8_BIT_MD] = SUPERRES_INVALID_STATE; |
4170 | 0 | ctx->fast_lambda_md[EB_10_BIT_MD] = SUPERRES_INVALID_STATE; |
4171 | 0 | return; |
4172 | 0 | } |
4173 | | |
4174 | 0 | geom_mean_of_scale = exp(geom_mean_of_scale / base_block_count); |
4175 | |
|
4176 | 0 | ctx->full_lambda_md[EB_8_BIT_MD] = |
4177 | 0 | (uint32_t)((double)ctx->ed_ctx->pic_full_lambda[EB_8_BIT_MD] * geom_mean_of_scale + 0.5); |
4178 | 0 | ctx->full_lambda_md[EB_10_BIT_MD] = |
4179 | 0 | (uint32_t)((double)ctx->ed_ctx->pic_full_lambda[EB_10_BIT_MD] * geom_mean_of_scale + 0.5); |
4180 | |
|
4181 | 0 | ctx->fast_lambda_md[EB_8_BIT_MD] = |
4182 | 0 | (uint32_t)((double)ctx->ed_ctx->pic_fast_lambda[EB_8_BIT_MD] * geom_mean_of_scale + 0.5); |
4183 | 0 | ctx->fast_lambda_md[EB_10_BIT_MD] = |
4184 | 0 | (uint32_t)((double)ctx->ed_ctx->pic_fast_lambda[EB_10_BIT_MD] * geom_mean_of_scale + 0.5); |
4185 | 0 | if (ppcs->scs->static_config.tune == TUNE_SSIM || ppcs->scs->static_config.tune == TUNE_IQ || |
4186 | 0 | ppcs->scs->static_config.tune == TUNE_MS_SSIM) { |
4187 | 0 | aom_av1_set_ssim_rdmult(ctx, pcs, mi_row, mi_col); |
4188 | 0 | } |
4189 | 0 | } |
4190 | | |
4191 | | double similarity(uint32_t sum_s, uint32_t sum_r, uint32_t sum_sq_s, uint32_t sum_sq_r, uint32_t sum_sxr, int count, |
4192 | | uint32_t bd); |
4193 | | |
// C reference SSIM for one 4x4 block of 8-bit samples.
//   s, sp : first block and its stride (in samples)
//   r, rp : second block and its stride (in samples)
// Accumulates the sums, sums of squares and cross-product sum of the two blocks and passes them to
// similarity() with a sample count of 16 and a bit depth of 8; returns that score.
double svt_ssim_4x4_c(const uint8_t* s, uint32_t sp, const uint8_t* r, uint32_t rp) {
    enum { BLK_DIM = 4 };

    uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
    for (uint32_t y = 0; y < BLK_DIM; ++y, s += sp, r += rp) {
        for (uint32_t x = 0; x < BLK_DIM; ++x) {
            const uint32_t a = s[x];
            const uint32_t b = r[x];
            sum_s += a;
            sum_r += b;
            sum_sq_s += a * a;
            sum_sq_r += b * b;
            sum_sxr += a * b;
        }
    }

    return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, BLK_DIM * BLK_DIM, 8);
}
4218 | | |
// C reference SSIM for one 8x8 block of 8-bit samples.
//   s, sp : first block and its stride (in samples)
//   r, rp : second block and its stride (in samples)
// Accumulates the sums, sums of squares and cross-product sum of the two blocks and passes them to
// similarity() with a sample count of 64 and a bit depth of 8; returns that score.
double svt_ssim_8x8_c(const uint8_t* s, uint32_t sp, const uint8_t* r, uint32_t rp) {
    enum { BLK_DIM = 8 };

    uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
    for (uint32_t y = 0; y < BLK_DIM; ++y, s += sp, r += rp) {
        for (uint32_t x = 0; x < BLK_DIM; ++x) {
            const uint32_t a = s[x];
            const uint32_t b = r[x];
            sum_s += a;
            sum_r += b;
            sum_sq_s += a * a;
            sum_sq_r += b * b;
            sum_sxr += a * b;
        }
    }

    return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, BLK_DIM * BLK_DIM, 8);
}
4246 | | |
// C reference SSIM for one 4x4 block of 16-bit-container samples.
//   s, sp : first block and its stride (in samples)
//   r, rp : second block and its stride (in samples)
// Accumulates the sums, sums of squares and cross-product sum of the two blocks and passes them to
// similarity() with a sample count of 16 and a bit depth of 10; returns that score.
double svt_ssim_4x4_hbd_c(const uint16_t* s, uint32_t sp, const uint16_t* r, uint32_t rp) {
    const int32_t count = 4 * 4;

    uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
    for (uint32_t i = 0; i < 4; i++) {
        for (uint32_t j = 0; j < 4; j++) {
            // Widen to uint32_t before multiplying: uint16_t operands are promoted to signed int, and
            // the product of two values above 46340 would overflow it (undefined behavior).
            const uint32_t a = s[j];
            const uint32_t b = r[j];
            sum_s += a;
            sum_r += b;
            sum_sq_s += a * a;
            sum_sq_r += b * b;
            sum_sxr += a * b;
        }

        s += sp;
        r += rp;
    }

    return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, count, 10);
}
4271 | | |
// C reference SSIM for one 8x8 block of 16-bit-container samples.
//   s, sp : first block and its stride (in samples)
//   r, rp : second block and its stride (in samples)
// Accumulates the sums, sums of squares and cross-product sum of the two blocks and passes them to
// similarity() with a sample count of 64 and a bit depth of 10; returns that score.
double svt_ssim_8x8_hbd_c(const uint16_t* s, uint32_t sp, const uint16_t* r, uint32_t rp) {
    const int32_t count = 8 * 8;

    uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
    for (uint32_t i = 0; i < 8; i++) {
        for (uint32_t j = 0; j < 8; j++) {
            // Widen to uint32_t before multiplying: uint16_t operands are promoted to signed int, and
            // the product of two values above 46340 would overflow it (undefined behavior).
            const uint32_t a = s[j];
            const uint32_t b = r[j];
            sum_s += a;
            sum_r += b;
            sum_sq_s += a * a;
            sum_sq_r += b * b;
            sum_sxr += a * b;
        }

        s += sp;
        r += rp;
    }

    return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, count, 10);
}
4296 | | |
// Averages the SSIM score over the non-overlapping 8x8 tiles of a width x height area of 8-bit samples.
//   s, sp : first plane and its stride (in samples)
//   r, rp : second plane and its stride (in samples)
// Each per-tile score from svt_ssim_8x8() is clamped to [0, 1] before averaging.
// The area must contain at least one full 8x8 tile (checked by assert).
static double ssim_8x8_blocks(const uint8_t* s, uint32_t sp, const uint8_t* r, uint32_t rp, uint32_t width,
                              uint32_t height) {
    int samples = 0;
    double ssim_total = 0;

    // Tiles start at every multiple of 8 in both directions. The bounds are written as "i + 8 <= height"
    // rather than "i <= height - 8" so that a dimension below 8 yields zero iterations instead of
    // wrapping the unsigned subtraction around and reading far past the buffers.
    for (uint32_t i = 0; i + 8 <= height; i += 8, s += sp * 8, r += rp * 8) {
        for (uint32_t j = 0; j + 8 <= width; j += 8) {
            double v = svt_ssim_8x8(s + j, sp, r + j, rp);
            v = CLIP3(0, 1, v);
            ssim_total += v;
            samples++;
        }
    }
    assert(samples > 0);
    ssim_total /= samples;
    assert(ssim_total <= 1.0 && ssim_total >= 0);
    return ssim_total;
}
4317 | | |
// Averages the SSIM score over the non-overlapping 4x4 tiles of a width x height area of 8-bit samples.
//   s, sp : first plane and its stride (in samples)
//   r, rp : second plane and its stride (in samples)
// Each per-tile score from svt_ssim_4x4() is clamped to [0, 1] before averaging.
// The area must contain at least one full 4x4 tile (checked by assert).
static double ssim_4x4_blocks(const uint8_t* s, uint32_t sp, const uint8_t* r, uint32_t rp, uint32_t width,
                              uint32_t height) {
    int samples = 0;
    double ssim_total = 0;

    // Tiles start at every multiple of 4 in both directions. The bounds are written as "i + 4 <= height"
    // rather than "i <= height - 4" so that a dimension below 4 yields zero iterations instead of
    // wrapping the unsigned subtraction around and reading far past the buffers.
    for (uint32_t i = 0; i + 4 <= height; i += 4, s += sp * 4, r += rp * 4) {
        for (uint32_t j = 0; j + 4 <= width; j += 4) {
            double v = svt_ssim_4x4(s + j, sp, r + j, rp);
            v = CLIP3(0, 1, v);
            ssim_total += v;
            samples++;
        }
    }
    assert(samples > 0);
    ssim_total /= samples;
    assert(ssim_total <= 1.0 && ssim_total >= 0);
    return ssim_total;
}
4338 | | |
// Mean SSIM of a width x height area of 8-bit samples. Both dimensions are asserted to be multiples
// of 4. 8x8 tiles are used when both dimensions are multiples of 8; otherwise 4x4 tiles are used.
static double ssim(const uint8_t* s, uint32_t sp, const uint8_t* r, uint32_t rp, uint32_t width, uint32_t height) {
    assert((width % 4) == 0 && (height % 4) == 0);

    // OR-ing the dimensions keeps any set low bit, so one modulo tests both for divisibility by 8.
    const bool use_8x8_tiles = ((width | height) % 8) == 0;
    return use_8x8_tiles ? ssim_8x8_blocks(s, sp, r, rp, width, height)
                         : ssim_4x4_blocks(s, sp, r, rp, width, height);
}
4347 | | |
// Averages the SSIM score over the non-overlapping 8x8 tiles of a width x height area of
// 16-bit-container samples.
//   s, sp : first plane and its stride (in samples)
//   r, rp : second plane and its stride (in samples)
// Each per-tile score from svt_ssim_8x8_hbd() is clamped to [0, 1] before averaging.
// The area must contain at least one full 8x8 tile (checked by assert).
static double ssim_8x8_blocks_hbd(const uint16_t* s, uint32_t sp, const uint16_t* r, uint32_t rp, uint32_t width,
                                  uint32_t height) {
    int samples = 0;
    double ssim_total = 0;

    // Tiles start at every multiple of 8 in both directions. The bounds are written as "i + 8 <= height"
    // rather than "i <= height - 8" so that a dimension below 8 yields zero iterations instead of
    // wrapping the unsigned subtraction around and reading far past the buffers.
    for (uint32_t i = 0; i + 8 <= height; i += 8, s += sp * 8, r += rp * 8) {
        for (uint32_t j = 0; j + 8 <= width; j += 8) {
            double v = svt_ssim_8x8_hbd(s + j, sp, r + j, rp);
            v = CLIP3(0, 1, v);
            ssim_total += v;
            samples++;
        }
    }
    assert(samples > 0);
    ssim_total /= samples;
    assert(ssim_total <= 1.0 && ssim_total >= 0);
    return ssim_total;
}
4368 | | |
// Averages the SSIM score over the non-overlapping 4x4 tiles of a width x height area of
// 16-bit-container samples.
//   s, sp : first plane and its stride (in samples)
//   r, rp : second plane and its stride (in samples)
// Each per-tile score from svt_ssim_4x4_hbd() is clamped to [0, 1] before averaging.
// The area must contain at least one full 4x4 tile (checked by assert).
static double ssim_4x4_blocks_hbd(const uint16_t* s, uint32_t sp, const uint16_t* r, uint32_t rp, uint32_t width,
                                  uint32_t height) {
    int samples = 0;
    double ssim_total = 0;

    // Tiles start at every multiple of 4 in both directions. The bounds are written as "i + 4 <= height"
    // rather than "i <= height - 4" so that a dimension below 4 yields zero iterations instead of
    // wrapping the unsigned subtraction around and reading far past the buffers.
    for (uint32_t i = 0; i + 4 <= height; i += 4, s += sp * 4, r += rp * 4) {
        for (uint32_t j = 0; j + 4 <= width; j += 4) {
            double v = svt_ssim_4x4_hbd(s + j, sp, r + j, rp);
            v = CLIP3(0, 1, v);
            ssim_total += v;
            samples++;
        }
    }
    assert(samples > 0);
    ssim_total /= samples;
    assert(ssim_total <= 1.0 && ssim_total >= 0);
    return ssim_total;
}
4389 | | |
// Mean SSIM of a width x height area of 16-bit-container samples. Both dimensions are asserted to be
// multiples of 4. 8x8 tiles are used when both dimensions are multiples of 8; otherwise 4x4 tiles.
static double ssim_hbd(const uint16_t* s, uint32_t sp, const uint16_t* r, uint32_t rp, uint32_t width,
                       uint32_t height) {
    assert((width % 4) == 0 && (height % 4) == 0);

    // OR-ing the dimensions keeps any set low bit, so one modulo tests both for divisibility by 8.
    const bool use_8x8_tiles = ((width | height) % 8) == 0;
    return use_8x8_tiles ? ssim_8x8_blocks_hbd(s, sp, r, rp, width, height)
                         : ssim_4x4_blocks_hbd(s, sp, r, rp, width, height);
}
4399 | | |
// SSIM-based spatial distortion of an area_width x area_height region.
//   input / recon               : source and reconstructed planes; when hbd is true they are reinterpreted
//                                 as uint16_t buffers
//   input_offset / recon_offset : start of the region, in samples (applied after the hbd cast)
//   input_stride / recon_stride : strides passed through to the SSIM and psy-distortion helpers
//   ac_bias                     : when non-zero, a psy distortion term scaled by ac_bias is added
//                                 (for hbd only if CONFIG_ENABLE_HIGH_BIT_DEPTH is set)
// Returns (1 - ssim) * area * 100 * 7 * m truncated to an integer, plus the psy term, where m is 1 for
// 8-bit input and 8 for hbd input.
uint64_t svt_spatial_full_distortion_ssim_kernel(uint8_t* input, uint32_t input_offset, uint32_t input_stride,
                                                 uint8_t* recon, int32_t recon_offset, uint32_t recon_stride,
                                                 uint32_t area_width, uint32_t area_height, bool hbd, double ac_bias) {
    const uint32_t num_samples = area_width * area_height;
    uint8_t        bd_scale    = 1;
    double         ssim_score;
    uint64_t       psy_distortion = 0; // AC-bias term; stays 0 when ac_bias is 0

    if (hbd) {
        uint16_t* const src16 = (uint16_t*)input + input_offset;
        uint16_t* const rec16 = (uint16_t*)recon + recon_offset;

        bd_scale   = 8;
        ssim_score = ssim_hbd(src16, input_stride, rec16, recon_stride, area_width, area_height);
#if CONFIG_ENABLE_HIGH_BIT_DEPTH
        if (ac_bias) {
            uint64_t ac_distortion = svt_psy_distortion_hbd(
                src16, input_stride, rec16, recon_stride, area_width, area_height);
            psy_distortion = (uint64_t)(ac_distortion * ac_bias);
        }
#endif
    } else {
        uint8_t* const src8 = input + input_offset;
        uint8_t* const rec8 = recon + recon_offset;

        ssim_score = ssim(src8, input_stride, rec8, recon_stride, area_width, area_height);
        if (ac_bias) {
            uint64_t ac_distortion = svt_psy_distortion(
                src8, input_stride, rec8, recon_stride, area_width, area_height);
            psy_distortion = (uint64_t)(ac_distortion * ac_bias);
        }
    }

    // Evaluated left to right in double precision, then truncated.
    const uint64_t spatial_distortion = (uint64_t)((1 - ssim_score) * num_samples * 100 * 7 * bd_scale);
    return spatial_distortion + psy_distortion;
}