/work/svt-av1/Source/Lib/Codec/md_process.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright(c) 2019 Intel Corporation |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license. |
10 | | */ |
11 | | |
12 | | #include <stdlib.h> |
13 | | |
14 | | #include "utility.h" |
15 | | #include "md_process.h" |
16 | | #include "lambda_rate_tables.h" |
17 | | #include "rc_process.h" |
18 | | #include "enc_mode_config.h" |
19 | | |
/*
 * Lookup table with 64 entries, indexed 0..63.
 * Entries 0..61 hold 4 * index (0, 4, ..., 244); the last two entries are
 * 249 and 255, so the table ends at the top of the 8-bit range.
 */
const uint8_t quantizer_to_qindex[64] = {
    0,   4,   8,   12,  16,  20,  24,  28,  //  0.. 7
    32,  36,  40,  44,  48,  52,  56,  60,  //  8..15
    64,  68,  72,  76,  80,  84,  88,  92,  // 16..23
    96,  100, 104, 108, 112, 116, 120, 124, // 24..31
    128, 132, 136, 140, 144, 148, 152, 156, // 32..39
    160, 164, 168, 172, 176, 180, 184, 188, // 40..47
    192, 196, 200, 204, 208, 212, 216, 220, // 48..55
    224, 228, 232, 236, 240, 244, 249, 255, // 56..63
};
24 | | |
25 | | const int percents[2][FIXED_QP_OFFSET_COUNT] = { |
26 | | {75, 70, 60, 20, 15, 0}, {76, 60, 30, 15, 8, 4} // libaom offsets |
27 | | }; |
28 | | |
/*
 * Lookup table with 64 entries, indexed 0..63, holding three value bands:
 *   indices  0..15 -> 85
 *   indices 16..47 -> 95
 *   indices 48..63 -> 100
 */
const uint8_t uni_psy_bias[64] = {
    85,  85,  85,  85,  85,  85,  85,  85,  85,  85,  85,  85,  85,  85,  85,  85,  //  0..15
    95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  // 16..31
    95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  // 32..47
    100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, // 48..63
};
34 | | |
35 | 1.37k | static void mode_decision_context_dctor(EbPtr p) { |
36 | 1.37k | ModeDecisionContext* obj = (ModeDecisionContext*)p; |
37 | | |
38 | 1.37k | uint32_t block_max_count_sb = obj->init_max_block_cnt; |
39 | | |
40 | | // MD palette search |
41 | 1.37k | if (obj->palette_buffer) { |
42 | 0 | EB_FREE(obj->palette_buffer); |
43 | 0 | } |
44 | 1.37k | if (obj->palette_cand_array) { |
45 | | // Free fields in palette_cand_array before freeing palette_cand_array |
46 | 0 | for (int cd = 0; cd < MAX_PAL_CAND; cd++) { |
47 | 0 | if (obj->palette_cand_array[cd].color_idx_map) { |
48 | 0 | EB_FREE_ARRAY(obj->palette_cand_array[cd].color_idx_map); |
49 | 0 | } |
50 | 0 | } |
51 | |
|
52 | 0 | EB_FREE_ARRAY(obj->palette_cand_array); |
53 | 0 | } |
54 | 1.37k | if (obj->palette_size_array_0) { |
55 | 0 | EB_FREE_ARRAY(obj->palette_size_array_0); |
56 | 0 | } |
57 | 8.22k | for (CandClass cand_class_it = CAND_CLASS_0; cand_class_it < CAND_CLASS_TOTAL; cand_class_it++) { |
58 | 6.85k | EB_FREE_ARRAY(obj->cand_buff_indices[cand_class_it]); |
59 | 6.85k | } |
60 | 1.37k | EB_FREE_ARRAY(obj->best_candidate_index_array); |
61 | | |
62 | 1.37k | EB_FREE_ARRAY(obj->above_txfm_context); |
63 | 1.37k | EB_FREE_ARRAY(obj->left_txfm_context); |
64 | 117k | for (uint32_t coded_leaf_index = 0; coded_leaf_index < block_max_count_sb; ++coded_leaf_index) { |
65 | 116k | if (obj->md_blk_arr_nsq[coded_leaf_index].coeff_tmp) { |
66 | 116k | EB_DELETE(obj->md_blk_arr_nsq[coded_leaf_index].coeff_tmp); |
67 | 116k | } |
68 | 116k | if (obj->md_blk_arr_nsq[coded_leaf_index].recon_tmp) { |
69 | 116k | EB_DELETE(obj->md_blk_arr_nsq[coded_leaf_index].recon_tmp); |
70 | 116k | } |
71 | 116k | } |
72 | 1.37k | EB_DELETE_PTR_ARRAY(obj->cand_bf_ptr_array, obj->max_nics_uv); |
73 | 1.37k | EB_FREE_ARRAY(obj->cand_bf_tx_depth_1->cand); |
74 | 1.37k | EB_DELETE(obj->cand_bf_tx_depth_1); |
75 | 1.37k | EB_FREE_ARRAY(obj->cand_bf_tx_depth_2->cand); |
76 | 1.37k | EB_DELETE(obj->cand_bf_tx_depth_2); |
77 | 1.37k | EB_FREE_ALIGNED_ARRAY(obj->cfl_temp_luma_recon16bit); |
78 | 1.37k | EB_FREE_ALIGNED_ARRAY(obj->cfl_temp_luma_recon); |
79 | 1.37k | EB_FREE_ALIGNED_ARRAY(obj->pred_buf_q3); |
80 | 1.37k | EB_FREE_ARRAY(obj->fast_cand_array); |
81 | 1.37k | EB_FREE_2D(obj->injected_mvs); |
82 | 1.37k | EB_FREE_ARRAY(obj->injected_ref_types); |
83 | 1.37k | EB_FREE_ARRAY(obj->fast_cost_array); |
84 | 1.37k | EB_FREE_ARRAY(obj->full_cost_array); |
85 | 1.37k | if (obj->md_blk_arr_nsq) { |
86 | 5.48k | for (int i = 0; i < 3; i++) { |
87 | 4.11k | EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].neigh_left_recon_16bit[i]); |
88 | 4.11k | EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].neigh_top_recon_16bit[i]); |
89 | 4.11k | EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].neigh_left_recon[i]); |
90 | 4.11k | EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].neigh_top_recon[i]); |
91 | 4.11k | } |
92 | 1.37k | } |
93 | 1.37k | if (obj->md_blk_arr_nsq) { |
94 | 1.37k | EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].av1xd); |
95 | 1.37k | } |
96 | 1.37k | EB_FREE_ARRAY(obj->mds); |
97 | 1.37k | EB_FREE_ARRAY(obj->pc_tree); |
98 | 1.37k | EB_FREE_ARRAY(obj->tested_blk); |
99 | 1.37k | obj->blocks_to_alloc = 0; |
100 | 1.37k | EB_FREE_ARRAY(obj->md_blk_arr_nsq); |
101 | 1.37k | #if OPT_LPD1_GLOBALMV_BYPASS |
102 | 1.37k | EB_FREE_ARRAY(obj->pd0_mds0_best_cost); |
103 | 1.37k | #endif |
104 | 1.37k | if (obj->rate_est_table) { |
105 | 0 | EB_FREE_ARRAY(obj->rate_est_table); |
106 | 0 | } |
107 | | |
108 | 6.85k | for (int i = 0; i < NEAREST_NEAR_MV_CNT; i++) { |
109 | 5.48k | if (obj->cmp_store.pred0_buf[i]) { |
110 | 0 | EB_FREE(obj->cmp_store.pred0_buf[i]); |
111 | 0 | } |
112 | 5.48k | if (obj->cmp_store.pred1_buf[i]) { |
113 | 0 | EB_FREE(obj->cmp_store.pred1_buf[i]); |
114 | 0 | } |
115 | 5.48k | } |
116 | 1.37k | if (obj->residual1) { |
117 | 0 | EB_FREE(obj->residual1); |
118 | 0 | } |
119 | 1.37k | if (obj->diff10) { |
120 | 0 | EB_FREE(obj->diff10); |
121 | 0 | } |
122 | | |
123 | 1.37k | if (obj->intrapred_buf) { |
124 | 0 | EB_FREE_2D(obj->intrapred_buf); |
125 | 0 | } |
126 | | |
127 | 1.37k | if (obj->obmc_buff_0) { |
128 | 0 | EB_FREE(obj->obmc_buff_0); |
129 | 0 | } |
130 | 1.37k | if (obj->obmc_buff_1) { |
131 | 0 | EB_FREE(obj->obmc_buff_1); |
132 | 0 | } |
133 | 1.37k | if (obj->wsrc_buf) { |
134 | 0 | EB_FREE(obj->wsrc_buf); |
135 | 0 | } |
136 | 1.37k | if (obj->mask_buf) { |
137 | 0 | EB_FREE(obj->mask_buf); |
138 | 0 | } |
139 | 23.2k | for (uint32_t txt_itr = 0; txt_itr < TX_TYPES; ++txt_itr) { |
140 | 21.9k | EB_DELETE(obj->recon_coeff_ptr[txt_itr]); |
141 | 21.9k | EB_DELETE(obj->recon_ptr[txt_itr]); |
142 | 21.9k | EB_DELETE(obj->quant_coeff_ptr[txt_itr]); |
143 | 21.9k | } |
144 | 1.37k | EB_DELETE(obj->tx_coeffs); |
145 | 1.37k | EB_DELETE(obj->scratch_prediction_ptr); |
146 | 1.37k | EB_DELETE(obj->temp_residual); |
147 | 1.37k | EB_DELETE(obj->temp_recon_ptr); |
148 | 1.37k | EB_FREE_ARRAY(obj->full_cost_ssim_array); |
149 | 1.37k | } |
150 | | |
151 | | /* Forward declaration only (no definition appears in this listing); the constructor below calls it once per picture type and QP and sums the returned mds1_count entries to bound its candidate-buffer count. */ void svt_aom_set_nics(SequenceControlSet* scs, NicScalingCtrls* scaling_ctrls, uint32_t mds1_count[CAND_CLASS_TOTAL], |
152 | | uint32_t mds2_count[CAND_CLASS_TOTAL], uint32_t mds3_count[CAND_CLASS_TOTAL], uint8_t pic_type, |
153 | | uint32_t qp); |
154 | | |
155 | | static void setup_mds(SequenceControlSet* scs, MdScan* mds, uint32_t* mds_idx, int index, BlockSize bsize, |
156 | 116k | const int min_sq_size) { |
157 | 116k | mds->mds_idx = *mds_idx; |
158 | 116k | mds->bsize = bsize; |
159 | 116k | mds->index = index; |
160 | | |
161 | | // If applicable, add split depths |
162 | 116k | const BlockGeom* blk_geom = get_blk_geom_mds(scs->blk_geom_mds, *mds_idx); |
163 | 116k | const int sq_size = block_size_wide[bsize]; |
164 | 116k | if (sq_size > min_sq_size) { |
165 | 28.7k | const BlockSize subsize = get_partition_subsize(bsize, PARTITION_SPLIT); |
166 | 28.7k | const int sq_subsize = block_size_wide[subsize]; |
167 | 28.7k | int blocks_per_subdepth = (sq_subsize / min_sq_size) * (sq_subsize / min_sq_size); |
168 | 28.7k | int blocks_to_skip = 0; |
169 | | |
170 | 65.7k | for (int i = min_sq_size; i <= sq_subsize; i <<= 1, blocks_per_subdepth >>= 2) { |
171 | 36.9k | blocks_to_skip += blocks_per_subdepth; |
172 | 36.9k | } |
173 | | |
174 | 28.7k | *mds_idx += blk_geom->d1_depth_offset; |
175 | 143k | for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { |
176 | 115k | mds->split[i] = mds + i * blocks_to_skip + 1; |
177 | 115k | setup_mds(scs, mds->split[i], mds_idx, i, subsize, min_sq_size); |
178 | 115k | } |
179 | 87.6k | } else { |
180 | 87.6k | *mds_idx += blk_geom->ns_depth_offset; |
181 | 87.6k | } |
182 | 116k | } |
183 | | |
184 | | static void setup_pc_tree(PC_TREE* pc_tree, bool (*test_blk_array)[PART_S][4], int index, BlockSize bsize, |
185 | 116k | const int min_sq_size) { |
186 | 116k | pc_tree->bsize = bsize; |
187 | 116k | pc_tree->index = index; |
188 | 116k | pc_tree->tested_blk = test_blk_array[0]; |
189 | | |
190 | | // If applicable, add split depths |
191 | 116k | const int sq_size = block_size_wide[bsize]; |
192 | 116k | if (sq_size > min_sq_size) { |
193 | 28.7k | const BlockSize subsize = get_partition_subsize(bsize, PARTITION_SPLIT); |
194 | 28.7k | const int sq_subsize = block_size_wide[subsize]; |
195 | 28.7k | int blocks_per_subdepth = (sq_subsize / min_sq_size) * (sq_subsize / min_sq_size); |
196 | 28.7k | int blocks_to_skip = 0; |
197 | | |
198 | 65.7k | for (int i = min_sq_size; i <= sq_subsize; i <<= 1, blocks_per_subdepth >>= 2) { |
199 | 36.9k | blocks_to_skip += blocks_per_subdepth; |
200 | 36.9k | } |
201 | | |
202 | 143k | for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { |
203 | 115k | pc_tree->split[i] = pc_tree + i * blocks_to_skip + 1; |
204 | 115k | pc_tree->split[i]->parent = pc_tree; |
205 | 115k | setup_pc_tree(pc_tree->split[i], test_blk_array + i * blocks_to_skip + 1, i, subsize, min_sq_size); |
206 | 115k | } |
207 | 28.7k | } |
208 | 116k | } |
209 | | |
210 | | /****************************************************** |
211 | | * Mode Decision Context Constructor |
212 | | ******************************************************/ |
213 | | EbErrorType svt_aom_mode_decision_context_ctor(ModeDecisionContext* ctx, SequenceControlSet* scs, |
214 | | EbColorFormat color_format, uint8_t sb_size, EncMode enc_mode, |
215 | | uint16_t max_block_cnt, uint32_t encoder_bit_depth, |
216 | | EbFifo* mode_decision_configuration_input_fifo_ptr, |
217 | | EbFifo* mode_decision_output_fifo_ptr, uint8_t enable_hbd_mode_decision, |
218 | 1.37k | uint8_t seq_qp_mod) { |
219 | 1.37k | const bool allintra = scs->allintra; |
220 | 1.37k | const bool rtc_tune = scs->static_config.rtc; |
221 | 1.37k | uint32_t buffer_index; |
222 | 1.37k | uint32_t cand_index; |
223 | | |
224 | 1.37k | ctx->init_max_block_cnt = max_block_cnt; |
225 | 1.37k | uint32_t block_max_count_sb = max_block_cnt; |
226 | | |
227 | 1.37k | ctx->sb_size = sb_size; |
228 | 1.37k | (void)color_format; |
229 | | |
230 | 1.37k | ctx->dctor = mode_decision_context_dctor; |
231 | 1.37k | ctx->hbd_md = enable_hbd_mode_decision; |
232 | | |
233 | | // Zero the tail of md_levels_buf once; it serves as permanent bottom-padding |
234 | | // for set_levels() and is never overwritten by svt_av1_txb_init_levels(). |
235 | 1.37k | memset(ctx->md_levels_buf + LEVELS_TAIL_OFFSET, 0, TX_PAD_2D - LEVELS_TAIL_OFFSET); |
236 | | |
237 | | // Input/Output System Resource Manager FIFOs |
238 | 1.37k | ctx->mode_decision_configuration_input_fifo_ptr = mode_decision_configuration_input_fifo_ptr; |
239 | 1.37k | ctx->mode_decision_output_fifo_ptr = mode_decision_output_fifo_ptr; |
240 | | |
241 | | // Maximum number of candidates MD can support |
242 | | // determine MAX_NICS for a given preset |
243 | | // get the min scaling level (the smallest scaling level is the most conservative) |
244 | 1.37k | uint8_t min_nic_scaling_level = NICS_SCALING_LEVELS - 1; |
245 | 1.37k | uint8_t stage1_scaling_num; |
246 | 1.37k | if (allintra) { |
247 | 1.37k | uint8_t nic_level = svt_aom_get_nic_level_allintra(enc_mode); |
248 | 1.37k | stage1_scaling_num = MD_STAGE_NICS_SCAL_NUM[svt_aom_set_nic_controls(NULL, nic_level)][MD_STAGE_1]; |
249 | 1.37k | } else if (rtc_tune) { |
250 | 0 | #if TUNE_RTC |
251 | 0 | uint8_t nic_level = svt_aom_get_nic_level_rtc(enc_mode, scs->use_flat_ipp); |
252 | | #else |
253 | | #if TUNE_SIMPLIFY_SETTINGS |
254 | | uint8_t nic_level = svt_aom_get_nic_level_rtc(enc_mode); |
255 | | #else |
256 | | uint8_t nic_level = svt_aom_get_nic_level_rtc(enc_mode, scs->use_flat_ipp); |
257 | | #endif |
258 | | #endif |
259 | 0 | stage1_scaling_num = MD_STAGE_NICS_SCAL_NUM[svt_aom_set_nic_controls(NULL, nic_level)][MD_STAGE_1]; |
260 | 0 | } else { |
261 | 0 | #if TUNE_SIMPLIFY_SETTINGS |
262 | 0 | for (uint8_t is_base = 0; is_base < 2; is_base++) { |
263 | 0 | uint8_t nic_level = svt_aom_get_nic_level_default(enc_mode, is_base); |
264 | 0 | uint8_t nic_scaling_level = svt_aom_set_nic_controls(NULL, nic_level); |
265 | 0 | min_nic_scaling_level = MIN(min_nic_scaling_level, nic_scaling_level); |
266 | 0 | } |
267 | | #else |
268 | | for (uint8_t sc_class1 = 0; sc_class1 < 2; sc_class1++) { |
269 | | for (uint8_t is_base = 0; is_base < 2; is_base++) { |
270 | | uint8_t nic_level = svt_aom_get_nic_level_default(enc_mode, is_base, sc_class1); |
271 | | uint8_t nic_scaling_level = svt_aom_set_nic_controls(NULL, nic_level); |
272 | | min_nic_scaling_level = MIN(min_nic_scaling_level, nic_scaling_level); |
273 | | } |
274 | | } |
275 | | #endif |
276 | |
|
277 | 0 | stage1_scaling_num = MD_STAGE_NICS_SCAL_NUM[min_nic_scaling_level][MD_STAGE_1]; |
278 | 0 | } |
279 | | // scale max_nics |
280 | 1.37k | uint32_t max_nics = 0; |
281 | 1.37k | { |
282 | 1.37k | NicScalingCtrls scaling_ctrls; |
283 | 1.37k | scaling_ctrls.stage1_scaling_num = stage1_scaling_num; |
284 | 1.37k | scaling_ctrls.stage2_scaling_num = stage1_scaling_num; |
285 | 1.37k | scaling_ctrls.stage3_scaling_num = stage1_scaling_num; |
286 | 1.37k | uint32_t mds1_count[CAND_CLASS_TOTAL]; |
287 | 1.37k | uint32_t mds2_count[CAND_CLASS_TOTAL]; |
288 | 1.37k | uint32_t mds3_count[CAND_CLASS_TOTAL]; |
289 | 5.48k | for (uint8_t pic_type = 0; pic_type < NICS_PIC_TYPE; pic_type++) { |
290 | 267k | for (uint8_t qp = MIN_QP_VALUE; qp <= MAX_QP_VALUE; qp++) { |
291 | 263k | svt_aom_set_nics(scs, &scaling_ctrls, mds1_count, mds2_count, mds3_count, pic_type, qp); |
292 | | |
293 | 263k | uint32_t nics = 0; |
294 | 1.57M | for (CandClass cidx = CAND_CLASS_0; cidx < CAND_CLASS_TOTAL; cidx++) { |
295 | 1.31M | nics += mds1_count[cidx]; |
296 | 1.31M | } |
297 | 263k | max_nics = MAX(max_nics, nics); |
298 | 263k | } |
299 | 4.11k | } |
300 | 1.37k | } |
301 | | |
302 | | // If independent chroma search is used, need to allocate additional 84 candidate buffers |
303 | 1.37k | bool is_chroma_mode_0; |
304 | 1.37k | if (allintra) { |
305 | 1.37k | is_chroma_mode_0 = svt_aom_set_chroma_controls(NULL, svt_aom_get_chroma_level_allintra(enc_mode)) == |
306 | 1.37k | CHROMA_MODE_0; |
307 | 1.37k | } else if (scs->static_config.rtc) { |
308 | 0 | #if TUNE_SIMPLIFY_SETTINGS |
309 | 0 | is_chroma_mode_0 = svt_aom_set_chroma_controls( |
310 | 0 | NULL, svt_aom_get_chroma_level_rtc(enc_mode, scs->use_flat_ipp)) == CHROMA_MODE_0; |
311 | | #else |
312 | | for (uint8_t is_i_slice = 0; is_i_slice < 2; is_i_slice++) { |
313 | | is_chroma_mode_0 = svt_aom_set_chroma_controls(NULL, svt_aom_get_chroma_level_rtc(enc_mode, is_i_slice)) == |
314 | | CHROMA_MODE_0; |
315 | | if (is_chroma_mode_0) { |
316 | | break; |
317 | | } |
318 | | } |
319 | | #endif |
320 | 0 | } else { |
321 | 0 | for (uint8_t is_i_slice = 0; is_i_slice < 2; is_i_slice++) { |
322 | 0 | is_chroma_mode_0 = svt_aom_set_chroma_controls( |
323 | 0 | NULL, svt_aom_get_chroma_level_default(enc_mode, is_i_slice)) == CHROMA_MODE_0; |
324 | 0 | if (is_chroma_mode_0) { |
325 | 0 | break; |
326 | 0 | } |
327 | 0 | } |
328 | 0 | } |
329 | 1.37k | const uint8_t ind_uv_cands = is_chroma_mode_0 ? 84 : 0; |
330 | 1.37k | max_nics += CAND_CLASS_TOTAL; //need one extra temp buffer for each fast loop call |
331 | 1.37k | ctx->max_nics = max_nics; |
332 | 1.37k | ctx->max_nics_uv = max_nics + ind_uv_cands; |
333 | | // Cfl scratch memory |
334 | 1.37k | if (ctx->hbd_md > EB_8_BIT_MD) { |
335 | 0 | EB_MALLOC_ALIGNED(ctx->cfl_temp_luma_recon16bit, sizeof(uint16_t) * sb_size * sb_size); |
336 | 0 | } |
337 | 1.37k | if (ctx->hbd_md != EB_10_BIT_MD) { |
338 | 1.37k | EB_MALLOC_ALIGNED(ctx->cfl_temp_luma_recon, sizeof(uint8_t) * sb_size * sb_size); |
339 | 1.37k | } |
340 | 1.37k | EB_MALLOC_ALIGNED(ctx->pred_buf_q3, CFL_BUF_SQUARE); |
341 | 1.37k | uint8_t use_update_cdf = 0; |
342 | 1.37k | if (allintra) { |
343 | 1.37k | use_update_cdf = svt_aom_get_update_cdf_level_allintra(enc_mode); |
344 | 1.37k | } else if (rtc_tune) { |
345 | 0 | #if TUNE_SIMPLIFY_SETTINGS |
346 | 0 | for (uint8_t is_islice = 0; is_islice < 2; is_islice++) { |
347 | 0 | if (use_update_cdf) { |
348 | 0 | break; |
349 | 0 | } |
350 | 0 | use_update_cdf |= svt_aom_get_update_cdf_level_rtc(enc_mode, is_islice); |
351 | 0 | } |
352 | | #else |
353 | | for (uint8_t sc_class1 = 0; sc_class1 < 2; sc_class1++) { |
354 | | for (uint8_t is_islice = 0; is_islice < 2; is_islice++) { |
355 | | for (uint8_t is_base = 0; is_base < 2; is_base++) { |
356 | | if (use_update_cdf) { |
357 | | break; |
358 | | } |
359 | | use_update_cdf |= svt_aom_get_update_cdf_level_rtc(enc_mode, is_islice, is_base, sc_class1); |
360 | | } |
361 | | } |
362 | | } |
363 | | #endif |
364 | 0 | } else { |
365 | 0 | #if TUNE_SIMPLIFY_SETTINGS |
366 | 0 | for (uint8_t is_islice = 0; is_islice < 2; is_islice++) { |
367 | 0 | for (uint8_t is_base = 0; is_base < 2; is_base++) { |
368 | 0 | if (use_update_cdf) { |
369 | 0 | break; |
370 | 0 | } |
371 | 0 | use_update_cdf |= svt_aom_get_update_cdf_level_default(enc_mode, is_islice, is_base); |
372 | 0 | } |
373 | 0 | } |
374 | | #else |
375 | | for (uint8_t sc_class1 = 0; sc_class1 < 2; sc_class1++) { |
376 | | for (uint8_t is_islice = 0; is_islice < 2; is_islice++) { |
377 | | for (uint8_t is_base = 0; is_base < 2; is_base++) { |
378 | | if (use_update_cdf) { |
379 | | break; |
380 | | } |
381 | | use_update_cdf |= svt_aom_get_update_cdf_level_default(enc_mode, is_islice, is_base, sc_class1); |
382 | | } |
383 | | } |
384 | | } |
385 | | #endif |
386 | 0 | } |
387 | 1.37k | if (use_update_cdf) { |
388 | 0 | EB_CALLOC_ARRAY(ctx->rate_est_table, 1); |
389 | 1.37k | } else { |
390 | 1.37k | ctx->rate_est_table = NULL; |
391 | 1.37k | } |
392 | | // Allocate buffer for inter-inter compound prediction |
393 | 1.37k | if (get_inter_compound_level(enc_mode)) { |
394 | 0 | const uint8_t bits = ctx->hbd_md > EB_8_BIT_MD ? 2 : 1; |
395 | 0 | for (int i = 0; i < NEAREST_NEAR_MV_CNT; i++) { |
396 | 0 | EB_MALLOC(ctx->cmp_store.pred0_buf[i], sb_size * sb_size * bits * sizeof(uint8_t)); |
397 | 0 | EB_MALLOC(ctx->cmp_store.pred1_buf[i], sb_size * sb_size * bits * sizeof(uint8_t)); |
398 | 0 | } |
399 | 0 | EB_MALLOC(ctx->residual1, sb_size * sb_size * sizeof(ctx->residual1[0])); |
400 | 0 | EB_MALLOC(ctx->diff10, sb_size * sb_size * sizeof(ctx->diff10[0])); |
401 | 0 | } |
402 | | |
403 | | // Allocate buffer for inter-intra prediction |
404 | 1.37k | uint8_t ii_allowed = 0; |
405 | 4.11k | for (uint8_t transition_present = 0; transition_present < 2; transition_present++) { |
406 | 2.74k | if (ii_allowed) { |
407 | 0 | break; |
408 | 0 | } |
409 | 2.74k | ii_allowed |= svt_aom_get_inter_intra_level(enc_mode, transition_present); |
410 | 2.74k | } |
411 | 1.37k | if (ii_allowed) { |
412 | 0 | const uint8_t bits = ctx->hbd_md > EB_8_BIT_MD ? 2 : 1; |
413 | | // MAX block size for inter intra is 32x32 |
414 | 0 | EB_MALLOC_2D(ctx->intrapred_buf, INTERINTRA_MODES, 32 * 32 * bits * sizeof(ctx->intrapred_buf[0][0])); |
415 | 0 | } |
416 | | |
417 | | // Allocate buffers for obmc prediction |
418 | 1.37k | uint8_t obmc_allowed = 0; |
419 | 4.11k | for (uint8_t is_base = 0; is_base < 2; is_base++) { |
420 | 178k | for (uint8_t qp = MIN_QP_VALUE; qp <= MAX_QP_VALUE; qp++) { |
421 | 175k | if (obmc_allowed) { |
422 | 0 | break; |
423 | 0 | } |
424 | | #if TUNE_SHIFT_PRESETS_RTC && !TUNE_RTC |
425 | | obmc_allowed |= svt_aom_get_obmc_level(enc_mode, qp, seq_qp_mod, rtc_tune); |
426 | | #else |
427 | 175k | obmc_allowed |= svt_aom_get_obmc_level(enc_mode, qp, seq_qp_mod); |
428 | 175k | #endif |
429 | 175k | } |
430 | 2.74k | } |
431 | 1.37k | if (obmc_allowed) { |
432 | 0 | const uint8_t bits = ctx->hbd_md > EB_8_BIT_MD ? 2 : 1; |
433 | 0 | EB_MALLOC(ctx->obmc_buff_0, sb_size * sb_size * bits * MAX_PLANES * sizeof(ctx->obmc_buff_0[0])); |
434 | 0 | EB_MALLOC(ctx->obmc_buff_1, sb_size * sb_size * bits * MAX_PLANES * sizeof(ctx->obmc_buff_1[0])); |
435 | 0 | EB_MALLOC(ctx->wsrc_buf, sb_size * sb_size * sizeof(ctx->wsrc_buf[0])); |
436 | 0 | EB_MALLOC(ctx->mask_buf, sb_size * sb_size * sizeof(ctx->mask_buf[0])); |
437 | 0 | } |
438 | 1.37k | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq, block_max_count_sb); |
439 | 1.37k | #if OPT_LPD1_GLOBALMV_BYPASS |
440 | 1.37k | EB_MALLOC_ARRAY(ctx->pd0_mds0_best_cost, block_max_count_sb); |
441 | 1.37k | #endif |
442 | | // Fast Candidate Array |
443 | 1.37k | #if OPT_MAX_CAN_COUNT_RTC |
444 | 1.37k | uint16_t max_can_count = svt_aom_get_max_can_count(enc_mode, rtc_tune) + ind_uv_cands; |
445 | | #else |
446 | | uint16_t max_can_count = svt_aom_get_max_can_count(enc_mode) + ind_uv_cands; |
447 | | #endif |
448 | 1.37k | EB_MALLOC_ARRAY(ctx->fast_cand_array, max_can_count); |
449 | | |
450 | 261k | for (cand_index = 0; cand_index < max_can_count; ++cand_index) { |
451 | 260k | ctx->fast_cand_array[cand_index].palette_info = NULL; |
452 | 260k | } |
453 | 1.37k | svt_aom_assert_err(max_can_count > ind_uv_cands, "Max. candidates is too low"); |
454 | 1.37k | EB_MALLOC_2D(ctx->injected_mvs, (uint16_t)(max_can_count - ind_uv_cands), 2); |
455 | 1.37k | EB_MALLOC_ARRAY(ctx->injected_ref_types, (max_can_count - ind_uv_cands)); |
456 | | |
457 | | // Set buffers for MD palette search to NULL; will be init'd at runtime if needed |
458 | 1.37k | ctx->palette_buffer = NULL; |
459 | 1.37k | ctx->palette_cand_array = NULL; |
460 | 1.37k | ctx->palette_size_array_0 = NULL; |
461 | | |
462 | | // Cost Arrays |
463 | 1.37k | EB_MALLOC_ARRAY(ctx->fast_cost_array, ctx->max_nics_uv); |
464 | 1.37k | EB_MALLOC_ARRAY(ctx->full_cost_array, ctx->max_nics_uv); |
465 | 1.37k | EB_MALLOC_ARRAY(ctx->full_cost_ssim_array, ctx->max_nics_uv); |
466 | | // Candidate Buffers |
467 | 1.37k | EB_NEW(ctx->cand_bf_tx_depth_1, |
468 | 1.37k | svt_aom_mode_decision_scratch_cand_bf_ctor, |
469 | 1.37k | sb_size, |
470 | 1.37k | ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT); |
471 | | |
472 | 1.37k | EB_ALLOC_PTR_ARRAY(ctx->cand_bf_tx_depth_1->cand, 1); |
473 | 1.37k | EB_NEW(ctx->cand_bf_tx_depth_2, |
474 | 1.37k | svt_aom_mode_decision_scratch_cand_bf_ctor, |
475 | 1.37k | sb_size, |
476 | 1.37k | ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT); |
477 | | |
478 | 1.37k | EB_ALLOC_PTR_ARRAY(ctx->cand_bf_tx_depth_2->cand, 1); |
479 | 5.48k | for (int i = 0; i < 3; i++) { |
480 | 4.11k | ctx->md_blk_arr_nsq[0].neigh_left_recon[i] = NULL; |
481 | 4.11k | ctx->md_blk_arr_nsq[0].neigh_top_recon[i] = NULL; |
482 | 4.11k | ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[i] = NULL; |
483 | 4.11k | ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[i] = NULL; |
484 | 4.11k | } |
485 | 1.37k | uint32_t coded_leaf_index; |
486 | 1.37k | uint16_t sz = sizeof(uint16_t); |
487 | 1.37k | if (ctx->hbd_md > EB_8_BIT_MD) { |
488 | 0 | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[0], block_max_count_sb * sb_size * sz); |
489 | 0 | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[0], block_max_count_sb * sb_size * sz); |
490 | 0 | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[1], block_max_count_sb * sb_size * sz >> 1); |
491 | 0 | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[1], block_max_count_sb * sb_size * sz >> 1); |
492 | 0 | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[2], block_max_count_sb * sb_size * sz >> 1); |
493 | 0 | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[2], block_max_count_sb * sb_size * sz >> 1); |
494 | | |
495 | 0 | for (coded_leaf_index = 0; coded_leaf_index < block_max_count_sb; ++coded_leaf_index) { |
496 | 0 | size_t offset = coded_leaf_index * sb_size * sz; |
497 | 0 | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon_16bit[0] = |
498 | 0 | ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[0] + offset; |
499 | 0 | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon_16bit[0] = |
500 | 0 | ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[0] + offset; |
501 | 0 | offset >>= 1; |
502 | 0 | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon_16bit[1] = |
503 | 0 | ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[1] + offset; |
504 | 0 | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon_16bit[1] = |
505 | 0 | ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[1] + offset; |
506 | 0 | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon_16bit[2] = |
507 | 0 | ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[2] + offset; |
508 | 0 | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon_16bit[2] = |
509 | 0 | ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[2] + offset; |
510 | 0 | } |
511 | 0 | } |
512 | 1.37k | if (ctx->hbd_md != EB_10_BIT_MD) { |
513 | 1.37k | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon[0], block_max_count_sb * sb_size); |
514 | 1.37k | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon[0], block_max_count_sb * sb_size); |
515 | 1.37k | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon[1], block_max_count_sb * sb_size >> 1); |
516 | 1.37k | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon[1], block_max_count_sb * sb_size >> 1); |
517 | 1.37k | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon[2], block_max_count_sb * sb_size >> 1); |
518 | 1.37k | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon[2], block_max_count_sb * sb_size >> 1); |
519 | | |
520 | 117k | for (coded_leaf_index = 0; coded_leaf_index < block_max_count_sb; ++coded_leaf_index) { |
521 | 116k | size_t offset = coded_leaf_index * sb_size; |
522 | 116k | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon[0] = ctx->md_blk_arr_nsq[0].neigh_left_recon[0] + |
523 | 116k | offset; |
524 | 116k | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon[0] = ctx->md_blk_arr_nsq[0].neigh_top_recon[0] + |
525 | 116k | offset; |
526 | 116k | offset >>= 1; |
527 | 116k | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon[1] = ctx->md_blk_arr_nsq[0].neigh_left_recon[1] + |
528 | 116k | offset; |
529 | 116k | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon[1] = ctx->md_blk_arr_nsq[0].neigh_top_recon[1] + |
530 | 116k | offset; |
531 | 116k | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon[2] = ctx->md_blk_arr_nsq[0].neigh_left_recon[2] + |
532 | 116k | offset; |
533 | 116k | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon[2] = ctx->md_blk_arr_nsq[0].neigh_top_recon[2] + |
534 | 116k | offset; |
535 | 116k | } |
536 | 1.37k | } |
537 | 1.37k | ctx->md_blk_arr_nsq[0].av1xd = NULL; |
538 | 1.37k | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].av1xd, block_max_count_sb); |
539 | | |
540 | | // Alloc mds and pc_tree, which are used to track tested blocks in MD |
541 | 1.37k | bool disallow_4x4 = allintra ? svt_aom_get_disallow_4x4_allintra(enc_mode) |
542 | 1.37k | #if TUNE_SIMPLIFY_SETTINGS |
543 | 1.37k | : rtc_tune ? svt_aom_get_disallow_4x4_rtc() |
544 | | #else |
545 | | : rtc_tune ? svt_aom_get_disallow_4x4_rtc(enc_mode) |
546 | | #endif |
547 | 0 | : svt_aom_get_disallow_4x4_default(enc_mode); |
548 | 1.37k | bool disallow_8x8 = allintra ? svt_aom_get_disallow_8x8_allintra() |
549 | 1.37k | : rtc_tune ? svt_aom_get_disallow_8x8_rtc(enc_mode, scs->max_input_luma_width, scs->max_input_luma_height) |
550 | 0 | : svt_aom_get_disallow_8x8_default(); |
551 | 1.37k | uint8_t min_bsize = disallow_8x8 ? 16 : disallow_4x4 ? 8 : 4; |
552 | 1.37k | int blocks_per_depth = (sb_size / min_bsize) * (sb_size / min_bsize); |
553 | 1.37k | int blocks_to_alloc = 0; |
554 | | |
555 | 6.85k | for (int i = min_bsize; i <= sb_size; i <<= 1, blocks_per_depth >>= 2) { |
556 | 5.48k | blocks_to_alloc += blocks_per_depth; |
557 | 5.48k | } |
558 | 1.37k | EB_CALLOC_ARRAY(ctx->mds, blocks_to_alloc); |
559 | 1.37k | uint32_t mds_idx = 0; |
560 | 1.37k | setup_mds(scs, ctx->mds, &mds_idx, 0, scs->seq_header.sb_size, min_bsize); |
561 | 1.37k | EB_CALLOC_ARRAY(ctx->pc_tree, blocks_to_alloc); |
562 | 1.37k | EB_MALLOC_ARRAY(ctx->tested_blk, blocks_to_alloc); |
563 | 1.37k | setup_pc_tree(ctx->pc_tree, ctx->tested_blk, 0, scs->seq_header.sb_size, min_bsize); |
564 | 1.37k | ctx->blocks_to_alloc = blocks_to_alloc; |
565 | | |
566 | 1.37k | bool bypass_encdec = allintra ? svt_aom_get_bypass_encdec_allintra(enc_mode) |
567 | 1.37k | : rtc_tune ? svt_aom_get_bypass_encdec_rtc(enc_mode, encoder_bit_depth) |
568 | 0 | : svt_aom_get_bypass_encdec_default(enc_mode, encoder_bit_depth); |
569 | 117k | for (coded_leaf_index = 0; coded_leaf_index < block_max_count_sb; ++coded_leaf_index) { |
570 | 116k | ctx->md_blk_arr_nsq[coded_leaf_index].av1xd = ctx->md_blk_arr_nsq[0].av1xd + coded_leaf_index; |
571 | 116k | ctx->md_blk_arr_nsq[coded_leaf_index].segment_id = 0; |
572 | 116k | const BlockGeom* blk_geom = get_blk_geom_mds(scs->blk_geom_mds, coded_leaf_index); |
573 | 116k | if (bypass_encdec) { |
574 | 116k | EbPictureBufferDescInitData init_data; |
575 | | |
576 | 116k | init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK; |
577 | 116k | init_data.max_width = blk_geom->bwidth; |
578 | 116k | init_data.max_height = blk_geom->bheight; |
579 | 116k | init_data.bit_depth = EB_THIRTYTWO_BIT; |
580 | 116k | init_data.color_format = (blk_geom->bwidth > 4 && blk_geom->bheight > 4) |
581 | 116k | ? EB_YUV420 |
582 | 116k | : EB_YUV444; // PW - must have at least 4x4 for chroma coeffs |
583 | 116k | init_data.border = 0; |
584 | 116k | init_data.split_mode = false; |
585 | | |
586 | 116k | EB_NEW(ctx->md_blk_arr_nsq[coded_leaf_index].coeff_tmp, svt_picture_buffer_desc_ctor, (EbPtr)&init_data); |
587 | | |
588 | 116k | init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK; |
589 | 116k | init_data.max_width = blk_geom->bwidth; |
590 | 116k | init_data.max_height = blk_geom->bheight; |
591 | 116k | init_data.bit_depth = ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT; |
592 | 116k | ; |
593 | 116k | init_data.color_format = (blk_geom->bwidth > 4 && blk_geom->bheight > 4) ? EB_YUV420 : EB_YUV444; |
594 | 116k | init_data.border = 0; |
595 | 116k | init_data.split_mode = false; |
596 | | |
597 | 116k | EB_NEW(ctx->md_blk_arr_nsq[coded_leaf_index].recon_tmp, svt_picture_buffer_desc_ctor, (EbPtr)&init_data); |
598 | 116k | } else { |
599 | 0 | ctx->md_blk_arr_nsq[coded_leaf_index].coeff_tmp = NULL; |
600 | 0 | ctx->md_blk_arr_nsq[coded_leaf_index].recon_tmp = NULL; |
601 | 0 | } |
602 | 116k | } |
603 | 8.22k | for (CandClass cand_class_it = CAND_CLASS_0; cand_class_it < CAND_CLASS_TOTAL; cand_class_it++) { |
604 | 6.85k | EB_MALLOC_ARRAY(ctx->cand_buff_indices[cand_class_it], ctx->max_nics_uv); |
605 | 6.85k | } |
606 | | |
607 | 1.37k | EB_MALLOC_ARRAY(ctx->best_candidate_index_array, ctx->max_nics_uv); |
608 | 1.37k | EB_MALLOC_ARRAY(ctx->above_txfm_context, (sb_size >> MI_SIZE_LOG2)); |
609 | 1.37k | EB_MALLOC_ARRAY(ctx->left_txfm_context, (sb_size >> MI_SIZE_LOG2)); |
610 | 1.37k | EbPictureBufferDescInitData thirty_two_width_picture_buffer_desc_init_data; |
611 | 1.37k | EbPictureBufferDescInitData picture_buffer_desc_init_data; |
612 | | |
613 | 1.37k | picture_buffer_desc_init_data.max_width = sb_size; |
614 | 1.37k | picture_buffer_desc_init_data.max_height = sb_size; |
615 | 1.37k | picture_buffer_desc_init_data.bit_depth = ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT; |
616 | 1.37k | picture_buffer_desc_init_data.color_format = EB_YUV420; |
617 | 1.37k | picture_buffer_desc_init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK; |
618 | 1.37k | picture_buffer_desc_init_data.border = 0; |
619 | 1.37k | picture_buffer_desc_init_data.split_mode = false; |
620 | 1.37k | picture_buffer_desc_init_data.is_16bit_pipeline = false; |
621 | | |
622 | 1.37k | thirty_two_width_picture_buffer_desc_init_data.max_width = sb_size; |
623 | 1.37k | thirty_two_width_picture_buffer_desc_init_data.max_height = sb_size; |
624 | 1.37k | thirty_two_width_picture_buffer_desc_init_data.bit_depth = EB_THIRTYTWO_BIT; |
625 | 1.37k | thirty_two_width_picture_buffer_desc_init_data.color_format = EB_YUV420; |
626 | 1.37k | thirty_two_width_picture_buffer_desc_init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK; |
627 | 1.37k | thirty_two_width_picture_buffer_desc_init_data.border = 0; |
628 | 1.37k | thirty_two_width_picture_buffer_desc_init_data.split_mode = false; |
629 | 1.37k | thirty_two_width_picture_buffer_desc_init_data.is_16bit_pipeline = false; |
630 | 23.2k | for (uint32_t txt_itr = 0; txt_itr < TX_TYPES; ++txt_itr) { |
631 | 21.9k | EB_NEW(ctx->recon_coeff_ptr[txt_itr], |
632 | 21.9k | svt_picture_buffer_desc_ctor, |
633 | 21.9k | (EbPtr)&thirty_two_width_picture_buffer_desc_init_data); |
634 | 21.9k | EB_NEW(ctx->recon_ptr[txt_itr], svt_picture_buffer_desc_ctor, (EbPtr)&picture_buffer_desc_init_data); |
635 | 21.9k | EB_NEW(ctx->quant_coeff_ptr[txt_itr], |
636 | 21.9k | svt_picture_buffer_desc_ctor, |
637 | 21.9k | (EbPtr)&thirty_two_width_picture_buffer_desc_init_data); |
638 | 21.9k | } |
639 | 1.37k | EB_NEW(ctx->tx_coeffs, svt_picture_buffer_desc_ctor, (EbPtr)&thirty_two_width_picture_buffer_desc_init_data); |
640 | 1.37k | EB_NEW(ctx->scratch_prediction_ptr, svt_picture_buffer_desc_ctor, (EbPtr)&picture_buffer_desc_init_data); |
641 | 1.37k | EbPictureBufferDescInitData double_width_picture_buffer_desc_init_data; |
642 | 1.37k | double_width_picture_buffer_desc_init_data.max_width = sb_size; |
643 | 1.37k | double_width_picture_buffer_desc_init_data.max_height = sb_size; |
644 | 1.37k | double_width_picture_buffer_desc_init_data.bit_depth = EB_SIXTEEN_BIT; |
645 | 1.37k | double_width_picture_buffer_desc_init_data.color_format = EB_YUV420; |
646 | 1.37k | double_width_picture_buffer_desc_init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK; |
647 | 1.37k | double_width_picture_buffer_desc_init_data.border = 0; |
648 | 1.37k | double_width_picture_buffer_desc_init_data.split_mode = false; |
649 | 1.37k | double_width_picture_buffer_desc_init_data.is_16bit_pipeline = false; |
650 | | |
651 | | // The temp_recon_ptr and temp_residual will be shared by all candidates |
652 | | // If you want to do something with residual or recon, you need to create one |
653 | 1.37k | EB_NEW(ctx->temp_recon_ptr, svt_picture_buffer_desc_ctor, (EbPtr)&picture_buffer_desc_init_data); |
654 | 1.37k | EB_NEW(ctx->temp_residual, svt_picture_buffer_desc_ctor, (EbPtr)&double_width_picture_buffer_desc_init_data); |
655 | | |
656 | | // Candidate Buffers |
657 | 1.37k | EB_ALLOC_PTR_ARRAY(ctx->cand_bf_ptr_array, ctx->max_nics_uv); |
658 | | |
659 | 15.0k | for (buffer_index = 0; buffer_index < ctx->max_nics; ++buffer_index) { |
660 | 13.7k | EB_NEW(ctx->cand_bf_ptr_array[buffer_index], |
661 | 13.7k | svt_aom_mode_decision_cand_bf_ctor, |
662 | 13.7k | ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT, |
663 | 13.7k | sb_size, |
664 | 13.7k | PICTURE_BUFFER_DESC_FULL_MASK, |
665 | 13.7k | ctx->temp_residual, |
666 | 13.7k | ctx->temp_recon_ptr, |
667 | 13.7k | &(ctx->fast_cost_array[buffer_index]), |
668 | 13.7k | &(ctx->full_cost_array[buffer_index]), |
669 | 13.7k | &(ctx->full_cost_ssim_array[buffer_index])); |
670 | 13.7k | } |
671 | | |
672 | 1.37k | for (buffer_index = max_nics; buffer_index < ctx->max_nics_uv; ++buffer_index) { |
673 | 0 | EB_NEW(ctx->cand_bf_ptr_array[buffer_index], |
674 | 0 | svt_aom_mode_decision_cand_bf_ctor, |
675 | 0 | ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT, |
676 | 0 | sb_size, |
677 | 0 | PICTURE_BUFFER_DESC_CHROMA_MASK, |
678 | 0 | ctx->temp_residual, |
679 | 0 | ctx->temp_recon_ptr, |
680 | 0 | &(ctx->fast_cost_array[buffer_index]), |
681 | 0 | &(ctx->full_cost_array[buffer_index]), |
682 | 0 | &(ctx->full_cost_ssim_array[buffer_index])); |
683 | 0 | } |
684 | | |
685 | 1.37k | return EB_ErrorNone; |
686 | 1.37k | } |
687 | | |
688 | | /************************************************** |
689 | | * Reset Mode Decision Neighbor Arrays |
 | | * |
 | | * For the tile selected by tile_idx, resets the mode-decision neighbor arrays of pcs at |
 | | * every depth index in [0, NA_TOT_CNT): |
 | | * - mdleaf_partition_na, md_y_dcs_na, the three *_dc_sign_level_coeff_na arrays and |
 | | * md_txfm_context_array are always reset; |
 | | * - the recon arrays without the _16bit suffix are reset unless pcs->hbd_md == EB_10_BIT_MD; |
 | | * - the _16bit recon arrays are reset when pcs->hbd_md > EB_8_BIT_MD, or when the encoder |
 | | * bit depth is above EB_EIGHT_BIT and pcs->pic_bypass_encdec is set. |
690 | | *************************************************/ |
691 | 4.65k | void svt_aom_reset_mode_decision_neighbor_arrays(PictureControlSet* pcs, uint16_t tile_idx) { |
692 | 4.65k | uint8_t depth; |
693 | 18.6k | for (depth = 0; depth < NA_TOT_CNT; depth++) { |
694 | 13.9k | svt_aom_neighbor_array_unit_reset(pcs->mdleaf_partition_na[depth][tile_idx]); |
695 | 13.9k | if (pcs->hbd_md != EB_10_BIT_MD) { |
696 | 13.9k | svt_aom_neighbor_array_unit_reset(pcs->md_luma_recon_na[depth][tile_idx]); |
697 | 13.9k | svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_1_luma_recon_na[depth][tile_idx]); |
698 | 13.9k | svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_2_luma_recon_na[depth][tile_idx]); |
699 | 13.9k | svt_aom_neighbor_array_unit_reset(pcs->md_cb_recon_na[depth][tile_idx]); |
700 | 13.9k | svt_aom_neighbor_array_unit_reset(pcs->md_cr_recon_na[depth][tile_idx]); |
701 | 13.9k | } |
702 | 13.9k | if (pcs->hbd_md > EB_8_BIT_MD || (pcs->scs->encoder_bit_depth > EB_EIGHT_BIT && pcs->pic_bypass_encdec)) { |
703 | 0 | svt_aom_neighbor_array_unit_reset(pcs->md_luma_recon_na_16bit[depth][tile_idx]); |
704 | 0 | svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_1_luma_recon_na_16bit[depth][tile_idx]); |
705 | 0 | svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_2_luma_recon_na_16bit[depth][tile_idx]); |
706 | 0 | svt_aom_neighbor_array_unit_reset(pcs->md_cb_recon_na_16bit[depth][tile_idx]); |
707 | 0 | svt_aom_neighbor_array_unit_reset(pcs->md_cr_recon_na_16bit[depth][tile_idx]); |
708 | 0 | } |
709 | | |
710 | 13.9k | svt_aom_neighbor_array_unit_reset(pcs->md_y_dcs_na[depth][tile_idx]); |
711 | 13.9k | svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_1_luma_dc_sign_level_coeff_na[depth][tile_idx]); |
712 | 13.9k | svt_aom_neighbor_array_unit_reset(pcs->md_cb_dc_sign_level_coeff_na[depth][tile_idx]); |
713 | 13.9k | svt_aom_neighbor_array_unit_reset(pcs->md_cr_dc_sign_level_coeff_na[depth][tile_idx]); |
714 | 13.9k | svt_aom_neighbor_array_unit_reset(pcs->md_txfm_context_array[depth][tile_idx]); |
715 | 13.9k | } |
716 | | |
717 | 4.65k | return; |
718 | 4.65k | } |
719 | | |
720 | | // When the reference intra percentage (pcs->ref_intra_percentage) is below LAMBDA_MOD_INTRA_TH, the MD lambdas are scaled by LAMBDA_MOD_INTRA_SCALING_FACTOR / 128 |
721 | 0 | #define LAMBDA_MOD_INTRA_TH 50 |
722 | 0 | #define LAMBDA_MOD_INTRA_SCALING_FACTOR 138 |
723 | | |
724 | | // Set the MD lambdas for each SB: index 0 of full_lambda_md/fast_lambda_md is computed with EB_EIGHT_BIT and index 1 with EB_TEN_BIT. |
725 | | // When lambda tuning is on (blk_lambda_tuning), the lambda of each block is set separately (full_lambda_md/fast_lambda_md) |
726 | | // later in svt_aom_set_tuned_blk_lambda. |
727 | | // Testing showed that updating the SAD lambda based on frame info was not helpful; therefore, the SAD lambda generation is not changed. |
728 | 5.73k | static void av1_lambda_assign_md(PictureControlSet* pcs, ModeDecisionContext* ctx) { |
729 | 5.73k | ctx->full_lambda_md[0] = svt_aom_compute_rd_mult(pcs, ctx->qp_index, ctx->me_q_index, EB_EIGHT_BIT); |
730 | 5.73k | ctx->fast_lambda_md[0] = svt_aom_compute_fast_lambda(pcs, ctx->qp_index, ctx->me_q_index, EB_EIGHT_BIT); |
731 | 5.73k | ctx->full_lambda_md[1] = svt_aom_compute_rd_mult(pcs, ctx->qp_index, ctx->me_q_index, EB_TEN_BIT); |
732 | 5.73k | ctx->fast_lambda_md[1] = svt_aom_compute_fast_lambda(pcs, ctx->qp_index, ctx->me_q_index, EB_TEN_BIT); |
733 | | |
734 | 5.73k | if (!pcs->scs->static_config.rtc && pcs->scs->stats_based_sb_lambda_modulation) { |
735 | 5.73k | if (pcs->temporal_layer_index > 0) { |
736 | 0 | if (pcs->ref_intra_percentage < LAMBDA_MOD_INTRA_TH) { /* NOTE(review): the four products below are not widened to uint64_t, unlike the lambda_weight and scale_factor steps further down - confirm they cannot overflow the lambda's type */ |
737 | 0 | ctx->full_lambda_md[0] = (ctx->full_lambda_md[0] * LAMBDA_MOD_INTRA_SCALING_FACTOR) >> 7; |
738 | 0 | ctx->fast_lambda_md[0] = (ctx->fast_lambda_md[0] * LAMBDA_MOD_INTRA_SCALING_FACTOR) >> 7; |
739 | 0 | ctx->full_lambda_md[1] = (ctx->full_lambda_md[1] * LAMBDA_MOD_INTRA_SCALING_FACTOR) >> 7; |
740 | 0 | ctx->fast_lambda_md[1] = (ctx->fast_lambda_md[1] * LAMBDA_MOD_INTRA_SCALING_FACTOR) >> 7; |
741 | 0 | } |
742 | 0 | } |
743 | 5.73k | } |
744 | | |
745 | 5.73k | if (pcs->lambda_weight) { /* a zero lambda_weight skips this step; otherwise every lambda is scaled by lambda_weight / 128 (multiply, then >> 7) */ |
746 | 3.10k | ctx->full_lambda_md[0] = (uint32_t)((ctx->full_lambda_md[0] * (uint64_t)pcs->lambda_weight) >> 7); |
747 | 3.10k | ctx->fast_lambda_md[0] = (uint32_t)((ctx->fast_lambda_md[0] * (uint64_t)pcs->lambda_weight) >> 7); |
748 | 3.10k | ctx->full_lambda_md[1] = (uint32_t)((ctx->full_lambda_md[1] * (uint64_t)pcs->lambda_weight) >> 7); |
749 | 3.10k | ctx->fast_lambda_md[1] = (uint32_t)((ctx->fast_lambda_md[1] * (uint64_t)pcs->lambda_weight) >> 7); |
750 | 3.10k | } |
751 | 5.73k | ctx->full_lambda_md[1] *= 16; /* fixed extra gain on the EB_TEN_BIT lambdas (x16 full here, x4 fast on the next line); presumably matches the 10-bit distortion scale - TODO confirm */ |
752 | 5.73k | ctx->fast_lambda_md[1] *= 4; |
753 | | |
754 | 5.73k | SequenceControlSet* scs = pcs->scs; |
755 | 5.73k | uint64_t scale_factor = scs->static_config.lambda_scale_factors[pcs->ppcs->update_type]; /* factor selected by the picture's update_type; applied below as scale_factor / 128 */ |
756 | 5.73k | ctx->full_lambda_md[0] = (uint32_t)((ctx->full_lambda_md[0] * scale_factor) >> 7); |
757 | 5.73k | ctx->full_lambda_md[1] = (uint32_t)((ctx->full_lambda_md[1] * scale_factor) >> 7); |
758 | 5.73k | ctx->fast_lambda_md[0] = (uint32_t)((ctx->fast_lambda_md[0] * scale_factor) >> 7); |
759 | 5.73k | ctx->fast_lambda_md[1] = (uint32_t)((ctx->fast_lambda_md[1] * scale_factor) >> 7); |
760 | | |
761 | 5.73k | ctx->full_sb_lambda_md[0] = ctx->full_lambda_md[0]; /* keep a copy of the final full lambdas in full_sb_lambda_md */ |
762 | 5.73k | ctx->full_sb_lambda_md[1] = ctx->full_lambda_md[1]; |
763 | 5.73k | } |
764 | | /* Per-segment reset of the mode-decision context ctx from picture-level state in pcs. |
 | | * Sets ctx->hbd_md, ctx->md_rate_est_ctx and ctx->bypass_encdec from pcs. When |
 | | * segment_index is 0, additionally resets the neighbor arrays of every tile inside tile |
 | | * group tile_group_idx (rows [tile_group_tile_start_y, tile_group_tile_end_y), columns |
 | | * [tile_group_tile_start_x, tile_group_tile_end_x)). scs is read only when |
 | | * OPT_LPD1_FAST_SKIP is disabled. */ |
765 | | void svt_aom_reset_mode_decision(SequenceControlSet* scs, ModeDecisionContext* ctx, PictureControlSet* pcs, |
766 | 4.75k | uint16_t tile_group_idx, uint32_t segment_index) { |
767 | | #if !OPT_LPD1_FAST_SKIP |
768 | | const bool rtc_tune = scs->static_config.rtc; |
769 | | #endif |
770 | 4.75k | ctx->hbd_md = pcs->hbd_md; |
771 | | // Reset the MD rate estimation table to the picture-level one (pcs->md_rate_est_ctx) |
772 | 4.75k | ctx->md_rate_est_ctx = pcs->md_rate_est_ctx; |
773 | | // Reset CABAC Contexts (NOTE(review): no statement follows this heading; possibly stale) |
774 | | |
775 | | // Reset the neighbor arrays only for the first segment (segment_index == 0) |
776 | 4.75k | if (segment_index == 0) { |
777 | 431 | for (uint16_t r = pcs->ppcs->tile_group_info[tile_group_idx].tile_group_tile_start_y; |
778 | 1.81k | r < pcs->ppcs->tile_group_info[tile_group_idx].tile_group_tile_end_y; |
779 | 1.38k | r++) { |
780 | 1.38k | for (uint16_t c = pcs->ppcs->tile_group_info[tile_group_idx].tile_group_tile_start_x; |
781 | 6.03k | c < pcs->ppcs->tile_group_info[tile_group_idx].tile_group_tile_end_x; |
782 | 4.65k | c++) { |
783 | 4.65k | uint16_t tile_idx = c + r * pcs->ppcs->av1_cm->tiles_info.tile_cols; /* tile index: column + row * tile_cols */ |
784 | 4.65k | svt_aom_reset_mode_decision_neighbor_arrays(pcs, tile_idx); |
785 | 4.65k | } |
786 | 1.38k | } |
787 | 431 | (void)scs; /* scs is otherwise referenced only under !OPT_LPD1_FAST_SKIP */ |
788 | 431 | } |
789 | | // Each segment inherits the bypass-encdec setting from the picture level |
790 | 4.75k | ctx->bypass_encdec = pcs->pic_bypass_encdec; |
791 | | #if !OPT_LPD1_FAST_SKIP |
792 | | if (!rtc_tune && (pcs->enc_mode <= ENC_M11 || pcs->temporal_layer_index != 0)) { |
793 | | ctx->rtc_use_N4_dct_dct_shortcut = 1; |
794 | | } else { |
795 | | ctx->rtc_use_N4_dct_dct_shortcut = 0; |
796 | | } |
797 | | #endif |
798 | 4.75k | return; |
799 | 4.75k | } |
800 | | |
801 | | /****************************************************** |
802 | | * Mode Decision Configure SB |
 | | * |
 | | * Selects the q-index used for this SB (ctx->qp_index), stores me_sb_qp in ctx->me_q_index, |
 | | * derives the MD lambdas through av1_lambda_assign_md() and clears ctx->hbd_pack_done. |
 | | * ctx->qp_index is sb_qp when delta_q_present is set in the frame header or when |
 | | * pcs->ppcs->r0_delta_qp_md is set; otherwise it is the frame's base_q_idx. |
803 | | ******************************************************/ |
804 | | void svt_aom_mode_decision_configure_sb(ModeDecisionContext* ctx, PictureControlSet* pcs, uint8_t sb_qp, |
805 | 5.73k | uint8_t me_sb_qp) { |
806 | | /* Note(CHKN): when QP modulation varies the QP on a sub-SB (CU) basis, lambda has to change based on Cu->QP, and then this code has to move inside the CU loop in MD */ |
807 | | |
808 | | // Lambda assignment |
809 | 5.73k | ctx->qp_index = pcs->ppcs->frm_hdr.delta_q_params.delta_q_present || pcs->ppcs->r0_delta_qp_md |
810 | 5.73k | ? sb_qp |
811 | 5.73k | : (uint8_t)pcs->ppcs->frm_hdr.quantization_params.base_q_idx; |
812 | | |
813 | 5.73k | ctx->me_q_index = me_sb_qp; |
814 | | |
815 | 5.73k | av1_lambda_assign_md(pcs, ctx); |
816 | | |
817 | 5.73k | ctx->hbd_pack_done = 0; |
818 | | |
819 | 5.73k | return; |
820 | 5.73k | } |