/work/svt-av1/Source/Lib/Codec/md_process.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright(c) 2019 Intel Corporation |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license. |
10 | | */ |
11 | | |
12 | | #include <stdlib.h> |
13 | | |
14 | | #include "utility.h" |
15 | | #include "md_process.h" |
16 | | #include "lambda_rate_tables.h" |
17 | | #include "rc_process.h" |
18 | | #include "enc_mode_config.h" |
19 | | |
// 64-entry quantizer -> qindex lookup table.
// Entries 0..61 step by 4 (entry q holds 4 * q); the final two entries are 249 and 255.
const uint8_t quantizer_to_qindex[64] = {
    0,   4,   8,   12,  16,  20,  24,  28, // 0  - 7
    32,  36,  40,  44,  48,  52,  56,  60, // 8  - 15
    64,  68,  72,  76,  80,  84,  88,  92, // 16 - 23
    96,  100, 104, 108, 112, 116, 120, 124, // 24 - 31
    128, 132, 136, 140, 144, 148, 152, 156, // 32 - 39
    160, 164, 168, 172, 176, 180, 184, 188, // 40 - 47
    192, 196, 200, 204, 208, 212, 216, 220, // 48 - 55
    224, 228, 232, 236, 240, 244, 249, 255, // 56 - 63
};
24 | | |
25 | | const int percents[2][FIXED_QP_OFFSET_COUNT] = { |
26 | | {75, 70, 60, 20, 15, 0}, {76, 60, 30, 15, 8, 4} // libaom offsets |
27 | | }; |
28 | | |
// 64-entry lookup table made of three constant runs:
//   indices  0..15 -> 85
//   indices 16..47 -> 95
//   indices 48..63 -> 100
const uint8_t uni_psy_bias[64] = {
    // 0 - 15
    85, 85, 85, 85, 85, 85, 85, 85,
    85, 85, 85, 85, 85, 85, 85, 85,
    // 16 - 47
    95, 95, 95, 95, 95, 95, 95, 95,
    95, 95, 95, 95, 95, 95, 95, 95,
    95, 95, 95, 95, 95, 95, 95, 95,
    95, 95, 95, 95, 95, 95, 95, 95,
    // 48 - 63
    100, 100, 100, 100, 100, 100, 100, 100,
    100, 100, 100, 100, 100, 100, 100, 100,
};
34 | | |
35 | 1.48k | static void mode_decision_context_dctor(EbPtr p) { |
36 | 1.48k | ModeDecisionContext* obj = (ModeDecisionContext*)p; |
37 | | |
38 | 1.48k | uint32_t block_max_count_sb = obj->init_max_block_cnt; |
39 | | |
40 | | // MD palette search |
41 | 1.48k | if (obj->palette_buffer) { |
42 | 0 | EB_FREE(obj->palette_buffer); |
43 | 0 | } |
44 | 1.48k | if (obj->palette_cand_array) { |
45 | | // Free fields in palette_cand_array before freeing palette_cand_array |
46 | 0 | for (int cd = 0; cd < MAX_PAL_CAND; cd++) { |
47 | 0 | if (obj->palette_cand_array[cd].color_idx_map) { |
48 | 0 | EB_FREE_ARRAY(obj->palette_cand_array[cd].color_idx_map); |
49 | 0 | } |
50 | 0 | } |
51 | |
|
52 | 0 | EB_FREE_ARRAY(obj->palette_cand_array); |
53 | 0 | } |
54 | 1.48k | if (obj->palette_size_array_0) { |
55 | 0 | EB_FREE_ARRAY(obj->palette_size_array_0); |
56 | 0 | } |
57 | 8.90k | for (CandClass cand_class_it = CAND_CLASS_0; cand_class_it < CAND_CLASS_TOTAL; cand_class_it++) { |
58 | 7.42k | EB_FREE_ARRAY(obj->cand_buff_indices[cand_class_it]); |
59 | 7.42k | } |
60 | 1.48k | EB_FREE_ARRAY(obj->best_candidate_index_array); |
61 | | |
62 | 1.48k | EB_FREE_ARRAY(obj->above_txfm_context); |
63 | 1.48k | EB_FREE_ARRAY(obj->left_txfm_context); |
64 | 127k | for (uint32_t coded_leaf_index = 0; coded_leaf_index < block_max_count_sb; ++coded_leaf_index) { |
65 | 126k | if (obj->md_blk_arr_nsq[coded_leaf_index].coeff_tmp) { |
66 | 126k | EB_DELETE(obj->md_blk_arr_nsq[coded_leaf_index].coeff_tmp); |
67 | 126k | } |
68 | 126k | if (obj->md_blk_arr_nsq[coded_leaf_index].recon_tmp) { |
69 | 126k | EB_DELETE(obj->md_blk_arr_nsq[coded_leaf_index].recon_tmp); |
70 | 126k | } |
71 | 126k | } |
72 | 1.48k | EB_DELETE_PTR_ARRAY(obj->cand_bf_ptr_array, obj->max_nics_uv); |
73 | 1.48k | EB_FREE_ARRAY(obj->cand_bf_tx_depth_1->cand); |
74 | 1.48k | EB_DELETE(obj->cand_bf_tx_depth_1); |
75 | 1.48k | EB_FREE_ARRAY(obj->cand_bf_tx_depth_2->cand); |
76 | 1.48k | EB_DELETE(obj->cand_bf_tx_depth_2); |
77 | 1.48k | EB_FREE_ALIGNED_ARRAY(obj->cfl_temp_luma_recon16bit); |
78 | 1.48k | EB_FREE_ALIGNED_ARRAY(obj->cfl_temp_luma_recon); |
79 | 1.48k | EB_FREE_ALIGNED_ARRAY(obj->pred_buf_q3); |
80 | 1.48k | EB_FREE_ARRAY(obj->fast_cand_array); |
81 | 1.48k | EB_FREE_2D(obj->injected_mvs); |
82 | 1.48k | EB_FREE_ARRAY(obj->injected_ref_types); |
83 | 1.48k | EB_FREE_ARRAY(obj->fast_cost_array); |
84 | 1.48k | EB_FREE_ARRAY(obj->full_cost_array); |
85 | 1.48k | if (obj->md_blk_arr_nsq) { |
86 | 5.93k | for (int i = 0; i < 3; i++) { |
87 | 4.45k | EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].neigh_left_recon_16bit[i]); |
88 | 4.45k | EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].neigh_top_recon_16bit[i]); |
89 | 4.45k | EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].neigh_left_recon[i]); |
90 | 4.45k | EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].neigh_top_recon[i]); |
91 | 4.45k | } |
92 | 1.48k | } |
93 | 1.48k | if (obj->md_blk_arr_nsq) { |
94 | 1.48k | EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].av1xd); |
95 | 1.48k | } |
96 | 1.48k | EB_FREE_ARRAY(obj->mds); |
97 | 1.48k | EB_FREE_ARRAY(obj->pc_tree); |
98 | 1.48k | EB_FREE_ARRAY(obj->tested_blk); |
99 | 1.48k | obj->blocks_to_alloc = 0; |
100 | 1.48k | EB_FREE_ARRAY(obj->md_blk_arr_nsq); |
101 | 1.48k | if (obj->rate_est_table) { |
102 | 0 | EB_FREE_ARRAY(obj->rate_est_table); |
103 | 0 | } |
104 | | |
105 | 7.42k | for (int i = 0; i < NEAREST_NEAR_MV_CNT; i++) { |
106 | 5.93k | if (obj->cmp_store.pred0_buf[i]) { |
107 | 0 | EB_FREE(obj->cmp_store.pred0_buf[i]); |
108 | 0 | } |
109 | 5.93k | if (obj->cmp_store.pred1_buf[i]) { |
110 | 0 | EB_FREE(obj->cmp_store.pred1_buf[i]); |
111 | 0 | } |
112 | 5.93k | } |
113 | 1.48k | if (obj->residual1) { |
114 | 0 | EB_FREE(obj->residual1); |
115 | 0 | } |
116 | 1.48k | if (obj->diff10) { |
117 | 0 | EB_FREE(obj->diff10); |
118 | 0 | } |
119 | | |
120 | 1.48k | if (obj->intrapred_buf) { |
121 | 0 | EB_FREE_2D(obj->intrapred_buf); |
122 | 0 | } |
123 | | |
124 | 1.48k | if (obj->obmc_buff_0) { |
125 | 1.48k | EB_FREE(obj->obmc_buff_0); |
126 | 1.48k | } |
127 | 1.48k | if (obj->obmc_buff_1) { |
128 | 1.48k | EB_FREE(obj->obmc_buff_1); |
129 | 1.48k | } |
130 | 1.48k | if (obj->wsrc_buf) { |
131 | 1.48k | EB_FREE(obj->wsrc_buf); |
132 | 1.48k | } |
133 | 1.48k | if (obj->mask_buf) { |
134 | 1.48k | EB_FREE(obj->mask_buf); |
135 | 1.48k | } |
136 | 25.2k | for (uint32_t txt_itr = 0; txt_itr < TX_TYPES; ++txt_itr) { |
137 | 23.7k | EB_DELETE(obj->recon_coeff_ptr[txt_itr]); |
138 | 23.7k | EB_DELETE(obj->recon_ptr[txt_itr]); |
139 | 23.7k | EB_DELETE(obj->quant_coeff_ptr[txt_itr]); |
140 | 23.7k | } |
141 | 1.48k | EB_DELETE(obj->tx_coeffs); |
142 | 1.48k | EB_DELETE(obj->scratch_prediction_ptr); |
143 | 1.48k | EB_DELETE(obj->temp_residual); |
144 | 1.48k | EB_DELETE(obj->temp_recon_ptr); |
145 | 1.48k | EB_FREE_ARRAY(obj->full_cost_ssim_array); |
146 | 1.48k | } |
147 | | |
148 | | void svt_aom_set_nics(SequenceControlSet* scs, NicScalingCtrls* scaling_ctrls, uint32_t mds1_count[CAND_CLASS_TOTAL], |
149 | | uint32_t mds2_count[CAND_CLASS_TOTAL], uint32_t mds3_count[CAND_CLASS_TOTAL], uint8_t pic_type, |
150 | | uint32_t qp); |
151 | | |
152 | | static void setup_mds(SequenceControlSet* scs, MdScan* mds, uint32_t* mds_idx, int index, BlockSize bsize, |
153 | 126k | const int min_sq_size) { |
154 | 126k | mds->mds_idx = *mds_idx; |
155 | 126k | mds->bsize = bsize; |
156 | 126k | mds->index = index; |
157 | | |
158 | | // If applicable, add split depths |
159 | 126k | const BlockGeom* blk_geom = get_blk_geom_mds(scs->blk_geom_mds, *mds_idx); |
160 | 126k | const int sq_size = block_size_wide[bsize]; |
161 | 126k | if (sq_size > min_sq_size) { |
162 | 31.1k | const BlockSize subsize = get_partition_subsize(bsize, PARTITION_SPLIT); |
163 | 31.1k | const int sq_subsize = block_size_wide[subsize]; |
164 | 31.1k | int blocks_per_subdepth = (sq_subsize / min_sq_size) * (sq_subsize / min_sq_size); |
165 | 31.1k | int blocks_to_skip = 0; |
166 | | |
167 | 71.2k | for (int i = min_sq_size; i <= sq_subsize; i <<= 1, blocks_per_subdepth >>= 2) { |
168 | 40.0k | blocks_to_skip += blocks_per_subdepth; |
169 | 40.0k | } |
170 | | |
171 | 31.1k | *mds_idx += blk_geom->d1_depth_offset; |
172 | 155k | for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { |
173 | 124k | mds->split[i] = mds + i * blocks_to_skip + 1; |
174 | 124k | setup_mds(scs, mds->split[i], mds_idx, i, subsize, min_sq_size); |
175 | 124k | } |
176 | 94.9k | } else { |
177 | 94.9k | *mds_idx += blk_geom->ns_depth_offset; |
178 | 94.9k | } |
179 | 126k | } |
180 | | |
181 | | static void setup_pc_tree(PC_TREE* pc_tree, bool (*test_blk_array)[PART_S][4], int index, BlockSize bsize, |
182 | 126k | const int min_sq_size) { |
183 | 126k | pc_tree->bsize = bsize; |
184 | 126k | pc_tree->index = index; |
185 | 126k | pc_tree->tested_blk = test_blk_array[0]; |
186 | | |
187 | | // If applicable, add split depths |
188 | 126k | const int sq_size = block_size_wide[bsize]; |
189 | 126k | if (sq_size > min_sq_size) { |
190 | 31.1k | const BlockSize subsize = get_partition_subsize(bsize, PARTITION_SPLIT); |
191 | 31.1k | const int sq_subsize = block_size_wide[subsize]; |
192 | 31.1k | int blocks_per_subdepth = (sq_subsize / min_sq_size) * (sq_subsize / min_sq_size); |
193 | 31.1k | int blocks_to_skip = 0; |
194 | | |
195 | 71.2k | for (int i = min_sq_size; i <= sq_subsize; i <<= 1, blocks_per_subdepth >>= 2) { |
196 | 40.0k | blocks_to_skip += blocks_per_subdepth; |
197 | 40.0k | } |
198 | | |
199 | 155k | for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { |
200 | 124k | pc_tree->split[i] = pc_tree + i * blocks_to_skip + 1; |
201 | 124k | pc_tree->split[i]->parent = pc_tree; |
202 | 124k | setup_pc_tree(pc_tree->split[i], test_blk_array + i * blocks_to_skip + 1, i, subsize, min_sq_size); |
203 | 124k | } |
204 | 31.1k | } |
205 | 126k | } |
206 | | |
207 | | /****************************************************** |
208 | | * Mode Decision Context Constructor |
209 | | ******************************************************/ |
210 | | EbErrorType svt_aom_mode_decision_context_ctor(ModeDecisionContext* ctx, SequenceControlSet* scs, |
211 | | EbColorFormat color_format, uint8_t sb_size, EncMode enc_mode, |
212 | | uint16_t max_block_cnt, uint32_t encoder_bit_depth, |
213 | | EbFifo* mode_decision_configuration_input_fifo_ptr, |
214 | | EbFifo* mode_decision_output_fifo_ptr, uint8_t enable_hbd_mode_decision, |
215 | 1.48k | uint8_t seq_qp_mod) { |
216 | 1.48k | const bool allintra = scs->allintra; |
217 | 1.48k | const bool rtc_tune = scs->static_config.rtc; |
218 | 1.48k | uint32_t buffer_index; |
219 | 1.48k | uint32_t cand_index; |
220 | | |
221 | 1.48k | ctx->init_max_block_cnt = max_block_cnt; |
222 | 1.48k | uint32_t block_max_count_sb = max_block_cnt; |
223 | | |
224 | 1.48k | ctx->sb_size = sb_size; |
225 | 1.48k | (void)color_format; |
226 | | |
227 | 1.48k | ctx->dctor = mode_decision_context_dctor; |
228 | 1.48k | ctx->hbd_md = enable_hbd_mode_decision; |
229 | | |
230 | | // Input/Output System Resource Manager FIFOs |
231 | 1.48k | ctx->mode_decision_configuration_input_fifo_ptr = mode_decision_configuration_input_fifo_ptr; |
232 | 1.48k | ctx->mode_decision_output_fifo_ptr = mode_decision_output_fifo_ptr; |
233 | | |
234 | | // Maximum number of candidates MD can support |
235 | | // determine MAX_NICS for a given preset |
236 | | // get the min scaling level (the smallest scaling level is the most conservative) |
237 | 1.48k | uint8_t min_nic_scaling_level = NICS_SCALING_LEVELS - 1; |
238 | 1.48k | uint8_t stage1_scaling_num; |
239 | 1.48k | if (allintra) { |
240 | 1.48k | uint8_t nic_level = svt_aom_get_nic_level_allintra(enc_mode); |
241 | 1.48k | stage1_scaling_num = MD_STAGE_NICS_SCAL_NUM[svt_aom_set_nic_controls(NULL, nic_level)][MD_STAGE_1]; |
242 | 1.48k | } else if (rtc_tune) { |
243 | 0 | #if TUNE_SIMPLIFY_SETTINGS |
244 | 0 | uint8_t nic_level = svt_aom_get_nic_level_rtc(enc_mode); |
245 | | #else |
246 | | uint8_t nic_level = svt_aom_get_nic_level_rtc(enc_mode, scs->use_flat_ipp); |
247 | | #endif |
248 | 0 | stage1_scaling_num = MD_STAGE_NICS_SCAL_NUM[svt_aom_set_nic_controls(NULL, nic_level)][MD_STAGE_1]; |
249 | 0 | } else { |
250 | 0 | #if TUNE_SIMPLIFY_SETTINGS |
251 | 0 | for (uint8_t is_base = 0; is_base < 2; is_base++) { |
252 | 0 | uint8_t nic_level = svt_aom_get_nic_level_default(enc_mode, is_base); |
253 | 0 | uint8_t nic_scaling_level = svt_aom_set_nic_controls(NULL, nic_level); |
254 | 0 | min_nic_scaling_level = MIN(min_nic_scaling_level, nic_scaling_level); |
255 | 0 | } |
256 | | #else |
257 | | for (uint8_t sc_class1 = 0; sc_class1 < 2; sc_class1++) { |
258 | | for (uint8_t is_base = 0; is_base < 2; is_base++) { |
259 | | uint8_t nic_level = svt_aom_get_nic_level_default(enc_mode, is_base, sc_class1); |
260 | | uint8_t nic_scaling_level = svt_aom_set_nic_controls(NULL, nic_level); |
261 | | min_nic_scaling_level = MIN(min_nic_scaling_level, nic_scaling_level); |
262 | | } |
263 | | } |
264 | | #endif |
265 | |
|
266 | 0 | stage1_scaling_num = MD_STAGE_NICS_SCAL_NUM[min_nic_scaling_level][MD_STAGE_1]; |
267 | 0 | } |
268 | | // scale max_nics |
269 | 1.48k | uint32_t max_nics = 0; |
270 | 1.48k | { |
271 | 1.48k | NicScalingCtrls scaling_ctrls; |
272 | 1.48k | scaling_ctrls.stage1_scaling_num = stage1_scaling_num; |
273 | 1.48k | scaling_ctrls.stage2_scaling_num = stage1_scaling_num; |
274 | 1.48k | scaling_ctrls.stage3_scaling_num = stage1_scaling_num; |
275 | 1.48k | uint32_t mds1_count[CAND_CLASS_TOTAL]; |
276 | 1.48k | uint32_t mds2_count[CAND_CLASS_TOTAL]; |
277 | 1.48k | uint32_t mds3_count[CAND_CLASS_TOTAL]; |
278 | 5.93k | for (uint8_t pic_type = 0; pic_type < NICS_PIC_TYPE; pic_type++) { |
279 | 289k | for (uint8_t qp = MIN_QP_VALUE; qp <= MAX_QP_VALUE; qp++) { |
280 | 284k | svt_aom_set_nics(scs, &scaling_ctrls, mds1_count, mds2_count, mds3_count, pic_type, qp); |
281 | | |
282 | 284k | uint32_t nics = 0; |
283 | 1.70M | for (CandClass cidx = CAND_CLASS_0; cidx < CAND_CLASS_TOTAL; cidx++) { |
284 | 1.42M | nics += mds1_count[cidx]; |
285 | 1.42M | } |
286 | 284k | max_nics = MAX(max_nics, nics); |
287 | 284k | } |
288 | 4.45k | } |
289 | 1.48k | } |
290 | | |
291 | | // If independent chroma search is used, need to allocate additional 84 candidate buffers |
292 | 1.48k | bool is_chroma_mode_0; |
293 | 1.48k | if (allintra) { |
294 | 1.48k | is_chroma_mode_0 = svt_aom_set_chroma_controls(NULL, svt_aom_get_chroma_level_allintra(enc_mode)) == |
295 | 1.48k | CHROMA_MODE_0; |
296 | 1.48k | } else if (scs->static_config.rtc) { |
297 | 0 | #if TUNE_SIMPLIFY_SETTINGS |
298 | 0 | is_chroma_mode_0 = svt_aom_set_chroma_controls( |
299 | 0 | NULL, svt_aom_get_chroma_level_rtc(enc_mode, scs->use_flat_ipp)) == CHROMA_MODE_0; |
300 | | #else |
301 | | for (uint8_t is_i_slice = 0; is_i_slice < 2; is_i_slice++) { |
302 | | is_chroma_mode_0 = svt_aom_set_chroma_controls(NULL, svt_aom_get_chroma_level_rtc(enc_mode, is_i_slice)) == |
303 | | CHROMA_MODE_0; |
304 | | if (is_chroma_mode_0) { |
305 | | break; |
306 | | } |
307 | | } |
308 | | #endif |
309 | 0 | } else { |
310 | 0 | for (uint8_t is_i_slice = 0; is_i_slice < 2; is_i_slice++) { |
311 | 0 | is_chroma_mode_0 = svt_aom_set_chroma_controls( |
312 | 0 | NULL, svt_aom_get_chroma_level_default(enc_mode, is_i_slice)) == CHROMA_MODE_0; |
313 | 0 | if (is_chroma_mode_0) { |
314 | 0 | break; |
315 | 0 | } |
316 | 0 | } |
317 | 0 | } |
318 | 1.48k | const uint8_t ind_uv_cands = is_chroma_mode_0 ? 84 : 0; |
319 | 1.48k | max_nics += CAND_CLASS_TOTAL; //need one extra temp buffer for each fast loop call |
320 | 1.48k | ctx->max_nics = max_nics; |
321 | 1.48k | ctx->max_nics_uv = max_nics + ind_uv_cands; |
322 | | // Cfl scratch memory |
323 | 1.48k | if (ctx->hbd_md > EB_8_BIT_MD) { |
324 | 0 | EB_MALLOC_ALIGNED(ctx->cfl_temp_luma_recon16bit, sizeof(uint16_t) * sb_size * sb_size); |
325 | 0 | } |
326 | 1.48k | if (ctx->hbd_md != EB_10_BIT_MD) { |
327 | 1.48k | EB_MALLOC_ALIGNED(ctx->cfl_temp_luma_recon, sizeof(uint8_t) * sb_size * sb_size); |
328 | 1.48k | } |
329 | 1.48k | EB_MALLOC_ALIGNED(ctx->pred_buf_q3, CFL_BUF_SQUARE); |
330 | 1.48k | uint8_t use_update_cdf = 0; |
331 | 1.48k | if (allintra) { |
332 | 1.48k | use_update_cdf = svt_aom_get_update_cdf_level_allintra(enc_mode); |
333 | 1.48k | } else if (rtc_tune) { |
334 | 0 | #if TUNE_SIMPLIFY_SETTINGS |
335 | 0 | for (uint8_t is_islice = 0; is_islice < 2; is_islice++) { |
336 | 0 | if (use_update_cdf) { |
337 | 0 | break; |
338 | 0 | } |
339 | 0 | use_update_cdf |= svt_aom_get_update_cdf_level_rtc(enc_mode, is_islice); |
340 | 0 | } |
341 | | #else |
342 | | for (uint8_t sc_class1 = 0; sc_class1 < 2; sc_class1++) { |
343 | | for (uint8_t is_islice = 0; is_islice < 2; is_islice++) { |
344 | | for (uint8_t is_base = 0; is_base < 2; is_base++) { |
345 | | if (use_update_cdf) { |
346 | | break; |
347 | | } |
348 | | use_update_cdf |= svt_aom_get_update_cdf_level_rtc(enc_mode, is_islice, is_base, sc_class1); |
349 | | } |
350 | | } |
351 | | } |
352 | | #endif |
353 | 0 | } else { |
354 | 0 | #if TUNE_SIMPLIFY_SETTINGS |
355 | 0 | for (uint8_t is_islice = 0; is_islice < 2; is_islice++) { |
356 | 0 | for (uint8_t is_base = 0; is_base < 2; is_base++) { |
357 | 0 | if (use_update_cdf) { |
358 | 0 | break; |
359 | 0 | } |
360 | 0 | use_update_cdf |= svt_aom_get_update_cdf_level_default(enc_mode, is_islice, is_base); |
361 | 0 | } |
362 | 0 | } |
363 | | #else |
364 | | for (uint8_t sc_class1 = 0; sc_class1 < 2; sc_class1++) { |
365 | | for (uint8_t is_islice = 0; is_islice < 2; is_islice++) { |
366 | | for (uint8_t is_base = 0; is_base < 2; is_base++) { |
367 | | if (use_update_cdf) { |
368 | | break; |
369 | | } |
370 | | use_update_cdf |= svt_aom_get_update_cdf_level_default(enc_mode, is_islice, is_base, sc_class1); |
371 | | } |
372 | | } |
373 | | } |
374 | | #endif |
375 | 0 | } |
376 | 1.48k | if (use_update_cdf) { |
377 | 0 | EB_CALLOC_ARRAY(ctx->rate_est_table, 1); |
378 | 1.48k | } else { |
379 | 1.48k | ctx->rate_est_table = NULL; |
380 | 1.48k | } |
381 | | // Allocate buffer for inter-inter compound prediction |
382 | 1.48k | if (get_inter_compound_level(enc_mode)) { |
383 | 0 | const uint8_t bits = ctx->hbd_md > EB_8_BIT_MD ? 2 : 1; |
384 | 0 | for (int i = 0; i < NEAREST_NEAR_MV_CNT; i++) { |
385 | 0 | EB_MALLOC(ctx->cmp_store.pred0_buf[i], sb_size * sb_size * bits * sizeof(uint8_t)); |
386 | 0 | EB_MALLOC(ctx->cmp_store.pred1_buf[i], sb_size * sb_size * bits * sizeof(uint8_t)); |
387 | 0 | } |
388 | 0 | EB_MALLOC(ctx->residual1, sb_size * sb_size * sizeof(ctx->residual1[0])); |
389 | 0 | EB_MALLOC(ctx->diff10, sb_size * sb_size * sizeof(ctx->diff10[0])); |
390 | 0 | } |
391 | | |
392 | | // Allocate buffer for inter-intra prediction |
393 | 1.48k | uint8_t ii_allowed = 0; |
394 | 4.45k | for (uint8_t transition_present = 0; transition_present < 2; transition_present++) { |
395 | 2.96k | if (ii_allowed) { |
396 | 0 | break; |
397 | 0 | } |
398 | 2.96k | ii_allowed |= svt_aom_get_inter_intra_level(enc_mode, transition_present); |
399 | 2.96k | } |
400 | 1.48k | if (ii_allowed) { |
401 | 0 | const uint8_t bits = ctx->hbd_md > EB_8_BIT_MD ? 2 : 1; |
402 | | // MAX block size for inter intra is 32x32 |
403 | 0 | EB_MALLOC_2D(ctx->intrapred_buf, INTERINTRA_MODES, 32 * 32 * bits * sizeof(ctx->intrapred_buf[0][0])); |
404 | 0 | } |
405 | | |
406 | | // Allocate buffers for obmc prediction |
407 | 1.48k | uint8_t obmc_allowed = 0; |
408 | 4.45k | for (uint8_t is_base = 0; is_base < 2; is_base++) { |
409 | 4.45k | for (uint8_t qp = MIN_QP_VALUE; qp <= MAX_QP_VALUE; qp++) { |
410 | 4.45k | if (obmc_allowed) { |
411 | 2.96k | break; |
412 | 2.96k | } |
413 | 1.48k | #if TUNE_SHIFT_PRESETS_RTC |
414 | 1.48k | obmc_allowed |= svt_aom_get_obmc_level(enc_mode, qp, seq_qp_mod, rtc_tune); |
415 | | #else |
416 | | obmc_allowed |= svt_aom_get_obmc_level(enc_mode, qp, seq_qp_mod); |
417 | | #endif |
418 | 1.48k | } |
419 | 2.96k | } |
420 | 1.48k | if (obmc_allowed) { |
421 | 1.48k | const uint8_t bits = ctx->hbd_md > EB_8_BIT_MD ? 2 : 1; |
422 | 1.48k | EB_MALLOC(ctx->obmc_buff_0, sb_size * sb_size * bits * MAX_PLANES * sizeof(ctx->obmc_buff_0[0])); |
423 | 1.48k | EB_MALLOC(ctx->obmc_buff_1, sb_size * sb_size * bits * MAX_PLANES * sizeof(ctx->obmc_buff_1[0])); |
424 | 1.48k | EB_MALLOC(ctx->wsrc_buf, sb_size * sb_size * sizeof(ctx->wsrc_buf[0])); |
425 | 1.48k | EB_MALLOC(ctx->mask_buf, sb_size * sb_size * sizeof(ctx->mask_buf[0])); |
426 | 1.48k | } |
427 | 1.48k | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq, block_max_count_sb); |
428 | | // Fast Candidate Array |
429 | 1.48k | uint16_t max_can_count = svt_aom_get_max_can_count(enc_mode) + ind_uv_cands; |
430 | 1.48k | EB_MALLOC_ARRAY(ctx->fast_cand_array, max_can_count); |
431 | | |
432 | 283k | for (cand_index = 0; cand_index < max_can_count; ++cand_index) { |
433 | 281k | ctx->fast_cand_array[cand_index].palette_info = NULL; |
434 | 281k | } |
435 | 1.48k | svt_aom_assert_err(max_can_count > ind_uv_cands, "Max. candidates is too low"); |
436 | 1.48k | EB_MALLOC_2D(ctx->injected_mvs, (uint16_t)(max_can_count - ind_uv_cands), 2); |
437 | 1.48k | EB_MALLOC_ARRAY(ctx->injected_ref_types, (max_can_count - ind_uv_cands)); |
438 | | |
439 | | // Set buffers for MD palette search to NULL; will be init'd at runtime if needed |
440 | 1.48k | ctx->palette_buffer = NULL; |
441 | 1.48k | ctx->palette_cand_array = NULL; |
442 | 1.48k | ctx->palette_size_array_0 = NULL; |
443 | | |
444 | | // Cost Arrays |
445 | 1.48k | EB_MALLOC_ARRAY(ctx->fast_cost_array, ctx->max_nics_uv); |
446 | 1.48k | EB_MALLOC_ARRAY(ctx->full_cost_array, ctx->max_nics_uv); |
447 | 1.48k | EB_MALLOC_ARRAY(ctx->full_cost_ssim_array, ctx->max_nics_uv); |
448 | | // Candidate Buffers |
449 | 1.48k | EB_NEW(ctx->cand_bf_tx_depth_1, |
450 | 1.48k | svt_aom_mode_decision_scratch_cand_bf_ctor, |
451 | 1.48k | sb_size, |
452 | 1.48k | ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT); |
453 | | |
454 | 1.48k | EB_ALLOC_PTR_ARRAY(ctx->cand_bf_tx_depth_1->cand, 1); |
455 | 1.48k | EB_NEW(ctx->cand_bf_tx_depth_2, |
456 | 1.48k | svt_aom_mode_decision_scratch_cand_bf_ctor, |
457 | 1.48k | sb_size, |
458 | 1.48k | ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT); |
459 | | |
460 | 1.48k | EB_ALLOC_PTR_ARRAY(ctx->cand_bf_tx_depth_2->cand, 1); |
461 | 5.93k | for (int i = 0; i < 3; i++) { |
462 | 4.45k | ctx->md_blk_arr_nsq[0].neigh_left_recon[i] = NULL; |
463 | 4.45k | ctx->md_blk_arr_nsq[0].neigh_top_recon[i] = NULL; |
464 | 4.45k | ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[i] = NULL; |
465 | 4.45k | ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[i] = NULL; |
466 | 4.45k | } |
467 | 1.48k | uint32_t coded_leaf_index; |
468 | 1.48k | uint16_t sz = sizeof(uint16_t); |
469 | 1.48k | if (ctx->hbd_md > EB_8_BIT_MD) { |
470 | 0 | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[0], block_max_count_sb * sb_size * sz); |
471 | 0 | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[0], block_max_count_sb * sb_size * sz); |
472 | 0 | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[1], block_max_count_sb * sb_size * sz >> 1); |
473 | 0 | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[1], block_max_count_sb * sb_size * sz >> 1); |
474 | 0 | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[2], block_max_count_sb * sb_size * sz >> 1); |
475 | 0 | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[2], block_max_count_sb * sb_size * sz >> 1); |
476 | | |
477 | 0 | for (coded_leaf_index = 0; coded_leaf_index < block_max_count_sb; ++coded_leaf_index) { |
478 | 0 | size_t offset = coded_leaf_index * sb_size * sz; |
479 | 0 | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon_16bit[0] = |
480 | 0 | ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[0] + offset; |
481 | 0 | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon_16bit[0] = |
482 | 0 | ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[0] + offset; |
483 | 0 | offset >>= 1; |
484 | 0 | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon_16bit[1] = |
485 | 0 | ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[1] + offset; |
486 | 0 | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon_16bit[1] = |
487 | 0 | ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[1] + offset; |
488 | 0 | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon_16bit[2] = |
489 | 0 | ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[2] + offset; |
490 | 0 | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon_16bit[2] = |
491 | 0 | ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[2] + offset; |
492 | 0 | } |
493 | 0 | } |
494 | 1.48k | if (ctx->hbd_md != EB_10_BIT_MD) { |
495 | 1.48k | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon[0], block_max_count_sb * sb_size); |
496 | 1.48k | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon[0], block_max_count_sb * sb_size); |
497 | 1.48k | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon[1], block_max_count_sb * sb_size >> 1); |
498 | 1.48k | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon[1], block_max_count_sb * sb_size >> 1); |
499 | 1.48k | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon[2], block_max_count_sb * sb_size >> 1); |
500 | 1.48k | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon[2], block_max_count_sb * sb_size >> 1); |
501 | | |
502 | 127k | for (coded_leaf_index = 0; coded_leaf_index < block_max_count_sb; ++coded_leaf_index) { |
503 | 126k | size_t offset = coded_leaf_index * sb_size; |
504 | 126k | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon[0] = ctx->md_blk_arr_nsq[0].neigh_left_recon[0] + |
505 | 126k | offset; |
506 | 126k | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon[0] = ctx->md_blk_arr_nsq[0].neigh_top_recon[0] + |
507 | 126k | offset; |
508 | 126k | offset >>= 1; |
509 | 126k | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon[1] = ctx->md_blk_arr_nsq[0].neigh_left_recon[1] + |
510 | 126k | offset; |
511 | 126k | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon[1] = ctx->md_blk_arr_nsq[0].neigh_top_recon[1] + |
512 | 126k | offset; |
513 | 126k | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon[2] = ctx->md_blk_arr_nsq[0].neigh_left_recon[2] + |
514 | 126k | offset; |
515 | 126k | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon[2] = ctx->md_blk_arr_nsq[0].neigh_top_recon[2] + |
516 | 126k | offset; |
517 | 126k | } |
518 | 1.48k | } |
519 | 1.48k | ctx->md_blk_arr_nsq[0].av1xd = NULL; |
520 | 1.48k | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].av1xd, block_max_count_sb); |
521 | | |
522 | | // Alloc mds and pc_tree, which are used to track tested blocks in MD |
523 | 1.48k | bool disallow_4x4 = allintra ? svt_aom_get_disallow_4x4_allintra(enc_mode) |
524 | 1.48k | #if TUNE_SIMPLIFY_SETTINGS |
525 | 1.48k | : rtc_tune ? svt_aom_get_disallow_4x4_rtc() |
526 | | #else |
527 | | : rtc_tune ? svt_aom_get_disallow_4x4_rtc(enc_mode) |
528 | | #endif |
529 | 0 | : svt_aom_get_disallow_4x4_default(enc_mode); |
530 | 1.48k | bool disallow_8x8 = allintra ? svt_aom_get_disallow_8x8_allintra() |
531 | 1.48k | : rtc_tune ? svt_aom_get_disallow_8x8_rtc(enc_mode, scs->max_input_luma_width, scs->max_input_luma_height) |
532 | 0 | : svt_aom_get_disallow_8x8_default(); |
533 | 1.48k | uint8_t min_bsize = disallow_8x8 ? 16 : disallow_4x4 ? 8 : 4; |
534 | 1.48k | int blocks_per_depth = (sb_size / min_bsize) * (sb_size / min_bsize); |
535 | 1.48k | int blocks_to_alloc = 0; |
536 | | |
537 | 7.42k | for (int i = min_bsize; i <= sb_size; i <<= 1, blocks_per_depth >>= 2) { |
538 | 5.93k | blocks_to_alloc += blocks_per_depth; |
539 | 5.93k | } |
540 | 1.48k | EB_CALLOC_ARRAY(ctx->mds, blocks_to_alloc); |
541 | 1.48k | uint32_t mds_idx = 0; |
542 | 1.48k | setup_mds(scs, ctx->mds, &mds_idx, 0, scs->seq_header.sb_size, min_bsize); |
543 | 1.48k | EB_CALLOC_ARRAY(ctx->pc_tree, blocks_to_alloc); |
544 | 1.48k | EB_MALLOC_ARRAY(ctx->tested_blk, blocks_to_alloc); |
545 | 1.48k | setup_pc_tree(ctx->pc_tree, ctx->tested_blk, 0, scs->seq_header.sb_size, min_bsize); |
546 | 1.48k | ctx->blocks_to_alloc = blocks_to_alloc; |
547 | | |
548 | 1.48k | bool bypass_encdec = allintra ? svt_aom_get_bypass_encdec_allintra(enc_mode) |
549 | 1.48k | : rtc_tune ? svt_aom_get_bypass_encdec_rtc(enc_mode, encoder_bit_depth) |
550 | 0 | : svt_aom_get_bypass_encdec_default(enc_mode, encoder_bit_depth); |
551 | 127k | for (coded_leaf_index = 0; coded_leaf_index < block_max_count_sb; ++coded_leaf_index) { |
552 | 126k | ctx->md_blk_arr_nsq[coded_leaf_index].av1xd = ctx->md_blk_arr_nsq[0].av1xd + coded_leaf_index; |
553 | 126k | ctx->md_blk_arr_nsq[coded_leaf_index].segment_id = 0; |
554 | 126k | const BlockGeom* blk_geom = get_blk_geom_mds(scs->blk_geom_mds, coded_leaf_index); |
555 | 126k | if (bypass_encdec) { |
556 | 126k | EbPictureBufferDescInitData init_data; |
557 | | |
558 | 126k | init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK; |
559 | 126k | init_data.max_width = blk_geom->bwidth; |
560 | 126k | init_data.max_height = blk_geom->bheight; |
561 | 126k | init_data.bit_depth = EB_THIRTYTWO_BIT; |
562 | 126k | init_data.color_format = (blk_geom->bwidth > 4 && blk_geom->bheight > 4) |
563 | 126k | ? EB_YUV420 |
564 | 126k | : EB_YUV444; // PW - must have at least 4x4 for chroma coeffs |
565 | 126k | init_data.border = 0; |
566 | 126k | init_data.split_mode = false; |
567 | | |
568 | 126k | EB_NEW(ctx->md_blk_arr_nsq[coded_leaf_index].coeff_tmp, svt_picture_buffer_desc_ctor, (EbPtr)&init_data); |
569 | | |
570 | 126k | init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK; |
571 | 126k | init_data.max_width = blk_geom->bwidth; |
572 | 126k | init_data.max_height = blk_geom->bheight; |
573 | 126k | init_data.bit_depth = ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT; |
574 | 126k | ; |
575 | 126k | init_data.color_format = (blk_geom->bwidth > 4 && blk_geom->bheight > 4) ? EB_YUV420 : EB_YUV444; |
576 | 126k | init_data.border = 0; |
577 | 126k | init_data.split_mode = false; |
578 | | |
579 | 126k | EB_NEW(ctx->md_blk_arr_nsq[coded_leaf_index].recon_tmp, svt_picture_buffer_desc_ctor, (EbPtr)&init_data); |
580 | 126k | } else { |
581 | 0 | ctx->md_blk_arr_nsq[coded_leaf_index].coeff_tmp = NULL; |
582 | 0 | ctx->md_blk_arr_nsq[coded_leaf_index].recon_tmp = NULL; |
583 | 0 | } |
584 | 126k | } |
585 | 8.90k | for (CandClass cand_class_it = CAND_CLASS_0; cand_class_it < CAND_CLASS_TOTAL; cand_class_it++) { |
586 | 7.42k | EB_MALLOC_ARRAY(ctx->cand_buff_indices[cand_class_it], ctx->max_nics_uv); |
587 | 7.42k | } |
588 | | |
589 | 1.48k | EB_MALLOC_ARRAY(ctx->best_candidate_index_array, ctx->max_nics_uv); |
590 | 1.48k | EB_MALLOC_ARRAY(ctx->above_txfm_context, (sb_size >> MI_SIZE_LOG2)); |
591 | 1.48k | EB_MALLOC_ARRAY(ctx->left_txfm_context, (sb_size >> MI_SIZE_LOG2)); |
592 | 1.48k | EbPictureBufferDescInitData thirty_two_width_picture_buffer_desc_init_data; |
593 | 1.48k | EbPictureBufferDescInitData picture_buffer_desc_init_data; |
594 | | |
595 | 1.48k | picture_buffer_desc_init_data.max_width = sb_size; |
596 | 1.48k | picture_buffer_desc_init_data.max_height = sb_size; |
597 | 1.48k | picture_buffer_desc_init_data.bit_depth = ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT; |
598 | 1.48k | picture_buffer_desc_init_data.color_format = EB_YUV420; |
599 | 1.48k | picture_buffer_desc_init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK; |
600 | 1.48k | picture_buffer_desc_init_data.border = 0; |
601 | 1.48k | picture_buffer_desc_init_data.split_mode = false; |
602 | 1.48k | picture_buffer_desc_init_data.is_16bit_pipeline = false; |
603 | | |
604 | 1.48k | thirty_two_width_picture_buffer_desc_init_data.max_width = sb_size; |
605 | 1.48k | thirty_two_width_picture_buffer_desc_init_data.max_height = sb_size; |
606 | 1.48k | thirty_two_width_picture_buffer_desc_init_data.bit_depth = EB_THIRTYTWO_BIT; |
607 | 1.48k | thirty_two_width_picture_buffer_desc_init_data.color_format = EB_YUV420; |
608 | 1.48k | thirty_two_width_picture_buffer_desc_init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK; |
609 | 1.48k | thirty_two_width_picture_buffer_desc_init_data.border = 0; |
610 | 1.48k | thirty_two_width_picture_buffer_desc_init_data.split_mode = false; |
611 | 1.48k | thirty_two_width_picture_buffer_desc_init_data.is_16bit_pipeline = false; |
612 | 25.2k | for (uint32_t txt_itr = 0; txt_itr < TX_TYPES; ++txt_itr) { |
613 | 23.7k | EB_NEW(ctx->recon_coeff_ptr[txt_itr], |
614 | 23.7k | svt_picture_buffer_desc_ctor, |
615 | 23.7k | (EbPtr)&thirty_two_width_picture_buffer_desc_init_data); |
616 | 23.7k | EB_NEW(ctx->recon_ptr[txt_itr], svt_picture_buffer_desc_ctor, (EbPtr)&picture_buffer_desc_init_data); |
617 | 23.7k | EB_NEW(ctx->quant_coeff_ptr[txt_itr], |
618 | 23.7k | svt_picture_buffer_desc_ctor, |
619 | 23.7k | (EbPtr)&thirty_two_width_picture_buffer_desc_init_data); |
620 | 23.7k | } |
621 | 1.48k | EB_NEW(ctx->tx_coeffs, svt_picture_buffer_desc_ctor, (EbPtr)&thirty_two_width_picture_buffer_desc_init_data); |
622 | 1.48k | EB_NEW(ctx->scratch_prediction_ptr, svt_picture_buffer_desc_ctor, (EbPtr)&picture_buffer_desc_init_data); |
623 | 1.48k | EbPictureBufferDescInitData double_width_picture_buffer_desc_init_data; |
624 | 1.48k | double_width_picture_buffer_desc_init_data.max_width = sb_size; |
625 | 1.48k | double_width_picture_buffer_desc_init_data.max_height = sb_size; |
626 | 1.48k | double_width_picture_buffer_desc_init_data.bit_depth = EB_SIXTEEN_BIT; |
627 | 1.48k | double_width_picture_buffer_desc_init_data.color_format = EB_YUV420; |
628 | 1.48k | double_width_picture_buffer_desc_init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK; |
629 | 1.48k | double_width_picture_buffer_desc_init_data.border = 0; |
630 | 1.48k | double_width_picture_buffer_desc_init_data.split_mode = false; |
631 | 1.48k | double_width_picture_buffer_desc_init_data.is_16bit_pipeline = false; |
632 | | |
633 | | // The temp_recon_ptr and temp_residual will be shared by all candidates |
634 | | // If you want to do something with residual or recon, you need to create one |
635 | 1.48k | EB_NEW(ctx->temp_recon_ptr, svt_picture_buffer_desc_ctor, (EbPtr)&picture_buffer_desc_init_data); |
636 | 1.48k | EB_NEW(ctx->temp_residual, svt_picture_buffer_desc_ctor, (EbPtr)&double_width_picture_buffer_desc_init_data); |
637 | | |
638 | | // Candidate Buffers |
639 | 1.48k | EB_ALLOC_PTR_ARRAY(ctx->cand_bf_ptr_array, ctx->max_nics_uv); |
640 | | |
641 | 16.3k | for (buffer_index = 0; buffer_index < ctx->max_nics; ++buffer_index) { |
642 | 14.8k | EB_NEW(ctx->cand_bf_ptr_array[buffer_index], |
643 | 14.8k | svt_aom_mode_decision_cand_bf_ctor, |
644 | 14.8k | ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT, |
645 | 14.8k | sb_size, |
646 | 14.8k | PICTURE_BUFFER_DESC_FULL_MASK, |
647 | 14.8k | ctx->temp_residual, |
648 | 14.8k | ctx->temp_recon_ptr, |
649 | 14.8k | &(ctx->fast_cost_array[buffer_index]), |
650 | 14.8k | &(ctx->full_cost_array[buffer_index]), |
651 | 14.8k | &(ctx->full_cost_ssim_array[buffer_index])); |
652 | 14.8k | } |
653 | | |
654 | 1.48k | for (buffer_index = max_nics; buffer_index < ctx->max_nics_uv; ++buffer_index) { |
655 | 0 | EB_NEW(ctx->cand_bf_ptr_array[buffer_index], |
656 | 0 | svt_aom_mode_decision_cand_bf_ctor, |
657 | 0 | ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT, |
658 | 0 | sb_size, |
659 | 0 | PICTURE_BUFFER_DESC_CHROMA_MASK, |
660 | 0 | ctx->temp_residual, |
661 | 0 | ctx->temp_recon_ptr, |
662 | 0 | &(ctx->fast_cost_array[buffer_index]), |
663 | 0 | &(ctx->full_cost_array[buffer_index]), |
664 | 0 | &(ctx->full_cost_ssim_array[buffer_index])); |
665 | 0 | } |
666 | | |
667 | 1.48k | return EB_ErrorNone; |
668 | 1.48k | } |
669 | | |
670 | | /************************************************** |
671 | | * Reset Mode Decision Neighbor Arrays |
672 | | *************************************************/ |
673 | 4.99k | void svt_aom_reset_mode_decision_neighbor_arrays(PictureControlSet* pcs, uint16_t tile_idx) { |
674 | 4.99k | uint8_t depth; |
675 | 19.9k | for (depth = 0; depth < NA_TOT_CNT; depth++) { |
676 | 14.9k | svt_aom_neighbor_array_unit_reset(pcs->mdleaf_partition_na[depth][tile_idx]); |
677 | 14.9k | if (pcs->hbd_md != EB_10_BIT_MD) { |
678 | 14.9k | svt_aom_neighbor_array_unit_reset(pcs->md_luma_recon_na[depth][tile_idx]); |
679 | 14.9k | svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_1_luma_recon_na[depth][tile_idx]); |
680 | 14.9k | svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_2_luma_recon_na[depth][tile_idx]); |
681 | 14.9k | svt_aom_neighbor_array_unit_reset(pcs->md_cb_recon_na[depth][tile_idx]); |
682 | 14.9k | svt_aom_neighbor_array_unit_reset(pcs->md_cr_recon_na[depth][tile_idx]); |
683 | 14.9k | } |
684 | 14.9k | if (pcs->hbd_md > EB_8_BIT_MD || (pcs->scs->encoder_bit_depth > EB_EIGHT_BIT && pcs->pic_bypass_encdec)) { |
685 | 0 | svt_aom_neighbor_array_unit_reset(pcs->md_luma_recon_na_16bit[depth][tile_idx]); |
686 | 0 | svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_1_luma_recon_na_16bit[depth][tile_idx]); |
687 | 0 | svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_2_luma_recon_na_16bit[depth][tile_idx]); |
688 | 0 | svt_aom_neighbor_array_unit_reset(pcs->md_cb_recon_na_16bit[depth][tile_idx]); |
689 | 0 | svt_aom_neighbor_array_unit_reset(pcs->md_cr_recon_na_16bit[depth][tile_idx]); |
690 | 0 | } |
691 | | |
692 | 14.9k | svt_aom_neighbor_array_unit_reset(pcs->md_y_dcs_na[depth][tile_idx]); |
693 | 14.9k | svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_1_luma_dc_sign_level_coeff_na[depth][tile_idx]); |
694 | 14.9k | svt_aom_neighbor_array_unit_reset(pcs->md_cb_dc_sign_level_coeff_na[depth][tile_idx]); |
695 | 14.9k | svt_aom_neighbor_array_unit_reset(pcs->md_cr_dc_sign_level_coeff_na[depth][tile_idx]); |
696 | 14.9k | svt_aom_neighbor_array_unit_reset(pcs->md_txfm_context_array[depth][tile_idx]); |
697 | 14.9k | } |
698 | | |
699 | 4.99k | return; |
700 | 4.99k | } |
701 | | |
702 | | // If the ref intra percentage is below the TH, applying modulation to the MD lambda |
703 | 0 | #define LAMBDA_MOD_INTRA_TH 50 |
704 | 0 | #define LAMBDA_MOD_INTRA_SCALING_FACTOR 138 |
705 | | |
706 | | // Set the lambda for each sb. |
707 | | // When lambda tuning is on (blk_lambda_tuning), lambda of each block is set separately (full_lambda_md/fast_lambda_md) |
708 | | // later in svt_aom_set_tuned_blk_lambda |
709 | | // Testing showed that updating SAD lambda based on frame info was not helpful; therefore, the SAD lambda generation is not changed. |
710 | 6.22k | static void av1_lambda_assign_md(PictureControlSet* pcs, ModeDecisionContext* ctx) { |
711 | 6.22k | ctx->full_lambda_md[0] = svt_aom_compute_rd_mult(pcs, ctx->qp_index, ctx->me_q_index, EB_EIGHT_BIT); |
712 | 6.22k | ctx->fast_lambda_md[0] = svt_aom_compute_fast_lambda(pcs, ctx->qp_index, ctx->me_q_index, EB_EIGHT_BIT); |
713 | 6.22k | ctx->full_lambda_md[1] = svt_aom_compute_rd_mult(pcs, ctx->qp_index, ctx->me_q_index, EB_TEN_BIT); |
714 | 6.22k | ctx->fast_lambda_md[1] = svt_aom_compute_fast_lambda(pcs, ctx->qp_index, ctx->me_q_index, EB_TEN_BIT); |
715 | | |
716 | 6.22k | if (!pcs->scs->static_config.rtc && pcs->scs->stats_based_sb_lambda_modulation) { |
717 | 6.22k | if (pcs->temporal_layer_index > 0) { |
718 | 0 | if (pcs->ref_intra_percentage < LAMBDA_MOD_INTRA_TH) { |
719 | 0 | ctx->full_lambda_md[0] = (ctx->full_lambda_md[0] * LAMBDA_MOD_INTRA_SCALING_FACTOR) >> 7; |
720 | 0 | ctx->fast_lambda_md[0] = (ctx->fast_lambda_md[0] * LAMBDA_MOD_INTRA_SCALING_FACTOR) >> 7; |
721 | 0 | ctx->full_lambda_md[1] = (ctx->full_lambda_md[1] * LAMBDA_MOD_INTRA_SCALING_FACTOR) >> 7; |
722 | 0 | ctx->fast_lambda_md[1] = (ctx->fast_lambda_md[1] * LAMBDA_MOD_INTRA_SCALING_FACTOR) >> 7; |
723 | 0 | } |
724 | 0 | } |
725 | 6.22k | } |
726 | | |
727 | 6.22k | if (pcs->lambda_weight) { |
728 | 3.35k | ctx->full_lambda_md[0] = (uint32_t)((ctx->full_lambda_md[0] * (uint64_t)pcs->lambda_weight) >> 7); |
729 | 3.35k | ctx->fast_lambda_md[0] = (uint32_t)((ctx->fast_lambda_md[0] * (uint64_t)pcs->lambda_weight) >> 7); |
730 | 3.35k | ctx->full_lambda_md[1] = (uint32_t)((ctx->full_lambda_md[1] * (uint64_t)pcs->lambda_weight) >> 7); |
731 | 3.35k | ctx->fast_lambda_md[1] = (uint32_t)((ctx->fast_lambda_md[1] * (uint64_t)pcs->lambda_weight) >> 7); |
732 | 3.35k | } |
733 | 6.22k | ctx->full_lambda_md[1] *= 16; |
734 | 6.22k | ctx->fast_lambda_md[1] *= 4; |
735 | | |
736 | 6.22k | SequenceControlSet* scs = pcs->scs; |
737 | 6.22k | uint64_t scale_factor = scs->static_config.lambda_scale_factors[pcs->ppcs->update_type]; |
738 | 6.22k | ctx->full_lambda_md[0] = (uint32_t)((ctx->full_lambda_md[0] * scale_factor) >> 7); |
739 | 6.22k | ctx->full_lambda_md[1] = (uint32_t)((ctx->full_lambda_md[1] * scale_factor) >> 7); |
740 | 6.22k | ctx->fast_lambda_md[0] = (uint32_t)((ctx->fast_lambda_md[0] * scale_factor) >> 7); |
741 | 6.22k | ctx->fast_lambda_md[1] = (uint32_t)((ctx->fast_lambda_md[1] * scale_factor) >> 7); |
742 | | |
743 | 6.22k | ctx->full_sb_lambda_md[0] = ctx->full_lambda_md[0]; |
744 | 6.22k | ctx->full_sb_lambda_md[1] = ctx->full_lambda_md[1]; |
745 | 6.22k | } |
746 | | |
747 | | void svt_aom_reset_mode_decision(SequenceControlSet* scs, ModeDecisionContext* ctx, PictureControlSet* pcs, |
748 | 5.16k | uint16_t tile_group_idx, uint32_t segment_index) { |
749 | 5.16k | const bool rtc_tune = scs->static_config.rtc; |
750 | 5.16k | ctx->hbd_md = pcs->hbd_md; |
751 | | // Reset MD rate Estimation table to initial values by copying from md_rate_est_ctx |
752 | 5.16k | ctx->md_rate_est_ctx = pcs->md_rate_est_ctx; |
753 | | // Reset CABAC Contexts |
754 | | |
755 | | // Reset Neighbor Arrays at start of new Segment / Picture |
756 | 5.16k | if (segment_index == 0) { |
757 | 474 | for (uint16_t r = pcs->ppcs->tile_group_info[tile_group_idx].tile_group_tile_start_y; |
758 | 1.99k | r < pcs->ppcs->tile_group_info[tile_group_idx].tile_group_tile_end_y; |
759 | 1.51k | r++) { |
760 | 1.51k | for (uint16_t c = pcs->ppcs->tile_group_info[tile_group_idx].tile_group_tile_start_x; |
761 | 6.51k | c < pcs->ppcs->tile_group_info[tile_group_idx].tile_group_tile_end_x; |
762 | 4.99k | c++) { |
763 | 4.99k | uint16_t tile_idx = c + r * pcs->ppcs->av1_cm->tiles_info.tile_cols; |
764 | 4.99k | svt_aom_reset_mode_decision_neighbor_arrays(pcs, tile_idx); |
765 | 4.99k | } |
766 | 1.51k | } |
767 | 474 | (void)scs; |
768 | 474 | } |
769 | | //each segment enherits the bypass encdec from the picture level |
770 | 5.16k | ctx->bypass_encdec = pcs->pic_bypass_encdec; |
771 | | |
772 | 5.16k | if (!rtc_tune && (pcs->enc_mode <= ENC_M11 || pcs->temporal_layer_index != 0)) { |
773 | 5.16k | ctx->rtc_use_N4_dct_dct_shortcut = 1; |
774 | 5.16k | } else { |
775 | 0 | ctx->rtc_use_N4_dct_dct_shortcut = 0; |
776 | 0 | } |
777 | 5.16k | return; |
778 | 5.16k | } |
779 | | |
780 | | /****************************************************** |
781 | | * Mode Decision Configure SB |
782 | | ******************************************************/ |
783 | | void svt_aom_mode_decision_configure_sb(ModeDecisionContext* ctx, PictureControlSet* pcs, uint8_t sb_qp, |
784 | 6.22k | uint8_t me_sb_qp) { |
785 | | /* Note(CHKN) : when Qp modulation varies QP on a sub-SB(CU) basis, Lamda has to change based on Cu->QP , and then this code has to move inside the CU loop in MD */ |
786 | | |
787 | | // Lambda Assignement |
788 | 6.22k | ctx->qp_index = pcs->ppcs->frm_hdr.delta_q_params.delta_q_present || pcs->ppcs->r0_delta_qp_md |
789 | 6.22k | ? sb_qp |
790 | 6.22k | : (uint8_t)pcs->ppcs->frm_hdr.quantization_params.base_q_idx; |
791 | | |
792 | 6.22k | ctx->me_q_index = me_sb_qp; |
793 | | |
794 | 6.22k | av1_lambda_assign_md(pcs, ctx); |
795 | | |
796 | 6.22k | ctx->hbd_pack_done = 0; |
797 | | |
798 | 6.22k | return; |
799 | 6.22k | } |