/work/svt-av1/Source/Lib/Codec/md_process.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright(c) 2019 Intel Corporation |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license. |
10 | | */ |
11 | | |
12 | | #include <stdlib.h> |
13 | | |
14 | | #include "utility.h" |
15 | | #include "md_process.h" |
16 | | #include "lambda_rate_tables.h" |
17 | | #include "rc_process.h" |
18 | | #include "enc_mode_config.h" |
19 | | |
/*
 * 64-entry lookup table. Entries 0..61 equal 4 * index; the final two entries
 * are 249 and 255.
 * NOTE(review): going by the name, this presumably maps a 0..63 quantizer value
 * to a 0..255 qindex - confirm against the callers.
 */
const uint8_t quantizer_to_qindex[64] = {
    0,   4,   8,   12,  16,  20,  24,  28,  // indices  0..7
    32,  36,  40,  44,  48,  52,  56,  60,  // indices  8..15
    64,  68,  72,  76,  80,  84,  88,  92,  // indices 16..23
    96,  100, 104, 108, 112, 116, 120, 124, // indices 24..31
    128, 132, 136, 140, 144, 148, 152, 156, // indices 32..39
    160, 164, 168, 172, 176, 180, 184, 188, // indices 40..47
    192, 196, 200, 204, 208, 212, 216, 220, // indices 48..55
    224, 228, 232, 236, 240, 244, 249, 255, // indices 56..63 (last two break the 4*i pattern)
};
24 | | |
25 | | const int percents[2][FIXED_QP_OFFSET_COUNT] = { |
26 | | {75, 70, 60, 20, 15, 0}, {76, 60, 30, 15, 8, 4} // libaom offsets |
27 | | }; |
28 | | |
/*
 * 64-entry table made of three flat bands:
 *   indices  0..15 -> 85
 *   indices 16..47 -> 95
 *   indices 48..63 -> 100
 * NOTE(review): presumably a percent-style bias factor looked up by a 0..63
 * quantizer value - confirm against the users of this table.
 */
const uint8_t uni_psy_bias[64] = {
    // indices 0..15
    85,  85,  85,  85,  85,  85,  85,  85,
    85,  85,  85,  85,  85,  85,  85,  85,
    // indices 16..47
    95,  95,  95,  95,  95,  95,  95,  95,
    95,  95,  95,  95,  95,  95,  95,  95,
    95,  95,  95,  95,  95,  95,  95,  95,
    95,  95,  95,  95,  95,  95,  95,  95,
    // indices 48..63
    100, 100, 100, 100, 100, 100, 100, 100,
    100, 100, 100, 100, 100, 100, 100, 100,
};
34 | | |
35 | 1.66k | static void mode_decision_context_dctor(EbPtr p) { |
36 | 1.66k | ModeDecisionContext* obj = (ModeDecisionContext*)p; |
37 | | |
38 | 1.66k | uint32_t block_max_count_sb = obj->init_max_block_cnt; |
39 | | |
40 | | // MD palette search |
41 | 1.66k | if (obj->palette_buffer) { |
42 | 0 | EB_FREE(obj->palette_buffer); |
43 | 0 | } |
44 | 1.66k | if (obj->palette_cand_array) { |
45 | | // Free fields in palette_cand_array before freeing palette_cand_array |
46 | 0 | for (int cd = 0; cd < MAX_PAL_CAND; cd++) { |
47 | 0 | if (obj->palette_cand_array[cd].color_idx_map) { |
48 | 0 | EB_FREE_ARRAY(obj->palette_cand_array[cd].color_idx_map); |
49 | 0 | } |
50 | 0 | } |
51 | |
|
52 | 0 | EB_FREE_ARRAY(obj->palette_cand_array); |
53 | 0 | } |
54 | 1.66k | if (obj->palette_size_array_0) { |
55 | 0 | EB_FREE_ARRAY(obj->palette_size_array_0); |
56 | 0 | } |
57 | 9.98k | for (CandClass cand_class_it = CAND_CLASS_0; cand_class_it < CAND_CLASS_TOTAL; cand_class_it++) { |
58 | 8.32k | EB_FREE_ARRAY(obj->cand_buff_indices[cand_class_it]); |
59 | 8.32k | } |
60 | 1.66k | EB_FREE_ARRAY(obj->best_candidate_index_array); |
61 | | |
62 | 1.66k | EB_FREE_ARRAY(obj->above_txfm_context); |
63 | 1.66k | EB_FREE_ARRAY(obj->left_txfm_context); |
64 | 143k | for (uint32_t coded_leaf_index = 0; coded_leaf_index < block_max_count_sb; ++coded_leaf_index) { |
65 | 141k | if (obj->md_blk_arr_nsq[coded_leaf_index].coeff_tmp) { |
66 | 141k | EB_DELETE(obj->md_blk_arr_nsq[coded_leaf_index].coeff_tmp); |
67 | 141k | } |
68 | 141k | if (obj->md_blk_arr_nsq[coded_leaf_index].recon_tmp) { |
69 | 141k | EB_DELETE(obj->md_blk_arr_nsq[coded_leaf_index].recon_tmp); |
70 | 141k | } |
71 | 141k | } |
72 | 1.66k | EB_DELETE_PTR_ARRAY(obj->cand_bf_ptr_array, obj->max_nics_uv); |
73 | 1.66k | EB_FREE_ARRAY(obj->cand_bf_tx_depth_1->cand); |
74 | 1.66k | EB_DELETE(obj->cand_bf_tx_depth_1); |
75 | 1.66k | EB_FREE_ARRAY(obj->cand_bf_tx_depth_2->cand); |
76 | 1.66k | EB_DELETE(obj->cand_bf_tx_depth_2); |
77 | 1.66k | EB_FREE_ALIGNED_ARRAY(obj->cfl_temp_luma_recon16bit); |
78 | 1.66k | EB_FREE_ALIGNED_ARRAY(obj->cfl_temp_luma_recon); |
79 | 1.66k | EB_FREE_ALIGNED_ARRAY(obj->pred_buf_q3); |
80 | 1.66k | EB_FREE_ARRAY(obj->fast_cand_array); |
81 | 1.66k | EB_FREE_2D(obj->injected_mvs); |
82 | 1.66k | EB_FREE_ARRAY(obj->injected_ref_types); |
83 | 1.66k | EB_FREE_ARRAY(obj->fast_cost_array); |
84 | 1.66k | EB_FREE_ARRAY(obj->full_cost_array); |
85 | 1.66k | if (obj->md_blk_arr_nsq) { |
86 | 6.65k | for (int i = 0; i < 3; i++) { |
87 | 4.99k | EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].neigh_left_recon_16bit[i]); |
88 | 4.99k | EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].neigh_top_recon_16bit[i]); |
89 | 4.99k | EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].neigh_left_recon[i]); |
90 | 4.99k | EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].neigh_top_recon[i]); |
91 | 4.99k | } |
92 | 1.66k | } |
93 | 1.66k | if (obj->md_blk_arr_nsq) { |
94 | 1.66k | EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].av1xd); |
95 | 1.66k | } |
96 | 1.66k | EB_FREE_ARRAY(obj->mds); |
97 | 1.66k | EB_FREE_ARRAY(obj->pc_tree); |
98 | 1.66k | EB_FREE_ARRAY(obj->tested_blk); |
99 | 1.66k | obj->blocks_to_alloc = 0; |
100 | 1.66k | EB_FREE_ARRAY(obj->md_blk_arr_nsq); |
101 | 1.66k | if (obj->rate_est_table) { |
102 | 0 | EB_FREE_ARRAY(obj->rate_est_table); |
103 | 0 | } |
104 | | |
105 | 8.32k | for (int i = 0; i < NEAREST_NEAR_MV_CNT; i++) { |
106 | 6.65k | if (obj->cmp_store.pred0_buf[i]) { |
107 | 0 | EB_FREE(obj->cmp_store.pred0_buf[i]); |
108 | 0 | } |
109 | 6.65k | if (obj->cmp_store.pred1_buf[i]) { |
110 | 0 | EB_FREE(obj->cmp_store.pred1_buf[i]); |
111 | 0 | } |
112 | 6.65k | } |
113 | 1.66k | if (obj->residual1) { |
114 | 0 | EB_FREE(obj->residual1); |
115 | 0 | } |
116 | 1.66k | if (obj->diff10) { |
117 | 0 | EB_FREE(obj->diff10); |
118 | 0 | } |
119 | | |
120 | 1.66k | if (obj->intrapred_buf) { |
121 | 0 | EB_FREE_2D(obj->intrapred_buf); |
122 | 0 | } |
123 | | |
124 | 1.66k | if (obj->obmc_buff_0) { |
125 | 1.66k | EB_FREE(obj->obmc_buff_0); |
126 | 1.66k | } |
127 | 1.66k | if (obj->obmc_buff_1) { |
128 | 1.66k | EB_FREE(obj->obmc_buff_1); |
129 | 1.66k | } |
130 | 1.66k | if (obj->wsrc_buf) { |
131 | 1.66k | EB_FREE(obj->wsrc_buf); |
132 | 1.66k | } |
133 | 1.66k | if (obj->mask_buf) { |
134 | 1.66k | EB_FREE(obj->mask_buf); |
135 | 1.66k | } |
136 | 28.2k | for (uint32_t txt_itr = 0; txt_itr < TX_TYPES; ++txt_itr) { |
137 | 26.6k | EB_DELETE(obj->recon_coeff_ptr[txt_itr]); |
138 | 26.6k | EB_DELETE(obj->recon_ptr[txt_itr]); |
139 | 26.6k | EB_DELETE(obj->quant_coeff_ptr[txt_itr]); |
140 | 26.6k | } |
141 | 1.66k | EB_DELETE(obj->tx_coeffs); |
142 | 1.66k | EB_DELETE(obj->scratch_prediction_ptr); |
143 | 1.66k | EB_DELETE(obj->temp_residual); |
144 | 1.66k | EB_DELETE(obj->temp_recon_ptr); |
145 | 1.66k | EB_FREE_ARRAY(obj->full_cost_ssim_array); |
146 | 1.66k | } |
147 | | |
148 | | void svt_aom_set_nics(SequenceControlSet* scs, NicScalingCtrls* scaling_ctrls, uint32_t mds1_count[CAND_CLASS_TOTAL], |
149 | | uint32_t mds2_count[CAND_CLASS_TOTAL], uint32_t mds3_count[CAND_CLASS_TOTAL], uint8_t pic_type, |
150 | | uint32_t qp); |
151 | | |
152 | | static void setup_mds(SequenceControlSet* scs, MdScan* mds, uint32_t* mds_idx, int index, BlockSize bsize, |
153 | 141k | const int min_sq_size) { |
154 | 141k | mds->mds_idx = *mds_idx; |
155 | 141k | mds->bsize = bsize; |
156 | 141k | mds->index = index; |
157 | | |
158 | | // If applicable, add split depths |
159 | 141k | const BlockGeom* blk_geom = get_blk_geom_mds(scs->blk_geom_mds, *mds_idx); |
160 | 141k | const int sq_size = block_size_wide[bsize]; |
161 | 141k | if (sq_size > min_sq_size) { |
162 | 34.9k | const BlockSize subsize = get_partition_subsize(bsize, PARTITION_SPLIT); |
163 | 34.9k | const int sq_subsize = block_size_wide[subsize]; |
164 | 34.9k | int blocks_per_subdepth = (sq_subsize / min_sq_size) * (sq_subsize / min_sq_size); |
165 | 34.9k | int blocks_to_skip = 0; |
166 | | |
167 | 79.8k | for (int i = min_sq_size; i <= sq_subsize; i <<= 1, blocks_per_subdepth >>= 2) { |
168 | 44.9k | blocks_to_skip += blocks_per_subdepth; |
169 | 44.9k | } |
170 | | |
171 | 34.9k | *mds_idx += blk_geom->d1_depth_offset; |
172 | 174k | for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { |
173 | 139k | mds->split[i] = mds + i * blocks_to_skip + 1; |
174 | 139k | setup_mds(scs, mds->split[i], mds_idx, i, subsize, min_sq_size); |
175 | 139k | } |
176 | 106k | } else { |
177 | 106k | *mds_idx += blk_geom->ns_depth_offset; |
178 | 106k | } |
179 | 141k | } |
180 | | |
181 | | static void setup_pc_tree(PC_TREE* pc_tree, bool (*test_blk_array)[PART_S][4], int index, BlockSize bsize, |
182 | 141k | const int min_sq_size) { |
183 | 141k | pc_tree->bsize = bsize; |
184 | 141k | pc_tree->index = index; |
185 | 141k | pc_tree->tested_blk = test_blk_array[0]; |
186 | | |
187 | | // If applicable, add split depths |
188 | 141k | const int sq_size = block_size_wide[bsize]; |
189 | 141k | if (sq_size > min_sq_size) { |
190 | 34.9k | const BlockSize subsize = get_partition_subsize(bsize, PARTITION_SPLIT); |
191 | 34.9k | const int sq_subsize = block_size_wide[subsize]; |
192 | 34.9k | int blocks_per_subdepth = (sq_subsize / min_sq_size) * (sq_subsize / min_sq_size); |
193 | 34.9k | int blocks_to_skip = 0; |
194 | | |
195 | 79.8k | for (int i = min_sq_size; i <= sq_subsize; i <<= 1, blocks_per_subdepth >>= 2) { |
196 | 44.9k | blocks_to_skip += blocks_per_subdepth; |
197 | 44.9k | } |
198 | | |
199 | 174k | for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) { |
200 | 139k | pc_tree->split[i] = pc_tree + i * blocks_to_skip + 1; |
201 | 139k | pc_tree->split[i]->parent = pc_tree; |
202 | 139k | setup_pc_tree(pc_tree->split[i], test_blk_array + i * blocks_to_skip + 1, i, subsize, min_sq_size); |
203 | 139k | } |
204 | 34.9k | } |
205 | 141k | } |
206 | | |
207 | | /****************************************************** |
208 | | * Mode Decision Context Constructor |
209 | | ******************************************************/ |
210 | | EbErrorType svt_aom_mode_decision_context_ctor(ModeDecisionContext* ctx, SequenceControlSet* scs, |
211 | | EbColorFormat color_format, uint8_t sb_size, EncMode enc_mode, |
212 | | uint16_t max_block_cnt, uint32_t encoder_bit_depth, |
213 | | EbFifo* mode_decision_configuration_input_fifo_ptr, |
214 | | EbFifo* mode_decision_output_fifo_ptr, uint8_t enable_hbd_mode_decision, |
215 | 1.66k | uint8_t seq_qp_mod) { |
216 | 1.66k | const bool allintra = scs->allintra; |
217 | 1.66k | const bool rtc_tune = scs->static_config.rtc; |
218 | 1.66k | uint32_t buffer_index; |
219 | 1.66k | uint32_t cand_index; |
220 | | |
221 | 1.66k | ctx->init_max_block_cnt = max_block_cnt; |
222 | 1.66k | uint32_t block_max_count_sb = max_block_cnt; |
223 | | |
224 | 1.66k | ctx->sb_size = sb_size; |
225 | 1.66k | (void)color_format; |
226 | | |
227 | 1.66k | ctx->dctor = mode_decision_context_dctor; |
228 | 1.66k | ctx->hbd_md = enable_hbd_mode_decision; |
229 | | |
230 | | // Zero the tail of md_levels_buf once; it serves as permanent bottom-padding |
231 | | // for set_levels() and is never overwritten by svt_av1_txb_init_levels(). |
232 | 1.66k | memset(ctx->md_levels_buf + LEVELS_TAIL_OFFSET, 0, TX_PAD_2D - LEVELS_TAIL_OFFSET); |
233 | | |
234 | | // Input/Output System Resource Manager FIFOs |
235 | 1.66k | ctx->mode_decision_configuration_input_fifo_ptr = mode_decision_configuration_input_fifo_ptr; |
236 | 1.66k | ctx->mode_decision_output_fifo_ptr = mode_decision_output_fifo_ptr; |
237 | | |
238 | | // Maximum number of candidates MD can support |
239 | | // determine MAX_NICS for a given preset |
240 | | // get the min scaling level (the smallest scaling level is the most conservative) |
241 | 1.66k | uint8_t min_nic_scaling_level = NICS_SCALING_LEVELS - 1; |
242 | 1.66k | uint8_t stage1_scaling_num; |
243 | 1.66k | if (allintra) { |
244 | 1.66k | uint8_t nic_level = svt_aom_get_nic_level_allintra(enc_mode); |
245 | 1.66k | stage1_scaling_num = MD_STAGE_NICS_SCAL_NUM[svt_aom_set_nic_controls(NULL, nic_level)][MD_STAGE_1]; |
246 | 1.66k | } else if (rtc_tune) { |
247 | 0 | #if TUNE_SIMPLIFY_SETTINGS |
248 | 0 | uint8_t nic_level = svt_aom_get_nic_level_rtc(enc_mode); |
249 | | #else |
250 | | uint8_t nic_level = svt_aom_get_nic_level_rtc(enc_mode, scs->use_flat_ipp); |
251 | | #endif |
252 | 0 | stage1_scaling_num = MD_STAGE_NICS_SCAL_NUM[svt_aom_set_nic_controls(NULL, nic_level)][MD_STAGE_1]; |
253 | 0 | } else { |
254 | 0 | #if TUNE_SIMPLIFY_SETTINGS |
255 | 0 | for (uint8_t is_base = 0; is_base < 2; is_base++) { |
256 | 0 | uint8_t nic_level = svt_aom_get_nic_level_default(enc_mode, is_base); |
257 | 0 | uint8_t nic_scaling_level = svt_aom_set_nic_controls(NULL, nic_level); |
258 | 0 | min_nic_scaling_level = MIN(min_nic_scaling_level, nic_scaling_level); |
259 | 0 | } |
260 | | #else |
261 | | for (uint8_t sc_class1 = 0; sc_class1 < 2; sc_class1++) { |
262 | | for (uint8_t is_base = 0; is_base < 2; is_base++) { |
263 | | uint8_t nic_level = svt_aom_get_nic_level_default(enc_mode, is_base, sc_class1); |
264 | | uint8_t nic_scaling_level = svt_aom_set_nic_controls(NULL, nic_level); |
265 | | min_nic_scaling_level = MIN(min_nic_scaling_level, nic_scaling_level); |
266 | | } |
267 | | } |
268 | | #endif |
269 | |
|
270 | 0 | stage1_scaling_num = MD_STAGE_NICS_SCAL_NUM[min_nic_scaling_level][MD_STAGE_1]; |
271 | 0 | } |
272 | | // scale max_nics |
273 | 1.66k | uint32_t max_nics = 0; |
274 | 1.66k | { |
275 | 1.66k | NicScalingCtrls scaling_ctrls; |
276 | 1.66k | scaling_ctrls.stage1_scaling_num = stage1_scaling_num; |
277 | 1.66k | scaling_ctrls.stage2_scaling_num = stage1_scaling_num; |
278 | 1.66k | scaling_ctrls.stage3_scaling_num = stage1_scaling_num; |
279 | 1.66k | uint32_t mds1_count[CAND_CLASS_TOTAL]; |
280 | 1.66k | uint32_t mds2_count[CAND_CLASS_TOTAL]; |
281 | 1.66k | uint32_t mds3_count[CAND_CLASS_TOTAL]; |
282 | 6.65k | for (uint8_t pic_type = 0; pic_type < NICS_PIC_TYPE; pic_type++) { |
283 | 324k | for (uint8_t qp = MIN_QP_VALUE; qp <= MAX_QP_VALUE; qp++) { |
284 | 319k | svt_aom_set_nics(scs, &scaling_ctrls, mds1_count, mds2_count, mds3_count, pic_type, qp); |
285 | | |
286 | 319k | uint32_t nics = 0; |
287 | 1.91M | for (CandClass cidx = CAND_CLASS_0; cidx < CAND_CLASS_TOTAL; cidx++) { |
288 | 1.59M | nics += mds1_count[cidx]; |
289 | 1.59M | } |
290 | 319k | max_nics = MAX(max_nics, nics); |
291 | 319k | } |
292 | 4.99k | } |
293 | 1.66k | } |
294 | | |
295 | | // If independent chroma search is used, need to allocate additional 84 candidate buffers |
296 | 1.66k | bool is_chroma_mode_0; |
297 | 1.66k | if (allintra) { |
298 | 1.66k | is_chroma_mode_0 = svt_aom_set_chroma_controls(NULL, svt_aom_get_chroma_level_allintra(enc_mode)) == |
299 | 1.66k | CHROMA_MODE_0; |
300 | 1.66k | } else if (scs->static_config.rtc) { |
301 | 0 | #if TUNE_SIMPLIFY_SETTINGS |
302 | 0 | is_chroma_mode_0 = svt_aom_set_chroma_controls( |
303 | 0 | NULL, svt_aom_get_chroma_level_rtc(enc_mode, scs->use_flat_ipp)) == CHROMA_MODE_0; |
304 | | #else |
305 | | for (uint8_t is_i_slice = 0; is_i_slice < 2; is_i_slice++) { |
306 | | is_chroma_mode_0 = svt_aom_set_chroma_controls(NULL, svt_aom_get_chroma_level_rtc(enc_mode, is_i_slice)) == |
307 | | CHROMA_MODE_0; |
308 | | if (is_chroma_mode_0) { |
309 | | break; |
310 | | } |
311 | | } |
312 | | #endif |
313 | 0 | } else { |
314 | 0 | for (uint8_t is_i_slice = 0; is_i_slice < 2; is_i_slice++) { |
315 | 0 | is_chroma_mode_0 = svt_aom_set_chroma_controls( |
316 | 0 | NULL, svt_aom_get_chroma_level_default(enc_mode, is_i_slice)) == CHROMA_MODE_0; |
317 | 0 | if (is_chroma_mode_0) { |
318 | 0 | break; |
319 | 0 | } |
320 | 0 | } |
321 | 0 | } |
322 | 1.66k | const uint8_t ind_uv_cands = is_chroma_mode_0 ? 84 : 0; |
323 | 1.66k | max_nics += CAND_CLASS_TOTAL; //need one extra temp buffer for each fast loop call |
324 | 1.66k | ctx->max_nics = max_nics; |
325 | 1.66k | ctx->max_nics_uv = max_nics + ind_uv_cands; |
326 | | // Cfl scratch memory |
327 | 1.66k | if (ctx->hbd_md > EB_8_BIT_MD) { |
328 | 0 | EB_MALLOC_ALIGNED(ctx->cfl_temp_luma_recon16bit, sizeof(uint16_t) * sb_size * sb_size); |
329 | 0 | } |
330 | 1.66k | if (ctx->hbd_md != EB_10_BIT_MD) { |
331 | 1.66k | EB_MALLOC_ALIGNED(ctx->cfl_temp_luma_recon, sizeof(uint8_t) * sb_size * sb_size); |
332 | 1.66k | } |
333 | 1.66k | EB_MALLOC_ALIGNED(ctx->pred_buf_q3, CFL_BUF_SQUARE); |
334 | 1.66k | uint8_t use_update_cdf = 0; |
335 | 1.66k | if (allintra) { |
336 | 1.66k | use_update_cdf = svt_aom_get_update_cdf_level_allintra(enc_mode); |
337 | 1.66k | } else if (rtc_tune) { |
338 | 0 | #if TUNE_SIMPLIFY_SETTINGS |
339 | 0 | for (uint8_t is_islice = 0; is_islice < 2; is_islice++) { |
340 | 0 | if (use_update_cdf) { |
341 | 0 | break; |
342 | 0 | } |
343 | 0 | use_update_cdf |= svt_aom_get_update_cdf_level_rtc(enc_mode, is_islice); |
344 | 0 | } |
345 | | #else |
346 | | for (uint8_t sc_class1 = 0; sc_class1 < 2; sc_class1++) { |
347 | | for (uint8_t is_islice = 0; is_islice < 2; is_islice++) { |
348 | | for (uint8_t is_base = 0; is_base < 2; is_base++) { |
349 | | if (use_update_cdf) { |
350 | | break; |
351 | | } |
352 | | use_update_cdf |= svt_aom_get_update_cdf_level_rtc(enc_mode, is_islice, is_base, sc_class1); |
353 | | } |
354 | | } |
355 | | } |
356 | | #endif |
357 | 0 | } else { |
358 | 0 | #if TUNE_SIMPLIFY_SETTINGS |
359 | 0 | for (uint8_t is_islice = 0; is_islice < 2; is_islice++) { |
360 | 0 | for (uint8_t is_base = 0; is_base < 2; is_base++) { |
361 | 0 | if (use_update_cdf) { |
362 | 0 | break; |
363 | 0 | } |
364 | 0 | use_update_cdf |= svt_aom_get_update_cdf_level_default(enc_mode, is_islice, is_base); |
365 | 0 | } |
366 | 0 | } |
367 | | #else |
368 | | for (uint8_t sc_class1 = 0; sc_class1 < 2; sc_class1++) { |
369 | | for (uint8_t is_islice = 0; is_islice < 2; is_islice++) { |
370 | | for (uint8_t is_base = 0; is_base < 2; is_base++) { |
371 | | if (use_update_cdf) { |
372 | | break; |
373 | | } |
374 | | use_update_cdf |= svt_aom_get_update_cdf_level_default(enc_mode, is_islice, is_base, sc_class1); |
375 | | } |
376 | | } |
377 | | } |
378 | | #endif |
379 | 0 | } |
380 | 1.66k | if (use_update_cdf) { |
381 | 0 | EB_CALLOC_ARRAY(ctx->rate_est_table, 1); |
382 | 1.66k | } else { |
383 | 1.66k | ctx->rate_est_table = NULL; |
384 | 1.66k | } |
385 | | // Allocate buffer for inter-inter compound prediction |
386 | 1.66k | if (get_inter_compound_level(enc_mode)) { |
387 | 0 | const uint8_t bits = ctx->hbd_md > EB_8_BIT_MD ? 2 : 1; |
388 | 0 | for (int i = 0; i < NEAREST_NEAR_MV_CNT; i++) { |
389 | 0 | EB_MALLOC(ctx->cmp_store.pred0_buf[i], sb_size * sb_size * bits * sizeof(uint8_t)); |
390 | 0 | EB_MALLOC(ctx->cmp_store.pred1_buf[i], sb_size * sb_size * bits * sizeof(uint8_t)); |
391 | 0 | } |
392 | 0 | EB_MALLOC(ctx->residual1, sb_size * sb_size * sizeof(ctx->residual1[0])); |
393 | 0 | EB_MALLOC(ctx->diff10, sb_size * sb_size * sizeof(ctx->diff10[0])); |
394 | 0 | } |
395 | | |
396 | | // Allocate buffer for inter-intra prediction |
397 | 1.66k | uint8_t ii_allowed = 0; |
398 | 4.99k | for (uint8_t transition_present = 0; transition_present < 2; transition_present++) { |
399 | 3.32k | if (ii_allowed) { |
400 | 0 | break; |
401 | 0 | } |
402 | 3.32k | ii_allowed |= svt_aom_get_inter_intra_level(enc_mode, transition_present); |
403 | 3.32k | } |
404 | 1.66k | if (ii_allowed) { |
405 | 0 | const uint8_t bits = ctx->hbd_md > EB_8_BIT_MD ? 2 : 1; |
406 | | // MAX block size for inter intra is 32x32 |
407 | 0 | EB_MALLOC_2D(ctx->intrapred_buf, INTERINTRA_MODES, 32 * 32 * bits * sizeof(ctx->intrapred_buf[0][0])); |
408 | 0 | } |
409 | | |
410 | | // Allocate buffers for obmc prediction |
411 | 1.66k | uint8_t obmc_allowed = 0; |
412 | 4.99k | for (uint8_t is_base = 0; is_base < 2; is_base++) { |
413 | 4.99k | for (uint8_t qp = MIN_QP_VALUE; qp <= MAX_QP_VALUE; qp++) { |
414 | 4.99k | if (obmc_allowed) { |
415 | 3.32k | break; |
416 | 3.32k | } |
417 | 1.66k | #if TUNE_SHIFT_PRESETS_RTC |
418 | 1.66k | obmc_allowed |= svt_aom_get_obmc_level(enc_mode, qp, seq_qp_mod, rtc_tune); |
419 | | #else |
420 | | obmc_allowed |= svt_aom_get_obmc_level(enc_mode, qp, seq_qp_mod); |
421 | | #endif |
422 | 1.66k | } |
423 | 3.32k | } |
424 | 1.66k | if (obmc_allowed) { |
425 | 1.66k | const uint8_t bits = ctx->hbd_md > EB_8_BIT_MD ? 2 : 1; |
426 | 1.66k | EB_MALLOC(ctx->obmc_buff_0, sb_size * sb_size * bits * MAX_PLANES * sizeof(ctx->obmc_buff_0[0])); |
427 | 1.66k | EB_MALLOC(ctx->obmc_buff_1, sb_size * sb_size * bits * MAX_PLANES * sizeof(ctx->obmc_buff_1[0])); |
428 | 1.66k | EB_MALLOC(ctx->wsrc_buf, sb_size * sb_size * sizeof(ctx->wsrc_buf[0])); |
429 | 1.66k | EB_MALLOC(ctx->mask_buf, sb_size * sb_size * sizeof(ctx->mask_buf[0])); |
430 | 1.66k | } |
431 | 1.66k | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq, block_max_count_sb); |
432 | | // Fast Candidate Array |
433 | 1.66k | uint16_t max_can_count = svt_aom_get_max_can_count(enc_mode) + ind_uv_cands; |
434 | 1.66k | EB_MALLOC_ARRAY(ctx->fast_cand_array, max_can_count); |
435 | | |
436 | 317k | for (cand_index = 0; cand_index < max_can_count; ++cand_index) { |
437 | 316k | ctx->fast_cand_array[cand_index].palette_info = NULL; |
438 | 316k | } |
439 | 1.66k | svt_aom_assert_err(max_can_count > ind_uv_cands, "Max. candidates is too low"); |
440 | 1.66k | EB_MALLOC_2D(ctx->injected_mvs, (uint16_t)(max_can_count - ind_uv_cands), 2); |
441 | 1.66k | EB_MALLOC_ARRAY(ctx->injected_ref_types, (max_can_count - ind_uv_cands)); |
442 | | |
443 | | // Set buffers for MD palette search to NULL; will be init'd at runtime if needed |
444 | 1.66k | ctx->palette_buffer = NULL; |
445 | 1.66k | ctx->palette_cand_array = NULL; |
446 | 1.66k | ctx->palette_size_array_0 = NULL; |
447 | | |
448 | | // Cost Arrays |
449 | 1.66k | EB_MALLOC_ARRAY(ctx->fast_cost_array, ctx->max_nics_uv); |
450 | 1.66k | EB_MALLOC_ARRAY(ctx->full_cost_array, ctx->max_nics_uv); |
451 | 1.66k | EB_MALLOC_ARRAY(ctx->full_cost_ssim_array, ctx->max_nics_uv); |
452 | | // Candidate Buffers |
453 | 1.66k | EB_NEW(ctx->cand_bf_tx_depth_1, |
454 | 1.66k | svt_aom_mode_decision_scratch_cand_bf_ctor, |
455 | 1.66k | sb_size, |
456 | 1.66k | ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT); |
457 | | |
458 | 1.66k | EB_ALLOC_PTR_ARRAY(ctx->cand_bf_tx_depth_1->cand, 1); |
459 | 1.66k | EB_NEW(ctx->cand_bf_tx_depth_2, |
460 | 1.66k | svt_aom_mode_decision_scratch_cand_bf_ctor, |
461 | 1.66k | sb_size, |
462 | 1.66k | ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT); |
463 | | |
464 | 1.66k | EB_ALLOC_PTR_ARRAY(ctx->cand_bf_tx_depth_2->cand, 1); |
465 | 6.65k | for (int i = 0; i < 3; i++) { |
466 | 4.99k | ctx->md_blk_arr_nsq[0].neigh_left_recon[i] = NULL; |
467 | 4.99k | ctx->md_blk_arr_nsq[0].neigh_top_recon[i] = NULL; |
468 | 4.99k | ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[i] = NULL; |
469 | 4.99k | ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[i] = NULL; |
470 | 4.99k | } |
471 | 1.66k | uint32_t coded_leaf_index; |
472 | 1.66k | uint16_t sz = sizeof(uint16_t); |
473 | 1.66k | if (ctx->hbd_md > EB_8_BIT_MD) { |
474 | 0 | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[0], block_max_count_sb * sb_size * sz); |
475 | 0 | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[0], block_max_count_sb * sb_size * sz); |
476 | 0 | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[1], block_max_count_sb * sb_size * sz >> 1); |
477 | 0 | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[1], block_max_count_sb * sb_size * sz >> 1); |
478 | 0 | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[2], block_max_count_sb * sb_size * sz >> 1); |
479 | 0 | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[2], block_max_count_sb * sb_size * sz >> 1); |
480 | | |
481 | 0 | for (coded_leaf_index = 0; coded_leaf_index < block_max_count_sb; ++coded_leaf_index) { |
482 | 0 | size_t offset = coded_leaf_index * sb_size * sz; |
483 | 0 | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon_16bit[0] = |
484 | 0 | ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[0] + offset; |
485 | 0 | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon_16bit[0] = |
486 | 0 | ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[0] + offset; |
487 | 0 | offset >>= 1; |
488 | 0 | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon_16bit[1] = |
489 | 0 | ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[1] + offset; |
490 | 0 | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon_16bit[1] = |
491 | 0 | ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[1] + offset; |
492 | 0 | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon_16bit[2] = |
493 | 0 | ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[2] + offset; |
494 | 0 | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon_16bit[2] = |
495 | 0 | ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[2] + offset; |
496 | 0 | } |
497 | 0 | } |
498 | 1.66k | if (ctx->hbd_md != EB_10_BIT_MD) { |
499 | 1.66k | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon[0], block_max_count_sb * sb_size); |
500 | 1.66k | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon[0], block_max_count_sb * sb_size); |
501 | 1.66k | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon[1], block_max_count_sb * sb_size >> 1); |
502 | 1.66k | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon[1], block_max_count_sb * sb_size >> 1); |
503 | 1.66k | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon[2], block_max_count_sb * sb_size >> 1); |
504 | 1.66k | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon[2], block_max_count_sb * sb_size >> 1); |
505 | | |
506 | 143k | for (coded_leaf_index = 0; coded_leaf_index < block_max_count_sb; ++coded_leaf_index) { |
507 | 141k | size_t offset = coded_leaf_index * sb_size; |
508 | 141k | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon[0] = ctx->md_blk_arr_nsq[0].neigh_left_recon[0] + |
509 | 141k | offset; |
510 | 141k | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon[0] = ctx->md_blk_arr_nsq[0].neigh_top_recon[0] + |
511 | 141k | offset; |
512 | 141k | offset >>= 1; |
513 | 141k | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon[1] = ctx->md_blk_arr_nsq[0].neigh_left_recon[1] + |
514 | 141k | offset; |
515 | 141k | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon[1] = ctx->md_blk_arr_nsq[0].neigh_top_recon[1] + |
516 | 141k | offset; |
517 | 141k | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon[2] = ctx->md_blk_arr_nsq[0].neigh_left_recon[2] + |
518 | 141k | offset; |
519 | 141k | ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon[2] = ctx->md_blk_arr_nsq[0].neigh_top_recon[2] + |
520 | 141k | offset; |
521 | 141k | } |
522 | 1.66k | } |
523 | 1.66k | ctx->md_blk_arr_nsq[0].av1xd = NULL; |
524 | 1.66k | EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].av1xd, block_max_count_sb); |
525 | | |
526 | | // Alloc mds and pc_tree, which are used to track tested blocks in MD |
527 | 1.66k | bool disallow_4x4 = allintra ? svt_aom_get_disallow_4x4_allintra(enc_mode) |
528 | 1.66k | #if TUNE_SIMPLIFY_SETTINGS |
529 | 1.66k | : rtc_tune ? svt_aom_get_disallow_4x4_rtc() |
530 | | #else |
531 | | : rtc_tune ? svt_aom_get_disallow_4x4_rtc(enc_mode) |
532 | | #endif |
533 | 0 | : svt_aom_get_disallow_4x4_default(enc_mode); |
534 | 1.66k | bool disallow_8x8 = allintra ? svt_aom_get_disallow_8x8_allintra() |
535 | 1.66k | : rtc_tune ? svt_aom_get_disallow_8x8_rtc(enc_mode, scs->max_input_luma_width, scs->max_input_luma_height) |
536 | 0 | : svt_aom_get_disallow_8x8_default(); |
537 | 1.66k | uint8_t min_bsize = disallow_8x8 ? 16 : disallow_4x4 ? 8 : 4; |
538 | 1.66k | int blocks_per_depth = (sb_size / min_bsize) * (sb_size / min_bsize); |
539 | 1.66k | int blocks_to_alloc = 0; |
540 | | |
541 | 8.32k | for (int i = min_bsize; i <= sb_size; i <<= 1, blocks_per_depth >>= 2) { |
542 | 6.65k | blocks_to_alloc += blocks_per_depth; |
543 | 6.65k | } |
544 | 1.66k | EB_CALLOC_ARRAY(ctx->mds, blocks_to_alloc); |
545 | 1.66k | uint32_t mds_idx = 0; |
546 | 1.66k | setup_mds(scs, ctx->mds, &mds_idx, 0, scs->seq_header.sb_size, min_bsize); |
547 | 1.66k | EB_CALLOC_ARRAY(ctx->pc_tree, blocks_to_alloc); |
548 | 1.66k | EB_MALLOC_ARRAY(ctx->tested_blk, blocks_to_alloc); |
549 | 1.66k | setup_pc_tree(ctx->pc_tree, ctx->tested_blk, 0, scs->seq_header.sb_size, min_bsize); |
550 | 1.66k | ctx->blocks_to_alloc = blocks_to_alloc; |
551 | | |
552 | 1.66k | bool bypass_encdec = allintra ? svt_aom_get_bypass_encdec_allintra(enc_mode) |
553 | 1.66k | : rtc_tune ? svt_aom_get_bypass_encdec_rtc(enc_mode, encoder_bit_depth) |
554 | 0 | : svt_aom_get_bypass_encdec_default(enc_mode, encoder_bit_depth); |
555 | 143k | for (coded_leaf_index = 0; coded_leaf_index < block_max_count_sb; ++coded_leaf_index) { |
556 | 141k | ctx->md_blk_arr_nsq[coded_leaf_index].av1xd = ctx->md_blk_arr_nsq[0].av1xd + coded_leaf_index; |
557 | 141k | ctx->md_blk_arr_nsq[coded_leaf_index].segment_id = 0; |
558 | 141k | const BlockGeom* blk_geom = get_blk_geom_mds(scs->blk_geom_mds, coded_leaf_index); |
559 | 141k | if (bypass_encdec) { |
560 | 141k | EbPictureBufferDescInitData init_data; |
561 | | |
562 | 141k | init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK; |
563 | 141k | init_data.max_width = blk_geom->bwidth; |
564 | 141k | init_data.max_height = blk_geom->bheight; |
565 | 141k | init_data.bit_depth = EB_THIRTYTWO_BIT; |
566 | 141k | init_data.color_format = (blk_geom->bwidth > 4 && blk_geom->bheight > 4) |
567 | 141k | ? EB_YUV420 |
568 | 141k | : EB_YUV444; // PW - must have at least 4x4 for chroma coeffs |
569 | 141k | init_data.border = 0; |
570 | 141k | init_data.split_mode = false; |
571 | | |
572 | 141k | EB_NEW(ctx->md_blk_arr_nsq[coded_leaf_index].coeff_tmp, svt_picture_buffer_desc_ctor, (EbPtr)&init_data); |
573 | | |
574 | 141k | init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK; |
575 | 141k | init_data.max_width = blk_geom->bwidth; |
576 | 141k | init_data.max_height = blk_geom->bheight; |
577 | 141k | init_data.bit_depth = ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT; |
578 | 141k | ; |
579 | 141k | init_data.color_format = (blk_geom->bwidth > 4 && blk_geom->bheight > 4) ? EB_YUV420 : EB_YUV444; |
580 | 141k | init_data.border = 0; |
581 | 141k | init_data.split_mode = false; |
582 | | |
583 | 141k | EB_NEW(ctx->md_blk_arr_nsq[coded_leaf_index].recon_tmp, svt_picture_buffer_desc_ctor, (EbPtr)&init_data); |
584 | 141k | } else { |
585 | 0 | ctx->md_blk_arr_nsq[coded_leaf_index].coeff_tmp = NULL; |
586 | 0 | ctx->md_blk_arr_nsq[coded_leaf_index].recon_tmp = NULL; |
587 | 0 | } |
588 | 141k | } |
589 | 9.98k | for (CandClass cand_class_it = CAND_CLASS_0; cand_class_it < CAND_CLASS_TOTAL; cand_class_it++) { |
590 | 8.32k | EB_MALLOC_ARRAY(ctx->cand_buff_indices[cand_class_it], ctx->max_nics_uv); |
591 | 8.32k | } |
592 | | |
593 | 1.66k | EB_MALLOC_ARRAY(ctx->best_candidate_index_array, ctx->max_nics_uv); |
594 | 1.66k | EB_MALLOC_ARRAY(ctx->above_txfm_context, (sb_size >> MI_SIZE_LOG2)); |
595 | 1.66k | EB_MALLOC_ARRAY(ctx->left_txfm_context, (sb_size >> MI_SIZE_LOG2)); |
596 | 1.66k | EbPictureBufferDescInitData thirty_two_width_picture_buffer_desc_init_data; |
597 | 1.66k | EbPictureBufferDescInitData picture_buffer_desc_init_data; |
598 | | |
599 | 1.66k | picture_buffer_desc_init_data.max_width = sb_size; |
600 | 1.66k | picture_buffer_desc_init_data.max_height = sb_size; |
601 | 1.66k | picture_buffer_desc_init_data.bit_depth = ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT; |
602 | 1.66k | picture_buffer_desc_init_data.color_format = EB_YUV420; |
603 | 1.66k | picture_buffer_desc_init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK; |
604 | 1.66k | picture_buffer_desc_init_data.border = 0; |
605 | 1.66k | picture_buffer_desc_init_data.split_mode = false; |
606 | 1.66k | picture_buffer_desc_init_data.is_16bit_pipeline = false; |
607 | | |
608 | 1.66k | thirty_two_width_picture_buffer_desc_init_data.max_width = sb_size; |
609 | 1.66k | thirty_two_width_picture_buffer_desc_init_data.max_height = sb_size; |
610 | 1.66k | thirty_two_width_picture_buffer_desc_init_data.bit_depth = EB_THIRTYTWO_BIT; |
611 | 1.66k | thirty_two_width_picture_buffer_desc_init_data.color_format = EB_YUV420; |
612 | 1.66k | thirty_two_width_picture_buffer_desc_init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK; |
613 | 1.66k | thirty_two_width_picture_buffer_desc_init_data.border = 0; |
614 | 1.66k | thirty_two_width_picture_buffer_desc_init_data.split_mode = false; |
615 | 1.66k | thirty_two_width_picture_buffer_desc_init_data.is_16bit_pipeline = false; |
616 | 28.2k | for (uint32_t txt_itr = 0; txt_itr < TX_TYPES; ++txt_itr) { |
617 | 26.6k | EB_NEW(ctx->recon_coeff_ptr[txt_itr], |
618 | 26.6k | svt_picture_buffer_desc_ctor, |
619 | 26.6k | (EbPtr)&thirty_two_width_picture_buffer_desc_init_data); |
620 | 26.6k | EB_NEW(ctx->recon_ptr[txt_itr], svt_picture_buffer_desc_ctor, (EbPtr)&picture_buffer_desc_init_data); |
621 | 26.6k | EB_NEW(ctx->quant_coeff_ptr[txt_itr], |
622 | 26.6k | svt_picture_buffer_desc_ctor, |
623 | 26.6k | (EbPtr)&thirty_two_width_picture_buffer_desc_init_data); |
624 | 26.6k | } |
625 | 1.66k | EB_NEW(ctx->tx_coeffs, svt_picture_buffer_desc_ctor, (EbPtr)&thirty_two_width_picture_buffer_desc_init_data); |
626 | 1.66k | EB_NEW(ctx->scratch_prediction_ptr, svt_picture_buffer_desc_ctor, (EbPtr)&picture_buffer_desc_init_data); |
627 | 1.66k | EbPictureBufferDescInitData double_width_picture_buffer_desc_init_data; |
628 | 1.66k | double_width_picture_buffer_desc_init_data.max_width = sb_size; |
629 | 1.66k | double_width_picture_buffer_desc_init_data.max_height = sb_size; |
630 | 1.66k | double_width_picture_buffer_desc_init_data.bit_depth = EB_SIXTEEN_BIT; |
631 | 1.66k | double_width_picture_buffer_desc_init_data.color_format = EB_YUV420; |
632 | 1.66k | double_width_picture_buffer_desc_init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK; |
633 | 1.66k | double_width_picture_buffer_desc_init_data.border = 0; |
634 | 1.66k | double_width_picture_buffer_desc_init_data.split_mode = false; |
635 | 1.66k | double_width_picture_buffer_desc_init_data.is_16bit_pipeline = false; |
636 | | |
637 | | // The temp_recon_ptr and temp_residual will be shared by all candidates |
638 | | // If you want to do something with residual or recon, you need to create one |
639 | 1.66k | EB_NEW(ctx->temp_recon_ptr, svt_picture_buffer_desc_ctor, (EbPtr)&picture_buffer_desc_init_data); |
640 | 1.66k | EB_NEW(ctx->temp_residual, svt_picture_buffer_desc_ctor, (EbPtr)&double_width_picture_buffer_desc_init_data); |
641 | | |
642 | | // Candidate Buffers |
643 | 1.66k | EB_ALLOC_PTR_ARRAY(ctx->cand_bf_ptr_array, ctx->max_nics_uv); |
644 | | |
645 | 18.3k | for (buffer_index = 0; buffer_index < ctx->max_nics; ++buffer_index) { |
646 | 16.6k | EB_NEW(ctx->cand_bf_ptr_array[buffer_index], |
647 | 16.6k | svt_aom_mode_decision_cand_bf_ctor, |
648 | 16.6k | ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT, |
649 | 16.6k | sb_size, |
650 | 16.6k | PICTURE_BUFFER_DESC_FULL_MASK, |
651 | 16.6k | ctx->temp_residual, |
652 | 16.6k | ctx->temp_recon_ptr, |
653 | 16.6k | &(ctx->fast_cost_array[buffer_index]), |
654 | 16.6k | &(ctx->full_cost_array[buffer_index]), |
655 | 16.6k | &(ctx->full_cost_ssim_array[buffer_index])); |
656 | 16.6k | } |
657 | | |
658 | 1.66k | for (buffer_index = max_nics; buffer_index < ctx->max_nics_uv; ++buffer_index) { |
659 | 0 | EB_NEW(ctx->cand_bf_ptr_array[buffer_index], |
660 | 0 | svt_aom_mode_decision_cand_bf_ctor, |
661 | 0 | ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT, |
662 | 0 | sb_size, |
663 | 0 | PICTURE_BUFFER_DESC_CHROMA_MASK, |
664 | 0 | ctx->temp_residual, |
665 | 0 | ctx->temp_recon_ptr, |
666 | 0 | &(ctx->fast_cost_array[buffer_index]), |
667 | 0 | &(ctx->full_cost_array[buffer_index]), |
668 | 0 | &(ctx->full_cost_ssim_array[buffer_index])); |
669 | 0 | } |
670 | | |
671 | 1.66k | return EB_ErrorNone; |
672 | 1.66k | } |
673 | | |
674 | | /************************************************** |
675 | | * Reset Mode Decision Neighbor Arrays |
676 | | *************************************************/ |
677 | 5.67k | void svt_aom_reset_mode_decision_neighbor_arrays(PictureControlSet* pcs, uint16_t tile_idx) { |
678 | 5.67k | uint8_t depth; |
679 | 22.7k | for (depth = 0; depth < NA_TOT_CNT; depth++) { |
680 | 17.0k | svt_aom_neighbor_array_unit_reset(pcs->mdleaf_partition_na[depth][tile_idx]); |
681 | 17.0k | if (pcs->hbd_md != EB_10_BIT_MD) { |
682 | 17.0k | svt_aom_neighbor_array_unit_reset(pcs->md_luma_recon_na[depth][tile_idx]); |
683 | 17.0k | svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_1_luma_recon_na[depth][tile_idx]); |
684 | 17.0k | svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_2_luma_recon_na[depth][tile_idx]); |
685 | 17.0k | svt_aom_neighbor_array_unit_reset(pcs->md_cb_recon_na[depth][tile_idx]); |
686 | 17.0k | svt_aom_neighbor_array_unit_reset(pcs->md_cr_recon_na[depth][tile_idx]); |
687 | 17.0k | } |
688 | 17.0k | if (pcs->hbd_md > EB_8_BIT_MD || (pcs->scs->encoder_bit_depth > EB_EIGHT_BIT && pcs->pic_bypass_encdec)) { |
689 | 0 | svt_aom_neighbor_array_unit_reset(pcs->md_luma_recon_na_16bit[depth][tile_idx]); |
690 | 0 | svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_1_luma_recon_na_16bit[depth][tile_idx]); |
691 | 0 | svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_2_luma_recon_na_16bit[depth][tile_idx]); |
692 | 0 | svt_aom_neighbor_array_unit_reset(pcs->md_cb_recon_na_16bit[depth][tile_idx]); |
693 | 0 | svt_aom_neighbor_array_unit_reset(pcs->md_cr_recon_na_16bit[depth][tile_idx]); |
694 | 0 | } |
695 | | |
696 | 17.0k | svt_aom_neighbor_array_unit_reset(pcs->md_y_dcs_na[depth][tile_idx]); |
697 | 17.0k | svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_1_luma_dc_sign_level_coeff_na[depth][tile_idx]); |
698 | 17.0k | svt_aom_neighbor_array_unit_reset(pcs->md_cb_dc_sign_level_coeff_na[depth][tile_idx]); |
699 | 17.0k | svt_aom_neighbor_array_unit_reset(pcs->md_cr_dc_sign_level_coeff_na[depth][tile_idx]); |
700 | 17.0k | svt_aom_neighbor_array_unit_reset(pcs->md_txfm_context_array[depth][tile_idx]); |
701 | 17.0k | } |
702 | | |
703 | 5.67k | return; |
704 | 5.67k | } |
705 | | |
706 | | // If the ref intra percentage is below the TH, applying modulation to the MD lambda |
707 | 0 | #define LAMBDA_MOD_INTRA_TH 50 |
708 | 0 | #define LAMBDA_MOD_INTRA_SCALING_FACTOR 138 |
709 | | |
710 | | // Set the lambda for each sb. |
711 | | // When lambda tuning is on (blk_lambda_tuning), lambda of each block is set separately (full_lambda_md/fast_lambda_md) |
712 | | // later in svt_aom_set_tuned_blk_lambda |
713 | | // Testing showed that updating SAD lambda based on frame info was not helpful; therefore, the SAD lambda generation is not changed. |
714 | 6.90k | static void av1_lambda_assign_md(PictureControlSet* pcs, ModeDecisionContext* ctx) { |
715 | 6.90k | ctx->full_lambda_md[0] = svt_aom_compute_rd_mult(pcs, ctx->qp_index, ctx->me_q_index, EB_EIGHT_BIT); |
716 | 6.90k | ctx->fast_lambda_md[0] = svt_aom_compute_fast_lambda(pcs, ctx->qp_index, ctx->me_q_index, EB_EIGHT_BIT); |
717 | 6.90k | ctx->full_lambda_md[1] = svt_aom_compute_rd_mult(pcs, ctx->qp_index, ctx->me_q_index, EB_TEN_BIT); |
718 | 6.90k | ctx->fast_lambda_md[1] = svt_aom_compute_fast_lambda(pcs, ctx->qp_index, ctx->me_q_index, EB_TEN_BIT); |
719 | | |
720 | 6.90k | if (!pcs->scs->static_config.rtc && pcs->scs->stats_based_sb_lambda_modulation) { |
721 | 6.90k | if (pcs->temporal_layer_index > 0) { |
722 | 0 | if (pcs->ref_intra_percentage < LAMBDA_MOD_INTRA_TH) { |
723 | 0 | ctx->full_lambda_md[0] = (ctx->full_lambda_md[0] * LAMBDA_MOD_INTRA_SCALING_FACTOR) >> 7; |
724 | 0 | ctx->fast_lambda_md[0] = (ctx->fast_lambda_md[0] * LAMBDA_MOD_INTRA_SCALING_FACTOR) >> 7; |
725 | 0 | ctx->full_lambda_md[1] = (ctx->full_lambda_md[1] * LAMBDA_MOD_INTRA_SCALING_FACTOR) >> 7; |
726 | 0 | ctx->fast_lambda_md[1] = (ctx->fast_lambda_md[1] * LAMBDA_MOD_INTRA_SCALING_FACTOR) >> 7; |
727 | 0 | } |
728 | 0 | } |
729 | 6.90k | } |
730 | | |
731 | 6.90k | if (pcs->lambda_weight) { |
732 | 3.75k | ctx->full_lambda_md[0] = (uint32_t)((ctx->full_lambda_md[0] * (uint64_t)pcs->lambda_weight) >> 7); |
733 | 3.75k | ctx->fast_lambda_md[0] = (uint32_t)((ctx->fast_lambda_md[0] * (uint64_t)pcs->lambda_weight) >> 7); |
734 | 3.75k | ctx->full_lambda_md[1] = (uint32_t)((ctx->full_lambda_md[1] * (uint64_t)pcs->lambda_weight) >> 7); |
735 | 3.75k | ctx->fast_lambda_md[1] = (uint32_t)((ctx->fast_lambda_md[1] * (uint64_t)pcs->lambda_weight) >> 7); |
736 | 3.75k | } |
737 | 6.90k | ctx->full_lambda_md[1] *= 16; |
738 | 6.90k | ctx->fast_lambda_md[1] *= 4; |
739 | | |
740 | 6.90k | SequenceControlSet* scs = pcs->scs; |
741 | 6.90k | uint64_t scale_factor = scs->static_config.lambda_scale_factors[pcs->ppcs->update_type]; |
742 | 6.90k | ctx->full_lambda_md[0] = (uint32_t)((ctx->full_lambda_md[0] * scale_factor) >> 7); |
743 | 6.90k | ctx->full_lambda_md[1] = (uint32_t)((ctx->full_lambda_md[1] * scale_factor) >> 7); |
744 | 6.90k | ctx->fast_lambda_md[0] = (uint32_t)((ctx->fast_lambda_md[0] * scale_factor) >> 7); |
745 | 6.90k | ctx->fast_lambda_md[1] = (uint32_t)((ctx->fast_lambda_md[1] * scale_factor) >> 7); |
746 | | |
747 | 6.90k | ctx->full_sb_lambda_md[0] = ctx->full_lambda_md[0]; |
748 | 6.90k | ctx->full_sb_lambda_md[1] = ctx->full_lambda_md[1]; |
749 | 6.90k | } |
750 | | |
751 | | void svt_aom_reset_mode_decision(SequenceControlSet* scs, ModeDecisionContext* ctx, PictureControlSet* pcs, |
752 | 5.76k | uint16_t tile_group_idx, uint32_t segment_index) { |
753 | 5.76k | const bool rtc_tune = scs->static_config.rtc; |
754 | 5.76k | ctx->hbd_md = pcs->hbd_md; |
755 | | // Reset MD rate Estimation table to initial values by copying from md_rate_est_ctx |
756 | 5.76k | ctx->md_rate_est_ctx = pcs->md_rate_est_ctx; |
757 | | // Reset CABAC Contexts |
758 | | |
759 | | // Reset Neighbor Arrays at start of new Segment / Picture |
760 | 5.76k | if (segment_index == 0) { |
761 | 533 | for (uint16_t r = pcs->ppcs->tile_group_info[tile_group_idx].tile_group_tile_start_y; |
762 | 2.22k | r < pcs->ppcs->tile_group_info[tile_group_idx].tile_group_tile_end_y; |
763 | 1.68k | r++) { |
764 | 1.68k | for (uint16_t c = pcs->ppcs->tile_group_info[tile_group_idx].tile_group_tile_start_x; |
765 | 7.36k | c < pcs->ppcs->tile_group_info[tile_group_idx].tile_group_tile_end_x; |
766 | 5.67k | c++) { |
767 | 5.67k | uint16_t tile_idx = c + r * pcs->ppcs->av1_cm->tiles_info.tile_cols; |
768 | 5.67k | svt_aom_reset_mode_decision_neighbor_arrays(pcs, tile_idx); |
769 | 5.67k | } |
770 | 1.68k | } |
771 | 533 | (void)scs; |
772 | 533 | } |
773 | | //each segment enherits the bypass encdec from the picture level |
774 | 5.76k | ctx->bypass_encdec = pcs->pic_bypass_encdec; |
775 | | |
776 | 5.76k | if (!rtc_tune && (pcs->enc_mode <= ENC_M11 || pcs->temporal_layer_index != 0)) { |
777 | 5.76k | ctx->rtc_use_N4_dct_dct_shortcut = 1; |
778 | 5.76k | } else { |
779 | 0 | ctx->rtc_use_N4_dct_dct_shortcut = 0; |
780 | 0 | } |
781 | 5.76k | return; |
782 | 5.76k | } |
783 | | |
784 | | /****************************************************** |
785 | | * Mode Decision Configure SB |
786 | | ******************************************************/ |
787 | | void svt_aom_mode_decision_configure_sb(ModeDecisionContext* ctx, PictureControlSet* pcs, uint8_t sb_qp, |
788 | 6.90k | uint8_t me_sb_qp) { |
789 | | /* Note(CHKN) : when Qp modulation varies QP on a sub-SB(CU) basis, Lamda has to change based on Cu->QP , and then this code has to move inside the CU loop in MD */ |
790 | | |
791 | | // Lambda Assignement |
792 | 6.90k | ctx->qp_index = pcs->ppcs->frm_hdr.delta_q_params.delta_q_present || pcs->ppcs->r0_delta_qp_md |
793 | 6.90k | ? sb_qp |
794 | 6.90k | : (uint8_t)pcs->ppcs->frm_hdr.quantization_params.base_q_idx; |
795 | | |
796 | 6.90k | ctx->me_q_index = me_sb_qp; |
797 | | |
798 | 6.90k | av1_lambda_assign_md(pcs, ctx); |
799 | | |
800 | 6.90k | ctx->hbd_pack_done = 0; |
801 | | |
802 | 6.90k | return; |
803 | 6.90k | } |