Coverage Report

Created: 2026-05-16 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/svt-av1/Source/Lib/Codec/md_process.c
Line
Count
Source
1
/*
2
* Copyright(c) 2019 Intel Corporation
3
*
4
* This source code is subject to the terms of the BSD 2 Clause License and
5
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
* was not distributed with this source code in the LICENSE file, you can
7
* obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
8
* Media Patent License 1.0 was not distributed with this source code in the
9
* PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
10
*/
11
12
#include <stdlib.h>
13
14
#include "utility.h"
15
#include "md_process.h"
16
#include "lambda_rate_tables.h"
17
#include "rc_process.h"
18
#include "enc_mode_config.h"
19
20
/*
 * 64-entry lookup table, presumably mapping a quantizer value (0..63) to a
 * qindex (confirm against callers). Entries 0..61 equal 4 * index; the last
 * two entries are 249 and 255.
 */
const uint8_t quantizer_to_qindex[64] = {
    0,   4,   8,   12,  16,  20,  24,  28,  //
    32,  36,  40,  44,  48,  52,  56,  60,  //
    64,  68,  72,  76,  80,  84,  88,  92,  //
    96,  100, 104, 108, 112, 116, 120, 124, //
    128, 132, 136, 140, 144, 148, 152, 156, //
    160, 164, 168, 172, 176, 180, 184, 188, //
    192, 196, 200, 204, 208, 212, 216, 220, //
    224, 228, 232, 236, 240, 244, 249, 255};
24
25
/* Two rows of FIXED_QP_OFFSET_COUNT percentage values, indexed by [row][offset]. */
const int percents[2][FIXED_QP_OFFSET_COUNT] = {
    {75, 70, 60, 20, 15, 0},
    {76, 60, 30, 15, 8, 4}, // libaom offsets
};
28
29
/*
 * 64-entry bias table: entries 0..15 are 85, entries 16..47 are 95 and
 * entries 48..63 are 100.
 */
const uint8_t uni_psy_bias[64] = {
    85,  85,  85,  85,  85,  85,  85,  85,  85,  85,  85,  85,  85,  85,  85,  85, //
    95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  95, //
    95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  95,  95, //
    100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
};
34
35
1.48k
/*
 * Destructor for a ModeDecisionContext.
 *
 * Releases every buffer owned by the context. The constructor stores this
 * function in ctx->dctor before it performs its first allocation, so the
 * destructor can presumably be invoked on a partially constructed context.
 * Every pointer that is dereferenced here (md_blk_arr_nsq, cand_bf_tx_depth_1,
 * cand_bf_tx_depth_2) is therefore checked for NULL before use.
 *
 * p: the ModeDecisionContext to tear down (passed as a generic EbPtr).
 */
static void mode_decision_context_dctor(EbPtr p) {
    ModeDecisionContext* obj = (ModeDecisionContext*)p;

    const uint32_t block_max_count_sb = obj->init_max_block_cnt;

    // MD palette search
    if (obj->palette_buffer) {
        EB_FREE(obj->palette_buffer);
    }
    if (obj->palette_cand_array) {
        // Free fields in palette_cand_array before freeing palette_cand_array
        for (int cd = 0; cd < MAX_PAL_CAND; cd++) {
            if (obj->palette_cand_array[cd].color_idx_map) {
                EB_FREE_ARRAY(obj->palette_cand_array[cd].color_idx_map);
            }
        }

        EB_FREE_ARRAY(obj->palette_cand_array);
    }
    if (obj->palette_size_array_0) {
        EB_FREE_ARRAY(obj->palette_size_array_0);
    }
    for (CandClass cand_class_it = CAND_CLASS_0; cand_class_it < CAND_CLASS_TOTAL; cand_class_it++) {
        EB_FREE_ARRAY(obj->cand_buff_indices[cand_class_it]);
    }
    EB_FREE_ARRAY(obj->best_candidate_index_array);

    EB_FREE_ARRAY(obj->above_txfm_context);
    EB_FREE_ARRAY(obj->left_txfm_context);

    // Per-block temporary buffers. md_blk_arr_nsq is still NULL if construction
    // stopped before the block array was allocated, so do not index it blindly.
    // NOTE(review): the array is allocated with EB_MALLOC_ARRAY; if that macro does
    // not zero memory, coeff_tmp/recon_tmp could be uninitialized when construction
    // fails before they are assigned - confirm.
    if (obj->md_blk_arr_nsq) {
        for (uint32_t coded_leaf_index = 0; coded_leaf_index < block_max_count_sb; ++coded_leaf_index) {
            if (obj->md_blk_arr_nsq[coded_leaf_index].coeff_tmp) {
                EB_DELETE(obj->md_blk_arr_nsq[coded_leaf_index].coeff_tmp);
            }
            if (obj->md_blk_arr_nsq[coded_leaf_index].recon_tmp) {
                EB_DELETE(obj->md_blk_arr_nsq[coded_leaf_index].recon_tmp);
            }
        }
    }
    EB_DELETE_PTR_ARRAY(obj->cand_bf_ptr_array, obj->max_nics_uv);

    // The tx-depth scratch candidate buffers own a separately allocated `cand`;
    // only reach through the buffer pointer when the buffer itself exists.
    if (obj->cand_bf_tx_depth_1) {
        EB_FREE_ARRAY(obj->cand_bf_tx_depth_1->cand);
        EB_DELETE(obj->cand_bf_tx_depth_1);
    }
    if (obj->cand_bf_tx_depth_2) {
        EB_FREE_ARRAY(obj->cand_bf_tx_depth_2->cand);
        EB_DELETE(obj->cand_bf_tx_depth_2);
    }
    EB_FREE_ALIGNED_ARRAY(obj->cfl_temp_luma_recon16bit);
    EB_FREE_ALIGNED_ARRAY(obj->cfl_temp_luma_recon);
    EB_FREE_ALIGNED_ARRAY(obj->pred_buf_q3);
    EB_FREE_ARRAY(obj->fast_cand_array);
    EB_FREE_2D(obj->injected_mvs);
    EB_FREE_ARRAY(obj->injected_ref_types);
    EB_FREE_ARRAY(obj->fast_cost_array);
    EB_FREE_ARRAY(obj->full_cost_array);

    // The neighbour-recon arrays and av1xd are single allocations anchored at
    // element 0 of md_blk_arr_nsq (the other elements point into them), so they
    // are released once through element 0.
    if (obj->md_blk_arr_nsq) {
        for (int i = 0; i < 3; i++) {
            EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].neigh_left_recon_16bit[i]);
            EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].neigh_top_recon_16bit[i]);
            EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].neigh_left_recon[i]);
            EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].neigh_top_recon[i]);
        }
        EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].av1xd);
    }
    EB_FREE_ARRAY(obj->mds);
    EB_FREE_ARRAY(obj->pc_tree);
    EB_FREE_ARRAY(obj->tested_blk);
    obj->blocks_to_alloc = 0;
    // Must come after every access to md_blk_arr_nsq[...] above.
    EB_FREE_ARRAY(obj->md_blk_arr_nsq);
    if (obj->rate_est_table) {
        EB_FREE_ARRAY(obj->rate_est_table);
    }

    // Inter-inter compound prediction buffers
    for (int i = 0; i < NEAREST_NEAR_MV_CNT; i++) {
        if (obj->cmp_store.pred0_buf[i]) {
            EB_FREE(obj->cmp_store.pred0_buf[i]);
        }
        if (obj->cmp_store.pred1_buf[i]) {
            EB_FREE(obj->cmp_store.pred1_buf[i]);
        }
    }
    if (obj->residual1) {
        EB_FREE(obj->residual1);
    }
    if (obj->diff10) {
        EB_FREE(obj->diff10);
    }

    // Inter-intra prediction buffer
    if (obj->intrapred_buf) {
        EB_FREE_2D(obj->intrapred_buf);
    }

    // OBMC prediction buffers
    if (obj->obmc_buff_0) {
        EB_FREE(obj->obmc_buff_0);
    }
    if (obj->obmc_buff_1) {
        EB_FREE(obj->obmc_buff_1);
    }
    if (obj->wsrc_buf) {
        EB_FREE(obj->wsrc_buf);
    }
    if (obj->mask_buf) {
        EB_FREE(obj->mask_buf);
    }
    for (uint32_t txt_itr = 0; txt_itr < TX_TYPES; ++txt_itr) {
        EB_DELETE(obj->recon_coeff_ptr[txt_itr]);
        EB_DELETE(obj->recon_ptr[txt_itr]);
        EB_DELETE(obj->quant_coeff_ptr[txt_itr]);
    }
    EB_DELETE(obj->tx_coeffs);
    EB_DELETE(obj->scratch_prediction_ptr);
    EB_DELETE(obj->temp_residual);
    EB_DELETE(obj->temp_recon_ptr);
    EB_FREE_ARRAY(obj->full_cost_ssim_array);
}
147
148
// Forward declaration; the definition is not in this part of the file.
// The three count arrays each hold CAND_CLASS_TOTAL entries.
void svt_aom_set_nics(SequenceControlSet* scs,
                      NicScalingCtrls*    scaling_ctrls,
                      uint32_t            mds1_count[CAND_CLASS_TOTAL],
                      uint32_t            mds2_count[CAND_CLASS_TOTAL],
                      uint32_t            mds3_count[CAND_CLASS_TOTAL],
                      uint8_t             pic_type,
                      uint32_t            qp);
151
152
/*
 * Recursively fills in the MdScan node `mds` and, while the block is wider
 * than min_sq_size, its four split children.
 *
 * scs:         supplies the block-geometry table (scs->blk_geom_mds).
 * mds:         node to initialise; children are placed in the same array
 *              after this node.
 * mds_idx:     running block index; read for this node, then advanced by the
 *              geometry's d1_depth_offset (node is split) or ns_depth_offset
 *              (node is a leaf).
 * index:       value stored in mds->index (the recursion passes the child
 *              number 0..SUB_PARTITIONS_SPLIT-1).
 * bsize:       block size of this node.
 * min_sq_size: width at or below which a node is not split further.
 */
static void setup_mds(SequenceControlSet* scs, MdScan* mds, uint32_t* mds_idx, int index, BlockSize bsize,
                      const int min_sq_size) {
    mds->mds_idx = *mds_idx;
    mds->bsize   = bsize;
    mds->index   = index;

    const BlockGeom* geom  = get_blk_geom_mds(scs->blk_geom_mds, *mds_idx);
    const int        width = block_size_wide[bsize];

    // Leaf: nothing to split, just step the running index past this block.
    if (!(width > min_sq_size)) {
        *mds_idx += geom->ns_depth_offset;
        return;
    }

    const BlockSize child_bsize = get_partition_subsize(bsize, PARTITION_SPLIT);
    const int       child_width = block_size_wide[child_bsize];

    // Sum the per-depth node counts from min_sq_size up to the child width;
    // the total is the array stride between consecutive children.
    int nodes_at_depth = (child_width / min_sq_size) * (child_width / min_sq_size);
    int child_stride   = 0;
    int depth_width    = min_sq_size;
    while (depth_width <= child_width) {
        child_stride += nodes_at_depth;
        depth_width <<= 1;
        nodes_at_depth >>= 2;
    }

    *mds_idx += geom->d1_depth_offset;
    for (int quad = 0; quad < SUB_PARTITIONS_SPLIT; ++quad) {
        mds->split[quad] = &mds[1 + quad * child_stride];
        setup_mds(scs, mds->split[quad], mds_idx, quad, child_bsize, min_sq_size);
    }
}
180
181
/*
 * Recursively fills in the PC_TREE node `pc_tree` and, while the block is
 * wider than min_sq_size, its four split children.
 *
 * pc_tree:        node to initialise; children are placed in the same array
 *                 after this node and get their `parent` set to this node.
 * test_blk_array: tested-block storage laid out in parallel with the node
 *                 array; element 0 is attached to this node.
 * index:          value stored in pc_tree->index (the recursion passes the
 *                 child number 0..SUB_PARTITIONS_SPLIT-1).
 * bsize:          block size of this node.
 * min_sq_size:    width at or below which a node is not split further.
 */
static void setup_pc_tree(PC_TREE* pc_tree, bool (*test_blk_array)[PART_S][4], int index, BlockSize bsize,
                          const int min_sq_size) {
    pc_tree->bsize      = bsize;
    pc_tree->index      = index;
    pc_tree->tested_blk = test_blk_array[0];

    const int width = block_size_wide[bsize];

    // Leaf: no split children to set up.
    if (!(width > min_sq_size)) {
        return;
    }

    const BlockSize child_bsize = get_partition_subsize(bsize, PARTITION_SPLIT);
    const int       child_width = block_size_wide[child_bsize];

    // Sum the per-depth node counts from min_sq_size up to the child width;
    // the total is the array stride between consecutive children.
    int nodes_at_depth = (child_width / min_sq_size) * (child_width / min_sq_size);
    int child_stride   = 0;
    int depth_width    = min_sq_size;
    while (depth_width <= child_width) {
        child_stride += nodes_at_depth;
        depth_width <<= 1;
        nodes_at_depth >>= 2;
    }

    for (int quad = 0; quad < SUB_PARTITIONS_SPLIT; ++quad) {
        const int child_pos = quad * child_stride + 1;

        pc_tree->split[quad]         = pc_tree + child_pos;
        pc_tree->split[quad]->parent = pc_tree;
        setup_pc_tree(pc_tree->split[quad], test_blk_array + child_pos, quad, child_bsize, min_sq_size);
    }
}
206
207
/******************************************************
208
 * Mode Decision Context Constructor
209
 ******************************************************/
210
EbErrorType svt_aom_mode_decision_context_ctor(ModeDecisionContext* ctx, SequenceControlSet* scs,
211
                                               EbColorFormat color_format, uint8_t sb_size, EncMode enc_mode,
212
                                               uint16_t max_block_cnt, uint32_t encoder_bit_depth,
213
                                               EbFifo* mode_decision_configuration_input_fifo_ptr,
214
                                               EbFifo* mode_decision_output_fifo_ptr, uint8_t enable_hbd_mode_decision,
215
1.48k
                                               uint8_t seq_qp_mod) {
216
1.48k
    const bool allintra = scs->allintra;
217
1.48k
    const bool rtc_tune = scs->static_config.rtc;
218
1.48k
    uint32_t   buffer_index;
219
1.48k
    uint32_t   cand_index;
220
221
1.48k
    ctx->init_max_block_cnt     = max_block_cnt;
222
1.48k
    uint32_t block_max_count_sb = max_block_cnt;
223
224
1.48k
    ctx->sb_size = sb_size;
225
1.48k
    (void)color_format;
226
227
1.48k
    ctx->dctor  = mode_decision_context_dctor;
228
1.48k
    ctx->hbd_md = enable_hbd_mode_decision;
229
230
    // Input/Output System Resource Manager FIFOs
231
1.48k
    ctx->mode_decision_configuration_input_fifo_ptr = mode_decision_configuration_input_fifo_ptr;
232
1.48k
    ctx->mode_decision_output_fifo_ptr              = mode_decision_output_fifo_ptr;
233
234
    // Maximum number of candidates MD can support
235
    // determine MAX_NICS for a given preset
236
    // get the min scaling level (the smallest scaling level is the most conservative)
237
1.48k
    uint8_t min_nic_scaling_level = NICS_SCALING_LEVELS - 1;
238
1.48k
    uint8_t stage1_scaling_num;
239
1.48k
    if (allintra) {
240
1.48k
        uint8_t nic_level  = svt_aom_get_nic_level_allintra(enc_mode);
241
1.48k
        stage1_scaling_num = MD_STAGE_NICS_SCAL_NUM[svt_aom_set_nic_controls(NULL, nic_level)][MD_STAGE_1];
242
1.48k
    } else if (rtc_tune) {
243
0
#if TUNE_SIMPLIFY_SETTINGS
244
0
        uint8_t nic_level = svt_aom_get_nic_level_rtc(enc_mode);
245
#else
246
        uint8_t nic_level = svt_aom_get_nic_level_rtc(enc_mode, scs->use_flat_ipp);
247
#endif
248
0
        stage1_scaling_num = MD_STAGE_NICS_SCAL_NUM[svt_aom_set_nic_controls(NULL, nic_level)][MD_STAGE_1];
249
0
    } else {
250
0
#if TUNE_SIMPLIFY_SETTINGS
251
0
        for (uint8_t is_base = 0; is_base < 2; is_base++) {
252
0
            uint8_t nic_level         = svt_aom_get_nic_level_default(enc_mode, is_base);
253
0
            uint8_t nic_scaling_level = svt_aom_set_nic_controls(NULL, nic_level);
254
0
            min_nic_scaling_level     = MIN(min_nic_scaling_level, nic_scaling_level);
255
0
        }
256
#else
257
        for (uint8_t sc_class1 = 0; sc_class1 < 2; sc_class1++) {
258
            for (uint8_t is_base = 0; is_base < 2; is_base++) {
259
                uint8_t nic_level         = svt_aom_get_nic_level_default(enc_mode, is_base, sc_class1);
260
                uint8_t nic_scaling_level = svt_aom_set_nic_controls(NULL, nic_level);
261
                min_nic_scaling_level     = MIN(min_nic_scaling_level, nic_scaling_level);
262
            }
263
        }
264
#endif
265
266
0
        stage1_scaling_num = MD_STAGE_NICS_SCAL_NUM[min_nic_scaling_level][MD_STAGE_1];
267
0
    }
268
    // scale max_nics
269
1.48k
    uint32_t max_nics = 0;
270
1.48k
    {
271
1.48k
        NicScalingCtrls scaling_ctrls;
272
1.48k
        scaling_ctrls.stage1_scaling_num = stage1_scaling_num;
273
1.48k
        scaling_ctrls.stage2_scaling_num = stage1_scaling_num;
274
1.48k
        scaling_ctrls.stage3_scaling_num = stage1_scaling_num;
275
1.48k
        uint32_t mds1_count[CAND_CLASS_TOTAL];
276
1.48k
        uint32_t mds2_count[CAND_CLASS_TOTAL];
277
1.48k
        uint32_t mds3_count[CAND_CLASS_TOTAL];
278
5.93k
        for (uint8_t pic_type = 0; pic_type < NICS_PIC_TYPE; pic_type++) {
279
289k
            for (uint8_t qp = MIN_QP_VALUE; qp <= MAX_QP_VALUE; qp++) {
280
284k
                svt_aom_set_nics(scs, &scaling_ctrls, mds1_count, mds2_count, mds3_count, pic_type, qp);
281
282
284k
                uint32_t nics = 0;
283
1.70M
                for (CandClass cidx = CAND_CLASS_0; cidx < CAND_CLASS_TOTAL; cidx++) {
284
1.42M
                    nics += mds1_count[cidx];
285
1.42M
                }
286
284k
                max_nics = MAX(max_nics, nics);
287
284k
            }
288
4.45k
        }
289
1.48k
    }
290
291
    // If independent chroma search is used, need to allocate additional 84 candidate buffers
292
1.48k
    bool is_chroma_mode_0;
293
1.48k
    if (allintra) {
294
1.48k
        is_chroma_mode_0 = svt_aom_set_chroma_controls(NULL, svt_aom_get_chroma_level_allintra(enc_mode)) ==
295
1.48k
            CHROMA_MODE_0;
296
1.48k
    } else if (scs->static_config.rtc) {
297
0
#if TUNE_SIMPLIFY_SETTINGS
298
0
        is_chroma_mode_0 = svt_aom_set_chroma_controls(
299
0
                               NULL, svt_aom_get_chroma_level_rtc(enc_mode, scs->use_flat_ipp)) == CHROMA_MODE_0;
300
#else
301
        for (uint8_t is_i_slice = 0; is_i_slice < 2; is_i_slice++) {
302
            is_chroma_mode_0 = svt_aom_set_chroma_controls(NULL, svt_aom_get_chroma_level_rtc(enc_mode, is_i_slice)) ==
303
                CHROMA_MODE_0;
304
            if (is_chroma_mode_0) {
305
                break;
306
            }
307
        }
308
#endif
309
0
    } else {
310
0
        for (uint8_t is_i_slice = 0; is_i_slice < 2; is_i_slice++) {
311
0
            is_chroma_mode_0 = svt_aom_set_chroma_controls(
312
0
                                   NULL, svt_aom_get_chroma_level_default(enc_mode, is_i_slice)) == CHROMA_MODE_0;
313
0
            if (is_chroma_mode_0) {
314
0
                break;
315
0
            }
316
0
        }
317
0
    }
318
1.48k
    const uint8_t ind_uv_cands = is_chroma_mode_0 ? 84 : 0;
319
1.48k
    max_nics += CAND_CLASS_TOTAL; //need one extra temp buffer for each fast loop call
320
1.48k
    ctx->max_nics    = max_nics;
321
1.48k
    ctx->max_nics_uv = max_nics + ind_uv_cands;
322
    // Cfl scratch memory
323
1.48k
    if (ctx->hbd_md > EB_8_BIT_MD) {
324
0
        EB_MALLOC_ALIGNED(ctx->cfl_temp_luma_recon16bit, sizeof(uint16_t) * sb_size * sb_size);
325
0
    }
326
1.48k
    if (ctx->hbd_md != EB_10_BIT_MD) {
327
1.48k
        EB_MALLOC_ALIGNED(ctx->cfl_temp_luma_recon, sizeof(uint8_t) * sb_size * sb_size);
328
1.48k
    }
329
1.48k
    EB_MALLOC_ALIGNED(ctx->pred_buf_q3, CFL_BUF_SQUARE);
330
1.48k
    uint8_t use_update_cdf = 0;
331
1.48k
    if (allintra) {
332
1.48k
        use_update_cdf = svt_aom_get_update_cdf_level_allintra(enc_mode);
333
1.48k
    } else if (rtc_tune) {
334
0
#if TUNE_SIMPLIFY_SETTINGS
335
0
        for (uint8_t is_islice = 0; is_islice < 2; is_islice++) {
336
0
            if (use_update_cdf) {
337
0
                break;
338
0
            }
339
0
            use_update_cdf |= svt_aom_get_update_cdf_level_rtc(enc_mode, is_islice);
340
0
        }
341
#else
342
        for (uint8_t sc_class1 = 0; sc_class1 < 2; sc_class1++) {
343
            for (uint8_t is_islice = 0; is_islice < 2; is_islice++) {
344
                for (uint8_t is_base = 0; is_base < 2; is_base++) {
345
                    if (use_update_cdf) {
346
                        break;
347
                    }
348
                    use_update_cdf |= svt_aom_get_update_cdf_level_rtc(enc_mode, is_islice, is_base, sc_class1);
349
                }
350
            }
351
        }
352
#endif
353
0
    } else {
354
0
#if TUNE_SIMPLIFY_SETTINGS
355
0
        for (uint8_t is_islice = 0; is_islice < 2; is_islice++) {
356
0
            for (uint8_t is_base = 0; is_base < 2; is_base++) {
357
0
                if (use_update_cdf) {
358
0
                    break;
359
0
                }
360
0
                use_update_cdf |= svt_aom_get_update_cdf_level_default(enc_mode, is_islice, is_base);
361
0
            }
362
0
        }
363
#else
364
        for (uint8_t sc_class1 = 0; sc_class1 < 2; sc_class1++) {
365
            for (uint8_t is_islice = 0; is_islice < 2; is_islice++) {
366
                for (uint8_t is_base = 0; is_base < 2; is_base++) {
367
                    if (use_update_cdf) {
368
                        break;
369
                    }
370
                    use_update_cdf |= svt_aom_get_update_cdf_level_default(enc_mode, is_islice, is_base, sc_class1);
371
                }
372
            }
373
        }
374
#endif
375
0
    }
376
1.48k
    if (use_update_cdf) {
377
0
        EB_CALLOC_ARRAY(ctx->rate_est_table, 1);
378
1.48k
    } else {
379
1.48k
        ctx->rate_est_table = NULL;
380
1.48k
    }
381
    // Allocate buffer for inter-inter compound prediction
382
1.48k
    if (get_inter_compound_level(enc_mode)) {
383
0
        const uint8_t bits = ctx->hbd_md > EB_8_BIT_MD ? 2 : 1;
384
0
        for (int i = 0; i < NEAREST_NEAR_MV_CNT; i++) {
385
0
            EB_MALLOC(ctx->cmp_store.pred0_buf[i], sb_size * sb_size * bits * sizeof(uint8_t));
386
0
            EB_MALLOC(ctx->cmp_store.pred1_buf[i], sb_size * sb_size * bits * sizeof(uint8_t));
387
0
        }
388
0
        EB_MALLOC(ctx->residual1, sb_size * sb_size * sizeof(ctx->residual1[0]));
389
0
        EB_MALLOC(ctx->diff10, sb_size * sb_size * sizeof(ctx->diff10[0]));
390
0
    }
391
392
    // Allocate buffer for inter-intra prediction
393
1.48k
    uint8_t ii_allowed = 0;
394
4.45k
    for (uint8_t transition_present = 0; transition_present < 2; transition_present++) {
395
2.96k
        if (ii_allowed) {
396
0
            break;
397
0
        }
398
2.96k
        ii_allowed |= svt_aom_get_inter_intra_level(enc_mode, transition_present);
399
2.96k
    }
400
1.48k
    if (ii_allowed) {
401
0
        const uint8_t bits = ctx->hbd_md > EB_8_BIT_MD ? 2 : 1;
402
        // MAX block size for inter intra is 32x32
403
0
        EB_MALLOC_2D(ctx->intrapred_buf, INTERINTRA_MODES, 32 * 32 * bits * sizeof(ctx->intrapred_buf[0][0]));
404
0
    }
405
406
    // Allocate buffers for obmc prediction
407
1.48k
    uint8_t obmc_allowed = 0;
408
4.45k
    for (uint8_t is_base = 0; is_base < 2; is_base++) {
409
4.45k
        for (uint8_t qp = MIN_QP_VALUE; qp <= MAX_QP_VALUE; qp++) {
410
4.45k
            if (obmc_allowed) {
411
2.96k
                break;
412
2.96k
            }
413
1.48k
#if TUNE_SHIFT_PRESETS_RTC
414
1.48k
            obmc_allowed |= svt_aom_get_obmc_level(enc_mode, qp, seq_qp_mod, rtc_tune);
415
#else
416
            obmc_allowed |= svt_aom_get_obmc_level(enc_mode, qp, seq_qp_mod);
417
#endif
418
1.48k
        }
419
2.96k
    }
420
1.48k
    if (obmc_allowed) {
421
1.48k
        const uint8_t bits = ctx->hbd_md > EB_8_BIT_MD ? 2 : 1;
422
1.48k
        EB_MALLOC(ctx->obmc_buff_0, sb_size * sb_size * bits * MAX_PLANES * sizeof(ctx->obmc_buff_0[0]));
423
1.48k
        EB_MALLOC(ctx->obmc_buff_1, sb_size * sb_size * bits * MAX_PLANES * sizeof(ctx->obmc_buff_1[0]));
424
1.48k
        EB_MALLOC(ctx->wsrc_buf, sb_size * sb_size * sizeof(ctx->wsrc_buf[0]));
425
1.48k
        EB_MALLOC(ctx->mask_buf, sb_size * sb_size * sizeof(ctx->mask_buf[0]));
426
1.48k
    }
427
1.48k
    EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq, block_max_count_sb);
428
    // Fast Candidate Array
429
1.48k
    uint16_t max_can_count = svt_aom_get_max_can_count(enc_mode) + ind_uv_cands;
430
1.48k
    EB_MALLOC_ARRAY(ctx->fast_cand_array, max_can_count);
431
432
283k
    for (cand_index = 0; cand_index < max_can_count; ++cand_index) {
433
281k
        ctx->fast_cand_array[cand_index].palette_info = NULL;
434
281k
    }
435
1.48k
    svt_aom_assert_err(max_can_count > ind_uv_cands, "Max. candidates is too low");
436
1.48k
    EB_MALLOC_2D(ctx->injected_mvs, (uint16_t)(max_can_count - ind_uv_cands), 2);
437
1.48k
    EB_MALLOC_ARRAY(ctx->injected_ref_types, (max_can_count - ind_uv_cands));
438
439
    // Set buffers for MD palette search to NULL; will be init'd at runtime if needed
440
1.48k
    ctx->palette_buffer       = NULL;
441
1.48k
    ctx->palette_cand_array   = NULL;
442
1.48k
    ctx->palette_size_array_0 = NULL;
443
444
    // Cost Arrays
445
1.48k
    EB_MALLOC_ARRAY(ctx->fast_cost_array, ctx->max_nics_uv);
446
1.48k
    EB_MALLOC_ARRAY(ctx->full_cost_array, ctx->max_nics_uv);
447
1.48k
    EB_MALLOC_ARRAY(ctx->full_cost_ssim_array, ctx->max_nics_uv);
448
    // Candidate Buffers
449
1.48k
    EB_NEW(ctx->cand_bf_tx_depth_1,
450
1.48k
           svt_aom_mode_decision_scratch_cand_bf_ctor,
451
1.48k
           sb_size,
452
1.48k
           ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT);
453
454
1.48k
    EB_ALLOC_PTR_ARRAY(ctx->cand_bf_tx_depth_1->cand, 1);
455
1.48k
    EB_NEW(ctx->cand_bf_tx_depth_2,
456
1.48k
           svt_aom_mode_decision_scratch_cand_bf_ctor,
457
1.48k
           sb_size,
458
1.48k
           ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT);
459
460
1.48k
    EB_ALLOC_PTR_ARRAY(ctx->cand_bf_tx_depth_2->cand, 1);
461
5.93k
    for (int i = 0; i < 3; i++) {
462
4.45k
        ctx->md_blk_arr_nsq[0].neigh_left_recon[i]       = NULL;
463
4.45k
        ctx->md_blk_arr_nsq[0].neigh_top_recon[i]        = NULL;
464
4.45k
        ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[i] = NULL;
465
4.45k
        ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[i]  = NULL;
466
4.45k
    }
467
1.48k
    uint32_t coded_leaf_index;
468
1.48k
    uint16_t sz = sizeof(uint16_t);
469
1.48k
    if (ctx->hbd_md > EB_8_BIT_MD) {
470
0
        EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[0], block_max_count_sb * sb_size * sz);
471
0
        EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[0], block_max_count_sb * sb_size * sz);
472
0
        EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[1], block_max_count_sb * sb_size * sz >> 1);
473
0
        EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[1], block_max_count_sb * sb_size * sz >> 1);
474
0
        EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[2], block_max_count_sb * sb_size * sz >> 1);
475
0
        EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[2], block_max_count_sb * sb_size * sz >> 1);
476
477
0
        for (coded_leaf_index = 0; coded_leaf_index < block_max_count_sb; ++coded_leaf_index) {
478
0
            size_t offset = coded_leaf_index * sb_size * sz;
479
0
            ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon_16bit[0] =
480
0
                ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[0] + offset;
481
0
            ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon_16bit[0] =
482
0
                ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[0] + offset;
483
0
            offset >>= 1;
484
0
            ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon_16bit[1] =
485
0
                ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[1] + offset;
486
0
            ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon_16bit[1] =
487
0
                ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[1] + offset;
488
0
            ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon_16bit[2] =
489
0
                ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[2] + offset;
490
0
            ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon_16bit[2] =
491
0
                ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[2] + offset;
492
0
        }
493
0
    }
494
1.48k
    if (ctx->hbd_md != EB_10_BIT_MD) {
495
1.48k
        EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon[0], block_max_count_sb * sb_size);
496
1.48k
        EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon[0], block_max_count_sb * sb_size);
497
1.48k
        EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon[1], block_max_count_sb * sb_size >> 1);
498
1.48k
        EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon[1], block_max_count_sb * sb_size >> 1);
499
1.48k
        EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon[2], block_max_count_sb * sb_size >> 1);
500
1.48k
        EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon[2], block_max_count_sb * sb_size >> 1);
501
502
127k
        for (coded_leaf_index = 0; coded_leaf_index < block_max_count_sb; ++coded_leaf_index) {
503
126k
            size_t offset                                             = coded_leaf_index * sb_size;
504
126k
            ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon[0] = ctx->md_blk_arr_nsq[0].neigh_left_recon[0] +
505
126k
                offset;
506
126k
            ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon[0] = ctx->md_blk_arr_nsq[0].neigh_top_recon[0] +
507
126k
                offset;
508
126k
            offset >>= 1;
509
126k
            ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon[1] = ctx->md_blk_arr_nsq[0].neigh_left_recon[1] +
510
126k
                offset;
511
126k
            ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon[1] = ctx->md_blk_arr_nsq[0].neigh_top_recon[1] +
512
126k
                offset;
513
126k
            ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon[2] = ctx->md_blk_arr_nsq[0].neigh_left_recon[2] +
514
126k
                offset;
515
126k
            ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon[2] = ctx->md_blk_arr_nsq[0].neigh_top_recon[2] +
516
126k
                offset;
517
126k
        }
518
1.48k
    }
519
1.48k
    ctx->md_blk_arr_nsq[0].av1xd = NULL;
520
1.48k
    EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].av1xd, block_max_count_sb);
521
522
    // Alloc mds and pc_tree, which are used to track tested blocks in MD
523
1.48k
    bool disallow_4x4 = allintra ? svt_aom_get_disallow_4x4_allintra(enc_mode)
524
1.48k
#if TUNE_SIMPLIFY_SETTINGS
525
1.48k
        : rtc_tune ? svt_aom_get_disallow_4x4_rtc()
526
#else
527
        : rtc_tune ? svt_aom_get_disallow_4x4_rtc(enc_mode)
528
#endif
529
0
                   : svt_aom_get_disallow_4x4_default(enc_mode);
530
1.48k
    bool    disallow_8x8     = allintra ? svt_aom_get_disallow_8x8_allintra()
531
1.48k
               : rtc_tune ? svt_aom_get_disallow_8x8_rtc(enc_mode, scs->max_input_luma_width, scs->max_input_luma_height)
532
0
                          : svt_aom_get_disallow_8x8_default();
533
1.48k
    uint8_t min_bsize        = disallow_8x8 ? 16 : disallow_4x4 ? 8 : 4;
534
1.48k
    int     blocks_per_depth = (sb_size / min_bsize) * (sb_size / min_bsize);
535
1.48k
    int     blocks_to_alloc  = 0;
536
537
7.42k
    for (int i = min_bsize; i <= sb_size; i <<= 1, blocks_per_depth >>= 2) {
538
5.93k
        blocks_to_alloc += blocks_per_depth;
539
5.93k
    }
540
1.48k
    EB_CALLOC_ARRAY(ctx->mds, blocks_to_alloc);
541
1.48k
    uint32_t mds_idx = 0;
542
1.48k
    setup_mds(scs, ctx->mds, &mds_idx, 0, scs->seq_header.sb_size, min_bsize);
543
1.48k
    EB_CALLOC_ARRAY(ctx->pc_tree, blocks_to_alloc);
544
1.48k
    EB_MALLOC_ARRAY(ctx->tested_blk, blocks_to_alloc);
545
1.48k
    setup_pc_tree(ctx->pc_tree, ctx->tested_blk, 0, scs->seq_header.sb_size, min_bsize);
546
1.48k
    ctx->blocks_to_alloc = blocks_to_alloc;
547
548
1.48k
    bool bypass_encdec = allintra ? svt_aom_get_bypass_encdec_allintra(enc_mode)
549
1.48k
        : rtc_tune                ? svt_aom_get_bypass_encdec_rtc(enc_mode, encoder_bit_depth)
550
0
                                  : svt_aom_get_bypass_encdec_default(enc_mode, encoder_bit_depth);
551
127k
    for (coded_leaf_index = 0; coded_leaf_index < block_max_count_sb; ++coded_leaf_index) {
552
126k
        ctx->md_blk_arr_nsq[coded_leaf_index].av1xd      = ctx->md_blk_arr_nsq[0].av1xd + coded_leaf_index;
553
126k
        ctx->md_blk_arr_nsq[coded_leaf_index].segment_id = 0;
554
126k
        const BlockGeom* blk_geom                        = get_blk_geom_mds(scs->blk_geom_mds, coded_leaf_index);
555
126k
        if (bypass_encdec) {
556
126k
            EbPictureBufferDescInitData init_data;
557
558
126k
            init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK;
559
126k
            init_data.max_width          = blk_geom->bwidth;
560
126k
            init_data.max_height         = blk_geom->bheight;
561
126k
            init_data.bit_depth          = EB_THIRTYTWO_BIT;
562
126k
            init_data.color_format       = (blk_geom->bwidth > 4 && blk_geom->bheight > 4)
563
126k
                      ? EB_YUV420
564
126k
                      : EB_YUV444; // PW - must have at least 4x4 for chroma coeffs
565
126k
            init_data.border             = 0;
566
126k
            init_data.split_mode         = false;
567
568
126k
            EB_NEW(ctx->md_blk_arr_nsq[coded_leaf_index].coeff_tmp, svt_picture_buffer_desc_ctor, (EbPtr)&init_data);
569
570
126k
            init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK;
571
126k
            init_data.max_width          = blk_geom->bwidth;
572
126k
            init_data.max_height         = blk_geom->bheight;
573
126k
            init_data.bit_depth          = ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT;
574
126k
            ;
575
126k
            init_data.color_format = (blk_geom->bwidth > 4 && blk_geom->bheight > 4) ? EB_YUV420 : EB_YUV444;
576
126k
            init_data.border       = 0;
577
126k
            init_data.split_mode   = false;
578
579
126k
            EB_NEW(ctx->md_blk_arr_nsq[coded_leaf_index].recon_tmp, svt_picture_buffer_desc_ctor, (EbPtr)&init_data);
580
126k
        } else {
581
0
            ctx->md_blk_arr_nsq[coded_leaf_index].coeff_tmp = NULL;
582
0
            ctx->md_blk_arr_nsq[coded_leaf_index].recon_tmp = NULL;
583
0
        }
584
126k
    }
585
8.90k
    for (CandClass cand_class_it = CAND_CLASS_0; cand_class_it < CAND_CLASS_TOTAL; cand_class_it++) {
586
7.42k
        EB_MALLOC_ARRAY(ctx->cand_buff_indices[cand_class_it], ctx->max_nics_uv);
587
7.42k
    }
588
589
1.48k
    EB_MALLOC_ARRAY(ctx->best_candidate_index_array, ctx->max_nics_uv);
590
1.48k
    EB_MALLOC_ARRAY(ctx->above_txfm_context, (sb_size >> MI_SIZE_LOG2));
591
1.48k
    EB_MALLOC_ARRAY(ctx->left_txfm_context, (sb_size >> MI_SIZE_LOG2));
592
1.48k
    EbPictureBufferDescInitData thirty_two_width_picture_buffer_desc_init_data;
593
1.48k
    EbPictureBufferDescInitData picture_buffer_desc_init_data;
594
595
1.48k
    picture_buffer_desc_init_data.max_width          = sb_size;
596
1.48k
    picture_buffer_desc_init_data.max_height         = sb_size;
597
1.48k
    picture_buffer_desc_init_data.bit_depth          = ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT;
598
1.48k
    picture_buffer_desc_init_data.color_format       = EB_YUV420;
599
1.48k
    picture_buffer_desc_init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK;
600
1.48k
    picture_buffer_desc_init_data.border             = 0;
601
1.48k
    picture_buffer_desc_init_data.split_mode         = false;
602
1.48k
    picture_buffer_desc_init_data.is_16bit_pipeline  = false;
603
604
1.48k
    thirty_two_width_picture_buffer_desc_init_data.max_width          = sb_size;
605
1.48k
    thirty_two_width_picture_buffer_desc_init_data.max_height         = sb_size;
606
1.48k
    thirty_two_width_picture_buffer_desc_init_data.bit_depth          = EB_THIRTYTWO_BIT;
607
1.48k
    thirty_two_width_picture_buffer_desc_init_data.color_format       = EB_YUV420;
608
1.48k
    thirty_two_width_picture_buffer_desc_init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK;
609
1.48k
    thirty_two_width_picture_buffer_desc_init_data.border             = 0;
610
1.48k
    thirty_two_width_picture_buffer_desc_init_data.split_mode         = false;
611
1.48k
    thirty_two_width_picture_buffer_desc_init_data.is_16bit_pipeline  = false;
612
25.2k
    for (uint32_t txt_itr = 0; txt_itr < TX_TYPES; ++txt_itr) {
613
23.7k
        EB_NEW(ctx->recon_coeff_ptr[txt_itr],
614
23.7k
               svt_picture_buffer_desc_ctor,
615
23.7k
               (EbPtr)&thirty_two_width_picture_buffer_desc_init_data);
616
23.7k
        EB_NEW(ctx->recon_ptr[txt_itr], svt_picture_buffer_desc_ctor, (EbPtr)&picture_buffer_desc_init_data);
617
23.7k
        EB_NEW(ctx->quant_coeff_ptr[txt_itr],
618
23.7k
               svt_picture_buffer_desc_ctor,
619
23.7k
               (EbPtr)&thirty_two_width_picture_buffer_desc_init_data);
620
23.7k
    }
621
1.48k
    EB_NEW(ctx->tx_coeffs, svt_picture_buffer_desc_ctor, (EbPtr)&thirty_two_width_picture_buffer_desc_init_data);
622
1.48k
    EB_NEW(ctx->scratch_prediction_ptr, svt_picture_buffer_desc_ctor, (EbPtr)&picture_buffer_desc_init_data);
623
1.48k
    EbPictureBufferDescInitData double_width_picture_buffer_desc_init_data;
624
1.48k
    double_width_picture_buffer_desc_init_data.max_width          = sb_size;
625
1.48k
    double_width_picture_buffer_desc_init_data.max_height         = sb_size;
626
1.48k
    double_width_picture_buffer_desc_init_data.bit_depth          = EB_SIXTEEN_BIT;
627
1.48k
    double_width_picture_buffer_desc_init_data.color_format       = EB_YUV420;
628
1.48k
    double_width_picture_buffer_desc_init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK;
629
1.48k
    double_width_picture_buffer_desc_init_data.border             = 0;
630
1.48k
    double_width_picture_buffer_desc_init_data.split_mode         = false;
631
1.48k
    double_width_picture_buffer_desc_init_data.is_16bit_pipeline  = false;
632
633
    // The temp_recon_ptr and temp_residual will be shared by all candidates
634
    // If you want to do something with residual or recon, you need to create one
635
1.48k
    EB_NEW(ctx->temp_recon_ptr, svt_picture_buffer_desc_ctor, (EbPtr)&picture_buffer_desc_init_data);
636
1.48k
    EB_NEW(ctx->temp_residual, svt_picture_buffer_desc_ctor, (EbPtr)&double_width_picture_buffer_desc_init_data);
637
638
    // Candidate Buffers
639
1.48k
    EB_ALLOC_PTR_ARRAY(ctx->cand_bf_ptr_array, ctx->max_nics_uv);
640
641
16.3k
    for (buffer_index = 0; buffer_index < ctx->max_nics; ++buffer_index) {
642
14.8k
        EB_NEW(ctx->cand_bf_ptr_array[buffer_index],
643
14.8k
               svt_aom_mode_decision_cand_bf_ctor,
644
14.8k
               ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT,
645
14.8k
               sb_size,
646
14.8k
               PICTURE_BUFFER_DESC_FULL_MASK,
647
14.8k
               ctx->temp_residual,
648
14.8k
               ctx->temp_recon_ptr,
649
14.8k
               &(ctx->fast_cost_array[buffer_index]),
650
14.8k
               &(ctx->full_cost_array[buffer_index]),
651
14.8k
               &(ctx->full_cost_ssim_array[buffer_index]));
652
14.8k
    }
653
654
1.48k
    for (buffer_index = max_nics; buffer_index < ctx->max_nics_uv; ++buffer_index) {
655
0
        EB_NEW(ctx->cand_bf_ptr_array[buffer_index],
656
0
               svt_aom_mode_decision_cand_bf_ctor,
657
0
               ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT,
658
0
               sb_size,
659
0
               PICTURE_BUFFER_DESC_CHROMA_MASK,
660
0
               ctx->temp_residual,
661
0
               ctx->temp_recon_ptr,
662
0
               &(ctx->fast_cost_array[buffer_index]),
663
0
               &(ctx->full_cost_array[buffer_index]),
664
0
               &(ctx->full_cost_ssim_array[buffer_index]));
665
0
    }
666
667
1.48k
    return EB_ErrorNone;
668
1.48k
}
669
670
/**************************************************
671
 * Reset Mode Decision Neighbor Arrays
672
 *************************************************/
673
4.99k
void svt_aom_reset_mode_decision_neighbor_arrays(PictureControlSet* pcs, uint16_t tile_idx) {
674
4.99k
    uint8_t depth;
675
19.9k
    for (depth = 0; depth < NA_TOT_CNT; depth++) {
676
14.9k
        svt_aom_neighbor_array_unit_reset(pcs->mdleaf_partition_na[depth][tile_idx]);
677
14.9k
        if (pcs->hbd_md != EB_10_BIT_MD) {
678
14.9k
            svt_aom_neighbor_array_unit_reset(pcs->md_luma_recon_na[depth][tile_idx]);
679
14.9k
            svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_1_luma_recon_na[depth][tile_idx]);
680
14.9k
            svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_2_luma_recon_na[depth][tile_idx]);
681
14.9k
            svt_aom_neighbor_array_unit_reset(pcs->md_cb_recon_na[depth][tile_idx]);
682
14.9k
            svt_aom_neighbor_array_unit_reset(pcs->md_cr_recon_na[depth][tile_idx]);
683
14.9k
        }
684
14.9k
        if (pcs->hbd_md > EB_8_BIT_MD || (pcs->scs->encoder_bit_depth > EB_EIGHT_BIT && pcs->pic_bypass_encdec)) {
685
0
            svt_aom_neighbor_array_unit_reset(pcs->md_luma_recon_na_16bit[depth][tile_idx]);
686
0
            svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_1_luma_recon_na_16bit[depth][tile_idx]);
687
0
            svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_2_luma_recon_na_16bit[depth][tile_idx]);
688
0
            svt_aom_neighbor_array_unit_reset(pcs->md_cb_recon_na_16bit[depth][tile_idx]);
689
0
            svt_aom_neighbor_array_unit_reset(pcs->md_cr_recon_na_16bit[depth][tile_idx]);
690
0
        }
691
692
14.9k
        svt_aom_neighbor_array_unit_reset(pcs->md_y_dcs_na[depth][tile_idx]);
693
14.9k
        svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_1_luma_dc_sign_level_coeff_na[depth][tile_idx]);
694
14.9k
        svt_aom_neighbor_array_unit_reset(pcs->md_cb_dc_sign_level_coeff_na[depth][tile_idx]);
695
14.9k
        svt_aom_neighbor_array_unit_reset(pcs->md_cr_dc_sign_level_coeff_na[depth][tile_idx]);
696
14.9k
        svt_aom_neighbor_array_unit_reset(pcs->md_txfm_context_array[depth][tile_idx]);
697
14.9k
    }
698
699
4.99k
    return;
700
4.99k
}
701
702
// If the ref intra percentage is below the TH, applying modulation to the MD lambda
703
0
#define LAMBDA_MOD_INTRA_TH 50
704
0
#define LAMBDA_MOD_INTRA_SCALING_FACTOR 138
705
706
// Set the lambda for each sb.
707
// When lambda tuning is on (blk_lambda_tuning), lambda of each block is set separately (full_lambda_md/fast_lambda_md)
708
// later in svt_aom_set_tuned_blk_lambda
709
// Testing showed that updating SAD lambda based on frame info was not helpful; therefore, the SAD lambda generation is not changed.
710
6.22k
static void av1_lambda_assign_md(PictureControlSet* pcs, ModeDecisionContext* ctx) {
711
6.22k
    ctx->full_lambda_md[0] = svt_aom_compute_rd_mult(pcs, ctx->qp_index, ctx->me_q_index, EB_EIGHT_BIT);
712
6.22k
    ctx->fast_lambda_md[0] = svt_aom_compute_fast_lambda(pcs, ctx->qp_index, ctx->me_q_index, EB_EIGHT_BIT);
713
6.22k
    ctx->full_lambda_md[1] = svt_aom_compute_rd_mult(pcs, ctx->qp_index, ctx->me_q_index, EB_TEN_BIT);
714
6.22k
    ctx->fast_lambda_md[1] = svt_aom_compute_fast_lambda(pcs, ctx->qp_index, ctx->me_q_index, EB_TEN_BIT);
715
716
6.22k
    if (!pcs->scs->static_config.rtc && pcs->scs->stats_based_sb_lambda_modulation) {
717
6.22k
        if (pcs->temporal_layer_index > 0) {
718
0
            if (pcs->ref_intra_percentage < LAMBDA_MOD_INTRA_TH) {
719
0
                ctx->full_lambda_md[0] = (ctx->full_lambda_md[0] * LAMBDA_MOD_INTRA_SCALING_FACTOR) >> 7;
720
0
                ctx->fast_lambda_md[0] = (ctx->fast_lambda_md[0] * LAMBDA_MOD_INTRA_SCALING_FACTOR) >> 7;
721
0
                ctx->full_lambda_md[1] = (ctx->full_lambda_md[1] * LAMBDA_MOD_INTRA_SCALING_FACTOR) >> 7;
722
0
                ctx->fast_lambda_md[1] = (ctx->fast_lambda_md[1] * LAMBDA_MOD_INTRA_SCALING_FACTOR) >> 7;
723
0
            }
724
0
        }
725
6.22k
    }
726
727
6.22k
    if (pcs->lambda_weight) {
728
3.35k
        ctx->full_lambda_md[0] = (uint32_t)((ctx->full_lambda_md[0] * (uint64_t)pcs->lambda_weight) >> 7);
729
3.35k
        ctx->fast_lambda_md[0] = (uint32_t)((ctx->fast_lambda_md[0] * (uint64_t)pcs->lambda_weight) >> 7);
730
3.35k
        ctx->full_lambda_md[1] = (uint32_t)((ctx->full_lambda_md[1] * (uint64_t)pcs->lambda_weight) >> 7);
731
3.35k
        ctx->fast_lambda_md[1] = (uint32_t)((ctx->fast_lambda_md[1] * (uint64_t)pcs->lambda_weight) >> 7);
732
3.35k
    }
733
6.22k
    ctx->full_lambda_md[1] *= 16;
734
6.22k
    ctx->fast_lambda_md[1] *= 4;
735
736
6.22k
    SequenceControlSet* scs          = pcs->scs;
737
6.22k
    uint64_t            scale_factor = scs->static_config.lambda_scale_factors[pcs->ppcs->update_type];
738
6.22k
    ctx->full_lambda_md[0]           = (uint32_t)((ctx->full_lambda_md[0] * scale_factor) >> 7);
739
6.22k
    ctx->full_lambda_md[1]           = (uint32_t)((ctx->full_lambda_md[1] * scale_factor) >> 7);
740
6.22k
    ctx->fast_lambda_md[0]           = (uint32_t)((ctx->fast_lambda_md[0] * scale_factor) >> 7);
741
6.22k
    ctx->fast_lambda_md[1]           = (uint32_t)((ctx->fast_lambda_md[1] * scale_factor) >> 7);
742
743
6.22k
    ctx->full_sb_lambda_md[0] = ctx->full_lambda_md[0];
744
6.22k
    ctx->full_sb_lambda_md[1] = ctx->full_lambda_md[1];
745
6.22k
}
746
747
/*
 * Reset the mode-decision context before a segment is processed.
 *
 *   scs            - sequence control set (read for static_config.rtc)
 *   ctx            - mode-decision context being reset
 *   pcs            - picture control set the segment belongs to
 *   tile_group_idx - tile group whose tiles are reset when segment_index is 0
 *   segment_index  - index of the segment; neighbor arrays are only reset for segment 0
 */
void svt_aom_reset_mode_decision(SequenceControlSet* scs, ModeDecisionContext* ctx, PictureControlSet* pcs,
                                 uint16_t tile_group_idx, uint32_t segment_index) {
    const bool rtc_on = scs->static_config.rtc;

    ctx->hbd_md = pcs->hbd_md;
    // Take the MD rate-estimation context from the picture
    ctx->md_rate_est_ctx = pcs->md_rate_est_ctx;

    // Reset the neighbor arrays of every tile in the tile group at the start of a new segment / picture
    if (segment_index == 0) {
        for (uint16_t tile_row = pcs->ppcs->tile_group_info[tile_group_idx].tile_group_tile_start_y;
             tile_row < pcs->ppcs->tile_group_info[tile_group_idx].tile_group_tile_end_y;
             tile_row++) {
            for (uint16_t tile_col = pcs->ppcs->tile_group_info[tile_group_idx].tile_group_tile_start_x;
                 tile_col < pcs->ppcs->tile_group_info[tile_group_idx].tile_group_tile_end_x;
                 tile_col++) {
                // Raster-order tile index within the picture
                const uint16_t tile_idx = tile_col + tile_row * pcs->ppcs->av1_cm->tiles_info.tile_cols;
                svt_aom_reset_mode_decision_neighbor_arrays(pcs, tile_idx);
            }
        }
    }

    // Each segment inherits the enc-dec bypass setting from the picture level
    ctx->bypass_encdec = pcs->pic_bypass_encdec;

    // NOTE(review): as written, this rtc_-prefixed flag can only be set when RTC mode is off -
    // confirm that `&&` (rather than `||`) is the intended operator.
    const bool shortcut_allowed = !rtc_on && (pcs->enc_mode <= ENC_M11 || pcs->temporal_layer_index != 0);
    if (shortcut_allowed) {
        ctx->rtc_use_N4_dct_dct_shortcut = 1;
    } else {
        ctx->rtc_use_N4_dct_dct_shortcut = 0;
    }
}
779
780
/******************************************************
781
 * Mode Decision Configure SB
782
 ******************************************************/
783
void svt_aom_mode_decision_configure_sb(ModeDecisionContext* ctx, PictureControlSet* pcs, uint8_t sb_qp,
784
6.22k
                                        uint8_t me_sb_qp) {
785
    /* Note(CHKN) : when Qp modulation varies QP on a sub-SB(CU) basis,  Lamda has to change based on Cu->QP , and then this code has to move inside the CU loop in MD */
786
787
    // Lambda Assignement
788
6.22k
    ctx->qp_index = pcs->ppcs->frm_hdr.delta_q_params.delta_q_present || pcs->ppcs->r0_delta_qp_md
789
6.22k
        ? sb_qp
790
6.22k
        : (uint8_t)pcs->ppcs->frm_hdr.quantization_params.base_q_idx;
791
792
6.22k
    ctx->me_q_index = me_sb_qp;
793
794
6.22k
    av1_lambda_assign_md(pcs, ctx);
795
796
6.22k
    ctx->hbd_pack_done = 0;
797
798
6.22k
    return;
799
6.22k
}