Coverage Report

Created: 2026-06-15 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/svt-av1/Source/Lib/Codec/md_process.c
Line
Count
Source
1
/*
2
* Copyright(c) 2019 Intel Corporation
3
*
4
* This source code is subject to the terms of the BSD 2 Clause License and
5
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6
* was not distributed with this source code in the LICENSE file, you can
7
* obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
8
* Media Patent License 1.0 was not distributed with this source code in the
9
* PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
10
*/
11
12
#include <stdlib.h>
13
14
#include "utility.h"
15
#include "md_process.h"
16
#include "lambda_rate_tables.h"
17
#include "rc_process.h"
18
#include "enc_mode_config.h"
19
20
// 64-entry lookup table. Entries 0..61 are 4 * index; the last two entries
// are 249 and 255 so the table ends at the top of the 8-bit range.
// NOTE(review): presumably indexed by a 0..63 quantizer value to obtain a
// 0..255 qindex, as the name suggests - confirm against callers.
const uint8_t quantizer_to_qindex[64] = {
    0,   4,   8,   12,  16,  20,  24,  28,
    32,  36,  40,  44,  48,  52,  56,  60,
    64,  68,  72,  76,  80,  84,  88,  92,
    96,  100, 104, 108, 112, 116, 120, 124,
    128, 132, 136, 140, 144, 148, 152, 156,
    160, 164, 168, 172, 176, 180, 184, 188,
    192, 196, 200, 204, 208, 212, 216, 220,
    224, 228, 232, 236, 240, 244, 249, 255,
};
24
25
// Two rows of fixed QP offset percentages, one value per offset slot.
const int percents[2][FIXED_QP_OFFSET_COUNT] = {
    {75, 70, 60, 20, 15, 0},
    {76, 60, 30, 15, 8, 4}, // libaom offsets
};
28
29
// 64-entry table in three plateaus: indices 0..15 hold 85, indices 16..47
// hold 95 and indices 48..63 hold 100.
const uint8_t uni_psy_bias[64] = {
    85,  85,  85,  85,  85,  85,  85,  85,
    85,  85,  85,  85,  85,  85,  85,  85,
    95,  95,  95,  95,  95,  95,  95,  95,
    95,  95,  95,  95,  95,  95,  95,  95,
    95,  95,  95,  95,  95,  95,  95,  95,
    95,  95,  95,  95,  95,  95,  95,  95,
    100, 100, 100, 100, 100, 100, 100, 100,
    100, 100, 100, 100, 100, 100, 100, 100,
};
34
35
1.37k
/*
 * Destructor for a ModeDecisionContext: releases every buffer the context owns.
 *
 * p: the ModeDecisionContext to tear down, passed as a generic EbPtr.
 *
 * The constructor installs this destructor and records init_max_block_cnt
 * before it allocates md_blk_arr_nsq or the tx-depth scratch candidate
 * buffers. Every access that goes *through* one of those pointers is therefore
 * NULL-guarded here, so that tearing down a context whose construction stopped
 * early does not dereference NULL.
 * NOTE(review): whether the allocation macros actually invoke the destructor on
 * a failed constructor is defined outside this file - confirm.
 */
static void mode_decision_context_dctor(EbPtr p) {
    ModeDecisionContext* obj = (ModeDecisionContext*)p;

    const uint32_t block_max_count_sb = obj->init_max_block_cnt;

    // MD palette search buffers (the constructor leaves them NULL; they are
    // only present if something allocated them later).
    if (obj->palette_buffer) {
        EB_FREE(obj->palette_buffer);
    }
    if (obj->palette_cand_array) {
        // Free fields in palette_cand_array before freeing palette_cand_array
        for (int cd = 0; cd < MAX_PAL_CAND; cd++) {
            if (obj->palette_cand_array[cd].color_idx_map) {
                EB_FREE_ARRAY(obj->palette_cand_array[cd].color_idx_map);
            }
        }

        EB_FREE_ARRAY(obj->palette_cand_array);
    }
    if (obj->palette_size_array_0) {
        EB_FREE_ARRAY(obj->palette_size_array_0);
    }
    for (CandClass cand_class_it = CAND_CLASS_0; cand_class_it < CAND_CLASS_TOTAL; cand_class_it++) {
        EB_FREE_ARRAY(obj->cand_buff_indices[cand_class_it]);
    }
    EB_FREE_ARRAY(obj->best_candidate_index_array);

    EB_FREE_ARRAY(obj->above_txfm_context);
    EB_FREE_ARRAY(obj->left_txfm_context);

    // Per-block temporary coeff/recon pictures. Guarded: the block array may
    // never have been allocated even though init_max_block_cnt is already set.
    if (obj->md_blk_arr_nsq) {
        for (uint32_t coded_leaf_index = 0; coded_leaf_index < block_max_count_sb; ++coded_leaf_index) {
            if (obj->md_blk_arr_nsq[coded_leaf_index].coeff_tmp) {
                EB_DELETE(obj->md_blk_arr_nsq[coded_leaf_index].coeff_tmp);
            }
            if (obj->md_blk_arr_nsq[coded_leaf_index].recon_tmp) {
                EB_DELETE(obj->md_blk_arr_nsq[coded_leaf_index].recon_tmp);
            }
        }
    }

    EB_DELETE_PTR_ARRAY(obj->cand_bf_ptr_array, obj->max_nics_uv);

    // Scratch candidate buffers for tx depth 1 and 2: the inner cand array is
    // reached through the buffer pointer, so the buffer must exist first.
    if (obj->cand_bf_tx_depth_1) {
        EB_FREE_ARRAY(obj->cand_bf_tx_depth_1->cand);
        EB_DELETE(obj->cand_bf_tx_depth_1);
    }
    if (obj->cand_bf_tx_depth_2) {
        EB_FREE_ARRAY(obj->cand_bf_tx_depth_2->cand);
        EB_DELETE(obj->cand_bf_tx_depth_2);
    }

    EB_FREE_ALIGNED_ARRAY(obj->cfl_temp_luma_recon16bit);
    EB_FREE_ALIGNED_ARRAY(obj->cfl_temp_luma_recon);
    EB_FREE_ALIGNED_ARRAY(obj->pred_buf_q3);
    EB_FREE_ARRAY(obj->fast_cand_array);
    EB_FREE_2D(obj->injected_mvs);
    EB_FREE_ARRAY(obj->injected_ref_types);
    EB_FREE_ARRAY(obj->fast_cost_array);
    EB_FREE_ARRAY(obj->full_cost_array);

    // The neighbour recon arrays and av1xd are allocated once and owned by
    // entry 0 of md_blk_arr_nsq; the other entries only point into them.
    if (obj->md_blk_arr_nsq) {
        for (int i = 0; i < 3; i++) {
            EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].neigh_left_recon_16bit[i]);
            EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].neigh_top_recon_16bit[i]);
            EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].neigh_left_recon[i]);
            EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].neigh_top_recon[i]);
        }
        EB_FREE_ARRAY(obj->md_blk_arr_nsq[0].av1xd);
    }
    EB_FREE_ARRAY(obj->mds);
    EB_FREE_ARRAY(obj->pc_tree);
    EB_FREE_ARRAY(obj->tested_blk);
    obj->blocks_to_alloc = 0;
    EB_FREE_ARRAY(obj->md_blk_arr_nsq);
#if OPT_LPD1_GLOBALMV_BYPASS
    EB_FREE_ARRAY(obj->pd0_mds0_best_cost);
#endif
    if (obj->rate_est_table) {
        EB_FREE_ARRAY(obj->rate_est_table);
    }

    // Inter-inter compound prediction buffers
    for (int i = 0; i < NEAREST_NEAR_MV_CNT; i++) {
        if (obj->cmp_store.pred0_buf[i]) {
            EB_FREE(obj->cmp_store.pred0_buf[i]);
        }
        if (obj->cmp_store.pred1_buf[i]) {
            EB_FREE(obj->cmp_store.pred1_buf[i]);
        }
    }
    if (obj->residual1) {
        EB_FREE(obj->residual1);
    }
    if (obj->diff10) {
        EB_FREE(obj->diff10);
    }

    // Inter-intra prediction buffer
    if (obj->intrapred_buf) {
        EB_FREE_2D(obj->intrapred_buf);
    }

    // OBMC prediction buffers
    if (obj->obmc_buff_0) {
        EB_FREE(obj->obmc_buff_0);
    }
    if (obj->obmc_buff_1) {
        EB_FREE(obj->obmc_buff_1);
    }
    if (obj->wsrc_buf) {
        EB_FREE(obj->wsrc_buf);
    }
    if (obj->mask_buf) {
        EB_FREE(obj->mask_buf);
    }

    // Per-transform-type scratch pictures
    for (uint32_t txt_itr = 0; txt_itr < TX_TYPES; ++txt_itr) {
        EB_DELETE(obj->recon_coeff_ptr[txt_itr]);
        EB_DELETE(obj->recon_ptr[txt_itr]);
        EB_DELETE(obj->quant_coeff_ptr[txt_itr]);
    }
    EB_DELETE(obj->tx_coeffs);
    EB_DELETE(obj->scratch_prediction_ptr);
    EB_DELETE(obj->temp_residual);
    EB_DELETE(obj->temp_recon_ptr);
    EB_FREE_ARRAY(obj->full_cost_ssim_array);
}
150
151
void svt_aom_set_nics(SequenceControlSet* scs, NicScalingCtrls* scaling_ctrls, uint32_t mds1_count[CAND_CLASS_TOTAL],
152
                      uint32_t mds2_count[CAND_CLASS_TOTAL], uint32_t mds3_count[CAND_CLASS_TOTAL], uint8_t pic_type,
153
                      uint32_t qp);
154
155
/*
 * Recursively initialise one node of the flat MdScan array and all of its
 * split descendants.
 *
 * scs:         sequence control set; supplies the block-geometry table.
 * mds:         node to fill in; its descendants are the entries that follow it.
 * mds_idx:     in/out running block index. It is stored in the node, then
 *              advanced by the geometry's d1_depth_offset (node is split
 *              further) or ns_depth_offset (leaf).
 * index:       position of this node among its parent's split children.
 * bsize:       square block size of this node.
 * min_sq_size: smallest square width that gets a node; recursion stops there.
 */
static void setup_mds(SequenceControlSet* scs, MdScan* mds, uint32_t* mds_idx, int index, BlockSize bsize,
                      const int min_sq_size) {
    mds->mds_idx = *mds_idx;
    mds->bsize   = bsize;
    mds->index   = index;

    // Geometry is looked up with the index of this node, before it is advanced.
    const BlockGeom* geom = get_blk_geom_mds(scs->blk_geom_mds, *mds_idx);

    if (block_size_wide[bsize] <= min_sq_size) {
        // Leaf node: no split depths to add.
        *mds_idx += geom->ns_depth_offset;
        return;
    }

    const BlockSize child_bsize = get_partition_subsize(bsize, PARTITION_SPLIT);
    const int       child_width = block_size_wide[child_bsize];

    // Number of array entries taken by one child and everything below it:
    // sum the square count at each depth, from min_sq_size up to the child size.
    int nodes_at_depth = (child_width / min_sq_size) * (child_width / min_sq_size);
    int subtree_nodes  = 0;
    int width          = min_sq_size;
    while (width <= child_width) {
        subtree_nodes += nodes_at_depth;
        width <<= 1;
        nodes_at_depth >>= 2;
    }

    *mds_idx += geom->d1_depth_offset;
    for (int child = 0; child < SUB_PARTITIONS_SPLIT; ++child) {
        // Children are laid out back to back right after the parent entry.
        MdScan* child_node = mds + child * subtree_nodes + 1;
        mds->split[child]  = child_node;
        setup_mds(scs, child_node, mds_idx, child, child_bsize, min_sq_size);
    }
}
183
184
/*
 * Recursively initialise one node of the flat PC_TREE array and all of its
 * split descendants, wiring each node to its slot in the tested-block array.
 *
 * pc_tree:        node to fill in; its descendants are the entries after it.
 * test_blk_array: tested-block entry that belongs to this node; the array is
 *                 walked with the same offsets as the PC_TREE array.
 * index:          position of this node among its parent's split children.
 * bsize:          square block size of this node.
 * min_sq_size:    smallest square width that gets a node; recursion stops there.
 */
static void setup_pc_tree(PC_TREE* pc_tree, bool (*test_blk_array)[PART_S][4], int index, BlockSize bsize,
                          const int min_sq_size) {
    pc_tree->bsize      = bsize;
    pc_tree->index      = index;
    pc_tree->tested_blk = test_blk_array[0];

    if (block_size_wide[bsize] <= min_sq_size) {
        // Smallest allowed square: no split depths to add.
        return;
    }

    const BlockSize child_bsize = get_partition_subsize(bsize, PARTITION_SPLIT);
    const int       child_width = block_size_wide[child_bsize];

    // Number of array entries taken by one child and everything below it:
    // sum the square count at each depth, from min_sq_size up to the child size.
    int nodes_at_depth = (child_width / min_sq_size) * (child_width / min_sq_size);
    int subtree_nodes  = 0;
    int width          = min_sq_size;
    while (width <= child_width) {
        subtree_nodes += nodes_at_depth;
        width <<= 1;
        nodes_at_depth >>= 2;
    }

    for (int child = 0; child < SUB_PARTITIONS_SPLIT; ++child) {
        // Children are laid out back to back right after the parent entry.
        const int first_entry = child * subtree_nodes + 1;
        PC_TREE*  child_node  = pc_tree + first_entry;

        pc_tree->split[child] = child_node;
        child_node->parent    = pc_tree;
        setup_pc_tree(child_node, test_blk_array + first_entry, child, child_bsize, min_sq_size);
    }
}
209
210
/******************************************************
211
 * Mode Decision Context Constructor
212
 ******************************************************/
213
EbErrorType svt_aom_mode_decision_context_ctor(ModeDecisionContext* ctx, SequenceControlSet* scs,
214
                                               EbColorFormat color_format, uint8_t sb_size, EncMode enc_mode,
215
                                               uint16_t max_block_cnt, uint32_t encoder_bit_depth,
216
                                               EbFifo* mode_decision_configuration_input_fifo_ptr,
217
                                               EbFifo* mode_decision_output_fifo_ptr, uint8_t enable_hbd_mode_decision,
218
1.37k
                                               uint8_t seq_qp_mod) {
219
1.37k
    const bool allintra = scs->allintra;
220
1.37k
    const bool rtc_tune = scs->static_config.rtc;
221
1.37k
    uint32_t   buffer_index;
222
1.37k
    uint32_t   cand_index;
223
224
1.37k
    ctx->init_max_block_cnt     = max_block_cnt;
225
1.37k
    uint32_t block_max_count_sb = max_block_cnt;
226
227
1.37k
    ctx->sb_size = sb_size;
228
1.37k
    (void)color_format;
229
230
1.37k
    ctx->dctor  = mode_decision_context_dctor;
231
1.37k
    ctx->hbd_md = enable_hbd_mode_decision;
232
233
    // Zero the tail of md_levels_buf once; it serves as permanent bottom-padding
234
    // for set_levels() and is never overwritten by svt_av1_txb_init_levels().
235
1.37k
    memset(ctx->md_levels_buf + LEVELS_TAIL_OFFSET, 0, TX_PAD_2D - LEVELS_TAIL_OFFSET);
236
237
    // Input/Output System Resource Manager FIFOs
238
1.37k
    ctx->mode_decision_configuration_input_fifo_ptr = mode_decision_configuration_input_fifo_ptr;
239
1.37k
    ctx->mode_decision_output_fifo_ptr              = mode_decision_output_fifo_ptr;
240
241
    // Maximum number of candidates MD can support
242
    // determine MAX_NICS for a given preset
243
    // get the min scaling level (the smallest scaling level is the most conservative)
244
1.37k
    uint8_t min_nic_scaling_level = NICS_SCALING_LEVELS - 1;
245
1.37k
    uint8_t stage1_scaling_num;
246
1.37k
    if (allintra) {
247
1.37k
        uint8_t nic_level  = svt_aom_get_nic_level_allintra(enc_mode);
248
1.37k
        stage1_scaling_num = MD_STAGE_NICS_SCAL_NUM[svt_aom_set_nic_controls(NULL, nic_level)][MD_STAGE_1];
249
1.37k
    } else if (rtc_tune) {
250
0
#if TUNE_RTC
251
0
        uint8_t nic_level = svt_aom_get_nic_level_rtc(enc_mode, scs->use_flat_ipp);
252
#else
253
#if TUNE_SIMPLIFY_SETTINGS
254
        uint8_t nic_level = svt_aom_get_nic_level_rtc(enc_mode);
255
#else
256
        uint8_t nic_level = svt_aom_get_nic_level_rtc(enc_mode, scs->use_flat_ipp);
257
#endif
258
#endif
259
0
        stage1_scaling_num = MD_STAGE_NICS_SCAL_NUM[svt_aom_set_nic_controls(NULL, nic_level)][MD_STAGE_1];
260
0
    } else {
261
0
#if TUNE_SIMPLIFY_SETTINGS
262
0
        for (uint8_t is_base = 0; is_base < 2; is_base++) {
263
0
            uint8_t nic_level         = svt_aom_get_nic_level_default(enc_mode, is_base);
264
0
            uint8_t nic_scaling_level = svt_aom_set_nic_controls(NULL, nic_level);
265
0
            min_nic_scaling_level     = MIN(min_nic_scaling_level, nic_scaling_level);
266
0
        }
267
#else
268
        for (uint8_t sc_class1 = 0; sc_class1 < 2; sc_class1++) {
269
            for (uint8_t is_base = 0; is_base < 2; is_base++) {
270
                uint8_t nic_level         = svt_aom_get_nic_level_default(enc_mode, is_base, sc_class1);
271
                uint8_t nic_scaling_level = svt_aom_set_nic_controls(NULL, nic_level);
272
                min_nic_scaling_level     = MIN(min_nic_scaling_level, nic_scaling_level);
273
            }
274
        }
275
#endif
276
277
0
        stage1_scaling_num = MD_STAGE_NICS_SCAL_NUM[min_nic_scaling_level][MD_STAGE_1];
278
0
    }
279
    // scale max_nics
280
1.37k
    uint32_t max_nics = 0;
281
1.37k
    {
282
1.37k
        NicScalingCtrls scaling_ctrls;
283
1.37k
        scaling_ctrls.stage1_scaling_num = stage1_scaling_num;
284
1.37k
        scaling_ctrls.stage2_scaling_num = stage1_scaling_num;
285
1.37k
        scaling_ctrls.stage3_scaling_num = stage1_scaling_num;
286
1.37k
        uint32_t mds1_count[CAND_CLASS_TOTAL];
287
1.37k
        uint32_t mds2_count[CAND_CLASS_TOTAL];
288
1.37k
        uint32_t mds3_count[CAND_CLASS_TOTAL];
289
5.48k
        for (uint8_t pic_type = 0; pic_type < NICS_PIC_TYPE; pic_type++) {
290
267k
            for (uint8_t qp = MIN_QP_VALUE; qp <= MAX_QP_VALUE; qp++) {
291
263k
                svt_aom_set_nics(scs, &scaling_ctrls, mds1_count, mds2_count, mds3_count, pic_type, qp);
292
293
263k
                uint32_t nics = 0;
294
1.57M
                for (CandClass cidx = CAND_CLASS_0; cidx < CAND_CLASS_TOTAL; cidx++) {
295
1.31M
                    nics += mds1_count[cidx];
296
1.31M
                }
297
263k
                max_nics = MAX(max_nics, nics);
298
263k
            }
299
4.11k
        }
300
1.37k
    }
301
302
    // If independent chroma search is used, need to allocate additional 84 candidate buffers
303
1.37k
    bool is_chroma_mode_0;
304
1.37k
    if (allintra) {
305
1.37k
        is_chroma_mode_0 = svt_aom_set_chroma_controls(NULL, svt_aom_get_chroma_level_allintra(enc_mode)) ==
306
1.37k
            CHROMA_MODE_0;
307
1.37k
    } else if (scs->static_config.rtc) {
308
0
#if TUNE_SIMPLIFY_SETTINGS
309
0
        is_chroma_mode_0 = svt_aom_set_chroma_controls(
310
0
                               NULL, svt_aom_get_chroma_level_rtc(enc_mode, scs->use_flat_ipp)) == CHROMA_MODE_0;
311
#else
312
        for (uint8_t is_i_slice = 0; is_i_slice < 2; is_i_slice++) {
313
            is_chroma_mode_0 = svt_aom_set_chroma_controls(NULL, svt_aom_get_chroma_level_rtc(enc_mode, is_i_slice)) ==
314
                CHROMA_MODE_0;
315
            if (is_chroma_mode_0) {
316
                break;
317
            }
318
        }
319
#endif
320
0
    } else {
321
0
        for (uint8_t is_i_slice = 0; is_i_slice < 2; is_i_slice++) {
322
0
            is_chroma_mode_0 = svt_aom_set_chroma_controls(
323
0
                                   NULL, svt_aom_get_chroma_level_default(enc_mode, is_i_slice)) == CHROMA_MODE_0;
324
0
            if (is_chroma_mode_0) {
325
0
                break;
326
0
            }
327
0
        }
328
0
    }
329
1.37k
    const uint8_t ind_uv_cands = is_chroma_mode_0 ? 84 : 0;
330
1.37k
    max_nics += CAND_CLASS_TOTAL; //need one extra temp buffer for each fast loop call
331
1.37k
    ctx->max_nics    = max_nics;
332
1.37k
    ctx->max_nics_uv = max_nics + ind_uv_cands;
333
    // Cfl scratch memory
334
1.37k
    if (ctx->hbd_md > EB_8_BIT_MD) {
335
0
        EB_MALLOC_ALIGNED(ctx->cfl_temp_luma_recon16bit, sizeof(uint16_t) * sb_size * sb_size);
336
0
    }
337
1.37k
    if (ctx->hbd_md != EB_10_BIT_MD) {
338
1.37k
        EB_MALLOC_ALIGNED(ctx->cfl_temp_luma_recon, sizeof(uint8_t) * sb_size * sb_size);
339
1.37k
    }
340
1.37k
    EB_MALLOC_ALIGNED(ctx->pred_buf_q3, CFL_BUF_SQUARE);
341
1.37k
    uint8_t use_update_cdf = 0;
342
1.37k
    if (allintra) {
343
1.37k
        use_update_cdf = svt_aom_get_update_cdf_level_allintra(enc_mode);
344
1.37k
    } else if (rtc_tune) {
345
0
#if TUNE_SIMPLIFY_SETTINGS
346
0
        for (uint8_t is_islice = 0; is_islice < 2; is_islice++) {
347
0
            if (use_update_cdf) {
348
0
                break;
349
0
            }
350
0
            use_update_cdf |= svt_aom_get_update_cdf_level_rtc(enc_mode, is_islice);
351
0
        }
352
#else
353
        for (uint8_t sc_class1 = 0; sc_class1 < 2; sc_class1++) {
354
            for (uint8_t is_islice = 0; is_islice < 2; is_islice++) {
355
                for (uint8_t is_base = 0; is_base < 2; is_base++) {
356
                    if (use_update_cdf) {
357
                        break;
358
                    }
359
                    use_update_cdf |= svt_aom_get_update_cdf_level_rtc(enc_mode, is_islice, is_base, sc_class1);
360
                }
361
            }
362
        }
363
#endif
364
0
    } else {
365
0
#if TUNE_SIMPLIFY_SETTINGS
366
0
        for (uint8_t is_islice = 0; is_islice < 2; is_islice++) {
367
0
            for (uint8_t is_base = 0; is_base < 2; is_base++) {
368
0
                if (use_update_cdf) {
369
0
                    break;
370
0
                }
371
0
                use_update_cdf |= svt_aom_get_update_cdf_level_default(enc_mode, is_islice, is_base);
372
0
            }
373
0
        }
374
#else
375
        for (uint8_t sc_class1 = 0; sc_class1 < 2; sc_class1++) {
376
            for (uint8_t is_islice = 0; is_islice < 2; is_islice++) {
377
                for (uint8_t is_base = 0; is_base < 2; is_base++) {
378
                    if (use_update_cdf) {
379
                        break;
380
                    }
381
                    use_update_cdf |= svt_aom_get_update_cdf_level_default(enc_mode, is_islice, is_base, sc_class1);
382
                }
383
            }
384
        }
385
#endif
386
0
    }
387
1.37k
    if (use_update_cdf) {
388
0
        EB_CALLOC_ARRAY(ctx->rate_est_table, 1);
389
1.37k
    } else {
390
1.37k
        ctx->rate_est_table = NULL;
391
1.37k
    }
392
    // Allocate buffer for inter-inter compound prediction
393
1.37k
    if (get_inter_compound_level(enc_mode)) {
394
0
        const uint8_t bits = ctx->hbd_md > EB_8_BIT_MD ? 2 : 1;
395
0
        for (int i = 0; i < NEAREST_NEAR_MV_CNT; i++) {
396
0
            EB_MALLOC(ctx->cmp_store.pred0_buf[i], sb_size * sb_size * bits * sizeof(uint8_t));
397
0
            EB_MALLOC(ctx->cmp_store.pred1_buf[i], sb_size * sb_size * bits * sizeof(uint8_t));
398
0
        }
399
0
        EB_MALLOC(ctx->residual1, sb_size * sb_size * sizeof(ctx->residual1[0]));
400
0
        EB_MALLOC(ctx->diff10, sb_size * sb_size * sizeof(ctx->diff10[0]));
401
0
    }
402
403
    // Allocate buffer for inter-intra prediction
404
1.37k
    uint8_t ii_allowed = 0;
405
4.11k
    for (uint8_t transition_present = 0; transition_present < 2; transition_present++) {
406
2.74k
        if (ii_allowed) {
407
0
            break;
408
0
        }
409
2.74k
        ii_allowed |= svt_aom_get_inter_intra_level(enc_mode, transition_present);
410
2.74k
    }
411
1.37k
    if (ii_allowed) {
412
0
        const uint8_t bits = ctx->hbd_md > EB_8_BIT_MD ? 2 : 1;
413
        // MAX block size for inter intra is 32x32
414
0
        EB_MALLOC_2D(ctx->intrapred_buf, INTERINTRA_MODES, 32 * 32 * bits * sizeof(ctx->intrapred_buf[0][0]));
415
0
    }
416
417
    // Allocate buffers for obmc prediction
418
1.37k
    uint8_t obmc_allowed = 0;
419
4.11k
    for (uint8_t is_base = 0; is_base < 2; is_base++) {
420
178k
        for (uint8_t qp = MIN_QP_VALUE; qp <= MAX_QP_VALUE; qp++) {
421
175k
            if (obmc_allowed) {
422
0
                break;
423
0
            }
424
#if TUNE_SHIFT_PRESETS_RTC && !TUNE_RTC
425
            obmc_allowed |= svt_aom_get_obmc_level(enc_mode, qp, seq_qp_mod, rtc_tune);
426
#else
427
175k
            obmc_allowed |= svt_aom_get_obmc_level(enc_mode, qp, seq_qp_mod);
428
175k
#endif
429
175k
        }
430
2.74k
    }
431
1.37k
    if (obmc_allowed) {
432
0
        const uint8_t bits = ctx->hbd_md > EB_8_BIT_MD ? 2 : 1;
433
0
        EB_MALLOC(ctx->obmc_buff_0, sb_size * sb_size * bits * MAX_PLANES * sizeof(ctx->obmc_buff_0[0]));
434
0
        EB_MALLOC(ctx->obmc_buff_1, sb_size * sb_size * bits * MAX_PLANES * sizeof(ctx->obmc_buff_1[0]));
435
0
        EB_MALLOC(ctx->wsrc_buf, sb_size * sb_size * sizeof(ctx->wsrc_buf[0]));
436
0
        EB_MALLOC(ctx->mask_buf, sb_size * sb_size * sizeof(ctx->mask_buf[0]));
437
0
    }
438
1.37k
    EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq, block_max_count_sb);
439
1.37k
#if OPT_LPD1_GLOBALMV_BYPASS
440
1.37k
    EB_MALLOC_ARRAY(ctx->pd0_mds0_best_cost, block_max_count_sb);
441
1.37k
#endif
442
    // Fast Candidate Array
443
1.37k
#if OPT_MAX_CAN_COUNT_RTC
444
1.37k
    uint16_t max_can_count = svt_aom_get_max_can_count(enc_mode, rtc_tune) + ind_uv_cands;
445
#else
446
    uint16_t max_can_count = svt_aom_get_max_can_count(enc_mode) + ind_uv_cands;
447
#endif
448
1.37k
    EB_MALLOC_ARRAY(ctx->fast_cand_array, max_can_count);
449
450
261k
    for (cand_index = 0; cand_index < max_can_count; ++cand_index) {
451
260k
        ctx->fast_cand_array[cand_index].palette_info = NULL;
452
260k
    }
453
1.37k
    svt_aom_assert_err(max_can_count > ind_uv_cands, "Max. candidates is too low");
454
1.37k
    EB_MALLOC_2D(ctx->injected_mvs, (uint16_t)(max_can_count - ind_uv_cands), 2);
455
1.37k
    EB_MALLOC_ARRAY(ctx->injected_ref_types, (max_can_count - ind_uv_cands));
456
457
    // Set buffers for MD palette search to NULL; will be init'd at runtime if needed
458
1.37k
    ctx->palette_buffer       = NULL;
459
1.37k
    ctx->palette_cand_array   = NULL;
460
1.37k
    ctx->palette_size_array_0 = NULL;
461
462
    // Cost Arrays
463
1.37k
    EB_MALLOC_ARRAY(ctx->fast_cost_array, ctx->max_nics_uv);
464
1.37k
    EB_MALLOC_ARRAY(ctx->full_cost_array, ctx->max_nics_uv);
465
1.37k
    EB_MALLOC_ARRAY(ctx->full_cost_ssim_array, ctx->max_nics_uv);
466
    // Candidate Buffers
467
1.37k
    EB_NEW(ctx->cand_bf_tx_depth_1,
468
1.37k
           svt_aom_mode_decision_scratch_cand_bf_ctor,
469
1.37k
           sb_size,
470
1.37k
           ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT);
471
472
1.37k
    EB_ALLOC_PTR_ARRAY(ctx->cand_bf_tx_depth_1->cand, 1);
473
1.37k
    EB_NEW(ctx->cand_bf_tx_depth_2,
474
1.37k
           svt_aom_mode_decision_scratch_cand_bf_ctor,
475
1.37k
           sb_size,
476
1.37k
           ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT);
477
478
1.37k
    EB_ALLOC_PTR_ARRAY(ctx->cand_bf_tx_depth_2->cand, 1);
479
5.48k
    for (int i = 0; i < 3; i++) {
480
4.11k
        ctx->md_blk_arr_nsq[0].neigh_left_recon[i]       = NULL;
481
4.11k
        ctx->md_blk_arr_nsq[0].neigh_top_recon[i]        = NULL;
482
4.11k
        ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[i] = NULL;
483
4.11k
        ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[i]  = NULL;
484
4.11k
    }
485
1.37k
    uint32_t coded_leaf_index;
486
1.37k
    uint16_t sz = sizeof(uint16_t);
487
1.37k
    if (ctx->hbd_md > EB_8_BIT_MD) {
488
0
        EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[0], block_max_count_sb * sb_size * sz);
489
0
        EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[0], block_max_count_sb * sb_size * sz);
490
0
        EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[1], block_max_count_sb * sb_size * sz >> 1);
491
0
        EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[1], block_max_count_sb * sb_size * sz >> 1);
492
0
        EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[2], block_max_count_sb * sb_size * sz >> 1);
493
0
        EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[2], block_max_count_sb * sb_size * sz >> 1);
494
495
0
        for (coded_leaf_index = 0; coded_leaf_index < block_max_count_sb; ++coded_leaf_index) {
496
0
            size_t offset = coded_leaf_index * sb_size * sz;
497
0
            ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon_16bit[0] =
498
0
                ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[0] + offset;
499
0
            ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon_16bit[0] =
500
0
                ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[0] + offset;
501
0
            offset >>= 1;
502
0
            ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon_16bit[1] =
503
0
                ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[1] + offset;
504
0
            ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon_16bit[1] =
505
0
                ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[1] + offset;
506
0
            ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon_16bit[2] =
507
0
                ctx->md_blk_arr_nsq[0].neigh_left_recon_16bit[2] + offset;
508
0
            ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon_16bit[2] =
509
0
                ctx->md_blk_arr_nsq[0].neigh_top_recon_16bit[2] + offset;
510
0
        }
511
0
    }
512
1.37k
    if (ctx->hbd_md != EB_10_BIT_MD) {
513
1.37k
        EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon[0], block_max_count_sb * sb_size);
514
1.37k
        EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon[0], block_max_count_sb * sb_size);
515
1.37k
        EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon[1], block_max_count_sb * sb_size >> 1);
516
1.37k
        EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon[1], block_max_count_sb * sb_size >> 1);
517
1.37k
        EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_left_recon[2], block_max_count_sb * sb_size >> 1);
518
1.37k
        EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].neigh_top_recon[2], block_max_count_sb * sb_size >> 1);
519
520
117k
        for (coded_leaf_index = 0; coded_leaf_index < block_max_count_sb; ++coded_leaf_index) {
521
116k
            size_t offset                                             = coded_leaf_index * sb_size;
522
116k
            ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon[0] = ctx->md_blk_arr_nsq[0].neigh_left_recon[0] +
523
116k
                offset;
524
116k
            ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon[0] = ctx->md_blk_arr_nsq[0].neigh_top_recon[0] +
525
116k
                offset;
526
116k
            offset >>= 1;
527
116k
            ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon[1] = ctx->md_blk_arr_nsq[0].neigh_left_recon[1] +
528
116k
                offset;
529
116k
            ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon[1] = ctx->md_blk_arr_nsq[0].neigh_top_recon[1] +
530
116k
                offset;
531
116k
            ctx->md_blk_arr_nsq[coded_leaf_index].neigh_left_recon[2] = ctx->md_blk_arr_nsq[0].neigh_left_recon[2] +
532
116k
                offset;
533
116k
            ctx->md_blk_arr_nsq[coded_leaf_index].neigh_top_recon[2] = ctx->md_blk_arr_nsq[0].neigh_top_recon[2] +
534
116k
                offset;
535
116k
        }
536
1.37k
    }
537
1.37k
    ctx->md_blk_arr_nsq[0].av1xd = NULL;
538
1.37k
    EB_MALLOC_ARRAY(ctx->md_blk_arr_nsq[0].av1xd, block_max_count_sb);
539
540
    // Alloc mds and pc_tree, which are used to track tested blocks in MD
541
1.37k
    bool disallow_4x4 = allintra ? svt_aom_get_disallow_4x4_allintra(enc_mode)
542
1.37k
#if TUNE_SIMPLIFY_SETTINGS
543
1.37k
        : rtc_tune ? svt_aom_get_disallow_4x4_rtc()
544
#else
545
        : rtc_tune ? svt_aom_get_disallow_4x4_rtc(enc_mode)
546
#endif
547
0
                   : svt_aom_get_disallow_4x4_default(enc_mode);
548
1.37k
    bool    disallow_8x8     = allintra ? svt_aom_get_disallow_8x8_allintra()
549
1.37k
               : rtc_tune ? svt_aom_get_disallow_8x8_rtc(enc_mode, scs->max_input_luma_width, scs->max_input_luma_height)
550
0
                          : svt_aom_get_disallow_8x8_default();
551
1.37k
    uint8_t min_bsize        = disallow_8x8 ? 16 : disallow_4x4 ? 8 : 4;
552
1.37k
    int     blocks_per_depth = (sb_size / min_bsize) * (sb_size / min_bsize);
553
1.37k
    int     blocks_to_alloc  = 0;
554
555
6.85k
    for (int i = min_bsize; i <= sb_size; i <<= 1, blocks_per_depth >>= 2) {
556
5.48k
        blocks_to_alloc += blocks_per_depth;
557
5.48k
    }
558
1.37k
    EB_CALLOC_ARRAY(ctx->mds, blocks_to_alloc);
559
1.37k
    uint32_t mds_idx = 0;
560
1.37k
    setup_mds(scs, ctx->mds, &mds_idx, 0, scs->seq_header.sb_size, min_bsize);
561
1.37k
    EB_CALLOC_ARRAY(ctx->pc_tree, blocks_to_alloc);
562
1.37k
    EB_MALLOC_ARRAY(ctx->tested_blk, blocks_to_alloc);
563
1.37k
    setup_pc_tree(ctx->pc_tree, ctx->tested_blk, 0, scs->seq_header.sb_size, min_bsize);
564
1.37k
    ctx->blocks_to_alloc = blocks_to_alloc;
565
566
1.37k
    bool bypass_encdec = allintra ? svt_aom_get_bypass_encdec_allintra(enc_mode)
567
1.37k
        : rtc_tune                ? svt_aom_get_bypass_encdec_rtc(enc_mode, encoder_bit_depth)
568
0
                                  : svt_aom_get_bypass_encdec_default(enc_mode, encoder_bit_depth);
569
117k
    for (coded_leaf_index = 0; coded_leaf_index < block_max_count_sb; ++coded_leaf_index) {
570
116k
        ctx->md_blk_arr_nsq[coded_leaf_index].av1xd      = ctx->md_blk_arr_nsq[0].av1xd + coded_leaf_index;
571
116k
        ctx->md_blk_arr_nsq[coded_leaf_index].segment_id = 0;
572
116k
        const BlockGeom* blk_geom                        = get_blk_geom_mds(scs->blk_geom_mds, coded_leaf_index);
573
116k
        if (bypass_encdec) {
574
116k
            EbPictureBufferDescInitData init_data;
575
576
116k
            init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK;
577
116k
            init_data.max_width          = blk_geom->bwidth;
578
116k
            init_data.max_height         = blk_geom->bheight;
579
116k
            init_data.bit_depth          = EB_THIRTYTWO_BIT;
580
116k
            init_data.color_format       = (blk_geom->bwidth > 4 && blk_geom->bheight > 4)
581
116k
                      ? EB_YUV420
582
116k
                      : EB_YUV444; // PW - must have at least 4x4 for chroma coeffs
583
116k
            init_data.border             = 0;
584
116k
            init_data.split_mode         = false;
585
586
116k
            EB_NEW(ctx->md_blk_arr_nsq[coded_leaf_index].coeff_tmp, svt_picture_buffer_desc_ctor, (EbPtr)&init_data);
587
588
116k
            init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK;
589
116k
            init_data.max_width          = blk_geom->bwidth;
590
116k
            init_data.max_height         = blk_geom->bheight;
591
116k
            init_data.bit_depth          = ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT;
592
116k
            ;
593
116k
            init_data.color_format = (blk_geom->bwidth > 4 && blk_geom->bheight > 4) ? EB_YUV420 : EB_YUV444;
594
116k
            init_data.border       = 0;
595
116k
            init_data.split_mode   = false;
596
597
116k
            EB_NEW(ctx->md_blk_arr_nsq[coded_leaf_index].recon_tmp, svt_picture_buffer_desc_ctor, (EbPtr)&init_data);
598
116k
        } else {
599
0
            ctx->md_blk_arr_nsq[coded_leaf_index].coeff_tmp = NULL;
600
0
            ctx->md_blk_arr_nsq[coded_leaf_index].recon_tmp = NULL;
601
0
        }
602
116k
    }
603
8.22k
    for (CandClass cand_class_it = CAND_CLASS_0; cand_class_it < CAND_CLASS_TOTAL; cand_class_it++) {
604
6.85k
        EB_MALLOC_ARRAY(ctx->cand_buff_indices[cand_class_it], ctx->max_nics_uv);
605
6.85k
    }
606
607
1.37k
    EB_MALLOC_ARRAY(ctx->best_candidate_index_array, ctx->max_nics_uv);
608
1.37k
    EB_MALLOC_ARRAY(ctx->above_txfm_context, (sb_size >> MI_SIZE_LOG2));
609
1.37k
    EB_MALLOC_ARRAY(ctx->left_txfm_context, (sb_size >> MI_SIZE_LOG2));
610
1.37k
    EbPictureBufferDescInitData thirty_two_width_picture_buffer_desc_init_data;
611
1.37k
    EbPictureBufferDescInitData picture_buffer_desc_init_data;
612
613
1.37k
    picture_buffer_desc_init_data.max_width          = sb_size;
614
1.37k
    picture_buffer_desc_init_data.max_height         = sb_size;
615
1.37k
    picture_buffer_desc_init_data.bit_depth          = ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT;
616
1.37k
    picture_buffer_desc_init_data.color_format       = EB_YUV420;
617
1.37k
    picture_buffer_desc_init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK;
618
1.37k
    picture_buffer_desc_init_data.border             = 0;
619
1.37k
    picture_buffer_desc_init_data.split_mode         = false;
620
1.37k
    picture_buffer_desc_init_data.is_16bit_pipeline  = false;
621
622
1.37k
    thirty_two_width_picture_buffer_desc_init_data.max_width          = sb_size;
623
1.37k
    thirty_two_width_picture_buffer_desc_init_data.max_height         = sb_size;
624
1.37k
    thirty_two_width_picture_buffer_desc_init_data.bit_depth          = EB_THIRTYTWO_BIT;
625
1.37k
    thirty_two_width_picture_buffer_desc_init_data.color_format       = EB_YUV420;
626
1.37k
    thirty_two_width_picture_buffer_desc_init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK;
627
1.37k
    thirty_two_width_picture_buffer_desc_init_data.border             = 0;
628
1.37k
    thirty_two_width_picture_buffer_desc_init_data.split_mode         = false;
629
1.37k
    thirty_two_width_picture_buffer_desc_init_data.is_16bit_pipeline  = false;
630
23.2k
    for (uint32_t txt_itr = 0; txt_itr < TX_TYPES; ++txt_itr) {
631
21.9k
        EB_NEW(ctx->recon_coeff_ptr[txt_itr],
632
21.9k
               svt_picture_buffer_desc_ctor,
633
21.9k
               (EbPtr)&thirty_two_width_picture_buffer_desc_init_data);
634
21.9k
        EB_NEW(ctx->recon_ptr[txt_itr], svt_picture_buffer_desc_ctor, (EbPtr)&picture_buffer_desc_init_data);
635
21.9k
        EB_NEW(ctx->quant_coeff_ptr[txt_itr],
636
21.9k
               svt_picture_buffer_desc_ctor,
637
21.9k
               (EbPtr)&thirty_two_width_picture_buffer_desc_init_data);
638
21.9k
    }
639
1.37k
    EB_NEW(ctx->tx_coeffs, svt_picture_buffer_desc_ctor, (EbPtr)&thirty_two_width_picture_buffer_desc_init_data);
640
1.37k
    EB_NEW(ctx->scratch_prediction_ptr, svt_picture_buffer_desc_ctor, (EbPtr)&picture_buffer_desc_init_data);
641
1.37k
    EbPictureBufferDescInitData double_width_picture_buffer_desc_init_data;
642
1.37k
    double_width_picture_buffer_desc_init_data.max_width          = sb_size;
643
1.37k
    double_width_picture_buffer_desc_init_data.max_height         = sb_size;
644
1.37k
    double_width_picture_buffer_desc_init_data.bit_depth          = EB_SIXTEEN_BIT;
645
1.37k
    double_width_picture_buffer_desc_init_data.color_format       = EB_YUV420;
646
1.37k
    double_width_picture_buffer_desc_init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_FULL_MASK;
647
1.37k
    double_width_picture_buffer_desc_init_data.border             = 0;
648
1.37k
    double_width_picture_buffer_desc_init_data.split_mode         = false;
649
1.37k
    double_width_picture_buffer_desc_init_data.is_16bit_pipeline  = false;
650
651
    // The temp_recon_ptr and temp_residual will be shared by all candidates
652
    // If you want to do something with residual or recon, you need to create one
653
1.37k
    EB_NEW(ctx->temp_recon_ptr, svt_picture_buffer_desc_ctor, (EbPtr)&picture_buffer_desc_init_data);
654
1.37k
    EB_NEW(ctx->temp_residual, svt_picture_buffer_desc_ctor, (EbPtr)&double_width_picture_buffer_desc_init_data);
655
656
    // Candidate Buffers
657
1.37k
    EB_ALLOC_PTR_ARRAY(ctx->cand_bf_ptr_array, ctx->max_nics_uv);
658
659
15.0k
    for (buffer_index = 0; buffer_index < ctx->max_nics; ++buffer_index) {
660
13.7k
        EB_NEW(ctx->cand_bf_ptr_array[buffer_index],
661
13.7k
               svt_aom_mode_decision_cand_bf_ctor,
662
13.7k
               ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT,
663
13.7k
               sb_size,
664
13.7k
               PICTURE_BUFFER_DESC_FULL_MASK,
665
13.7k
               ctx->temp_residual,
666
13.7k
               ctx->temp_recon_ptr,
667
13.7k
               &(ctx->fast_cost_array[buffer_index]),
668
13.7k
               &(ctx->full_cost_array[buffer_index]),
669
13.7k
               &(ctx->full_cost_ssim_array[buffer_index]));
670
13.7k
    }
671
672
1.37k
    for (buffer_index = max_nics; buffer_index < ctx->max_nics_uv; ++buffer_index) {
673
0
        EB_NEW(ctx->cand_bf_ptr_array[buffer_index],
674
0
               svt_aom_mode_decision_cand_bf_ctor,
675
0
               ctx->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT,
676
0
               sb_size,
677
0
               PICTURE_BUFFER_DESC_CHROMA_MASK,
678
0
               ctx->temp_residual,
679
0
               ctx->temp_recon_ptr,
680
0
               &(ctx->fast_cost_array[buffer_index]),
681
0
               &(ctx->full_cost_array[buffer_index]),
682
0
               &(ctx->full_cost_ssim_array[buffer_index]));
683
0
    }
684
685
1.37k
    return EB_ErrorNone;
686
1.37k
}
687
688
/**************************************************
689
 * Reset Mode Decision Neighbor Arrays
690
 *************************************************/
691
4.65k
void svt_aom_reset_mode_decision_neighbor_arrays(PictureControlSet* pcs, uint16_t tile_idx) {
692
4.65k
    uint8_t depth;
693
18.6k
    for (depth = 0; depth < NA_TOT_CNT; depth++) {
694
13.9k
        svt_aom_neighbor_array_unit_reset(pcs->mdleaf_partition_na[depth][tile_idx]);
695
13.9k
        if (pcs->hbd_md != EB_10_BIT_MD) {
696
13.9k
            svt_aom_neighbor_array_unit_reset(pcs->md_luma_recon_na[depth][tile_idx]);
697
13.9k
            svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_1_luma_recon_na[depth][tile_idx]);
698
13.9k
            svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_2_luma_recon_na[depth][tile_idx]);
699
13.9k
            svt_aom_neighbor_array_unit_reset(pcs->md_cb_recon_na[depth][tile_idx]);
700
13.9k
            svt_aom_neighbor_array_unit_reset(pcs->md_cr_recon_na[depth][tile_idx]);
701
13.9k
        }
702
13.9k
        if (pcs->hbd_md > EB_8_BIT_MD || (pcs->scs->encoder_bit_depth > EB_EIGHT_BIT && pcs->pic_bypass_encdec)) {
703
0
            svt_aom_neighbor_array_unit_reset(pcs->md_luma_recon_na_16bit[depth][tile_idx]);
704
0
            svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_1_luma_recon_na_16bit[depth][tile_idx]);
705
0
            svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_2_luma_recon_na_16bit[depth][tile_idx]);
706
0
            svt_aom_neighbor_array_unit_reset(pcs->md_cb_recon_na_16bit[depth][tile_idx]);
707
0
            svt_aom_neighbor_array_unit_reset(pcs->md_cr_recon_na_16bit[depth][tile_idx]);
708
0
        }
709
710
13.9k
        svt_aom_neighbor_array_unit_reset(pcs->md_y_dcs_na[depth][tile_idx]);
711
13.9k
        svt_aom_neighbor_array_unit_reset(pcs->md_tx_depth_1_luma_dc_sign_level_coeff_na[depth][tile_idx]);
712
13.9k
        svt_aom_neighbor_array_unit_reset(pcs->md_cb_dc_sign_level_coeff_na[depth][tile_idx]);
713
13.9k
        svt_aom_neighbor_array_unit_reset(pcs->md_cr_dc_sign_level_coeff_na[depth][tile_idx]);
714
13.9k
        svt_aom_neighbor_array_unit_reset(pcs->md_txfm_context_array[depth][tile_idx]);
715
13.9k
    }
716
717
4.65k
    return;
718
4.65k
}
719
720
// If the ref intra percentage is below the TH, apply modulation to the MD lambda
721
0
// Compared against pcs->ref_intra_percentage in av1_lambda_assign_md()
#define LAMBDA_MOD_INTRA_TH 50
722
0
// Multiplier used as (lambda * factor) >> 7 in av1_lambda_assign_md(), i.e. a scale of 138/128
#define LAMBDA_MOD_INTRA_SCALING_FACTOR 138
723
724
// Set the lambda for each sb.
725
// When lambda tuning is on (blk_lambda_tuning), lambda of each block is set separately (full_lambda_md/fast_lambda_md)
726
// later in svt_aom_set_tuned_blk_lambda
727
// Testing showed that updating SAD lambda based on frame info was not helpful; therefore, the SAD lambda generation is not changed.
728
5.73k
static void av1_lambda_assign_md(PictureControlSet* pcs, ModeDecisionContext* ctx) {
729
5.73k
    ctx->full_lambda_md[0] = svt_aom_compute_rd_mult(pcs, ctx->qp_index, ctx->me_q_index, EB_EIGHT_BIT);
730
5.73k
    ctx->fast_lambda_md[0] = svt_aom_compute_fast_lambda(pcs, ctx->qp_index, ctx->me_q_index, EB_EIGHT_BIT);
731
5.73k
    ctx->full_lambda_md[1] = svt_aom_compute_rd_mult(pcs, ctx->qp_index, ctx->me_q_index, EB_TEN_BIT);
732
5.73k
    ctx->fast_lambda_md[1] = svt_aom_compute_fast_lambda(pcs, ctx->qp_index, ctx->me_q_index, EB_TEN_BIT);
733
734
5.73k
    if (!pcs->scs->static_config.rtc && pcs->scs->stats_based_sb_lambda_modulation) {
735
5.73k
        if (pcs->temporal_layer_index > 0) {
736
0
            if (pcs->ref_intra_percentage < LAMBDA_MOD_INTRA_TH) {
737
0
                ctx->full_lambda_md[0] = (ctx->full_lambda_md[0] * LAMBDA_MOD_INTRA_SCALING_FACTOR) >> 7;
738
0
                ctx->fast_lambda_md[0] = (ctx->fast_lambda_md[0] * LAMBDA_MOD_INTRA_SCALING_FACTOR) >> 7;
739
0
                ctx->full_lambda_md[1] = (ctx->full_lambda_md[1] * LAMBDA_MOD_INTRA_SCALING_FACTOR) >> 7;
740
0
                ctx->fast_lambda_md[1] = (ctx->fast_lambda_md[1] * LAMBDA_MOD_INTRA_SCALING_FACTOR) >> 7;
741
0
            }
742
0
        }
743
5.73k
    }
744
745
5.73k
    if (pcs->lambda_weight) {
746
3.10k
        ctx->full_lambda_md[0] = (uint32_t)((ctx->full_lambda_md[0] * (uint64_t)pcs->lambda_weight) >> 7);
747
3.10k
        ctx->fast_lambda_md[0] = (uint32_t)((ctx->fast_lambda_md[0] * (uint64_t)pcs->lambda_weight) >> 7);
748
3.10k
        ctx->full_lambda_md[1] = (uint32_t)((ctx->full_lambda_md[1] * (uint64_t)pcs->lambda_weight) >> 7);
749
3.10k
        ctx->fast_lambda_md[1] = (uint32_t)((ctx->fast_lambda_md[1] * (uint64_t)pcs->lambda_weight) >> 7);
750
3.10k
    }
751
5.73k
    ctx->full_lambda_md[1] *= 16;
752
5.73k
    ctx->fast_lambda_md[1] *= 4;
753
754
5.73k
    SequenceControlSet* scs          = pcs->scs;
755
5.73k
    uint64_t            scale_factor = scs->static_config.lambda_scale_factors[pcs->ppcs->update_type];
756
5.73k
    ctx->full_lambda_md[0]           = (uint32_t)((ctx->full_lambda_md[0] * scale_factor) >> 7);
757
5.73k
    ctx->full_lambda_md[1]           = (uint32_t)((ctx->full_lambda_md[1] * scale_factor) >> 7);
758
5.73k
    ctx->fast_lambda_md[0]           = (uint32_t)((ctx->fast_lambda_md[0] * scale_factor) >> 7);
759
5.73k
    ctx->fast_lambda_md[1]           = (uint32_t)((ctx->fast_lambda_md[1] * scale_factor) >> 7);
760
761
5.73k
    ctx->full_sb_lambda_md[0] = ctx->full_lambda_md[0];
762
5.73k
    ctx->full_sb_lambda_md[1] = ctx->full_lambda_md[1];
763
5.73k
}
764
765
// Re-initializes the mode decision context `ctx` from the picture control set `pcs`.
// When `segment_index` is 0, the MD neighbor arrays of every tile in tile group
// `tile_group_idx` are reset as well.
void svt_aom_reset_mode_decision(SequenceControlSet* scs, ModeDecisionContext* ctx, PictureControlSet* pcs,
                                 uint16_t tile_group_idx, uint32_t segment_index) {
    // scs is only read when OPT_LPD1_FAST_SKIP is disabled
    (void)scs;
#if !OPT_LPD1_FAST_SKIP
    const bool rtc_tune = scs->static_config.rtc;
#endif
    ctx->hbd_md = pcs->hbd_md;
    // Reset MD rate Estimation table to initial values by copying from md_rate_est_ctx
    ctx->md_rate_est_ctx = pcs->md_rate_est_ctx;
    // Reset CABAC Contexts

    // Reset Neighbor Arrays at start of new Segment / Picture
    if (segment_index == 0) {
        uint16_t tile_row = pcs->ppcs->tile_group_info[tile_group_idx].tile_group_tile_start_y;
        while (tile_row < pcs->ppcs->tile_group_info[tile_group_idx].tile_group_tile_end_y) {
            uint16_t tile_col = pcs->ppcs->tile_group_info[tile_group_idx].tile_group_tile_start_x;
            while (tile_col < pcs->ppcs->tile_group_info[tile_group_idx].tile_group_tile_end_x) {
                // Raster-order tile index within the picture
                uint16_t tile_idx = tile_col + tile_row * pcs->ppcs->av1_cm->tiles_info.tile_cols;
                svt_aom_reset_mode_decision_neighbor_arrays(pcs, tile_idx);
                tile_col++;
            }
            tile_row++;
        }
    }
    // Each segment inherits the bypass encdec setting from the picture level
    ctx->bypass_encdec = pcs->pic_bypass_encdec;
#if !OPT_LPD1_FAST_SKIP
    ctx->rtc_use_N4_dct_dct_shortcut = (!rtc_tune && (pcs->enc_mode <= ENC_M11 || pcs->temporal_layer_index != 0))
        ? 1
        : 0;
#endif
}
800
801
/******************************************************
802
 * Mode Decision Configure SB
803
 ******************************************************/
804
void svt_aom_mode_decision_configure_sb(ModeDecisionContext* ctx, PictureControlSet* pcs, uint8_t sb_qp,
805
5.73k
                                        uint8_t me_sb_qp) {
806
    /* Note(CHKN) : when Qp modulation varies QP on a sub-SB(CU) basis,  Lamda has to change based on Cu->QP , and then this code has to move inside the CU loop in MD */
807
808
    // Lambda Assignement
809
5.73k
    ctx->qp_index = pcs->ppcs->frm_hdr.delta_q_params.delta_q_present || pcs->ppcs->r0_delta_qp_md
810
5.73k
        ? sb_qp
811
5.73k
        : (uint8_t)pcs->ppcs->frm_hdr.quantization_params.base_q_idx;
812
813
5.73k
    ctx->me_q_index = me_sb_qp;
814
815
5.73k
    av1_lambda_assign_md(pcs, ctx);
816
817
5.73k
    ctx->hbd_pack_done = 0;
818
819
5.73k
    return;
820
5.73k
}