Coverage Report

Created: 2026-05-16 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/svt-av1/Source/Lib/Codec/rd_cost.c
Line
Count
Source
1
/*
2
* Copyright(c) 2019 Intel Corporation
3
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
4
*
5
* This source code is subject to the terms of the BSD 2 Clause License and
6
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
7
* was not distributed with this source code in the LICENSE file, you can
8
* obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
9
* Media Patent License 1.0 was not distributed with this source code in the
10
* PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
11
*/
12
13
/***************************************
14
* Includes
15
***************************************/
16
#include "rd_cost.h"
17
#include "common_utils.h"
18
#include "aom_dsp_rtcd.h"
19
#include "svt_log.h"
20
#include "enc_inter_prediction.h"
21
#include "full_loop.h"
22
#include "entropy_coding.h"
23
24
#include <assert.h>
25
26
0
#define MV_COST_WEIGHT 108
27
int svt_aom_get_reference_mode_context_new(const MacroBlockD* xd);
28
int svt_av1_get_pred_context_uni_comp_ref_p(const MacroBlockD* xd);
29
int svt_av1_get_pred_context_uni_comp_ref_p1(const MacroBlockD* xd);
30
int svt_av1_get_pred_context_uni_comp_ref_p2(const MacroBlockD* xd);
31
int svt_aom_get_comp_reference_type_context_new(const MacroBlockD* xd);
32
33
int  svt_aom_get_palette_bsize_ctx(BlockSize bsize);
34
int  svt_aom_get_palette_mode_ctx(const MacroBlockD* xd);
35
int  svt_aom_write_uniform_cost(int n, int v);
36
int  svt_get_palette_cache_y(const MacroBlockD* const xd, uint16_t* cache);
37
int  svt_av1_palette_color_cost_y(const PaletteModeInfo* const pmi, uint16_t* color_cache, const int palette_size,
38
                                  int n_cache, int bit_depth);
39
int  svt_av1_cost_color_map(ModeDecisionCandidate* cand, MdRateEstimationContext* rate_table,
40
41
                            BlkStruct* blk_ptr, int plane, BlockSize bsize, COLOR_MAP_TYPE type);
42
void svt_aom_get_block_dimensions(BlockSize bsize, int plane, const MacroBlockD* xd, int* width, int* height,
43
                                  int* rows_within_bounds, int* cols_within_bounds);
44
int  svt_aom_allow_palette(int allow_screen_content_tools, BlockSize bsize);
45
int  svt_aom_allow_intrabc(const FrameHeader* frm_hdr, SliceType slice_type);
46
47
0
MvJointType svt_av1_get_mv_joint(const Mv* mv) {
48
0
    if (mv->y == 0) {
49
0
        return mv->x == 0 ? MV_JOINT_ZERO : MV_JOINT_HNZVZ;
50
0
    } else {
51
0
        return mv->x == 0 ? MV_JOINT_HZVNZ : MV_JOINT_HNZVNZ;
52
0
    }
53
0
}
54
55
0
static int32_t mv_cost(const Mv* mv, const int32_t* joint_cost, const int32_t* const comp_cost[2]) {
56
0
    int32_t jn_c = svt_av1_get_mv_joint(mv);
57
0
    int32_t res  = joint_cost[jn_c] + comp_cost[0][CLIP3(MV_LOW, MV_UPP, mv->y)] +
58
0
        comp_cost[1][CLIP3(MV_LOW, MV_UPP, mv->x)];
59
0
    return res;
60
0
}
61
62
0
int32_t svt_av1_mv_bit_cost_light(const Mv* mv, const Mv* ref) {
63
0
    const uint32_t factor     = 50;
64
0
    const uint32_t absmvdiffx = ABS(mv->x - ref->x);
65
0
    const uint32_t absmvdiffy = ABS(mv->y - ref->y);
66
0
    const uint32_t mv_rate    = 1296 + (factor * (absmvdiffx + absmvdiffy));
67
0
    return mv_rate;
68
0
}
69
70
int32_t svt_av1_mv_bit_cost(const Mv* mv, const Mv* ref, const int32_t* mvjcost, const int32_t* const mvcost[2],
71
0
                            int32_t weight) {
72
    // Restrict the size of the MV diff to be within the max AV1 range.  If the MV diff
73
    // is outside this range, the diff will index beyond the cost array, causing a seg fault.
74
    // Both the MVs and the MV diffs should be within the allowable range for accessing the MV cost
75
    // infrastructure.
76
0
    const int16_t x         = MIN(MAX(mv->x - ref->x, MV_LOW), MV_UPP);
77
0
    const int16_t y         = MIN(MAX(mv->y - ref->y, MV_LOW), MV_UPP);
78
0
    Mv            temp_diff = {{x, y}};
79
80
0
    return ROUND_POWER_OF_TWO(mv_cost(&temp_diff, mvjcost, mvcost) * weight, 7);
81
0
}
82
83
/////////////////////////////COEFFICIENT CALCULATION //////////////////////////////////////////////
84
9.33k
// Cost of the part of an absolute coefficient level that exceeds the base + range levels.
// Returns 0 when abs_qc < 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE; otherwise returns
// av1_cost_literal(2 * length - 1), with length = get_msb(abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS) + 1.
static INLINE int32_t get_golomb_cost(int32_t abs_qc) {
    if (abs_qc < 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
        return 0;
    }

    const int32_t remainder = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
    const int32_t num_bits  = get_msb(remainder) + 1;
    return av1_cost_literal(2 * num_bits - 1);
}
92
93
void svt_av1_txb_init_levels_c(const TranLow* const coeff, const int32_t width, const int32_t height,
94
0
                               uint8_t* const levels) {
95
0
    const int32_t stride = width + TX_PAD_HOR;
96
0
    uint8_t*      ls     = levels;
97
98
0
    memset(levels - TX_PAD_TOP * stride, 0, sizeof(*levels) * TX_PAD_TOP * stride);
99
0
    memset(levels + stride * height, 0, sizeof(*levels) * (TX_PAD_BOTTOM * stride + TX_PAD_END));
100
101
0
    for (int32_t i = 0; i < height; i++) {
102
0
        for (int32_t j = 0; j < width; j++) {
103
0
            *ls++ = (uint8_t)clamp(abs(coeff[i * width + j]), 0, INT8_MAX);
104
0
        }
105
0
        for (int32_t j = 0; j < TX_PAD_HOR; j++) {
106
0
            *ls++ = 0;
107
0
        }
108
0
    }
109
0
}
110
111
static int32_t av1_transform_type_rate_estimation(ModeDecisionContext* ctx, uint8_t allow_update_cdf, FRAME_CONTEXT* fc,
112
                                                  ModeDecisionCandidateBuffer* cand_bf, bool is_inter,
113
                                                  TxSize transform_size, TxType transform_type,
114
0
                                                  bool reduced_tx_set_used) {
115
    // const MbModeInfo *mbmi = &xd->mi[0]->mbmi;
116
    // const int32_t is_inter = is_inter_block(mbmi);
117
118
0
    if (get_ext_tx_types(transform_size, is_inter, reduced_tx_set_used) >
119
0
        1 /*&&    !xd->lossless[xd->mi[0]->mbmi.segment_id]  WE ARE NOT LOSSLESS*/) {
120
0
        const TxSize square_tx_size = txsize_sqr_map[transform_size];
121
0
        assert(square_tx_size < EXT_TX_SIZES);
122
123
0
        const int32_t ext_tx_set = get_ext_tx_set(transform_size, is_inter, reduced_tx_set_used);
124
0
        if (is_inter) {
125
0
            if (ext_tx_set > 0) {
126
0
                if (allow_update_cdf) {
127
0
                    const TxSetType tx_set_type = get_ext_tx_set_type(transform_size, is_inter, reduced_tx_set_used);
128
129
0
                    update_cdf(fc->inter_ext_tx_cdf[ext_tx_set][square_tx_size],
130
0
                               av1_ext_tx_ind[tx_set_type][transform_type],
131
0
                               av1_num_ext_tx_set[tx_set_type]);
132
0
                }
133
0
                return ctx->md_rate_est_ctx->inter_tx_type_fac_bits[ext_tx_set][square_tx_size][transform_type];
134
0
            }
135
0
        } else {
136
0
            if (ext_tx_set > 0) {
137
0
                PredictionMode intra_dir;
138
0
                if (cand_bf->cand->block_mi.filter_intra_mode != FILTER_INTRA_MODES) {
139
0
                    intra_dir = fimode_to_intradir[cand_bf->cand->block_mi.filter_intra_mode];
140
0
                } else {
141
0
                    intra_dir = cand_bf->cand->block_mi.mode;
142
0
                }
143
0
                assert(intra_dir < INTRA_MODES);
144
0
                const TxSetType tx_set_type = get_ext_tx_set_type(transform_size, is_inter, reduced_tx_set_used);
145
146
0
                if (allow_update_cdf) {
147
0
                    update_cdf(fc->intra_ext_tx_cdf[ext_tx_set][square_tx_size][intra_dir],
148
0
                               av1_ext_tx_ind[tx_set_type][transform_type],
149
0
                               av1_num_ext_tx_set[tx_set_type]);
150
0
                }
151
0
                return ctx->md_rate_est_ctx
152
0
                    ->intra_tx_type_fac_bits[ext_tx_set][square_tx_size][intra_dir][transform_type];
153
0
            }
154
0
        }
155
0
    }
156
0
    return 0;
157
0
}
158
159
// Update the eob-related CDFs. Function assumes allow_update_cdf is true
160
// as the only action of the function is to update the CDFs.
161
0
static void update_eob_context(int eob, TxSize tx_size, TxClass tx_class, PlaneType plane, FRAME_CONTEXT* ec_ctx) {
162
0
    int          eob_extra;
163
0
    const int    eob_pt  = get_eob_pos_token(eob, &eob_extra);
164
0
    const TxSize txs_ctx = (TxSize)((txsize_sqr_map[tx_size] + txsize_sqr_up_map[tx_size] + 1) >> 1);
165
0
    assert(txs_ctx < TX_SIZES);
166
0
    const int eob_multi_size = txsize_log2_minus4[tx_size];
167
0
    const int eob_multi_ctx  = (tx_class == TX_CLASS_2D) ? 0 : 1;
168
169
0
    switch (eob_multi_size) {
170
0
    case 0:
171
0
        update_cdf(ec_ctx->eob_flag_cdf16[plane][eob_multi_ctx], eob_pt - 1, 5);
172
0
        break;
173
0
    case 1:
174
0
        update_cdf(ec_ctx->eob_flag_cdf32[plane][eob_multi_ctx], eob_pt - 1, 6);
175
0
        break;
176
0
    case 2:
177
0
        update_cdf(ec_ctx->eob_flag_cdf64[plane][eob_multi_ctx], eob_pt - 1, 7);
178
0
        break;
179
0
    case 3:
180
0
        update_cdf(ec_ctx->eob_flag_cdf128[plane][eob_multi_ctx], eob_pt - 1, 8);
181
0
        break;
182
0
    case 4:
183
0
        update_cdf(ec_ctx->eob_flag_cdf256[plane][eob_multi_ctx], eob_pt - 1, 9);
184
0
        break;
185
0
    case 5:
186
0
        update_cdf(ec_ctx->eob_flag_cdf512[plane][eob_multi_ctx], eob_pt - 1, 10);
187
0
        break;
188
0
    case 6:
189
0
    default:
190
0
        update_cdf(ec_ctx->eob_flag_cdf1024[plane][eob_multi_ctx], eob_pt - 1, 11);
191
0
        break;
192
0
    }
193
194
0
    const int eob_offset_bits = svt_aom_eob_offset_bits[eob_pt];
195
0
    if (eob_offset_bits > 0) {
196
0
        const int eob_ctx   = eob_pt - 3;
197
0
        const int eob_shift = eob_offset_bits - 1;
198
0
        const int bit       = (eob_extra & (1 << eob_shift)) ? 1 : 0;
199
0
        update_cdf(ec_ctx->eob_extra_cdf[txs_ctx][plane][eob_ctx], bit, 2);
200
0
    }
201
0
}
202
203
// Transform end of block bit estimation
204
21.2k
int get_eob_cost(int eob, const LvMapEobCost* txb_eob_costs, const LvMapCoeffCost* txb_costs, TxClass tx_class) {
205
21.2k
    int       eob_extra;
206
21.2k
    const int eob_pt        = get_eob_pos_token(eob, &eob_extra);
207
21.2k
    const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1;
208
21.2k
    int       eob_cost      = txb_eob_costs->eob_cost[eob_multi_ctx][eob_pt - 1];
209
210
21.2k
    const int eob_offset_bits = svt_aom_eob_offset_bits[eob_pt];
211
21.2k
    if (eob_offset_bits > 0) {
212
0
        const int eob_ctx   = eob_pt - 3;
213
0
        const int eob_shift = eob_offset_bits - 1;
214
0
        const int bit       = (eob_extra & (1 << eob_shift)) ? 1 : 0;
215
0
        eob_cost += txb_costs->eob_extra_cost[eob_ctx][bit];
216
0
        if (eob_offset_bits > 1) {
217
0
            eob_cost += av1_cost_literal(eob_offset_bits - 1);
218
0
        }
219
0
    }
220
21.2k
    return eob_cost;
221
21.2k
}
222
223
// Returns the rate of signalling a transform block as skipped (txb_skip symbol 1) for the given
// context; when allow_update_cdf is set, the txb_skip CDF in `ec_ctx` is updated with symbol 1 too.
static INLINE int32_t av1_cost_skip_txb(ModeDecisionContext* ctx, uint8_t allow_update_cdf, FRAME_CONTEXT* ec_ctx,
                                        TxSize transform_size, PlaneType plane_type, int16_t txb_skip_ctx) {
    const TxSize txs_ctx = (TxSize)((txsize_sqr_map[transform_size] + txsize_sqr_up_map[transform_size] + 1) >> 1);
    assert(txs_ctx < TX_SIZES);

    if (allow_update_cdf) {
        update_cdf(ec_ctx->txb_skip_cdf[txs_ctx][txb_skip_ctx], 1, 2);
    }

    return ctx->md_rate_est_ctx->coeff_fac_bits[txs_ctx][plane_type].txb_skip_cost[txb_skip_ctx][1];
}
233
234
// Coefficient rate for a block whose only coded coefficient is qcoeff[0] (eob == 1).
// Adds, in order: the base_eob cost of min(level, 3) - 1, the DC sign cost (non-zero coefficient
// only), and for levels above NUM_BASE_LEVELS the lps cost (context 0, range index capped at
// COEFF_BASE_RANGE) plus get_golomb_cost(), which is 0 below its own threshold.
static INLINE int32_t av1_cost_coeffs_txb_loop_cost_one_eob(const TranLow* const qcoeff, int8_t* const coeff_contexts,
                                                            const LvMapCoeffCost* coeff_costs, int16_t dc_sign_ctx) {
    const TranLow coeff     = qcoeff[0];
    const int32_t abs_level = abs(coeff);
    const int32_t base_sym  = AOMMIN(abs_level, 3) - 1;

    assert(base_sym >= 0);
    int32_t rate = coeff_costs->base_eob_cost[coeff_contexts[0]][base_sym];

    if (coeff == 0) {
        return rate;
    }

    // sign bit cost
    rate += coeff_costs->dc_sign_cost[dc_sign_ctx][(coeff < 0) ? 1 : 0];

    if (abs_level <= NUM_BASE_LEVELS) {
        return rate;
    }

    const int32_t base_range = abs_level - 1 - NUM_BASE_LEVELS;
    rate += coeff_costs->lps_cost[0][AOMMIN(base_range, COEFF_BASE_RANGE)];
    rate += get_golomb_cost(abs_level);
    return rate;
}
264
265
// Coefficient rate of a transform block with `eob` coded coefficients (eob >= 1).
//   - eob == 1 is delegated to av1_cost_coeffs_txb_loop_cost_one_eob()
//   - the coefficient at scan position eob - 1 is costed with the base_eob table and a literal sign bit
//   - the coefficient at index 0 is costed with the base table and the DC sign table
//   - of the remaining positions only scan indices c_start..1 are costed, with
//     c_start = MIN(eob - 2, eob / MAX(1, mds_fast_coeff_est_level - mds_subres_step)),
//     i.e. the estimate may deliberately ignore part of the coefficients
// Levels above NUM_BASE_LEVELS add an lps cost (range index capped at COEFF_BASE_RANGE) and
// get_golomb_cost(), which evaluates to 0 below its own threshold.
static INLINE int32_t av1_cost_coeffs_txb_loop_cost_eob(ModeDecisionContext* md_ctx, uint16_t eob,
                                                        const int16_t* const scan, const TranLow* const qcoeff,
                                                        int8_t* const coeff_contexts, const LvMapCoeffCost* coeff_costs,
                                                        int16_t dc_sign_ctx, uint8_t* const levels, const int32_t bwl,
                                                        TxType transform_type) {
    // Optimized/simplified function when eob is 1
    if (eob == 1) {
        return av1_cost_coeffs_txb_loop_cost_one_eob(qcoeff, coeff_contexts, coeff_costs, dc_sign_ctx);
    }

    const uint32_t sign_bit_rate = av1_cost_literal(1);
    int32_t        rate          = 0;

    // Coefficient at scan index (eob - 1)
    const int32_t last_pos   = scan[eob - 1];
    const TranLow last_coeff = qcoeff[last_pos];
    const int32_t last_level = abs(last_coeff);

    assert((AOMMIN(last_level, 3) - 1) >= 0);
    rate += coeff_costs->base_eob_cost[coeff_contexts[last_pos]][AOMMIN(last_level, 3) - 1];

    if (last_coeff != 0) {
        rate += sign_bit_rate;
        if (last_level > NUM_BASE_LEVELS) {
            const int32_t br_ctx     = get_br_ctx(levels, last_pos, bwl, tx_type_to_class[transform_type]);
            const int32_t base_range = last_level - 1 - NUM_BASE_LEVELS;

            rate += coeff_costs->lps_cost[br_ctx][AOMMIN(base_range, COEFF_BASE_RANGE)];
            rate += get_golomb_cost(last_level);
        }
    }

    // Coefficient at index 0
    const TranLow dc_coeff = qcoeff[0];
    const int32_t dc_level = abs(dc_coeff);

    rate += coeff_costs->base_cost[coeff_contexts[0]][AOMMIN(dc_level, 3)];

    if (dc_coeff != 0) {
        // sign bit cost
        rate += coeff_costs->dc_sign_cost[dc_sign_ctx][(dc_coeff < 0) ? 1 : 0];

        if (dc_level > NUM_BASE_LEVELS) {
            const int32_t br_ctx     = get_br_ctx(levels, 0, bwl, tx_type_to_class[transform_type]);
            const int32_t base_range = dc_level - 1 - NUM_BASE_LEVELS;

            rate += coeff_costs->lps_cost[br_ctx][AOMMIN(base_range, COEFF_BASE_RANGE)];
            rate += get_golomb_cost(dc_level);
        }
    }

    // Remaining coefficients: scan index (eob - 1) and index 0 were handled above.
    // Estimate the rate of the first (eob / fast_coeff_est_level) coeff(s) only.
    const int32_t first_idx =
        MIN(eob - 2, eob / MAX(1, (int)(md_ctx->mds_fast_coeff_est_level - md_ctx->mds_subres_step)));
    uint32_t nonzero_cnt = 0;

    for (int32_t idx = first_idx; idx >= 1; --idx) {
        const int32_t pos   = scan[idx];
        const int32_t level = abs(qcoeff[pos]);

        nonzero_cnt += !!(qcoeff[pos]);

        if (level <= NUM_BASE_LEVELS) {
            rate += coeff_costs->base_cost[coeff_contexts[pos]][level];
            continue;
        }

        const int32_t br_ctx     = get_br_ctx(levels, pos, bwl, tx_type_to_class[transform_type]);
        const int32_t base_range = level - 1 - NUM_BASE_LEVELS;

        rate += coeff_costs->base_cost[coeff_contexts[pos]][3];
        rate += coeff_costs->lps_cost[br_ctx][AOMMIN(base_range, COEFF_BASE_RANGE)];
        rate += get_golomb_cost(level);
    }

    // One literal sign bit per non-zero coefficient visited by the loop.
    rate += nonzero_cnt * sign_bit_rate;

    return rate;
}
363
364
// Note: don't call this function when eob is 0.
365
uint64_t svt_av1_cost_coeffs_txb(ModeDecisionContext* ctx, uint8_t allow_update_cdf, FRAME_CONTEXT* ec_ctx,
366
                                 ModeDecisionCandidateBuffer* cand_bf, const TranLow* const qcoeff, uint16_t eob,
367
                                 PlaneType plane_type, TxSize transform_size, TxType transform_type,
368
                                 int16_t txb_skip_ctx, int16_t dc_sign_ctx, bool reduced_transform_set_flag)
369
370
10.9k
{
371
    //Note: there is a different version of this function in AOM that seems to be efficient as its name is:
372
    //warehouse_efficients_txb
373
374
10.9k
    const TxSize  txs_ctx  = (TxSize)((txsize_sqr_map[transform_size] + txsize_sqr_up_map[transform_size] + 1) >> 1);
375
10.9k
    const TxClass tx_class = tx_type_to_class[transform_type];
376
10.9k
    int32_t       cost;
377
10.9k
    const int32_t bwl    = get_txb_bwl(transform_size);
378
10.9k
    const int32_t width  = get_txb_wide(transform_size);
379
10.9k
    const int32_t height = get_txb_high(transform_size);
380
381
10.9k
    const ScanOrder* const scan_order = get_scan_order(transform_size, transform_type);
382
10.9k
    const int16_t* const   scan       = scan_order->scan;
383
10.9k
    uint8_t                levels_buf[TX_PAD_2D];
384
10.9k
    uint8_t* const         levels = set_levels(levels_buf, width);
385
10.9k
    DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]);
386
10.9k
    assert(txs_ctx < TX_SIZES);
387
10.9k
    const LvMapCoeffCost* const coeff_costs = &ctx->md_rate_est_ctx->coeff_fac_bits[txs_ctx][plane_type];
388
389
10.9k
    const int32_t             eob_multi_size = txsize_log2_minus4[transform_size];
390
10.9k
    const LvMapEobCost* const eob_bits       = &ctx->md_rate_est_ctx->eob_frac_bits[eob_multi_size][plane_type];
391
    // eob must be greater than 0 here.
392
10.9k
    assert(eob > 0);
393
10.9k
    cost = coeff_costs->txb_skip_cost[txb_skip_ctx][0];
394
395
10.9k
    if (allow_update_cdf) {
396
0
        update_cdf(ec_ctx->txb_skip_cdf[txs_ctx][txb_skip_ctx], eob == 0, 2);
397
0
    }
398
399
10.9k
    if (eob > 1) {
400
0
        svt_av1_txb_init_levels(qcoeff,
401
0
                                width,
402
0
                                height,
403
0
                                levels); // NM - Needs to be optimized - to be combined with the quantisation.
404
0
    }
405
10.9k
    const bool is_inter = is_inter_mode(cand_bf->cand->block_mi.mode);
406
    // Transform type bit estimation
407
10.9k
    cost += plane_type > PLANE_TYPE_Y ? 0
408
10.9k
                                      : av1_transform_type_rate_estimation(ctx,
409
1
                                                                           allow_update_cdf,
410
1
                                                                           ec_ctx,
411
1
                                                                           cand_bf,
412
1
                                                                           is_inter,
413
1
                                                                           transform_size,
414
1
                                                                           transform_type,
415
1
                                                                           reduced_transform_set_flag);
416
417
    // Transform eob bit estimation
418
10.9k
    cost += get_eob_cost(eob, eob_bits, coeff_costs, tx_class);
419
10.9k
    if (allow_update_cdf) {
420
0
        update_eob_context(eob, transform_size, tx_class, plane_type, ec_ctx);
421
0
    }
422
    // Transform non-zero coeff bit estimation
423
10.9k
    svt_av1_get_nz_map_contexts(levels,
424
10.9k
                                scan,
425
10.9k
                                eob,
426
10.9k
                                transform_size,
427
10.9k
                                tx_class,
428
10.9k
                                coeff_contexts); // NM - Assembly version is available in AOM
429
10.9k
    assert(eob <= width * height);
430
10.9k
    if (allow_update_cdf) {
431
0
        for (int c = eob - 1; c >= 0; --c) {
432
0
            const int     pos       = scan[c];
433
0
            const int     coeff_ctx = coeff_contexts[pos];
434
0
            const TranLow v         = qcoeff[pos];
435
0
            const TranLow level     = abs(v);
436
0
            if (c == eob - 1) {
437
0
                assert(coeff_ctx < 4);
438
0
                update_cdf(ec_ctx->coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx], AOMMIN(level, 3) - 1, 3);
439
0
            } else {
440
0
                update_cdf(ec_ctx->coeff_base_cdf[txs_ctx][plane_type][coeff_ctx], AOMMIN(level, 3), 4);
441
0
            }
442
443
0
            {
444
0
                if (c == eob - 1) {
445
0
                    assert(coeff_ctx < 4);
446
#if CONFIG_ENTROPY_STATS
447
                    ++td->counts
448
                          ->coeff_base_eob_multi[cdf_idx][txsize_ctx][plane_type][coeff_ctx][AOMMIN(level, 3) - 1];
449
                } else {
450
                    ++td->counts->coeff_base_multi[cdf_idx][txsize_ctx][plane_type][coeff_ctx][AOMMIN(level, 3)];
451
#endif
452
0
                }
453
0
            }
454
455
0
            if (level > NUM_BASE_LEVELS) {
456
0
                const int base_range = level - 1 - NUM_BASE_LEVELS;
457
0
                int       br_ctx;
458
0
                if (eob == 1) {
459
0
                    br_ctx = 0;
460
0
                } else {
461
0
                    br_ctx = get_br_ctx(levels, pos, bwl, tx_class);
462
0
                }
463
464
0
                for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
465
0
                    const int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1);
466
0
                    update_cdf(ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)][plane_type][br_ctx], k, BR_CDF_SIZE);
467
0
                    for (int lps = 0; lps < BR_CDF_SIZE - 1; lps++) {
468
#if CONFIG_ENTROPY_STATS
469
                        ++td->counts->coeff_lps[AOMMIN(txsize_ctx, TX_32X32)][plane_type][lps][br_ctx][lps == k];
470
#endif // CONFIG_ENTROPY_STATS
471
0
                        if (lps == k) {
472
0
                            break;
473
0
                        }
474
0
                    }
475
#if CONFIG_ENTROPY_STATS
476
                    ++td->counts->coeff_lps_multi[cdf_idx][AOMMIN(txsize_ctx, TX_32X32)][plane_type][br_ctx][k];
477
#endif
478
0
                    if (k < BR_CDF_SIZE - 1) {
479
0
                        break;
480
0
                    }
481
0
                }
482
0
            }
483
0
        }
484
485
0
        if (qcoeff[0] != 0) {
486
0
            update_cdf(ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], qcoeff[0] < 0, 2);
487
0
        }
488
489
        //TODO: CHKN  for 128x128 where we need more than one TXb, we need to update the txb_context(dc_sign+skip_ctx) in a Txb basis.
490
491
0
        return 0;
492
0
    }
493
494
10.9k
    cost += av1_cost_coeffs_txb_loop_cost_eob(
495
10.9k
        ctx, eob, scan, qcoeff, coeff_contexts, coeff_costs, dc_sign_ctx, levels, bwl, transform_type);
496
10.9k
    return cost;
497
10.9k
}
498
499
uint64_t svt_aom_get_intra_uv_fast_rate(PictureControlSet* pcs, ModeDecisionContext* ctx,
500
127k
                                        ModeDecisionCandidateBuffer* cand_bf, bool use_accurate_cfl) {
501
127k
    const BlockGeom* const blk_geom = ctx->blk_geom;
502
127k
    ModeDecisionCandidate* cand     = cand_bf->cand;
503
127k
    assert(ctx->has_uv);
504
127k
    assert(!(svt_aom_allow_intrabc(&pcs->ppcs->frm_hdr, pcs->ppcs->slice_type) && cand->block_mi.use_intrabc));
505
127k
    MdRateEstimationContext* md_rate_est_ctx = ctx->md_rate_est_ctx;
506
127k
    const uint8_t            is_cfl_allowed  = (blk_geom->bwidth <= 32 && blk_geom->bheight <= 32) ? 1 : 0;
507
127k
    PredictionMode           intra_mode      = (PredictionMode)cand->block_mi.mode;
508
    // If CFL alphas are not known yet, calculate the chroma mode bits based on DC Mode. If CFL is selected the chroma mode bits must be updated later
509
127k
    const UvPredictionMode chroma_mode = cand->block_mi.uv_mode == UV_CFL_PRED && !use_accurate_cfl
510
127k
        ? UV_DC_PRED
511
127k
        : cand->block_mi.uv_mode;
512
127k
    const uint32_t         mi_row      = ctx->blk_org_y >> MI_SIZE_LOG2;
513
127k
    const uint32_t         mi_col      = ctx->blk_org_x >> MI_SIZE_LOG2;
514
    // Subsampling assumes YUV 420 content
515
127k
    const uint8_t ss_x = 1;
516
127k
    const uint8_t ss_y = 1;
517
518
127k
    uint64_t chroma_rate = 0;
519
    // Estimate chroma nominal intra mode bits
520
127k
    chroma_rate += (uint64_t)md_rate_est_ctx->intra_uv_mode_fac_bits[is_cfl_allowed][intra_mode][chroma_mode];
521
522
    // Estimate chroma angular mode bits; angular offset only allow for bsize >= 8x8
523
127k
    if (blk_geom->bsize >= BLOCK_8X8 && av1_is_directional_mode(get_uv_mode(chroma_mode))) {
524
0
        chroma_rate +=
525
0
            md_rate_est_ctx->angle_delta_fac_bits[chroma_mode - V_PRED]
526
0
                                                 [MAX_ANGLE_DELTA + cand->block_mi.angle_delta[PLANE_TYPE_UV]];
527
0
    }
528
529
    // Estimate CFL factor bits when CFL is used
530
127k
    if (chroma_mode == UV_CFL_PRED) {
531
0
        chroma_rate += (uint64_t)md_rate_est_ctx->cfl_alpha_fac_bits[cand->block_mi.cfl_alpha_signs][CFL_PRED_U]
532
0
                                                                    [CFL_IDX_U(cand->block_mi.cfl_alpha_idx)] +
533
0
            (uint64_t)md_rate_est_ctx->cfl_alpha_fac_bits[cand->block_mi.cfl_alpha_signs][CFL_PRED_V]
534
0
                                                         [CFL_IDX_V(cand->block_mi.cfl_alpha_idx)];
535
0
    }
536
537
    // Estimate chroma palette mode bits (currently not supported, so just cost of signalling off)
538
127k
    if (chroma_mode == UV_DC_PRED &&
539
127k
        svt_aom_allow_palette(pcs->ppcs->frm_hdr.allow_screen_content_tools, blk_geom->bsize) &&
540
0
        is_chroma_reference(mi_row, mi_col, blk_geom->bsize, ss_x, ss_y)) {
541
0
        const int use_palette_y  = cand->palette_info && (cand->palette_size[0] > 0);
542
0
        const int use_palette_uv = cand->palette_info && (cand->palette_size[1] > 0);
543
0
        chroma_rate += ctx->md_rate_est_ctx->palette_uv_mode_fac_bits[use_palette_y][use_palette_uv];
544
0
    }
545
546
127k
    return chroma_rate;
547
127k
}
548
549
uint64_t svt_aom_intra_fast_cost(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf,
550
127k
                                 uint64_t lambda, uint64_t luma_distortion) {
551
127k
    const BlockGeom*       blk_geom = ctx->blk_geom;
552
127k
    BlkStruct*             blk_ptr  = ctx->blk_ptr;
553
127k
    ModeDecisionCandidate* cand     = cand_bf->cand;
554
127k
    if (svt_aom_allow_intrabc(&pcs->ppcs->frm_hdr, pcs->ppcs->slice_type) && cand->block_mi.use_intrabc) {
555
0
        uint64_t rate = 0;
556
557
0
        Mv         mv        = {.as_int = cand->block_mi.mv[0].as_int};
558
0
        Mv         ref_mv    = {.as_int = cand->pred_mv[0].as_int};
559
0
        const int* dvcost[2] = {(int*)&ctx->md_rate_est_ctx->dv_cost[0][MV_MAX],
560
0
                                (int*)&ctx->md_rate_est_ctx->dv_cost[1][MV_MAX]};
561
0
        int32_t    mv_rate   = svt_av1_mv_bit_cost(
562
0
            &mv, &ref_mv, ctx->md_rate_est_ctx->dv_joint_cost, dvcost, MV_COST_WEIGHT_SUB);
563
564
0
        rate                      = mv_rate + ctx->md_rate_est_ctx->intrabc_fac_bits[cand->block_mi.use_intrabc];
565
0
        cand_bf->fast_luma_rate   = rate;
566
0
        cand_bf->fast_chroma_rate = 0;
567
0
        return (RDCOST(lambda, rate, luma_distortion));
568
127k
    } else {
569
        // Number of bits for each syntax element
570
127k
        uint64_t       intra_mode_bits_num          = 0;
571
127k
        uint64_t       intra_luma_mode_bits_num     = 0;
572
127k
        uint64_t       intra_luma_ang_mode_bits_num = 0;
573
127k
        uint64_t       intra_filter_mode_bits_num   = 0;
574
127k
        uint64_t       skip_mode_rate               = 0;
575
127k
        const uint8_t  skip_mode_ctx                = ctx->skip_mode_ctx;
576
127k
        PredictionMode intra_mode                   = (PredictionMode)cand->block_mi.mode;
577
        // Luma and chroma rate
578
127k
        uint32_t rate;
579
127k
        uint32_t luma_rate   = 0;
580
127k
        uint32_t chroma_rate = 0;
581
127k
        intra_mode_bits_num  = pcs->slice_type != I_SLICE
582
127k
             ? (uint64_t)ctx->md_rate_est_ctx->mb_mode_fac_bits[eb_size_group_lookup[blk_geom->bsize]][intra_mode]
583
127k
             : ZERO_COST;
584
585
127k
        skip_mode_rate = pcs->slice_type != I_SLICE && pcs->ppcs->frm_hdr.skip_mode_params.skip_mode_flag &&
586
0
                is_comp_ref_allowed(blk_geom->bsize)
587
127k
            ? (uint64_t)ctx->md_rate_est_ctx->skip_mode_fac_bits[skip_mode_ctx][0]
588
127k
            : ZERO_COST;
589
        // Estimate luma nominal intra mode bits for key frame
590
127k
        intra_luma_mode_bits_num = pcs->slice_type == I_SLICE
591
127k
            ? (uint64_t)
592
127k
                  ctx->md_rate_est_ctx->y_mode_fac_bits[ctx->intra_luma_top_ctx][ctx->intra_luma_left_ctx][intra_mode]
593
127k
            : ZERO_COST;
594
        // Estimate luma angular mode bits
595
127k
        if (blk_geom->bsize >= BLOCK_8X8 && av1_is_directional_mode(cand->block_mi.mode)) {
596
0
            assert((intra_mode - V_PRED) < 8);
597
0
            assert((intra_mode - V_PRED) >= 0);
598
0
            intra_luma_ang_mode_bits_num =
599
0
                ctx->md_rate_est_ctx->angle_delta_fac_bits[intra_mode - V_PRED]
600
0
                                                          [MAX_ANGLE_DELTA + cand->block_mi.angle_delta[PLANE_TYPE_Y]];
601
0
        }
602
127k
        if (svt_aom_allow_palette(pcs->ppcs->frm_hdr.allow_screen_content_tools, blk_geom->bsize) &&
603
0
            intra_mode == DC_PRED) {
604
0
            const int use_palette = cand->palette_info ? (cand->palette_size[0] > 0) : 0;
605
0
            const int bsize_ctx   = svt_aom_get_palette_bsize_ctx(blk_geom->bsize);
606
0
            const int mode_ctx    = svt_aom_get_palette_mode_ctx(blk_ptr->av1xd);
607
0
            intra_luma_mode_bits_num += ctx->md_rate_est_ctx->palette_ymode_fac_bits[bsize_ctx][mode_ctx][use_palette];
608
0
            if (use_palette) {
609
0
                const uint8_t* const color_map = cand->palette_info->color_idx_map;
610
0
                int                  block_width, block_height, rows, cols;
611
0
                svt_aom_get_block_dimensions(
612
0
                    blk_geom->bsize, 0, blk_ptr->av1xd, &block_width, &block_height, &rows, &cols);
613
0
                const int plt_size = cand->palette_size[0];
614
0
                int       palette_mode_cost =
615
0
                    ctx->md_rate_est_ctx->palette_ysize_fac_bits[bsize_ctx][plt_size - PALETTE_MIN_SIZE] +
616
0
                    svt_aom_write_uniform_cost(plt_size, color_map[0]);
617
0
                uint16_t  color_cache[2 * PALETTE_MAX_SIZE];
618
0
                const int n_cache = svt_get_palette_cache_y(blk_ptr->av1xd, color_cache);
619
0
                palette_mode_cost += svt_av1_palette_color_cost_y(&cand->palette_info->pmi,
620
0
                                                                  color_cache,
621
0
                                                                  cand->palette_size[0],
622
0
                                                                  n_cache,
623
0
                                                                  pcs->ppcs->scs->encoder_bit_depth);
624
0
                palette_mode_cost += svt_av1_cost_color_map(
625
0
                    cand, ctx->md_rate_est_ctx, blk_ptr, 0, blk_geom->bsize, PALETTE_MAP);
626
0
                intra_luma_mode_bits_num += palette_mode_cost;
627
0
            }
628
0
        }
629
630
127k
        if (svt_aom_filter_intra_allowed(pcs->ppcs->scs->seq_header.filter_intra_level,
631
127k
                                         blk_geom->bsize,
632
127k
                                         cand->palette_info ? cand->palette_size[0] : 0,
633
127k
                                         intra_mode)) {
634
0
            intra_filter_mode_bits_num =
635
0
                ctx->md_rate_est_ctx
636
0
                    ->filter_intra_fac_bits[blk_geom->bsize][cand->block_mi.filter_intra_mode != FILTER_INTRA_MODES];
637
0
            if (cand->block_mi.filter_intra_mode != FILTER_INTRA_MODES) {
638
0
                intra_filter_mode_bits_num +=
639
0
                    ctx->md_rate_est_ctx->filter_intra_mode_fac_bits[cand->block_mi.filter_intra_mode];
640
0
            }
641
0
        }
642
127k
        if (ctx->has_uv) {
643
            // CFL info not known in fast loop, so assume DC mode when CFL is allowed
644
127k
            chroma_rate = (uint32_t)svt_aom_get_intra_uv_fast_rate(pcs, ctx, cand_bf, 0);
645
127k
        }
646
647
127k
        uint32_t is_inter_rate = pcs->slice_type != I_SLICE
648
127k
            ? ctx->md_rate_est_ctx->intra_inter_fac_bits[ctx->is_inter_ctx][0]
649
127k
            : 0;
650
127k
        luma_rate              = (uint32_t)(intra_mode_bits_num + skip_mode_rate + intra_luma_mode_bits_num +
651
127k
                               intra_luma_ang_mode_bits_num + is_inter_rate + intra_filter_mode_bits_num);
652
127k
        if (svt_aom_allow_intrabc(&pcs->ppcs->frm_hdr, pcs->ppcs->slice_type)) {
653
0
            svt_aom_assert_err(cand->block_mi.use_intrabc == 0, "this block ibc should be off\n");
654
0
            luma_rate += ctx->md_rate_est_ctx->intrabc_fac_bits[cand->block_mi.use_intrabc];
655
0
        }
656
        // Keep the Fast Luma and Chroma rate for future use
657
127k
        cand_bf->fast_luma_rate   = luma_rate;
658
127k
        cand_bf->fast_chroma_rate = chroma_rate;
659
127k
        rate                      = luma_rate + chroma_rate;
660
        // Assign fast cost
661
127k
        return (RDCOST(lambda, rate, luma_distortion));
662
127k
    }
663
127k
}
664
665
// This function encodes the reference frame
666
uint64_t estimate_ref_frame_type_bits(ModeDecisionContext* ctx, BlkStruct* blk_ptr, uint8_t ref_frame_type,
667
0
                                      bool is_compound) {
668
0
    uint64_t ref_rate_bits = 0;
669
670
0
    MbModeInfo* const mbmi = blk_ptr->av1xd->mi[0];
671
0
    MvReferenceFrame  ref_type[2];
672
0
    av1_set_ref_frame(ref_type, ref_frame_type);
673
0
    mbmi->block_mi.ref_frame[0] = ref_type[0];
674
0
    mbmi->block_mi.ref_frame[1] = ref_type[1];
675
    //const int is_compound = svt_aom_has_second_ref(mbmi);
676
0
    {
677
0
        if (is_compound) {
678
0
            const CompReferenceType comp_ref_type = has_uni_comp_refs(&mbmi->block_mi) ? UNIDIR_COMP_REFERENCE
679
0
                                                                                       : BIDIR_COMP_REFERENCE;
680
681
0
            ref_rate_bits += ctx->md_rate_est_ctx->comp_ref_type_fac_bits[svt_aom_get_comp_reference_type_context_new(
682
0
                blk_ptr->av1xd)][comp_ref_type];
683
            /*aom_write_symbol(w, comp_ref_type,
684
               svt_aom_get_comp_reference_type_cdf(blk_ptr->av1xd), 2);*/
685
686
0
            if (comp_ref_type == UNIDIR_COMP_REFERENCE) {
687
                // SVT_LOG("ERROR[AN]: UNIDIR_COMP_REFERENCE not supported\n");
688
0
                const int bit = mbmi->block_mi.ref_frame[0] == BWDREF_FRAME;
689
690
0
                ref_rate_bits += ctx->md_rate_est_ctx->uni_comp_ref_fac_bits[svt_av1_get_pred_context_uni_comp_ref_p(
691
0
                    blk_ptr->av1xd)][0][bit];
692
                // blk_ptr->av1xd->tile_ctx->uni_comp_ref_cdf[pred_context][0];
693
                // WRITE_REF_BIT(bit, uni_comp_ref_p);
694
695
0
                if (!bit) {
696
0
                    assert(mbmi->block_mi.ref_frame[0] == LAST_FRAME);
697
0
                    const int bit1 = mbmi->block_mi.ref_frame[1] == LAST3_FRAME ||
698
0
                        mbmi->block_mi.ref_frame[1] == GOLDEN_FRAME;
699
0
                    ref_rate_bits +=
700
0
                        ctx->md_rate_est_ctx
701
0
                            ->uni_comp_ref_fac_bits[svt_av1_get_pred_context_uni_comp_ref_p1(blk_ptr->av1xd)][1][bit1];
702
                    // ref_rate_d = blk_ptr->av1xd->tile_ctx->uni_comp_ref_cdf[pred_context][1];
703
                    // WRITE_REF_BIT(bit1, uni_comp_ref_p1);
704
0
                    if (bit1) {
705
0
                        const int bit2 = mbmi->block_mi.ref_frame[1] == GOLDEN_FRAME;
706
0
                        ref_rate_bits +=
707
0
                            ctx->md_rate_est_ctx->uni_comp_ref_fac_bits[svt_av1_get_pred_context_uni_comp_ref_p2(
708
0
                                blk_ptr->av1xd)][2][bit2];
709
710
                        // ref_rate_e = blk_ptr->av1xd->tile_ctx->uni_comp_ref_cdf[pred_context][2];
711
                        //WRITE_REF_BIT(bit2, uni_comp_ref_p2);
712
0
                    }
713
0
                }
714
0
                return ref_rate_bits;
715
0
            }
716
717
0
            assert(comp_ref_type == BIDIR_COMP_REFERENCE);
718
719
0
            const int bit = (mbmi->block_mi.ref_frame[0] == GOLDEN_FRAME || mbmi->block_mi.ref_frame[0] == LAST3_FRAME);
720
0
            const int pred_ctx = svt_av1_get_pred_context_comp_ref_p(blk_ptr->av1xd);
721
0
            ref_rate_bits += ctx->md_rate_est_ctx->comp_ref_fac_bits[pred_ctx][0][bit];
722
            // ref_rate_f = blk_ptr->av1xd->tile_ctx->comp_ref_cdf[pred_ctx][0];
723
            // WRITE_REF_BIT(bit, comp_ref_p);
724
725
0
            if (!bit) {
726
0
                const int bit1 = mbmi->block_mi.ref_frame[0] == LAST2_FRAME;
727
0
                ref_rate_bits += ctx->md_rate_est_ctx
728
0
                                     ->comp_ref_fac_bits[svt_av1_get_pred_context_comp_ref_p1(blk_ptr->av1xd)][1][bit1];
729
                // ref_rate_g = blk_ptr->av1xd->tile_ctx->comp_ref_cdf[pred_context][1];
730
                // WRITE_REF_BIT(bit1, comp_ref_p1);
731
0
            } else {
732
0
                const int bit2 = mbmi->block_mi.ref_frame[0] == GOLDEN_FRAME;
733
0
                ref_rate_bits += ctx->md_rate_est_ctx
734
0
                                     ->comp_ref_fac_bits[svt_av1_get_pred_context_comp_ref_p2(blk_ptr->av1xd)][2][bit2];
735
                // ref_rate_h = blk_ptr->av1xd->tile_ctx->comp_ref_cdf[pred_context][2];
736
                // WRITE_REF_BIT(bit2, comp_ref_p2);
737
0
            }
738
739
0
            const int bit_bwd    = mbmi->block_mi.ref_frame[1] == ALTREF_FRAME;
740
0
            const int pred_ctx_2 = svt_av1_get_pred_context_comp_bwdref_p(blk_ptr->av1xd);
741
0
            ref_rate_bits += ctx->md_rate_est_ctx->comp_bwd_ref_fac_bits[pred_ctx_2][0][bit_bwd];
742
            // ref_rate_i = blk_ptr->av1xd->tile_ctx->comp_bwdref_cdf[pred_ctx_2][0];
743
            // WRITE_REF_BIT(bit_bwd, comp_bwdref_p);
744
745
0
            if (!bit_bwd) {
746
0
                ref_rate_bits += ctx->md_rate_est_ctx->comp_bwd_ref_fac_bits[svt_av1_get_pred_context_comp_bwdref_p1(
747
0
                    blk_ptr->av1xd)][1][ref_type[1] == ALTREF2_FRAME];
748
                // ref_rate_j = blk_ptr->av1xd->tile_ctx->comp_bwdref_cdf[pred_context][1];
749
                // WRITE_REF_BIT(mbmi->block_mi.ref_frame[1] == ALTREF2_FRAME, comp_bwdref_p1);
750
0
            }
751
0
        } else {
752
0
            const int bit0 = (mbmi->block_mi.ref_frame[0] <= ALTREF_FRAME &&
753
0
                              mbmi->block_mi.ref_frame[0] >= BWDREF_FRAME);
754
0
            ref_rate_bits += ctx->md_rate_est_ctx
755
0
                                 ->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p1(blk_ptr->av1xd)][0][bit0];
756
            // ref_rate_k =
757
            // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p1(blk_ptr->av1xd)][0];
758
            // WRITE_REF_BIT(bit0, single_ref_p1);
759
760
0
            if (bit0) {
761
0
                const int bit1 = mbmi->block_mi.ref_frame[0] == ALTREF_FRAME;
762
0
                ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p2(
763
0
                    blk_ptr->av1xd)][1][bit1];
764
                // ref_rate_l =
765
                // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p2(blk_ptr->av1xd)][1];
766
                // WRITE_REF_BIT(bit1, single_ref_p2);
767
0
                if (!bit1) {
768
0
                    ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p6(
769
0
                        blk_ptr->av1xd)][5][ref_frame_type == ALTREF2_FRAME];
770
                    // ref_rate_m =
771
                    // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p6(blk_ptr->av1xd)][5];
772
                    // WRITE_REF_BIT(mbmi->block_mi.ref_frame[0] == ALTREF2_FRAME, single_ref_p6);
773
0
                }
774
0
            } else {
775
0
                const int bit2 = (mbmi->block_mi.ref_frame[0] == LAST3_FRAME ||
776
0
                                  mbmi->block_mi.ref_frame[0] == GOLDEN_FRAME);
777
0
                ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p3(
778
0
                    blk_ptr->av1xd)][2][bit2];
779
                // ref_rate_n =
780
                // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p3(blk_ptr->av1xd)][2];
781
                // WRITE_REF_BIT(bit2, single_ref_p3);
782
0
                if (!bit2) {
783
0
                    const int bit3 = mbmi->block_mi.ref_frame[0] != LAST_FRAME;
784
0
                    ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p4(
785
0
                        blk_ptr->av1xd)][3][bit3];
786
                    // ref_rate_o =
787
                    // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p4(blk_ptr->av1xd)][3];
788
                    // WRITE_REF_BIT(bit3, single_ref_p4);
789
0
                } else {
790
0
                    const int bit4 = mbmi->block_mi.ref_frame[0] != LAST3_FRAME;
791
0
                    ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p5(
792
0
                        blk_ptr->av1xd)][4][bit4];
793
                    // ref_rate_p =
794
                    // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p5(blk_ptr->av1xd)][4];
795
                    // WRITE_REF_BIT(bit4, single_ref_p5);
796
0
                }
797
0
            }
798
0
        }
799
0
    }
800
0
    return ref_rate_bits;
801
0
}
802
803
int svt_aom_get_comp_group_idx_context_enc(const MacroBlockD* xd);
804
int is_any_masked_compound_used(BlockSize bsize);
805
806
// Returns the rate of the compound-mode signalling for a candidate: comp_group_idx, then either
// compound_idx (group 0) or the masked compound type and its wedge/diff-weighted side information.
// Returns 0 when the candidate has no second reference.
//
// Side effect: copies the candidate's reference frame pair into the block's current mode info
// (ctx->blk_ptr->av1xd->mi[0]) before the check and context derivation.
static INLINE uint32_t get_compound_mode_rate(PictureControlSet* pcs, ModeDecisionContext* ctx,
                                              ModeDecisionCandidate* cand, BlockSize bsize) {
    BlkStruct* const       blk     = ctx->blk_ptr;
    MbModeInfo* const      cur_mi  = blk->av1xd->mi[0];
    const MvReferenceFrame refs[2] = {cand->block_mi.ref_frame[0], cand->block_mi.ref_frame[1]};

    cur_mi->block_mi.ref_frame[0] = refs[0];
    cur_mi->block_mi.ref_frame[1] = refs[1];

    // Nothing to signal for single-reference candidates.
    if (!has_second_ref(&cur_mi->block_mi)) {
        return 0;
    }

    SequenceControlSet* const      seq  = pcs->ppcs->scs;
    MdRateEstimationContext* const r    = ctx->md_rate_est_ctx;
    uint32_t                       rate = 0;

    const int masked_compound_used = is_any_masked_compound_used(bsize) && seq->seq_header.enable_masked_compound;
    if (masked_compound_used) {
        const int group_ctx = svt_aom_get_comp_group_idx_context_enc(blk->av1xd);
        rate                = r->comp_group_idx_fac_bits[group_ctx][cand->block_mi.comp_group_idx];
    } else {
        assert(cand->block_mi.comp_group_idx == 0);
    }

    if (cand->block_mi.comp_group_idx == 0) {
        assert(!cand->block_mi.compound_idx || cand->block_mi.interinter_comp.type == COMPOUND_AVERAGE);

        if (seq->seq_header.order_hint_info.enable_jnt_comp) {
            const int idx_ctx = svt_aom_get_comp_index_context_enc(pcs->ppcs,
                                                                   pcs->ppcs->cur_order_hint,
                                                                   pcs->ppcs->ref_order_hint[refs[0] - 1],
                                                                   pcs->ppcs->ref_order_hint[refs[1] - 1],
                                                                   blk->av1xd);
            rate += r->comp_idx_fac_bits[idx_ctx][cand->block_mi.compound_idx];
        } else {
            assert(cand->block_mi.compound_idx == 1);
        }
        return rate;
    }

    // Masked compound (wedge or diff-weighted).
    assert(pcs->ppcs->frm_hdr.reference_mode != SINGLE_REFERENCE && is_inter_compound_mode(cand->block_mi.mode));
    assert(masked_compound_used);
    assert(cand->block_mi.interinter_comp.type == COMPOUND_WEDGE ||
           cand->block_mi.interinter_comp.type == COMPOUND_DIFFWTD);

    // The compound type is only costed when wedge is available for this block size.
    if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
        rate += r->compound_type_fac_bits[bsize][cand->block_mi.interinter_comp.type - COMPOUND_WEDGE];
    }

    if (cand->block_mi.interinter_comp.type == COMPOUND_WEDGE) {
        assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize));
        rate += r->wedge_idx_fac_bits[bsize][cand->block_mi.interinter_comp.wedge_index];
    } else {
        assert(cand->block_mi.interinter_comp.type == COMPOUND_DIFFWTD);
    }
    // Both masked types add one literal bit.
    rate += av1_cost_literal(1);

    return rate;
}
871
872
// Returns the rate of signalling the block's interpolation filter(s).
//
// block_mi           - block mode info holding the reference frames and the packed interp_filters
// frm_hdr            - frame header; the rate is 0 unless its interpolation_filter is SWITCHABLE
// ctx                - mode decision context (rate tables and current block's av1xd)
// enable_dual_filter - when true both filter directions are costed, otherwise only direction 0
int32_t svt_aom_get_switchable_rate(BlockModeInfo* block_mi, const FrameHeader* const frm_hdr, ModeDecisionContext* ctx,
                                    const bool enable_dual_filter) {
    int32_t total_cost = 0;

    if (frm_hdr->interpolation_filter == SWITCHABLE) {
        const int num_dirs = enable_dual_filter ? 2 : 1;

        for (int d = 0; d < num_dirs; ++d) {
            const int32_t filter_ctx = svt_aom_get_pred_context_switchable_interp(
                block_mi->ref_frame[0], block_mi->ref_frame[1], ctx->blk_ptr->av1xd, d);
            const InterpFilter dir_filter = av1_extract_interp_filter(block_mi->interp_filters, d);

            assert(filter_ctx < SWITCHABLE_FILTER_CONTEXTS);
            assert(dir_filter < SWITCHABLE_FILTERS);

            total_cost += ctx->md_rate_est_ctx->switchable_interp_fac_bitss[filter_ctx][dir_filter];
        }
    }

    return total_cost;
}
890
891
int svt_aom_is_interintra_wedge_used(BlockSize bsize);
892
893
static uint64_t av1_inter_fast_cost_light(ModeDecisionContext* ctx, BlkStruct* blk_ptr,
894
                                          ModeDecisionCandidateBuffer* cand_bf, uint64_t luma_distortion,
895
0
                                          uint64_t lambda, PictureControlSet* pcs, CandidateMv* ref_mv_stack) {
896
0
    ModeDecisionCandidate* cand = cand_bf->cand;
897
    // NM - fast inter cost estimation
898
0
    MdRateEstimationContext* r = ctx->md_rate_est_ctx;
899
    //_mm_prefetch(p, _MM_HINT_T2);
900
    // Luma rate
901
0
    uint32_t             luma_rate           = 0;
902
0
    uint64_t             mv_rate             = 0;
903
0
    const PredictionMode inter_mode          = (PredictionMode)cand->block_mi.mode;
904
0
    const uint8_t        have_nearmv         = have_nearmv_in_inter_mode(inter_mode);
905
0
    uint64_t             inter_mode_bits_num = 0;
906
0
    const uint8_t        skip_mode_ctx       = ctx->skip_mode_ctx;
907
0
    MvReferenceFrame     rf[2]               = {cand->block_mi.ref_frame[0], cand->block_mi.ref_frame[1]};
908
0
    const int8_t         ref_frame_type      = av1_ref_frame_type(rf);
909
0
    const uint8_t        is_compound         = is_inter_compound_mode(cand->block_mi.mode);
910
0
    const uint32_t       mode_context        = svt_aom_mode_context_analyzer(ctx->inter_mode_ctx[ref_frame_type], rf);
911
0
    uint64_t             reference_picture_bits_num = 0;
912
0
    if (ctx->approx_inter_rate < 2) {
913
0
        reference_picture_bits_num = ctx->estimate_ref_frames_num_bits[ref_frame_type];
914
0
    }
915
0
    if (is_compound) {
916
0
        assert(INTER_COMPOUND_OFFSET(inter_mode) < INTER_COMPOUND_MODES);
917
0
        inter_mode_bits_num += r->inter_compound_mode_fac_bits[mode_context][INTER_COMPOUND_OFFSET(inter_mode)];
918
0
    } else {
919
0
        int16_t newmv_ctx = mode_context & NEWMV_CTX_MASK;
920
        //aom_write_symbol(ec_writer, mode != NEWMV, frame_context->newmv_cdf[newmv_ctx], 2);
921
0
        inter_mode_bits_num += r->new_mv_mode_fac_bits[newmv_ctx][inter_mode != NEWMV];
922
0
        if (inter_mode != NEWMV) {
923
0
            const int16_t zero_mv_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
924
            //aom_write_symbol(ec_writer, mode != GLOBALMV, frame_context->zeromv_cdf[zero_mv_ctx], 2);
925
0
            inter_mode_bits_num += r->zero_mv_mode_fac_bits[zero_mv_ctx][inter_mode != GLOBALMV];
926
0
            if (inter_mode != GLOBALMV) {
927
0
                int16_t ref_mv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
928
                /*aom_write_symbol(ec_writer, mode != NEARESTMV, frame_context->refmv_cdf[refmv_ctx], 2);*/
929
0
                inter_mode_bits_num += r->ref_mv_mode_fac_bits[ref_mv_ctx][inter_mode != NEARESTMV];
930
0
            }
931
0
        }
932
0
    }
933
0
    if (inter_mode == NEWMV || inter_mode == NEW_NEWMV || have_nearmv) {
934
        //drLIdex cost estimation
935
0
        const int32_t new_mv = inter_mode == NEWMV || inter_mode == NEW_NEWMV;
936
0
        if (new_mv) {
937
0
            int32_t idx;
938
0
            for (idx = 0; idx < 2; ++idx) {
939
0
                if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) {
940
0
                    uint8_t drl_1_ctx = av1_drl_ctx(ref_mv_stack, idx);
941
0
                    inter_mode_bits_num += r->drl_mode_fac_bits[drl_1_ctx][cand->drl_index != idx];
942
0
                    if (cand->drl_index == idx) {
943
0
                        break;
944
0
                    }
945
0
                }
946
0
            }
947
0
        }
948
0
        if (have_nearmv) {
949
0
            int32_t idx;
950
0
            for (idx = 1; idx < 3; ++idx) {
951
0
                if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) {
952
0
                    uint8_t drl_ctx = av1_drl_ctx(ref_mv_stack, idx);
953
0
                    inter_mode_bits_num += r->drl_mode_fac_bits[drl_ctx][cand->drl_index != (idx - 1)];
954
0
                    if (cand->drl_index == (idx - 1)) {
955
0
                        break;
956
0
                    }
957
0
                }
958
0
            }
959
0
        }
960
0
    }
961
0
    if (svt_aom_have_newmv_in_inter_mode(inter_mode)) {
962
0
        const uint16_t factor = pcs->ppcs->frm_hdr.allow_screen_content_tools ? 20 : 50;
963
0
        if (is_compound) {
964
0
            mv_rate = 0;
965
0
            if (inter_mode == NEW_NEWMV) {
966
0
                for (RefList ref_list_idx = 0; ref_list_idx < 2; ++ref_list_idx) {
967
0
                    Mv             mv         = cand->block_mi.mv[ref_list_idx];
968
0
                    Mv             ref_mv     = cand->pred_mv[ref_list_idx];
969
0
                    const uint16_t absmvdiffx = ABS(mv.x - ref_mv.x);
970
0
                    const uint16_t absmvdiffy = ABS(mv.y - ref_mv.y);
971
0
                    mv_rate += 1296 + (factor * (absmvdiffx + absmvdiffy));
972
0
                }
973
0
            } else if (inter_mode == NEAREST_NEWMV || inter_mode == NEAR_NEWMV) {
974
                // New MV is second ref
975
0
                Mv             mv         = cand->block_mi.mv[1];
976
0
                Mv             ref_mv     = cand->pred_mv[1];
977
0
                const uint16_t absmvdiffx = ABS(mv.x - ref_mv.x);
978
0
                const uint16_t absmvdiffy = ABS(mv.y - ref_mv.y);
979
0
                mv_rate += 1296 + (factor * (absmvdiffx + absmvdiffy));
980
0
            } else {
981
0
                assert(inter_mode == NEW_NEARESTMV || inter_mode == NEW_NEARMV);
982
                // New MV is first ref
983
0
                Mv             mv         = cand->block_mi.mv[0];
984
0
                Mv             ref_mv     = cand->pred_mv[0];
985
0
                const uint16_t absmvdiffx = ABS(mv.x - ref_mv.x);
986
0
                const uint16_t absmvdiffy = ABS(mv.y - ref_mv.y);
987
0
                mv_rate += 1296 + (factor * (absmvdiffx + absmvdiffy));
988
0
            }
989
0
        } else {
990
0
            assert(!is_compound); // single ref inter prediction
991
            // unipred MV stored in idx0
992
0
            Mv             mv         = cand->block_mi.mv[0];
993
0
            Mv             ref_mv     = cand->pred_mv[0];
994
0
            const uint16_t absmvdiffx = ABS(mv.x - ref_mv.x);
995
0
            const uint16_t absmvdiffy = ABS(mv.y - ref_mv.y);
996
0
            mv_rate += 1296 + (factor * (absmvdiffx + absmvdiffy));
997
0
        }
998
0
    }
999
    // Get the interpolation filter rate if IFS is performed at MDS0.  Otherwise, the filter is unknown, so the rate will be updated after IFS is performed.
1000
0
    uint32_t ifs_rate = 0;
1001
0
    if (ctx->ifs_ctrls.level == IFS_MDS0 &&
1002
0
        av1_is_interp_needed_md(&cand_bf->cand->block_mi, pcs, ctx->blk_geom->bsize) &&
1003
0
        pcs->ppcs->frm_hdr.interpolation_filter == SWITCHABLE) {
1004
0
        ifs_rate = svt_aom_get_switchable_rate(
1005
0
            &cand_bf->cand->block_mi, &pcs->ppcs->frm_hdr, ctx, pcs->scs->seq_header.enable_dual_filter);
1006
0
    }
1007
0
    uint32_t is_inter_rate = r->intra_inter_fac_bits[ctx->is_inter_ctx][1];
1008
1009
0
    uint32_t skip_mode_rate = pcs->ppcs->frm_hdr.skip_mode_params.skip_mode_flag &&
1010
0
            is_comp_ref_allowed(ctx->blk_geom->bsize)
1011
0
        ? r->skip_mode_fac_bits[skip_mode_ctx][0]
1012
0
        : 0;
1013
0
    luma_rate = (uint32_t)(reference_picture_bits_num + skip_mode_rate + inter_mode_bits_num + mv_rate + is_inter_rate +
1014
0
                           ifs_rate);
1015
    // Keep the Fast Luma and Chroma rate for future use
1016
0
    cand_bf->fast_luma_rate   = luma_rate;
1017
0
    cand_bf->fast_chroma_rate = 0;
1018
    // Assign fast cost
1019
0
    if (cand->skip_mode_allowed) {
1020
0
        skip_mode_rate = r->skip_mode_fac_bits[skip_mode_ctx][1];
1021
0
        if (skip_mode_rate < luma_rate) {
1022
0
            return (RDCOST(lambda, skip_mode_rate, luma_distortion));
1023
0
        }
1024
0
    }
1025
0
    return (RDCOST(lambda, luma_rate, luma_distortion));
1026
0
}
1027
1028
uint64_t svt_aom_inter_fast_cost(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf,
1029
0
                                 uint64_t lambda, uint64_t luma_distortion) {
1030
0
    const BlockGeom*       blk_geom       = ctx->blk_geom;
1031
0
    BlkStruct*             blk_ptr        = ctx->blk_ptr;
1032
0
    ModeDecisionCandidate* cand           = cand_bf->cand;
1033
0
    MvReferenceFrame       rf[2]          = {cand->block_mi.ref_frame[0], cand->block_mi.ref_frame[1]};
1034
0
    const int8_t           ref_frame_type = av1_ref_frame_type(cand->block_mi.ref_frame);
1035
0
    CandidateMv*           ref_mv_stack   = &(ctx->ref_mv_stack[ref_frame_type][0]);
1036
1037
0
    if (ctx->approx_inter_rate) {
1038
0
        return av1_inter_fast_cost_light(ctx, blk_ptr, cand_bf, luma_distortion, lambda, pcs, ref_mv_stack);
1039
0
    }
1040
0
    FrameHeader* frm_hdr = &pcs->ppcs->frm_hdr;
1041
1042
    // Luma rate
1043
0
    uint32_t       luma_rate  = 0;
1044
0
    uint64_t       mv_rate    = 0;
1045
0
    PredictionMode inter_mode = (PredictionMode)cand->block_mi.mode;
1046
1047
0
    uint64_t inter_mode_bits_num = 0;
1048
1049
0
    const uint8_t skip_mode_ctx              = ctx->skip_mode_ctx;
1050
0
    const uint8_t is_compound                = is_inter_compound_mode(cand->block_mi.mode);
1051
0
    uint32_t      mode_context               = svt_aom_mode_context_analyzer(ctx->inter_mode_ctx[ref_frame_type], rf);
1052
0
    uint64_t      reference_picture_bits_num = 0;
1053
1054
    //Reference Type and Mode Bit estimation
1055
0
    reference_picture_bits_num = ctx->estimate_ref_frames_num_bits[ref_frame_type];
1056
0
    if (is_compound) {
1057
0
        assert(INTER_COMPOUND_OFFSET(inter_mode) < INTER_COMPOUND_MODES);
1058
0
        inter_mode_bits_num +=
1059
0
            ctx->md_rate_est_ctx->inter_compound_mode_fac_bits[mode_context][INTER_COMPOUND_OFFSET(inter_mode)];
1060
0
    } else {
1061
        // uint32_t newmv_ctx = mode_context & NEWMV_CTX_MASK;
1062
        // inter_mode_bits_num = cand_bf->cand->md_rate_est_ctx->new_mv_mode_fac_bits[mode_ctx][0];
1063
1064
0
        int16_t newmv_ctx = mode_context & NEWMV_CTX_MASK;
1065
        // aom_write_symbol(ec_writer, mode != NEWMV, frame_context->newmv_cdf[newmv_ctx], 2);
1066
0
        inter_mode_bits_num += ctx->md_rate_est_ctx->new_mv_mode_fac_bits[newmv_ctx][inter_mode != NEWMV];
1067
0
        if (inter_mode != NEWMV) {
1068
0
            const int16_t zero_mv_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
1069
            // aom_write_symbol(ec_writer, mode != GLOBALMV, frame_context->zeromv_cdf[zero_mv_ctx],
1070
            // 2);
1071
0
            inter_mode_bits_num += ctx->md_rate_est_ctx->zero_mv_mode_fac_bits[zero_mv_ctx][inter_mode != GLOBALMV];
1072
0
            if (inter_mode != GLOBALMV) {
1073
0
                int16_t ref_mv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
1074
                /*aom_write_symbol(ec_writer, mode != NEARESTMV,
1075
                 * frame_context->refmv_cdf[refmv_ctx], 2);*/
1076
0
                inter_mode_bits_num += ctx->md_rate_est_ctx->ref_mv_mode_fac_bits[ref_mv_ctx][inter_mode != NEARESTMV];
1077
0
            }
1078
0
        }
1079
0
    }
1080
0
    if (inter_mode == NEWMV || inter_mode == NEW_NEWMV || have_nearmv_in_inter_mode(inter_mode)) {
1081
        //drLIdex cost estimation
1082
0
        const int32_t new_mv = inter_mode == NEWMV || inter_mode == NEW_NEWMV;
1083
0
        if (new_mv) {
1084
0
            int32_t idx;
1085
0
            for (idx = 0; idx < 2; ++idx) {
1086
0
                if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) {
1087
0
                    uint8_t drl_1_ctx = av1_drl_ctx(ref_mv_stack, idx);
1088
0
                    inter_mode_bits_num += ctx->md_rate_est_ctx->drl_mode_fac_bits[drl_1_ctx][cand->drl_index != idx];
1089
0
                    if (cand->drl_index == idx) {
1090
0
                        break;
1091
0
                    }
1092
0
                }
1093
0
            }
1094
0
        }
1095
1096
0
        if (have_nearmv_in_inter_mode(inter_mode)) {
1097
0
            int32_t idx;
1098
0
            for (idx = 1; idx < 3; ++idx) {
1099
0
                if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) {
1100
0
                    uint8_t drl_ctx = av1_drl_ctx(ref_mv_stack, idx);
1101
0
                    inter_mode_bits_num +=
1102
0
                        ctx->md_rate_est_ctx->drl_mode_fac_bits[drl_ctx][cand->drl_index != (idx - 1)];
1103
1104
0
                    if (cand->drl_index == (idx - 1)) {
1105
0
                        break;
1106
0
                    }
1107
0
                }
1108
0
            }
1109
0
        }
1110
0
    }
1111
1112
0
    if (svt_aom_have_newmv_in_inter_mode(inter_mode)) {
1113
0
        if (is_compound) {
1114
0
            mv_rate = 0;
1115
1116
0
            if (inter_mode == NEW_NEWMV) {
1117
0
                for (RefList ref_list_idx = 0; ref_list_idx < 2; ++ref_list_idx) {
1118
0
                    Mv mv     = cand->block_mi.mv[ref_list_idx];
1119
0
                    Mv ref_mv = cand->pred_mv[ref_list_idx];
1120
0
                    mv_rate += svt_av1_mv_bit_cost(&mv,
1121
0
                                                   &ref_mv,
1122
0
                                                   ctx->md_rate_est_ctx->nmv_vec_cost,
1123
0
                                                   ctx->md_rate_est_ctx->nmvcoststack,
1124
0
                                                   MV_COST_WEIGHT);
1125
0
                }
1126
0
            } else if (inter_mode == NEAREST_NEWMV || inter_mode == NEAR_NEWMV) {
1127
0
                Mv mv     = cand->block_mi.mv[1];
1128
0
                Mv ref_mv = cand->pred_mv[1];
1129
0
                mv_rate += svt_av1_mv_bit_cost(&mv,
1130
0
                                               &ref_mv,
1131
0
                                               ctx->md_rate_est_ctx->nmv_vec_cost,
1132
0
                                               ctx->md_rate_est_ctx->nmvcoststack,
1133
0
                                               MV_COST_WEIGHT);
1134
0
            } else {
1135
0
                assert(inter_mode == NEW_NEARESTMV || inter_mode == NEW_NEARMV);
1136
0
                Mv mv     = cand->block_mi.mv[0];
1137
0
                Mv ref_mv = cand->pred_mv[0];
1138
0
                mv_rate += svt_av1_mv_bit_cost(&mv,
1139
0
                                               &ref_mv,
1140
0
                                               ctx->md_rate_est_ctx->nmv_vec_cost,
1141
0
                                               ctx->md_rate_est_ctx->nmvcoststack,
1142
0
                                               MV_COST_WEIGHT);
1143
0
            }
1144
0
        } else {
1145
0
            assert(!is_compound); // single ref inter prediction
1146
            // unipred MVs stored in idx0
1147
0
            Mv mv     = cand->block_mi.mv[0];
1148
0
            Mv ref_mv = cand->pred_mv[0];
1149
0
            mv_rate   = svt_av1_mv_bit_cost(
1150
0
                &mv, &ref_mv, ctx->md_rate_est_ctx->nmv_vec_cost, ctx->md_rate_est_ctx->nmvcoststack, MV_COST_WEIGHT);
1151
0
        }
1152
0
    }
1153
    // inter intra mode rate
1154
0
    if (pcs->ppcs->scs->seq_header.enable_interintra_compound &&
1155
        /* Check if inter-intra is allowed for current block size / mode (even if the feature is off
1156
        * for the current block, we still need to signal inter-intra off.
1157
        */
1158
0
        svt_is_interintra_allowed(true, blk_geom->bsize, cand->block_mi.mode, rf)) {
1159
0
        const int interintra  = cand->block_mi.is_interintra_used;
1160
0
        const int bsize_group = eb_size_group_lookup[blk_geom->bsize];
1161
1162
0
        inter_mode_bits_num +=
1163
0
            ctx->md_rate_est_ctx->inter_intra_fac_bits[bsize_group][cand->block_mi.is_interintra_used];
1164
1165
0
        if (interintra) {
1166
0
            inter_mode_bits_num +=
1167
0
                ctx->md_rate_est_ctx->inter_intra_mode_fac_bits[bsize_group][cand->block_mi.interintra_mode];
1168
1169
0
            if (svt_aom_is_interintra_wedge_used(blk_geom->bsize)) {
1170
0
                inter_mode_bits_num +=
1171
0
                    ctx->md_rate_est_ctx
1172
0
                        ->wedge_inter_intra_fac_bits[blk_geom->bsize][cand->block_mi.use_wedge_interintra];
1173
1174
0
                if (cand->block_mi.use_wedge_interintra) {
1175
0
                    inter_mode_bits_num +=
1176
0
                        ctx->md_rate_est_ctx
1177
0
                            ->wedge_idx_fac_bits[blk_geom->bsize][cand->block_mi.interintra_wedge_index];
1178
0
                }
1179
0
            }
1180
0
        }
1181
0
    }
1182
0
    if (is_inter_singleref_mode(inter_mode) && frm_hdr->is_motion_mode_switchable && rf[1] != INTRA_FRAME) {
1183
0
        assert(!cand->block_mi.is_interintra_used);
1184
0
        const MotionMode motion_mode_rd           = cand->block_mi.motion_mode;
1185
0
        const BlockSize  bsize                    = blk_geom->bsize;
1186
0
        const MotionMode last_motion_mode_allowed = svt_aom_motion_mode_allowed(
1187
0
            pcs, cand->block_mi.num_proj_ref, blk_ptr->overlappable_neighbors, bsize, rf[0], rf[1], inter_mode);
1188
0
        switch (last_motion_mode_allowed) {
1189
0
        case SIMPLE_TRANSLATION:
1190
0
            break;
1191
0
        case OBMC_CAUSAL:
1192
0
            inter_mode_bits_num += ctx->md_rate_est_ctx->motion_mode_fac_bits1[bsize][motion_mode_rd == OBMC_CAUSAL];
1193
0
            break;
1194
0
        default:
1195
0
            inter_mode_bits_num += ctx->md_rate_est_ctx->motion_mode_fac_bits[bsize][motion_mode_rd];
1196
0
        }
1197
0
    }
1198
    // this func return 0 if masked=0 and distance=0
1199
0
    inter_mode_bits_num += get_compound_mode_rate(pcs, ctx, cand, blk_geom->bsize);
1200
    // Get the interpolation filter rate if IFS is performed at MDS0.  Otherwise, the filter is unknown, so the rate will be updated after IFS is performed.
1201
0
    uint32_t ifs_rate = 0;
1202
0
    if (ctx->ifs_ctrls.level == IFS_MDS0 &&
1203
0
        av1_is_interp_needed_md(&cand_bf->cand->block_mi, pcs, ctx->blk_geom->bsize) &&
1204
0
        frm_hdr->interpolation_filter == SWITCHABLE) {
1205
0
        ifs_rate = svt_aom_get_switchable_rate(
1206
0
            &cand_bf->cand->block_mi, frm_hdr, ctx, pcs->scs->seq_header.enable_dual_filter);
1207
0
    }
1208
0
    uint32_t is_inter_rate  = ctx->md_rate_est_ctx->intra_inter_fac_bits[ctx->is_inter_ctx][1];
1209
0
    uint32_t skip_mode_rate = pcs->ppcs->frm_hdr.skip_mode_params.skip_mode_flag && is_comp_ref_allowed(blk_geom->bsize)
1210
0
        ? ctx->md_rate_est_ctx->skip_mode_fac_bits[skip_mode_ctx][0]
1211
0
        : 0;
1212
0
    luma_rate = (uint32_t)(reference_picture_bits_num + skip_mode_rate + inter_mode_bits_num + mv_rate + is_inter_rate +
1213
0
                           ifs_rate);
1214
    // Keep the Fast Luma and Chroma rate for future use
1215
0
    cand_bf->fast_luma_rate   = luma_rate;
1216
0
    cand_bf->fast_chroma_rate = 0;
1217
    // Assign fast cost
1218
0
    if (cand->skip_mode_allowed) {
1219
0
        skip_mode_rate = ctx->md_rate_est_ctx->skip_mode_fac_bits[skip_mode_ctx][1];
1220
0
        if (skip_mode_rate < luma_rate) {
1221
0
            return (RDCOST(lambda, skip_mode_rate, luma_distortion));
1222
0
        }
1223
0
    }
1224
0
    return (RDCOST(lambda, luma_rate, luma_distortion));
1225
0
}
1226
1227
/*
1228
 */
1229
/*
 * Light-PD0 rate estimate for the luma coefficients of one transform block.
 *
 * The estimate is written to *y_txb_coeff_bits. No CDF update is requested and
 * no entropy context is supplied (both passed as 0), the transform type is
 * fixed to DCT_DCT, and the skip / DC-sign contexts and the reduced-tx-set
 * flag are all passed as 0.
 *
 * Always returns EB_ErrorNone.
 */
EbErrorType svt_aom_txb_estimate_coeff_bits_light_pd0(ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf,
                                                      uint32_t txb_origin_index, EbPictureBufferDesc* coeff_buffer_sb,
                                                      uint32_t y_eob, uint64_t* y_txb_coeff_bits, TxSize txsize) {
    if (!y_eob) {
        // Nothing was coded for this block: only the cost of the skip signal applies.
        *y_txb_coeff_bits = av1_cost_skip_txb(ctx, 0, 0, txsize, PLANE_TYPE_Y, 0);
        return EB_ErrorNone;
    }

    // The index is scaled by sizeof(int32_t) before the address is reinterpreted
    // as a pointer to 32-bit coefficients.
    int32_t* const luma_coeffs = (int32_t*)&coeff_buffer_sb->y_buffer[txb_origin_index * sizeof(int32_t)];

    const uint64_t raw_bits = svt_av1_cost_coeffs_txb(
        ctx, 0, 0, cand_bf, luma_coeffs, (uint16_t)y_eob, PLANE_TYPE_Y, txsize, DCT_DCT, 0, 0, 0);

    // Scale the estimate by the sub-residual step stored in the context.
    *y_txb_coeff_bits = raw_bits << ctx->mds_subres_step;

    return EB_ErrorNone;
}
1255
1256
EbErrorType svt_aom_txb_estimate_coeff_bits(ModeDecisionContext* ctx, uint8_t allow_update_cdf, FRAME_CONTEXT* ec_ctx,
1257
                                            PictureControlSet* pcs, ModeDecisionCandidateBuffer* cand_bf,
1258
                                            uint32_t txb_origin_index, uint32_t txb_chroma_origin_index,
1259
                                            EbPictureBufferDesc* coeff_buffer_sb, uint32_t y_eob, uint32_t cb_eob,
1260
                                            uint32_t cr_eob, uint64_t* y_txb_coeff_bits, uint64_t* cb_txb_coeff_bits,
1261
                                            uint64_t* cr_txb_coeff_bits, TxSize txsize, TxSize txsize_uv,
1262
127k
                                            TxType tx_type, TxType tx_type_uv, COMPONENT_TYPE component_type) {
1263
127k
    EbErrorType return_error = EB_ErrorNone;
1264
1265
127k
    FrameHeader* frm_hdr = &pcs->ppcs->frm_hdr;
1266
1267
127k
    int32_t* coeff_buffer;
1268
127k
    int16_t  luma_txb_skip_context = ctx->luma_txb_skip_context;
1269
127k
    int16_t  luma_dc_sign_context  = ctx->luma_dc_sign_context;
1270
127k
    int16_t  cb_txb_skip_context   = ctx->cb_txb_skip_context;
1271
127k
    int16_t  cb_dc_sign_context    = ctx->cb_dc_sign_context;
1272
127k
    int16_t  cr_txb_skip_context   = ctx->cr_txb_skip_context;
1273
127k
    int16_t  cr_dc_sign_context    = ctx->cr_dc_sign_context;
1274
1275
127k
    bool reduced_transform_set_flag = frm_hdr->reduced_tx_set ? true : false;
1276
1277
    //Estimate the rate of the transform type and coefficient for Luma
1278
1279
127k
    if (component_type == COMPONENT_LUMA || component_type == COMPONENT_ALL) {
1280
0
        if (y_eob) {
1281
0
            coeff_buffer = (int32_t*)&coeff_buffer_sb->y_buffer[txb_origin_index * sizeof(int32_t)];
1282
1283
0
            *y_txb_coeff_bits = svt_av1_cost_coeffs_txb(ctx,
1284
0
                                                        allow_update_cdf,
1285
0
                                                        ec_ctx,
1286
0
                                                        cand_bf,
1287
0
                                                        coeff_buffer,
1288
0
                                                        (uint16_t)y_eob,
1289
0
                                                        PLANE_TYPE_Y,
1290
0
                                                        txsize,
1291
0
                                                        tx_type,
1292
0
                                                        luma_txb_skip_context,
1293
0
                                                        luma_dc_sign_context,
1294
0
                                                        reduced_transform_set_flag);
1295
0
            *y_txb_coeff_bits = (*y_txb_coeff_bits) << ctx->mds_subres_step;
1296
0
        } else {
1297
0
            *y_txb_coeff_bits = av1_cost_skip_txb(
1298
0
                ctx, allow_update_cdf, ec_ctx, txsize, PLANE_TYPE_Y, luma_txb_skip_context);
1299
0
        }
1300
0
    }
1301
    // Estimate the rate of the transform type and coefficient for chroma Cb
1302
1303
127k
    if (component_type == COMPONENT_CHROMA_CB || component_type == COMPONENT_CHROMA ||
1304
127k
        component_type == COMPONENT_ALL) {
1305
127k
        if (cb_eob) {
1306
5.47k
            coeff_buffer = (int32_t*)&coeff_buffer_sb->u_buffer[txb_chroma_origin_index * sizeof(int32_t)];
1307
1308
5.47k
            *cb_txb_coeff_bits = svt_av1_cost_coeffs_txb(ctx,
1309
5.47k
                                                         allow_update_cdf,
1310
5.47k
                                                         ec_ctx,
1311
5.47k
                                                         cand_bf,
1312
5.47k
                                                         coeff_buffer,
1313
5.47k
                                                         (uint16_t)cb_eob,
1314
5.47k
                                                         PLANE_TYPE_UV,
1315
5.47k
                                                         txsize_uv,
1316
5.47k
                                                         tx_type_uv,
1317
5.47k
                                                         cb_txb_skip_context,
1318
5.47k
                                                         cb_dc_sign_context,
1319
5.47k
                                                         reduced_transform_set_flag);
1320
122k
        } else {
1321
122k
            *cb_txb_coeff_bits = av1_cost_skip_txb(
1322
122k
                ctx, allow_update_cdf, ec_ctx, txsize_uv, PLANE_TYPE_UV, cb_txb_skip_context);
1323
122k
        }
1324
127k
    }
1325
1326
127k
    if (component_type == COMPONENT_CHROMA_CR || component_type == COMPONENT_CHROMA ||
1327
127k
        component_type == COMPONENT_ALL) {
1328
        //Estimate the rate of the transform type and coefficient for chroma Cr
1329
127k
        if (cr_eob) {
1330
5.46k
            coeff_buffer = (int32_t*)&coeff_buffer_sb->v_buffer[txb_chroma_origin_index * sizeof(int32_t)];
1331
1332
5.46k
            *cr_txb_coeff_bits = svt_av1_cost_coeffs_txb(ctx,
1333
5.46k
                                                         allow_update_cdf,
1334
5.46k
                                                         ec_ctx,
1335
5.46k
                                                         cand_bf,
1336
5.46k
                                                         coeff_buffer,
1337
5.46k
                                                         (uint16_t)cr_eob,
1338
5.46k
                                                         PLANE_TYPE_UV,
1339
5.46k
                                                         txsize_uv,
1340
5.46k
                                                         tx_type_uv,
1341
5.46k
                                                         cr_txb_skip_context,
1342
5.46k
                                                         cr_dc_sign_context,
1343
5.46k
                                                         reduced_transform_set_flag);
1344
122k
        } else {
1345
122k
            *cr_txb_coeff_bits = av1_cost_skip_txb(
1346
122k
                ctx, allow_update_cdf, ec_ctx, txsize_uv, PLANE_TYPE_UV, cr_txb_skip_context);
1347
122k
        }
1348
127k
    }
1349
1350
127k
    return return_error;
1351
127k
}
1352
1353
/*
 * Light-PD0 full cost: combines the luma coefficient rate and the luma
 * distortion into an RD cost stored in *cand_bf->full_cost.
 *
 * Context index 0 is used for both the skip flag and the partition rate as an
 * approximation, which avoids a call to av1_partition_rate_cost. Partition
 * cost is only needed for blocks larger than 4x4, and light-PD0 assumes 4x4
 * blocks are disallowed.
 *
 * Always returns EB_ErrorNone.
 */
EbErrorType svt_aom_full_cost_light_pd0(ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf,
                                        uint64_t* y_distortion, uint64_t lambda, uint64_t* y_coeff_bits) {
    // Coefficient bits plus the cost of signalling "not skipped".
    const uint64_t coeff_rate = *y_coeff_bits + (uint64_t)ctx->md_rate_est_ctx->skip_fac_bits[0][0];

    // Add the PARTITION_NONE rate on top.
    const uint64_t total_rate = coeff_rate + ctx->md_rate_est_ctx->partition_fac_bits[0][PARTITION_NONE];

    *(cand_bf->full_cost) = RDCOST(lambda, total_rate, y_distortion[0]);

    return EB_ErrorNone;
}
1367
1368
/*********************************************************************************
1369
 * svt_aom_av1_full_cost function is used to estimate the cost of a candidate mode
1370
 * for full mode decision module.
1371
 **********************************************************************************/
1372
void svt_aom_full_cost(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf,
1373
                       uint64_t lambda, uint64_t y_distortion[DIST_TOTAL][DIST_CALC_TOTAL],
1374
                       uint64_t cb_distortion[DIST_TOTAL][DIST_CALC_TOTAL],
1375
                       uint64_t cr_distortion[DIST_TOTAL][DIST_CALC_TOTAL], uint64_t* y_coeff_bits,
1376
244k
                       uint64_t* cb_coeff_bits, uint64_t* cr_coeff_bits) {
1377
244k
    const uint8_t skip_coeff_ctx        = ctx->skip_coeff_ctx;
1378
244k
    const bool    update_full_cost_ssim = ctx->tune_ssim_level > SSIM_LVL_0 ? true : false;
1379
1380
    // Get the TX size rate for skip and non-skip block. Need both to make non-skip decision
1381
244k
    uint64_t non_skip_tx_size_bits = 0, skip_tx_size_bits = 0;
1382
244k
    if (!ctx->shut_fast_rate && pcs->ppcs->frm_hdr.tx_mode == TX_MODE_SELECT) {
1383
126k
        if (cand_bf->block_has_coeff) {
1384
5.87k
            non_skip_tx_size_bits = svt_aom_get_tx_size_bits(
1385
5.87k
                cand_bf, ctx, pcs, cand_bf->cand->block_mi.tx_depth, /*cand_bf->block_has_coeff*/ 1);
1386
5.87k
        }
1387
1388
126k
        skip_tx_size_bits = svt_aom_get_tx_size_bits(
1389
126k
            cand_bf, ctx, pcs, cand_bf->cand->block_mi.tx_depth, /*cand_bf->block_has_coeff*/ 0);
1390
126k
    }
1391
1392
244k
    assert(IMPLIES(is_inter_mode(cand_bf->cand->block_mi.mode), skip_tx_size_bits == 0));
1393
1394
    // Decide if block should be signalled as skip (send no coeffs)
1395
244k
    if (!svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id) && ctx->blk_skip_decision &&
1396
7.54k
        cand_bf->block_has_coeff && is_inter_mode(cand_bf->cand->block_mi.mode)) {
1397
0
        const uint64_t non_skip_cost = RDCOST(
1398
0
            lambda,
1399
0
            (*y_coeff_bits + *cb_coeff_bits + *cr_coeff_bits + non_skip_tx_size_bits +
1400
0
             (uint64_t)ctx->md_rate_est_ctx->skip_fac_bits[skip_coeff_ctx][0]),
1401
0
            (y_distortion[DIST_SSD][0] + cb_distortion[DIST_SSD][0] + cr_distortion[DIST_SSD][0]));
1402
1403
0
        const uint64_t skip_cost = RDCOST(
1404
0
            lambda,
1405
0
            ((uint64_t)ctx->md_rate_est_ctx->skip_fac_bits[skip_coeff_ctx][1]) + skip_tx_size_bits,
1406
0
            (y_distortion[DIST_SSD][1] + cb_distortion[DIST_SSD][1] + cr_distortion[DIST_SSD][1]));
1407
1408
        // Update signals to correspond to skip_mode values (no coeffs, etc.)
1409
0
        if (skip_cost < non_skip_cost) {
1410
0
            y_distortion[DIST_SSD][0]  = y_distortion[DIST_SSD][1];
1411
0
            cb_distortion[DIST_SSD][0] = cb_distortion[DIST_SSD][1];
1412
0
            cr_distortion[DIST_SSD][0] = cr_distortion[DIST_SSD][1];
1413
1414
0
            y_distortion[DIST_SSIM][0]  = y_distortion[DIST_SSIM][1];
1415
0
            cb_distortion[DIST_SSIM][0] = cb_distortion[DIST_SSIM][1];
1416
0
            cr_distortion[DIST_SSIM][0] = cr_distortion[DIST_SSIM][1];
1417
0
            cand_bf->block_has_coeff    = 0;
1418
0
            cand_bf->y_has_coeff        = 0;
1419
0
            cand_bf->u_has_coeff        = 0;
1420
0
            cand_bf->v_has_coeff        = 0;
1421
0
            cand_bf->cnt_nz_coeff       = 0;
1422
1423
            // For inter modes, signalling skip means no TX depth is used and the TX type will be DCT_DCT
1424
0
            cand_bf->cand->block_mi.tx_depth = 0;
1425
0
            cand_bf->cand->transform_type_uv = DCT_DCT;
1426
0
            memset(cand_bf->cand->transform_type, DCT_DCT, 16 * sizeof(cand_bf->cand->transform_type[0]));
1427
0
            memset(&cand_bf->quant_dc, 0, sizeof(QuantDcData));
1428
0
            memset(&cand_bf->eob, 0, sizeof(EobData));
1429
0
        }
1430
0
    }
1431
1432
244k
    uint64_t coeff_rate = 0;
1433
244k
    if (cand_bf->block_has_coeff) {
1434
7.95k
        coeff_rate = (*y_coeff_bits + *cb_coeff_bits + *cr_coeff_bits + non_skip_tx_size_bits +
1435
7.95k
                      (uint64_t)ctx->md_rate_est_ctx->skip_fac_bits[skip_coeff_ctx][0]);
1436
236k
    } else {
1437
236k
        coeff_rate = ctx->md_rate_est_ctx->skip_fac_bits[skip_coeff_ctx][1] + skip_tx_size_bits;
1438
236k
    }
1439
1440
244k
    uint64_t mode_rate            = cand_bf->fast_luma_rate + cand_bf->fast_chroma_rate + coeff_rate;
1441
244k
    uint64_t mode_distortion      = y_distortion[DIST_SSD][0] + cb_distortion[DIST_SSD][0] + cr_distortion[DIST_SSD][0];
1442
244k
    uint64_t mode_ssim_distortion = update_full_cost_ssim
1443
244k
        ? y_distortion[DIST_SSIM][0] + cb_distortion[DIST_SSIM][0] + cr_distortion[DIST_SSIM][0]
1444
244k
        : 0;
1445
244k
    uint64_t mode_cost            = RDCOST(lambda, mode_rate, mode_distortion);
1446
1447
    // If skip_mode is allowed for this candidate, check cost of skip mode compared to regular cost
1448
244k
    if (cand_bf->cand->skip_mode_allowed == true) {
1449
0
        const uint8_t skip_mode_ctx = ctx->skip_mode_ctx;
1450
1451
        // Skip mode cost
1452
0
        const uint64_t skip_mode_rate       = ctx->md_rate_est_ctx->skip_mode_fac_bits[skip_mode_ctx][1];
1453
0
        const uint64_t skip_mode_distortion = y_distortion[DIST_SSD][1] + cb_distortion[DIST_SSD][1] +
1454
0
            cr_distortion[DIST_SSD][1];
1455
0
        const uint64_t skip_mode_ssim_distortion = update_full_cost_ssim
1456
0
            ? y_distortion[DIST_SSIM][1] + cb_distortion[DIST_SSIM][1] + cr_distortion[DIST_SSIM][1]
1457
0
            : 0;
1458
0
        const uint64_t skip_mode_cost            = RDCOST(lambda, skip_mode_rate, skip_mode_distortion);
1459
1460
0
        cand_bf->cand->block_mi.skip_mode = false;
1461
0
        if (skip_mode_cost <= mode_cost) {
1462
            // Update candidate cost
1463
0
            mode_cost                         = skip_mode_cost;
1464
0
            mode_rate                         = skip_mode_rate;
1465
0
            mode_distortion                   = skip_mode_distortion;
1466
0
            mode_ssim_distortion              = skip_mode_ssim_distortion;
1467
0
            cand_bf->cand->block_mi.skip_mode = true;
1468
1469
            // Update signals to correspond to skip_mode values (no coeffs, etc.)
1470
0
            cand_bf->block_has_coeff         = 0;
1471
0
            cand_bf->y_has_coeff             = 0;
1472
0
            cand_bf->u_has_coeff             = 0;
1473
0
            cand_bf->v_has_coeff             = 0;
1474
0
            cand_bf->cnt_nz_coeff            = 0;
1475
0
            cand_bf->cand->block_mi.tx_depth = 0;
1476
0
            memset(cand_bf->cand->transform_type, DCT_DCT, 16 * sizeof(cand_bf->cand->transform_type[0]));
1477
0
            cand_bf->cand->transform_type_uv = DCT_DCT;
1478
0
            memset(&cand_bf->quant_dc, 0, sizeof(QuantDcData));
1479
0
            memset(&cand_bf->eob, 0, sizeof(EobData));
1480
0
        }
1481
0
    }
1482
1483
    // Assign full cost
1484
244k
    *(cand_bf->full_cost) = mode_cost;
1485
244k
    cand_bf->total_rate   = mode_rate;
1486
244k
    cand_bf->full_dist    = (uint32_t)mode_distortion;
1487
244k
    if (update_full_cost_ssim) {
1488
0
        assert(ctx->pd_pass == PD_PASS_1);
1489
0
        assert(ctx->md_stage == MD_STAGE_3);
1490
0
        *(cand_bf->full_cost_ssim) = RDCOST(lambda, mode_rate, mode_ssim_distortion);
1491
0
    }
1492
244k
    return;
1493
244k
}
1494
1495
/************************************************************
1496
 * Coding Loop Context Generation
1497
 ************************************************************/
1498
244k
void svt_aom_coding_loop_context_generation(PictureControlSet* pcs, ModeDecisionContext* ctx) {
1499
244k
    BlkStruct*   blk_ptr = ctx->blk_ptr;
1500
244k
    MacroBlockD* xd      = blk_ptr->av1xd;
1501
244k
    if (!ctx->shut_fast_rate) {
1502
126k
        if (pcs->slice_type == I_SLICE) {
1503
126k
            svt_aom_get_kf_y_mode_ctx(xd, &ctx->intra_luma_top_ctx, &ctx->intra_luma_left_ctx);
1504
126k
        }
1505
126k
        ctx->is_inter_ctx  = svt_av1_get_intra_inter_context(xd);
1506
126k
        ctx->skip_mode_ctx = av1_get_skip_mode_context(xd);
1507
126k
    }
1508
    // Collect Neighbor ref cout
1509
245k
    if (pcs->slice_type != I_SLICE || pcs->ppcs->frm_hdr.allow_intrabc) {
1510
0
        svt_aom_collect_neighbors_ref_counts_new(blk_ptr->av1xd);
1511
0
    }
1512
1513
    // Skip Coeff Context
1514
244k
    ctx->skip_coeff_ctx = ctx->rate_est_ctrls.update_skip_coeff_ctx ? av1_get_skip_context(xd) : 0;
1515
244k
}
1516
1517
383k
// Returns 1 for every block size above BLOCK_4X4 and 0 otherwise.
static INLINE int block_signals_txsize(BlockSize bsize) {
    const int larger_than_4x4 = (bsize > BLOCK_4X4);
    return larger_than_4x4;
}
1520
1521
0
// Largest transform size for a block: the blocksize_to_txsize[] entry for luma (plane 0), or that
// size passed through av1_get_adjusted_tx_size() for chroma.
// Lossless handling (forcing TX_4X4) is not performed here.
static INLINE int get_vartx_max_txsize(BlockSize bsize, int plane) {
    const TxSize largest = blocksize_to_txsize[bsize];
    return (plane == 0) ? largest : av1_get_adjusted_tx_size(largest);
}
1529
1530
0
// Width of the block, expressed in units of the smallest transform width (tx_size_wide_log2[0]).
// When xd->mb_to_right_edge is negative the width is reduced by that amount, shifted right by 3
// (one extra bit for non-zero planes).
static INLINE int max_block_wide(const MacroBlockD* xd, BlockSize bsize, int plane) {
    const int is_chroma = (plane != 0);
    int       width     = block_size_wide[bsize];

    if (xd->mb_to_right_edge < 0) {
        width += gcc_right_shift(xd->mb_to_right_edge, 3 + is_chroma);
    }

    // Scale the width in the transform block unit.
    return width >> tx_size_wide_log2[0];
}
1540
1541
0
// Height of the block, expressed in units of the smallest transform height (tx_size_high_log2[0]).
// When xd->mb_to_bottom_edge is negative the height is reduced by that amount, shifted right by 3
// (one extra bit for non-zero planes).
static INLINE int max_block_high(const MacroBlockD* xd, BlockSize bsize, int plane) {
    const int is_chroma = (plane != 0);
    int       height    = block_size_high[bsize];

    if (xd->mb_to_bottom_edge < 0) {
        height += gcc_right_shift(xd->mb_to_bottom_edge, 3 + is_chroma);
    }

    // Scale the height in the transform block unit.
    return height >> tx_size_high_log2[0];
}
1551
1552
// Records the dimensions of tx_size in the above/left transform contexts. The number of entries
// written is the size, in MI units, of the block that corresponds to txb_size: left entries
// receive the transform height, above entries the transform width.
static INLINE void txfm_partition_update(TXFM_CONTEXT* above_ctx, TXFM_CONTEXT* left_ctx, TxSize tx_size,
                                         TxSize txb_size) {
    const BlockSize covered = txsize_to_bsize[txb_size];
    assert(covered < BLOCK_SIZES_ALL);

    const int     rows      = mi_size_high[covered];
    const int     cols      = mi_size_wide[covered];
    const uint8_t tx_width  = tx_size_wide[tx_size];
    const uint8_t tx_height = tx_size_high[tx_size];

    for (int r = 0; r < rows; ++r) {
        left_ctx[r] = tx_height;
    }
    for (int c = 0; c < cols; ++c) {
        above_ctx[c] = tx_width;
    }
}
1568
1569
0
// Maps a dimension in pixels to a square transform size. 128 and 64 both map to TX_64X64;
// any value other than 128/64/32/16/8 maps to TX_4X4.
static INLINE TxSize get_sqr_tx_size(int tx_dim) {
    if (tx_dim == 128 || tx_dim == 64) {
        return TX_64X64;
    }
    if (tx_dim == 32) {
        return TX_32X32;
    }
    if (tx_dim == 16) {
        return TX_16X16;
    }
    if (tx_dim == 8) {
        return TX_8X8;
    }
    return TX_4X4;
}
1588
1589
static INLINE int txfm_partition_context(TXFM_CONTEXT* above_ctx, TXFM_CONTEXT* left_ctx, BlockSize bsize,
1590
0
                                         TxSize tx_size) {
1591
0
    const uint8_t txw      = tx_size_wide[tx_size];
1592
0
    const uint8_t txh      = tx_size_high[tx_size];
1593
0
    const int     above    = *above_ctx < txw;
1594
0
    const int     left     = *left_ctx < txh;
1595
0
    int           category = TXFM_PARTITION_CONTEXTS;
1596
1597
    // dummy return, not used by others.
1598
0
    if (tx_size == TX_4X4) {
1599
0
        return 0;
1600
0
    }
1601
1602
0
    TxSize max_tx_size = get_sqr_tx_size(AOMMAX(block_size_wide[bsize], block_size_high[bsize]));
1603
1604
0
    if (max_tx_size >= TX_8X8) {
1605
0
        category = (txsize_sqr_up_map[tx_size] != max_tx_size && max_tx_size > TX_8X8) +
1606
0
            (TX_SIZES - 1 - max_tx_size) * 2;
1607
0
    }
1608
0
    assert(category != TXFM_PARTITION_CONTEXTS);
1609
0
    return category * 3 + above + left;
1610
0
}
1611
1612
/*
 * Recursively accumulates the rate of the transform-partition symbols for the
 * transform block at (blk_row, blk_col), and updates the above/left transform
 * contexts of xd along the way. When allow_update_cdf is set, the
 * txfm_partition CDFs in ec_ctx are updated with each symbol.
 *
 * Returns 0 for positions outside the block, and at MAX_VARTX_DEPTH (where the
 * contexts are still updated but nothing is costed).
 */
static uint64_t cost_tx_size_vartx(MacroBlockD* xd, const MbModeInfo* mbmi, TxSize tx_size, int depth, int blk_row,
                                   int blk_col, MdRateEstimationContext* md_rate_est_ctx, FRAME_CONTEXT* ec_ctx,
                                   uint8_t allow_update_cdf) {
    // Nothing to cost outside the block.
    if (blk_row >= max_block_high(xd, mbmi->bsize, 0) || blk_col >= max_block_wide(xd, mbmi->bsize, 0)) {
        return 0;
    }

    TXFM_CONTEXT* const above = xd->above_txfm_context + blk_col;
    TXFM_CONTEXT* const left  = xd->left_txfm_context + blk_row;

    // At the maximum depth only the contexts are updated.
    if (depth == MAX_VARTX_DEPTH) {
        txfm_partition_update(above, left, tx_size, tx_size);
        return 0;
    }

    const int  ctx          = txfm_partition_context(above, left, mbmi->bsize, tx_size);
    const bool is_final_txs = (tx_size == tx_depth_to_tx_size[mbmi->block_mi.tx_depth][mbmi->bsize]);

    if (is_final_txs) {
        // This is the selected transform size: cost symbol 0.
        const uint64_t bits = md_rate_est_ctx->txfm_partition_fac_bits[ctx][0];

        if (allow_update_cdf) {
            update_cdf(ec_ctx->txfm_partition_cdf[ctx], 0, 2);
        }

        txfm_partition_update(above, left, tx_size, tx_size);
        return bits;
    }

    // Otherwise cost symbol 1 and descend into the sub-transforms.
    assert(tx_size < TX_SIZES_ALL);
    const TxSize sub_txs = eb_sub_tx_size_map[tx_size];

    uint64_t bits = md_rate_est_ctx->txfm_partition_fac_bits[ctx][1];

    if (allow_update_cdf) {
        update_cdf(ec_ctx->txfm_partition_cdf[ctx], 1, 2);
    }

    if (sub_txs == TX_4X4) {
        txfm_partition_update(above, left, sub_txs, tx_size);
        return bits;
    }

    const int step_w = eb_tx_size_wide_unit[sub_txs];
    const int step_h = eb_tx_size_high_unit[sub_txs];
    assert(step_w > 0 && step_h > 0);

    const int units_high = eb_tx_size_high_unit[tx_size];
    const int units_wide = eb_tx_size_wide_unit[tx_size];

    for (int r = 0; r < units_high; r += step_h) {
        for (int c = 0; c < units_wide; c += step_w) {
            bits += cost_tx_size_vartx(
                xd, mbmi, sub_txs, depth + 1, blk_row + r, blk_col + c, md_rate_est_ctx, ec_ctx, allow_update_cdf);
        }
    }

    return bits;
}
1671
1672
741k
// Writes txs into the first len entries of txfm_ctx (no-op when len <= 0).
static INLINE void set_txfm_ctx(TXFM_CONTEXT* txfm_ctx, uint8_t txs, int len) {
    TXFM_CONTEXT* entry = txfm_ctx;
    while (len-- > 0) {
        *entry++ = txs;
    }
}
1678
1679
372k
// Fills n8_w above entries and n8_h left entries of xd's transform contexts.
// Normally the entries receive the width/height of tx_size; when skip is set they receive
// n8_w * MI_SIZE and n8_h * MI_SIZE instead.
static INLINE void set_txfm_ctxs(TxSize tx_size, int n8_w, int n8_h, int skip, const MacroBlockD* xd) {
    const uint8_t above_val = skip ? (uint8_t)(n8_w * MI_SIZE) : tx_size_wide[tx_size];
    const uint8_t left_val  = skip ? (uint8_t)(n8_h * MI_SIZE) : tx_size_high[tx_size];

    set_txfm_ctx(xd->above_txfm_context, above_val, n8_w);
    set_txfm_ctx(xd->left_txfm_context, left_val, n8_h);
}
1691
1692
11.3k
// Number of steps through eb_sub_tx_size_map[] needed to get from the transform size of bsize
// (blocksize_to_txsize[bsize]) down to tx_size. Returns 0 when they are already equal.
static INLINE int tx_size_to_depth(TxSize tx_size, BlockSize bsize) {
    int depth = 0;
    for (TxSize cur = blocksize_to_txsize[bsize]; cur != tx_size; cur = eb_sub_tx_size_map[cur]) {
        ++depth;
        assert(depth <= MAX_TX_DEPTH);
    }
    return depth;
}
1702
1703
// Returns a context number for the given MB prediction signal
1704
// The mode info data structure has a one element border above and to the
1705
// left of the entries corresponding to real blocks.
1706
// The prediction flags in these dummy entries are initialized to 0.
1707
11.3k
static INLINE int get_tx_size_context(const MacroBlockD* xd) {
1708
11.3k
    const MbModeInfo*       mbmi        = xd->mi[0];
1709
11.3k
    const MbModeInfo* const above_mbmi  = xd->above_mbmi;
1710
11.3k
    const MbModeInfo* const left_mbmi   = xd->left_mbmi;
1711
11.3k
    const TxSize            max_tx_size = blocksize_to_txsize[mbmi->bsize];
1712
11.3k
    const int               max_tx_wide = tx_size_wide[max_tx_size];
1713
11.3k
    const int               max_tx_high = tx_size_high[max_tx_size];
1714
11.3k
    const int               has_above   = xd->up_available;
1715
11.3k
    const int               has_left    = xd->left_available;
1716
1717
11.3k
    int above = xd->above_txfm_context[0] >= max_tx_wide;
1718
11.3k
    int left  = xd->left_txfm_context[0] >= max_tx_high;
1719
1720
11.3k
    if (has_above) {
1721
2.84k
        if (is_inter_block(&above_mbmi->block_mi)) {
1722
0
            above = block_size_wide[above_mbmi->bsize] >= max_tx_wide;
1723
0
        }
1724
2.84k
    }
1725
1726
11.3k
    if (has_left) {
1727
2.81k
        if (is_inter_block(&left_mbmi->block_mi)) {
1728
0
            left = block_size_high[left_mbmi->bsize] >= max_tx_high;
1729
0
        }
1730
2.81k
    }
1731
1732
11.3k
    if (has_above && has_left) {
1733
148
        return (above + left);
1734
11.1k
    } else if (has_above) {
1735
2.69k
        return above;
1736
8.49k
    } else if (has_left) {
1737
2.66k
        return left;
1738
5.83k
    } else {
1739
5.83k
        return 0;
1740
5.83k
    }
1741
11.3k
}
1742
1743
static uint64_t cost_selected_tx_size(const MacroBlockD* xd, MdRateEstimationContext* md_rate_est_ctx, TxSize tx_size,
1744
11.3k
                                      FRAME_CONTEXT* ec_ctx, uint8_t allow_update_cdf) {
1745
11.3k
    const MbModeInfo* const mbmi  = xd->mi[0];
1746
11.3k
    const BlockSize         bsize = mbmi->bsize;
1747
11.3k
    uint64_t                bits  = 0;
1748
1749
11.3k
    if (block_signals_txsize(bsize)) {
1750
11.3k
        const int tx_size_ctx = get_tx_size_context(xd);
1751
11.3k
        assert(bsize < BLOCK_SIZES_ALL);
1752
11.3k
        const int     depth       = tx_size_to_depth(tx_size, bsize);
1753
11.3k
        const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize);
1754
11.3k
        bits += md_rate_est_ctx->tx_size_fac_bits[tx_size_cat][tx_size_ctx][depth];
1755
1756
11.3k
        if (allow_update_cdf) {
1757
0
            const int max_depths = bsize_to_max_depth(bsize);
1758
0
            assert(depth >= 0 && depth <= max_depths);
1759
0
            assert(!is_inter_block(&mbmi->block_mi));
1760
0
            assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed(/*xd,*/ mbmi)));
1761
0
            update_cdf(ec_ctx->tx_size_cdf[tx_size_cat][tx_size_ctx], depth, max_depths + 1);
1762
0
        }
1763
11.3k
    }
1764
1765
11.3k
    return bits;
1766
11.3k
}
1767
1768
/* Get the TXS rate and update the txfm context.  If allow_update_cdf is true, the TX size CDFs will
1769
be updated. */
1770
uint64_t svt_aom_tx_size_bits(PictureControlSet* pcs, uint8_t segment_id, MdRateEstimationContext* md_rate_est_ctx,
1771
                              MacroBlockD* xd, const MbModeInfo* mbmi, TxSize tx_size, TxMode tx_mode, BlockSize bsize,
1772
372k
                              uint8_t skip, FRAME_CONTEXT* ec_ctx, uint8_t allow_update_cdf) {
1773
372k
    uint64_t bits        = 0;
1774
372k
    int      is_inter_tx = is_inter_block(&mbmi->block_mi);
1775
372k
    if (tx_mode == TX_MODE_SELECT && block_signals_txsize(bsize) && !(is_inter_tx && skip) &&
1776
372k
        !svt_av1_is_lossless_segment(pcs, segment_id)) {
1777
11.3k
        if (is_inter_tx) { // This implies skip flag is 0.
1778
0
            const TxSize max_tx_size = get_vartx_max_txsize(/*xd,*/ bsize, 0);
1779
0
            const int    txbh        = eb_tx_size_high_unit[max_tx_size];
1780
0
            const int    txbw        = eb_tx_size_wide_unit[max_tx_size];
1781
0
            const int    width       = block_size_wide[bsize] >> tx_size_wide_log2[0];
1782
0
            const int    height      = block_size_high[bsize] >> tx_size_high_log2[0];
1783
0
            int          idx, idy;
1784
0
            for (idy = 0; idy < height; idy += txbh) {
1785
0
                for (idx = 0; idx < width; idx += txbw) {
1786
0
                    bits += cost_tx_size_vartx(
1787
0
                        xd, mbmi, max_tx_size, 0, idy, idx, md_rate_est_ctx, ec_ctx, allow_update_cdf);
1788
0
                }
1789
0
            }
1790
11.3k
        } else {
1791
11.3k
            bits += cost_selected_tx_size(xd, md_rate_est_ctx, tx_size, ec_ctx, allow_update_cdf);
1792
11.3k
            set_txfm_ctxs(tx_size, xd->n8_w, xd->n8_h, 0, xd);
1793
11.3k
        }
1794
361k
    } else {
1795
361k
        set_txfm_ctxs(tx_size, xd->n8_w, xd->n8_h, skip && is_inter_block(&mbmi->block_mi), xd);
1796
361k
    }
1797
1798
372k
    return bits;
1799
372k
}
1800
1801
/* Get the TXS rate.  A dummy txfm context array will be used, so context updates will not be saved for
1802
future blocks. */
1803
uint64_t svt_aom_get_tx_size_bits(ModeDecisionCandidateBuffer* candidateBuffer, ModeDecisionContext* ctx,
1804
373k
                                  PictureControlSet* pcs, uint8_t tx_depth, bool block_has_coeff) {
1805
373k
    NeighborArrayUnit* txfm_context_array      = ctx->txfm_context_array;
1806
373k
    uint32_t           txfm_context_left_index = get_neighbor_array_unit_left_index(txfm_context_array, ctx->blk_org_y);
1807
373k
    uint32_t           txfm_context_above_index = get_neighbor_array_unit_top_index(txfm_context_array, ctx->blk_org_x);
1808
1809
373k
    TxMode       tx_mode = pcs->ppcs->frm_hdr.tx_mode;
1810
373k
    MacroBlockD* xd      = ctx->blk_ptr->av1xd;
1811
373k
    BlockSize    bsize   = ctx->blk_geom->bsize;
1812
373k
    const TxSize tx_size = tx_depth_to_tx_size[tx_depth][bsize];
1813
373k
    MbModeInfo*  mbmi    = xd->mi[0];
1814
1815
373k
    svt_memcpy(ctx->above_txfm_context,
1816
373k
               &(txfm_context_array->top_array[txfm_context_above_index]),
1817
373k
               (ctx->blk_geom->bwidth >> MI_SIZE_LOG2) * sizeof(TXFM_CONTEXT));
1818
373k
    svt_memcpy(ctx->left_txfm_context,
1819
373k
               &(txfm_context_array->left_array[txfm_context_left_index]),
1820
373k
               (ctx->blk_geom->bheight >> MI_SIZE_LOG2) * sizeof(TXFM_CONTEXT));
1821
1822
373k
    xd->above_txfm_context      = ctx->above_txfm_context;
1823
373k
    xd->left_txfm_context       = ctx->left_txfm_context;
1824
373k
    mbmi->bsize                 = ctx->blk_geom->bsize;
1825
373k
    mbmi->block_mi.use_intrabc  = candidateBuffer->cand->block_mi.use_intrabc;
1826
373k
    mbmi->block_mi.ref_frame[0] = candidateBuffer->cand->block_mi.ref_frame[0];
1827
373k
    mbmi->block_mi.tx_depth     = tx_depth;
1828
1829
373k
    const uint64_t bits = svt_aom_tx_size_bits(pcs,
1830
373k
                                               ctx->blk_ptr->segment_id,
1831
373k
                                               ctx->md_rate_est_ctx,
1832
373k
                                               xd,
1833
373k
                                               mbmi,
1834
373k
                                               tx_size,
1835
373k
                                               tx_mode,
1836
373k
                                               bsize,
1837
373k
                                               !block_has_coeff,
1838
373k
                                               NULL,
1839
373k
                                               0);
1840
373k
    return bits;
1841
373k
}
1842
1843
/*
1844
 * av1_partition_rate_cost function is used to generate the rate of signaling the
1845
 * partition type for a given block.
1846
 */
1847
int64_t svt_aom_partition_rate_cost(PictureParentControlSet* ppcs, const BlockSize bsize, const int mi_row,
1848
                                    const int mi_col, MdRateEstimationContext* md_rate_est_ctx, PartitionType p,
1849
344k
                                    const PartitionContextType left_ctx, const PartitionContextType above_ctx) {
1850
344k
    if (bsize < BLOCK_8X8) {
1851
0
        return 0;
1852
0
    }
1853
344k
    assert(bsize < BLOCK_SIZES_ALL && mi_size_wide_log2[bsize] == mi_size_high_log2[bsize]);
1854
1855
344k
    const int hbs      = mi_size_wide[bsize] >> 1;
1856
344k
    const int has_rows = (mi_row + hbs) < ppcs->av1_cm->mi_rows;
1857
344k
    const int has_cols = (mi_col + hbs) < ppcs->av1_cm->mi_cols;
1858
    // Don't consider invalid partitions or blocks outside the picture
1859
344k
    if (!has_rows && !has_cols) {
1860
840
        return 0;
1861
840
    }
1862
1863
343k
    const int bsl = mi_size_wide_log2[bsize] - mi_size_wide_log2[BLOCK_8X8];
1864
343k
    assert(bsl >= 0);
1865
1866
343k
    const int      above = (above_ctx >> bsl) & 1, left = (left_ctx >> bsl) & 1;
1867
343k
    const uint32_t context_index = (left * 2 + above) + bsl * PARTITION_PLOFFSET;
1868
1869
343k
    uint64_t split_rate = 0;
1870
1871
343k
    if (has_rows && has_cols) {
1872
327k
        split_rate = (uint64_t)md_rate_est_ctx->partition_fac_bits[context_index][p];
1873
327k
    } else if (!has_rows && has_cols) {
1874
        // 8x8 blocks will not use the split_or_horz or the split_or_vert paritition CDFs, per
1875
        // section 8.3.2 of the AV1 spec (Cdf selection process).  Therefore, only update partition ctx 4+,
1876
        // which corresponds to the paritition CDFs for 16x16 and larger blocks
1877
8.24k
        assert(bsize != BLOCK_8X8);
1878
8.24k
        split_rate = bsize == BLOCK_128X128
1879
8.24k
            ? (uint64_t)md_rate_est_ctx->partition_vert_alike_128x128_fac_bits[context_index][p == PARTITION_SPLIT]
1880
8.24k
            : (uint64_t)md_rate_est_ctx->partition_vert_alike_fac_bits[context_index][p == PARTITION_SPLIT];
1881
8.24k
    } else {
1882
        // 8x8 blocks will not use the split_or_horz or the split_or_vert paritition CDFs, per
1883
        // section 8.3.2 of the AV1 spec (Cdf selection process).  Therefore, only update partition ctx 4+,
1884
        // which corresponds to the paritition CDFs for 16x16 and larger blocks
1885
7.75k
        assert(bsize != BLOCK_8X8);
1886
7.75k
        split_rate = bsize == BLOCK_128X128
1887
7.75k
            ? (uint64_t)md_rate_est_ctx->partition_horz_alike_128x128_fac_bits[context_index][p == PARTITION_SPLIT]
1888
7.75k
            : (uint64_t)md_rate_est_ctx->partition_horz_alike_fac_bits[context_index][p == PARTITION_SPLIT];
1889
7.75k
    }
1890
1891
343k
    return split_rate;
1892
344k
}