Coverage Report

Created: 2026-05-30 06:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/svt-av1/Source/Lib/Codec/rd_cost.c
Line
Count
Source
1
/*
2
* Copyright(c) 2019 Intel Corporation
3
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
4
*
5
* This source code is subject to the terms of the BSD 2 Clause License and
6
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
7
* was not distributed with this source code in the LICENSE file, you can
8
* obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
9
* Media Patent License 1.0 was not distributed with this source code in the
10
* PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
11
*/
12
13
/***************************************
14
* Includes
15
***************************************/
16
#include "rd_cost.h"
17
#include "common_utils.h"
18
#include "aom_dsp_rtcd.h"
19
#include "svt_log.h"
20
#include "enc_inter_prediction.h"
21
#include "full_loop.h"
22
#include "entropy_coding.h"
23
24
#include <assert.h>
25
26
0
#define MV_COST_WEIGHT 108
27
int svt_aom_get_reference_mode_context_new(const MacroBlockD* xd);
28
int svt_av1_get_pred_context_uni_comp_ref_p(const MacroBlockD* xd);
29
int svt_av1_get_pred_context_uni_comp_ref_p1(const MacroBlockD* xd);
30
int svt_av1_get_pred_context_uni_comp_ref_p2(const MacroBlockD* xd);
31
int svt_aom_get_comp_reference_type_context_new(const MacroBlockD* xd);
32
33
int  svt_aom_get_palette_bsize_ctx(BlockSize bsize);
34
int  svt_aom_get_palette_mode_ctx(const MacroBlockD* xd);
35
int  svt_aom_write_uniform_cost(int n, int v);
36
int  svt_get_palette_cache_y(const MacroBlockD* const xd, uint16_t* cache);
37
int  svt_av1_palette_color_cost_y(const PaletteModeInfo* const pmi, uint16_t* color_cache, const int palette_size,
38
                                  int n_cache, int bit_depth);
39
int  svt_av1_cost_color_map(ModeDecisionCandidate* cand, MdRateEstimationContext* rate_table,
40
41
                            BlkStruct* blk_ptr, int plane, BlockSize bsize, COLOR_MAP_TYPE type);
42
void svt_aom_get_block_dimensions(BlockSize bsize, int plane, const MacroBlockD* xd, int* width, int* height,
43
                                  int* rows_within_bounds, int* cols_within_bounds);
44
int  svt_aom_allow_palette(int allow_screen_content_tools, BlockSize bsize);
45
int  svt_aom_allow_intrabc(const FrameHeader* frm_hdr, SliceType slice_type);
46
47
0
MvJointType svt_av1_get_mv_joint(const Mv* mv) {
48
0
    if (mv->y == 0) {
49
0
        return mv->x == 0 ? MV_JOINT_ZERO : MV_JOINT_HNZVZ;
50
0
    } else {
51
0
        return mv->x == 0 ? MV_JOINT_HZVNZ : MV_JOINT_HNZVNZ;
52
0
    }
53
0
}
54
55
0
static int32_t mv_cost(const Mv* mv, const int32_t* joint_cost, const int32_t* const comp_cost[2]) {
56
0
    int32_t jn_c = svt_av1_get_mv_joint(mv);
57
0
    int32_t res  = joint_cost[jn_c] + comp_cost[0][CLIP3(MV_LOW, MV_UPP, mv->y)] +
58
0
        comp_cost[1][CLIP3(MV_LOW, MV_UPP, mv->x)];
59
0
    return res;
60
0
}
61
62
0
int32_t svt_av1_mv_bit_cost_light(const Mv* mv, const Mv* ref) {
63
0
    const uint32_t factor     = 50;
64
0
    const uint32_t absmvdiffx = ABS(mv->x - ref->x);
65
0
    const uint32_t absmvdiffy = ABS(mv->y - ref->y);
66
0
    const uint32_t mv_rate    = 1296 + (factor * (absmvdiffx + absmvdiffy));
67
0
    return mv_rate;
68
0
}
69
70
int32_t svt_av1_mv_bit_cost(const Mv* mv, const Mv* ref, const int32_t* mvjcost, const int32_t* const mvcost[2],
71
0
                            int32_t weight) {
72
    // Restrict the size of the MV diff to be within the max AV1 range.  If the MV diff
73
    // is outside this range, the diff will index beyond the cost array, causing a seg fault.
74
    // Both the MVs and the MV diffs should be within the allowable range for accessing the MV cost
75
    // infrastructure.
76
0
    const int16_t x         = MIN(MAX(mv->x - ref->x, MV_LOW), MV_UPP);
77
0
    const int16_t y         = MIN(MAX(mv->y - ref->y, MV_LOW), MV_UPP);
78
0
    Mv            temp_diff = {{x, y}};
79
80
0
    return ROUND_POWER_OF_TWO(mv_cost(&temp_diff, mvjcost, mvcost) * weight, 7);
81
0
}
82
83
/////////////////////////////COEFFICIENT CALCULATION //////////////////////////////////////////////
84
10.5k
/*
 * Cost of the Golomb-coded remainder of a coefficient magnitude.
 *
 * Magnitudes below 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE have no remainder
 * and cost 0. Otherwise the remainder r = abs_qc - COEFF_BASE_RANGE -
 * NUM_BASE_LEVELS is costed as a literal of (2 * (get_msb(r) + 1) - 1) bits.
 */
static INLINE int32_t get_golomb_cost(int32_t abs_qc) {
    if (abs_qc < 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
        return 0;
    }

    const int32_t remainder = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
    const int32_t num_bits  = get_msb(remainder) + 1;
    return av1_cost_literal(2 * num_bits - 1);
}
92
93
void svt_av1_txb_init_levels_c(const TranLow* const coeff, const int32_t width, const int32_t height,
94
0
                               uint8_t* const levels) {
95
0
    uint8_t* ls = levels;
96
97
0
    for (int32_t i = 0; i < height; i++) {
98
0
        for (int32_t j = 0; j < width; j++) {
99
0
            *ls++ = (uint8_t)clamp(abs(coeff[i * width + j]), 0, INT8_MAX);
100
0
        }
101
0
        for (int32_t j = 0; j < TX_PAD_HOR; j++) {
102
0
            *ls++ = 0;
103
0
        }
104
0
    }
105
0
}
106
107
static int32_t av1_transform_type_rate_estimation(ModeDecisionContext* ctx, uint8_t allow_update_cdf, FRAME_CONTEXT* fc,
108
                                                  ModeDecisionCandidateBuffer* cand_bf, bool is_inter,
109
                                                  TxSize transform_size, TxType transform_type,
110
0
                                                  bool reduced_tx_set_used) {
111
    // const MbModeInfo *mbmi = &xd->mi[0]->mbmi;
112
    // const int32_t is_inter = is_inter_block(mbmi);
113
114
0
    if (get_ext_tx_types(transform_size, is_inter, reduced_tx_set_used) >
115
0
        1 /*&&    !xd->lossless[xd->mi[0]->mbmi.segment_id]  WE ARE NOT LOSSLESS*/) {
116
0
        const TxSize square_tx_size = txsize_sqr_map[transform_size];
117
0
        assert(square_tx_size < EXT_TX_SIZES);
118
119
0
        const int32_t ext_tx_set = get_ext_tx_set(transform_size, is_inter, reduced_tx_set_used);
120
0
        if (is_inter) {
121
0
            if (ext_tx_set > 0) {
122
0
                if (allow_update_cdf) {
123
0
                    const TxSetType tx_set_type = get_ext_tx_set_type(transform_size, is_inter, reduced_tx_set_used);
124
125
0
                    update_cdf(fc->inter_ext_tx_cdf[ext_tx_set][square_tx_size],
126
0
                               av1_ext_tx_ind[tx_set_type][transform_type],
127
0
                               av1_num_ext_tx_set[tx_set_type]);
128
0
                }
129
0
                return ctx->md_rate_est_ctx->inter_tx_type_fac_bits[ext_tx_set][square_tx_size][transform_type];
130
0
            }
131
0
        } else {
132
0
            if (ext_tx_set > 0) {
133
0
                PredictionMode intra_dir;
134
0
                if (cand_bf->cand->block_mi.filter_intra_mode != FILTER_INTRA_MODES) {
135
0
                    intra_dir = fimode_to_intradir[cand_bf->cand->block_mi.filter_intra_mode];
136
0
                } else {
137
0
                    intra_dir = cand_bf->cand->block_mi.mode;
138
0
                }
139
0
                assert(intra_dir < INTRA_MODES);
140
0
                const TxSetType tx_set_type = get_ext_tx_set_type(transform_size, is_inter, reduced_tx_set_used);
141
142
0
                if (allow_update_cdf) {
143
0
                    update_cdf(fc->intra_ext_tx_cdf[ext_tx_set][square_tx_size][intra_dir],
144
0
                               av1_ext_tx_ind[tx_set_type][transform_type],
145
0
                               av1_num_ext_tx_set[tx_set_type]);
146
0
                }
147
0
                return ctx->md_rate_est_ctx
148
0
                    ->intra_tx_type_fac_bits[ext_tx_set][square_tx_size][intra_dir][transform_type];
149
0
            }
150
0
        }
151
0
    }
152
0
    return 0;
153
0
}
154
155
// Update the eob-related CDFs. Function assumes allow_update_cdf is true
156
// as the only action of the function is to update the CDFs.
157
0
/*
 * Update the end-of-block CDFs of ec_ctx for a block whose last non-zero
 * coefficient position is eob.
 *
 * The eob position token selects the symbol (token - 1) written to one of
 * the eob_flag_cdf16 .. eob_flag_cdf1024 tables; which table is used depends
 * on txsize_log2_minus4[tx_size]. For tokens above 2, the top extra bit is
 * also recorded in eob_extra_cdf. The function performs CDF updates only, so
 * callers are expected to invoke it only when CDF updating is enabled.
 */
static void update_eob_context(int eob, TxSize tx_size, TxClass tx_class, PlaneType plane, FRAME_CONTEXT* ec_ctx) {
    int          extra_bits;
    const int    pos_token   = get_eob_pos_token(eob, &extra_bits);
    const TxSize entropy_txs = (TxSize)((txsize_sqr_map[tx_size] + txsize_sqr_up_map[tx_size] + 1) >> 1);
    assert(entropy_txs < TX_SIZES);
    const int size_idx  = txsize_log2_minus4[tx_size];
    const int class_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1;
    const int symbol    = pos_token - 1;

    // Each size index owns a CDF table with a matching number of symbols.
    switch (size_idx) {
    case 0: update_cdf(ec_ctx->eob_flag_cdf16[plane][class_ctx], symbol, 5); break;
    case 1: update_cdf(ec_ctx->eob_flag_cdf32[plane][class_ctx], symbol, 6); break;
    case 2: update_cdf(ec_ctx->eob_flag_cdf64[plane][class_ctx], symbol, 7); break;
    case 3: update_cdf(ec_ctx->eob_flag_cdf128[plane][class_ctx], symbol, 8); break;
    case 4: update_cdf(ec_ctx->eob_flag_cdf256[plane][class_ctx], symbol, 9); break;
    case 5: update_cdf(ec_ctx->eob_flag_cdf512[plane][class_ctx], symbol, 10); break;
    case 6:
    default: update_cdf(ec_ctx->eob_flag_cdf1024[plane][class_ctx], symbol, 11); break;
    }

    if (pos_token > 2) {
        const int shift   = pos_token - 3;
        const int top_bit = (extra_bits >> shift) & 1;
        update_cdf(ec_ctx->eob_extra_cdf[entropy_txs][plane][shift], top_bit, 2);
    }
}
196
197
// Transform end of block bit estimation
198
24.1k
/*
 * Rate of signalling the end-of-block position eob.
 *
 * The cost of the eob position token comes from txb_eob_costs, indexed by
 * whether the transform class is 2D (context 0) or not (context 1). For
 * tokens above 2, the cost of the top extra bit (from txb_costs) and a
 * literal of (token - 3) bits are added.
 */
int get_eob_cost(int eob, const LvMapEobCost* txb_eob_costs, const LvMapCoeffCost* txb_costs, TxClass tx_class) {
    int       extra_bits;
    const int pos_token = get_eob_pos_token(eob, &extra_bits);
    const int class_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1;
    const int token_cost = txb_eob_costs->eob_cost[class_ctx][pos_token - 1];

    if (pos_token <= 2) {
        return token_cost;
    }

    const int shift   = pos_token - 3;
    const int top_bit = (extra_bits >> shift) & 1;
    return token_cost + txb_costs->eob_extra_cost[shift][top_bit] + av1_cost_literal(shift);
}
212
213
/*
 * Rate of signalling that a transform block is skipped (all-zero).
 *
 * Looks up txb_skip_cost[txb_skip_ctx][1] in the coefficient cost table
 * selected by the transform-size entropy context and the plane type. When
 * allow_update_cdf is non-zero, the txb_skip CDF of ec_ctx is updated with
 * symbol 1 as well.
 */
static INLINE int32_t av1_cost_skip_txb(ModeDecisionContext* ctx, uint8_t allow_update_cdf, FRAME_CONTEXT* ec_ctx,
                                        TxSize transform_size, PlaneType plane_type, int16_t txb_skip_ctx) {
    const TxSize entropy_txs =
        (TxSize)((txsize_sqr_map[transform_size] + txsize_sqr_up_map[transform_size] + 1) >> 1);
    assert(entropy_txs < TX_SIZES);

    if (allow_update_cdf) {
        update_cdf(ec_ctx->txb_skip_cdf[entropy_txs][txb_skip_ctx], 1, 2);
    }

    const LvMapCoeffCost* const cost_table = &ctx->md_rate_est_ctx->coeff_fac_bits[entropy_txs][plane_type];
    return cost_table->txb_skip_cost[txb_skip_ctx][1];
}
223
224
/*
 * Coefficient rate for a transform block whose only coded coefficient is at
 * position 0 (eob == 1).
 *
 * The rate is the base-eob cost of the level, plus, for a non-zero value,
 * the DC sign cost and, for levels above NUM_BASE_LEVELS, the base-range
 * cost (using context 0) and the Golomb remainder cost when the level
 * reaches 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE.
 */
static INLINE int32_t av1_cost_coeffs_txb_loop_cost_one_eob(const TranLow* const qcoeff, int8_t* const coeff_contexts,
                                                            const LvMapCoeffCost* coeff_costs, int16_t dc_sign_ctx) {
    const TranLow dc_val  = qcoeff[0];
    const int32_t abs_lvl = abs(dc_val);
    const int32_t ctx_idx = coeff_contexts[0];

    // The coefficient at eob - 1 is expected to be non-zero.
    assert((AOMMIN(abs_lvl, 3) - 1) >= 0);
    int32_t rate = coeff_costs->base_eob_cost[ctx_idx][AOMMIN(abs_lvl, 3) - 1];

    if (dc_val == 0) {
        return rate;
    }

    // sign bit cost
    rate += coeff_costs->dc_sign_cost[dc_sign_ctx][(dc_val < 0) ? 1 : 0];

    if (abs_lvl <= NUM_BASE_LEVELS) {
        return rate;
    }

    const int32_t range_idx = abs_lvl - 1 - NUM_BASE_LEVELS;
    if (range_idx < COEFF_BASE_RANGE) {
        rate += coeff_costs->lps_cost[0][range_idx];
    } else {
        rate += coeff_costs->lps_cost[0][COEFF_BASE_RANGE];
    }

    if (abs_lvl >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
        rate += get_golomb_cost(abs_lvl);
    }
    return rate;
}
254
255
static INLINE int32_t av1_cost_coeffs_txb_loop_cost_eob(ModeDecisionContext* md_ctx, uint16_t eob,
256
                                                        const int16_t* const scan, const TranLow* const qcoeff,
257
                                                        int8_t* const coeff_contexts, const LvMapCoeffCost* coeff_costs,
258
                                                        int16_t dc_sign_ctx, uint8_t* const levels, const int32_t bwl,
259
12.4k
                                                        TxType transform_type) {
260
12.4k
    const uint32_t cost_literal = av1_cost_literal(1);
261
12.4k
    int32_t        cost         = 0;
262
263
    //Optimized/simplified function when eob is 1
264
12.4k
    if (eob == 1) {
265
12.4k
        return av1_cost_coeffs_txb_loop_cost_one_eob(qcoeff, coeff_contexts, coeff_costs, dc_sign_ctx);
266
12.4k
    }
267
268
    //  first (eob - 1) index
269
18.4E
    {
270
18.4E
        const int32_t pos       = scan[eob - 1];
271
18.4E
        const TranLow v         = qcoeff[pos];
272
18.4E
        const int32_t level     = abs(v);
273
18.4E
        const int32_t coeff_ctx = coeff_contexts[pos];
274
275
18.4E
        assert((AOMMIN(level, 3) - 1) >= 0);
276
18.4E
        cost += coeff_costs->base_eob_cost[coeff_ctx][AOMMIN(level, 3) - 1];
277
278
18.4E
        if (v != 0) {
279
0
            cost += cost_literal;
280
0
            if (level > NUM_BASE_LEVELS) {
281
0
                int32_t       ctx        = get_br_ctx(levels, pos, bwl, tx_type_to_class[transform_type]);
282
0
                const int32_t base_range = level - 1 - NUM_BASE_LEVELS;
283
284
0
                if (base_range < COEFF_BASE_RANGE) {
285
0
                    cost += coeff_costs->lps_cost[ctx][base_range];
286
0
                } else {
287
0
                    cost += coeff_costs->lps_cost[ctx][COEFF_BASE_RANGE];
288
0
                }
289
290
0
                if (level >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
291
0
                    cost += get_golomb_cost(level);
292
0
                }
293
0
            }
294
0
        }
295
18.4E
    }
296
    // last (0) index
297
18.4E
    {
298
18.4E
        const TranLow v         = qcoeff[0];
299
18.4E
        const int32_t level     = abs(v);
300
18.4E
        const int32_t coeff_ctx = coeff_contexts[0];
301
302
18.4E
        cost += coeff_costs->base_cost[coeff_ctx][AOMMIN(level, 3)];
303
304
18.4E
        if (v != 0) {
305
0
            const int32_t sign = (v < 0) ? 1 : 0;
306
            // sign bit cost
307
308
0
            cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign];
309
310
0
            if (level > NUM_BASE_LEVELS) {
311
0
                int32_t       ctx        = get_br_ctx(levels, 0, bwl, tx_type_to_class[transform_type]);
312
0
                const int32_t base_range = level - 1 - NUM_BASE_LEVELS;
313
314
0
                if (base_range < COEFF_BASE_RANGE) {
315
0
                    cost += coeff_costs->lps_cost[ctx][base_range];
316
0
                } else {
317
0
                    cost += coeff_costs->lps_cost[ctx][COEFF_BASE_RANGE];
318
0
                }
319
320
0
                if (level >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
321
0
                    cost += get_golomb_cost(level);
322
0
                }
323
0
            }
324
0
        }
325
18.4E
    }
326
18.4E
    int32_t c;
327
    /* Optimized Loop, omitted first (eob - 1) and last (0) index */
328
    // Estimate the rate of the first(eob / fast_coeff_est_level) coeff(s), DC and last coeff only
329
18.4E
    int32_t  c_start = MIN(eob - 2, eob / MAX(1, (int)(md_ctx->mds_fast_coeff_est_level - md_ctx->mds_subres_step)));
330
18.4E
    uint32_t cost_literal_cnt = 0;
331
18.4E
    for (c = c_start; c >= 1; --c) {
332
0
        const int32_t pos = scan[c];
333
0
        cost_literal_cnt += !!(qcoeff[pos]);
334
0
        const int32_t level = abs(qcoeff[pos]);
335
0
        if (level > NUM_BASE_LEVELS) {
336
0
            int32_t       ctx        = get_br_ctx(levels, pos, bwl, tx_type_to_class[transform_type]);
337
0
            const int32_t base_range = level - 1 - NUM_BASE_LEVELS;
338
339
0
            cost += coeff_costs->base_cost[coeff_contexts[pos]][3];
340
0
            if (base_range < COEFF_BASE_RANGE) {
341
0
                cost += coeff_costs->lps_cost[ctx][base_range];
342
0
            } else {
343
0
                cost += get_golomb_cost(level) + coeff_costs->lps_cost[ctx][COEFF_BASE_RANGE];
344
0
            }
345
0
        } else {
346
0
            cost += coeff_costs->base_cost[coeff_contexts[pos]][level];
347
0
        }
348
0
    }
349
18.4E
    cost += cost_literal_cnt * cost_literal;
350
351
18.4E
    return cost;
352
12.4k
}
353
354
// Note: don't call this function when eob is 0.
355
uint64_t svt_av1_cost_coeffs_txb(ModeDecisionContext* ctx, uint8_t allow_update_cdf, FRAME_CONTEXT* ec_ctx,
356
                                 ModeDecisionCandidateBuffer* cand_bf, const TranLow* const qcoeff, uint16_t eob,
357
                                 PlaneType plane_type, TxSize transform_size, TxType transform_type,
358
                                 int16_t txb_skip_ctx, int16_t dc_sign_ctx, bool reduced_transform_set_flag)
359
360
12.4k
{
361
    //Note: there is a different version of this function in AOM that seems to be efficient as its name is:
362
    //warehouse_efficients_txb
363
364
12.4k
    const TxSize  txs_ctx  = get_txsize_entropy_ctx(transform_size);
365
12.4k
    const TxClass tx_class = tx_type_to_class[transform_type];
366
12.4k
    int32_t       cost;
367
12.4k
    const int32_t bwl    = get_txb_bwl(transform_size);
368
12.4k
    const int32_t width  = get_txb_wide(transform_size);
369
12.4k
    const int32_t height = get_txb_high(transform_size);
370
371
12.4k
    const ScanOrder* const scan_order     = get_scan_order(transform_size, transform_type);
372
12.4k
    const int16_t* const   scan           = scan_order->scan;
373
12.4k
    uint8_t* const         levels         = set_levels(ctx->md_levels_buf, width, height);
374
12.4k
    int8_t* const          coeff_contexts = ctx->md_coeff_contexts;
375
12.4k
    assert(txs_ctx < TX_SIZES);
376
12.4k
    const LvMapCoeffCost* const coeff_costs = &ctx->md_rate_est_ctx->coeff_fac_bits[txs_ctx][plane_type];
377
378
12.4k
    const int32_t             eob_multi_size = txsize_log2_minus4[transform_size];
379
12.4k
    const LvMapEobCost* const eob_bits       = &ctx->md_rate_est_ctx->eob_frac_bits[eob_multi_size][plane_type];
380
    // eob must be greater than 0 here.
381
12.4k
    assert(eob > 0);
382
12.4k
    cost = coeff_costs->txb_skip_cost[txb_skip_ctx][0];
383
384
12.4k
    if (allow_update_cdf) {
385
0
        update_cdf(ec_ctx->txb_skip_cdf[txs_ctx][txb_skip_ctx], eob == 0, 2);
386
0
    }
387
388
12.4k
    if (eob > 1) {
389
0
        svt_av1_txb_init_levels(qcoeff,
390
0
                                width,
391
0
                                height,
392
0
                                levels); // NM - Needs to be optimized - to be combined with the quantisation.
393
0
    }
394
12.4k
    const bool is_inter = is_inter_mode(cand_bf->cand->block_mi.mode);
395
    // Transform type bit estimation
396
12.4k
    cost += plane_type > PLANE_TYPE_Y ? 0
397
12.4k
                                      : av1_transform_type_rate_estimation(ctx,
398
0
                                                                           allow_update_cdf,
399
0
                                                                           ec_ctx,
400
0
                                                                           cand_bf,
401
0
                                                                           is_inter,
402
0
                                                                           transform_size,
403
0
                                                                           transform_type,
404
0
                                                                           reduced_transform_set_flag);
405
406
    // Transform eob bit estimation
407
12.4k
    cost += get_eob_cost(eob, eob_bits, coeff_costs, tx_class);
408
12.4k
    if (allow_update_cdf) {
409
0
        update_eob_context(eob, transform_size, tx_class, plane_type, ec_ctx);
410
0
    }
411
    // Transform non-zero coeff bit estimation
412
12.4k
    svt_av1_get_nz_map_contexts(levels,
413
12.4k
                                scan,
414
12.4k
                                eob,
415
12.4k
                                transform_size,
416
12.4k
                                tx_class,
417
12.4k
                                coeff_contexts); // NM - Assembly version is available in AOM
418
12.4k
    assert(eob <= width * height);
419
12.4k
    if (allow_update_cdf) {
420
0
        for (int c = eob - 1; c >= 0; --c) {
421
0
            const int     pos       = scan[c];
422
0
            const int     coeff_ctx = coeff_contexts[pos];
423
0
            const TranLow v         = qcoeff[pos];
424
0
            const TranLow level     = abs(v);
425
0
            if (c == eob - 1) {
426
0
                assert(coeff_ctx < 4);
427
0
                update_cdf(ec_ctx->coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx], AOMMIN(level, 3) - 1, 3);
428
0
            } else {
429
0
                update_cdf(ec_ctx->coeff_base_cdf[txs_ctx][plane_type][coeff_ctx], AOMMIN(level, 3), 4);
430
0
            }
431
432
0
            {
433
0
                if (c == eob - 1) {
434
0
                    assert(coeff_ctx < 4);
435
0
                }
436
0
            }
437
438
0
            if (level > NUM_BASE_LEVELS) {
439
0
                const int base_range = level - 1 - NUM_BASE_LEVELS;
440
0
                int       br_ctx;
441
0
                if (eob == 1) {
442
0
                    br_ctx = 0;
443
0
                } else {
444
0
                    br_ctx = get_br_ctx(levels, pos, bwl, tx_class);
445
0
                }
446
447
0
                for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
448
0
                    const int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1);
449
0
                    update_cdf(ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)][plane_type][br_ctx], k, BR_CDF_SIZE);
450
0
                    for (int lps = 0; lps < BR_CDF_SIZE - 1; lps++) {
451
0
                        if (lps == k) {
452
0
                            break;
453
0
                        }
454
0
                    }
455
0
                    if (k < BR_CDF_SIZE - 1) {
456
0
                        break;
457
0
                    }
458
0
                }
459
0
            }
460
0
        }
461
462
0
        if (qcoeff[0] != 0) {
463
0
            update_cdf(ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], qcoeff[0] < 0, 2);
464
0
        }
465
466
        //TODO: CHKN  for 128x128 where we need more than one TXb, we need to update the txb_context(dc_sign+skip_ctx) in a Txb basis.
467
468
0
        return 0;
469
0
    }
470
471
12.4k
    cost += av1_cost_coeffs_txb_loop_cost_eob(
472
12.4k
        ctx, eob, scan, qcoeff, coeff_contexts, coeff_costs, dc_sign_ctx, levels, bwl, transform_type);
473
12.4k
    return cost;
474
12.4k
}
475
476
uint64_t svt_aom_get_intra_uv_fast_rate(PictureControlSet* pcs, ModeDecisionContext* ctx,
477
145k
                                        ModeDecisionCandidateBuffer* cand_bf, bool use_accurate_cfl) {
478
145k
    const BlockGeom* const blk_geom = ctx->blk_geom;
479
145k
    ModeDecisionCandidate* cand     = cand_bf->cand;
480
145k
    assert(ctx->has_uv);
481
145k
    assert(!(svt_aom_allow_intrabc(&pcs->ppcs->frm_hdr, pcs->ppcs->slice_type) && cand->block_mi.use_intrabc));
482
145k
    MdRateEstimationContext* md_rate_est_ctx = ctx->md_rate_est_ctx;
483
145k
    const uint8_t            is_cfl_allowed  = (blk_geom->bwidth <= 32 && blk_geom->bheight <= 32) ? 1 : 0;
484
145k
    PredictionMode           intra_mode      = (PredictionMode)cand->block_mi.mode;
485
    // If CFL alphas are not known yet, calculate the chroma mode bits based on DC Mode. If CFL is selected the chroma mode bits must be updated later
486
145k
    const UvPredictionMode chroma_mode = cand->block_mi.uv_mode == UV_CFL_PRED && !use_accurate_cfl
487
145k
        ? UV_DC_PRED
488
145k
        : cand->block_mi.uv_mode;
489
145k
    const uint32_t         mi_row      = ctx->blk_org_y >> MI_SIZE_LOG2;
490
145k
    const uint32_t         mi_col      = ctx->blk_org_x >> MI_SIZE_LOG2;
491
    // Subsampling assumes YUV 420 content
492
145k
    const uint8_t ss_x = 1;
493
145k
    const uint8_t ss_y = 1;
494
495
145k
    uint64_t chroma_rate = 0;
496
    // Estimate chroma nominal intra mode bits
497
145k
    chroma_rate += (uint64_t)md_rate_est_ctx->intra_uv_mode_fac_bits[is_cfl_allowed][intra_mode][chroma_mode];
498
499
    // Estimate chroma angular mode bits; angular offset only allow for bsize >= 8x8
500
145k
    if (blk_geom->bsize >= BLOCK_8X8 && av1_is_directional_mode(get_uv_mode(chroma_mode))) {
501
0
        chroma_rate +=
502
0
            md_rate_est_ctx->angle_delta_fac_bits[chroma_mode - V_PRED]
503
0
                                                 [MAX_ANGLE_DELTA + cand->block_mi.angle_delta[PLANE_TYPE_UV]];
504
0
    }
505
506
    // Estimate CFL factor bits when CFL is used
507
145k
    if (chroma_mode == UV_CFL_PRED) {
508
0
        chroma_rate += (uint64_t)md_rate_est_ctx->cfl_alpha_fac_bits[cand->block_mi.cfl_alpha_signs][CFL_PRED_U]
509
0
                                                                    [CFL_IDX_U(cand->block_mi.cfl_alpha_idx)] +
510
0
            (uint64_t)md_rate_est_ctx->cfl_alpha_fac_bits[cand->block_mi.cfl_alpha_signs][CFL_PRED_V]
511
0
                                                         [CFL_IDX_V(cand->block_mi.cfl_alpha_idx)];
512
0
    }
513
514
    // Estimate chroma palette mode bits (currently not supported, so just cost of signalling off)
515
145k
    if (chroma_mode == UV_DC_PRED &&
516
145k
        svt_aom_allow_palette(pcs->ppcs->frm_hdr.allow_screen_content_tools, blk_geom->bsize) &&
517
0
        is_chroma_reference(mi_row, mi_col, blk_geom->bsize, ss_x, ss_y)) {
518
0
        const int use_palette_y  = cand->palette_info && (cand->palette_size[0] > 0);
519
0
        const int use_palette_uv = cand->palette_info && (cand->palette_size[1] > 0);
520
0
        chroma_rate += ctx->md_rate_est_ctx->palette_uv_mode_fac_bits[use_palette_y][use_palette_uv];
521
0
    }
522
523
145k
    return chroma_rate;
524
145k
}
525
526
/*
 * Fast (pre-transform) RD cost of an intra or IntraBC candidate.
 *
 * For an IntraBC candidate, the rate is the block-vector rate plus the
 * IntraBC flag bits, and the chroma rate is set to 0. For a regular intra
 * candidate, the luma rate sums the mode bits, skip-mode bits, angle-delta
 * bits, palette bits, filter-intra bits, the intra/inter flag bits and, when
 * IntraBC is allowed, the IntraBC-off flag bits; the chroma rate comes from
 * svt_aom_get_intra_uv_fast_rate() when the block has chroma.
 *
 * Side effects: stores the luma and chroma rates in cand_bf->fast_luma_rate
 * and cand_bf->fast_chroma_rate.
 *
 * Returns RDCOST(lambda, total rate, luma_distortion).
 */
uint64_t svt_aom_intra_fast_cost(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf,
                                 uint64_t lambda, uint64_t luma_distortion) {
    const BlockGeom*       blk_geom = ctx->blk_geom;
    BlkStruct*             blk_ptr  = ctx->blk_ptr;
    ModeDecisionCandidate* cand     = cand_bf->cand;

    // ---- IntraBC candidate: only the block vector and the IBC flag are costed ----
    if (svt_aom_allow_intrabc(&pcs->ppcs->frm_hdr, pcs->ppcs->slice_type) && cand->block_mi.use_intrabc) {
        Mv         dv        = {.as_int = cand->block_mi.mv[0].as_int};
        Mv         dv_pred   = {.as_int = cand->pred_mv[0].as_int};
        const int* dvcost[2] = {(int*)&ctx->md_rate_est_ctx->dv_cost[0][MV_MAX],
                                (int*)&ctx->md_rate_est_ctx->dv_cost[1][MV_MAX]};
        int32_t    dv_rate   = svt_av1_mv_bit_cost(
            &dv, &dv_pred, ctx->md_rate_est_ctx->dv_joint_cost, dvcost, MV_COST_WEIGHT_SUB);

        uint64_t ibc_rate = dv_rate + ctx->md_rate_est_ctx->intrabc_fac_bits[cand->block_mi.use_intrabc];

        cand_bf->fast_luma_rate   = ibc_rate;
        cand_bf->fast_chroma_rate = 0;
        return (RDCOST(lambda, ibc_rate, luma_distortion));
    }

    // ---- Regular intra candidate ----
    const int      is_i_slice = (pcs->slice_type == I_SLICE);
    PredictionMode intra_mode = (PredictionMode)cand->block_mi.mode;

    // Number of bits for each syntax element
    uint64_t mode_bits       = ZERO_COST;
    uint64_t skip_mode_bits  = ZERO_COST;
    uint64_t luma_mode_bits  = ZERO_COST;
    uint64_t angle_bits      = 0;
    uint64_t filter_bits     = 0;

    if (is_i_slice) {
        // Nominal luma intra mode bits for key frames
        luma_mode_bits = (uint64_t)
                             ctx->md_rate_est_ctx->y_mode_fac_bits[ctx->intra_luma_top_ctx][ctx->intra_luma_left_ctx][intra_mode];
    } else {
        mode_bits =
            (uint64_t)ctx->md_rate_est_ctx->mb_mode_fac_bits[eb_size_group_lookup[blk_geom->bsize]][intra_mode];

        if (pcs->ppcs->frm_hdr.skip_mode_params.skip_mode_flag && is_comp_ref_allowed(blk_geom->bsize)) {
            const uint8_t skip_mode_ctx = ctx->skip_mode_ctx;
            skip_mode_bits              = (uint64_t)ctx->md_rate_est_ctx->skip_mode_fac_bits[skip_mode_ctx][0];
        }
    }

    // Luma angular mode bits
    if (blk_geom->bsize >= BLOCK_8X8 && av1_is_directional_mode(cand->block_mi.mode)) {
        assert((intra_mode - V_PRED) < 8);
        assert((intra_mode - V_PRED) >= 0);
        angle_bits = ctx->md_rate_est_ctx->angle_delta_fac_bits[intra_mode - V_PRED]
                                                               [MAX_ANGLE_DELTA + cand->block_mi.angle_delta[PLANE_TYPE_Y]];
    }

    // Luma palette bits: the on/off flag, plus the full palette description when it is on
    if (svt_aom_allow_palette(pcs->ppcs->frm_hdr.allow_screen_content_tools, blk_geom->bsize) &&
        intra_mode == DC_PRED) {
        const int palette_on = cand->palette_info ? (cand->palette_size[0] > 0) : 0;
        const int bsize_ctx  = svt_aom_get_palette_bsize_ctx(blk_geom->bsize);
        const int mode_ctx   = svt_aom_get_palette_mode_ctx(blk_ptr->av1xd);

        luma_mode_bits += ctx->md_rate_est_ctx->palette_ymode_fac_bits[bsize_ctx][mode_ctx][palette_on];

        if (palette_on) {
            const uint8_t* const color_map = cand->palette_info->color_idx_map;
            int                  block_width, block_height, rows, cols;
            svt_aom_get_block_dimensions(
                blk_geom->bsize, 0, blk_ptr->av1xd, &block_width, &block_height, &rows, &cols);

            const int palette_size = cand->palette_size[0];
            int       palette_cost =
                ctx->md_rate_est_ctx->palette_ysize_fac_bits[bsize_ctx][palette_size - PALETTE_MIN_SIZE] +
                svt_aom_write_uniform_cost(palette_size, color_map[0]);

            uint16_t  color_cache[2 * PALETTE_MAX_SIZE];
            const int n_cache = svt_get_palette_cache_y(blk_ptr->av1xd, color_cache);

            palette_cost += svt_av1_palette_color_cost_y(&cand->palette_info->pmi,
                                                         color_cache,
                                                         cand->palette_size[0],
                                                         n_cache,
                                                         pcs->ppcs->scs->encoder_bit_depth);
            palette_cost += svt_av1_cost_color_map(
                cand, ctx->md_rate_est_ctx, blk_ptr, 0, blk_geom->bsize, PALETTE_MAP);

            luma_mode_bits += palette_cost;
        }
    }

    // Filter-intra bits: the on/off flag, plus the filter mode when it is on
    if (svt_aom_filter_intra_allowed(pcs->ppcs->scs->seq_header.filter_intra_level,
                                     blk_geom->bsize,
                                     cand->palette_info ? cand->palette_size[0] : 0,
                                     intra_mode)) {
        const int filter_on = (cand->block_mi.filter_intra_mode != FILTER_INTRA_MODES);

        filter_bits = ctx->md_rate_est_ctx->filter_intra_fac_bits[blk_geom->bsize][filter_on];
        if (filter_on) {
            filter_bits += ctx->md_rate_est_ctx->filter_intra_mode_fac_bits[cand->block_mi.filter_intra_mode];
        }
    }

    // Chroma: CFL info is not known in the fast loop, so DC is assumed when CFL is selected
    uint32_t chroma_rate = 0;
    if (ctx->has_uv) {
        chroma_rate = (uint32_t)svt_aom_get_intra_uv_fast_rate(pcs, ctx, cand_bf, 0);
    }

    // Intra/inter flag bits (not coded on I slices)
    uint32_t is_inter_rate = 0;
    if (!is_i_slice) {
        is_inter_rate = ctx->md_rate_est_ctx->intra_inter_fac_bits[ctx->is_inter_ctx][0];
    }

    uint32_t luma_rate = (uint32_t)(mode_bits + skip_mode_bits + luma_mode_bits + angle_bits + is_inter_rate +
                                    filter_bits);

    if (svt_aom_allow_intrabc(&pcs->ppcs->frm_hdr, pcs->ppcs->slice_type)) {
        svt_aom_assert_err(cand->block_mi.use_intrabc == 0, "this block ibc should be off\n");
        luma_rate += ctx->md_rate_est_ctx->intrabc_fac_bits[cand->block_mi.use_intrabc];
    }

    // Keep the fast luma and chroma rates for future use
    cand_bf->fast_luma_rate   = luma_rate;
    cand_bf->fast_chroma_rate = chroma_rate;

    // Assign fast cost
    uint32_t rate = luma_rate + chroma_rate;
    return (RDCOST(lambda, rate, luma_distortion));
}
641
642
// Estimates the rate of signalling the reference frame(s) of a block.
//
// The rate is accumulated from the per-context cost tables in ctx->md_rate_est_ctx,
// following the same decision tree as the commented-out WRITE_REF_BIT() calls below.
//
//   ctx            - mode decision context; only ctx->md_rate_est_ctx is read here.
//   blk_ptr        - block whose av1xd is handed to the prediction-context helpers.
//   ref_frame_type - reference frame type index; av1_set_ref_frame() fills ref_type[] from it.
//   is_compound    - selects the compound tree (true) or the single-reference tree (false).
//
// Returns the accumulated rate.
//
// Side effect: blk_ptr->av1xd->mi[0]->block_mi.ref_frame[0..1] are overwritten with ref_type[].
643
uint64_t estimate_ref_frame_type_bits(ModeDecisionContext* ctx, BlkStruct* blk_ptr, uint8_t ref_frame_type,
644
0
                                      bool is_compound) {
645
0
    uint64_t ref_rate_bits = 0;
646
647
0
    MbModeInfo* const mbmi = blk_ptr->av1xd->mi[0];
648
0
    MvReferenceFrame  ref_type[2];
649
0
    av1_set_ref_frame(ref_type, ref_frame_type);
    // Store the reference pair in the block's mode info before the context helpers
    // below are called with blk_ptr->av1xd.
650
0
    mbmi->block_mi.ref_frame[0] = ref_type[0];
651
0
    mbmi->block_mi.ref_frame[1] = ref_type[1];
652
    //const int is_compound = svt_aom_has_second_ref(mbmi);
653
0
    {
654
0
        if (is_compound) {
            // Compound: first the cost of the compound reference type (uni- vs bi-directional).
655
0
            const CompReferenceType comp_ref_type = has_uni_comp_refs(&mbmi->block_mi) ? UNIDIR_COMP_REFERENCE
656
0
                                                                                       : BIDIR_COMP_REFERENCE;
657
658
0
            ref_rate_bits += ctx->md_rate_est_ctx->comp_ref_type_fac_bits[svt_aom_get_comp_reference_type_context_new(
659
0
                blk_ptr->av1xd)][comp_ref_type];
660
            /*aom_write_symbol(w, comp_ref_type,
661
               svt_aom_get_comp_reference_type_cdf(blk_ptr->av1xd), 2);*/
662
663
0
            if (comp_ref_type == UNIDIR_COMP_REFERENCE) {
664
                // SVT_LOG("ERROR[AN]: UNIDIR_COMP_REFERENCE not supported\n");
                // Unidirectional compound: up to three bits (tables [..][0], [..][1], [..][2]).
665
0
                const int bit = mbmi->block_mi.ref_frame[0] == BWDREF_FRAME;
666
667
0
                ref_rate_bits += ctx->md_rate_est_ctx->uni_comp_ref_fac_bits[svt_av1_get_pred_context_uni_comp_ref_p(
668
0
                    blk_ptr->av1xd)][0][bit];
669
                // blk_ptr->av1xd->tile_ctx->uni_comp_ref_cdf[pred_context][0];
670
                // WRITE_REF_BIT(bit, uni_comp_ref_p);
671
672
0
                if (!bit) {
673
0
                    assert(mbmi->block_mi.ref_frame[0] == LAST_FRAME);
674
0
                    const int bit1 = mbmi->block_mi.ref_frame[1] == LAST3_FRAME ||
675
0
                        mbmi->block_mi.ref_frame[1] == GOLDEN_FRAME;
676
0
                    ref_rate_bits +=
677
0
                        ctx->md_rate_est_ctx
678
0
                            ->uni_comp_ref_fac_bits[svt_av1_get_pred_context_uni_comp_ref_p1(blk_ptr->av1xd)][1][bit1];
679
                    // ref_rate_d = blk_ptr->av1xd->tile_ctx->uni_comp_ref_cdf[pred_context][1];
680
                    // WRITE_REF_BIT(bit1, uni_comp_ref_p1);
681
0
                    if (bit1) {
682
0
                        const int bit2 = mbmi->block_mi.ref_frame[1] == GOLDEN_FRAME;
683
0
                        ref_rate_bits +=
684
0
                            ctx->md_rate_est_ctx->uni_comp_ref_fac_bits[svt_av1_get_pred_context_uni_comp_ref_p2(
685
0
                                blk_ptr->av1xd)][2][bit2];
686
687
                        // ref_rate_e = blk_ptr->av1xd->tile_ctx->uni_comp_ref_cdf[pred_context][2];
688
                        //WRITE_REF_BIT(bit2, uni_comp_ref_p2);
689
0
                    }
690
0
                }
                // The unidirectional path is complete here; the bidirectional costs below do not apply.
691
0
                return ref_rate_bits;
692
0
            }
693
694
0
            assert(comp_ref_type == BIDIR_COMP_REFERENCE);
695
            // Bidirectional compound: cost of the first reference (ref_frame[0]) ...
696
0
            const int bit = (mbmi->block_mi.ref_frame[0] == GOLDEN_FRAME || mbmi->block_mi.ref_frame[0] == LAST3_FRAME);
697
0
            const int pred_ctx = svt_av1_get_pred_context_comp_ref_p(blk_ptr->av1xd);
698
0
            ref_rate_bits += ctx->md_rate_est_ctx->comp_ref_fac_bits[pred_ctx][0][bit];
699
            // ref_rate_f = blk_ptr->av1xd->tile_ctx->comp_ref_cdf[pred_ctx][0];
700
            // WRITE_REF_BIT(bit, comp_ref_p);
701
702
0
            if (!bit) {
703
0
                const int bit1 = mbmi->block_mi.ref_frame[0] == LAST2_FRAME;
704
0
                ref_rate_bits += ctx->md_rate_est_ctx
705
0
                                     ->comp_ref_fac_bits[svt_av1_get_pred_context_comp_ref_p1(blk_ptr->av1xd)][1][bit1];
706
                // ref_rate_g = blk_ptr->av1xd->tile_ctx->comp_ref_cdf[pred_context][1];
707
                // WRITE_REF_BIT(bit1, comp_ref_p1);
708
0
            } else {
709
0
                const int bit2 = mbmi->block_mi.ref_frame[0] == GOLDEN_FRAME;
710
0
                ref_rate_bits += ctx->md_rate_est_ctx
711
0
                                     ->comp_ref_fac_bits[svt_av1_get_pred_context_comp_ref_p2(blk_ptr->av1xd)][2][bit2];
712
                // ref_rate_h = blk_ptr->av1xd->tile_ctx->comp_ref_cdf[pred_context][2];
713
                // WRITE_REF_BIT(bit2, comp_ref_p2);
714
0
            }
715
            // ... followed by the cost of the second reference (ref_frame[1]).
716
0
            const int bit_bwd    = mbmi->block_mi.ref_frame[1] == ALTREF_FRAME;
717
0
            const int pred_ctx_2 = svt_av1_get_pred_context_comp_bwdref_p(blk_ptr->av1xd);
718
0
            ref_rate_bits += ctx->md_rate_est_ctx->comp_bwd_ref_fac_bits[pred_ctx_2][0][bit_bwd];
719
            // ref_rate_i = blk_ptr->av1xd->tile_ctx->comp_bwdref_cdf[pred_ctx_2][0];
720
            // WRITE_REF_BIT(bit_bwd, comp_bwdref_p);
721
722
0
            if (!bit_bwd) {
723
0
                ref_rate_bits += ctx->md_rate_est_ctx->comp_bwd_ref_fac_bits[svt_av1_get_pred_context_comp_bwdref_p1(
724
0
                    blk_ptr->av1xd)][1][ref_type[1] == ALTREF2_FRAME];
725
                // ref_rate_j = blk_ptr->av1xd->tile_ctx->comp_bwdref_cdf[pred_context][1];
726
                // WRITE_REF_BIT(mbmi->block_mi.ref_frame[1] == ALTREF2_FRAME, comp_bwdref_p1);
727
0
            }
728
0
        } else {
            // Single reference: bit0 is set when ref_frame[0] lies in [BWDREF_FRAME, ALTREF_FRAME].
729
0
            const int bit0 = (mbmi->block_mi.ref_frame[0] <= ALTREF_FRAME &&
730
0
                              mbmi->block_mi.ref_frame[0] >= BWDREF_FRAME);
731
0
            ref_rate_bits += ctx->md_rate_est_ctx
732
0
                                 ->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p1(blk_ptr->av1xd)][0][bit0];
733
            // ref_rate_k =
734
            // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p1(blk_ptr->av1xd)][0];
735
            // WRITE_REF_BIT(bit0, single_ref_p1);
736
737
0
            if (bit0) {
738
0
                const int bit1 = mbmi->block_mi.ref_frame[0] == ALTREF_FRAME;
739
0
                ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p2(
740
0
                    blk_ptr->av1xd)][1][bit1];
741
                // ref_rate_l =
742
                // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p2(blk_ptr->av1xd)][1];
743
                // WRITE_REF_BIT(bit1, single_ref_p2);
744
0
                if (!bit1) {
                    // This branch is only reached when !is_compound, and it indexes with
                    // ref_frame_type itself rather than ref_frame[0].
745
0
                    ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p6(
746
0
                        blk_ptr->av1xd)][5][ref_frame_type == ALTREF2_FRAME];
747
                    // ref_rate_m =
748
                    // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p6(blk_ptr->av1xd)][5];
749
                    // WRITE_REF_BIT(mbmi->block_mi.ref_frame[0] == ALTREF2_FRAME, single_ref_p6);
750
0
                }
751
0
            } else {
752
0
                const int bit2 = (mbmi->block_mi.ref_frame[0] == LAST3_FRAME ||
753
0
                                  mbmi->block_mi.ref_frame[0] == GOLDEN_FRAME);
754
0
                ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p3(
755
0
                    blk_ptr->av1xd)][2][bit2];
756
                // ref_rate_n =
757
                // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p3(blk_ptr->av1xd)][2];
758
                // WRITE_REF_BIT(bit2, single_ref_p3);
759
0
                if (!bit2) {
760
0
                    const int bit3 = mbmi->block_mi.ref_frame[0] != LAST_FRAME;
761
0
                    ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p4(
762
0
                        blk_ptr->av1xd)][3][bit3];
763
                    // ref_rate_o =
764
                    // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p4(blk_ptr->av1xd)][3];
765
                    // WRITE_REF_BIT(bit3, single_ref_p4);
766
0
                } else {
767
0
                    const int bit4 = mbmi->block_mi.ref_frame[0] != LAST3_FRAME;
768
0
                    ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p5(
769
0
                        blk_ptr->av1xd)][4][bit4];
770
                    // ref_rate_p =
771
                    // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p5(blk_ptr->av1xd)][4];
772
                    // WRITE_REF_BIT(bit4, single_ref_p5);
773
0
                }
774
0
            }
775
0
        }
776
0
    }
777
0
    return ref_rate_bits;
778
0
}
779
780
// Forward declarations of the two helpers called by get_compound_mode_rate();
// their definitions are not in this part of the file.
int svt_aom_get_comp_group_idx_context_enc(const MacroBlockD* xd);
781
int is_any_masked_compound_used(BlockSize bsize);
782
783
// Estimates the rate of signalling the compound prediction type of a candidate.
//
//   pcs   - picture control set; the sequence header flags and reference order hints are read from pcs->ppcs.
//   ctx   - mode decision context; supplies blk_ptr and the cost tables in md_rate_est_ctx.
//   cand  - candidate whose comp_group_idx / compound_idx / interinter_comp fields are costed.
//   bsize - block size used for the masked-compound and wedge checks and as a table index.
//
// Returns 0 when has_second_ref() is false for the candidate's reference pair;
// otherwise the sum of the applicable table entries and literal-bit costs.
//
// Side effect: blk_ptr->av1xd->mi[0]->block_mi.ref_frame[0..1] are overwritten with the candidate's references.
static INLINE uint32_t get_compound_mode_rate(PictureControlSet* pcs, ModeDecisionContext* ctx,
784
0
                                              ModeDecisionCandidate* cand, BlockSize bsize) {
785
0
    BlkStruct*          blk_ptr   = ctx->blk_ptr;
786
0
    SequenceControlSet* scs       = pcs->ppcs->scs;
787
0
    uint32_t            comp_rate = 0;
788
0
    MbModeInfo* const   mbmi      = blk_ptr->av1xd->mi[0];
789
0
    MvReferenceFrame    rf[2]     = {cand->block_mi.ref_frame[0], cand->block_mi.ref_frame[1]};
790
0
    mbmi->block_mi.ref_frame[0]   = rf[0];
791
0
    mbmi->block_mi.ref_frame[1]   = rf[1];
792
793
    //NOTE  :  Make sure, any cuPtr data is already set before   usage
794
795
0
    if (has_second_ref(&mbmi->block_mi)) {
796
0
        const int masked_compound_used = is_any_masked_compound_used(bsize) && scs->seq_header.enable_masked_compound;
797
        // comp_group_idx is only costed when masked compound is usable for this block size
        // and enabled in the sequence header; otherwise it is asserted to be 0.
798
0
        if (masked_compound_used) {
799
0
            const int ctx_comp_group_idx = svt_aom_get_comp_group_idx_context_enc(blk_ptr->av1xd);
800
0
            comp_rate =
801
0
                ctx->md_rate_est_ctx->comp_group_idx_fac_bits[ctx_comp_group_idx][cand->block_mi.comp_group_idx];
802
0
        } else {
803
0
            assert(cand->block_mi.comp_group_idx == 0);
804
0
        }
805
806
0
        if (cand->block_mi.comp_group_idx == 0) {
807
0
            if (cand->block_mi.compound_idx) {
808
0
                assert(cand->block_mi.interinter_comp.type == COMPOUND_AVERAGE);
809
0
            }
810
            // compound_idx is only costed when enable_jnt_comp is set; otherwise it is asserted to be 1.
811
0
            if (scs->seq_header.order_hint_info.enable_jnt_comp) {
812
0
                const int comp_index_ctx = svt_aom_get_comp_index_context_enc(pcs->ppcs,
813
0
                                                                              pcs->ppcs->cur_order_hint,
814
0
                                                                              pcs->ppcs->ref_order_hint[rf[0] - 1],
815
0
                                                                              pcs->ppcs->ref_order_hint[rf[1] - 1],
816
0
                                                                              blk_ptr->av1xd);
817
0
                comp_rate += ctx->md_rate_est_ctx->comp_idx_fac_bits[comp_index_ctx][cand->block_mi.compound_idx];
818
0
            } else {
819
0
                assert(cand->block_mi.compound_idx == 1);
820
0
            }
821
0
        } else {
822
0
            assert(pcs->ppcs->frm_hdr.reference_mode != SINGLE_REFERENCE &&
823
0
                   is_inter_compound_mode(cand->block_mi.mode));
824
0
            assert(masked_compound_used);
825
            // compound_diffwtd, wedge
826
0
            assert(cand->block_mi.interinter_comp.type == COMPOUND_WEDGE ||
827
0
                   cand->block_mi.interinter_comp.type == COMPOUND_DIFFWTD);
828
            // The wedge-vs-diffwtd choice is only costed when wedge is usable for this block size.
829
0
            if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
830
0
                comp_rate += ctx->md_rate_est_ctx
831
0
                                 ->compound_type_fac_bits[bsize][cand->block_mi.interinter_comp.type - COMPOUND_WEDGE];
832
0
            }
833
            // Wedge adds the wedge index cost plus one literal bit; diffwtd adds one literal bit only.
834
0
            if (cand->block_mi.interinter_comp.type == COMPOUND_WEDGE) {
835
0
                assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize));
836
0
                comp_rate +=
837
0
                    ctx->md_rate_est_ctx->wedge_idx_fac_bits[bsize][cand->block_mi.interinter_comp.wedge_index];
838
0
                comp_rate += av1_cost_literal(1);
839
0
            } else {
840
0
                assert(cand->block_mi.interinter_comp.type == COMPOUND_DIFFWTD);
841
0
                comp_rate += av1_cost_literal(1);
842
0
            }
843
0
        }
844
0
    }
845
846
0
    return comp_rate;
847
0
}
848
849
// Estimates the rate of signalling the interpolation filter(s) of a block.
//
//   block_mi           - block mode info; ref_frame[] and interp_filters are read.
//   frm_hdr            - frame header; only interpolation_filter is read.
//   ctx                - mode decision context; supplies blk_ptr->av1xd and the cost table.
//   enable_dual_filter - when true two directions (dir 0 and 1) are costed, otherwise only dir 0.
//
// Returns 0 when the frame-level interpolation filter is not SWITCHABLE; otherwise
// the sum of switchable_interp_fac_bitss[pred_ctx][filter] over the costed directions.
int32_t svt_aom_get_switchable_rate(BlockModeInfo* block_mi, const FrameHeader* const frm_hdr, ModeDecisionContext* ctx,
850
0
                                    const bool enable_dual_filter) {
851
0
    if (frm_hdr->interpolation_filter != SWITCHABLE) {
852
0
        return 0;
853
0
    }
854
855
0
    int32_t   inter_filter_cost = 0;
856
0
    const int max_dir           = enable_dual_filter ? 2 : 1;
857
0
    for (int dir = 0; dir < max_dir; ++dir) {
858
0
        const int32_t pred_ctx = svt_aom_get_pred_context_switchable_interp(
859
0
            block_mi->ref_frame[0], block_mi->ref_frame[1], ctx->blk_ptr->av1xd, dir);
860
0
        const InterpFilter filter = av1_extract_interp_filter(block_mi->interp_filters, dir);
        // Both values index the cost table below, so they are bounds-checked in debug builds.
861
0
        assert(pred_ctx < SWITCHABLE_FILTER_CONTEXTS);
862
0
        assert(filter < SWITCHABLE_FILTERS);
863
0
        inter_filter_cost += ctx->md_rate_est_ctx->switchable_interp_fac_bitss[pred_ctx][filter];
864
0
    }
865
0
    return inter_filter_cost;
866
0
}
867
868
// Forward declaration; called by svt_aom_inter_fast_cost() when costing inter-intra candidates.
int svt_aom_is_interintra_wedge_used(BlockSize bsize);
869
870
// Approximate fast-cost estimation for an inter candidate.
//
// Differences from the full path in svt_aom_inter_fast_cost(), as visible in this file:
//   - the MV rate is the closed form 1296 + factor * (|dx| + |dy|) per new MV instead of svt_av1_mv_bit_cost();
//   - the reference-frame bits are only included when ctx->approx_inter_rate < 2;
//   - no inter-intra, motion-mode or compound-type rate is added.
//
//   ctx             - mode decision context (cost tables, contexts, control levels).
//   blk_ptr         - block whose av1xd->ref_mv_count[] is read for the DRL cost.
//   cand_bf         - candidate buffer; fast_luma_rate / fast_chroma_rate are written.
//   luma_distortion - distortion passed to RDCOST().
//   lambda          - lambda passed to RDCOST().
//   pcs             - picture control set (frame header and sequence header flags).
//   ref_mv_stack    - MV stack passed to av1_drl_ctx().
//
// Returns RDCOST(lambda, rate, luma_distortion), where rate is the skip-mode rate when
// cand->skip_mode_allowed is set and that rate is smaller than the luma rate, else the luma rate.
static uint64_t av1_inter_fast_cost_light(ModeDecisionContext* ctx, BlkStruct* blk_ptr,
871
                                          ModeDecisionCandidateBuffer* cand_bf, uint64_t luma_distortion,
872
0
                                          uint64_t lambda, PictureControlSet* pcs, CandidateMv* ref_mv_stack) {
873
0
    ModeDecisionCandidate* cand = cand_bf->cand;
874
    // NM - fast inter cost estimation
875
0
    MdRateEstimationContext* r = ctx->md_rate_est_ctx;
876
    //_mm_prefetch(p, _MM_HINT_T2);
877
    // Luma rate
878
0
    uint32_t             luma_rate           = 0;
879
0
    uint64_t             mv_rate             = 0;
880
0
    const PredictionMode inter_mode          = (PredictionMode)cand->block_mi.mode;
881
0
    const uint8_t        have_nearmv         = have_nearmv_in_inter_mode(inter_mode);
882
0
    uint64_t             inter_mode_bits_num = 0;
883
0
    const uint8_t        skip_mode_ctx       = ctx->skip_mode_ctx;
884
0
    MvReferenceFrame     rf[2]               = {cand->block_mi.ref_frame[0], cand->block_mi.ref_frame[1]};
885
0
    const int8_t         ref_frame_type      = av1_ref_frame_type(rf);
886
0
    const uint8_t        is_compound         = is_inter_compound_mode(cand->block_mi.mode);
887
0
    const uint32_t       mode_context        = svt_aom_mode_context_analyzer(ctx->inter_mode_ctx[ref_frame_type], rf);
888
0
    uint64_t             reference_picture_bits_num = 0;
    // Reference-frame bits are left at 0 when approx_inter_rate >= 2.
889
0
    if (ctx->approx_inter_rate < 2) {
890
0
        reference_picture_bits_num = ctx->estimate_ref_frames_num_bits[ref_frame_type];
891
0
    }
    // Inter mode cost: one table lookup for compound modes, otherwise the
    // NEWMV -> GLOBALMV -> NEARESTMV binary decisions.
892
0
    if (is_compound) {
893
0
        assert(INTER_COMPOUND_OFFSET(inter_mode) < INTER_COMPOUND_MODES);
894
0
        inter_mode_bits_num += r->inter_compound_mode_fac_bits[mode_context][INTER_COMPOUND_OFFSET(inter_mode)];
895
0
    } else {
896
0
        int16_t newmv_ctx = mode_context & NEWMV_CTX_MASK;
897
        //aom_write_symbol(ec_writer, mode != NEWMV, frame_context->newmv_cdf[newmv_ctx], 2);
898
0
        inter_mode_bits_num += r->new_mv_mode_fac_bits[newmv_ctx][inter_mode != NEWMV];
899
0
        if (inter_mode != NEWMV) {
900
0
            const int16_t zero_mv_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
901
            //aom_write_symbol(ec_writer, mode != GLOBALMV, frame_context->zeromv_cdf[zero_mv_ctx], 2);
902
0
            inter_mode_bits_num += r->zero_mv_mode_fac_bits[zero_mv_ctx][inter_mode != GLOBALMV];
903
0
            if (inter_mode != GLOBALMV) {
904
0
                int16_t ref_mv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
905
                /*aom_write_symbol(ec_writer, mode != NEARESTMV, frame_context->refmv_cdf[refmv_ctx], 2);*/
906
0
                inter_mode_bits_num += r->ref_mv_mode_fac_bits[ref_mv_ctx][inter_mode != NEARESTMV];
907
0
            }
908
0
        }
909
0
    }
910
0
    if (inter_mode == NEWMV || inter_mode == NEW_NEWMV || have_nearmv) {
911
        // DRL index cost estimation
912
0
        const int32_t new_mv = inter_mode == NEWMV || inter_mode == NEW_NEWMV;
913
0
        if (new_mv) {
914
0
            int32_t idx;
            // One binary decision per stack position, stopping at the candidate's drl_index;
            // positions are only costed when the MV stack holds more than idx + 1 entries.
915
0
            for (idx = 0; idx < 2; ++idx) {
916
0
                if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) {
917
0
                    uint8_t drl_1_ctx = av1_drl_ctx(ref_mv_stack, idx);
918
0
                    inter_mode_bits_num += r->drl_mode_fac_bits[drl_1_ctx][cand->drl_index != idx];
919
0
                    if (cand->drl_index == idx) {
920
0
                        break;
921
0
                    }
922
0
                }
923
0
            }
924
0
        }
925
0
        if (have_nearmv) {
926
0
            int32_t idx;
            // Same scheme for NEAR modes, with stack positions 1..2 compared against drl_index + 1.
927
0
            for (idx = 1; idx < 3; ++idx) {
928
0
                if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) {
929
0
                    uint8_t drl_ctx = av1_drl_ctx(ref_mv_stack, idx);
930
0
                    inter_mode_bits_num += r->drl_mode_fac_bits[drl_ctx][cand->drl_index != (idx - 1)];
931
0
                    if (cand->drl_index == (idx - 1)) {
932
0
                        break;
933
0
                    }
934
0
                }
935
0
            }
936
0
        }
937
0
    }
938
0
    if (svt_aom_have_newmv_in_inter_mode(inter_mode)) {
        // Approximate MV rate: 1296 + factor * (|dx| + |dy|) for every MV that is a new MV.
        // The per-unit factor is 20 when screen content tools are allowed, 50 otherwise.
939
0
        const uint16_t factor = pcs->ppcs->frm_hdr.allow_screen_content_tools ? 20 : 50;
940
0
        if (is_compound) {
941
0
            mv_rate = 0;
942
0
            if (inter_mode == NEW_NEWMV) {
943
0
                for (RefList ref_list_idx = 0; ref_list_idx < 2; ++ref_list_idx) {
944
0
                    Mv             mv         = cand->block_mi.mv[ref_list_idx];
945
0
                    Mv             ref_mv     = cand->pred_mv[ref_list_idx];
946
0
                    const uint16_t absmvdiffx = ABS(mv.x - ref_mv.x);
947
0
                    const uint16_t absmvdiffy = ABS(mv.y - ref_mv.y);
948
0
                    mv_rate += 1296 + (factor * (absmvdiffx + absmvdiffy));
949
0
                }
950
0
            } else if (inter_mode == NEAREST_NEWMV || inter_mode == NEAR_NEWMV) {
951
                // New MV is second ref
952
0
                Mv             mv         = cand->block_mi.mv[1];
953
0
                Mv             ref_mv     = cand->pred_mv[1];
954
0
                const uint16_t absmvdiffx = ABS(mv.x - ref_mv.x);
955
0
                const uint16_t absmvdiffy = ABS(mv.y - ref_mv.y);
956
0
                mv_rate += 1296 + (factor * (absmvdiffx + absmvdiffy));
957
0
            } else {
958
0
                assert(inter_mode == NEW_NEARESTMV || inter_mode == NEW_NEARMV);
959
                // New MV is first ref
960
0
                Mv             mv         = cand->block_mi.mv[0];
961
0
                Mv             ref_mv     = cand->pred_mv[0];
962
0
                const uint16_t absmvdiffx = ABS(mv.x - ref_mv.x);
963
0
                const uint16_t absmvdiffy = ABS(mv.y - ref_mv.y);
964
0
                mv_rate += 1296 + (factor * (absmvdiffx + absmvdiffy));
965
0
            }
966
0
        } else {
967
0
            assert(!is_compound); // single ref inter prediction
968
            // unipred MV stored in idx0
969
0
            Mv             mv         = cand->block_mi.mv[0];
970
0
            Mv             ref_mv     = cand->pred_mv[0];
971
0
            const uint16_t absmvdiffx = ABS(mv.x - ref_mv.x);
972
0
            const uint16_t absmvdiffy = ABS(mv.y - ref_mv.y);
973
0
            mv_rate += 1296 + (factor * (absmvdiffx + absmvdiffy));
974
0
        }
975
0
    }
976
    // Get the interpolation filter rate if IFS is performed at MDS0.  Otherwise, the filter is unknown, so the rate will be updated after IFS is performed.
977
0
    uint32_t ifs_rate = 0;
978
0
    if (ctx->ifs_ctrls.level == IFS_MDS0 &&
979
0
        av1_is_interp_needed_md(&cand_bf->cand->block_mi, pcs, ctx->blk_geom->bsize) &&
980
0
        pcs->ppcs->frm_hdr.interpolation_filter == SWITCHABLE) {
981
0
        ifs_rate = svt_aom_get_switchable_rate(
982
0
            &cand_bf->cand->block_mi, &pcs->ppcs->frm_hdr, ctx, pcs->scs->seq_header.enable_dual_filter);
983
0
    }
984
0
    uint32_t is_inter_rate = r->intra_inter_fac_bits[ctx->is_inter_ctx][1];
985
    // Cost of signalling skip_mode = 0; only applies when the frame enables skip mode
    // and is_comp_ref_allowed() holds for the block size.
986
0
    uint32_t skip_mode_rate = pcs->ppcs->frm_hdr.skip_mode_params.skip_mode_flag &&
987
0
            is_comp_ref_allowed(ctx->blk_geom->bsize)
988
0
        ? r->skip_mode_fac_bits[skip_mode_ctx][0]
989
0
        : 0;
990
0
    luma_rate = (uint32_t)(reference_picture_bits_num + skip_mode_rate + inter_mode_bits_num + mv_rate + is_inter_rate +
991
0
                           ifs_rate);
992
    // Keep the Fast Luma and Chroma rate for future use
993
0
    cand_bf->fast_luma_rate   = luma_rate;
994
0
    cand_bf->fast_chroma_rate = 0;
995
    // Assign fast cost
    // When skip mode is allowed for the candidate and signalling skip_mode = 1 is cheaper
    // than the full luma rate, the returned cost uses that rate (fast_luma_rate is unchanged).
996
0
    if (cand->skip_mode_allowed) {
997
0
        skip_mode_rate = r->skip_mode_fac_bits[skip_mode_ctx][1];
998
0
        if (skip_mode_rate < luma_rate) {
999
0
            return (RDCOST(lambda, skip_mode_rate, luma_distortion));
1000
0
        }
1001
0
    }
1002
0
    return (RDCOST(lambda, luma_rate, luma_distortion));
1003
0
}
1004
1005
// Fast-cost estimation for an inter candidate.
//
// When ctx->approx_inter_rate is non-zero the work is delegated to av1_inter_fast_cost_light().
// Otherwise the luma rate is the sum of: reference-frame bits, skip-mode flag bits, inter mode
// and DRL bits, MV bits from svt_av1_mv_bit_cost(), inter-intra bits, motion-mode bits,
// compound-type bits from get_compound_mode_rate(), the is-inter flag bits and (when IFS runs
// at MDS0) the interpolation filter bits.
//
//   pcs             - picture control set (frame header and sequence header flags).
//   ctx             - mode decision context (block geometry, block pointer, cost tables, contexts).
//   cand_bf         - candidate buffer; fast_luma_rate / fast_chroma_rate are written.
//   lambda          - lambda passed to RDCOST().
//   luma_distortion - distortion passed to RDCOST().
//
// Returns RDCOST(lambda, rate, luma_distortion), where rate is the skip-mode rate when
// cand->skip_mode_allowed is set and that rate is smaller than the luma rate, else the luma rate.
uint64_t svt_aom_inter_fast_cost(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf,
1006
0
                                 uint64_t lambda, uint64_t luma_distortion) {
1007
0
    const BlockGeom*       blk_geom       = ctx->blk_geom;
1008
0
    BlkStruct*             blk_ptr        = ctx->blk_ptr;
1009
0
    ModeDecisionCandidate* cand           = cand_bf->cand;
1010
0
    MvReferenceFrame       rf[2]          = {cand->block_mi.ref_frame[0], cand->block_mi.ref_frame[1]};
1011
0
    const int8_t           ref_frame_type = av1_ref_frame_type(cand->block_mi.ref_frame);
1012
0
    CandidateMv*           ref_mv_stack   = &(ctx->ref_mv_stack[ref_frame_type][0]);
1013
    // Any non-zero approx_inter_rate level selects the approximate estimator.
1014
0
    if (ctx->approx_inter_rate) {
1015
0
        return av1_inter_fast_cost_light(ctx, blk_ptr, cand_bf, luma_distortion, lambda, pcs, ref_mv_stack);
1016
0
    }
1017
0
    FrameHeader* frm_hdr = &pcs->ppcs->frm_hdr;
1018
1019
    // Luma rate
1020
0
    uint32_t       luma_rate  = 0;
1021
0
    uint64_t       mv_rate    = 0;
1022
0
    PredictionMode inter_mode = (PredictionMode)cand->block_mi.mode;
1023
1024
0
    uint64_t inter_mode_bits_num = 0;
1025
1026
0
    const uint8_t skip_mode_ctx              = ctx->skip_mode_ctx;
1027
0
    const uint8_t is_compound                = is_inter_compound_mode(cand->block_mi.mode);
1028
0
    uint32_t      mode_context               = svt_aom_mode_context_analyzer(ctx->inter_mode_ctx[ref_frame_type], rf);
1029
0
    uint64_t      reference_picture_bits_num = 0;
1030
1031
    //Reference Type and Mode Bit estimation
1032
0
    reference_picture_bits_num = ctx->estimate_ref_frames_num_bits[ref_frame_type];
1033
0
    if (is_compound) {
1034
0
        assert(INTER_COMPOUND_OFFSET(inter_mode) < INTER_COMPOUND_MODES);
1035
0
        inter_mode_bits_num +=
1036
0
            ctx->md_rate_est_ctx->inter_compound_mode_fac_bits[mode_context][INTER_COMPOUND_OFFSET(inter_mode)];
1037
0
    } else {
1038
        // uint32_t newmv_ctx = mode_context & NEWMV_CTX_MASK;
1039
        // inter_mode_bits_num = cand_bf->cand->md_rate_est_ctx->new_mv_mode_fac_bits[mode_ctx][0];
1040
1041
0
        int16_t newmv_ctx = mode_context & NEWMV_CTX_MASK;
1042
        // aom_write_symbol(ec_writer, mode != NEWMV, frame_context->newmv_cdf[newmv_ctx], 2);
1043
0
        inter_mode_bits_num += ctx->md_rate_est_ctx->new_mv_mode_fac_bits[newmv_ctx][inter_mode != NEWMV];
1044
0
        if (inter_mode != NEWMV) {
1045
0
            const int16_t zero_mv_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
1046
            // aom_write_symbol(ec_writer, mode != GLOBALMV, frame_context->zeromv_cdf[zero_mv_ctx],
1047
            // 2);
1048
0
            inter_mode_bits_num += ctx->md_rate_est_ctx->zero_mv_mode_fac_bits[zero_mv_ctx][inter_mode != GLOBALMV];
1049
0
            if (inter_mode != GLOBALMV) {
1050
0
                int16_t ref_mv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
1051
                /*aom_write_symbol(ec_writer, mode != NEARESTMV,
1052
                 * frame_context->refmv_cdf[refmv_ctx], 2);*/
1053
0
                inter_mode_bits_num += ctx->md_rate_est_ctx->ref_mv_mode_fac_bits[ref_mv_ctx][inter_mode != NEARESTMV];
1054
0
            }
1055
0
        }
1056
0
    }
1057
0
    if (inter_mode == NEWMV || inter_mode == NEW_NEWMV || have_nearmv_in_inter_mode(inter_mode)) {
1058
        // DRL index cost estimation
1059
0
        const int32_t new_mv = inter_mode == NEWMV || inter_mode == NEW_NEWMV;
1060
0
        if (new_mv) {
1061
0
            int32_t idx;
            // One binary decision per stack position, stopping at the candidate's drl_index;
            // positions are only costed when the MV stack holds more than idx + 1 entries.
1062
0
            for (idx = 0; idx < 2; ++idx) {
1063
0
                if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) {
1064
0
                    uint8_t drl_1_ctx = av1_drl_ctx(ref_mv_stack, idx);
1065
0
                    inter_mode_bits_num += ctx->md_rate_est_ctx->drl_mode_fac_bits[drl_1_ctx][cand->drl_index != idx];
1066
0
                    if (cand->drl_index == idx) {
1067
0
                        break;
1068
0
                    }
1069
0
                }
1070
0
            }
1071
0
        }
1072
1073
0
        if (have_nearmv_in_inter_mode(inter_mode)) {
1074
0
            int32_t idx;
            // Same scheme for NEAR modes, with stack positions 1..2 compared against drl_index + 1.
1075
0
            for (idx = 1; idx < 3; ++idx) {
1076
0
                if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) {
1077
0
                    uint8_t drl_ctx = av1_drl_ctx(ref_mv_stack, idx);
1078
0
                    inter_mode_bits_num +=
1079
0
                        ctx->md_rate_est_ctx->drl_mode_fac_bits[drl_ctx][cand->drl_index != (idx - 1)];
1080
1081
0
                    if (cand->drl_index == (idx - 1)) {
1082
0
                        break;
1083
0
                    }
1084
0
                }
1085
0
            }
1086
0
        }
1087
0
    }
1088
    // MV rate: svt_av1_mv_bit_cost() is evaluated for every MV that is a new MV
    // (both for NEW_NEWMV, the second for NEAREST_NEWMV/NEAR_NEWMV, the first otherwise).
1089
0
    if (svt_aom_have_newmv_in_inter_mode(inter_mode)) {
1090
0
        if (is_compound) {
1091
0
            mv_rate = 0;
1092
1093
0
            if (inter_mode == NEW_NEWMV) {
1094
0
                for (RefList ref_list_idx = 0; ref_list_idx < 2; ++ref_list_idx) {
1095
0
                    Mv mv     = cand->block_mi.mv[ref_list_idx];
1096
0
                    Mv ref_mv = cand->pred_mv[ref_list_idx];
1097
0
                    mv_rate += svt_av1_mv_bit_cost(&mv,
1098
0
                                                   &ref_mv,
1099
0
                                                   ctx->md_rate_est_ctx->nmv_vec_cost,
1100
0
                                                   ctx->md_rate_est_ctx->nmvcoststack,
1101
0
                                                   MV_COST_WEIGHT);
1102
0
                }
1103
0
            } else if (inter_mode == NEAREST_NEWMV || inter_mode == NEAR_NEWMV) {
1104
0
                Mv mv     = cand->block_mi.mv[1];
1105
0
                Mv ref_mv = cand->pred_mv[1];
1106
0
                mv_rate += svt_av1_mv_bit_cost(&mv,
1107
0
                                               &ref_mv,
1108
0
                                               ctx->md_rate_est_ctx->nmv_vec_cost,
1109
0
                                               ctx->md_rate_est_ctx->nmvcoststack,
1110
0
                                               MV_COST_WEIGHT);
1111
0
            } else {
1112
0
                assert(inter_mode == NEW_NEARESTMV || inter_mode == NEW_NEARMV);
1113
0
                Mv mv     = cand->block_mi.mv[0];
1114
0
                Mv ref_mv = cand->pred_mv[0];
1115
0
                mv_rate += svt_av1_mv_bit_cost(&mv,
1116
0
                                               &ref_mv,
1117
0
                                               ctx->md_rate_est_ctx->nmv_vec_cost,
1118
0
                                               ctx->md_rate_est_ctx->nmvcoststack,
1119
0
                                               MV_COST_WEIGHT);
1120
0
            }
1121
0
        } else {
1122
0
            assert(!is_compound); // single ref inter prediction
1123
            // unipred MVs stored in idx0
1124
0
            Mv mv     = cand->block_mi.mv[0];
1125
0
            Mv ref_mv = cand->pred_mv[0];
1126
0
            mv_rate   = svt_av1_mv_bit_cost(
1127
0
                &mv, &ref_mv, ctx->md_rate_est_ctx->nmv_vec_cost, ctx->md_rate_est_ctx->nmvcoststack, MV_COST_WEIGHT);
1128
0
        }
1129
0
    }
1130
    // inter intra mode rate
1131
0
    if (pcs->ppcs->scs->seq_header.enable_interintra_compound &&
1132
        /* Check if inter-intra is allowed for current block size / mode (even if the feature is off
1133
        * for the current block, we still need to signal inter-intra off.
1134
        */
1135
0
        svt_is_interintra_allowed(true, blk_geom->bsize, cand->block_mi.mode, rf)) {
1136
0
        const int interintra  = cand->block_mi.is_interintra_used;
1137
0
        const int bsize_group = eb_size_group_lookup[blk_geom->bsize];
1138
1139
0
        inter_mode_bits_num +=
1140
0
            ctx->md_rate_est_ctx->inter_intra_fac_bits[bsize_group][cand->block_mi.is_interintra_used];
1141
        // When inter-intra is used, also cost its mode, the wedge flag (if wedge is usable
        // for this block size) and the wedge index (if the wedge flag is set).
1142
0
        if (interintra) {
1143
0
            inter_mode_bits_num +=
1144
0
                ctx->md_rate_est_ctx->inter_intra_mode_fac_bits[bsize_group][cand->block_mi.interintra_mode];
1145
1146
0
            if (svt_aom_is_interintra_wedge_used(blk_geom->bsize)) {
1147
0
                inter_mode_bits_num +=
1148
0
                    ctx->md_rate_est_ctx
1149
0
                        ->wedge_inter_intra_fac_bits[blk_geom->bsize][cand->block_mi.use_wedge_interintra];
1150
1151
0
                if (cand->block_mi.use_wedge_interintra) {
1152
0
                    inter_mode_bits_num +=
1153
0
                        ctx->md_rate_est_ctx
1154
0
                            ->wedge_idx_fac_bits[blk_geom->bsize][cand->block_mi.interintra_wedge_index];
1155
0
                }
1156
0
            }
1157
0
        }
1158
0
    }
    // Motion mode rate: only for single-reference modes when the frame allows motion mode switching.
    // The cost table used depends on the last motion mode allowed for this block.
1159
0
    if (is_inter_singleref_mode(inter_mode) && frm_hdr->is_motion_mode_switchable && rf[1] != INTRA_FRAME) {
1160
0
        assert(!cand->block_mi.is_interintra_used);
1161
0
        const MotionMode motion_mode_rd           = cand->block_mi.motion_mode;
1162
0
        const BlockSize  bsize                    = blk_geom->bsize;
1163
0
        const MotionMode last_motion_mode_allowed = svt_aom_motion_mode_allowed(
1164
0
            pcs, cand->block_mi.num_proj_ref, blk_ptr->overlappable_neighbors, bsize, rf[0], rf[1], inter_mode);
1165
0
        switch (last_motion_mode_allowed) {
1166
0
        case SIMPLE_TRANSLATION:
1167
0
            break;
1168
0
        case OBMC_CAUSAL:
1169
0
            inter_mode_bits_num += ctx->md_rate_est_ctx->motion_mode_fac_bits1[bsize][motion_mode_rd == OBMC_CAUSAL];
1170
0
            break;
1171
0
        default:
1172
0
            inter_mode_bits_num += ctx->md_rate_est_ctx->motion_mode_fac_bits[bsize][motion_mode_rd];
1173
0
        }
1174
0
    }
1175
    // this func return 0 if masked=0 and distance=0
1176
0
    inter_mode_bits_num += get_compound_mode_rate(pcs, ctx, cand, blk_geom->bsize);
1177
    // Get the interpolation filter rate if IFS is performed at MDS0.  Otherwise, the filter is unknown, so the rate will be updated after IFS is performed.
1178
0
    uint32_t ifs_rate = 0;
1179
0
    if (ctx->ifs_ctrls.level == IFS_MDS0 &&
1180
0
        av1_is_interp_needed_md(&cand_bf->cand->block_mi, pcs, ctx->blk_geom->bsize) &&
1181
0
        frm_hdr->interpolation_filter == SWITCHABLE) {
1182
0
        ifs_rate = svt_aom_get_switchable_rate(
1183
0
            &cand_bf->cand->block_mi, frm_hdr, ctx, pcs->scs->seq_header.enable_dual_filter);
1184
0
    }
1185
0
    uint32_t is_inter_rate  = ctx->md_rate_est_ctx->intra_inter_fac_bits[ctx->is_inter_ctx][1];
    // Cost of signalling skip_mode = 0; only applies when the frame enables skip mode
    // and is_comp_ref_allowed() holds for the block size.
1186
0
    uint32_t skip_mode_rate = pcs->ppcs->frm_hdr.skip_mode_params.skip_mode_flag && is_comp_ref_allowed(blk_geom->bsize)
1187
0
        ? ctx->md_rate_est_ctx->skip_mode_fac_bits[skip_mode_ctx][0]
1188
0
        : 0;
1189
0
    luma_rate = (uint32_t)(reference_picture_bits_num + skip_mode_rate + inter_mode_bits_num + mv_rate + is_inter_rate +
1190
0
                           ifs_rate);
1191
    // Keep the Fast Luma and Chroma rate for future use
1192
0
    cand_bf->fast_luma_rate   = luma_rate;
1193
0
    cand_bf->fast_chroma_rate = 0;
1194
    // Assign fast cost
    // When skip mode is allowed for the candidate and signalling skip_mode = 1 is cheaper
    // than the full luma rate, the returned cost uses that rate (fast_luma_rate is unchanged).
1195
0
    if (cand->skip_mode_allowed) {
1196
0
        skip_mode_rate = ctx->md_rate_est_ctx->skip_mode_fac_bits[skip_mode_ctx][1];
1197
0
        if (skip_mode_rate < luma_rate) {
1198
0
            return (RDCOST(lambda, skip_mode_rate, luma_distortion));
1199
0
        }
1200
0
    }
1201
0
    return (RDCOST(lambda, luma_rate, luma_distortion));
1202
0
}
1203
1204
/*
1205
 */
1206
EbErrorType svt_aom_txb_estimate_coeff_bits_light_pd0(ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf,
1207
                                                      uint32_t txb_origin_index, EbPictureBufferDesc* coeff_buffer_sb,
1208
0
                                                      uint32_t y_eob, uint64_t* y_txb_coeff_bits, TxSize txsize) {
1209
0
    if (y_eob) {
1210
0
        *y_txb_coeff_bits = svt_av1_cost_coeffs_txb(
1211
0
            ctx,
1212
0
            0,
1213
0
            0,
1214
0
            cand_bf,
1215
0
            (int32_t*)&coeff_buffer_sb->y_buffer[txb_origin_index * sizeof(int32_t)],
1216
0
            (uint16_t)y_eob,
1217
0
            PLANE_TYPE_Y,
1218
0
            txsize,
1219
0
            DCT_DCT,
1220
0
            0,
1221
0
            0,
1222
0
            0);
1223
1224
0
        *y_txb_coeff_bits = (*y_txb_coeff_bits) << ctx->mds_subres_step;
1225
1226
0
    } else {
1227
0
        *y_txb_coeff_bits = av1_cost_skip_txb(ctx, 0, 0, txsize, PLANE_TYPE_Y, 0);
1228
0
    }
1229
1230
0
    return EB_ErrorNone;
1231
0
}
1232
1233
EbErrorType svt_aom_txb_estimate_coeff_bits(ModeDecisionContext* ctx, uint8_t allow_update_cdf, FRAME_CONTEXT* ec_ctx,
1234
                                            PictureControlSet* pcs, ModeDecisionCandidateBuffer* cand_bf,
1235
                                            uint32_t txb_origin_index, uint32_t txb_chroma_origin_index,
1236
                                            EbPictureBufferDesc* coeff_buffer_sb, uint32_t y_eob, uint32_t cb_eob,
1237
                                            uint32_t cr_eob, uint64_t* y_txb_coeff_bits, uint64_t* cb_txb_coeff_bits,
1238
                                            uint64_t* cr_txb_coeff_bits, TxSize txsize, TxSize txsize_uv,
1239
146k
                                            TxType tx_type, TxType tx_type_uv, COMPONENT_TYPE component_type) {
1240
146k
    EbErrorType return_error = EB_ErrorNone;
1241
1242
146k
    FrameHeader* frm_hdr = &pcs->ppcs->frm_hdr;
1243
1244
146k
    int32_t* coeff_buffer;
1245
146k
    int16_t  luma_txb_skip_context = ctx->luma_txb_skip_context;
1246
146k
    int16_t  luma_dc_sign_context  = ctx->luma_dc_sign_context;
1247
146k
    int16_t  cb_txb_skip_context   = ctx->cb_txb_skip_context;
1248
146k
    int16_t  cb_dc_sign_context    = ctx->cb_dc_sign_context;
1249
146k
    int16_t  cr_txb_skip_context   = ctx->cr_txb_skip_context;
1250
146k
    int16_t  cr_dc_sign_context    = ctx->cr_dc_sign_context;
1251
1252
146k
    bool reduced_transform_set_flag = frm_hdr->reduced_tx_set ? true : false;
1253
1254
    //Estimate the rate of the transform type and coefficient for Luma
1255
1256
146k
    if (component_type == COMPONENT_LUMA || component_type == COMPONENT_ALL) {
1257
0
        if (y_eob) {
1258
0
            coeff_buffer = (int32_t*)&coeff_buffer_sb->y_buffer[txb_origin_index * sizeof(int32_t)];
1259
1260
0
            *y_txb_coeff_bits = svt_av1_cost_coeffs_txb(ctx,
1261
0
                                                        allow_update_cdf,
1262
0
                                                        ec_ctx,
1263
0
                                                        cand_bf,
1264
0
                                                        coeff_buffer,
1265
0
                                                        (uint16_t)y_eob,
1266
0
                                                        PLANE_TYPE_Y,
1267
0
                                                        txsize,
1268
0
                                                        tx_type,
1269
0
                                                        luma_txb_skip_context,
1270
0
                                                        luma_dc_sign_context,
1271
0
                                                        reduced_transform_set_flag);
1272
0
            *y_txb_coeff_bits = (*y_txb_coeff_bits) << ctx->mds_subres_step;
1273
0
        } else {
1274
0
            *y_txb_coeff_bits = av1_cost_skip_txb(
1275
0
                ctx, allow_update_cdf, ec_ctx, txsize, PLANE_TYPE_Y, luma_txb_skip_context);
1276
0
        }
1277
0
    }
1278
    // Estimate the rate of the transform type and coefficient for chroma Cb
1279
1280
146k
    if (component_type == COMPONENT_CHROMA_CB || component_type == COMPONENT_CHROMA ||
1281
146k
        component_type == COMPONENT_ALL) {
1282
146k
        if (cb_eob) {
1283
6.22k
            coeff_buffer = (int32_t*)&coeff_buffer_sb->u_buffer[txb_chroma_origin_index * sizeof(int32_t)];
1284
1285
6.22k
            *cb_txb_coeff_bits = svt_av1_cost_coeffs_txb(ctx,
1286
6.22k
                                                         allow_update_cdf,
1287
6.22k
                                                         ec_ctx,
1288
6.22k
                                                         cand_bf,
1289
6.22k
                                                         coeff_buffer,
1290
6.22k
                                                         (uint16_t)cb_eob,
1291
6.22k
                                                         PLANE_TYPE_UV,
1292
6.22k
                                                         txsize_uv,
1293
6.22k
                                                         tx_type_uv,
1294
6.22k
                                                         cb_txb_skip_context,
1295
6.22k
                                                         cb_dc_sign_context,
1296
6.22k
                                                         reduced_transform_set_flag);
1297
139k
        } else {
1298
139k
            *cb_txb_coeff_bits = av1_cost_skip_txb(
1299
139k
                ctx, allow_update_cdf, ec_ctx, txsize_uv, PLANE_TYPE_UV, cb_txb_skip_context);
1300
139k
        }
1301
146k
    }
1302
1303
146k
    if (component_type == COMPONENT_CHROMA_CR || component_type == COMPONENT_CHROMA ||
1304
145k
        component_type == COMPONENT_ALL) {
1305
        //Estimate the rate of the transform type and coefficient for chroma Cr
1306
145k
        if (cr_eob) {
1307
6.22k
            coeff_buffer = (int32_t*)&coeff_buffer_sb->v_buffer[txb_chroma_origin_index * sizeof(int32_t)];
1308
1309
6.22k
            *cr_txb_coeff_bits = svt_av1_cost_coeffs_txb(ctx,
1310
6.22k
                                                         allow_update_cdf,
1311
6.22k
                                                         ec_ctx,
1312
6.22k
                                                         cand_bf,
1313
6.22k
                                                         coeff_buffer,
1314
6.22k
                                                         (uint16_t)cr_eob,
1315
6.22k
                                                         PLANE_TYPE_UV,
1316
6.22k
                                                         txsize_uv,
1317
6.22k
                                                         tx_type_uv,
1318
6.22k
                                                         cr_txb_skip_context,
1319
6.22k
                                                         cr_dc_sign_context,
1320
6.22k
                                                         reduced_transform_set_flag);
1321
139k
        } else {
1322
139k
            *cr_txb_coeff_bits = av1_cost_skip_txb(
1323
139k
                ctx, allow_update_cdf, ec_ctx, txsize_uv, PLANE_TYPE_UV, cr_txb_skip_context);
1324
139k
        }
1325
145k
    }
1326
1327
146k
    return return_error;
1328
146k
}
1329
1330
/*
 * Compute the full RD cost of a candidate in the light-PD0 path (luma only).
 *
 * rate       = *y_coeff_bits + skip_fac_bits[0][0] + partition_fac_bits[0][PARTITION_NONE]
 * distortion = y_distortion[0]
 *
 * The result is written to *(cand_bf->full_cost). Always returns EB_ErrorNone.
 */
EbErrorType svt_aom_full_cost_light_pd0(ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf,
                                        uint64_t* y_distortion, uint64_t lambda, uint64_t* y_coeff_bits) {
    // Luma coefficient bits plus the skip flag taken from skip_fac_bits[0][0]
    const uint64_t skip_flag_bits = (uint64_t)ctx->md_rate_est_ctx->skip_fac_bits[0][0];
    const uint64_t luma_rate      = *y_coeff_bits + skip_flag_bits;

    // Context index 0 is used for the partition rate as an approximation, which avoids a call
    // to av1_partition_rate_cost. The partition cost only matters for blocks larger than 4x4,
    // and light-PD0 assumes 4x4 blocks are disallowed.
    *(cand_bf->full_cost) = RDCOST(
        lambda, luma_rate + ctx->md_rate_est_ctx->partition_fac_bits[0][PARTITION_NONE], y_distortion[0]);

    return EB_ErrorNone;
}
1344
1345
/*********************************************************************************
1346
 * svt_aom_av1_full_cost function is used to estimate the cost of a candidate mode
1347
 * for full mode decision module.
1348
 **********************************************************************************/
1349
void svt_aom_full_cost(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf,
1350
                       uint64_t lambda, uint64_t y_distortion[DIST_TOTAL][DIST_CALC_TOTAL],
1351
                       uint64_t cb_distortion[DIST_TOTAL][DIST_CALC_TOTAL],
1352
                       uint64_t cr_distortion[DIST_TOTAL][DIST_CALC_TOTAL], uint64_t* y_coeff_bits,
1353
282k
                       uint64_t* cb_coeff_bits, uint64_t* cr_coeff_bits) {
1354
282k
    const uint8_t skip_coeff_ctx        = ctx->skip_coeff_ctx;
1355
282k
    const bool    update_full_cost_ssim = ctx->tune_ssim_level > SSIM_LVL_0 ? true : false;
1356
1357
    // Get the TX size rate for skip and non-skip block. Need both to make non-skip decision
1358
282k
    uint64_t non_skip_tx_size_bits = 0, skip_tx_size_bits = 0;
1359
282k
    if (!ctx->shut_fast_rate && pcs->ppcs->frm_hdr.tx_mode == TX_MODE_SELECT) {
1360
145k
        if (cand_bf->block_has_coeff) {
1361
6.65k
            non_skip_tx_size_bits = svt_aom_get_tx_size_bits(
1362
6.65k
                cand_bf, ctx, pcs, cand_bf->cand->block_mi.tx_depth, /*cand_bf->block_has_coeff*/ 1);
1363
6.65k
        }
1364
1365
145k
        skip_tx_size_bits = svt_aom_get_tx_size_bits(
1366
145k
            cand_bf, ctx, pcs, cand_bf->cand->block_mi.tx_depth, /*cand_bf->block_has_coeff*/ 0);
1367
145k
    }
1368
1369
282k
    assert(IMPLIES(is_inter_mode(cand_bf->cand->block_mi.mode), skip_tx_size_bits == 0));
1370
1371
    // Decide if block should be signalled as skip (send no coeffs)
1372
282k
    if (!svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id) && ctx->blk_skip_decision &&
1373
8.21k
        cand_bf->block_has_coeff && is_inter_mode(cand_bf->cand->block_mi.mode)) {
1374
0
        const uint64_t non_skip_cost = RDCOST(
1375
0
            lambda,
1376
0
            (*y_coeff_bits + *cb_coeff_bits + *cr_coeff_bits + non_skip_tx_size_bits +
1377
0
             (uint64_t)ctx->md_rate_est_ctx->skip_fac_bits[skip_coeff_ctx][0]),
1378
0
            (y_distortion[DIST_SSD][0] + cb_distortion[DIST_SSD][0] + cr_distortion[DIST_SSD][0]));
1379
1380
0
        const uint64_t skip_cost = RDCOST(
1381
0
            lambda,
1382
0
            ((uint64_t)ctx->md_rate_est_ctx->skip_fac_bits[skip_coeff_ctx][1]) + skip_tx_size_bits,
1383
0
            (y_distortion[DIST_SSD][1] + cb_distortion[DIST_SSD][1] + cr_distortion[DIST_SSD][1]));
1384
1385
        // Update signals to correspond to skip_mode values (no coeffs, etc.)
1386
0
        if (skip_cost < non_skip_cost) {
1387
0
            y_distortion[DIST_SSD][0]  = y_distortion[DIST_SSD][1];
1388
0
            cb_distortion[DIST_SSD][0] = cb_distortion[DIST_SSD][1];
1389
0
            cr_distortion[DIST_SSD][0] = cr_distortion[DIST_SSD][1];
1390
1391
0
            y_distortion[DIST_SSIM][0]  = y_distortion[DIST_SSIM][1];
1392
0
            cb_distortion[DIST_SSIM][0] = cb_distortion[DIST_SSIM][1];
1393
0
            cr_distortion[DIST_SSIM][0] = cr_distortion[DIST_SSIM][1];
1394
0
            cand_bf->block_has_coeff    = 0;
1395
0
            cand_bf->y_has_coeff        = 0;
1396
0
            cand_bf->u_has_coeff        = 0;
1397
0
            cand_bf->v_has_coeff        = 0;
1398
0
            cand_bf->cnt_nz_coeff       = 0;
1399
1400
            // For inter modes, signalling skip means no TX depth is used and the TX type will be DCT_DCT
1401
0
            cand_bf->cand->block_mi.tx_depth = 0;
1402
0
            cand_bf->cand->transform_type_uv = DCT_DCT;
1403
0
            memset(cand_bf->cand->transform_type, DCT_DCT, 16 * sizeof(cand_bf->cand->transform_type[0]));
1404
0
            memset(&cand_bf->quant_dc, 0, sizeof(QuantDcData));
1405
0
            memset(&cand_bf->eob, 0, sizeof(EobData));
1406
0
        }
1407
0
    }
1408
1409
282k
    uint64_t coeff_rate = 0;
1410
282k
    if (cand_bf->block_has_coeff) {
1411
9.04k
        coeff_rate = (*y_coeff_bits + *cb_coeff_bits + *cr_coeff_bits + non_skip_tx_size_bits +
1412
9.04k
                      (uint64_t)ctx->md_rate_est_ctx->skip_fac_bits[skip_coeff_ctx][0]);
1413
273k
    } else {
1414
273k
        coeff_rate = ctx->md_rate_est_ctx->skip_fac_bits[skip_coeff_ctx][1] + skip_tx_size_bits;
1415
273k
    }
1416
1417
282k
    uint64_t mode_rate            = cand_bf->fast_luma_rate + cand_bf->fast_chroma_rate + coeff_rate;
1418
282k
    uint64_t mode_distortion      = y_distortion[DIST_SSD][0] + cb_distortion[DIST_SSD][0] + cr_distortion[DIST_SSD][0];
1419
282k
    uint64_t mode_ssim_distortion = update_full_cost_ssim
1420
282k
        ? y_distortion[DIST_SSIM][0] + cb_distortion[DIST_SSIM][0] + cr_distortion[DIST_SSIM][0]
1421
282k
        : 0;
1422
282k
    uint64_t mode_cost            = RDCOST(lambda, mode_rate, mode_distortion);
1423
1424
    // If skip_mode is allowed for this candidate, check cost of skip mode compared to regular cost
1425
282k
    if (cand_bf->cand->skip_mode_allowed == true) {
1426
0
        const uint8_t skip_mode_ctx = ctx->skip_mode_ctx;
1427
1428
        // Skip mode cost
1429
0
        const uint64_t skip_mode_rate       = ctx->md_rate_est_ctx->skip_mode_fac_bits[skip_mode_ctx][1];
1430
0
        const uint64_t skip_mode_distortion = y_distortion[DIST_SSD][1] + cb_distortion[DIST_SSD][1] +
1431
0
            cr_distortion[DIST_SSD][1];
1432
0
        const uint64_t skip_mode_ssim_distortion = update_full_cost_ssim
1433
0
            ? y_distortion[DIST_SSIM][1] + cb_distortion[DIST_SSIM][1] + cr_distortion[DIST_SSIM][1]
1434
0
            : 0;
1435
0
        const uint64_t skip_mode_cost            = RDCOST(lambda, skip_mode_rate, skip_mode_distortion);
1436
1437
0
        cand_bf->cand->block_mi.skip_mode = false;
1438
0
        if (skip_mode_cost <= mode_cost) {
1439
            // Update candidate cost
1440
0
            mode_cost                         = skip_mode_cost;
1441
0
            mode_rate                         = skip_mode_rate;
1442
0
            mode_distortion                   = skip_mode_distortion;
1443
0
            mode_ssim_distortion              = skip_mode_ssim_distortion;
1444
0
            cand_bf->cand->block_mi.skip_mode = true;
1445
1446
            // Update signals to correspond to skip_mode values (no coeffs, etc.)
1447
0
            cand_bf->block_has_coeff         = 0;
1448
0
            cand_bf->y_has_coeff             = 0;
1449
0
            cand_bf->u_has_coeff             = 0;
1450
0
            cand_bf->v_has_coeff             = 0;
1451
0
            cand_bf->cnt_nz_coeff            = 0;
1452
0
            cand_bf->cand->block_mi.tx_depth = 0;
1453
0
            memset(cand_bf->cand->transform_type, DCT_DCT, 16 * sizeof(cand_bf->cand->transform_type[0]));
1454
0
            cand_bf->cand->transform_type_uv = DCT_DCT;
1455
0
            memset(&cand_bf->quant_dc, 0, sizeof(QuantDcData));
1456
0
            memset(&cand_bf->eob, 0, sizeof(EobData));
1457
0
        }
1458
0
    }
1459
1460
    // Assign full cost
1461
282k
    *(cand_bf->full_cost) = mode_cost;
1462
282k
    cand_bf->total_rate   = mode_rate;
1463
282k
    cand_bf->full_dist    = (uint32_t)mode_distortion;
1464
282k
    if (update_full_cost_ssim) {
1465
0
        assert(ctx->pd_pass == PD_PASS_1);
1466
0
        assert(ctx->md_stage == MD_STAGE_3);
1467
0
        *(cand_bf->full_cost_ssim) = RDCOST(lambda, mode_rate, mode_ssim_distortion);
1468
0
    }
1469
282k
    return;
1470
282k
}
1471
1472
/************************************************************
1473
 * Coding Loop Context Generation
1474
 ************************************************************/
1475
278k
/*
 * Derive the per-block entropy contexts used for rate estimation and store them in ctx:
 *   - intra luma top/left contexts (I_SLICE only), is_inter_ctx and skip_mode_ctx, all of
 *     which are only computed when fast-rate estimation is not shut off;
 *   - neighbour reference counts, for non-I slices or when intra block copy is allowed;
 *   - skip_coeff_ctx, which is forced to 0 unless rate_est_ctrls.update_skip_coeff_ctx is set.
 */
void svt_aom_coding_loop_context_generation(PictureControlSet* pcs, ModeDecisionContext* ctx) {
    BlkStruct* const   blk = ctx->blk_ptr;
    MacroBlockD* const xd  = blk->av1xd;

    if (!ctx->shut_fast_rate) {
        if (pcs->slice_type == I_SLICE) {
            svt_aom_get_kf_y_mode_ctx(xd, &ctx->intra_luma_top_ctx, &ctx->intra_luma_left_ctx);
        }
        ctx->is_inter_ctx  = svt_av1_get_intra_inter_context(xd);
        ctx->skip_mode_ctx = av1_get_skip_mode_context(xd);
    }

    // Collect neighbor reference counts
    if (pcs->slice_type != I_SLICE || pcs->ppcs->frm_hdr.allow_intrabc) {
        svt_aom_collect_neighbors_ref_counts_new(xd);
    }

    // Skip coeff context
    if (ctx->rate_est_ctrls.update_skip_coeff_ctx) {
        ctx->skip_coeff_ctx = av1_get_skip_context(xd);
    } else {
        ctx->skip_coeff_ctx = 0;
    }
}
1493
1494
440k
// Returns 1 when bsize compares greater than BLOCK_4X4 (callers use this to decide whether a
// TX size is signalled for the block), 0 otherwise.
static INLINE int block_signals_txsize(BlockSize bsize) {
    return (BLOCK_4X4 < bsize) ? 1 : 0;
}
1497
1498
0
// Largest transform size for a block of size bsize: the luma size comes straight from
// blocksize_to_txsize[]; for any other plane it is passed through av1_get_adjusted_tx_size().
// Lossless segments are not special-cased here (that check is commented out in this port).
static INLINE int get_vartx_max_txsize(/*const MbModeInfo *xd,*/ BlockSize bsize, int plane) {
    const TxSize luma_max_tx = blocksize_to_txsize[bsize];
    if (plane != 0) {
        return av1_get_adjusted_tx_size(luma_max_tx); // chroma
    }
    return luma_max_tx; // luma
}
1506
1507
0
// Width of the block, clipped by xd->mb_to_right_edge when that value is negative, returned in
// units of the smallest transform width (the value is right-shifted by tx_size_wide_log2[0]).
// The edge offset is scaled down by 3 bits for plane 0 and by 4 bits for any other plane.
static INLINE int max_block_wide(const MacroBlockD* xd, BlockSize bsize, int plane) {
    const int edge_shift = plane ? 4 : 3;
    int       width      = block_size_wide[bsize];

    if (xd->mb_to_right_edge < 0) {
        // Negative edge distance: shrink the width accordingly
        width += gcc_right_shift(xd->mb_to_right_edge, edge_shift);
    }

    // Scale the width in the transform block unit.
    return width >> tx_size_wide_log2[0];
}
1517
1518
0
// Height of the block, clipped by xd->mb_to_bottom_edge when that value is negative, returned
// in units of the smallest transform height (the value is right-shifted by tx_size_high_log2[0]).
// The edge offset is scaled down by 3 bits for plane 0 and by 4 bits for any other plane.
static INLINE int max_block_high(const MacroBlockD* xd, BlockSize bsize, int plane) {
    const int edge_shift = plane ? 4 : 3;
    int       height     = block_size_high[bsize];

    if (xd->mb_to_bottom_edge < 0) {
        // Negative edge distance: shrink the height accordingly
        height += gcc_right_shift(xd->mb_to_bottom_edge, edge_shift);
    }

    // Scale the height in the transform block unit.
    return height >> tx_size_high_log2[0];
}
1528
1529
// Record a transform size in the above/left transform contexts.
// The area written is the one covered by txb_size (mi_size_high/mi_size_wide of the matching
// block size); the values stored are the height (left) and width (above) of tx_size.
static INLINE void txfm_partition_update(TXFM_CONTEXT* above_ctx, TXFM_CONTEXT* left_ctx, TxSize tx_size,
                                         TxSize txb_size) {
    const BlockSize covered_bsize = txsize_to_bsize[txb_size];
    assert(covered_bsize < BLOCK_SIZES_ALL);

    const int     rows      = mi_size_high[covered_bsize];
    const int     cols      = mi_size_wide[covered_bsize];
    const uint8_t tx_width  = tx_size_wide[tx_size];
    const uint8_t tx_height = tx_size_high[tx_size];

    for (int r = 0; r < rows; ++r) {
        left_ctx[r] = tx_height;
    }
    for (int c = 0; c < cols; ++c) {
        above_ctx[c] = tx_width;
    }
}
1545
1546
0
// Map a dimension in pixels to a square transform size. 128 and 64 both map to TX_64X64;
// any value other than 128/64/32/16/8 maps to TX_4X4.
static INLINE TxSize get_sqr_tx_size(int tx_dim) {
    if (tx_dim == 128 || tx_dim == 64) {
        return TX_64X64;
    }
    if (tx_dim == 32) {
        return TX_32X32;
    }
    if (tx_dim == 16) {
        return TX_16X16;
    }
    if (tx_dim == 8) {
        return TX_8X8;
    }
    return TX_4X4;
}
1565
1566
static INLINE int txfm_partition_context(TXFM_CONTEXT* above_ctx, TXFM_CONTEXT* left_ctx, BlockSize bsize,
1567
0
                                         TxSize tx_size) {
1568
0
    const uint8_t txw      = tx_size_wide[tx_size];
1569
0
    const uint8_t txh      = tx_size_high[tx_size];
1570
0
    const int     above    = *above_ctx < txw;
1571
0
    const int     left     = *left_ctx < txh;
1572
0
    int           category = TXFM_PARTITION_CONTEXTS;
1573
1574
    // dummy return, not used by others.
1575
0
    if (tx_size == TX_4X4) {
1576
0
        return 0;
1577
0
    }
1578
1579
0
    TxSize max_tx_size = get_sqr_tx_size(AOMMAX(block_size_wide[bsize], block_size_high[bsize]));
1580
1581
0
    if (max_tx_size >= TX_8X8) {
1582
0
        category = (txsize_sqr_up_map[tx_size] != max_tx_size && max_tx_size > TX_8X8) +
1583
0
            (TX_SIZES - 1 - max_tx_size) * 2;
1584
0
    }
1585
0
    assert(category != TXFM_PARTITION_CONTEXTS);
1586
0
    return category * 3 + above + left;
1587
0
}
1588
1589
// Recursively accumulate the rate of the transform-partition flags for the transform block at
// (blk_row, blk_col), and update xd's above/left transform contexts as it goes.
//
// - Positions at or beyond max_block_high()/max_block_wide() cost nothing and touch nothing.
// - At depth MAX_VARTX_DEPTH no flag is costed; only the contexts are updated.
// - If tx_size equals the size selected by mbmi->block_mi.tx_depth, a "no split" flag is costed.
// - Otherwise a "split" flag is costed and the function recurses into each sub-transform
//   (unless the sub-transform is TX_4X4, where the recursion stops).
// When allow_update_cdf is set, ec_ctx->txfm_partition_cdf is updated with each costed flag.
static uint64_t cost_tx_size_vartx(MacroBlockD* xd, const MbModeInfo* mbmi, TxSize tx_size, int depth, int blk_row,
                                   int blk_col, MdRateEstimationContext* md_rate_est_ctx, FRAME_CONTEXT* ec_ctx,
                                   uint8_t allow_update_cdf) {
    const int row_limit = max_block_high(xd, mbmi->bsize, 0);
    const int col_limit = max_block_wide(xd, mbmi->bsize, 0);
    if (blk_row >= row_limit || blk_col >= col_limit) {
        return 0;
    }

    // Maximum depth reached: nothing is signalled, just record the size
    if (depth == MAX_VARTX_DEPTH) {
        txfm_partition_update(xd->above_txfm_context + blk_col, xd->left_txfm_context + blk_row, tx_size, tx_size);
        return 0;
    }

    const int partition_ctx = txfm_partition_context(
        xd->above_txfm_context + blk_col, xd->left_txfm_context + blk_row, mbmi->bsize, tx_size);
    const int is_final_size = (tx_size == tx_depth_to_tx_size[mbmi->block_mi.tx_depth][mbmi->bsize]);

    if (is_final_size) {
        // "No split" flag
        const uint64_t no_split_bits = md_rate_est_ctx->txfm_partition_fac_bits[partition_ctx][0];

        if (allow_update_cdf) {
            update_cdf(ec_ctx->txfm_partition_cdf[partition_ctx], 0, 2);
        }

        txfm_partition_update(xd->above_txfm_context + blk_col, xd->left_txfm_context + blk_row, tx_size, tx_size);
        return no_split_bits;
    }

    // "Split" flag, followed by the cost of each sub-transform
    assert(tx_size < TX_SIZES_ALL);
    const TxSize child_tx = eb_sub_tx_size_map[tx_size];
    const int    step_w   = eb_tx_size_wide_unit[child_tx];
    const int    step_h   = eb_tx_size_high_unit[child_tx];

    uint64_t total_bits = md_rate_est_ctx->txfm_partition_fac_bits[partition_ctx][1];

    if (allow_update_cdf) {
        update_cdf(ec_ctx->txfm_partition_cdf[partition_ctx], 1, 2);
    }

    if (child_tx == TX_4X4) {
        // Recursion stops here; the contexts cover the parent area with the 4x4 dimensions
        txfm_partition_update(xd->above_txfm_context + blk_col, xd->left_txfm_context + blk_row, child_tx, tx_size);
        return total_bits;
    }

    assert(step_w > 0 && step_h > 0);
    for (int r = 0; r < eb_tx_size_high_unit[tx_size]; r += step_h) {
        for (int c = 0; c < eb_tx_size_wide_unit[tx_size]; c += step_w) {
            total_bits += cost_tx_size_vartx(
                xd, mbmi, child_tx, depth + 1, blk_row + r, blk_col + c, md_rate_est_ctx, ec_ctx, allow_update_cdf);
        }
    }

    return total_bits;
}
1648
1649
849k
// Fill the first len entries of txfm_ctx with txs (no-op when len <= 0).
static INLINE void set_txfm_ctx(TXFM_CONTEXT* txfm_ctx, uint8_t txs, int len) {
    for (int remaining = len; remaining > 0; --remaining) {
        *txfm_ctx++ = txs;
    }
}
1655
1656
426k
// Write the above/left transform contexts of xd for a block spanning n8_w x n8_h entries.
// Normally the width/height of tx_size are stored; when skip is set, the stored values are
// n8_w * MI_SIZE and n8_h * MI_SIZE instead.
static INLINE void set_txfm_ctxs(TxSize tx_size, int n8_w, int n8_h, int skip, const MacroBlockD* xd) {
    uint8_t above_val;
    uint8_t left_val;

    if (skip) {
        above_val = n8_w * MI_SIZE;
        left_val  = n8_h * MI_SIZE;
    } else {
        above_val = tx_size_wide[tx_size];
        left_val  = tx_size_high[tx_size];
    }

    set_txfm_ctx(xd->above_txfm_context, above_val, n8_w);
    set_txfm_ctx(xd->left_txfm_context, left_val, n8_h);
}
1668
1669
12.4k
// Number of steps through eb_sub_tx_size_map[] needed to go from the block's largest transform
// size (blocksize_to_txsize[bsize]) down to tx_size. Returns 0 when they are equal.
static INLINE int tx_size_to_depth(TxSize tx_size, BlockSize bsize) {
    int depth = 0;
    for (TxSize cur = blocksize_to_txsize[bsize]; cur != tx_size; cur = eb_sub_tx_size_map[cur]) {
        ++depth;
        assert(depth <= MAX_TX_DEPTH);
    }
    return depth;
}
1679
1680
// Returns a context number for the given MB prediction signal
1681
// The mode info data structure has a one element border above and to the
1682
// left of the entries corresponding to real blocks.
1683
// The prediction flags in these dummy entries are initialized to 0.
1684
12.4k
static INLINE int get_tx_size_context(const MacroBlockD* xd) {
1685
12.4k
    const MbModeInfo*       mbmi        = xd->mi[0];
1686
12.4k
    const MbModeInfo* const above_mbmi  = xd->above_mbmi;
1687
12.4k
    const MbModeInfo* const left_mbmi   = xd->left_mbmi;
1688
12.4k
    const TxSize            max_tx_size = blocksize_to_txsize[mbmi->bsize];
1689
12.4k
    const int               max_tx_wide = tx_size_wide[max_tx_size];
1690
12.4k
    const int               max_tx_high = tx_size_high[max_tx_size];
1691
12.4k
    const int               has_above   = xd->up_available;
1692
12.4k
    const int               has_left    = xd->left_available;
1693
1694
12.4k
    int above = xd->above_txfm_context[0] >= max_tx_wide;
1695
12.4k
    int left  = xd->left_txfm_context[0] >= max_tx_high;
1696
1697
12.4k
    if (has_above) {
1698
2.99k
        if (is_inter_block(&above_mbmi->block_mi)) {
1699
0
            above = block_size_wide[above_mbmi->bsize] >= max_tx_wide;
1700
0
        }
1701
2.99k
    }
1702
1703
12.4k
    if (has_left) {
1704
3.13k
        if (is_inter_block(&left_mbmi->block_mi)) {
1705
0
            left = block_size_high[left_mbmi->bsize] >= max_tx_high;
1706
0
        }
1707
3.13k
    }
1708
1709
12.4k
    if (has_above && has_left) {
1710
213
        return (above + left);
1711
12.2k
    } else if (has_above) {
1712
2.78k
        return above;
1713
9.48k
    } else if (has_left) {
1714
2.91k
        return left;
1715
6.56k
    } else {
1716
6.56k
        return 0;
1717
6.56k
    }
1718
12.4k
}
1719
1720
// Returns the rate of signaling tx_size for the current block (xd->mi[0]).
//   xd               : block descriptor; supplies the block size and the neighbour data used for the context
//   md_rate_est_ctx  : rate tables; tx_size_fac_bits[tx_size_cat][tx_size_ctx][depth] is read
//   tx_size          : transform size being costed
//   ec_ctx           : frame context whose tx_size_cdf is updated; only dereferenced when
//                      allow_update_cdf is non-zero
//   allow_update_cdf : non-zero to update ec_ctx->tx_size_cdf with the selected depth
// Returns 0 when block_signals_txsize(bsize) is false.
static uint64_t cost_selected_tx_size(const MacroBlockD* xd, MdRateEstimationContext* md_rate_est_ctx, TxSize tx_size,
1721
12.4k
                                      FRAME_CONTEXT* ec_ctx, uint8_t allow_update_cdf) {
1722
12.4k
    const MbModeInfo* const mbmi  = xd->mi[0];
1723
12.4k
    const BlockSize         bsize = mbmi->bsize;
1724
12.4k
    uint64_t                bits  = 0;
1725
1726
12.4k
    if (block_signals_txsize(bsize)) {
1727
12.4k
        const int tx_size_ctx = get_tx_size_context(xd);
1728
12.4k
        assert(bsize < BLOCK_SIZES_ALL);
1729
12.4k
        // The rate table is indexed by (size category of the block, neighbour context, depth of tx_size).
        const int     depth       = tx_size_to_depth(tx_size, bsize);
1730
12.4k
        const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize);
1731
12.4k
        bits += md_rate_est_ctx->tx_size_fac_bits[tx_size_cat][tx_size_ctx][depth];
1732
1733
12.4k
        if (allow_update_cdf) {
1734
0
            const int max_depths = bsize_to_max_depth(bsize);
1735
0
            assert(depth >= 0 && depth <= max_depths);
1736
0
            // This path costs intra blocks only (asserted below).
            assert(!is_inter_block(&mbmi->block_mi));
1737
0
            assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed(/*xd,*/ mbmi)));
1738
0
            // The CDF has max_depths + 1 symbols (depth 0..max_depths).
            update_cdf(ec_ctx->tx_size_cdf[tx_size_cat][tx_size_ctx], depth, max_depths + 1);
1739
0
        }
1740
12.4k
    }
1741
1742
12.4k
    return bits;
1743
12.4k
}
1744
1745
/* Get the TXS rate and update the txfm context.  If allow_update_cdf is true, the TX size CDFs will
1746
be updated. */
1747
// Parameters:
//   pcs, segment_id  : used only to query svt_av1_is_lossless_segment()
//   md_rate_est_ctx  : rate tables forwarded to the costing helpers
//   xd               : block descriptor; its txfm contexts are modified through set_txfm_ctxs()
//   mbmi             : mode info used to decide whether the block is inter coded
//   tx_size, tx_mode : transform size being costed and the TX mode in effect
//   bsize            : block size
//   skip             : non-zero when the block is coded as skip
//   ec_ctx, allow_update_cdf : forwarded to the costing helpers for optional CDF updates
// Returns the TX-size rate, or 0 when the TX size is not signaled for this block.
uint64_t svt_aom_tx_size_bits(PictureControlSet* pcs, uint8_t segment_id, MdRateEstimationContext* md_rate_est_ctx,
1748
                              MacroBlockD* xd, const MbModeInfo* mbmi, TxSize tx_size, TxMode tx_mode, BlockSize bsize,
1749
428k
                              uint8_t skip, FRAME_CONTEXT* ec_ctx, uint8_t allow_update_cdf) {
1750
428k
    uint64_t bits        = 0;
1751
428k
    int      is_inter_tx = is_inter_block(&mbmi->block_mi);
1752
428k
    // TX size is costed only in TX_MODE_SELECT, for block sizes that signal it, when the block
    // is not a skipped inter block and the segment is not lossless.
    if (tx_mode == TX_MODE_SELECT && block_signals_txsize(bsize) && !(is_inter_tx && skip) &&
1753
427k
        !svt_av1_is_lossless_segment(pcs, segment_id)) {
1754
12.4k
        if (is_inter_tx) { // This implies skip flag is 0.
1755
0
            const TxSize max_tx_size = get_vartx_max_txsize(/*xd,*/ bsize, 0);
1756
0
            const int    txbh        = eb_tx_size_high_unit[max_tx_size];
1757
0
            const int    txbw        = eb_tx_size_wide_unit[max_tx_size];
1758
0
            const int    width       = block_size_wide[bsize] >> tx_size_wide_log2[0];
1759
0
            const int    height      = block_size_high[bsize] >> tx_size_high_log2[0];
1760
0
            int          idx, idy;
1761
0
            // Walk the block in steps of max_tx_size and accumulate the rate of each transform block.
            for (idy = 0; idy < height; idy += txbh) {
1762
0
                for (idx = 0; idx < width; idx += txbw) {
1763
0
                    bits += cost_tx_size_vartx(
1764
0
                        xd, mbmi, max_tx_size, 0, idy, idx, md_rate_est_ctx, ec_ctx, allow_update_cdf);
1765
0
                }
1766
0
            }
1767
12.4k
        } else {
1768
12.4k
            // Intra block: cost the selected TX size first, then update the txfm contexts.
            bits += cost_selected_tx_size(xd, md_rate_est_ctx, tx_size, ec_ctx, allow_update_cdf);
1769
12.4k
            set_txfm_ctxs(tx_size, xd->n8_w, xd->n8_h, 0, xd);
1770
12.4k
        }
1771
415k
    } else {
1772
415k
        // TX size is not signaled: no rate is added, only set_txfm_ctxs() is called
        // (its 4th argument is set for skipped inter blocks).
        set_txfm_ctxs(tx_size, xd->n8_w, xd->n8_h, skip && is_inter_block(&mbmi->block_mi), xd);
1773
415k
    }
1774
1775
428k
    return bits;
1776
428k
}
1777
1778
/* Get the TXS rate.  A dummy txfm context array will be used, so context updates will not be saved for
1779
future blocks. */
1780
// Parameters:
//   candidateBuffer : candidate whose use_intrabc and ref_frame[0] are copied into the block's mode info
//   ctx             : mode-decision context (block geometry, rate tables, scratch txfm context buffers)
//   pcs             : picture control set; supplies the frame-level tx_mode
//   tx_depth        : TX depth to cost; mapped to a TX size through tx_depth_to_tx_size[tx_depth][bsize]
//   block_has_coeff : false means the block is costed as skip
// Side effects: overwrites bsize, use_intrabc, ref_frame[0] and tx_depth in xd->mi[0], and
// repoints xd->above_txfm_context / xd->left_txfm_context at the scratch buffers in ctx.
uint64_t svt_aom_get_tx_size_bits(ModeDecisionCandidateBuffer* candidateBuffer, ModeDecisionContext* ctx,
1781
428k
                                  PictureControlSet* pcs, uint8_t tx_depth, bool block_has_coeff) {
1782
428k
    NeighborArrayUnit* txfm_context_array = ctx->txfm_context_array;
1783
428k
    const uint8_t*     txfm_above_ptr     = svt_aom_na_top_ptr_pu(txfm_context_array, ctx->blk_org_x);
1784
428k
    const uint8_t*     txfm_left_ptr      = svt_aom_na_left_ptr_pu(txfm_context_array, ctx->blk_org_y);
1785
1786
428k
    TxMode       tx_mode = pcs->ppcs->frm_hdr.tx_mode;
1787
428k
    MacroBlockD* xd      = ctx->blk_ptr->av1xd;
1788
428k
    BlockSize    bsize   = ctx->blk_geom->bsize;
1789
428k
    const TxSize tx_size = tx_depth_to_tx_size[tx_depth][bsize];
1790
428k
    MbModeInfo*  mbmi    = xd->mi[0];
1791
1792
428k
    // Copy one context entry per MI unit of block width / height into the scratch buffers, so
    // that the context writes made while costing land in the copies rather than the neighbor array.
    svt_memcpy(ctx->above_txfm_context, txfm_above_ptr, (ctx->blk_geom->bwidth >> MI_SIZE_LOG2) * sizeof(TXFM_CONTEXT));
1793
428k
    svt_memcpy(ctx->left_txfm_context, txfm_left_ptr, (ctx->blk_geom->bheight >> MI_SIZE_LOG2) * sizeof(TXFM_CONTEXT));
1794
1795
428k
    xd->above_txfm_context      = ctx->above_txfm_context;
1796
428k
    xd->left_txfm_context       = ctx->left_txfm_context;
1797
428k
    // Fill in the mode-info fields for this candidate before costing.
    mbmi->bsize                 = ctx->blk_geom->bsize;
1798
428k
    mbmi->block_mi.use_intrabc  = candidateBuffer->cand->block_mi.use_intrabc;
1799
428k
    mbmi->block_mi.ref_frame[0] = candidateBuffer->cand->block_mi.ref_frame[0];
1800
428k
    mbmi->block_mi.tx_depth     = tx_depth;
1801
1802
428k
    // skip = !block_has_coeff; ec_ctx is NULL and allow_update_cdf is 0, so no CDF is updated.
    const uint64_t bits = svt_aom_tx_size_bits(pcs,
1803
428k
                                               ctx->blk_ptr->segment_id,
1804
428k
                                               ctx->md_rate_est_ctx,
1805
428k
                                               xd,
1806
428k
                                               mbmi,
1807
428k
                                               tx_size,
1808
428k
                                               tx_mode,
1809
428k
                                               bsize,
1810
428k
                                               !block_has_coeff,
1811
428k
                                               NULL,
1812
428k
                                               0);
1813
428k
    return bits;
1814
428k
}
1815
1816
/*
1817
 * svt_aom_partition_rate_cost() is used to generate the rate of signaling the
1818
 * partition type for a given block.
1819
 */
1820
// Parameters:
//   ppcs              : parent picture control set; supplies the picture size in MI units (av1_cm->mi_rows / mi_cols)
//   bsize             : block size being partitioned (asserted to be square)
//   mi_row, mi_col    : position of the block in MI units
//   md_rate_est_ctx   : rate tables for the partition symbols
//   p                 : partition type being costed
//   left_ctx, above_ctx : neighbouring partition contexts; one bit of each is selected by block size
// Returns 0 for blocks smaller than 8x8 and when neither the bottom half nor the right half of
// the block starts inside the picture; otherwise returns the rate read from the matching table.
int64_t svt_aom_partition_rate_cost(PictureParentControlSet* ppcs, const BlockSize bsize, const int mi_row,
1821
                                    const int mi_col, MdRateEstimationContext* md_rate_est_ctx, PartitionType p,
1822
393k
                                    const PartitionContextType left_ctx, const PartitionContextType above_ctx) {
1823
393k
    if (bsize < BLOCK_8X8) {
1824
0
        return 0;
1825
0
    }
1826
393k
    assert(bsize < BLOCK_SIZES_ALL && mi_size_wide_log2[bsize] == mi_size_high_log2[bsize]);
1827
1828
393k
    // hbs is half the block width in MI units; has_rows / has_cols tell whether the bottom /
    // right half of the block starts inside the picture.
    const int hbs      = mi_size_wide[bsize] >> 1;
1829
393k
    const int has_rows = (mi_row + hbs) < ppcs->av1_cm->mi_rows;
1830
393k
    const int has_cols = (mi_col + hbs) < ppcs->av1_cm->mi_cols;
1831
    // Don't consider invalid partitions or blocks outside the picture
1832
393k
    if (!has_rows && !has_cols) {
1833
912
        return 0;
1834
912
    }
1835
1836
392k
    // bsl is the log2 block-size step above 8x8; it selects the context bit and the table offset.
    const int bsl = mi_size_wide_log2[bsize] - mi_size_wide_log2[BLOCK_8X8];
1837
392k
    assert(bsl >= 0);
1838
1839
392k
    const int      above = (above_ctx >> bsl) & 1, left = (left_ctx >> bsl) & 1;
1840
392k
    const uint32_t context_index = (left * 2 + above) + bsl * PARTITION_PLOFFSET;
1841
1842
392k
    uint64_t split_rate = 0;
1843
1844
392k
    if (has_rows && has_cols) {
1845
373k
        // Block fully inside the picture: the full partition table is indexed by the partition type.
        split_rate = (uint64_t)md_rate_est_ctx->partition_fac_bits[context_index][p];
1846
373k
    } else if (!has_rows && has_cols) {
1847
        // 8x8 blocks will not use the split_or_horz or the split_or_vert partition CDFs, per
1848
        // section 8.3.2 of the AV1 spec (Cdf selection process).  Therefore, only update partition ctx 4+,
1849
        // which corresponds to the partition CDFs for 16x16 and larger blocks
1850
9.25k
        assert(bsize != BLOCK_8X8);
1851
9.25k
        // Bottom half outside the picture: the "vert alike" tables are indexed by (p == PARTITION_SPLIT).
        split_rate = bsize == BLOCK_128X128
1852
9.25k
            ? (uint64_t)md_rate_est_ctx->partition_vert_alike_128x128_fac_bits[context_index][p == PARTITION_SPLIT]
1853
9.25k
            : (uint64_t)md_rate_est_ctx->partition_vert_alike_fac_bits[context_index][p == PARTITION_SPLIT];
1854
9.25k
    } else {
1855
        // 8x8 blocks will not use the split_or_horz or the split_or_vert partition CDFs, per
1856
        // section 8.3.2 of the AV1 spec (Cdf selection process).  Therefore, only update partition ctx 4+,
1857
        // which corresponds to the partition CDFs for 16x16 and larger blocks
1858
9.08k
        assert(bsize != BLOCK_8X8);
1859
9.08k
        // Right half outside the picture: the "horz alike" tables are indexed by (p == PARTITION_SPLIT).
        split_rate = bsize == BLOCK_128X128
1860
9.08k
            ? (uint64_t)md_rate_est_ctx->partition_horz_alike_128x128_fac_bits[context_index][p == PARTITION_SPLIT]
1861
9.08k
            : (uint64_t)md_rate_est_ctx->partition_horz_alike_fac_bits[context_index][p == PARTITION_SPLIT];
1862
9.08k
    }
1863
1864
392k
    return split_rate;
1865
393k
}