Coverage Report

Created: 2026-06-15 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/svt-av1/Source/Lib/Codec/rd_cost.c
Line
Count
Source
1
/*
2
* Copyright(c) 2019 Intel Corporation
3
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
4
*
5
* This source code is subject to the terms of the BSD 2 Clause License and
6
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
7
* was not distributed with this source code in the LICENSE file, you can
8
* obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
9
* Media Patent License 1.0 was not distributed with this source code in the
10
* PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
11
*/
12
13
/***************************************
14
* Includes
15
***************************************/
16
#include "rd_cost.h"
17
#include "common_utils.h"
18
#include "aom_dsp_rtcd.h"
19
#include "svt_log.h"
20
#include "enc_inter_prediction.h"
21
#include "full_loop.h"
22
#include "entropy_coding.h"
23
24
#include <assert.h>
25
26
0
#define MV_COST_WEIGHT 108
27
int svt_aom_get_reference_mode_context_new(const MacroBlockD* xd);
28
int svt_av1_get_pred_context_uni_comp_ref_p(const MacroBlockD* xd);
29
int svt_av1_get_pred_context_uni_comp_ref_p1(const MacroBlockD* xd);
30
int svt_av1_get_pred_context_uni_comp_ref_p2(const MacroBlockD* xd);
31
int svt_aom_get_comp_reference_type_context_new(const MacroBlockD* xd);
32
33
int  svt_aom_get_palette_bsize_ctx(BlockSize bsize);
34
int  svt_aom_get_palette_mode_ctx(const MacroBlockD* xd);
35
int  svt_aom_write_uniform_cost(int n, int v);
36
int  svt_get_palette_cache_y(const MacroBlockD* const xd, uint16_t* cache);
37
int  svt_av1_palette_color_cost_y(const PaletteModeInfo* const pmi, uint16_t* color_cache, const int palette_size,
38
                                  int n_cache, int bit_depth);
39
int  svt_av1_cost_color_map(ModeDecisionCandidate* cand, MdRateEstimationContext* rate_table,
40
41
                            BlkStruct* blk_ptr, int plane, BlockSize bsize, COLOR_MAP_TYPE type);
42
void svt_aom_get_block_dimensions(BlockSize bsize, int plane, const MacroBlockD* xd, int* width, int* height,
43
                                  int* rows_within_bounds, int* cols_within_bounds);
44
int  svt_aom_allow_palette(int allow_screen_content_tools, BlockSize bsize);
45
int  svt_aom_allow_intrabc(const FrameHeader* frm_hdr, SliceType slice_type);
46
47
0
MvJointType svt_av1_get_mv_joint(const Mv* mv) {
48
0
    if (mv->y == 0) {
49
0
        return mv->x == 0 ? MV_JOINT_ZERO : MV_JOINT_HNZVZ;
50
0
    } else {
51
0
        return mv->x == 0 ? MV_JOINT_HZVNZ : MV_JOINT_HNZVNZ;
52
0
    }
53
0
}
54
55
0
static int32_t mv_cost(const Mv* mv, const int32_t* joint_cost, const int32_t* const comp_cost[2]) {
56
0
    int32_t jn_c = svt_av1_get_mv_joint(mv);
57
0
    int32_t res  = joint_cost[jn_c] + comp_cost[0][CLIP3(MV_LOW, MV_UPP, mv->y)] +
58
0
        comp_cost[1][CLIP3(MV_LOW, MV_UPP, mv->x)];
59
0
    return res;
60
0
}
61
62
0
int32_t svt_av1_mv_bit_cost_light(const Mv* mv, const Mv* ref) {
63
0
    const uint32_t factor     = 50;
64
0
    const uint32_t absmvdiffx = ABS(mv->x - ref->x);
65
0
    const uint32_t absmvdiffy = ABS(mv->y - ref->y);
66
0
    const uint32_t mv_rate    = 1296 + (factor * (absmvdiffx + absmvdiffy));
67
0
    return mv_rate;
68
0
}
69
70
int32_t svt_av1_mv_bit_cost(const Mv* mv, const Mv* ref, const int32_t* mvjcost, const int32_t* const mvcost[2],
71
0
                            int32_t weight) {
72
    // Restrict the size of the MV diff to be within the max AV1 range.  If the MV diff
73
    // is outside this range, the diff will index beyond the cost array, causing a seg fault.
74
    // Both the MVs and the MV diffs should be within the allowable range for accessing the MV cost
75
    // infrastructure.
76
0
    const int16_t x         = MIN(MAX(mv->x - ref->x, MV_LOW), MV_UPP);
77
0
    const int16_t y         = MIN(MAX(mv->y - ref->y, MV_LOW), MV_UPP);
78
0
    Mv            temp_diff = {{x, y}};
79
80
0
    return ROUND_POWER_OF_TWO(mv_cost(&temp_diff, mvjcost, mvcost) * weight, 7);
81
0
}
82
83
/////////////////////////////COEFFICIENT CALCULATION //////////////////////////////////////////////
84
0
// Cost of the Golomb-coded remainder of a coefficient magnitude.
// Magnitudes below 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE have no remainder and cost 0.
// Otherwise the cost is av1_cost_literal(2 * length - 1), where length is the bit length
// (get_msb() + 1) of abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS.
static INLINE int32_t get_golomb_cost(int32_t abs_qc) {
    if (abs_qc < 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
        return 0;
    }

    const int32_t remainder = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS;
    const int32_t num_bits  = get_msb(remainder) + 1;
    return av1_cost_literal(2 * num_bits - 1);
}
92
93
void svt_av1_txb_init_levels_c(const TranLow* const coeff, const int32_t width, const int32_t height,
94
0
                               uint8_t* const levels) {
95
0
    uint8_t* ls = levels;
96
97
0
    for (int32_t i = 0; i < height; i++) {
98
0
        for (int32_t j = 0; j < width; j++) {
99
0
            *ls++ = (uint8_t)clamp(abs(coeff[i * width + j]), 0, INT8_MAX);
100
0
        }
101
0
        for (int32_t j = 0; j < TX_PAD_HOR; j++) {
102
0
            *ls++ = 0;
103
0
        }
104
0
    }
105
0
}
106
107
static int32_t av1_transform_type_rate_estimation(ModeDecisionContext* ctx, uint8_t allow_update_cdf, FRAME_CONTEXT* fc,
108
                                                  ModeDecisionCandidateBuffer* cand_bf, bool is_inter,
109
                                                  TxSize transform_size, TxType transform_type,
110
0
                                                  bool reduced_tx_set_used) {
111
    // const MbModeInfo *mbmi = &xd->mi[0]->mbmi;
112
    // const int32_t is_inter = is_inter_block(mbmi);
113
114
0
    if (get_ext_tx_types(transform_size, is_inter, reduced_tx_set_used) >
115
0
        1 /*&&    !xd->lossless[xd->mi[0]->mbmi.segment_id]  WE ARE NOT LOSSLESS*/) {
116
0
        const TxSize square_tx_size = txsize_sqr_map[transform_size];
117
0
        assert(square_tx_size < EXT_TX_SIZES);
118
119
0
        const int32_t ext_tx_set = get_ext_tx_set(transform_size, is_inter, reduced_tx_set_used);
120
0
        if (is_inter) {
121
0
            if (ext_tx_set > 0) {
122
0
                if (allow_update_cdf) {
123
0
                    const TxSetType tx_set_type = get_ext_tx_set_type(transform_size, is_inter, reduced_tx_set_used);
124
125
0
                    update_cdf(fc->inter_ext_tx_cdf[ext_tx_set][square_tx_size],
126
0
                               av1_ext_tx_ind[tx_set_type][transform_type],
127
0
                               av1_num_ext_tx_set[tx_set_type]);
128
0
                }
129
0
                return ctx->md_rate_est_ctx->inter_tx_type_fac_bits[ext_tx_set][square_tx_size][transform_type];
130
0
            }
131
0
        } else {
132
0
            if (ext_tx_set > 0) {
133
0
                PredictionMode intra_dir;
134
0
                if (cand_bf->cand->block_mi.filter_intra_mode != FILTER_INTRA_MODES) {
135
0
                    intra_dir = fimode_to_intradir[cand_bf->cand->block_mi.filter_intra_mode];
136
0
                } else {
137
0
                    intra_dir = cand_bf->cand->block_mi.mode;
138
0
                }
139
0
                assert(intra_dir < INTRA_MODES);
140
0
                const TxSetType tx_set_type = get_ext_tx_set_type(transform_size, is_inter, reduced_tx_set_used);
141
142
0
                if (allow_update_cdf) {
143
0
                    update_cdf(fc->intra_ext_tx_cdf[ext_tx_set][square_tx_size][intra_dir],
144
0
                               av1_ext_tx_ind[tx_set_type][transform_type],
145
0
                               av1_num_ext_tx_set[tx_set_type]);
146
0
                }
147
0
                return ctx->md_rate_est_ctx
148
0
                    ->intra_tx_type_fac_bits[ext_tx_set][square_tx_size][intra_dir][transform_type];
149
0
            }
150
0
        }
151
0
    }
152
0
    return 0;
153
0
}
154
155
// Update the eob-related CDFs. Function assumes allow_update_cdf is true
156
// as the only action of the function is to update the CDFs.
157
0
static void update_eob_context(int eob, TxSize tx_size, TxClass tx_class, PlaneType plane, FRAME_CONTEXT* ec_ctx) {
158
0
    int          eob_extra;
159
0
    const int    eob_pt  = get_eob_pos_token(eob, &eob_extra);
160
0
    const TxSize txs_ctx = (TxSize)((txsize_sqr_map[tx_size] + txsize_sqr_up_map[tx_size] + 1) >> 1);
161
0
    assert(txs_ctx < TX_SIZES);
162
0
    const int eob_multi_size = txsize_log2_minus4[tx_size];
163
0
    const int eob_multi_ctx  = (tx_class == TX_CLASS_2D) ? 0 : 1;
164
165
0
    switch (eob_multi_size) {
166
0
    case 0:
167
0
        update_cdf(ec_ctx->eob_flag_cdf16[plane][eob_multi_ctx], eob_pt - 1, 5);
168
0
        break;
169
0
    case 1:
170
0
        update_cdf(ec_ctx->eob_flag_cdf32[plane][eob_multi_ctx], eob_pt - 1, 6);
171
0
        break;
172
0
    case 2:
173
0
        update_cdf(ec_ctx->eob_flag_cdf64[plane][eob_multi_ctx], eob_pt - 1, 7);
174
0
        break;
175
0
    case 3:
176
0
        update_cdf(ec_ctx->eob_flag_cdf128[plane][eob_multi_ctx], eob_pt - 1, 8);
177
0
        break;
178
0
    case 4:
179
0
        update_cdf(ec_ctx->eob_flag_cdf256[plane][eob_multi_ctx], eob_pt - 1, 9);
180
0
        break;
181
0
    case 5:
182
0
        update_cdf(ec_ctx->eob_flag_cdf512[plane][eob_multi_ctx], eob_pt - 1, 10);
183
0
        break;
184
0
    case 6:
185
0
    default:
186
0
        update_cdf(ec_ctx->eob_flag_cdf1024[plane][eob_multi_ctx], eob_pt - 1, 11);
187
0
        break;
188
0
    }
189
190
0
    if (eob_pt > 2) {
191
0
        const int cnt = eob_pt - 3;
192
0
        const int bit = (eob_extra >> cnt) & 1;
193
0
        update_cdf(ec_ctx->eob_extra_cdf[txs_ctx][plane][cnt], bit, 2);
194
0
    }
195
0
}
196
197
// Transform end of block bit estimation
198
9.79k
int get_eob_cost(int eob, const LvMapEobCost* txb_eob_costs, const LvMapCoeffCost* txb_costs, TxClass tx_class) {
199
9.79k
    int       eob_extra;
200
9.79k
    const int eob_pt        = get_eob_pos_token(eob, &eob_extra);
201
18.4E
    const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1;
202
9.79k
    int       eob_cost      = txb_eob_costs->eob_cost[eob_multi_ctx][eob_pt - 1];
203
204
9.79k
    if (eob_pt > 2) {
205
0
        const int cnt = eob_pt - 3;
206
0
        const int bit = (eob_extra >> cnt) & 1;
207
0
        eob_cost += txb_costs->eob_extra_cost[cnt][bit];
208
0
        eob_cost += av1_cost_literal(cnt);
209
0
    }
210
9.79k
    return eob_cost;
211
9.79k
}
212
213
// Returns the cost of signalling an all-zero (skipped) transform block, i.e.
// txb_skip_cost[txb_skip_ctx][1] for the given transform size and plane type.
// When allow_update_cdf is set, the txb-skip CDF in ec_ctx is also updated with symbol 1.
static INLINE int32_t av1_cost_skip_txb(ModeDecisionContext* ctx, uint8_t allow_update_cdf, FRAME_CONTEXT* ec_ctx,
                                        TxSize transform_size, PlaneType plane_type, int16_t txb_skip_ctx) {
    const TxSize txs_ctx = (TxSize)((txsize_sqr_map[transform_size] + txsize_sqr_up_map[transform_size] + 1) >> 1);
    assert(txs_ctx < TX_SIZES);

    if (allow_update_cdf) {
        update_cdf(ec_ctx->txb_skip_cdf[txs_ctx][txb_skip_ctx], 1, 2);
    }

    const LvMapCoeffCost* const cost_tbl = &ctx->md_rate_est_ctx->coeff_fac_bits[txs_ctx][plane_type];
    return cost_tbl->txb_skip_cost[txb_skip_ctx][1];
}
223
224
// Coefficient cost for a transform block whose only coded coefficient is at index 0
// (the eob == 1 case). Adds, as applicable: the base-eob cost, the DC sign cost, the
// range cost (lps_cost with context 0) and the Golomb remainder cost.
static INLINE int32_t av1_cost_coeffs_txb_loop_cost_one_eob(const TranLow* const qcoeff, int8_t* const coeff_contexts,
                                                            const LvMapCoeffCost* coeff_costs, int16_t dc_sign_ctx) {
    const TranLow dc        = qcoeff[0];
    const int32_t magnitude = abs(dc);
    const int32_t ctx_idx   = coeff_contexts[0];

    assert((AOMMIN(magnitude, 3) - 1) >= 0);
    int32_t total = coeff_costs->base_eob_cost[ctx_idx][AOMMIN(magnitude, 3) - 1];

    if (dc == 0) {
        return total;
    }

    // sign bit cost
    total += coeff_costs->dc_sign_cost[dc_sign_ctx][(dc < 0) ? 1 : 0];

    if (magnitude <= NUM_BASE_LEVELS) {
        return total;
    }

    const int32_t base_range = magnitude - 1 - NUM_BASE_LEVELS;
    total += (base_range < COEFF_BASE_RANGE) ? coeff_costs->lps_cost[0][base_range]
                                             : coeff_costs->lps_cost[0][COEFF_BASE_RANGE];

    if (magnitude >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
        total += get_golomb_cost(magnitude);
    }
    return total;
}
254
255
static INLINE int32_t av1_cost_coeffs_txb_loop_cost_eob(ModeDecisionContext* md_ctx, uint16_t eob,
256
                                                        const int16_t* const scan, const TranLow* const qcoeff,
257
                                                        int8_t* const coeff_contexts, const LvMapCoeffCost* coeff_costs,
258
                                                        int16_t dc_sign_ctx, uint8_t* const levels, const int32_t bwl,
259
0
                                                        TxType transform_type) {
260
0
    const uint32_t cost_literal = av1_cost_literal(1);
261
0
    int32_t        cost         = 0;
262
263
    //Optimized/simplified function when eob is 1
264
0
    if (eob == 1) {
265
0
        return av1_cost_coeffs_txb_loop_cost_one_eob(qcoeff, coeff_contexts, coeff_costs, dc_sign_ctx);
266
0
    }
267
268
    //  first (eob - 1) index
269
0
    {
270
0
        const int32_t pos       = scan[eob - 1];
271
0
        const TranLow v         = qcoeff[pos];
272
0
        const int32_t level     = abs(v);
273
0
        const int32_t coeff_ctx = coeff_contexts[pos];
274
275
0
        assert((AOMMIN(level, 3) - 1) >= 0);
276
0
        cost += coeff_costs->base_eob_cost[coeff_ctx][AOMMIN(level, 3) - 1];
277
278
0
        if (v != 0) {
279
0
            cost += cost_literal;
280
0
            if (level > NUM_BASE_LEVELS) {
281
0
                int32_t       ctx        = get_br_ctx(levels, pos, bwl, tx_type_to_class[transform_type]);
282
0
                const int32_t base_range = level - 1 - NUM_BASE_LEVELS;
283
284
0
                if (base_range < COEFF_BASE_RANGE) {
285
0
                    cost += coeff_costs->lps_cost[ctx][base_range];
286
0
                } else {
287
0
                    cost += coeff_costs->lps_cost[ctx][COEFF_BASE_RANGE];
288
0
                }
289
290
0
                if (level >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
291
0
                    cost += get_golomb_cost(level);
292
0
                }
293
0
            }
294
0
        }
295
0
    }
296
    // last (0) index
297
0
    {
298
0
        const TranLow v         = qcoeff[0];
299
0
        const int32_t level     = abs(v);
300
0
        const int32_t coeff_ctx = coeff_contexts[0];
301
302
0
        cost += coeff_costs->base_cost[coeff_ctx][AOMMIN(level, 3)];
303
304
0
        if (v != 0) {
305
0
            const int32_t sign = (v < 0) ? 1 : 0;
306
            // sign bit cost
307
308
0
            cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign];
309
310
0
            if (level > NUM_BASE_LEVELS) {
311
0
                int32_t       ctx        = get_br_ctx(levels, 0, bwl, tx_type_to_class[transform_type]);
312
0
                const int32_t base_range = level - 1 - NUM_BASE_LEVELS;
313
314
0
                if (base_range < COEFF_BASE_RANGE) {
315
0
                    cost += coeff_costs->lps_cost[ctx][base_range];
316
0
                } else {
317
0
                    cost += coeff_costs->lps_cost[ctx][COEFF_BASE_RANGE];
318
0
                }
319
320
0
                if (level >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) {
321
0
                    cost += get_golomb_cost(level);
322
0
                }
323
0
            }
324
0
        }
325
0
    }
326
0
    int32_t c;
327
    /* Optimized Loop, omitted first (eob - 1) and last (0) index */
328
    // Estimate the rate of the first(eob / fast_coeff_est_level) coeff(s), DC and last coeff only
329
0
    int32_t  c_start = MIN(eob - 2, eob / MAX(1, (int)(md_ctx->mds_fast_coeff_est_level - md_ctx->mds_subres_step)));
330
0
    uint32_t cost_literal_cnt = 0;
331
0
    for (c = c_start; c >= 1; --c) {
332
0
        const int32_t pos = scan[c];
333
0
        cost_literal_cnt += !!(qcoeff[pos]);
334
0
        const int32_t level = abs(qcoeff[pos]);
335
0
        if (level > NUM_BASE_LEVELS) {
336
0
            int32_t       ctx        = get_br_ctx(levels, pos, bwl, tx_type_to_class[transform_type]);
337
0
            const int32_t base_range = level - 1 - NUM_BASE_LEVELS;
338
339
0
            cost += coeff_costs->base_cost[coeff_contexts[pos]][3];
340
0
            if (base_range < COEFF_BASE_RANGE) {
341
0
                cost += coeff_costs->lps_cost[ctx][base_range];
342
0
            } else {
343
0
                cost += get_golomb_cost(level) + coeff_costs->lps_cost[ctx][COEFF_BASE_RANGE];
344
0
            }
345
0
        } else {
346
0
            cost += coeff_costs->base_cost[coeff_contexts[pos]][level];
347
0
        }
348
0
    }
349
0
    cost += cost_literal_cnt * cost_literal;
350
351
0
    return cost;
352
0
}
353
354
// Note: don't call this function when eob is 0.
355
uint64_t svt_av1_cost_coeffs_txb(ModeDecisionContext* ctx, uint8_t allow_update_cdf, FRAME_CONTEXT* ec_ctx,
356
                                 ModeDecisionCandidateBuffer* cand_bf, const TranLow* const qcoeff, uint16_t eob,
357
                                 PlaneType plane_type, TxSize transform_size, TxType transform_type,
358
                                 int16_t txb_skip_ctx, int16_t dc_sign_ctx, bool reduced_transform_set_flag)
359
360
0
{
361
    //Note: there is a different version of this function in AOM that seems to be efficient as its name is:
362
    //warehouse_efficients_txb
363
364
0
    const TxSize  txs_ctx  = get_txsize_entropy_ctx(transform_size);
365
0
    const TxClass tx_class = tx_type_to_class[transform_type];
366
0
    int32_t       cost;
367
0
    const int32_t bwl    = get_txb_bwl(transform_size);
368
0
    const int32_t width  = get_txb_wide(transform_size);
369
0
    const int32_t height = get_txb_high(transform_size);
370
371
0
    const ScanOrder* const scan_order     = get_scan_order(transform_size, transform_type);
372
0
    const int16_t* const   scan           = scan_order->scan;
373
0
    uint8_t* const         levels         = set_levels(ctx->md_levels_buf, width, height);
374
0
    int8_t* const          coeff_contexts = ctx->md_coeff_contexts;
375
0
    assert(txs_ctx < TX_SIZES);
376
0
    const LvMapCoeffCost* const coeff_costs = &ctx->md_rate_est_ctx->coeff_fac_bits[txs_ctx][plane_type];
377
378
0
    const int32_t             eob_multi_size = txsize_log2_minus4[transform_size];
379
0
    const LvMapEobCost* const eob_bits       = &ctx->md_rate_est_ctx->eob_frac_bits[eob_multi_size][plane_type];
380
    // eob must be greater than 0 here.
381
0
    assert(eob > 0);
382
0
    cost = coeff_costs->txb_skip_cost[txb_skip_ctx][0];
383
384
0
    if (allow_update_cdf) {
385
0
        update_cdf(ec_ctx->txb_skip_cdf[txs_ctx][txb_skip_ctx], eob == 0, 2);
386
0
    }
387
388
0
    if (eob > 1) {
389
0
        svt_av1_txb_init_levels(qcoeff,
390
0
                                width,
391
0
                                height,
392
0
                                levels); // NM - Needs to be optimized - to be combined with the quantisation.
393
0
    }
394
0
    const bool is_inter = is_inter_mode(cand_bf->cand->block_mi.mode);
395
    // Transform type bit estimation
396
0
    cost += plane_type > PLANE_TYPE_Y ? 0
397
0
                                      : av1_transform_type_rate_estimation(ctx,
398
0
                                                                           allow_update_cdf,
399
0
                                                                           ec_ctx,
400
0
                                                                           cand_bf,
401
0
                                                                           is_inter,
402
0
                                                                           transform_size,
403
0
                                                                           transform_type,
404
0
                                                                           reduced_transform_set_flag);
405
406
    // Transform eob bit estimation
407
0
    cost += get_eob_cost(eob, eob_bits, coeff_costs, tx_class);
408
0
    if (allow_update_cdf) {
409
0
        update_eob_context(eob, transform_size, tx_class, plane_type, ec_ctx);
410
0
    }
411
    // Transform non-zero coeff bit estimation
412
0
    svt_av1_get_nz_map_contexts(levels,
413
0
                                scan,
414
0
                                eob,
415
0
                                transform_size,
416
0
                                tx_class,
417
0
                                coeff_contexts); // NM - Assembly version is available in AOM
418
0
    assert(eob <= width * height);
419
0
    if (allow_update_cdf) {
420
0
        for (int c = eob - 1; c >= 0; --c) {
421
0
            const int     pos       = scan[c];
422
0
            const int     coeff_ctx = coeff_contexts[pos];
423
0
            const TranLow v         = qcoeff[pos];
424
0
            const TranLow level     = abs(v);
425
0
            if (c == eob - 1) {
426
0
                assert(coeff_ctx < 4);
427
0
                update_cdf(ec_ctx->coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx], AOMMIN(level, 3) - 1, 3);
428
0
            } else {
429
0
                update_cdf(ec_ctx->coeff_base_cdf[txs_ctx][plane_type][coeff_ctx], AOMMIN(level, 3), 4);
430
0
            }
431
432
0
            {
433
0
                if (c == eob - 1) {
434
0
                    assert(coeff_ctx < 4);
435
0
                }
436
0
            }
437
438
0
            if (level > NUM_BASE_LEVELS) {
439
0
                const int base_range = level - 1 - NUM_BASE_LEVELS;
440
0
                int       br_ctx;
441
0
                if (eob == 1) {
442
0
                    br_ctx = 0;
443
0
                } else {
444
0
                    br_ctx = get_br_ctx(levels, pos, bwl, tx_class);
445
0
                }
446
447
0
                for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) {
448
0
                    const int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1);
449
0
                    update_cdf(ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)][plane_type][br_ctx], k, BR_CDF_SIZE);
450
0
                    for (int lps = 0; lps < BR_CDF_SIZE - 1; lps++) {
451
0
                        if (lps == k) {
452
0
                            break;
453
0
                        }
454
0
                    }
455
0
                    if (k < BR_CDF_SIZE - 1) {
456
0
                        break;
457
0
                    }
458
0
                }
459
0
            }
460
0
        }
461
462
0
        if (qcoeff[0] != 0) {
463
0
            update_cdf(ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], qcoeff[0] < 0, 2);
464
0
        }
465
466
        //TODO: CHKN  for 128x128 where we need more than one TXb, we need to update the txb_context(dc_sign+skip_ctx) in a Txb basis.
467
468
0
        return 0;
469
0
    }
470
471
0
    cost += av1_cost_coeffs_txb_loop_cost_eob(
472
0
        ctx, eob, scan, qcoeff, coeff_contexts, coeff_costs, dc_sign_ctx, levels, bwl, transform_type);
473
0
    return cost;
474
0
}
475
476
uint64_t svt_aom_get_intra_uv_fast_rate(PictureControlSet* pcs, ModeDecisionContext* ctx,
477
118k
                                        ModeDecisionCandidateBuffer* cand_bf, bool use_accurate_cfl) {
478
118k
    const BlockGeom* const blk_geom = ctx->blk_geom;
479
118k
    ModeDecisionCandidate* cand     = cand_bf->cand;
480
118k
    assert(ctx->has_uv);
481
118k
    assert(!(svt_aom_allow_intrabc(&pcs->ppcs->frm_hdr, pcs->ppcs->slice_type) && cand->block_mi.use_intrabc));
482
118k
    MdRateEstimationContext* md_rate_est_ctx = ctx->md_rate_est_ctx;
483
118k
    const uint8_t            is_cfl_allowed  = (blk_geom->bwidth <= 32 && blk_geom->bheight <= 32) ? 1 : 0;
484
118k
    PredictionMode           intra_mode      = (PredictionMode)cand->block_mi.mode;
485
    // If CFL alphas are not known yet, calculate the chroma mode bits based on DC Mode. If CFL is selected the chroma mode bits must be updated later
486
118k
    const UvPredictionMode chroma_mode = cand->block_mi.uv_mode == UV_CFL_PRED && !use_accurate_cfl
487
118k
        ? UV_DC_PRED
488
118k
        : cand->block_mi.uv_mode;
489
118k
    const uint32_t         mi_row      = ctx->blk_org_y >> MI_SIZE_LOG2;
490
118k
    const uint32_t         mi_col      = ctx->blk_org_x >> MI_SIZE_LOG2;
491
    // Subsampling assumes YUV 420 content
492
118k
    const uint8_t ss_x = 1;
493
118k
    const uint8_t ss_y = 1;
494
495
118k
    uint64_t chroma_rate = 0;
496
    // Estimate chroma nominal intra mode bits
497
118k
    chroma_rate += (uint64_t)md_rate_est_ctx->intra_uv_mode_fac_bits[is_cfl_allowed][intra_mode][chroma_mode];
498
499
    // Estimate chroma angular mode bits; angular offset only allow for bsize >= 8x8
500
118k
    if (blk_geom->bsize >= BLOCK_8X8 && av1_is_directional_mode(get_uv_mode(chroma_mode))) {
501
0
        chroma_rate +=
502
0
            md_rate_est_ctx->angle_delta_fac_bits[chroma_mode - V_PRED]
503
0
                                                 [MAX_ANGLE_DELTA + cand->block_mi.angle_delta[PLANE_TYPE_UV]];
504
0
    }
505
506
    // Estimate CFL factor bits when CFL is used
507
118k
    if (chroma_mode == UV_CFL_PRED) {
508
0
        chroma_rate += (uint64_t)md_rate_est_ctx->cfl_alpha_fac_bits[cand->block_mi.cfl_alpha_signs][CFL_PRED_U]
509
0
                                                                    [CFL_IDX_U(cand->block_mi.cfl_alpha_idx)] +
510
0
            (uint64_t)md_rate_est_ctx->cfl_alpha_fac_bits[cand->block_mi.cfl_alpha_signs][CFL_PRED_V]
511
0
                                                         [CFL_IDX_V(cand->block_mi.cfl_alpha_idx)];
512
0
    }
513
514
    // Estimate chroma palette mode bits (currently not supported, so just cost of signalling off)
515
118k
    if (chroma_mode == UV_DC_PRED &&
516
118k
        svt_aom_allow_palette(pcs->ppcs->frm_hdr.allow_screen_content_tools, blk_geom->bsize) &&
517
0
        is_chroma_reference(mi_row, mi_col, blk_geom->bsize, ss_x, ss_y)) {
518
0
        const int use_palette_y  = cand->palette_info && (cand->palette_size[0] > 0);
519
0
        const int use_palette_uv = cand->palette_info && (cand->palette_size[1] > 0);
520
0
        chroma_rate += ctx->md_rate_est_ctx->palette_uv_mode_fac_bits[use_palette_y][use_palette_uv];
521
0
    }
522
523
118k
    return chroma_rate;
524
118k
}
525
526
// Computes the fast-loop RD cost, RDCOST(lambda, rate, luma_distortion), of an intra
// candidate and stores the rate split in cand_bf->fast_luma_rate / fast_chroma_rate.
//
// IntraBC candidates (IntraBC allowed for the frame and selected by the candidate):
//   rate = block-vector cost relative to the predicted vector + IntraBC flag bits;
//   the chroma rate is stored as 0.
//
// All other candidates:
//   luma rate   = intra mode bits (non-I slices) + skip-mode flag bits + key-frame luma
//                 mode bits (I slices) + angle-delta bits + palette bits + filter-intra
//                 bits + intra/inter flag bits (non-I slices) + IntraBC-off flag bits;
//   chroma rate = svt_aom_get_intra_uv_fast_rate() when the block has chroma, else 0.
uint64_t svt_aom_intra_fast_cost(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf,
                                 uint64_t lambda, uint64_t luma_distortion) {
    const BlockGeom*       blk_geom = ctx->blk_geom;
    BlkStruct*             blk_ptr  = ctx->blk_ptr;
    ModeDecisionCandidate* cand     = cand_bf->cand;

    // ---- IntraBC candidate: the rate is the block vector plus the IntraBC flag ----
    if (svt_aom_allow_intrabc(&pcs->ppcs->frm_hdr, pcs->ppcs->slice_type) && cand->block_mi.use_intrabc) {
        Mv         block_vec = {.as_int = cand->block_mi.mv[0].as_int};
        Mv         pred_vec  = {.as_int = cand->pred_mv[0].as_int};
        const int* dvcost[2] = {(int*)&ctx->md_rate_est_ctx->dv_cost[0][MV_MAX],
                                (int*)&ctx->md_rate_est_ctx->dv_cost[1][MV_MAX]};
        const int32_t dv_rate = svt_av1_mv_bit_cost(
            &block_vec, &pred_vec, ctx->md_rate_est_ctx->dv_joint_cost, dvcost, MV_COST_WEIGHT_SUB);

        const uint64_t ibc_rate = dv_rate + ctx->md_rate_est_ctx->intrabc_fac_bits[cand->block_mi.use_intrabc];

        cand_bf->fast_luma_rate   = ibc_rate;
        cand_bf->fast_chroma_rate = 0;
        return (RDCOST(lambda, ibc_rate, luma_distortion));
    }

    // ---- Regular intra candidate ----
    const uint8_t        skip_mode_ctx = ctx->skip_mode_ctx;
    const PredictionMode intra_mode    = (PredictionMode)cand->block_mi.mode;

    // Number of bits for each syntax element
    uint64_t mode_bits      = ZERO_COST; // intra mode, non-I slices
    uint64_t skip_mode_bits = ZERO_COST; // skip-mode flag, non-I slices
    uint64_t luma_mode_bits = ZERO_COST; // key-frame luma mode (+ palette)
    uint64_t angle_bits     = 0;
    uint64_t filter_bits    = 0;

    if (pcs->slice_type != I_SLICE) {
        mode_bits = (uint64_t)ctx->md_rate_est_ctx->mb_mode_fac_bits[eb_size_group_lookup[blk_geom->bsize]][intra_mode];
        if (pcs->ppcs->frm_hdr.skip_mode_params.skip_mode_flag && is_comp_ref_allowed(blk_geom->bsize)) {
            skip_mode_bits = (uint64_t)ctx->md_rate_est_ctx->skip_mode_fac_bits[skip_mode_ctx][0];
        }
    } else {
        // Luma nominal intra mode bits for key frames
        luma_mode_bits =
            (uint64_t)
                ctx->md_rate_est_ctx->y_mode_fac_bits[ctx->intra_luma_top_ctx][ctx->intra_luma_left_ctx][intra_mode];
    }

    // Luma angular mode bits
    if (blk_geom->bsize >= BLOCK_8X8 && av1_is_directional_mode(cand->block_mi.mode)) {
        assert((intra_mode - V_PRED) < 8);
        assert((intra_mode - V_PRED) >= 0);
        angle_bits = ctx->md_rate_est_ctx->angle_delta_fac_bits[intra_mode - V_PRED]
                                                               [MAX_ANGLE_DELTA + cand->block_mi.angle_delta[PLANE_TYPE_Y]];
    }

    // Luma palette bits: on/off flag, and when on, the size, colors and color map
    if (svt_aom_allow_palette(pcs->ppcs->frm_hdr.allow_screen_content_tools, blk_geom->bsize) &&
        intra_mode == DC_PRED) {
        const int palette_on = cand->palette_info ? (cand->palette_size[0] > 0) : 0;
        const int bsize_ctx  = svt_aom_get_palette_bsize_ctx(blk_geom->bsize);
        const int mode_ctx   = svt_aom_get_palette_mode_ctx(blk_ptr->av1xd);

        luma_mode_bits += ctx->md_rate_est_ctx->palette_ymode_fac_bits[bsize_ctx][mode_ctx][palette_on];

        if (palette_on) {
            const uint8_t* const color_map = cand->palette_info->color_idx_map;
            int                  block_width, block_height, rows, cols;
            svt_aom_get_block_dimensions(
                blk_geom->bsize, 0, blk_ptr->av1xd, &block_width, &block_height, &rows, &cols);

            const int num_colors = cand->palette_size[0];
            int       palette_cost =
                ctx->md_rate_est_ctx->palette_ysize_fac_bits[bsize_ctx][num_colors - PALETTE_MIN_SIZE] +
                svt_aom_write_uniform_cost(num_colors, color_map[0]);

            uint16_t  color_cache[2 * PALETTE_MAX_SIZE];
            const int n_cache = svt_get_palette_cache_y(blk_ptr->av1xd, color_cache);
            palette_cost += svt_av1_palette_color_cost_y(&cand->palette_info->pmi,
                                                         color_cache,
                                                         cand->palette_size[0],
                                                         n_cache,
                                                         pcs->ppcs->scs->encoder_bit_depth);
            palette_cost += svt_av1_cost_color_map(
                cand, ctx->md_rate_est_ctx, blk_ptr, 0, blk_geom->bsize, PALETTE_MAP);

            luma_mode_bits += palette_cost;
        }
    }

    // Filter-intra bits: on/off flag, plus the filter mode when on
    if (svt_aom_filter_intra_allowed(pcs->ppcs->scs->seq_header.filter_intra_level,
                                     blk_geom->bsize,
                                     cand->palette_info ? cand->palette_size[0] : 0,
                                     intra_mode)) {
        filter_bits =
            ctx->md_rate_est_ctx
                ->filter_intra_fac_bits[blk_geom->bsize][cand->block_mi.filter_intra_mode != FILTER_INTRA_MODES];
        if (cand->block_mi.filter_intra_mode != FILTER_INTRA_MODES) {
            filter_bits += ctx->md_rate_est_ctx->filter_intra_mode_fac_bits[cand->block_mi.filter_intra_mode];
        }
    }

    // Luma and chroma rate
    uint32_t chroma_rate = 0;
    if (ctx->has_uv) {
        // CFL info is not known in the fast loop, so DC mode is assumed when CFL is selected
        chroma_rate = (uint32_t)svt_aom_get_intra_uv_fast_rate(pcs, ctx, cand_bf, 0);
    }

    const uint32_t is_inter_rate = pcs->slice_type != I_SLICE
        ? ctx->md_rate_est_ctx->intra_inter_fac_bits[ctx->is_inter_ctx][0]
        : 0;

    uint32_t luma_rate = (uint32_t)(mode_bits + skip_mode_bits + luma_mode_bits + angle_bits + is_inter_rate +
                                    filter_bits);

    // When IntraBC is allowed for the frame, the "IntraBC off" flag must be paid for.
    if (svt_aom_allow_intrabc(&pcs->ppcs->frm_hdr, pcs->ppcs->slice_type)) {
        svt_aom_assert_err(cand->block_mi.use_intrabc == 0, "this block ibc should be off\n");
        luma_rate += ctx->md_rate_est_ctx->intrabc_fac_bits[cand->block_mi.use_intrabc];
    }

    // Keep the fast luma and chroma rates for future use
    cand_bf->fast_luma_rate   = luma_rate;
    cand_bf->fast_chroma_rate = chroma_rate;

    // Assign fast cost
    const uint32_t rate = luma_rate + chroma_rate;
    return (RDCOST(lambda, rate, luma_distortion));
}
641
642
// This function encodes the reference frame
643
uint64_t estimate_ref_frame_type_bits(ModeDecisionContext* ctx, BlkStruct* blk_ptr, uint8_t ref_frame_type,
644
0
                                      bool is_compound) {
645
0
    uint64_t ref_rate_bits = 0;
646
647
0
    MbModeInfo* const mbmi = blk_ptr->av1xd->mi[0];
648
0
    MvReferenceFrame  ref_type[2];
649
0
    av1_set_ref_frame(ref_type, ref_frame_type);
650
0
    mbmi->block_mi.ref_frame[0] = ref_type[0];
651
0
    mbmi->block_mi.ref_frame[1] = ref_type[1];
652
    //const int is_compound = svt_aom_has_second_ref(mbmi);
653
0
    {
654
0
        if (is_compound) {
655
0
            const CompReferenceType comp_ref_type = has_uni_comp_refs(&mbmi->block_mi) ? UNIDIR_COMP_REFERENCE
656
0
                                                                                       : BIDIR_COMP_REFERENCE;
657
658
0
            ref_rate_bits += ctx->md_rate_est_ctx->comp_ref_type_fac_bits[svt_aom_get_comp_reference_type_context_new(
659
0
                blk_ptr->av1xd)][comp_ref_type];
660
            /*aom_write_symbol(w, comp_ref_type,
661
               svt_aom_get_comp_reference_type_cdf(blk_ptr->av1xd), 2);*/
662
663
0
            if (comp_ref_type == UNIDIR_COMP_REFERENCE) {
664
                // SVT_LOG("ERROR[AN]: UNIDIR_COMP_REFERENCE not supported\n");
665
0
                const int bit = mbmi->block_mi.ref_frame[0] == BWDREF_FRAME;
666
667
0
                ref_rate_bits += ctx->md_rate_est_ctx->uni_comp_ref_fac_bits[svt_av1_get_pred_context_uni_comp_ref_p(
668
0
                    blk_ptr->av1xd)][0][bit];
669
                // blk_ptr->av1xd->tile_ctx->uni_comp_ref_cdf[pred_context][0];
670
                // WRITE_REF_BIT(bit, uni_comp_ref_p);
671
672
0
                if (!bit) {
673
0
                    assert(mbmi->block_mi.ref_frame[0] == LAST_FRAME);
674
0
                    const int bit1 = mbmi->block_mi.ref_frame[1] == LAST3_FRAME ||
675
0
                        mbmi->block_mi.ref_frame[1] == GOLDEN_FRAME;
676
0
                    ref_rate_bits +=
677
0
                        ctx->md_rate_est_ctx
678
0
                            ->uni_comp_ref_fac_bits[svt_av1_get_pred_context_uni_comp_ref_p1(blk_ptr->av1xd)][1][bit1];
679
                    // ref_rate_d = blk_ptr->av1xd->tile_ctx->uni_comp_ref_cdf[pred_context][1];
680
                    // WRITE_REF_BIT(bit1, uni_comp_ref_p1);
681
0
                    if (bit1) {
682
0
                        const int bit2 = mbmi->block_mi.ref_frame[1] == GOLDEN_FRAME;
683
0
                        ref_rate_bits +=
684
0
                            ctx->md_rate_est_ctx->uni_comp_ref_fac_bits[svt_av1_get_pred_context_uni_comp_ref_p2(
685
0
                                blk_ptr->av1xd)][2][bit2];
686
687
                        // ref_rate_e = blk_ptr->av1xd->tile_ctx->uni_comp_ref_cdf[pred_context][2];
688
                        //WRITE_REF_BIT(bit2, uni_comp_ref_p2);
689
0
                    }
690
0
                }
691
0
                return ref_rate_bits;
692
0
            }
693
694
0
            assert(comp_ref_type == BIDIR_COMP_REFERENCE);
695
696
0
            const int bit = (mbmi->block_mi.ref_frame[0] == GOLDEN_FRAME || mbmi->block_mi.ref_frame[0] == LAST3_FRAME);
697
0
            const int pred_ctx = svt_av1_get_pred_context_comp_ref_p(blk_ptr->av1xd);
698
0
            ref_rate_bits += ctx->md_rate_est_ctx->comp_ref_fac_bits[pred_ctx][0][bit];
699
            // ref_rate_f = blk_ptr->av1xd->tile_ctx->comp_ref_cdf[pred_ctx][0];
700
            // WRITE_REF_BIT(bit, comp_ref_p);
701
702
0
            if (!bit) {
703
0
                const int bit1 = mbmi->block_mi.ref_frame[0] == LAST2_FRAME;
704
0
                ref_rate_bits += ctx->md_rate_est_ctx
705
0
                                     ->comp_ref_fac_bits[svt_av1_get_pred_context_comp_ref_p1(blk_ptr->av1xd)][1][bit1];
706
                // ref_rate_g = blk_ptr->av1xd->tile_ctx->comp_ref_cdf[pred_context][1];
707
                // WRITE_REF_BIT(bit1, comp_ref_p1);
708
0
            } else {
709
0
                const int bit2 = mbmi->block_mi.ref_frame[0] == GOLDEN_FRAME;
710
0
                ref_rate_bits += ctx->md_rate_est_ctx
711
0
                                     ->comp_ref_fac_bits[svt_av1_get_pred_context_comp_ref_p2(blk_ptr->av1xd)][2][bit2];
712
                // ref_rate_h = blk_ptr->av1xd->tile_ctx->comp_ref_cdf[pred_context][2];
713
                // WRITE_REF_BIT(bit2, comp_ref_p2);
714
0
            }
715
716
0
            const int bit_bwd    = mbmi->block_mi.ref_frame[1] == ALTREF_FRAME;
717
0
            const int pred_ctx_2 = svt_av1_get_pred_context_comp_bwdref_p(blk_ptr->av1xd);
718
0
            ref_rate_bits += ctx->md_rate_est_ctx->comp_bwd_ref_fac_bits[pred_ctx_2][0][bit_bwd];
719
            // ref_rate_i = blk_ptr->av1xd->tile_ctx->comp_bwdref_cdf[pred_ctx_2][0];
720
            // WRITE_REF_BIT(bit_bwd, comp_bwdref_p);
721
722
0
            if (!bit_bwd) {
723
0
                ref_rate_bits += ctx->md_rate_est_ctx->comp_bwd_ref_fac_bits[svt_av1_get_pred_context_comp_bwdref_p1(
724
0
                    blk_ptr->av1xd)][1][ref_type[1] == ALTREF2_FRAME];
725
                // ref_rate_j = blk_ptr->av1xd->tile_ctx->comp_bwdref_cdf[pred_context][1];
726
                // WRITE_REF_BIT(mbmi->block_mi.ref_frame[1] == ALTREF2_FRAME, comp_bwdref_p1);
727
0
            }
728
0
        } else {
729
0
            const int bit0 = (mbmi->block_mi.ref_frame[0] <= ALTREF_FRAME &&
730
0
                              mbmi->block_mi.ref_frame[0] >= BWDREF_FRAME);
731
0
            ref_rate_bits += ctx->md_rate_est_ctx
732
0
                                 ->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p1(blk_ptr->av1xd)][0][bit0];
733
            // ref_rate_k =
734
            // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p1(blk_ptr->av1xd)][0];
735
            // WRITE_REF_BIT(bit0, single_ref_p1);
736
737
0
            if (bit0) {
738
0
                const int bit1 = mbmi->block_mi.ref_frame[0] == ALTREF_FRAME;
739
0
                ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p2(
740
0
                    blk_ptr->av1xd)][1][bit1];
741
                // ref_rate_l =
742
                // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p2(blk_ptr->av1xd)][1];
743
                // WRITE_REF_BIT(bit1, single_ref_p2);
744
0
                if (!bit1) {
745
0
                    ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p6(
746
0
                        blk_ptr->av1xd)][5][ref_frame_type == ALTREF2_FRAME];
747
                    // ref_rate_m =
748
                    // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p6(blk_ptr->av1xd)][5];
749
                    // WRITE_REF_BIT(mbmi->block_mi.ref_frame[0] == ALTREF2_FRAME, single_ref_p6);
750
0
                }
751
0
            } else {
752
0
                const int bit2 = (mbmi->block_mi.ref_frame[0] == LAST3_FRAME ||
753
0
                                  mbmi->block_mi.ref_frame[0] == GOLDEN_FRAME);
754
0
                ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p3(
755
0
                    blk_ptr->av1xd)][2][bit2];
756
                // ref_rate_n =
757
                // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p3(blk_ptr->av1xd)][2];
758
                // WRITE_REF_BIT(bit2, single_ref_p3);
759
0
                if (!bit2) {
760
0
                    const int bit3 = mbmi->block_mi.ref_frame[0] != LAST_FRAME;
761
0
                    ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p4(
762
0
                        blk_ptr->av1xd)][3][bit3];
763
                    // ref_rate_o =
764
                    // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p4(blk_ptr->av1xd)][3];
765
                    // WRITE_REF_BIT(bit3, single_ref_p4);
766
0
                } else {
767
0
                    const int bit4 = mbmi->block_mi.ref_frame[0] != LAST3_FRAME;
768
0
                    ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p5(
769
0
                        blk_ptr->av1xd)][4][bit4];
770
                    // ref_rate_p =
771
                    // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p5(blk_ptr->av1xd)][4];
772
                    // WRITE_REF_BIT(bit4, single_ref_p5);
773
0
                }
774
0
            }
775
0
        }
776
0
    }
777
0
    return ref_rate_bits;
778
0
}
779
780
int svt_aom_get_comp_group_idx_context_enc(const MacroBlockD* xd);
781
int is_any_masked_compound_used(BlockSize bsize);
782
783
/*
 * Returns the cost of the compound-prediction signalling of a candidate, or 0 when the candidate
 * has no second reference.
 *
 *   pcs   - picture control set; gives access to the sequence header and the order hints
 *   ctx   - mode-decision context (cost tables and current block)
 *   cand  - candidate whose compound settings are costed
 *   bsize - block size used to index the size-dependent tables
 *
 * Side effect: the candidate's reference pair is copied into
 * ctx->blk_ptr->av1xd->mi[0]->block_mi.ref_frame before the contexts are derived.
 */
static INLINE uint32_t get_compound_mode_rate(PictureControlSet* pcs, ModeDecisionContext* ctx,
                                              ModeDecisionCandidate* cand, BlockSize bsize) {
    MdRateEstimationContext* const r    = ctx->md_rate_est_ctx;
    MacroBlockD* const             xd   = ctx->blk_ptr->av1xd;
    SequenceControlSet* const      scs  = pcs->ppcs->scs;
    MbModeInfo* const              mbmi = xd->mi[0];

    const MvReferenceFrame rf[2] = {cand->block_mi.ref_frame[0], cand->block_mi.ref_frame[1]};
    mbmi->block_mi.ref_frame[0]  = rf[0];
    mbmi->block_mi.ref_frame[1]  = rf[1];

    // Nothing is signalled for single-reference candidates.
    if (!has_second_ref(&mbmi->block_mi)) {
        return 0;
    }

    uint32_t  comp_rate            = 0;
    const int masked_compound_used = is_any_masked_compound_used(bsize) && scs->seq_header.enable_masked_compound;

    // comp_group_idx is only costed when masked compound is available for this block size.
    if (!masked_compound_used) {
        assert(cand->block_mi.comp_group_idx == 0);
    } else {
        const int ctx_comp_group_idx = svt_aom_get_comp_group_idx_context_enc(xd);
        comp_rate = r->comp_group_idx_fac_bits[ctx_comp_group_idx][cand->block_mi.comp_group_idx];
    }

    if (cand->block_mi.comp_group_idx != 0) {
        // Masked compound: wedge or difference-weighted.
        assert(pcs->ppcs->frm_hdr.reference_mode != SINGLE_REFERENCE &&
               is_inter_compound_mode(cand->block_mi.mode));
        assert(masked_compound_used);
        assert(cand->block_mi.interinter_comp.type == COMPOUND_WEDGE ||
               cand->block_mi.interinter_comp.type == COMPOUND_DIFFWTD);

        // The compound type is only costed when wedge is usable for this block size.
        if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
            comp_rate += r->compound_type_fac_bits[bsize][cand->block_mi.interinter_comp.type - COMPOUND_WEDGE];
        }

        if (cand->block_mi.interinter_comp.type == COMPOUND_WEDGE) {
            assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize));
            comp_rate += r->wedge_idx_fac_bits[bsize][cand->block_mi.interinter_comp.wedge_index];
        } else {
            assert(cand->block_mi.interinter_comp.type == COMPOUND_DIFFWTD);
        }
        // Both masked types add the cost of one literal bit.
        comp_rate += av1_cost_literal(1);
        return comp_rate;
    }

    // comp_group_idx == 0: average or distance-weighted compound.
    if (cand->block_mi.compound_idx) {
        assert(cand->block_mi.interinter_comp.type == COMPOUND_AVERAGE);
    }

    if (!scs->seq_header.order_hint_info.enable_jnt_comp) {
        assert(cand->block_mi.compound_idx == 1);
        return comp_rate;
    }

    const int comp_index_ctx = svt_aom_get_comp_index_context_enc(pcs->ppcs,
                                                                  pcs->ppcs->cur_order_hint,
                                                                  pcs->ppcs->ref_order_hint[rf[0] - 1],
                                                                  pcs->ppcs->ref_order_hint[rf[1] - 1],
                                                                  xd);
    comp_rate += r->comp_idx_fac_bits[comp_index_ctx][cand->block_mi.compound_idx];
    return comp_rate;
}
848
849
/*
 * Returns the cost of signalling the interpolation filter(s) stored in block_mi->interp_filters.
 *
 *   block_mi           - block mode info providing the reference pair and the packed filters
 *   frm_hdr            - frame header; the cost is 0 unless interpolation_filter is SWITCHABLE
 *   ctx                - mode-decision context (cost table and current block's av1xd)
 *   enable_dual_filter - when true two directions are costed, otherwise only direction 0
 */
int32_t svt_aom_get_switchable_rate(BlockModeInfo* block_mi, const FrameHeader* const frm_hdr, ModeDecisionContext* ctx,
                                    const bool enable_dual_filter) {
    int32_t total_cost = 0;

    if (frm_hdr->interpolation_filter == SWITCHABLE) {
        const int dir_count = enable_dual_filter ? 2 : 1;
        int       dir       = 0;
        while (dir < dir_count) {
            // Context first, then the filter extracted for this direction.
            const int32_t pred_ctx = svt_aom_get_pred_context_switchable_interp(
                block_mi->ref_frame[0], block_mi->ref_frame[1], ctx->blk_ptr->av1xd, dir);
            const InterpFilter filter = av1_extract_interp_filter(block_mi->interp_filters, dir);
            assert(pred_ctx < SWITCHABLE_FILTER_CONTEXTS);
            assert(filter < SWITCHABLE_FILTERS);
            total_cost += ctx->md_rate_est_ctx->switchable_interp_fac_bitss[pred_ctx][filter];
            ++dir;
        }
    }
    return total_cost;
}
867
868
int svt_aom_is_interintra_wedge_used(BlockSize bsize);
869
870
static uint64_t av1_inter_fast_cost_light(ModeDecisionContext* ctx, BlkStruct* blk_ptr,
871
                                          ModeDecisionCandidateBuffer* cand_bf, uint64_t luma_distortion,
872
0
                                          uint64_t lambda, PictureControlSet* pcs, CandidateMv* ref_mv_stack) {
873
0
    ModeDecisionCandidate* cand = cand_bf->cand;
874
    // NM - fast inter cost estimation
875
0
    MdRateEstimationContext* r = ctx->md_rate_est_ctx;
876
    //_mm_prefetch(p, _MM_HINT_T2);
877
    // Luma rate
878
0
    uint32_t             luma_rate           = 0;
879
0
    uint64_t             mv_rate             = 0;
880
0
    const PredictionMode inter_mode          = (PredictionMode)cand->block_mi.mode;
881
0
    const uint8_t        have_nearmv         = have_nearmv_in_inter_mode(inter_mode);
882
0
    uint64_t             inter_mode_bits_num = 0;
883
0
    const uint8_t        skip_mode_ctx       = ctx->skip_mode_ctx;
884
0
    MvReferenceFrame     rf[2]               = {cand->block_mi.ref_frame[0], cand->block_mi.ref_frame[1]};
885
0
    const int8_t         ref_frame_type      = av1_ref_frame_type(rf);
886
0
    const uint8_t        is_compound         = is_inter_compound_mode(cand->block_mi.mode);
887
0
    const uint32_t       mode_context        = svt_aom_mode_context_analyzer(ctx->inter_mode_ctx[ref_frame_type], rf);
888
0
    uint64_t             reference_picture_bits_num = 0;
889
0
    if (ctx->approx_inter_rate < 2) {
890
0
        reference_picture_bits_num = ctx->estimate_ref_frames_num_bits[ref_frame_type];
891
0
    }
892
0
    if (is_compound) {
893
0
        assert(INTER_COMPOUND_OFFSET(inter_mode) < INTER_COMPOUND_MODES);
894
0
        inter_mode_bits_num += r->inter_compound_mode_fac_bits[mode_context][INTER_COMPOUND_OFFSET(inter_mode)];
895
0
    } else {
896
0
        int16_t newmv_ctx = mode_context & NEWMV_CTX_MASK;
897
        //aom_write_symbol(ec_writer, mode != NEWMV, frame_context->newmv_cdf[newmv_ctx], 2);
898
0
        inter_mode_bits_num += r->new_mv_mode_fac_bits[newmv_ctx][inter_mode != NEWMV];
899
0
        if (inter_mode != NEWMV) {
900
0
            const int16_t zero_mv_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
901
            //aom_write_symbol(ec_writer, mode != GLOBALMV, frame_context->zeromv_cdf[zero_mv_ctx], 2);
902
0
            inter_mode_bits_num += r->zero_mv_mode_fac_bits[zero_mv_ctx][inter_mode != GLOBALMV];
903
0
            if (inter_mode != GLOBALMV) {
904
0
                int16_t ref_mv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
905
                /*aom_write_symbol(ec_writer, mode != NEARESTMV, frame_context->refmv_cdf[refmv_ctx], 2);*/
906
0
                inter_mode_bits_num += r->ref_mv_mode_fac_bits[ref_mv_ctx][inter_mode != NEARESTMV];
907
0
            }
908
0
        }
909
0
    }
910
0
    if (inter_mode == NEWMV || inter_mode == NEW_NEWMV || have_nearmv) {
911
        //drLIdex cost estimation
912
0
        const int32_t new_mv = inter_mode == NEWMV || inter_mode == NEW_NEWMV;
913
0
        if (new_mv) {
914
0
            int32_t idx;
915
0
            for (idx = 0; idx < 2; ++idx) {
916
0
                if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) {
917
0
                    uint8_t drl_1_ctx = av1_drl_ctx(ref_mv_stack, idx);
918
0
                    inter_mode_bits_num += r->drl_mode_fac_bits[drl_1_ctx][cand->drl_index != idx];
919
0
                    if (cand->drl_index == idx) {
920
0
                        break;
921
0
                    }
922
0
                }
923
0
            }
924
0
        }
925
0
        if (have_nearmv) {
926
0
            int32_t idx;
927
0
            for (idx = 1; idx < 3; ++idx) {
928
0
                if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) {
929
0
                    uint8_t drl_ctx = av1_drl_ctx(ref_mv_stack, idx);
930
0
                    inter_mode_bits_num += r->drl_mode_fac_bits[drl_ctx][cand->drl_index != (idx - 1)];
931
0
                    if (cand->drl_index == (idx - 1)) {
932
0
                        break;
933
0
                    }
934
0
                }
935
0
            }
936
0
        }
937
0
    }
938
0
    if (svt_aom_have_newmv_in_inter_mode(inter_mode)) {
939
0
        const uint16_t factor = pcs->ppcs->frm_hdr.allow_screen_content_tools ? 20 : 50;
940
0
        if (is_compound) {
941
0
            mv_rate = 0;
942
0
            if (inter_mode == NEW_NEWMV) {
943
0
                for (RefList ref_list_idx = 0; ref_list_idx < 2; ++ref_list_idx) {
944
0
                    Mv             mv         = cand->block_mi.mv[ref_list_idx];
945
0
                    Mv             ref_mv     = cand->pred_mv[ref_list_idx];
946
0
                    const uint16_t absmvdiffx = ABS(mv.x - ref_mv.x);
947
0
                    const uint16_t absmvdiffy = ABS(mv.y - ref_mv.y);
948
0
                    mv_rate += 1296 + (factor * (absmvdiffx + absmvdiffy));
949
0
                }
950
0
            } else if (inter_mode == NEAREST_NEWMV || inter_mode == NEAR_NEWMV) {
951
                // New MV is second ref
952
0
                Mv             mv         = cand->block_mi.mv[1];
953
0
                Mv             ref_mv     = cand->pred_mv[1];
954
0
                const uint16_t absmvdiffx = ABS(mv.x - ref_mv.x);
955
0
                const uint16_t absmvdiffy = ABS(mv.y - ref_mv.y);
956
0
                mv_rate += 1296 + (factor * (absmvdiffx + absmvdiffy));
957
0
            } else {
958
0
                assert(inter_mode == NEW_NEARESTMV || inter_mode == NEW_NEARMV);
959
                // New MV is first ref
960
0
                Mv             mv         = cand->block_mi.mv[0];
961
0
                Mv             ref_mv     = cand->pred_mv[0];
962
0
                const uint16_t absmvdiffx = ABS(mv.x - ref_mv.x);
963
0
                const uint16_t absmvdiffy = ABS(mv.y - ref_mv.y);
964
0
                mv_rate += 1296 + (factor * (absmvdiffx + absmvdiffy));
965
0
            }
966
0
        } else {
967
0
            assert(!is_compound); // single ref inter prediction
968
            // unipred MV stored in idx0
969
0
            Mv             mv         = cand->block_mi.mv[0];
970
0
            Mv             ref_mv     = cand->pred_mv[0];
971
0
            const uint16_t absmvdiffx = ABS(mv.x - ref_mv.x);
972
0
            const uint16_t absmvdiffy = ABS(mv.y - ref_mv.y);
973
0
            mv_rate += 1296 + (factor * (absmvdiffx + absmvdiffy));
974
0
        }
975
0
    }
976
    // Get the interpolation filter rate if IFS is performed at MDS0.  Otherwise, the filter is unknown, so the rate will be updated after IFS is performed.
977
0
    uint32_t ifs_rate = 0;
978
0
    if (ctx->ifs_ctrls.level == IFS_MDS0 &&
979
0
        av1_is_interp_needed_md(&cand_bf->cand->block_mi, pcs, ctx->blk_geom->bsize) &&
980
0
        pcs->ppcs->frm_hdr.interpolation_filter == SWITCHABLE) {
981
0
        ifs_rate = svt_aom_get_switchable_rate(
982
0
            &cand_bf->cand->block_mi, &pcs->ppcs->frm_hdr, ctx, pcs->scs->seq_header.enable_dual_filter);
983
0
    }
984
0
    uint32_t is_inter_rate = r->intra_inter_fac_bits[ctx->is_inter_ctx][1];
985
986
0
    uint32_t skip_mode_rate = pcs->ppcs->frm_hdr.skip_mode_params.skip_mode_flag &&
987
0
            is_comp_ref_allowed(ctx->blk_geom->bsize)
988
0
        ? r->skip_mode_fac_bits[skip_mode_ctx][0]
989
0
        : 0;
990
0
    luma_rate = (uint32_t)(reference_picture_bits_num + skip_mode_rate + inter_mode_bits_num + mv_rate + is_inter_rate +
991
0
                           ifs_rate);
992
    // Keep the Fast Luma and Chroma rate for future use
993
0
    cand_bf->fast_luma_rate   = luma_rate;
994
0
    cand_bf->fast_chroma_rate = 0;
995
    // Assign fast cost
996
0
    if (cand->skip_mode_allowed) {
997
0
        skip_mode_rate = r->skip_mode_fac_bits[skip_mode_ctx][1];
998
0
        if (skip_mode_rate < luma_rate) {
999
0
            return (RDCOST(lambda, skip_mode_rate, luma_distortion));
1000
0
        }
1001
0
    }
1002
0
    return (RDCOST(lambda, luma_rate, luma_distortion));
1003
0
}
1004
1005
/*
 * Fast cost of an inter candidate.
 *
 * When ctx->approx_inter_rate is non-zero the work is delegated to av1_inter_fast_cost_light().
 * Otherwise the luma rate is the sum of: reference-frame bits, the skip-mode "off" flag,
 * inter-mode and DRL-index bits, MV bits from svt_av1_mv_bit_cost(), inter-intra bits,
 * motion-mode bits, compound-mode bits (get_compound_mode_rate()), the intra/inter flag and,
 * when IFS runs at MDS0, the interpolation-filter rate.
 *
 *   pcs             - picture control set (frame and sequence headers)
 *   ctx             - mode-decision context (cost tables, block geometry, MV stacks)
 *   cand_bf         - candidate buffer; its cand is costed
 *   lambda          - Lagrangian multiplier passed to RDCOST
 *   luma_distortion - distortion passed to RDCOST
 *
 * Side effects: cand_bf->fast_luma_rate is set to the luma rate and cand_bf->fast_chroma_rate to 0.
 *
 * Returns RDCOST(lambda, rate, luma_distortion). When the candidate allows skip mode and the
 * skip-mode "on" flag is cheaper than the luma rate, that flag cost is used as the rate.
 */
uint64_t svt_aom_inter_fast_cost(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf,
                                 uint64_t lambda, uint64_t luma_distortion) {
    BlkStruct* const             blk_ptr        = ctx->blk_ptr;
    ModeDecisionCandidate* const cand           = cand_bf->cand;
    MvReferenceFrame             rf[2]          = {cand->block_mi.ref_frame[0], cand->block_mi.ref_frame[1]};
    const int8_t                 ref_frame_type = av1_ref_frame_type(cand->block_mi.ref_frame);
    CandidateMv* const           ref_mv_stack   = &(ctx->ref_mv_stack[ref_frame_type][0]);

    // Approximate path requested: use the light estimator.
    if (ctx->approx_inter_rate) {
        return av1_inter_fast_cost_light(ctx, blk_ptr, cand_bf, luma_distortion, lambda, pcs, ref_mv_stack);
    }

    MdRateEstimationContext* const r       = ctx->md_rate_est_ctx;
    FrameHeader* const             frm_hdr = &pcs->ppcs->frm_hdr;
    const BlockSize                bsize   = ctx->blk_geom->bsize;

    const PredictionMode inter_mode    = (PredictionMode)cand->block_mi.mode;
    const uint8_t        skip_mode_ctx = ctx->skip_mode_ctx;
    const uint8_t        is_compound   = is_inter_compound_mode(cand->block_mi.mode);
    const uint32_t       mode_context  = svt_aom_mode_context_analyzer(ctx->inter_mode_ctx[ref_frame_type], rf);

    /* ---- reference type and inter mode ---- */
    const uint64_t reference_picture_bits_num = ctx->estimate_ref_frames_num_bits[ref_frame_type];

    uint64_t mode_bits = 0;
    if (is_compound) {
        assert(INTER_COMPOUND_OFFSET(inter_mode) < INTER_COMPOUND_MODES);
        mode_bits += r->inter_compound_mode_fac_bits[mode_context][INTER_COMPOUND_OFFSET(inter_mode)];
    } else {
        // Cascade: NEWMV? -> GLOBALMV? -> NEARESTMV?
        const int16_t newmv_ctx = mode_context & NEWMV_CTX_MASK;
        mode_bits += r->new_mv_mode_fac_bits[newmv_ctx][inter_mode != NEWMV];
        if (inter_mode != NEWMV) {
            const int16_t zero_mv_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
            mode_bits += r->zero_mv_mode_fac_bits[zero_mv_ctx][inter_mode != GLOBALMV];
        }
        if (inter_mode != NEWMV && inter_mode != GLOBALMV) {
            const int16_t ref_mv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
            mode_bits += r->ref_mv_mode_fac_bits[ref_mv_ctx][inter_mode != NEARESTMV];
        }
    }

    /* ---- DRL index ---- */
    if (inter_mode == NEWMV || inter_mode == NEW_NEWMV) {
        // Stack entries 0 and 1 are tested against drl_index 0 and 1.
        for (int32_t idx = 0; idx < 2; ++idx) {
            if (!(blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1)) {
                continue;
            }
            const uint8_t drl_ctx = av1_drl_ctx(ref_mv_stack, idx);
            mode_bits += r->drl_mode_fac_bits[drl_ctx][cand->drl_index != idx];
            if (cand->drl_index == idx) {
                break;
            }
        }
    }
    if (have_nearmv_in_inter_mode(inter_mode)) {
        // Stack entries 1 and 2 are tested against drl_index 0 and 1.
        for (int32_t drl = 0; drl < 2; ++drl) {
            const int32_t stack_idx = drl + 1;
            if (!(blk_ptr->av1xd->ref_mv_count[ref_frame_type] > stack_idx + 1)) {
                continue;
            }
            const uint8_t drl_ctx = av1_drl_ctx(ref_mv_stack, stack_idx);
            mode_bits += r->drl_mode_fac_bits[drl_ctx][cand->drl_index != drl];
            if (cand->drl_index == drl) {
                break;
            }
        }
    }

    /* ---- MV rate ---- */
    uint64_t mv_rate = 0;
    if (svt_aom_have_newmv_in_inter_mode(inter_mode)) {
        // Which list(s) carry a new MV. Single-reference MVs are stored in list 0.
        bool new_mv_in_list[2] = {true, false};
        if (is_compound) {
            if (inter_mode == NEW_NEWMV) {
                new_mv_in_list[1] = true;
            } else if (inter_mode == NEAREST_NEWMV || inter_mode == NEAR_NEWMV) {
                new_mv_in_list[0] = false;
                new_mv_in_list[1] = true;
            } else {
                assert(inter_mode == NEW_NEARESTMV || inter_mode == NEW_NEARMV);
            }
        }

        for (int list = 0; list < 2; ++list) {
            if (!new_mv_in_list[list]) {
                continue;
            }
            Mv mv     = cand->block_mi.mv[list];
            Mv ref_mv = cand->pred_mv[list];
            mv_rate += svt_av1_mv_bit_cost(&mv, &ref_mv, r->nmv_vec_cost, r->nmvcoststack, MV_COST_WEIGHT);
        }
    }

    /* ---- inter-intra ----
     * Costed whenever inter-intra is allowed for this block size / mode: even if the candidate
     * does not use it, the "off" flag still has to be signalled.
     */
    if (pcs->ppcs->scs->seq_header.enable_interintra_compound &&
        svt_is_interintra_allowed(true, bsize, cand->block_mi.mode, rf)) {
        const int interintra  = cand->block_mi.is_interintra_used;
        const int bsize_group = eb_size_group_lookup[bsize];

        mode_bits += r->inter_intra_fac_bits[bsize_group][cand->block_mi.is_interintra_used];
        if (interintra) {
            mode_bits += r->inter_intra_mode_fac_bits[bsize_group][cand->block_mi.interintra_mode];
            if (svt_aom_is_interintra_wedge_used(bsize)) {
                mode_bits += r->wedge_inter_intra_fac_bits[bsize][cand->block_mi.use_wedge_interintra];
                if (cand->block_mi.use_wedge_interintra) {
                    mode_bits += r->wedge_idx_fac_bits[bsize][cand->block_mi.interintra_wedge_index];
                }
            }
        }
    }

    /* ---- motion mode ---- */
    if (is_inter_singleref_mode(inter_mode) && frm_hdr->is_motion_mode_switchable && rf[1] != INTRA_FRAME) {
        assert(!cand->block_mi.is_interintra_used);
        const MotionMode motion_mode_rd           = cand->block_mi.motion_mode;
        const MotionMode last_motion_mode_allowed = svt_aom_motion_mode_allowed(
            pcs, cand->block_mi.num_proj_ref, blk_ptr->overlappable_neighbors, bsize, rf[0], rf[1], inter_mode);

        if (last_motion_mode_allowed == OBMC_CAUSAL) {
            // Only OBMC on/off can be signalled.
            mode_bits += r->motion_mode_fac_bits1[bsize][motion_mode_rd == OBMC_CAUSAL];
        } else if (last_motion_mode_allowed != SIMPLE_TRANSLATION) {
            mode_bits += r->motion_mode_fac_bits[bsize][motion_mode_rd];
        }
        // SIMPLE_TRANSLATION: nothing is added.
    }

    /* ---- compound mode ---- */
    mode_bits += get_compound_mode_rate(pcs, ctx, cand, bsize);

    // The filter rate is only known here when IFS is performed at MDS0; otherwise it stays 0.
    uint32_t ifs_rate = 0;
    if (ctx->ifs_ctrls.level == IFS_MDS0 && av1_is_interp_needed_md(&cand_bf->cand->block_mi, pcs, bsize) &&
        frm_hdr->interpolation_filter == SWITCHABLE) {
        ifs_rate = svt_aom_get_switchable_rate(
            &cand_bf->cand->block_mi, frm_hdr, ctx, pcs->scs->seq_header.enable_dual_filter);
    }

    const uint32_t is_inter_rate = r->intra_inter_fac_bits[ctx->is_inter_ctx][1];

    // Cost of signalling "skip mode off", when the skip-mode flag is present for this block.
    uint32_t skip_mode_rate = 0;
    if (pcs->ppcs->frm_hdr.skip_mode_params.skip_mode_flag && is_comp_ref_allowed(bsize)) {
        skip_mode_rate = r->skip_mode_fac_bits[skip_mode_ctx][0];
    }

    const uint32_t luma_rate = (uint32_t)(reference_picture_bits_num + skip_mode_rate + mode_bits + mv_rate +
                                          is_inter_rate + ifs_rate);

    // Keep the fast luma and chroma rates for later stages.
    cand_bf->fast_luma_rate   = luma_rate;
    cand_bf->fast_chroma_rate = 0;

    // Use the skip-mode "on" cost instead when it is allowed and strictly cheaper.
    uint32_t final_rate = luma_rate;
    if (cand->skip_mode_allowed) {
        skip_mode_rate = r->skip_mode_fac_bits[skip_mode_ctx][1];
        if (skip_mode_rate < luma_rate) {
            final_rate = skip_mode_rate;
        }
    }
    return (RDCOST(lambda, final_rate, luma_distortion));
}
1203
1204
/*
1205
 */
1206
EbErrorType svt_aom_txb_estimate_coeff_bits_light_pd0(ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf,
1207
                                                      uint32_t txb_origin_index, EbPictureBufferDesc* coeff_buffer_sb,
1208
0
                                                      uint32_t y_eob, uint64_t* y_txb_coeff_bits, TxSize txsize) {
1209
0
    if (y_eob) {
1210
0
        *y_txb_coeff_bits = svt_av1_cost_coeffs_txb(
1211
0
            ctx,
1212
0
            0,
1213
0
            0,
1214
0
            cand_bf,
1215
0
            (int32_t*)&coeff_buffer_sb->y_buffer[txb_origin_index * sizeof(int32_t)],
1216
0
            (uint16_t)y_eob,
1217
0
            PLANE_TYPE_Y,
1218
0
            txsize,
1219
0
            DCT_DCT,
1220
0
            0,
1221
0
            0,
1222
0
            0);
1223
1224
0
        *y_txb_coeff_bits = (*y_txb_coeff_bits) << ctx->mds_subres_step;
1225
1226
0
    } else {
1227
0
        *y_txb_coeff_bits = av1_cost_skip_txb(ctx, 0, 0, txsize, PLANE_TYPE_Y, 0);
1228
0
    }
1229
1230
0
    return EB_ErrorNone;
1231
0
}
1232
1233
EbErrorType svt_aom_txb_estimate_coeff_bits(ModeDecisionContext* ctx, uint8_t allow_update_cdf, FRAME_CONTEXT* ec_ctx,
1234
                                            PictureControlSet* pcs, ModeDecisionCandidateBuffer* cand_bf,
1235
                                            uint32_t txb_origin_index, uint32_t txb_chroma_origin_index,
1236
                                            EbPictureBufferDesc* coeff_buffer_sb, uint32_t y_eob, uint32_t cb_eob,
1237
                                            uint32_t cr_eob, uint64_t* y_txb_coeff_bits, uint64_t* cb_txb_coeff_bits,
1238
                                            uint64_t* cr_txb_coeff_bits, TxSize txsize, TxSize txsize_uv,
1239
0
                                            TxType tx_type, TxType tx_type_uv, COMPONENT_TYPE component_type) {
1240
0
    EbErrorType return_error = EB_ErrorNone;
1241
1242
0
    FrameHeader* frm_hdr = &pcs->ppcs->frm_hdr;
1243
1244
0
    int32_t* coeff_buffer;
1245
0
    int16_t  luma_txb_skip_context = ctx->luma_txb_skip_context;
1246
0
    int16_t  luma_dc_sign_context  = ctx->luma_dc_sign_context;
1247
0
    int16_t  cb_txb_skip_context   = ctx->cb_txb_skip_context;
1248
0
    int16_t  cb_dc_sign_context    = ctx->cb_dc_sign_context;
1249
0
    int16_t  cr_txb_skip_context   = ctx->cr_txb_skip_context;
1250
0
    int16_t  cr_dc_sign_context    = ctx->cr_dc_sign_context;
1251
1252
0
    bool reduced_transform_set_flag = frm_hdr->reduced_tx_set ? true : false;
1253
1254
    //Estimate the rate of the transform type and coefficient for Luma
1255
1256
0
    if (component_type == COMPONENT_LUMA || component_type == COMPONENT_ALL) {
1257
0
        if (y_eob) {
1258
0
            coeff_buffer = (int32_t*)&coeff_buffer_sb->y_buffer[txb_origin_index * sizeof(int32_t)];
1259
1260
0
            *y_txb_coeff_bits = svt_av1_cost_coeffs_txb(ctx,
1261
0
                                                        allow_update_cdf,
1262
0
                                                        ec_ctx,
1263
0
                                                        cand_bf,
1264
0
                                                        coeff_buffer,
1265
0
                                                        (uint16_t)y_eob,
1266
0
                                                        PLANE_TYPE_Y,
1267
0
                                                        txsize,
1268
0
                                                        tx_type,
1269
0
                                                        luma_txb_skip_context,
1270
0
                                                        luma_dc_sign_context,
1271
0
                                                        reduced_transform_set_flag);
1272
0
            *y_txb_coeff_bits = (*y_txb_coeff_bits) << ctx->mds_subres_step;
1273
0
        } else {
1274
0
            *y_txb_coeff_bits = av1_cost_skip_txb(
1275
0
                ctx, allow_update_cdf, ec_ctx, txsize, PLANE_TYPE_Y, luma_txb_skip_context);
1276
0
        }
1277
0
    }
1278
    // Estimate the rate of the transform type and coefficient for chroma Cb
1279
1280
0
    if (component_type == COMPONENT_CHROMA_CB || component_type == COMPONENT_CHROMA ||
1281
0
        component_type == COMPONENT_ALL) {
1282
0
        if (cb_eob) {
1283
0
            coeff_buffer = (int32_t*)&coeff_buffer_sb->u_buffer[txb_chroma_origin_index * sizeof(int32_t)];
1284
1285
0
            *cb_txb_coeff_bits = svt_av1_cost_coeffs_txb(ctx,
1286
0
                                                         allow_update_cdf,
1287
0
                                                         ec_ctx,
1288
0
                                                         cand_bf,
1289
0
                                                         coeff_buffer,
1290
0
                                                         (uint16_t)cb_eob,
1291
0
                                                         PLANE_TYPE_UV,
1292
0
                                                         txsize_uv,
1293
0
                                                         tx_type_uv,
1294
0
                                                         cb_txb_skip_context,
1295
0
                                                         cb_dc_sign_context,
1296
0
                                                         reduced_transform_set_flag);
1297
0
        } else {
1298
0
            *cb_txb_coeff_bits = av1_cost_skip_txb(
1299
0
                ctx, allow_update_cdf, ec_ctx, txsize_uv, PLANE_TYPE_UV, cb_txb_skip_context);
1300
0
        }
1301
0
    }
1302
1303
0
    if (component_type == COMPONENT_CHROMA_CR || component_type == COMPONENT_CHROMA ||
1304
0
        component_type == COMPONENT_ALL) {
1305
        //Estimate the rate of the transform type and coefficient for chroma Cr
1306
0
        if (cr_eob) {
1307
0
            coeff_buffer = (int32_t*)&coeff_buffer_sb->v_buffer[txb_chroma_origin_index * sizeof(int32_t)];
1308
1309
0
            *cr_txb_coeff_bits = svt_av1_cost_coeffs_txb(ctx,
1310
0
                                                         allow_update_cdf,
1311
0
                                                         ec_ctx,
1312
0
                                                         cand_bf,
1313
0
                                                         coeff_buffer,
1314
0
                                                         (uint16_t)cr_eob,
1315
0
                                                         PLANE_TYPE_UV,
1316
0
                                                         txsize_uv,
1317
0
                                                         tx_type_uv,
1318
0
                                                         cr_txb_skip_context,
1319
0
                                                         cr_dc_sign_context,
1320
0
                                                         reduced_transform_set_flag);
1321
0
        } else {
1322
0
            *cr_txb_coeff_bits = av1_cost_skip_txb(
1323
0
                ctx, allow_update_cdf, ec_ctx, txsize_uv, PLANE_TYPE_UV, cr_txb_skip_context);
1324
0
        }
1325
0
    }
1326
1327
0
    return return_error;
1328
0
}
1329
1330
/*
 * Light-PD0 full cost: writes RDCOST(lambda, rate, y_distortion[0]) to *cand_bf->full_cost.
 *
 * The rate is the luma coefficient bits plus two fixed-context approximations:
 *  - the "has coefficients" skip flag cost taken at skip context 0, and
 *  - the PARTITION_NONE cost taken at partition context 0, which avoids a call to
 *    av1_partition_rate_cost. Partition cost is only needed for > 4x4 blocks, and light-PD0
 *    assumes 4x4 blocks are disallowed.
 *
 * Always returns EB_ErrorNone.
 */
EbErrorType svt_aom_full_cost_light_pd0(ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf,
                                        uint64_t* y_distortion, uint64_t lambda, uint64_t* y_coeff_bits) {
    const uint64_t coeff_rate = *y_coeff_bits + (uint64_t)ctx->md_rate_est_ctx->skip_fac_bits[0][0];
    const uint64_t total_rate = coeff_rate + ctx->md_rate_est_ctx->partition_fac_bits[0][PARTITION_NONE];

    // Assign full cost
    *(cand_bf->full_cost) = RDCOST(lambda, total_rate, y_distortion[0]);

    return EB_ErrorNone;
}
1344
1345
/*********************************************************************************
1346
 * svt_aom_av1_full_cost function is used to estimate the cost of a candidate mode
1347
 * for full mode decision module.
1348
 **********************************************************************************/
1349
void svt_aom_full_cost(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf,
1350
                       uint64_t lambda, uint64_t y_distortion[DIST_TOTAL][DIST_CALC_TOTAL],
1351
                       uint64_t cb_distortion[DIST_TOTAL][DIST_CALC_TOTAL],
1352
                       uint64_t cr_distortion[DIST_TOTAL][DIST_CALC_TOTAL], uint64_t* y_coeff_bits,
1353
228k
                       uint64_t* cb_coeff_bits, uint64_t* cr_coeff_bits) {
1354
228k
    const uint8_t skip_coeff_ctx        = ctx->skip_coeff_ctx;
1355
228k
    const bool    update_full_cost_ssim = ctx->tune_ssim_level > SSIM_LVL_0 ? true : false;
1356
1357
    // Get the TX size rate for skip and non-skip block. Need both to make non-skip decision
1358
228k
    uint64_t non_skip_tx_size_bits = 0, skip_tx_size_bits = 0;
1359
228k
    if (!ctx->shut_fast_rate && pcs->ppcs->frm_hdr.tx_mode == TX_MODE_SELECT) {
1360
117k
        if (cand_bf->block_has_coeff) {
1361
5.54k
            non_skip_tx_size_bits = svt_aom_get_tx_size_bits(
1362
5.54k
                cand_bf, ctx, pcs, cand_bf->cand->block_mi.tx_depth, /*cand_bf->block_has_coeff*/ 1);
1363
5.54k
        }
1364
1365
117k
        skip_tx_size_bits = svt_aom_get_tx_size_bits(
1366
117k
            cand_bf, ctx, pcs, cand_bf->cand->block_mi.tx_depth, /*cand_bf->block_has_coeff*/ 0);
1367
117k
    }
1368
1369
228k
    assert(IMPLIES(is_inter_mode(cand_bf->cand->block_mi.mode), skip_tx_size_bits == 0));
1370
1371
    // Decide if block should be signalled as skip (send no coeffs)
1372
228k
    if (!svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id) && ctx->blk_skip_decision &&
1373
7.29k
        cand_bf->block_has_coeff && is_inter_mode(cand_bf->cand->block_mi.mode)) {
1374
0
        const uint64_t non_skip_cost = RDCOST(
1375
0
            lambda,
1376
0
            (*y_coeff_bits + *cb_coeff_bits + *cr_coeff_bits + non_skip_tx_size_bits +
1377
0
             (uint64_t)ctx->md_rate_est_ctx->skip_fac_bits[skip_coeff_ctx][0]),
1378
0
            (y_distortion[DIST_SSD][0] + cb_distortion[DIST_SSD][0] + cr_distortion[DIST_SSD][0]));
1379
1380
0
        const uint64_t skip_cost = RDCOST(
1381
0
            lambda,
1382
0
            ((uint64_t)ctx->md_rate_est_ctx->skip_fac_bits[skip_coeff_ctx][1]) + skip_tx_size_bits,
1383
0
            (y_distortion[DIST_SSD][1] + cb_distortion[DIST_SSD][1] + cr_distortion[DIST_SSD][1]));
1384
1385
        // Update signals to correspond to skip_mode values (no coeffs, etc.)
1386
0
        if (skip_cost < non_skip_cost) {
1387
0
            y_distortion[DIST_SSD][0]  = y_distortion[DIST_SSD][1];
1388
0
            cb_distortion[DIST_SSD][0] = cb_distortion[DIST_SSD][1];
1389
0
            cr_distortion[DIST_SSD][0] = cr_distortion[DIST_SSD][1];
1390
1391
0
            y_distortion[DIST_SSIM][0]  = y_distortion[DIST_SSIM][1];
1392
0
            cb_distortion[DIST_SSIM][0] = cb_distortion[DIST_SSIM][1];
1393
0
            cr_distortion[DIST_SSIM][0] = cr_distortion[DIST_SSIM][1];
1394
0
            cand_bf->block_has_coeff    = 0;
1395
0
            cand_bf->y_has_coeff        = 0;
1396
0
            cand_bf->u_has_coeff        = 0;
1397
0
            cand_bf->v_has_coeff        = 0;
1398
0
            cand_bf->cnt_nz_coeff       = 0;
1399
1400
            // For inter modes, signalling skip means no TX depth is used and the TX type will be DCT_DCT
1401
0
            cand_bf->cand->block_mi.tx_depth = 0;
1402
0
            cand_bf->cand->transform_type_uv = DCT_DCT;
1403
0
            memset(cand_bf->cand->transform_type, DCT_DCT, 16 * sizeof(cand_bf->cand->transform_type[0]));
1404
0
            memset(&cand_bf->quant_dc, 0, sizeof(QuantDcData));
1405
0
            memset(&cand_bf->eob, 0, sizeof(EobData));
1406
0
        }
1407
0
    }
1408
1409
228k
    uint64_t coeff_rate = 0;
1410
228k
    if (cand_bf->block_has_coeff) {
1411
7.46k
        coeff_rate = (*y_coeff_bits + *cb_coeff_bits + *cr_coeff_bits + non_skip_tx_size_bits +
1412
7.46k
                      (uint64_t)ctx->md_rate_est_ctx->skip_fac_bits[skip_coeff_ctx][0]);
1413
220k
    } else {
1414
220k
        coeff_rate = ctx->md_rate_est_ctx->skip_fac_bits[skip_coeff_ctx][1] + skip_tx_size_bits;
1415
220k
    }
1416
1417
228k
    uint64_t mode_rate            = cand_bf->fast_luma_rate + cand_bf->fast_chroma_rate + coeff_rate;
1418
228k
    uint64_t mode_distortion      = y_distortion[DIST_SSD][0] + cb_distortion[DIST_SSD][0] + cr_distortion[DIST_SSD][0];
1419
228k
    uint64_t mode_ssim_distortion = update_full_cost_ssim
1420
228k
        ? y_distortion[DIST_SSIM][0] + cb_distortion[DIST_SSIM][0] + cr_distortion[DIST_SSIM][0]
1421
228k
        : 0;
1422
228k
    uint64_t mode_cost            = RDCOST(lambda, mode_rate, mode_distortion);
1423
1424
    // If skip_mode is allowed for this candidate, check cost of skip mode compared to regular cost
1425
228k
    if (cand_bf->cand->skip_mode_allowed == true) {
1426
0
        const uint8_t skip_mode_ctx = ctx->skip_mode_ctx;
1427
1428
        // Skip mode cost
1429
0
        const uint64_t skip_mode_rate       = ctx->md_rate_est_ctx->skip_mode_fac_bits[skip_mode_ctx][1];
1430
0
        const uint64_t skip_mode_distortion = y_distortion[DIST_SSD][1] + cb_distortion[DIST_SSD][1] +
1431
0
            cr_distortion[DIST_SSD][1];
1432
0
        const uint64_t skip_mode_ssim_distortion = update_full_cost_ssim
1433
0
            ? y_distortion[DIST_SSIM][1] + cb_distortion[DIST_SSIM][1] + cr_distortion[DIST_SSIM][1]
1434
0
            : 0;
1435
0
        const uint64_t skip_mode_cost            = RDCOST(lambda, skip_mode_rate, skip_mode_distortion);
1436
1437
0
        cand_bf->cand->block_mi.skip_mode = false;
1438
0
        if (skip_mode_cost <= mode_cost) {
1439
            // Update candidate cost
1440
0
            mode_cost                         = skip_mode_cost;
1441
0
            mode_rate                         = skip_mode_rate;
1442
0
            mode_distortion                   = skip_mode_distortion;
1443
0
            mode_ssim_distortion              = skip_mode_ssim_distortion;
1444
0
            cand_bf->cand->block_mi.skip_mode = true;
1445
1446
            // Update signals to correspond to skip_mode values (no coeffs, etc.)
1447
0
            cand_bf->block_has_coeff         = 0;
1448
0
            cand_bf->y_has_coeff             = 0;
1449
0
            cand_bf->u_has_coeff             = 0;
1450
0
            cand_bf->v_has_coeff             = 0;
1451
0
            cand_bf->cnt_nz_coeff            = 0;
1452
0
            cand_bf->cand->block_mi.tx_depth = 0;
1453
0
            memset(cand_bf->cand->transform_type, DCT_DCT, 16 * sizeof(cand_bf->cand->transform_type[0]));
1454
0
            cand_bf->cand->transform_type_uv = DCT_DCT;
1455
0
            memset(&cand_bf->quant_dc, 0, sizeof(QuantDcData));
1456
0
            memset(&cand_bf->eob, 0, sizeof(EobData));
1457
0
        }
1458
0
    }
1459
1460
    // Assign full cost
1461
228k
    *(cand_bf->full_cost) = mode_cost;
1462
228k
    cand_bf->total_rate   = mode_rate;
1463
228k
    cand_bf->full_dist    = (uint32_t)mode_distortion;
1464
228k
    if (update_full_cost_ssim) {
1465
0
        assert(ctx->pd_pass == PD_PASS_1);
1466
0
        assert(ctx->md_stage == MD_STAGE_3);
1467
0
        *(cand_bf->full_cost_ssim) = RDCOST(lambda, mode_rate, mode_ssim_distortion);
1468
0
    }
1469
228k
    return;
1470
228k
}
1471
1472
/************************************************************
1473
 * Coding Loop Context Generation
1474
 ************************************************************/
1475
224k
/*
 * Derives the entropy-coding contexts that mode decision needs for the current block
 * (ctx->blk_ptr) and stores them in ctx:
 *  - intra luma top/left contexts (I_SLICE only), is_inter_ctx and skip_mode_ctx, unless
 *    fast-rate estimation is shut off;
 *  - neighbour reference counts, when the picture can use references;
 *  - skip_coeff_ctx, forced to 0 when rate_est_ctrls.update_skip_coeff_ctx is off.
 */
void svt_aom_coding_loop_context_generation(PictureControlSet* pcs, ModeDecisionContext* ctx) {
    MacroBlockD* const xd = ctx->blk_ptr->av1xd;

    if (!ctx->shut_fast_rate) {
        if (pcs->slice_type == I_SLICE) {
            svt_aom_get_kf_y_mode_ctx(xd, &ctx->intra_luma_top_ctx, &ctx->intra_luma_left_ctx);
        }
        ctx->is_inter_ctx  = svt_av1_get_intra_inter_context(xd);
        ctx->skip_mode_ctx = av1_get_skip_mode_context(xd);
    }

    // Collect neighbor ref count
    bool collect_ref_counts = pcs->slice_type != I_SLICE || pcs->ppcs->frm_hdr.allow_intrabc;
#if OPT_APPROX_COEFF_RATE
    // At approx_inter_rate>=2, estimate_ref_frames_num_bits is skipped so ref counts
    // are not consumed in MD. EC has its own call to collect_neighbors_ref_counts_new.
    collect_ref_counts = collect_ref_counts && ctx->approx_inter_rate < 2;
#endif
    if (collect_ref_counts) {
        svt_aom_collect_neighbors_ref_counts_new(xd);
    }

    // Skip Coeff Context
    if (ctx->rate_est_ctrls.update_skip_coeff_ctx) {
        ctx->skip_coeff_ctx = av1_get_skip_context(xd);
    } else {
        ctx->skip_coeff_ctx = 0;
    }
}
1499
1500
357k
// Returns 1 for every block size above BLOCK_4X4 and 0 otherwise. Callers in this file use it
// to decide whether TX size bits are charged for the block.
static INLINE int block_signals_txsize(BlockSize bsize) {
    const int larger_than_4x4 = BLOCK_4X4 < bsize;
    return larger_than_4x4;
}
1503
1504
0
// Returns the largest transform size for a block: blocksize_to_txsize[bsize] for luma
// (plane == 0), and that size passed through av1_get_adjusted_tx_size() for chroma.
// NOTE: lossless segments (which would force TX_4X4) are not considered here.
static INLINE int get_vartx_max_txsize(BlockSize bsize, int plane) {
    const TxSize largest = blocksize_to_txsize[bsize];
    return plane ? av1_get_adjusted_tx_size(largest) : largest;
}
1512
1513
0
// Width of the block in units of the smallest transform (tx_size_wide_log2[0]).
// When xd->mb_to_right_edge is negative, that offset is first scaled down by 8
// (and by a further 2 for chroma planes) and added, which shrinks the width.
static INLINE int max_block_wide(const MacroBlockD* xd, BlockSize bsize, int plane) {
    const int full_width = block_size_wide[bsize];
    const int edge_clip  = xd->mb_to_right_edge < 0 ? gcc_right_shift(xd->mb_to_right_edge, 3 + !!plane) : 0;

    // Scale the width in the transform block unit.
    return (full_width + edge_clip) >> tx_size_wide_log2[0];
}
1523
1524
0
// Height of the block in units of the smallest transform (tx_size_high_log2[0]).
// When xd->mb_to_bottom_edge is negative, that offset is first scaled down by 8
// (and by a further 2 for chroma planes) and added, which shrinks the height.
static INLINE int max_block_high(const MacroBlockD* xd, BlockSize bsize, int plane) {
    const int full_height = block_size_high[bsize];
    const int edge_clip   = xd->mb_to_bottom_edge < 0 ? gcc_right_shift(xd->mb_to_bottom_edge, 3 + !!plane) : 0;

    // Scale the height in the transform block unit.
    return (full_height + edge_clip) >> tx_size_high_log2[0];
}
1534
1535
// Records a transform size in the above/left transform contexts.
// The area written is that of txb_size (mi_size_wide/high of its block size); the values
// written are the width (above) and height (left) of tx_size.
static INLINE void txfm_partition_update(TXFM_CONTEXT* above_ctx, TXFM_CONTEXT* left_ctx, TxSize tx_size,
                                         TxSize txb_size) {
    const BlockSize bsize = txsize_to_bsize[txb_size];
    assert(bsize < BLOCK_SIZES_ALL);

    const uint8_t tx_h    = tx_size_high[tx_size];
    const uint8_t tx_w    = tx_size_wide[tx_size];
    const int     mi_rows = mi_size_high[bsize];
    const int     mi_cols = mi_size_wide[bsize];

    for (int r = 0; r < mi_rows; ++r) {
        left_ctx[r] = tx_h;
    }
    for (int c = 0; c < mi_cols; ++c) {
        above_ctx[c] = tx_w;
    }
}
1551
1552
0
// Maps a dimension in pixels to a square transform size. 128 and 64 both map to TX_64X64;
// any value other than 128/64/32/16/8 maps to TX_4X4.
static INLINE TxSize get_sqr_tx_size(int tx_dim) {
    if (tx_dim == 128 || tx_dim == 64) {
        return TX_64X64;
    }
    if (tx_dim == 32) {
        return TX_32X32;
    }
    if (tx_dim == 16) {
        return TX_16X16;
    }
    if (tx_dim == 8) {
        return TX_8X8;
    }
    return TX_4X4;
}
1571
1572
static INLINE int txfm_partition_context(TXFM_CONTEXT* above_ctx, TXFM_CONTEXT* left_ctx, BlockSize bsize,
1573
0
                                         TxSize tx_size) {
1574
0
    const uint8_t txw      = tx_size_wide[tx_size];
1575
0
    const uint8_t txh      = tx_size_high[tx_size];
1576
0
    const int     above    = *above_ctx < txw;
1577
0
    const int     left     = *left_ctx < txh;
1578
0
    int           category = TXFM_PARTITION_CONTEXTS;
1579
1580
    // dummy return, not used by others.
1581
0
    if (tx_size == TX_4X4) {
1582
0
        return 0;
1583
0
    }
1584
1585
0
    TxSize max_tx_size = get_sqr_tx_size(AOMMAX(block_size_wide[bsize], block_size_high[bsize]));
1586
1587
0
    if (max_tx_size >= TX_8X8) {
1588
0
        category = (txsize_sqr_up_map[tx_size] != max_tx_size && max_tx_size > TX_8X8) +
1589
0
            (TX_SIZES - 1 - max_tx_size) * 2;
1590
0
    }
1591
0
    assert(category != TXFM_PARTITION_CONTEXTS);
1592
0
    return category * 3 + above + left;
1593
0
}
1594
1595
/*
 * Recursively accumulates the rate of the transform-partition flags for the transform block at
 * (blk_row, blk_col), and updates the above/left transform contexts in xd as it goes.
 *
 * A "no split" flag is costed when tx_size equals the size selected by the block's tx_depth;
 * otherwise a "split" flag is costed and the function recurses into the sub-transforms.
 * Nothing is costed for blocks starting outside the edge-clipped block area or once
 * MAX_VARTX_DEPTH is reached. When allow_update_cdf is set, ec_ctx->txfm_partition_cdf is
 * updated with every flag costed.
 *
 * Returns the accumulated rate.
 */
static uint64_t cost_tx_size_vartx(MacroBlockD* xd, const MbModeInfo* mbmi, TxSize tx_size, int depth, int blk_row,
                                   int blk_col, MdRateEstimationContext* md_rate_est_ctx, FRAME_CONTEXT* ec_ctx,
                                   uint8_t allow_update_cdf) {
    // Transform blocks that start outside the block area cost nothing
    if (blk_row >= max_block_high(xd, mbmi->bsize, 0) || blk_col >= max_block_wide(xd, mbmi->bsize, 0)) {
        return 0;
    }

    TXFM_CONTEXT* const above_ctx = xd->above_txfm_context + blk_col;
    TXFM_CONTEXT* const left_ctx  = xd->left_txfm_context + blk_row;

    // At the maximum depth no flag is costed; only the contexts are updated
    if (depth == MAX_VARTX_DEPTH) {
        txfm_partition_update(above_ctx, left_ctx, tx_size, tx_size);
        return 0;
    }

    const int partition_ctx = txfm_partition_context(above_ctx, left_ctx, mbmi->bsize, tx_size);

    // The selected size is reached: cost "no split" and stop
    if (tx_size == tx_depth_to_tx_size[mbmi->block_mi.tx_depth][mbmi->bsize]) {
        const uint64_t no_split_bits = md_rate_est_ctx->txfm_partition_fac_bits[partition_ctx][0];

        if (allow_update_cdf) {
            update_cdf(ec_ctx->txfm_partition_cdf[partition_ctx], 0, 2);
        }
        txfm_partition_update(above_ctx, left_ctx, tx_size, tx_size);
        return no_split_bits;
    }

    // Otherwise cost "split" and descend into the sub-transforms
    assert(tx_size < TX_SIZES_ALL);
    const TxSize sub_txs = eb_sub_tx_size_map[tx_size];
    const int    step_w  = eb_tx_size_wide_unit[sub_txs];
    const int    step_h  = eb_tx_size_high_unit[sub_txs];

    uint64_t bits = md_rate_est_ctx->txfm_partition_fac_bits[partition_ctx][1];
    if (allow_update_cdf) {
        update_cdf(ec_ctx->txfm_partition_cdf[partition_ctx], 1, 2);
    }

    // 4x4 sub-transforms are not split further: update the contexts and stop
    if (sub_txs == TX_4X4) {
        txfm_partition_update(above_ctx, left_ctx, sub_txs, tx_size);
        return bits;
    }

    assert(step_w > 0 && step_h > 0);
    const int rows = eb_tx_size_high_unit[tx_size];
    const int cols = eb_tx_size_wide_unit[tx_size];
    for (int r = 0; r < rows; r += step_h) {
        for (int c = 0; c < cols; c += step_w) {
            bits += cost_tx_size_vartx(
                xd, mbmi, sub_txs, depth + 1, blk_row + r, blk_col + c, md_rate_est_ctx, ec_ctx, allow_update_cdf);
        }
    }
    return bits;
}
1654
1655
686k
// Writes txs into the first len entries of txfm_ctx (nothing is written when len <= 0).
static INLINE void set_txfm_ctx(TXFM_CONTEXT* txfm_ctx, uint8_t txs, int len) {
    int remaining = len;
    while (remaining-- > 0) {
        *txfm_ctx++ = txs;
    }
}
1661
1662
345k
// Fills n8_w entries of the above transform context and n8_h entries of the left one.
// The value written is the width/height of tx_size, or, when skip is set, the full block
// dimension (n8_w / n8_h scaled by MI_SIZE).
static INLINE void set_txfm_ctxs(TxSize tx_size, int n8_w, int n8_h, int skip, const MacroBlockD* xd) {
    const uint8_t above_val = skip ? n8_w * MI_SIZE : tx_size_wide[tx_size];
    const uint8_t left_val  = skip ? n8_h * MI_SIZE : tx_size_high[tx_size];

    set_txfm_ctx(xd->above_txfm_context, above_val, n8_w);
    set_txfm_ctx(xd->left_txfm_context, left_val, n8_h);
}
1674
1675
10.9k
// Counts how many steps through eb_sub_tx_size_map separate the block's largest transform
// (blocksize_to_txsize[bsize]) from tx_size. Returns 0 when they are equal.
static INLINE int tx_size_to_depth(TxSize tx_size, BlockSize bsize) {
    int depth = 0;
    for (TxSize cur = blocksize_to_txsize[bsize]; cur != tx_size; cur = eb_sub_tx_size_map[cur]) {
        ++depth;
        assert(depth <= MAX_TX_DEPTH);
    }
    return depth;
}
1685
1686
// Returns a context number for the given MB prediction signal
1687
// The mode info data structure has a one element border above and to the
1688
// left of the entries corresponding to real blocks.
1689
// The prediction flags in these dummy entries are initialized to 0.
1690
10.9k
static INLINE int get_tx_size_context(const MacroBlockD* xd) {
1691
10.9k
    const MbModeInfo*       mbmi        = xd->mi[0];
1692
10.9k
    const MbModeInfo* const above_mbmi  = xd->above_mbmi;
1693
10.9k
    const MbModeInfo* const left_mbmi   = xd->left_mbmi;
1694
10.9k
    const TxSize            max_tx_size = blocksize_to_txsize[mbmi->bsize];
1695
10.9k
    const int               max_tx_wide = tx_size_wide[max_tx_size];
1696
10.9k
    const int               max_tx_high = tx_size_high[max_tx_size];
1697
10.9k
    const int               has_above   = xd->up_available;
1698
10.9k
    const int               has_left    = xd->left_available;
1699
1700
10.9k
    int above = xd->above_txfm_context[0] >= max_tx_wide;
1701
10.9k
    int left  = xd->left_txfm_context[0] >= max_tx_high;
1702
1703
10.9k
    if (has_above) {
1704
2.74k
        if (is_inter_block(&above_mbmi->block_mi)) {
1705
0
            above = block_size_wide[above_mbmi->bsize] >= max_tx_wide;
1706
0
        }
1707
2.74k
    }
1708
1709
10.9k
    if (has_left) {
1710
2.86k
        if (is_inter_block(&left_mbmi->block_mi)) {
1711
0
            left = block_size_high[left_mbmi->bsize] >= max_tx_high;
1712
0
        }
1713
2.86k
    }
1714
1715
10.9k
    if (has_above && has_left) {
1716
152
        return (above + left);
1717
10.7k
    } else if (has_above) {
1718
2.59k
        return above;
1719
8.17k
    } else if (has_left) {
1720
2.71k
        return left;
1721
5.46k
    } else {
1722
5.46k
        return 0;
1723
5.46k
    }
1724
10.9k
}
1725
1726
static uint64_t cost_selected_tx_size(const MacroBlockD* xd, MdRateEstimationContext* md_rate_est_ctx, TxSize tx_size,
1727
10.9k
                                      FRAME_CONTEXT* ec_ctx, uint8_t allow_update_cdf) {
1728
10.9k
    const MbModeInfo* const mbmi  = xd->mi[0];
1729
10.9k
    const BlockSize         bsize = mbmi->bsize;
1730
10.9k
    uint64_t                bits  = 0;
1731
1732
10.9k
    if (block_signals_txsize(bsize)) {
1733
10.9k
        const int tx_size_ctx = get_tx_size_context(xd);
1734
10.9k
        assert(bsize < BLOCK_SIZES_ALL);
1735
10.9k
        const int     depth       = tx_size_to_depth(tx_size, bsize);
1736
10.9k
        const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize);
1737
10.9k
        bits += md_rate_est_ctx->tx_size_fac_bits[tx_size_cat][tx_size_ctx][depth];
1738
1739
10.9k
        if (allow_update_cdf) {
1740
0
            const int max_depths = bsize_to_max_depth(bsize);
1741
0
            assert(depth >= 0 && depth <= max_depths);
1742
0
            assert(!is_inter_block(&mbmi->block_mi));
1743
0
            assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed(/*xd,*/ mbmi)));
1744
0
            update_cdf(ec_ctx->tx_size_cdf[tx_size_cat][tx_size_ctx], depth, max_depths + 1);
1745
0
        }
1746
10.9k
    }
1747
1748
10.9k
    return bits;
1749
10.9k
}
1750
1751
/* Get the TXS rate and update the txfm context.  If allow_update_cdf is true, the TX size CDFs will
1752
be updated. */
1753
uint64_t svt_aom_tx_size_bits(PictureControlSet* pcs, uint8_t segment_id, MdRateEstimationContext* md_rate_est_ctx,
1754
                              MacroBlockD* xd, const MbModeInfo* mbmi, TxSize tx_size, TxMode tx_mode, BlockSize bsize,
1755
346k
                              uint8_t skip, FRAME_CONTEXT* ec_ctx, uint8_t allow_update_cdf) {
1756
346k
    uint64_t bits        = 0;
1757
346k
    int      is_inter_tx = is_inter_block(&mbmi->block_mi);
1758
346k
    if (tx_mode == TX_MODE_SELECT && block_signals_txsize(bsize) && !(is_inter_tx && skip) &&
1759
346k
        !svt_av1_is_lossless_segment(pcs, segment_id)) {
1760
10.9k
        if (is_inter_tx) { // This implies skip flag is 0.
1761
0
            const TxSize max_tx_size = get_vartx_max_txsize(/*xd,*/ bsize, 0);
1762
0
            const int    txbh        = eb_tx_size_high_unit[max_tx_size];
1763
0
            const int    txbw        = eb_tx_size_wide_unit[max_tx_size];
1764
0
            const int    width       = block_size_wide[bsize] >> tx_size_wide_log2[0];
1765
0
            const int    height      = block_size_high[bsize] >> tx_size_high_log2[0];
1766
0
            int          idx, idy;
1767
0
            for (idy = 0; idy < height; idy += txbh) {
1768
0
                for (idx = 0; idx < width; idx += txbw) {
1769
0
                    bits += cost_tx_size_vartx(
1770
0
                        xd, mbmi, max_tx_size, 0, idy, idx, md_rate_est_ctx, ec_ctx, allow_update_cdf);
1771
0
                }
1772
0
            }
1773
10.9k
        } else {
1774
10.9k
            bits += cost_selected_tx_size(xd, md_rate_est_ctx, tx_size, ec_ctx, allow_update_cdf);
1775
10.9k
            set_txfm_ctxs(tx_size, xd->n8_w, xd->n8_h, 0, xd);
1776
10.9k
        }
1777
336k
    } else {
1778
336k
        set_txfm_ctxs(tx_size, xd->n8_w, xd->n8_h, skip && is_inter_block(&mbmi->block_mi), xd);
1779
336k
    }
1780
1781
346k
    return bits;
1782
346k
}
1783
1784
/* Get the TXS rate.  A dummy txfm context array will be used, so context updates will not be saved for
1785
future blocks. */
1786
uint64_t svt_aom_get_tx_size_bits(ModeDecisionCandidateBuffer* candidateBuffer, ModeDecisionContext* ctx,
1787
347k
                                  PictureControlSet* pcs, uint8_t tx_depth, bool block_has_coeff) {
1788
347k
    NeighborArrayUnit* txfm_context_array = ctx->txfm_context_array;
1789
347k
    const uint8_t*     txfm_above_ptr     = svt_aom_na_top_ptr_pu(txfm_context_array, ctx->blk_org_x);
1790
347k
    const uint8_t*     txfm_left_ptr      = svt_aom_na_left_ptr_pu(txfm_context_array, ctx->blk_org_y);
1791
1792
347k
    TxMode       tx_mode = pcs->ppcs->frm_hdr.tx_mode;
1793
347k
    MacroBlockD* xd      = ctx->blk_ptr->av1xd;
1794
347k
    BlockSize    bsize   = ctx->blk_geom->bsize;
1795
347k
    const TxSize tx_size = tx_depth_to_tx_size[tx_depth][bsize];
1796
347k
    MbModeInfo*  mbmi    = xd->mi[0];
1797
1798
347k
    svt_memcpy(ctx->above_txfm_context, txfm_above_ptr, (ctx->blk_geom->bwidth >> MI_SIZE_LOG2) * sizeof(TXFM_CONTEXT));
1799
347k
    svt_memcpy(ctx->left_txfm_context, txfm_left_ptr, (ctx->blk_geom->bheight >> MI_SIZE_LOG2) * sizeof(TXFM_CONTEXT));
1800
1801
347k
    xd->above_txfm_context      = ctx->above_txfm_context;
1802
347k
    xd->left_txfm_context       = ctx->left_txfm_context;
1803
347k
    mbmi->bsize                 = ctx->blk_geom->bsize;
1804
347k
    mbmi->block_mi.use_intrabc  = candidateBuffer->cand->block_mi.use_intrabc;
1805
347k
    mbmi->block_mi.ref_frame[0] = candidateBuffer->cand->block_mi.ref_frame[0];
1806
347k
    mbmi->block_mi.tx_depth     = tx_depth;
1807
1808
347k
    const uint64_t bits = svt_aom_tx_size_bits(pcs,
1809
347k
                                               ctx->blk_ptr->segment_id,
1810
347k
                                               ctx->md_rate_est_ctx,
1811
347k
                                               xd,
1812
347k
                                               mbmi,
1813
347k
                                               tx_size,
1814
347k
                                               tx_mode,
1815
347k
                                               bsize,
1816
347k
                                               !block_has_coeff,
1817
347k
                                               NULL,
1818
347k
                                               0);
1819
347k
    return bits;
1820
347k
}
1821
1822
/*
1823
 * av1_partition_rate_cost function is used to generate the rate of signaling the
1824
 * partition type for a given block.
1825
 */
1826
int64_t svt_aom_partition_rate_cost(PictureParentControlSet* ppcs, const BlockSize bsize, const int mi_row,
1827
                                    const int mi_col, MdRateEstimationContext* md_rate_est_ctx, PartitionType p,
1828
319k
                                    const PartitionContextType left_ctx, const PartitionContextType above_ctx) {
1829
319k
    if (bsize < BLOCK_8X8) {
1830
0
        return 0;
1831
0
    }
1832
319k
    assert(bsize < BLOCK_SIZES_ALL && mi_size_wide_log2[bsize] == mi_size_high_log2[bsize]);
1833
1834
319k
    const int hbs      = mi_size_wide[bsize] >> 1;
1835
319k
    const int has_rows = (mi_row + hbs) < ppcs->av1_cm->mi_rows;
1836
319k
    const int has_cols = (mi_col + hbs) < ppcs->av1_cm->mi_cols;
1837
    // Don't consider invalid partitions or blocks outside the picture
1838
319k
    if (!has_rows && !has_cols) {
1839
816
        return 0;
1840
816
    }
1841
1842
318k
    const int bsl = mi_size_wide_log2[bsize] - mi_size_wide_log2[BLOCK_8X8];
1843
318k
    assert(bsl >= 0);
1844
1845
318k
    const int      above = (above_ctx >> bsl) & 1, left = (left_ctx >> bsl) & 1;
1846
318k
    const uint32_t context_index = (left * 2 + above) + bsl * PARTITION_PLOFFSET;
1847
1848
318k
    uint64_t split_rate = 0;
1849
1850
318k
    if (has_rows && has_cols) {
1851
303k
        split_rate = (uint64_t)md_rate_est_ctx->partition_fac_bits[context_index][p];
1852
303k
    } else if (!has_rows && has_cols) {
1853
        // 8x8 blocks will not use the split_or_horz or the split_or_vert paritition CDFs, per
1854
        // section 8.3.2 of the AV1 spec (Cdf selection process).  Therefore, only update partition ctx 4+,
1855
        // which corresponds to the paritition CDFs for 16x16 and larger blocks
1856
7.76k
        assert(bsize != BLOCK_8X8);
1857
7.76k
        split_rate = bsize == BLOCK_128X128
1858
7.76k
            ? (uint64_t)md_rate_est_ctx->partition_vert_alike_128x128_fac_bits[context_index][p == PARTITION_SPLIT]
1859
7.76k
            : (uint64_t)md_rate_est_ctx->partition_vert_alike_fac_bits[context_index][p == PARTITION_SPLIT];
1860
7.82k
    } else {
1861
        // 8x8 blocks will not use the split_or_horz or the split_or_vert paritition CDFs, per
1862
        // section 8.3.2 of the AV1 spec (Cdf selection process).  Therefore, only update partition ctx 4+,
1863
        // which corresponds to the paritition CDFs for 16x16 and larger blocks
1864
7.82k
        assert(bsize != BLOCK_8X8);
1865
7.82k
        split_rate = bsize == BLOCK_128X128
1866
7.82k
            ? (uint64_t)md_rate_est_ctx->partition_horz_alike_128x128_fac_bits[context_index][p == PARTITION_SPLIT]
1867
7.82k
            : (uint64_t)md_rate_est_ctx->partition_horz_alike_fac_bits[context_index][p == PARTITION_SPLIT];
1868
7.82k
    }
1869
1870
318k
    return split_rate;
1871
319k
}