/work/svt-av1/Source/Lib/Codec/rd_cost.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright(c) 2019 Intel Corporation |
3 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
4 | | * |
5 | | * This source code is subject to the terms of the BSD 2 Clause License and |
6 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
7 | | * was not distributed with this source code in the LICENSE file, you can |
8 | | * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open |
9 | | * Media Patent License 1.0 was not distributed with this source code in the |
10 | | * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license. |
11 | | */ |
12 | | |
13 | | /*************************************** |
14 | | * Includes |
15 | | ***************************************/ |
16 | | #include "rd_cost.h" |
17 | | #include "common_utils.h" |
18 | | #include "aom_dsp_rtcd.h" |
19 | | #include "svt_log.h" |
20 | | #include "enc_inter_prediction.h" |
21 | | #include "full_loop.h" |
22 | | #include "entropy_coding.h" |
23 | | |
24 | | #include <assert.h> |
25 | | |
26 | 0 | #define MV_COST_WEIGHT 108 |
27 | | int svt_aom_get_reference_mode_context_new(const MacroBlockD* xd); |
28 | | int svt_av1_get_pred_context_uni_comp_ref_p(const MacroBlockD* xd); |
29 | | int svt_av1_get_pred_context_uni_comp_ref_p1(const MacroBlockD* xd); |
30 | | int svt_av1_get_pred_context_uni_comp_ref_p2(const MacroBlockD* xd); |
31 | | int svt_aom_get_comp_reference_type_context_new(const MacroBlockD* xd); |
32 | | |
33 | | int svt_aom_get_palette_bsize_ctx(BlockSize bsize); |
34 | | int svt_aom_get_palette_mode_ctx(const MacroBlockD* xd); |
35 | | int svt_aom_write_uniform_cost(int n, int v); |
36 | | int svt_get_palette_cache_y(const MacroBlockD* const xd, uint16_t* cache); |
37 | | int svt_av1_palette_color_cost_y(const PaletteModeInfo* const pmi, uint16_t* color_cache, const int palette_size, |
38 | | int n_cache, int bit_depth); |
39 | | int svt_av1_cost_color_map(ModeDecisionCandidate* cand, MdRateEstimationContext* rate_table, |
40 | | |
41 | | BlkStruct* blk_ptr, int plane, BlockSize bsize, COLOR_MAP_TYPE type); |
42 | | void svt_aom_get_block_dimensions(BlockSize bsize, int plane, const MacroBlockD* xd, int* width, int* height, |
43 | | int* rows_within_bounds, int* cols_within_bounds); |
44 | | int svt_aom_allow_palette(int allow_screen_content_tools, BlockSize bsize); |
45 | | int svt_aom_allow_intrabc(const FrameHeader* frm_hdr, SliceType slice_type); |
46 | | |
47 | 0 | MvJointType svt_av1_get_mv_joint(const Mv* mv) { |
48 | 0 | if (mv->y == 0) { |
49 | 0 | return mv->x == 0 ? MV_JOINT_ZERO : MV_JOINT_HNZVZ; |
50 | 0 | } else { |
51 | 0 | return mv->x == 0 ? MV_JOINT_HZVNZ : MV_JOINT_HNZVNZ; |
52 | 0 | } |
53 | 0 | } |
54 | | |
55 | 0 | static int32_t mv_cost(const Mv* mv, const int32_t* joint_cost, const int32_t* const comp_cost[2]) { |
56 | 0 | int32_t jn_c = svt_av1_get_mv_joint(mv); |
57 | 0 | int32_t res = joint_cost[jn_c] + comp_cost[0][CLIP3(MV_LOW, MV_UPP, mv->y)] + |
58 | 0 | comp_cost[1][CLIP3(MV_LOW, MV_UPP, mv->x)]; |
59 | 0 | return res; |
60 | 0 | } |
61 | | |
62 | 0 | int32_t svt_av1_mv_bit_cost_light(const Mv* mv, const Mv* ref) { |
63 | 0 | const uint32_t factor = 50; |
64 | 0 | const uint32_t absmvdiffx = ABS(mv->x - ref->x); |
65 | 0 | const uint32_t absmvdiffy = ABS(mv->y - ref->y); |
66 | 0 | const uint32_t mv_rate = 1296 + (factor * (absmvdiffx + absmvdiffy)); |
67 | 0 | return mv_rate; |
68 | 0 | } |
69 | | |
70 | | int32_t svt_av1_mv_bit_cost(const Mv* mv, const Mv* ref, const int32_t* mvjcost, const int32_t* const mvcost[2], |
71 | 0 | int32_t weight) { |
72 | | // Restrict the size of the MV diff to be within the max AV1 range. If the MV diff |
73 | | // is outside this range, the diff will index beyond the cost array, causing a seg fault. |
74 | | // Both the MVs and the MV diffs should be within the allowable range for accessing the MV cost |
75 | | // infrastructure. |
76 | 0 | const int16_t x = MIN(MAX(mv->x - ref->x, MV_LOW), MV_UPP); |
77 | 0 | const int16_t y = MIN(MAX(mv->y - ref->y, MV_LOW), MV_UPP); |
78 | 0 | Mv temp_diff = {{x, y}}; |
79 | |
|
80 | 0 | return ROUND_POWER_OF_TWO(mv_cost(&temp_diff, mvjcost, mvcost) * weight, 7); |
81 | 0 | } |
82 | | |
83 | | /////////////////////////////COEFFICIENT CALCULATION ////////////////////////////////////////////// |
84 | 9.33k | static INLINE int32_t get_golomb_cost(int32_t abs_qc) { |
85 | 9.33k | if (abs_qc >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) { |
86 | 9.33k | const int32_t r = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS; |
87 | 9.33k | const int32_t length = get_msb(r) + 1; |
88 | 9.33k | return av1_cost_literal(2 * length - 1); |
89 | 9.33k | } |
90 | 0 | return 0; |
91 | 9.33k | } |
92 | | |
93 | | void svt_av1_txb_init_levels_c(const TranLow* const coeff, const int32_t width, const int32_t height, |
94 | 0 | uint8_t* const levels) { |
95 | 0 | const int32_t stride = width + TX_PAD_HOR; |
96 | 0 | uint8_t* ls = levels; |
97 | |
|
98 | 0 | memset(levels - TX_PAD_TOP * stride, 0, sizeof(*levels) * TX_PAD_TOP * stride); |
99 | 0 | memset(levels + stride * height, 0, sizeof(*levels) * (TX_PAD_BOTTOM * stride + TX_PAD_END)); |
100 | |
|
101 | 0 | for (int32_t i = 0; i < height; i++) { |
102 | 0 | for (int32_t j = 0; j < width; j++) { |
103 | 0 | *ls++ = (uint8_t)clamp(abs(coeff[i * width + j]), 0, INT8_MAX); |
104 | 0 | } |
105 | 0 | for (int32_t j = 0; j < TX_PAD_HOR; j++) { |
106 | 0 | *ls++ = 0; |
107 | 0 | } |
108 | 0 | } |
109 | 0 | } |
110 | | |
111 | | static int32_t av1_transform_type_rate_estimation(ModeDecisionContext* ctx, uint8_t allow_update_cdf, FRAME_CONTEXT* fc, |
112 | | ModeDecisionCandidateBuffer* cand_bf, bool is_inter, |
113 | | TxSize transform_size, TxType transform_type, |
114 | 0 | bool reduced_tx_set_used) { |
115 | | // const MbModeInfo *mbmi = &xd->mi[0]->mbmi; |
116 | | // const int32_t is_inter = is_inter_block(mbmi); |
117 | |
|
118 | 0 | if (get_ext_tx_types(transform_size, is_inter, reduced_tx_set_used) > |
119 | 0 | 1 /*&& !xd->lossless[xd->mi[0]->mbmi.segment_id] WE ARE NOT LOSSLESS*/) { |
120 | 0 | const TxSize square_tx_size = txsize_sqr_map[transform_size]; |
121 | 0 | assert(square_tx_size < EXT_TX_SIZES); |
122 | |
|
123 | 0 | const int32_t ext_tx_set = get_ext_tx_set(transform_size, is_inter, reduced_tx_set_used); |
124 | 0 | if (is_inter) { |
125 | 0 | if (ext_tx_set > 0) { |
126 | 0 | if (allow_update_cdf) { |
127 | 0 | const TxSetType tx_set_type = get_ext_tx_set_type(transform_size, is_inter, reduced_tx_set_used); |
128 | |
|
129 | 0 | update_cdf(fc->inter_ext_tx_cdf[ext_tx_set][square_tx_size], |
130 | 0 | av1_ext_tx_ind[tx_set_type][transform_type], |
131 | 0 | av1_num_ext_tx_set[tx_set_type]); |
132 | 0 | } |
133 | 0 | return ctx->md_rate_est_ctx->inter_tx_type_fac_bits[ext_tx_set][square_tx_size][transform_type]; |
134 | 0 | } |
135 | 0 | } else { |
136 | 0 | if (ext_tx_set > 0) { |
137 | 0 | PredictionMode intra_dir; |
138 | 0 | if (cand_bf->cand->block_mi.filter_intra_mode != FILTER_INTRA_MODES) { |
139 | 0 | intra_dir = fimode_to_intradir[cand_bf->cand->block_mi.filter_intra_mode]; |
140 | 0 | } else { |
141 | 0 | intra_dir = cand_bf->cand->block_mi.mode; |
142 | 0 | } |
143 | 0 | assert(intra_dir < INTRA_MODES); |
144 | 0 | const TxSetType tx_set_type = get_ext_tx_set_type(transform_size, is_inter, reduced_tx_set_used); |
145 | |
|
146 | 0 | if (allow_update_cdf) { |
147 | 0 | update_cdf(fc->intra_ext_tx_cdf[ext_tx_set][square_tx_size][intra_dir], |
148 | 0 | av1_ext_tx_ind[tx_set_type][transform_type], |
149 | 0 | av1_num_ext_tx_set[tx_set_type]); |
150 | 0 | } |
151 | 0 | return ctx->md_rate_est_ctx |
152 | 0 | ->intra_tx_type_fac_bits[ext_tx_set][square_tx_size][intra_dir][transform_type]; |
153 | 0 | } |
154 | 0 | } |
155 | 0 | } |
156 | 0 | return 0; |
157 | 0 | } |
158 | | |
159 | | // Update the eob-related CDFs. Function assumes allow_update_cdf is true |
160 | | // as the only action of the function is to update the CDFs. |
161 | 0 | static void update_eob_context(int eob, TxSize tx_size, TxClass tx_class, PlaneType plane, FRAME_CONTEXT* ec_ctx) { |
162 | 0 | int eob_extra; |
163 | 0 | const int eob_pt = get_eob_pos_token(eob, &eob_extra); |
164 | 0 | const TxSize txs_ctx = (TxSize)((txsize_sqr_map[tx_size] + txsize_sqr_up_map[tx_size] + 1) >> 1); |
165 | 0 | assert(txs_ctx < TX_SIZES); |
166 | 0 | const int eob_multi_size = txsize_log2_minus4[tx_size]; |
167 | 0 | const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1; |
168 | |
|
169 | 0 | switch (eob_multi_size) { |
170 | 0 | case 0: |
171 | 0 | update_cdf(ec_ctx->eob_flag_cdf16[plane][eob_multi_ctx], eob_pt - 1, 5); |
172 | 0 | break; |
173 | 0 | case 1: |
174 | 0 | update_cdf(ec_ctx->eob_flag_cdf32[plane][eob_multi_ctx], eob_pt - 1, 6); |
175 | 0 | break; |
176 | 0 | case 2: |
177 | 0 | update_cdf(ec_ctx->eob_flag_cdf64[plane][eob_multi_ctx], eob_pt - 1, 7); |
178 | 0 | break; |
179 | 0 | case 3: |
180 | 0 | update_cdf(ec_ctx->eob_flag_cdf128[plane][eob_multi_ctx], eob_pt - 1, 8); |
181 | 0 | break; |
182 | 0 | case 4: |
183 | 0 | update_cdf(ec_ctx->eob_flag_cdf256[plane][eob_multi_ctx], eob_pt - 1, 9); |
184 | 0 | break; |
185 | 0 | case 5: |
186 | 0 | update_cdf(ec_ctx->eob_flag_cdf512[plane][eob_multi_ctx], eob_pt - 1, 10); |
187 | 0 | break; |
188 | 0 | case 6: |
189 | 0 | default: |
190 | 0 | update_cdf(ec_ctx->eob_flag_cdf1024[plane][eob_multi_ctx], eob_pt - 1, 11); |
191 | 0 | break; |
192 | 0 | } |
193 | | |
194 | 0 | const int eob_offset_bits = svt_aom_eob_offset_bits[eob_pt]; |
195 | 0 | if (eob_offset_bits > 0) { |
196 | 0 | const int eob_ctx = eob_pt - 3; |
197 | 0 | const int eob_shift = eob_offset_bits - 1; |
198 | 0 | const int bit = (eob_extra & (1 << eob_shift)) ? 1 : 0; |
199 | 0 | update_cdf(ec_ctx->eob_extra_cdf[txs_ctx][plane][eob_ctx], bit, 2); |
200 | 0 | } |
201 | 0 | } |
202 | | |
203 | | // Transform end of block bit estimation |
204 | 21.2k | int get_eob_cost(int eob, const LvMapEobCost* txb_eob_costs, const LvMapCoeffCost* txb_costs, TxClass tx_class) { |
205 | 21.2k | int eob_extra; |
206 | 21.2k | const int eob_pt = get_eob_pos_token(eob, &eob_extra); |
207 | 21.2k | const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1; |
208 | 21.2k | int eob_cost = txb_eob_costs->eob_cost[eob_multi_ctx][eob_pt - 1]; |
209 | | |
210 | 21.2k | const int eob_offset_bits = svt_aom_eob_offset_bits[eob_pt]; |
211 | 21.2k | if (eob_offset_bits > 0) { |
212 | 0 | const int eob_ctx = eob_pt - 3; |
213 | 0 | const int eob_shift = eob_offset_bits - 1; |
214 | 0 | const int bit = (eob_extra & (1 << eob_shift)) ? 1 : 0; |
215 | 0 | eob_cost += txb_costs->eob_extra_cost[eob_ctx][bit]; |
216 | 0 | if (eob_offset_bits > 1) { |
217 | 0 | eob_cost += av1_cost_literal(eob_offset_bits - 1); |
218 | 0 | } |
219 | 0 | } |
220 | 21.2k | return eob_cost; |
221 | 21.2k | } |
222 | | |
223 | | static INLINE int32_t av1_cost_skip_txb(ModeDecisionContext* ctx, uint8_t allow_update_cdf, FRAME_CONTEXT* ec_ctx, |
224 | 243k | TxSize transform_size, PlaneType plane_type, int16_t txb_skip_ctx) { |
225 | 243k | const TxSize txs_ctx = (TxSize)((txsize_sqr_map[transform_size] + txsize_sqr_up_map[transform_size] + 1) >> 1); |
226 | 243k | assert(txs_ctx < TX_SIZES); |
227 | 243k | const LvMapCoeffCost* const coeff_costs = &ctx->md_rate_est_ctx->coeff_fac_bits[txs_ctx][plane_type]; |
228 | 243k | if (allow_update_cdf) { |
229 | 0 | update_cdf(ec_ctx->txb_skip_cdf[txs_ctx][txb_skip_ctx], 1, 2); |
230 | 0 | } |
231 | 243k | return coeff_costs->txb_skip_cost[txb_skip_ctx][1]; |
232 | 243k | } |
233 | | |
234 | | static INLINE int32_t av1_cost_coeffs_txb_loop_cost_one_eob(const TranLow* const qcoeff, int8_t* const coeff_contexts, |
235 | 10.9k | const LvMapCoeffCost* coeff_costs, int16_t dc_sign_ctx) { |
236 | 10.9k | const TranLow v = qcoeff[0]; |
237 | 10.9k | const int32_t level = abs(v); |
238 | 10.9k | const int32_t coeff_ctx = coeff_contexts[0]; |
239 | | |
240 | 10.9k | assert((AOMMIN(level, 3) - 1) >= 0); |
241 | 10.9k | int32_t cost = coeff_costs->base_eob_cost[coeff_ctx][AOMMIN(level, 3) - 1]; |
242 | | |
243 | 10.9k | if (v != 0) { |
244 | 10.9k | const int32_t sign = (v < 0) ? 1 : 0; |
245 | | // sign bit cost |
246 | 10.9k | cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign]; |
247 | | |
248 | 10.9k | if (level > NUM_BASE_LEVELS) { |
249 | 9.98k | const int32_t base_range = level - 1 - NUM_BASE_LEVELS; |
250 | | |
251 | 9.98k | if (base_range < COEFF_BASE_RANGE) { |
252 | 644 | cost += coeff_costs->lps_cost[0][base_range]; |
253 | 9.33k | } else { |
254 | 9.33k | cost += coeff_costs->lps_cost[0][COEFF_BASE_RANGE]; |
255 | 9.33k | } |
256 | | |
257 | 9.98k | if (level >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) { |
258 | 9.33k | cost += get_golomb_cost(level); |
259 | 9.33k | } |
260 | 9.98k | } |
261 | 10.9k | } |
262 | 10.9k | return cost; |
263 | 10.9k | } |
264 | | |
265 | | static INLINE int32_t av1_cost_coeffs_txb_loop_cost_eob(ModeDecisionContext* md_ctx, uint16_t eob, |
266 | | const int16_t* const scan, const TranLow* const qcoeff, |
267 | | int8_t* const coeff_contexts, const LvMapCoeffCost* coeff_costs, |
268 | | int16_t dc_sign_ctx, uint8_t* const levels, const int32_t bwl, |
269 | 10.9k | TxType transform_type) { |
270 | 10.9k | const uint32_t cost_literal = av1_cost_literal(1); |
271 | 10.9k | int32_t cost = 0; |
272 | | |
273 | | //Optimized/simplified function when eob is 1 |
274 | 10.9k | if (eob == 1) { |
275 | 10.9k | return av1_cost_coeffs_txb_loop_cost_one_eob(qcoeff, coeff_contexts, coeff_costs, dc_sign_ctx); |
276 | 10.9k | } |
277 | | |
278 | | // first (eob - 1) index |
279 | 0 | { |
280 | 0 | const int32_t pos = scan[eob - 1]; |
281 | 0 | const TranLow v = qcoeff[pos]; |
282 | 0 | const int32_t level = abs(v); |
283 | 0 | const int32_t coeff_ctx = coeff_contexts[pos]; |
284 | |
|
285 | 0 | assert((AOMMIN(level, 3) - 1) >= 0); |
286 | 0 | cost += coeff_costs->base_eob_cost[coeff_ctx][AOMMIN(level, 3) - 1]; |
287 | |
|
288 | 0 | if (v != 0) { |
289 | 0 | cost += cost_literal; |
290 | 0 | if (level > NUM_BASE_LEVELS) { |
291 | 0 | int32_t ctx = get_br_ctx(levels, pos, bwl, tx_type_to_class[transform_type]); |
292 | 0 | const int32_t base_range = level - 1 - NUM_BASE_LEVELS; |
293 | |
|
294 | 0 | if (base_range < COEFF_BASE_RANGE) { |
295 | 0 | cost += coeff_costs->lps_cost[ctx][base_range]; |
296 | 0 | } else { |
297 | 0 | cost += coeff_costs->lps_cost[ctx][COEFF_BASE_RANGE]; |
298 | 0 | } |
299 | |
|
300 | 0 | if (level >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) { |
301 | 0 | cost += get_golomb_cost(level); |
302 | 0 | } |
303 | 0 | } |
304 | 0 | } |
305 | 0 | } |
306 | | // last (0) index |
307 | 0 | { |
308 | 0 | const TranLow v = qcoeff[0]; |
309 | 0 | const int32_t level = abs(v); |
310 | 0 | const int32_t coeff_ctx = coeff_contexts[0]; |
311 | |
|
312 | 0 | cost += coeff_costs->base_cost[coeff_ctx][AOMMIN(level, 3)]; |
313 | |
|
314 | 0 | if (v != 0) { |
315 | 0 | const int32_t sign = (v < 0) ? 1 : 0; |
316 | | // sign bit cost |
317 | |
|
318 | 0 | cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign]; |
319 | |
|
320 | 0 | if (level > NUM_BASE_LEVELS) { |
321 | 0 | int32_t ctx = get_br_ctx(levels, 0, bwl, tx_type_to_class[transform_type]); |
322 | 0 | const int32_t base_range = level - 1 - NUM_BASE_LEVELS; |
323 | |
|
324 | 0 | if (base_range < COEFF_BASE_RANGE) { |
325 | 0 | cost += coeff_costs->lps_cost[ctx][base_range]; |
326 | 0 | } else { |
327 | 0 | cost += coeff_costs->lps_cost[ctx][COEFF_BASE_RANGE]; |
328 | 0 | } |
329 | |
|
330 | 0 | if (level >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) { |
331 | 0 | cost += get_golomb_cost(level); |
332 | 0 | } |
333 | 0 | } |
334 | 0 | } |
335 | 0 | } |
336 | 0 | int32_t c; |
337 | | /* Optimized Loop, omitted first (eob - 1) and last (0) index */ |
338 | | // Estimate the rate of the first(eob / fast_coeff_est_level) coeff(s), DC and last coeff only |
339 | 0 | int32_t c_start = MIN(eob - 2, eob / MAX(1, (int)(md_ctx->mds_fast_coeff_est_level - md_ctx->mds_subres_step))); |
340 | 0 | uint32_t cost_literal_cnt = 0; |
341 | 0 | for (c = c_start; c >= 1; --c) { |
342 | 0 | const int32_t pos = scan[c]; |
343 | 0 | cost_literal_cnt += !!(qcoeff[pos]); |
344 | 0 | const int32_t level = abs(qcoeff[pos]); |
345 | 0 | if (level > NUM_BASE_LEVELS) { |
346 | 0 | int32_t ctx = get_br_ctx(levels, pos, bwl, tx_type_to_class[transform_type]); |
347 | 0 | const int32_t base_range = level - 1 - NUM_BASE_LEVELS; |
348 | |
|
349 | 0 | cost += coeff_costs->base_cost[coeff_contexts[pos]][3]; |
350 | 0 | if (base_range < COEFF_BASE_RANGE) { |
351 | 0 | cost += coeff_costs->lps_cost[ctx][base_range]; |
352 | 0 | } else { |
353 | 0 | cost += get_golomb_cost(level) + coeff_costs->lps_cost[ctx][COEFF_BASE_RANGE]; |
354 | 0 | } |
355 | 0 | } else { |
356 | 0 | cost += coeff_costs->base_cost[coeff_contexts[pos]][level]; |
357 | 0 | } |
358 | 0 | } |
359 | 0 | cost += cost_literal_cnt * cost_literal; |
360 | |
|
361 | 0 | return cost; |
362 | 10.9k | } |
363 | | |
364 | | // Note: don't call this function when eob is 0. |
365 | | uint64_t svt_av1_cost_coeffs_txb(ModeDecisionContext* ctx, uint8_t allow_update_cdf, FRAME_CONTEXT* ec_ctx, |
366 | | ModeDecisionCandidateBuffer* cand_bf, const TranLow* const qcoeff, uint16_t eob, |
367 | | PlaneType plane_type, TxSize transform_size, TxType transform_type, |
368 | | int16_t txb_skip_ctx, int16_t dc_sign_ctx, bool reduced_transform_set_flag) |
369 | | |
370 | 10.9k | { |
371 | | //Note: there is a different version of this function in AOM that seems to be efficient as its name is: |
372 | | //warehouse_efficients_txb |
373 | | |
374 | 10.9k | const TxSize txs_ctx = (TxSize)((txsize_sqr_map[transform_size] + txsize_sqr_up_map[transform_size] + 1) >> 1); |
375 | 10.9k | const TxClass tx_class = tx_type_to_class[transform_type]; |
376 | 10.9k | int32_t cost; |
377 | 10.9k | const int32_t bwl = get_txb_bwl(transform_size); |
378 | 10.9k | const int32_t width = get_txb_wide(transform_size); |
379 | 10.9k | const int32_t height = get_txb_high(transform_size); |
380 | | |
381 | 10.9k | const ScanOrder* const scan_order = get_scan_order(transform_size, transform_type); |
382 | 10.9k | const int16_t* const scan = scan_order->scan; |
383 | 10.9k | uint8_t levels_buf[TX_PAD_2D]; |
384 | 10.9k | uint8_t* const levels = set_levels(levels_buf, width); |
385 | 10.9k | DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]); |
386 | 10.9k | assert(txs_ctx < TX_SIZES); |
387 | 10.9k | const LvMapCoeffCost* const coeff_costs = &ctx->md_rate_est_ctx->coeff_fac_bits[txs_ctx][plane_type]; |
388 | | |
389 | 10.9k | const int32_t eob_multi_size = txsize_log2_minus4[transform_size]; |
390 | 10.9k | const LvMapEobCost* const eob_bits = &ctx->md_rate_est_ctx->eob_frac_bits[eob_multi_size][plane_type]; |
391 | | // eob must be greater than 0 here. |
392 | 10.9k | assert(eob > 0); |
393 | 10.9k | cost = coeff_costs->txb_skip_cost[txb_skip_ctx][0]; |
394 | | |
395 | 10.9k | if (allow_update_cdf) { |
396 | 0 | update_cdf(ec_ctx->txb_skip_cdf[txs_ctx][txb_skip_ctx], eob == 0, 2); |
397 | 0 | } |
398 | | |
399 | 10.9k | if (eob > 1) { |
400 | 0 | svt_av1_txb_init_levels(qcoeff, |
401 | 0 | width, |
402 | 0 | height, |
403 | 0 | levels); // NM - Needs to be optimized - to be combined with the quantisation. |
404 | 0 | } |
405 | 10.9k | const bool is_inter = is_inter_mode(cand_bf->cand->block_mi.mode); |
406 | | // Transform type bit estimation |
407 | 10.9k | cost += plane_type > PLANE_TYPE_Y ? 0 |
408 | 10.9k | : av1_transform_type_rate_estimation(ctx, |
409 | 1 | allow_update_cdf, |
410 | 1 | ec_ctx, |
411 | 1 | cand_bf, |
412 | 1 | is_inter, |
413 | 1 | transform_size, |
414 | 1 | transform_type, |
415 | 1 | reduced_transform_set_flag); |
416 | | |
417 | | // Transform eob bit estimation |
418 | 10.9k | cost += get_eob_cost(eob, eob_bits, coeff_costs, tx_class); |
419 | 10.9k | if (allow_update_cdf) { |
420 | 0 | update_eob_context(eob, transform_size, tx_class, plane_type, ec_ctx); |
421 | 0 | } |
422 | | // Transform non-zero coeff bit estimation |
423 | 10.9k | svt_av1_get_nz_map_contexts(levels, |
424 | 10.9k | scan, |
425 | 10.9k | eob, |
426 | 10.9k | transform_size, |
427 | 10.9k | tx_class, |
428 | 10.9k | coeff_contexts); // NM - Assembly version is available in AOM |
429 | 10.9k | assert(eob <= width * height); |
430 | 10.9k | if (allow_update_cdf) { |
431 | 0 | for (int c = eob - 1; c >= 0; --c) { |
432 | 0 | const int pos = scan[c]; |
433 | 0 | const int coeff_ctx = coeff_contexts[pos]; |
434 | 0 | const TranLow v = qcoeff[pos]; |
435 | 0 | const TranLow level = abs(v); |
436 | 0 | if (c == eob - 1) { |
437 | 0 | assert(coeff_ctx < 4); |
438 | 0 | update_cdf(ec_ctx->coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx], AOMMIN(level, 3) - 1, 3); |
439 | 0 | } else { |
440 | 0 | update_cdf(ec_ctx->coeff_base_cdf[txs_ctx][plane_type][coeff_ctx], AOMMIN(level, 3), 4); |
441 | 0 | } |
442 | |
|
443 | 0 | { |
444 | 0 | if (c == eob - 1) { |
445 | 0 | assert(coeff_ctx < 4); |
446 | | #if CONFIG_ENTROPY_STATS |
447 | | ++td->counts |
448 | | ->coeff_base_eob_multi[cdf_idx][txsize_ctx][plane_type][coeff_ctx][AOMMIN(level, 3) - 1]; |
449 | | } else { |
450 | | ++td->counts->coeff_base_multi[cdf_idx][txsize_ctx][plane_type][coeff_ctx][AOMMIN(level, 3)]; |
451 | | #endif |
452 | 0 | } |
453 | 0 | } |
454 | |
|
455 | 0 | if (level > NUM_BASE_LEVELS) { |
456 | 0 | const int base_range = level - 1 - NUM_BASE_LEVELS; |
457 | 0 | int br_ctx; |
458 | 0 | if (eob == 1) { |
459 | 0 | br_ctx = 0; |
460 | 0 | } else { |
461 | 0 | br_ctx = get_br_ctx(levels, pos, bwl, tx_class); |
462 | 0 | } |
463 | |
|
464 | 0 | for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) { |
465 | 0 | const int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1); |
466 | 0 | update_cdf(ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)][plane_type][br_ctx], k, BR_CDF_SIZE); |
467 | 0 | for (int lps = 0; lps < BR_CDF_SIZE - 1; lps++) { |
468 | | #if CONFIG_ENTROPY_STATS |
469 | | ++td->counts->coeff_lps[AOMMIN(txsize_ctx, TX_32X32)][plane_type][lps][br_ctx][lps == k]; |
470 | | #endif // CONFIG_ENTROPY_STATS |
471 | 0 | if (lps == k) { |
472 | 0 | break; |
473 | 0 | } |
474 | 0 | } |
475 | | #if CONFIG_ENTROPY_STATS |
476 | | ++td->counts->coeff_lps_multi[cdf_idx][AOMMIN(txsize_ctx, TX_32X32)][plane_type][br_ctx][k]; |
477 | | #endif |
478 | 0 | if (k < BR_CDF_SIZE - 1) { |
479 | 0 | break; |
480 | 0 | } |
481 | 0 | } |
482 | 0 | } |
483 | 0 | } |
484 | |
|
485 | 0 | if (qcoeff[0] != 0) { |
486 | 0 | update_cdf(ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], qcoeff[0] < 0, 2); |
487 | 0 | } |
488 | | |
489 | | //TODO: CHKN for 128x128 where we need more than one TXb, we need to update the txb_context(dc_sign+skip_ctx) in a Txb basis. |
490 | |
|
491 | 0 | return 0; |
492 | 0 | } |
493 | | |
494 | 10.9k | cost += av1_cost_coeffs_txb_loop_cost_eob( |
495 | 10.9k | ctx, eob, scan, qcoeff, coeff_contexts, coeff_costs, dc_sign_ctx, levels, bwl, transform_type); |
496 | 10.9k | return cost; |
497 | 10.9k | } |
498 | | |
499 | | uint64_t svt_aom_get_intra_uv_fast_rate(PictureControlSet* pcs, ModeDecisionContext* ctx, |
500 | 127k | ModeDecisionCandidateBuffer* cand_bf, bool use_accurate_cfl) { |
501 | 127k | const BlockGeom* const blk_geom = ctx->blk_geom; |
502 | 127k | ModeDecisionCandidate* cand = cand_bf->cand; |
503 | 127k | assert(ctx->has_uv); |
504 | 127k | assert(!(svt_aom_allow_intrabc(&pcs->ppcs->frm_hdr, pcs->ppcs->slice_type) && cand->block_mi.use_intrabc)); |
505 | 127k | MdRateEstimationContext* md_rate_est_ctx = ctx->md_rate_est_ctx; |
506 | 127k | const uint8_t is_cfl_allowed = (blk_geom->bwidth <= 32 && blk_geom->bheight <= 32) ? 1 : 0; |
507 | 127k | PredictionMode intra_mode = (PredictionMode)cand->block_mi.mode; |
508 | | // If CFL alphas are not known yet, calculate the chroma mode bits based on DC Mode. If CFL is selected the chroma mode bits must be updated later |
509 | 127k | const UvPredictionMode chroma_mode = cand->block_mi.uv_mode == UV_CFL_PRED && !use_accurate_cfl |
510 | 127k | ? UV_DC_PRED |
511 | 127k | : cand->block_mi.uv_mode; |
512 | 127k | const uint32_t mi_row = ctx->blk_org_y >> MI_SIZE_LOG2; |
513 | 127k | const uint32_t mi_col = ctx->blk_org_x >> MI_SIZE_LOG2; |
514 | | // Subsampling assumes YUV 420 content |
515 | 127k | const uint8_t ss_x = 1; |
516 | 127k | const uint8_t ss_y = 1; |
517 | | |
518 | 127k | uint64_t chroma_rate = 0; |
519 | | // Estimate chroma nominal intra mode bits |
520 | 127k | chroma_rate += (uint64_t)md_rate_est_ctx->intra_uv_mode_fac_bits[is_cfl_allowed][intra_mode][chroma_mode]; |
521 | | |
522 | | // Estimate chroma angular mode bits; angular offset is only allowed for bsize >= 8x8 |
523 | 127k | if (blk_geom->bsize >= BLOCK_8X8 && av1_is_directional_mode(get_uv_mode(chroma_mode))) { |
524 | 0 | chroma_rate += |
525 | 0 | md_rate_est_ctx->angle_delta_fac_bits[chroma_mode - V_PRED] |
526 | 0 | [MAX_ANGLE_DELTA + cand->block_mi.angle_delta[PLANE_TYPE_UV]]; |
527 | 0 | } |
528 | | |
529 | | // Estimate CFL factor bits when CFL is used |
530 | 127k | if (chroma_mode == UV_CFL_PRED) { |
531 | 0 | chroma_rate += (uint64_t)md_rate_est_ctx->cfl_alpha_fac_bits[cand->block_mi.cfl_alpha_signs][CFL_PRED_U] |
532 | 0 | [CFL_IDX_U(cand->block_mi.cfl_alpha_idx)] + |
533 | 0 | (uint64_t)md_rate_est_ctx->cfl_alpha_fac_bits[cand->block_mi.cfl_alpha_signs][CFL_PRED_V] |
534 | 0 | [CFL_IDX_V(cand->block_mi.cfl_alpha_idx)]; |
535 | 0 | } |
536 | | |
537 | | // Estimate chroma palette mode bits (currently not supported, so just cost of signalling off) |
538 | 127k | if (chroma_mode == UV_DC_PRED && |
539 | 127k | svt_aom_allow_palette(pcs->ppcs->frm_hdr.allow_screen_content_tools, blk_geom->bsize) && |
540 | 0 | is_chroma_reference(mi_row, mi_col, blk_geom->bsize, ss_x, ss_y)) { |
541 | 0 | const int use_palette_y = cand->palette_info && (cand->palette_size[0] > 0); |
542 | 0 | const int use_palette_uv = cand->palette_info && (cand->palette_size[1] > 0); |
543 | 0 | chroma_rate += ctx->md_rate_est_ctx->palette_uv_mode_fac_bits[use_palette_y][use_palette_uv]; |
544 | 0 | } |
545 | | |
546 | 127k | return chroma_rate; |
547 | 127k | } |
548 | | |
549 | | uint64_t svt_aom_intra_fast_cost(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf, |
550 | 127k | uint64_t lambda, uint64_t luma_distortion) { |
551 | 127k | const BlockGeom* blk_geom = ctx->blk_geom; |
552 | 127k | BlkStruct* blk_ptr = ctx->blk_ptr; |
553 | 127k | ModeDecisionCandidate* cand = cand_bf->cand; |
554 | 127k | if (svt_aom_allow_intrabc(&pcs->ppcs->frm_hdr, pcs->ppcs->slice_type) && cand->block_mi.use_intrabc) { |
555 | 0 | uint64_t rate = 0; |
556 | |
|
557 | 0 | Mv mv = {.as_int = cand->block_mi.mv[0].as_int}; |
558 | 0 | Mv ref_mv = {.as_int = cand->pred_mv[0].as_int}; |
559 | 0 | const int* dvcost[2] = {(int*)&ctx->md_rate_est_ctx->dv_cost[0][MV_MAX], |
560 | 0 | (int*)&ctx->md_rate_est_ctx->dv_cost[1][MV_MAX]}; |
561 | 0 | int32_t mv_rate = svt_av1_mv_bit_cost( |
562 | 0 | &mv, &ref_mv, ctx->md_rate_est_ctx->dv_joint_cost, dvcost, MV_COST_WEIGHT_SUB); |
563 | |
|
564 | 0 | rate = mv_rate + ctx->md_rate_est_ctx->intrabc_fac_bits[cand->block_mi.use_intrabc]; |
565 | 0 | cand_bf->fast_luma_rate = rate; |
566 | 0 | cand_bf->fast_chroma_rate = 0; |
567 | 0 | return (RDCOST(lambda, rate, luma_distortion)); |
568 | 127k | } else { |
569 | | // Number of bits for each syntax element |
570 | 127k | uint64_t intra_mode_bits_num = 0; |
571 | 127k | uint64_t intra_luma_mode_bits_num = 0; |
572 | 127k | uint64_t intra_luma_ang_mode_bits_num = 0; |
573 | 127k | uint64_t intra_filter_mode_bits_num = 0; |
574 | 127k | uint64_t skip_mode_rate = 0; |
575 | 127k | const uint8_t skip_mode_ctx = ctx->skip_mode_ctx; |
576 | 127k | PredictionMode intra_mode = (PredictionMode)cand->block_mi.mode; |
577 | | // Luma and chroma rate |
578 | 127k | uint32_t rate; |
579 | 127k | uint32_t luma_rate = 0; |
580 | 127k | uint32_t chroma_rate = 0; |
581 | 127k | intra_mode_bits_num = pcs->slice_type != I_SLICE |
582 | 127k | ? (uint64_t)ctx->md_rate_est_ctx->mb_mode_fac_bits[eb_size_group_lookup[blk_geom->bsize]][intra_mode] |
583 | 127k | : ZERO_COST; |
584 | | |
585 | 127k | skip_mode_rate = pcs->slice_type != I_SLICE && pcs->ppcs->frm_hdr.skip_mode_params.skip_mode_flag && |
586 | 0 | is_comp_ref_allowed(blk_geom->bsize) |
587 | 127k | ? (uint64_t)ctx->md_rate_est_ctx->skip_mode_fac_bits[skip_mode_ctx][0] |
588 | 127k | : ZERO_COST; |
589 | | // Estimate luma nominal intra mode bits for key frame |
590 | 127k | intra_luma_mode_bits_num = pcs->slice_type == I_SLICE |
591 | 127k | ? (uint64_t) |
592 | 127k | ctx->md_rate_est_ctx->y_mode_fac_bits[ctx->intra_luma_top_ctx][ctx->intra_luma_left_ctx][intra_mode] |
593 | 127k | : ZERO_COST; |
594 | | // Estimate luma angular mode bits |
595 | 127k | if (blk_geom->bsize >= BLOCK_8X8 && av1_is_directional_mode(cand->block_mi.mode)) { |
596 | 0 | assert((intra_mode - V_PRED) < 8); |
597 | 0 | assert((intra_mode - V_PRED) >= 0); |
598 | 0 | intra_luma_ang_mode_bits_num = |
599 | 0 | ctx->md_rate_est_ctx->angle_delta_fac_bits[intra_mode - V_PRED] |
600 | 0 | [MAX_ANGLE_DELTA + cand->block_mi.angle_delta[PLANE_TYPE_Y]]; |
601 | 0 | } |
602 | 127k | if (svt_aom_allow_palette(pcs->ppcs->frm_hdr.allow_screen_content_tools, blk_geom->bsize) && |
603 | 0 | intra_mode == DC_PRED) { |
604 | 0 | const int use_palette = cand->palette_info ? (cand->palette_size[0] > 0) : 0; |
605 | 0 | const int bsize_ctx = svt_aom_get_palette_bsize_ctx(blk_geom->bsize); |
606 | 0 | const int mode_ctx = svt_aom_get_palette_mode_ctx(blk_ptr->av1xd); |
607 | 0 | intra_luma_mode_bits_num += ctx->md_rate_est_ctx->palette_ymode_fac_bits[bsize_ctx][mode_ctx][use_palette]; |
608 | 0 | if (use_palette) { |
609 | 0 | const uint8_t* const color_map = cand->palette_info->color_idx_map; |
610 | 0 | int block_width, block_height, rows, cols; |
611 | 0 | svt_aom_get_block_dimensions( |
612 | 0 | blk_geom->bsize, 0, blk_ptr->av1xd, &block_width, &block_height, &rows, &cols); |
613 | 0 | const int plt_size = cand->palette_size[0]; |
614 | 0 | int palette_mode_cost = |
615 | 0 | ctx->md_rate_est_ctx->palette_ysize_fac_bits[bsize_ctx][plt_size - PALETTE_MIN_SIZE] + |
616 | 0 | svt_aom_write_uniform_cost(plt_size, color_map[0]); |
617 | 0 | uint16_t color_cache[2 * PALETTE_MAX_SIZE]; |
618 | 0 | const int n_cache = svt_get_palette_cache_y(blk_ptr->av1xd, color_cache); |
619 | 0 | palette_mode_cost += svt_av1_palette_color_cost_y(&cand->palette_info->pmi, |
620 | 0 | color_cache, |
621 | 0 | cand->palette_size[0], |
622 | 0 | n_cache, |
623 | 0 | pcs->ppcs->scs->encoder_bit_depth); |
624 | 0 | palette_mode_cost += svt_av1_cost_color_map( |
625 | 0 | cand, ctx->md_rate_est_ctx, blk_ptr, 0, blk_geom->bsize, PALETTE_MAP); |
626 | 0 | intra_luma_mode_bits_num += palette_mode_cost; |
627 | 0 | } |
628 | 0 | } |
629 | | |
630 | 127k | if (svt_aom_filter_intra_allowed(pcs->ppcs->scs->seq_header.filter_intra_level, |
631 | 127k | blk_geom->bsize, |
632 | 127k | cand->palette_info ? cand->palette_size[0] : 0, |
633 | 127k | intra_mode)) { |
634 | 0 | intra_filter_mode_bits_num = |
635 | 0 | ctx->md_rate_est_ctx |
636 | 0 | ->filter_intra_fac_bits[blk_geom->bsize][cand->block_mi.filter_intra_mode != FILTER_INTRA_MODES]; |
637 | 0 | if (cand->block_mi.filter_intra_mode != FILTER_INTRA_MODES) { |
638 | 0 | intra_filter_mode_bits_num += |
639 | 0 | ctx->md_rate_est_ctx->filter_intra_mode_fac_bits[cand->block_mi.filter_intra_mode]; |
640 | 0 | } |
641 | 0 | } |
642 | 127k | if (ctx->has_uv) { |
643 | | // CFL info not known in fast loop, so assume DC mode when CFL is allowed |
644 | 127k | chroma_rate = (uint32_t)svt_aom_get_intra_uv_fast_rate(pcs, ctx, cand_bf, 0); |
645 | 127k | } |
646 | | |
647 | 127k | uint32_t is_inter_rate = pcs->slice_type != I_SLICE |
648 | 127k | ? ctx->md_rate_est_ctx->intra_inter_fac_bits[ctx->is_inter_ctx][0] |
649 | 127k | : 0; |
650 | 127k | luma_rate = (uint32_t)(intra_mode_bits_num + skip_mode_rate + intra_luma_mode_bits_num + |
651 | 127k | intra_luma_ang_mode_bits_num + is_inter_rate + intra_filter_mode_bits_num); |
652 | 127k | if (svt_aom_allow_intrabc(&pcs->ppcs->frm_hdr, pcs->ppcs->slice_type)) { |
653 | 0 | svt_aom_assert_err(cand->block_mi.use_intrabc == 0, "this block ibc should be off\n"); |
654 | 0 | luma_rate += ctx->md_rate_est_ctx->intrabc_fac_bits[cand->block_mi.use_intrabc]; |
655 | 0 | } |
656 | | // Keep the Fast Luma and Chroma rate for future use |
657 | 127k | cand_bf->fast_luma_rate = luma_rate; |
658 | 127k | cand_bf->fast_chroma_rate = chroma_rate; |
659 | 127k | rate = luma_rate + chroma_rate; |
660 | | // Assign fast cost |
661 | 127k | return (RDCOST(lambda, rate, luma_distortion)); |
662 | 127k | } |
663 | 127k | } |
664 | | |
665 | | // This function estimates the number of bits needed to signal the reference frame(s) of a block (single or compound) |
666 | | uint64_t estimate_ref_frame_type_bits(ModeDecisionContext* ctx, BlkStruct* blk_ptr, uint8_t ref_frame_type, |
667 | 0 | bool is_compound) { |
668 | 0 | uint64_t ref_rate_bits = 0; |
669 | |
|
670 | 0 | MbModeInfo* const mbmi = blk_ptr->av1xd->mi[0]; |
671 | 0 | MvReferenceFrame ref_type[2]; |
672 | 0 | av1_set_ref_frame(ref_type, ref_frame_type); |
673 | 0 | mbmi->block_mi.ref_frame[0] = ref_type[0]; |
674 | 0 | mbmi->block_mi.ref_frame[1] = ref_type[1]; |
675 | | //const int is_compound = svt_aom_has_second_ref(mbmi); |
676 | 0 | { |
677 | 0 | if (is_compound) { |
678 | 0 | const CompReferenceType comp_ref_type = has_uni_comp_refs(&mbmi->block_mi) ? UNIDIR_COMP_REFERENCE |
679 | 0 | : BIDIR_COMP_REFERENCE; |
680 | |
|
681 | 0 | ref_rate_bits += ctx->md_rate_est_ctx->comp_ref_type_fac_bits[svt_aom_get_comp_reference_type_context_new( |
682 | 0 | blk_ptr->av1xd)][comp_ref_type]; |
683 | | /*aom_write_symbol(w, comp_ref_type, |
684 | | svt_aom_get_comp_reference_type_cdf(blk_ptr->av1xd), 2);*/ |
685 | |
|
686 | 0 | if (comp_ref_type == UNIDIR_COMP_REFERENCE) { |
687 | | // SVT_LOG("ERROR[AN]: UNIDIR_COMP_REFERENCE not supported\n"); |
688 | 0 | const int bit = mbmi->block_mi.ref_frame[0] == BWDREF_FRAME; |
689 | |
|
690 | 0 | ref_rate_bits += ctx->md_rate_est_ctx->uni_comp_ref_fac_bits[svt_av1_get_pred_context_uni_comp_ref_p( |
691 | 0 | blk_ptr->av1xd)][0][bit]; |
692 | | // blk_ptr->av1xd->tile_ctx->uni_comp_ref_cdf[pred_context][0]; |
693 | | // WRITE_REF_BIT(bit, uni_comp_ref_p); |
694 | |
|
695 | 0 | if (!bit) { |
696 | 0 | assert(mbmi->block_mi.ref_frame[0] == LAST_FRAME); |
697 | 0 | const int bit1 = mbmi->block_mi.ref_frame[1] == LAST3_FRAME || |
698 | 0 | mbmi->block_mi.ref_frame[1] == GOLDEN_FRAME; |
699 | 0 | ref_rate_bits += |
700 | 0 | ctx->md_rate_est_ctx |
701 | 0 | ->uni_comp_ref_fac_bits[svt_av1_get_pred_context_uni_comp_ref_p1(blk_ptr->av1xd)][1][bit1]; |
702 | | // ref_rate_d = blk_ptr->av1xd->tile_ctx->uni_comp_ref_cdf[pred_context][1]; |
703 | | // WRITE_REF_BIT(bit1, uni_comp_ref_p1); |
704 | 0 | if (bit1) { |
705 | 0 | const int bit2 = mbmi->block_mi.ref_frame[1] == GOLDEN_FRAME; |
706 | 0 | ref_rate_bits += |
707 | 0 | ctx->md_rate_est_ctx->uni_comp_ref_fac_bits[svt_av1_get_pred_context_uni_comp_ref_p2( |
708 | 0 | blk_ptr->av1xd)][2][bit2]; |
709 | | |
710 | | // ref_rate_e = blk_ptr->av1xd->tile_ctx->uni_comp_ref_cdf[pred_context][2]; |
711 | | //WRITE_REF_BIT(bit2, uni_comp_ref_p2); |
712 | 0 | } |
713 | 0 | } |
714 | 0 | return ref_rate_bits; |
715 | 0 | } |
716 | | |
717 | 0 | assert(comp_ref_type == BIDIR_COMP_REFERENCE); |
718 | |
|
719 | 0 | const int bit = (mbmi->block_mi.ref_frame[0] == GOLDEN_FRAME || mbmi->block_mi.ref_frame[0] == LAST3_FRAME); |
720 | 0 | const int pred_ctx = svt_av1_get_pred_context_comp_ref_p(blk_ptr->av1xd); |
721 | 0 | ref_rate_bits += ctx->md_rate_est_ctx->comp_ref_fac_bits[pred_ctx][0][bit]; |
722 | | // ref_rate_f = blk_ptr->av1xd->tile_ctx->comp_ref_cdf[pred_ctx][0]; |
723 | | // WRITE_REF_BIT(bit, comp_ref_p); |
724 | |
|
725 | 0 | if (!bit) { |
726 | 0 | const int bit1 = mbmi->block_mi.ref_frame[0] == LAST2_FRAME; |
727 | 0 | ref_rate_bits += ctx->md_rate_est_ctx |
728 | 0 | ->comp_ref_fac_bits[svt_av1_get_pred_context_comp_ref_p1(blk_ptr->av1xd)][1][bit1]; |
729 | | // ref_rate_g = blk_ptr->av1xd->tile_ctx->comp_ref_cdf[pred_context][1]; |
730 | | // WRITE_REF_BIT(bit1, comp_ref_p1); |
731 | 0 | } else { |
732 | 0 | const int bit2 = mbmi->block_mi.ref_frame[0] == GOLDEN_FRAME; |
733 | 0 | ref_rate_bits += ctx->md_rate_est_ctx |
734 | 0 | ->comp_ref_fac_bits[svt_av1_get_pred_context_comp_ref_p2(blk_ptr->av1xd)][2][bit2]; |
735 | | // ref_rate_h = blk_ptr->av1xd->tile_ctx->comp_ref_cdf[pred_context][2]; |
736 | | // WRITE_REF_BIT(bit2, comp_ref_p2); |
737 | 0 | } |
738 | |
|
739 | 0 | const int bit_bwd = mbmi->block_mi.ref_frame[1] == ALTREF_FRAME; |
740 | 0 | const int pred_ctx_2 = svt_av1_get_pred_context_comp_bwdref_p(blk_ptr->av1xd); |
741 | 0 | ref_rate_bits += ctx->md_rate_est_ctx->comp_bwd_ref_fac_bits[pred_ctx_2][0][bit_bwd]; |
742 | | // ref_rate_i = blk_ptr->av1xd->tile_ctx->comp_bwdref_cdf[pred_ctx_2][0]; |
743 | | // WRITE_REF_BIT(bit_bwd, comp_bwdref_p); |
744 | |
|
745 | 0 | if (!bit_bwd) { |
746 | 0 | ref_rate_bits += ctx->md_rate_est_ctx->comp_bwd_ref_fac_bits[svt_av1_get_pred_context_comp_bwdref_p1( |
747 | 0 | blk_ptr->av1xd)][1][ref_type[1] == ALTREF2_FRAME]; |
748 | | // ref_rate_j = blk_ptr->av1xd->tile_ctx->comp_bwdref_cdf[pred_context][1]; |
749 | | // WRITE_REF_BIT(mbmi->block_mi.ref_frame[1] == ALTREF2_FRAME, comp_bwdref_p1); |
750 | 0 | } |
751 | 0 | } else { |
752 | 0 | const int bit0 = (mbmi->block_mi.ref_frame[0] <= ALTREF_FRAME && |
753 | 0 | mbmi->block_mi.ref_frame[0] >= BWDREF_FRAME); |
754 | 0 | ref_rate_bits += ctx->md_rate_est_ctx |
755 | 0 | ->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p1(blk_ptr->av1xd)][0][bit0]; |
756 | | // ref_rate_k = |
757 | | // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p1(blk_ptr->av1xd)][0]; |
758 | | // WRITE_REF_BIT(bit0, single_ref_p1); |
759 | |
|
760 | 0 | if (bit0) { |
761 | 0 | const int bit1 = mbmi->block_mi.ref_frame[0] == ALTREF_FRAME; |
762 | 0 | ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p2( |
763 | 0 | blk_ptr->av1xd)][1][bit1]; |
764 | | // ref_rate_l = |
765 | | // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p2(blk_ptr->av1xd)][1]; |
766 | | // WRITE_REF_BIT(bit1, single_ref_p2); |
767 | 0 | if (!bit1) { |
768 | 0 | ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p6( |
769 | 0 | blk_ptr->av1xd)][5][ref_frame_type == ALTREF2_FRAME]; |
770 | | // ref_rate_m = |
771 | | // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p6(blk_ptr->av1xd)][5]; |
772 | | // WRITE_REF_BIT(mbmi->block_mi.ref_frame[0] == ALTREF2_FRAME, single_ref_p6); |
773 | 0 | } |
774 | 0 | } else { |
775 | 0 | const int bit2 = (mbmi->block_mi.ref_frame[0] == LAST3_FRAME || |
776 | 0 | mbmi->block_mi.ref_frame[0] == GOLDEN_FRAME); |
777 | 0 | ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p3( |
778 | 0 | blk_ptr->av1xd)][2][bit2]; |
779 | | // ref_rate_n = |
780 | | // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p3(blk_ptr->av1xd)][2]; |
781 | | // WRITE_REF_BIT(bit2, single_ref_p3); |
782 | 0 | if (!bit2) { |
783 | 0 | const int bit3 = mbmi->block_mi.ref_frame[0] != LAST_FRAME; |
784 | 0 | ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p4( |
785 | 0 | blk_ptr->av1xd)][3][bit3]; |
786 | | // ref_rate_o = |
787 | | // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p4(blk_ptr->av1xd)][3]; |
788 | | // WRITE_REF_BIT(bit3, single_ref_p4); |
789 | 0 | } else { |
790 | 0 | const int bit4 = mbmi->block_mi.ref_frame[0] != LAST3_FRAME; |
791 | 0 | ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p5( |
792 | 0 | blk_ptr->av1xd)][4][bit4]; |
793 | | // ref_rate_p = |
794 | | // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p5(blk_ptr->av1xd)][4]; |
795 | | // WRITE_REF_BIT(bit4, single_ref_p5); |
796 | 0 | } |
797 | 0 | } |
798 | 0 | } |
799 | 0 | } |
800 | 0 | return ref_rate_bits; |
801 | 0 | } |
802 | | |
803 | | int svt_aom_get_comp_group_idx_context_enc(const MacroBlockD* xd); |
804 | | int is_any_masked_compound_used(BlockSize bsize); |
805 | | |
806 | | static INLINE uint32_t get_compound_mode_rate(PictureControlSet* pcs, ModeDecisionContext* ctx, |
807 | 0 | ModeDecisionCandidate* cand, BlockSize bsize) { |
808 | 0 | BlkStruct* blk_ptr = ctx->blk_ptr; |
809 | 0 | SequenceControlSet* scs = pcs->ppcs->scs; |
810 | 0 | uint32_t comp_rate = 0; |
811 | 0 | MbModeInfo* const mbmi = blk_ptr->av1xd->mi[0]; |
812 | 0 | MvReferenceFrame rf[2] = {cand->block_mi.ref_frame[0], cand->block_mi.ref_frame[1]}; |
813 | 0 | mbmi->block_mi.ref_frame[0] = rf[0]; |
814 | 0 | mbmi->block_mi.ref_frame[1] = rf[1]; |
815 | | |
816 | | //NOTE : Make sure, any cuPtr data is already set before usage |
817 | |
|
818 | 0 | if (has_second_ref(&mbmi->block_mi)) { |
819 | 0 | const int masked_compound_used = is_any_masked_compound_used(bsize) && scs->seq_header.enable_masked_compound; |
820 | |
|
821 | 0 | if (masked_compound_used) { |
822 | 0 | const int ctx_comp_group_idx = svt_aom_get_comp_group_idx_context_enc(blk_ptr->av1xd); |
823 | 0 | comp_rate = |
824 | 0 | ctx->md_rate_est_ctx->comp_group_idx_fac_bits[ctx_comp_group_idx][cand->block_mi.comp_group_idx]; |
825 | 0 | } else { |
826 | 0 | assert(cand->block_mi.comp_group_idx == 0); |
827 | 0 | } |
828 | |
|
829 | 0 | if (cand->block_mi.comp_group_idx == 0) { |
830 | 0 | if (cand->block_mi.compound_idx) { |
831 | 0 | assert(cand->block_mi.interinter_comp.type == COMPOUND_AVERAGE); |
832 | 0 | } |
833 | |
|
834 | 0 | if (scs->seq_header.order_hint_info.enable_jnt_comp) { |
835 | 0 | const int comp_index_ctx = svt_aom_get_comp_index_context_enc(pcs->ppcs, |
836 | 0 | pcs->ppcs->cur_order_hint, |
837 | 0 | pcs->ppcs->ref_order_hint[rf[0] - 1], |
838 | 0 | pcs->ppcs->ref_order_hint[rf[1] - 1], |
839 | 0 | blk_ptr->av1xd); |
840 | 0 | comp_rate += ctx->md_rate_est_ctx->comp_idx_fac_bits[comp_index_ctx][cand->block_mi.compound_idx]; |
841 | 0 | } else { |
842 | 0 | assert(cand->block_mi.compound_idx == 1); |
843 | 0 | } |
844 | 0 | } else { |
845 | 0 | assert(pcs->ppcs->frm_hdr.reference_mode != SINGLE_REFERENCE && |
846 | 0 | is_inter_compound_mode(cand->block_mi.mode)); |
847 | 0 | assert(masked_compound_used); |
848 | | // compound_diffwtd, wedge |
849 | 0 | assert(cand->block_mi.interinter_comp.type == COMPOUND_WEDGE || |
850 | 0 | cand->block_mi.interinter_comp.type == COMPOUND_DIFFWTD); |
851 | |
|
852 | 0 | if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) { |
853 | 0 | comp_rate += ctx->md_rate_est_ctx |
854 | 0 | ->compound_type_fac_bits[bsize][cand->block_mi.interinter_comp.type - COMPOUND_WEDGE]; |
855 | 0 | } |
856 | |
|
857 | 0 | if (cand->block_mi.interinter_comp.type == COMPOUND_WEDGE) { |
858 | 0 | assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize)); |
859 | 0 | comp_rate += |
860 | 0 | ctx->md_rate_est_ctx->wedge_idx_fac_bits[bsize][cand->block_mi.interinter_comp.wedge_index]; |
861 | 0 | comp_rate += av1_cost_literal(1); |
862 | 0 | } else { |
863 | 0 | assert(cand->block_mi.interinter_comp.type == COMPOUND_DIFFWTD); |
864 | 0 | comp_rate += av1_cost_literal(1); |
865 | 0 | } |
866 | 0 | } |
867 | 0 | } |
868 | |
|
869 | 0 | return comp_rate; |
870 | 0 | } |
871 | | |
872 | | int32_t svt_aom_get_switchable_rate(BlockModeInfo* block_mi, const FrameHeader* const frm_hdr, ModeDecisionContext* ctx, |
873 | 0 | const bool enable_dual_filter) { |
874 | 0 | if (frm_hdr->interpolation_filter != SWITCHABLE) { |
875 | 0 | return 0; |
876 | 0 | } |
877 | | |
878 | 0 | int32_t inter_filter_cost = 0; |
879 | 0 | const int max_dir = enable_dual_filter ? 2 : 1; |
880 | 0 | for (int dir = 0; dir < max_dir; ++dir) { |
881 | 0 | const int32_t pred_ctx = svt_aom_get_pred_context_switchable_interp( |
882 | 0 | block_mi->ref_frame[0], block_mi->ref_frame[1], ctx->blk_ptr->av1xd, dir); |
883 | 0 | const InterpFilter filter = av1_extract_interp_filter(block_mi->interp_filters, dir); |
884 | 0 | assert(pred_ctx < SWITCHABLE_FILTER_CONTEXTS); |
885 | 0 | assert(filter < SWITCHABLE_FILTERS); |
886 | 0 | inter_filter_cost += ctx->md_rate_est_ctx->switchable_interp_fac_bitss[pred_ctx][filter]; |
887 | 0 | } |
888 | 0 | return inter_filter_cost; |
889 | 0 | } |
890 | | |
891 | | int svt_aom_is_interintra_wedge_used(BlockSize bsize); |
892 | | |
893 | | static uint64_t av1_inter_fast_cost_light(ModeDecisionContext* ctx, BlkStruct* blk_ptr, |
894 | | ModeDecisionCandidateBuffer* cand_bf, uint64_t luma_distortion, |
895 | 0 | uint64_t lambda, PictureControlSet* pcs, CandidateMv* ref_mv_stack) { |
896 | 0 | ModeDecisionCandidate* cand = cand_bf->cand; |
897 | | // NM - fast inter cost estimation |
898 | 0 | MdRateEstimationContext* r = ctx->md_rate_est_ctx; |
899 | | //_mm_prefetch(p, _MM_HINT_T2); |
900 | | // Luma rate |
901 | 0 | uint32_t luma_rate = 0; |
902 | 0 | uint64_t mv_rate = 0; |
903 | 0 | const PredictionMode inter_mode = (PredictionMode)cand->block_mi.mode; |
904 | 0 | const uint8_t have_nearmv = have_nearmv_in_inter_mode(inter_mode); |
905 | 0 | uint64_t inter_mode_bits_num = 0; |
906 | 0 | const uint8_t skip_mode_ctx = ctx->skip_mode_ctx; |
907 | 0 | MvReferenceFrame rf[2] = {cand->block_mi.ref_frame[0], cand->block_mi.ref_frame[1]}; |
908 | 0 | const int8_t ref_frame_type = av1_ref_frame_type(rf); |
909 | 0 | const uint8_t is_compound = is_inter_compound_mode(cand->block_mi.mode); |
910 | 0 | const uint32_t mode_context = svt_aom_mode_context_analyzer(ctx->inter_mode_ctx[ref_frame_type], rf); |
911 | 0 | uint64_t reference_picture_bits_num = 0; |
912 | 0 | if (ctx->approx_inter_rate < 2) { |
913 | 0 | reference_picture_bits_num = ctx->estimate_ref_frames_num_bits[ref_frame_type]; |
914 | 0 | } |
915 | 0 | if (is_compound) { |
916 | 0 | assert(INTER_COMPOUND_OFFSET(inter_mode) < INTER_COMPOUND_MODES); |
917 | 0 | inter_mode_bits_num += r->inter_compound_mode_fac_bits[mode_context][INTER_COMPOUND_OFFSET(inter_mode)]; |
918 | 0 | } else { |
919 | 0 | int16_t newmv_ctx = mode_context & NEWMV_CTX_MASK; |
920 | | //aom_write_symbol(ec_writer, mode != NEWMV, frame_context->newmv_cdf[newmv_ctx], 2); |
921 | 0 | inter_mode_bits_num += r->new_mv_mode_fac_bits[newmv_ctx][inter_mode != NEWMV]; |
922 | 0 | if (inter_mode != NEWMV) { |
923 | 0 | const int16_t zero_mv_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK; |
924 | | //aom_write_symbol(ec_writer, mode != GLOBALMV, frame_context->zeromv_cdf[zero_mv_ctx], 2); |
925 | 0 | inter_mode_bits_num += r->zero_mv_mode_fac_bits[zero_mv_ctx][inter_mode != GLOBALMV]; |
926 | 0 | if (inter_mode != GLOBALMV) { |
927 | 0 | int16_t ref_mv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK; |
928 | | /*aom_write_symbol(ec_writer, mode != NEARESTMV, frame_context->refmv_cdf[refmv_ctx], 2);*/ |
929 | 0 | inter_mode_bits_num += r->ref_mv_mode_fac_bits[ref_mv_ctx][inter_mode != NEARESTMV]; |
930 | 0 | } |
931 | 0 | } |
932 | 0 | } |
933 | 0 | if (inter_mode == NEWMV || inter_mode == NEW_NEWMV || have_nearmv) { |
934 | 0 | // DRL index cost estimation |
935 | 0 | const int32_t new_mv = inter_mode == NEWMV || inter_mode == NEW_NEWMV; |
936 | 0 | if (new_mv) { |
937 | 0 | int32_t idx; |
938 | 0 | for (idx = 0; idx < 2; ++idx) { |
939 | 0 | if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) { |
940 | 0 | uint8_t drl_1_ctx = av1_drl_ctx(ref_mv_stack, idx); |
941 | 0 | inter_mode_bits_num += r->drl_mode_fac_bits[drl_1_ctx][cand->drl_index != idx]; |
942 | 0 | if (cand->drl_index == idx) { |
943 | 0 | break; |
944 | 0 | } |
945 | 0 | } |
946 | 0 | } |
947 | 0 | } |
948 | 0 | if (have_nearmv) { |
949 | 0 | int32_t idx; |
950 | 0 | for (idx = 1; idx < 3; ++idx) { |
951 | 0 | if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) { |
952 | 0 | uint8_t drl_ctx = av1_drl_ctx(ref_mv_stack, idx); |
953 | 0 | inter_mode_bits_num += r->drl_mode_fac_bits[drl_ctx][cand->drl_index != (idx - 1)]; |
954 | 0 | if (cand->drl_index == (idx - 1)) { |
955 | 0 | break; |
956 | 0 | } |
957 | 0 | } |
958 | 0 | } |
959 | 0 | } |
960 | 0 | } |
961 | 0 | if (svt_aom_have_newmv_in_inter_mode(inter_mode)) { |
962 | 0 | const uint16_t factor = pcs->ppcs->frm_hdr.allow_screen_content_tools ? 20 : 50; |
963 | 0 | if (is_compound) { |
964 | 0 | mv_rate = 0; |
965 | 0 | if (inter_mode == NEW_NEWMV) { |
966 | 0 | for (RefList ref_list_idx = 0; ref_list_idx < 2; ++ref_list_idx) { |
967 | 0 | Mv mv = cand->block_mi.mv[ref_list_idx]; |
968 | 0 | Mv ref_mv = cand->pred_mv[ref_list_idx]; |
969 | 0 | const uint16_t absmvdiffx = ABS(mv.x - ref_mv.x); |
970 | 0 | const uint16_t absmvdiffy = ABS(mv.y - ref_mv.y); |
971 | 0 | mv_rate += 1296 + (factor * (absmvdiffx + absmvdiffy)); |
972 | 0 | } |
973 | 0 | } else if (inter_mode == NEAREST_NEWMV || inter_mode == NEAR_NEWMV) { |
974 | | // New MV is second ref |
975 | 0 | Mv mv = cand->block_mi.mv[1]; |
976 | 0 | Mv ref_mv = cand->pred_mv[1]; |
977 | 0 | const uint16_t absmvdiffx = ABS(mv.x - ref_mv.x); |
978 | 0 | const uint16_t absmvdiffy = ABS(mv.y - ref_mv.y); |
979 | 0 | mv_rate += 1296 + (factor * (absmvdiffx + absmvdiffy)); |
980 | 0 | } else { |
981 | 0 | assert(inter_mode == NEW_NEARESTMV || inter_mode == NEW_NEARMV); |
982 | | // New MV is first ref |
983 | 0 | Mv mv = cand->block_mi.mv[0]; |
984 | 0 | Mv ref_mv = cand->pred_mv[0]; |
985 | 0 | const uint16_t absmvdiffx = ABS(mv.x - ref_mv.x); |
986 | 0 | const uint16_t absmvdiffy = ABS(mv.y - ref_mv.y); |
987 | 0 | mv_rate += 1296 + (factor * (absmvdiffx + absmvdiffy)); |
988 | 0 | } |
989 | 0 | } else { |
990 | 0 | assert(!is_compound); // single ref inter prediction |
991 | | // unipred MV stored in idx0 |
992 | 0 | Mv mv = cand->block_mi.mv[0]; |
993 | 0 | Mv ref_mv = cand->pred_mv[0]; |
994 | 0 | const uint16_t absmvdiffx = ABS(mv.x - ref_mv.x); |
995 | 0 | const uint16_t absmvdiffy = ABS(mv.y - ref_mv.y); |
996 | 0 | mv_rate += 1296 + (factor * (absmvdiffx + absmvdiffy)); |
997 | 0 | } |
998 | 0 | } |
999 | | // Get the interpolation filter rate if IFS is performed at MDS0. Otherwise, the filter is unknown, so the rate will be updated after IFS is performed. |
1000 | 0 | uint32_t ifs_rate = 0; |
1001 | 0 | if (ctx->ifs_ctrls.level == IFS_MDS0 && |
1002 | 0 | av1_is_interp_needed_md(&cand_bf->cand->block_mi, pcs, ctx->blk_geom->bsize) && |
1003 | 0 | pcs->ppcs->frm_hdr.interpolation_filter == SWITCHABLE) { |
1004 | 0 | ifs_rate = svt_aom_get_switchable_rate( |
1005 | 0 | &cand_bf->cand->block_mi, &pcs->ppcs->frm_hdr, ctx, pcs->scs->seq_header.enable_dual_filter); |
1006 | 0 | } |
1007 | 0 | uint32_t is_inter_rate = r->intra_inter_fac_bits[ctx->is_inter_ctx][1]; |
1008 | |
|
1009 | 0 | uint32_t skip_mode_rate = pcs->ppcs->frm_hdr.skip_mode_params.skip_mode_flag && |
1010 | 0 | is_comp_ref_allowed(ctx->blk_geom->bsize) |
1011 | 0 | ? r->skip_mode_fac_bits[skip_mode_ctx][0] |
1012 | 0 | : 0; |
1013 | 0 | luma_rate = (uint32_t)(reference_picture_bits_num + skip_mode_rate + inter_mode_bits_num + mv_rate + is_inter_rate + |
1014 | 0 | ifs_rate); |
1015 | | // Keep the Fast Luma and Chroma rate for future use |
1016 | 0 | cand_bf->fast_luma_rate = luma_rate; |
1017 | 0 | cand_bf->fast_chroma_rate = 0; |
1018 | | // Assign fast cost |
1019 | 0 | if (cand->skip_mode_allowed) { |
1020 | 0 | skip_mode_rate = r->skip_mode_fac_bits[skip_mode_ctx][1]; |
1021 | 0 | if (skip_mode_rate < luma_rate) { |
1022 | 0 | return (RDCOST(lambda, skip_mode_rate, luma_distortion)); |
1023 | 0 | } |
1024 | 0 | } |
1025 | 0 | return (RDCOST(lambda, luma_rate, luma_distortion)); |
1026 | 0 | } |
1027 | | |
1028 | | uint64_t svt_aom_inter_fast_cost(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf, |
1029 | 0 | uint64_t lambda, uint64_t luma_distortion) { |
1030 | 0 | const BlockGeom* blk_geom = ctx->blk_geom; |
1031 | 0 | BlkStruct* blk_ptr = ctx->blk_ptr; |
1032 | 0 | ModeDecisionCandidate* cand = cand_bf->cand; |
1033 | 0 | MvReferenceFrame rf[2] = {cand->block_mi.ref_frame[0], cand->block_mi.ref_frame[1]}; |
1034 | 0 | const int8_t ref_frame_type = av1_ref_frame_type(cand->block_mi.ref_frame); |
1035 | 0 | CandidateMv* ref_mv_stack = &(ctx->ref_mv_stack[ref_frame_type][0]); |
1036 | |
|
1037 | 0 | if (ctx->approx_inter_rate) { |
1038 | 0 | return av1_inter_fast_cost_light(ctx, blk_ptr, cand_bf, luma_distortion, lambda, pcs, ref_mv_stack); |
1039 | 0 | } |
1040 | 0 | FrameHeader* frm_hdr = &pcs->ppcs->frm_hdr; |
1041 | | |
1042 | | // Luma rate |
1043 | 0 | uint32_t luma_rate = 0; |
1044 | 0 | uint64_t mv_rate = 0; |
1045 | 0 | PredictionMode inter_mode = (PredictionMode)cand->block_mi.mode; |
1046 | |
|
1047 | 0 | uint64_t inter_mode_bits_num = 0; |
1048 | |
|
1049 | 0 | const uint8_t skip_mode_ctx = ctx->skip_mode_ctx; |
1050 | 0 | const uint8_t is_compound = is_inter_compound_mode(cand->block_mi.mode); |
1051 | 0 | uint32_t mode_context = svt_aom_mode_context_analyzer(ctx->inter_mode_ctx[ref_frame_type], rf); |
1052 | 0 | uint64_t reference_picture_bits_num = 0; |
1053 | | |
1054 | | //Reference Type and Mode Bit estimation |
1055 | 0 | reference_picture_bits_num = ctx->estimate_ref_frames_num_bits[ref_frame_type]; |
1056 | 0 | if (is_compound) { |
1057 | 0 | assert(INTER_COMPOUND_OFFSET(inter_mode) < INTER_COMPOUND_MODES); |
1058 | 0 | inter_mode_bits_num += |
1059 | 0 | ctx->md_rate_est_ctx->inter_compound_mode_fac_bits[mode_context][INTER_COMPOUND_OFFSET(inter_mode)]; |
1060 | 0 | } else { |
1061 | | // uint32_t newmv_ctx = mode_context & NEWMV_CTX_MASK; |
1062 | | // inter_mode_bits_num = cand_bf->cand->md_rate_est_ctx->new_mv_mode_fac_bits[mode_ctx][0]; |
1063 | |
|
1064 | 0 | int16_t newmv_ctx = mode_context & NEWMV_CTX_MASK; |
1065 | | // aom_write_symbol(ec_writer, mode != NEWMV, frame_context->newmv_cdf[newmv_ctx], 2); |
1066 | 0 | inter_mode_bits_num += ctx->md_rate_est_ctx->new_mv_mode_fac_bits[newmv_ctx][inter_mode != NEWMV]; |
1067 | 0 | if (inter_mode != NEWMV) { |
1068 | 0 | const int16_t zero_mv_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK; |
1069 | | // aom_write_symbol(ec_writer, mode != GLOBALMV, frame_context->zeromv_cdf[zero_mv_ctx], |
1070 | | // 2); |
1071 | 0 | inter_mode_bits_num += ctx->md_rate_est_ctx->zero_mv_mode_fac_bits[zero_mv_ctx][inter_mode != GLOBALMV]; |
1072 | 0 | if (inter_mode != GLOBALMV) { |
1073 | 0 | int16_t ref_mv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK; |
1074 | | /*aom_write_symbol(ec_writer, mode != NEARESTMV, |
1075 | | * frame_context->refmv_cdf[refmv_ctx], 2);*/ |
1076 | 0 | inter_mode_bits_num += ctx->md_rate_est_ctx->ref_mv_mode_fac_bits[ref_mv_ctx][inter_mode != NEARESTMV]; |
1077 | 0 | } |
1078 | 0 | } |
1079 | 0 | } |
1080 | 0 | if (inter_mode == NEWMV || inter_mode == NEW_NEWMV || have_nearmv_in_inter_mode(inter_mode)) { |
1081 | | // DRL index cost estimation |
1082 | 0 | const int32_t new_mv = inter_mode == NEWMV || inter_mode == NEW_NEWMV; |
1083 | 0 | if (new_mv) { |
1084 | 0 | int32_t idx; |
1085 | 0 | for (idx = 0; idx < 2; ++idx) { |
1086 | 0 | if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) { |
1087 | 0 | uint8_t drl_1_ctx = av1_drl_ctx(ref_mv_stack, idx); |
1088 | 0 | inter_mode_bits_num += ctx->md_rate_est_ctx->drl_mode_fac_bits[drl_1_ctx][cand->drl_index != idx]; |
1089 | 0 | if (cand->drl_index == idx) { |
1090 | 0 | break; |
1091 | 0 | } |
1092 | 0 | } |
1093 | 0 | } |
1094 | 0 | } |
1095 | |
|
1096 | 0 | if (have_nearmv_in_inter_mode(inter_mode)) { |
1097 | 0 | int32_t idx; |
1098 | 0 | for (idx = 1; idx < 3; ++idx) { |
1099 | 0 | if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) { |
1100 | 0 | uint8_t drl_ctx = av1_drl_ctx(ref_mv_stack, idx); |
1101 | 0 | inter_mode_bits_num += |
1102 | 0 | ctx->md_rate_est_ctx->drl_mode_fac_bits[drl_ctx][cand->drl_index != (idx - 1)]; |
1103 | |
|
1104 | 0 | if (cand->drl_index == (idx - 1)) { |
1105 | 0 | break; |
1106 | 0 | } |
1107 | 0 | } |
1108 | 0 | } |
1109 | 0 | } |
1110 | 0 | } |
1111 | |
|
1112 | 0 | if (svt_aom_have_newmv_in_inter_mode(inter_mode)) { |
1113 | 0 | if (is_compound) { |
1114 | 0 | mv_rate = 0; |
1115 | |
|
1116 | 0 | if (inter_mode == NEW_NEWMV) { |
1117 | 0 | for (RefList ref_list_idx = 0; ref_list_idx < 2; ++ref_list_idx) { |
1118 | 0 | Mv mv = cand->block_mi.mv[ref_list_idx]; |
1119 | 0 | Mv ref_mv = cand->pred_mv[ref_list_idx]; |
1120 | 0 | mv_rate += svt_av1_mv_bit_cost(&mv, |
1121 | 0 | &ref_mv, |
1122 | 0 | ctx->md_rate_est_ctx->nmv_vec_cost, |
1123 | 0 | ctx->md_rate_est_ctx->nmvcoststack, |
1124 | 0 | MV_COST_WEIGHT); |
1125 | 0 | } |
1126 | 0 | } else if (inter_mode == NEAREST_NEWMV || inter_mode == NEAR_NEWMV) { |
1127 | 0 | Mv mv = cand->block_mi.mv[1]; |
1128 | 0 | Mv ref_mv = cand->pred_mv[1]; |
1129 | 0 | mv_rate += svt_av1_mv_bit_cost(&mv, |
1130 | 0 | &ref_mv, |
1131 | 0 | ctx->md_rate_est_ctx->nmv_vec_cost, |
1132 | 0 | ctx->md_rate_est_ctx->nmvcoststack, |
1133 | 0 | MV_COST_WEIGHT); |
1134 | 0 | } else { |
1135 | 0 | assert(inter_mode == NEW_NEARESTMV || inter_mode == NEW_NEARMV); |
1136 | 0 | Mv mv = cand->block_mi.mv[0]; |
1137 | 0 | Mv ref_mv = cand->pred_mv[0]; |
1138 | 0 | mv_rate += svt_av1_mv_bit_cost(&mv, |
1139 | 0 | &ref_mv, |
1140 | 0 | ctx->md_rate_est_ctx->nmv_vec_cost, |
1141 | 0 | ctx->md_rate_est_ctx->nmvcoststack, |
1142 | 0 | MV_COST_WEIGHT); |
1143 | 0 | } |
1144 | 0 | } else { |
1145 | 0 | assert(!is_compound); // single ref inter prediction |
1146 | | // unipred MVs stored in idx0 |
1147 | 0 | Mv mv = cand->block_mi.mv[0]; |
1148 | 0 | Mv ref_mv = cand->pred_mv[0]; |
1149 | 0 | mv_rate = svt_av1_mv_bit_cost( |
1150 | 0 | &mv, &ref_mv, ctx->md_rate_est_ctx->nmv_vec_cost, ctx->md_rate_est_ctx->nmvcoststack, MV_COST_WEIGHT); |
1151 | 0 | } |
1152 | 0 | } |
1153 | | // inter intra mode rate |
1154 | 0 | if (pcs->ppcs->scs->seq_header.enable_interintra_compound && |
1155 | | /* Check if inter-intra is allowed for current block size / mode (even if the feature is off |
1156 | | * for the current block, we still need to signal inter-intra off). |
1157 | | */ |
1158 | 0 | svt_is_interintra_allowed(true, blk_geom->bsize, cand->block_mi.mode, rf)) { |
1159 | 0 | const int interintra = cand->block_mi.is_interintra_used; |
1160 | 0 | const int bsize_group = eb_size_group_lookup[blk_geom->bsize]; |
1161 | |
|
1162 | 0 | inter_mode_bits_num += |
1163 | 0 | ctx->md_rate_est_ctx->inter_intra_fac_bits[bsize_group][cand->block_mi.is_interintra_used]; |
1164 | |
|
1165 | 0 | if (interintra) { |
1166 | 0 | inter_mode_bits_num += |
1167 | 0 | ctx->md_rate_est_ctx->inter_intra_mode_fac_bits[bsize_group][cand->block_mi.interintra_mode]; |
1168 | |
|
1169 | 0 | if (svt_aom_is_interintra_wedge_used(blk_geom->bsize)) { |
1170 | 0 | inter_mode_bits_num += |
1171 | 0 | ctx->md_rate_est_ctx |
1172 | 0 | ->wedge_inter_intra_fac_bits[blk_geom->bsize][cand->block_mi.use_wedge_interintra]; |
1173 | |
|
1174 | 0 | if (cand->block_mi.use_wedge_interintra) { |
1175 | 0 | inter_mode_bits_num += |
1176 | 0 | ctx->md_rate_est_ctx |
1177 | 0 | ->wedge_idx_fac_bits[blk_geom->bsize][cand->block_mi.interintra_wedge_index]; |
1178 | 0 | } |
1179 | 0 | } |
1180 | 0 | } |
1181 | 0 | } |
1182 | 0 | if (is_inter_singleref_mode(inter_mode) && frm_hdr->is_motion_mode_switchable && rf[1] != INTRA_FRAME) { |
1183 | 0 | assert(!cand->block_mi.is_interintra_used); |
1184 | 0 | const MotionMode motion_mode_rd = cand->block_mi.motion_mode; |
1185 | 0 | const BlockSize bsize = blk_geom->bsize; |
1186 | 0 | const MotionMode last_motion_mode_allowed = svt_aom_motion_mode_allowed( |
1187 | 0 | pcs, cand->block_mi.num_proj_ref, blk_ptr->overlappable_neighbors, bsize, rf[0], rf[1], inter_mode); |
1188 | 0 | switch (last_motion_mode_allowed) { |
1189 | 0 | case SIMPLE_TRANSLATION: |
1190 | 0 | break; |
1191 | 0 | case OBMC_CAUSAL: |
1192 | 0 | inter_mode_bits_num += ctx->md_rate_est_ctx->motion_mode_fac_bits1[bsize][motion_mode_rd == OBMC_CAUSAL]; |
1193 | 0 | break; |
1194 | 0 | default: |
1195 | 0 | inter_mode_bits_num += ctx->md_rate_est_ctx->motion_mode_fac_bits[bsize][motion_mode_rd]; |
1196 | 0 | } |
1197 | 0 | } |
1198 | | // get_compound_mode_rate() returns 0 for single-reference blocks, and when both masked compound and jnt_comp are disabled |
1199 | 0 | inter_mode_bits_num += get_compound_mode_rate(pcs, ctx, cand, blk_geom->bsize); |
1200 | | // Get the interpolation filter rate if IFS is performed at MDS0. Otherwise, the filter is unknown, so the rate will be updated after IFS is performed. |
1201 | 0 | uint32_t ifs_rate = 0; |
1202 | 0 | if (ctx->ifs_ctrls.level == IFS_MDS0 && |
1203 | 0 | av1_is_interp_needed_md(&cand_bf->cand->block_mi, pcs, ctx->blk_geom->bsize) && |
1204 | 0 | frm_hdr->interpolation_filter == SWITCHABLE) { |
1205 | 0 | ifs_rate = svt_aom_get_switchable_rate( |
1206 | 0 | &cand_bf->cand->block_mi, frm_hdr, ctx, pcs->scs->seq_header.enable_dual_filter); |
1207 | 0 | } |
1208 | 0 | uint32_t is_inter_rate = ctx->md_rate_est_ctx->intra_inter_fac_bits[ctx->is_inter_ctx][1]; |
1209 | 0 | uint32_t skip_mode_rate = pcs->ppcs->frm_hdr.skip_mode_params.skip_mode_flag && is_comp_ref_allowed(blk_geom->bsize) |
1210 | 0 | ? ctx->md_rate_est_ctx->skip_mode_fac_bits[skip_mode_ctx][0] |
1211 | 0 | : 0; |
1212 | 0 | luma_rate = (uint32_t)(reference_picture_bits_num + skip_mode_rate + inter_mode_bits_num + mv_rate + is_inter_rate + |
1213 | 0 | ifs_rate); |
1214 | | // Keep the Fast Luma and Chroma rate for future use |
1215 | 0 | cand_bf->fast_luma_rate = luma_rate; |
1216 | 0 | cand_bf->fast_chroma_rate = 0; |
1217 | | // Assign fast cost |
1218 | 0 | if (cand->skip_mode_allowed) { |
1219 | 0 | skip_mode_rate = ctx->md_rate_est_ctx->skip_mode_fac_bits[skip_mode_ctx][1]; |
1220 | 0 | if (skip_mode_rate < luma_rate) { |
1221 | 0 | return (RDCOST(lambda, skip_mode_rate, luma_distortion)); |
1222 | 0 | } |
1223 | 0 | } |
1224 | 0 | return (RDCOST(lambda, luma_rate, luma_distortion)); |
1225 | 0 | } |
1226 | | |
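1227 | | /* Estimate the luma coefficient bits of one transform block (light PD0 path): coefficient cost when y_eob is non-zero, skip cost otherwise. |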
1228 | | */ |
1229 | | EbErrorType svt_aom_txb_estimate_coeff_bits_light_pd0(ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf, |
1230 | | uint32_t txb_origin_index, EbPictureBufferDesc* coeff_buffer_sb, |
1231 | 0 | uint32_t y_eob, uint64_t* y_txb_coeff_bits, TxSize txsize) { |
1232 | 0 | if (y_eob) { |
1233 | 0 | *y_txb_coeff_bits = svt_av1_cost_coeffs_txb( |
1234 | 0 | ctx, |
1235 | 0 | 0, |
1236 | 0 | 0, |
1237 | 0 | cand_bf, |
1238 | 0 | (int32_t*)&coeff_buffer_sb->y_buffer[txb_origin_index * sizeof(int32_t)], |
1239 | 0 | (uint16_t)y_eob, |
1240 | 0 | PLANE_TYPE_Y, |
1241 | 0 | txsize, |
1242 | 0 | DCT_DCT, |
1243 | 0 | 0, |
1244 | 0 | 0, |
1245 | 0 | 0); |
1246 | |
|
1247 | 0 | *y_txb_coeff_bits = (*y_txb_coeff_bits) << ctx->mds_subres_step; |
1248 | |
|
1249 | 0 | } else { |
1250 | 0 | *y_txb_coeff_bits = av1_cost_skip_txb(ctx, 0, 0, txsize, PLANE_TYPE_Y, 0); |
1251 | 0 | } |
1252 | |
|
1253 | 0 | return EB_ErrorNone; |
1254 | 0 | } |
1255 | | |
1256 | | EbErrorType svt_aom_txb_estimate_coeff_bits(ModeDecisionContext* ctx, uint8_t allow_update_cdf, FRAME_CONTEXT* ec_ctx, |
1257 | | PictureControlSet* pcs, ModeDecisionCandidateBuffer* cand_bf, |
1258 | | uint32_t txb_origin_index, uint32_t txb_chroma_origin_index, |
1259 | | EbPictureBufferDesc* coeff_buffer_sb, uint32_t y_eob, uint32_t cb_eob, |
1260 | | uint32_t cr_eob, uint64_t* y_txb_coeff_bits, uint64_t* cb_txb_coeff_bits, |
1261 | | uint64_t* cr_txb_coeff_bits, TxSize txsize, TxSize txsize_uv, |
1262 | 127k | TxType tx_type, TxType tx_type_uv, COMPONENT_TYPE component_type) { |
1263 | 127k | EbErrorType return_error = EB_ErrorNone; |
1264 | | |
1265 | 127k | FrameHeader* frm_hdr = &pcs->ppcs->frm_hdr; |
1266 | | |
1267 | 127k | int32_t* coeff_buffer; |
1268 | 127k | int16_t luma_txb_skip_context = ctx->luma_txb_skip_context; |
1269 | 127k | int16_t luma_dc_sign_context = ctx->luma_dc_sign_context; |
1270 | 127k | int16_t cb_txb_skip_context = ctx->cb_txb_skip_context; |
1271 | 127k | int16_t cb_dc_sign_context = ctx->cb_dc_sign_context; |
1272 | 127k | int16_t cr_txb_skip_context = ctx->cr_txb_skip_context; |
1273 | 127k | int16_t cr_dc_sign_context = ctx->cr_dc_sign_context; |
1274 | | |
1275 | 127k | bool reduced_transform_set_flag = frm_hdr->reduced_tx_set ? true : false; |
1276 | | |
1277 | | //Estimate the rate of the transform type and coefficient for Luma |
1278 | | |
1279 | 127k | if (component_type == COMPONENT_LUMA || component_type == COMPONENT_ALL) { |
1280 | 0 | if (y_eob) { |
1281 | 0 | coeff_buffer = (int32_t*)&coeff_buffer_sb->y_buffer[txb_origin_index * sizeof(int32_t)]; |
1282 | |
|
1283 | 0 | *y_txb_coeff_bits = svt_av1_cost_coeffs_txb(ctx, |
1284 | 0 | allow_update_cdf, |
1285 | 0 | ec_ctx, |
1286 | 0 | cand_bf, |
1287 | 0 | coeff_buffer, |
1288 | 0 | (uint16_t)y_eob, |
1289 | 0 | PLANE_TYPE_Y, |
1290 | 0 | txsize, |
1291 | 0 | tx_type, |
1292 | 0 | luma_txb_skip_context, |
1293 | 0 | luma_dc_sign_context, |
1294 | 0 | reduced_transform_set_flag); |
1295 | 0 | *y_txb_coeff_bits = (*y_txb_coeff_bits) << ctx->mds_subres_step; |
1296 | 0 | } else { |
1297 | 0 | *y_txb_coeff_bits = av1_cost_skip_txb( |
1298 | 0 | ctx, allow_update_cdf, ec_ctx, txsize, PLANE_TYPE_Y, luma_txb_skip_context); |
1299 | 0 | } |
1300 | 0 | } |
1301 | | // Estimate the rate of the transform type and coefficient for chroma Cb |
1302 | | |
1303 | 127k | if (component_type == COMPONENT_CHROMA_CB || component_type == COMPONENT_CHROMA || |
1304 | 127k | component_type == COMPONENT_ALL) { |
1305 | 127k | if (cb_eob) { |
1306 | 5.47k | coeff_buffer = (int32_t*)&coeff_buffer_sb->u_buffer[txb_chroma_origin_index * sizeof(int32_t)]; |
1307 | | |
1308 | 5.47k | *cb_txb_coeff_bits = svt_av1_cost_coeffs_txb(ctx, |
1309 | 5.47k | allow_update_cdf, |
1310 | 5.47k | ec_ctx, |
1311 | 5.47k | cand_bf, |
1312 | 5.47k | coeff_buffer, |
1313 | 5.47k | (uint16_t)cb_eob, |
1314 | 5.47k | PLANE_TYPE_UV, |
1315 | 5.47k | txsize_uv, |
1316 | 5.47k | tx_type_uv, |
1317 | 5.47k | cb_txb_skip_context, |
1318 | 5.47k | cb_dc_sign_context, |
1319 | 5.47k | reduced_transform_set_flag); |
1320 | 122k | } else { |
1321 | 122k | *cb_txb_coeff_bits = av1_cost_skip_txb( |
1322 | 122k | ctx, allow_update_cdf, ec_ctx, txsize_uv, PLANE_TYPE_UV, cb_txb_skip_context); |
1323 | 122k | } |
1324 | 127k | } |
1325 | | |
1326 | 127k | if (component_type == COMPONENT_CHROMA_CR || component_type == COMPONENT_CHROMA || |
1327 | 127k | component_type == COMPONENT_ALL) { |
1328 | | //Estimate the rate of the transform type and coefficient for chroma Cr |
1329 | 127k | if (cr_eob) { |
1330 | 5.46k | coeff_buffer = (int32_t*)&coeff_buffer_sb->v_buffer[txb_chroma_origin_index * sizeof(int32_t)]; |
1331 | | |
1332 | 5.46k | *cr_txb_coeff_bits = svt_av1_cost_coeffs_txb(ctx, |
1333 | 5.46k | allow_update_cdf, |
1334 | 5.46k | ec_ctx, |
1335 | 5.46k | cand_bf, |
1336 | 5.46k | coeff_buffer, |
1337 | 5.46k | (uint16_t)cr_eob, |
1338 | 5.46k | PLANE_TYPE_UV, |
1339 | 5.46k | txsize_uv, |
1340 | 5.46k | tx_type_uv, |
1341 | 5.46k | cr_txb_skip_context, |
1342 | 5.46k | cr_dc_sign_context, |
1343 | 5.46k | reduced_transform_set_flag); |
1344 | 122k | } else { |
1345 | 122k | *cr_txb_coeff_bits = av1_cost_skip_txb( |
1346 | 122k | ctx, allow_update_cdf, ec_ctx, txsize_uv, PLANE_TYPE_UV, cr_txb_skip_context); |
1347 | 122k | } |
1348 | 127k | } |
1349 | | |
1350 | 127k | return return_error; |
1351 | 127k | } |
1352 | | |
1353 | | EbErrorType svt_aom_full_cost_light_pd0(ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf, |
1354 | 7.54k | uint64_t* y_distortion, uint64_t lambda, uint64_t* y_coeff_bits) { |
1355 | 7.54k | EbErrorType return_error = EB_ErrorNone; |
1356 | | |
1357 | 7.54k | uint64_t coeff_rate = (*y_coeff_bits + (uint64_t)ctx->md_rate_est_ctx->skip_fac_bits[0][0]); |
1358 | | |
1359 | | // Assign full cost |
1360 | | // Use context index 0 for the partition rate as an approximation to skip call to |
1361 | | // av1_partition_rate_cost Partition cost is only needed for > 4x4 blocks, but light-PD0 assumes |
1362 | | // 4x4 blocks are disallowed |
1363 | 7.54k | *(cand_bf->full_cost) = RDCOST( |
1364 | 7.54k | lambda, coeff_rate + ctx->md_rate_est_ctx->partition_fac_bits[0][PARTITION_NONE], y_distortion[0]); |
1365 | 7.54k | return return_error; |
1366 | 7.54k | } |
1367 | | |
1368 | | /********************************************************************************* |
1369 | | * svt_aom_av1_full_cost function is used to estimate the cost of a candidate mode |
1370 | | * for full mode decision module. |
1371 | | **********************************************************************************/ |
1372 | | void svt_aom_full_cost(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf, |
1373 | | uint64_t lambda, uint64_t y_distortion[DIST_TOTAL][DIST_CALC_TOTAL], |
1374 | | uint64_t cb_distortion[DIST_TOTAL][DIST_CALC_TOTAL], |
1375 | | uint64_t cr_distortion[DIST_TOTAL][DIST_CALC_TOTAL], uint64_t* y_coeff_bits, |
1376 | 244k | uint64_t* cb_coeff_bits, uint64_t* cr_coeff_bits) { |
1377 | 244k | const uint8_t skip_coeff_ctx = ctx->skip_coeff_ctx; |
1378 | 244k | const bool update_full_cost_ssim = ctx->tune_ssim_level > SSIM_LVL_0 ? true : false; |
1379 | | |
1380 | | // Get the TX size rate for skip and non-skip block. Need both to make non-skip decision |
1381 | 244k | uint64_t non_skip_tx_size_bits = 0, skip_tx_size_bits = 0; |
1382 | 244k | if (!ctx->shut_fast_rate && pcs->ppcs->frm_hdr.tx_mode == TX_MODE_SELECT) { |
1383 | 126k | if (cand_bf->block_has_coeff) { |
1384 | 5.87k | non_skip_tx_size_bits = svt_aom_get_tx_size_bits( |
1385 | 5.87k | cand_bf, ctx, pcs, cand_bf->cand->block_mi.tx_depth, /*cand_bf->block_has_coeff*/ 1); |
1386 | 5.87k | } |
1387 | | |
1388 | 126k | skip_tx_size_bits = svt_aom_get_tx_size_bits( |
1389 | 126k | cand_bf, ctx, pcs, cand_bf->cand->block_mi.tx_depth, /*cand_bf->block_has_coeff*/ 0); |
1390 | 126k | } |
1391 | | |
1392 | 244k | assert(IMPLIES(is_inter_mode(cand_bf->cand->block_mi.mode), skip_tx_size_bits == 0)); |
1393 | | |
1394 | | // Decide if block should be signalled as skip (send no coeffs) |
1395 | 244k | if (!svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id) && ctx->blk_skip_decision && |
1396 | 7.54k | cand_bf->block_has_coeff && is_inter_mode(cand_bf->cand->block_mi.mode)) { |
1397 | 0 | const uint64_t non_skip_cost = RDCOST( |
1398 | 0 | lambda, |
1399 | 0 | (*y_coeff_bits + *cb_coeff_bits + *cr_coeff_bits + non_skip_tx_size_bits + |
1400 | 0 | (uint64_t)ctx->md_rate_est_ctx->skip_fac_bits[skip_coeff_ctx][0]), |
1401 | 0 | (y_distortion[DIST_SSD][0] + cb_distortion[DIST_SSD][0] + cr_distortion[DIST_SSD][0])); |
1402 | |
|
1403 | 0 | const uint64_t skip_cost = RDCOST( |
1404 | 0 | lambda, |
1405 | 0 | ((uint64_t)ctx->md_rate_est_ctx->skip_fac_bits[skip_coeff_ctx][1]) + skip_tx_size_bits, |
1406 | 0 | (y_distortion[DIST_SSD][1] + cb_distortion[DIST_SSD][1] + cr_distortion[DIST_SSD][1])); |
1407 | | |
1408 | | // Update signals to correspond to skip_mode values (no coeffs, etc.) |
1409 | 0 | if (skip_cost < non_skip_cost) { |
1410 | 0 | y_distortion[DIST_SSD][0] = y_distortion[DIST_SSD][1]; |
1411 | 0 | cb_distortion[DIST_SSD][0] = cb_distortion[DIST_SSD][1]; |
1412 | 0 | cr_distortion[DIST_SSD][0] = cr_distortion[DIST_SSD][1]; |
1413 | |
|
1414 | 0 | y_distortion[DIST_SSIM][0] = y_distortion[DIST_SSIM][1]; |
1415 | 0 | cb_distortion[DIST_SSIM][0] = cb_distortion[DIST_SSIM][1]; |
1416 | 0 | cr_distortion[DIST_SSIM][0] = cr_distortion[DIST_SSIM][1]; |
1417 | 0 | cand_bf->block_has_coeff = 0; |
1418 | 0 | cand_bf->y_has_coeff = 0; |
1419 | 0 | cand_bf->u_has_coeff = 0; |
1420 | 0 | cand_bf->v_has_coeff = 0; |
1421 | 0 | cand_bf->cnt_nz_coeff = 0; |
1422 | | |
1423 | | // For inter modes, signalling skip means no TX depth is used and the TX type will be DCT_DCT |
1424 | 0 | cand_bf->cand->block_mi.tx_depth = 0; |
1425 | 0 | cand_bf->cand->transform_type_uv = DCT_DCT; |
1426 | 0 | memset(cand_bf->cand->transform_type, DCT_DCT, 16 * sizeof(cand_bf->cand->transform_type[0])); |
1427 | 0 | memset(&cand_bf->quant_dc, 0, sizeof(QuantDcData)); |
1428 | 0 | memset(&cand_bf->eob, 0, sizeof(EobData)); |
1429 | 0 | } |
1430 | 0 | } |
1431 | | |
1432 | 244k | uint64_t coeff_rate = 0; |
1433 | 244k | if (cand_bf->block_has_coeff) { |
1434 | 7.95k | coeff_rate = (*y_coeff_bits + *cb_coeff_bits + *cr_coeff_bits + non_skip_tx_size_bits + |
1435 | 7.95k | (uint64_t)ctx->md_rate_est_ctx->skip_fac_bits[skip_coeff_ctx][0]); |
1436 | 236k | } else { |
1437 | 236k | coeff_rate = ctx->md_rate_est_ctx->skip_fac_bits[skip_coeff_ctx][1] + skip_tx_size_bits; |
1438 | 236k | } |
1439 | | |
1440 | 244k | uint64_t mode_rate = cand_bf->fast_luma_rate + cand_bf->fast_chroma_rate + coeff_rate; |
1441 | 244k | uint64_t mode_distortion = y_distortion[DIST_SSD][0] + cb_distortion[DIST_SSD][0] + cr_distortion[DIST_SSD][0]; |
1442 | 244k | uint64_t mode_ssim_distortion = update_full_cost_ssim |
1443 | 244k | ? y_distortion[DIST_SSIM][0] + cb_distortion[DIST_SSIM][0] + cr_distortion[DIST_SSIM][0] |
1444 | 244k | : 0; |
1445 | 244k | uint64_t mode_cost = RDCOST(lambda, mode_rate, mode_distortion); |
1446 | | |
1447 | | // If skip_mode is allowed for this candidate, check cost of skip mode compared to regular cost |
1448 | 244k | if (cand_bf->cand->skip_mode_allowed == true) { |
1449 | 0 | const uint8_t skip_mode_ctx = ctx->skip_mode_ctx; |
1450 | | |
1451 | | // Skip mode cost |
1452 | 0 | const uint64_t skip_mode_rate = ctx->md_rate_est_ctx->skip_mode_fac_bits[skip_mode_ctx][1]; |
1453 | 0 | const uint64_t skip_mode_distortion = y_distortion[DIST_SSD][1] + cb_distortion[DIST_SSD][1] + |
1454 | 0 | cr_distortion[DIST_SSD][1]; |
1455 | 0 | const uint64_t skip_mode_ssim_distortion = update_full_cost_ssim |
1456 | 0 | ? y_distortion[DIST_SSIM][1] + cb_distortion[DIST_SSIM][1] + cr_distortion[DIST_SSIM][1] |
1457 | 0 | : 0; |
1458 | 0 | const uint64_t skip_mode_cost = RDCOST(lambda, skip_mode_rate, skip_mode_distortion); |
1459 | |
|
1460 | 0 | cand_bf->cand->block_mi.skip_mode = false; |
1461 | 0 | if (skip_mode_cost <= mode_cost) { |
1462 | | // Update candidate cost |
1463 | 0 | mode_cost = skip_mode_cost; |
1464 | 0 | mode_rate = skip_mode_rate; |
1465 | 0 | mode_distortion = skip_mode_distortion; |
1466 | 0 | mode_ssim_distortion = skip_mode_ssim_distortion; |
1467 | 0 | cand_bf->cand->block_mi.skip_mode = true; |
1468 | | |
1469 | | // Update signals to correspond to skip_mode values (no coeffs, etc.) |
1470 | 0 | cand_bf->block_has_coeff = 0; |
1471 | 0 | cand_bf->y_has_coeff = 0; |
1472 | 0 | cand_bf->u_has_coeff = 0; |
1473 | 0 | cand_bf->v_has_coeff = 0; |
1474 | 0 | cand_bf->cnt_nz_coeff = 0; |
1475 | 0 | cand_bf->cand->block_mi.tx_depth = 0; |
1476 | 0 | memset(cand_bf->cand->transform_type, DCT_DCT, 16 * sizeof(cand_bf->cand->transform_type[0])); |
1477 | 0 | cand_bf->cand->transform_type_uv = DCT_DCT; |
1478 | 0 | memset(&cand_bf->quant_dc, 0, sizeof(QuantDcData)); |
1479 | 0 | memset(&cand_bf->eob, 0, sizeof(EobData)); |
1480 | 0 | } |
1481 | 0 | } |
1482 | | |
1483 | | // Assign full cost |
1484 | 244k | *(cand_bf->full_cost) = mode_cost; |
1485 | 244k | cand_bf->total_rate = mode_rate; |
1486 | 244k | cand_bf->full_dist = (uint32_t)mode_distortion; |
1487 | 244k | if (update_full_cost_ssim) { |
1488 | 0 | assert(ctx->pd_pass == PD_PASS_1); |
1489 | 0 | assert(ctx->md_stage == MD_STAGE_3); |
1490 | 0 | *(cand_bf->full_cost_ssim) = RDCOST(lambda, mode_rate, mode_ssim_distortion); |
1491 | 0 | } |
1492 | 244k | return; |
1493 | 244k | } |
1494 | | |
1495 | | /************************************************************ |
1496 | | * Coding Loop Context Generation |
1497 | | ************************************************************/ |
1498 | 244k | void svt_aom_coding_loop_context_generation(PictureControlSet* pcs, ModeDecisionContext* ctx) { |
1499 | 244k | BlkStruct* blk_ptr = ctx->blk_ptr; |
1500 | 244k | MacroBlockD* xd = blk_ptr->av1xd; |
1501 | 244k | if (!ctx->shut_fast_rate) { |
1502 | 126k | if (pcs->slice_type == I_SLICE) { |
1503 | 126k | svt_aom_get_kf_y_mode_ctx(xd, &ctx->intra_luma_top_ctx, &ctx->intra_luma_left_ctx); |
1504 | 126k | } |
1505 | 126k | ctx->is_inter_ctx = svt_av1_get_intra_inter_context(xd); |
1506 | 126k | ctx->skip_mode_ctx = av1_get_skip_mode_context(xd); |
1507 | 126k | } |
1508 | | // Collect Neighbor ref cout |
1509 | 245k | if (pcs->slice_type != I_SLICE || pcs->ppcs->frm_hdr.allow_intrabc) { |
1510 | 0 | svt_aom_collect_neighbors_ref_counts_new(blk_ptr->av1xd); |
1511 | 0 | } |
1512 | | |
1513 | | // Skip Coeff Context |
1514 | 244k | ctx->skip_coeff_ctx = ctx->rate_est_ctrls.update_skip_coeff_ctx ? av1_get_skip_context(xd) : 0; |
1515 | 244k | } |
1516 | | |
1517 | 383k | static INLINE int block_signals_txsize(BlockSize bsize) { |
1518 | 383k | return bsize > BLOCK_4X4; |
1519 | 383k | } |
1520 | | |
1521 | 0 | static INLINE int get_vartx_max_txsize(/*const MbModeInfo *xd,*/ BlockSize bsize, int plane) { |
1522 | | /* if (xd->lossless[xd->mi[0]->segment_id]) return TX_4X4;*/ |
1523 | 0 | const TxSize max_txsize = blocksize_to_txsize[bsize]; |
1524 | 0 | if (plane == 0) { |
1525 | 0 | return max_txsize; // luma |
1526 | 0 | } |
1527 | 0 | return av1_get_adjusted_tx_size(max_txsize); // chroma |
1528 | 0 | } |
1529 | | |
1530 | 0 | static INLINE int max_block_wide(const MacroBlockD* xd, BlockSize bsize, int plane) { |
1531 | 0 | int max_blocks_wide = block_size_wide[bsize]; |
1532 | |
|
1533 | 0 | if (xd->mb_to_right_edge < 0) { |
1534 | 0 | max_blocks_wide += gcc_right_shift(xd->mb_to_right_edge, 3 + !!plane); |
1535 | 0 | } |
1536 | | |
1537 | | // Scale the width in the transform block unit. |
1538 | 0 | return max_blocks_wide >> tx_size_wide_log2[0]; |
1539 | 0 | } |
1540 | | |
1541 | 0 | static INLINE int max_block_high(const MacroBlockD* xd, BlockSize bsize, int plane) { |
1542 | 0 | int max_blocks_high = block_size_high[bsize]; |
1543 | |
|
1544 | 0 | if (xd->mb_to_bottom_edge < 0) { |
1545 | 0 | max_blocks_high += gcc_right_shift(xd->mb_to_bottom_edge, 3 + !!plane); |
1546 | 0 | } |
1547 | | |
1548 | | // Scale the height in the transform block unit. |
1549 | 0 | return max_blocks_high >> tx_size_high_log2[0]; |
1550 | 0 | } |
1551 | | |
1552 | | static INLINE void txfm_partition_update(TXFM_CONTEXT* above_ctx, TXFM_CONTEXT* left_ctx, TxSize tx_size, |
1553 | 0 | TxSize txb_size) { |
1554 | 0 | BlockSize bsize = txsize_to_bsize[txb_size]; |
1555 | 0 | assert(bsize < BLOCK_SIZES_ALL); |
1556 | 0 | int bh = mi_size_high[bsize]; |
1557 | 0 | int bw = mi_size_wide[bsize]; |
1558 | 0 | uint8_t txw = tx_size_wide[tx_size]; |
1559 | 0 | uint8_t txh = tx_size_high[tx_size]; |
1560 | 0 | int i; |
1561 | 0 | for (i = 0; i < bh; ++i) { |
1562 | 0 | left_ctx[i] = txh; |
1563 | 0 | } |
1564 | 0 | for (i = 0; i < bw; ++i) { |
1565 | 0 | above_ctx[i] = txw; |
1566 | 0 | } |
1567 | 0 | } |
1568 | | |
1569 | 0 | static INLINE TxSize get_sqr_tx_size(int tx_dim) { |
1570 | 0 | switch (tx_dim) { |
1571 | 0 | case 128: |
1572 | 0 | case 64: |
1573 | 0 | return TX_64X64; |
1574 | 0 | break; |
1575 | 0 | case 32: |
1576 | 0 | return TX_32X32; |
1577 | 0 | break; |
1578 | 0 | case 16: |
1579 | 0 | return TX_16X16; |
1580 | 0 | break; |
1581 | 0 | case 8: |
1582 | 0 | return TX_8X8; |
1583 | 0 | break; |
1584 | 0 | default: |
1585 | 0 | return TX_4X4; |
1586 | 0 | } |
1587 | 0 | } |
1588 | | |
1589 | | static INLINE int txfm_partition_context(TXFM_CONTEXT* above_ctx, TXFM_CONTEXT* left_ctx, BlockSize bsize, |
1590 | 0 | TxSize tx_size) { |
1591 | 0 | const uint8_t txw = tx_size_wide[tx_size]; |
1592 | 0 | const uint8_t txh = tx_size_high[tx_size]; |
1593 | 0 | const int above = *above_ctx < txw; |
1594 | 0 | const int left = *left_ctx < txh; |
1595 | 0 | int category = TXFM_PARTITION_CONTEXTS; |
1596 | | |
1597 | | // dummy return, not used by others. |
1598 | 0 | if (tx_size == TX_4X4) { |
1599 | 0 | return 0; |
1600 | 0 | } |
1601 | | |
1602 | 0 | TxSize max_tx_size = get_sqr_tx_size(AOMMAX(block_size_wide[bsize], block_size_high[bsize])); |
1603 | |
|
1604 | 0 | if (max_tx_size >= TX_8X8) { |
1605 | 0 | category = (txsize_sqr_up_map[tx_size] != max_tx_size && max_tx_size > TX_8X8) + |
1606 | 0 | (TX_SIZES - 1 - max_tx_size) * 2; |
1607 | 0 | } |
1608 | 0 | assert(category != TXFM_PARTITION_CONTEXTS); |
1609 | 0 | return category * 3 + above + left; |
1610 | 0 | } |
1611 | | |
1612 | | static uint64_t cost_tx_size_vartx(MacroBlockD* xd, const MbModeInfo* mbmi, TxSize tx_size, int depth, int blk_row, |
1613 | | int blk_col, MdRateEstimationContext* md_rate_est_ctx, FRAME_CONTEXT* ec_ctx, |
1614 | 0 | uint8_t allow_update_cdf) { |
1615 | 0 | uint64_t bits = 0; |
1616 | 0 | const int max_blocks_high = max_block_high(xd, mbmi->bsize, 0); |
1617 | 0 | const int max_blocks_wide = max_block_wide(xd, mbmi->bsize, 0); |
1618 | |
|
1619 | 0 | if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) { |
1620 | 0 | return bits; |
1621 | 0 | } |
1622 | | |
1623 | 0 | if (depth == MAX_VARTX_DEPTH) { |
1624 | 0 | txfm_partition_update(xd->above_txfm_context + blk_col, xd->left_txfm_context + blk_row, tx_size, tx_size); |
1625 | |
|
1626 | 0 | return bits; |
1627 | 0 | } |
1628 | | |
1629 | 0 | const int ctx = txfm_partition_context( |
1630 | 0 | xd->above_txfm_context + blk_col, xd->left_txfm_context + blk_row, mbmi->bsize, tx_size); |
1631 | 0 | const int write_txfm_partition = (tx_size == tx_depth_to_tx_size[mbmi->block_mi.tx_depth][mbmi->bsize]); |
1632 | 0 | if (write_txfm_partition) { |
1633 | 0 | bits += md_rate_est_ctx->txfm_partition_fac_bits[ctx][0]; |
1634 | |
|
1635 | 0 | if (allow_update_cdf) { |
1636 | 0 | update_cdf(ec_ctx->txfm_partition_cdf[ctx], 0, 2); |
1637 | 0 | } |
1638 | |
|
1639 | 0 | txfm_partition_update(xd->above_txfm_context + blk_col, xd->left_txfm_context + blk_row, tx_size, tx_size); |
1640 | |
|
1641 | 0 | } else { |
1642 | 0 | assert(tx_size < TX_SIZES_ALL); |
1643 | 0 | const TxSize sub_txs = eb_sub_tx_size_map[tx_size]; |
1644 | 0 | const int bsw = eb_tx_size_wide_unit[sub_txs]; |
1645 | 0 | const int bsh = eb_tx_size_high_unit[sub_txs]; |
1646 | |
|
1647 | 0 | bits += md_rate_est_ctx->txfm_partition_fac_bits[ctx][1]; |
1648 | |
|
1649 | 0 | if (allow_update_cdf) { |
1650 | 0 | update_cdf(ec_ctx->txfm_partition_cdf[ctx], 1, 2); |
1651 | 0 | } |
1652 | |
|
1653 | 0 | if (sub_txs == TX_4X4) { |
1654 | 0 | txfm_partition_update(xd->above_txfm_context + blk_col, xd->left_txfm_context + blk_row, sub_txs, tx_size); |
1655 | |
|
1656 | 0 | return bits; |
1657 | 0 | } |
1658 | | |
1659 | 0 | assert(bsw > 0 && bsh > 0); |
1660 | 0 | for (int row = 0; row < eb_tx_size_high_unit[tx_size]; row += bsh) { |
1661 | 0 | for (int col = 0; col < eb_tx_size_wide_unit[tx_size]; col += bsw) { |
1662 | 0 | int offsetr = blk_row + row; |
1663 | 0 | int offsetc = blk_col + col; |
1664 | 0 | bits += cost_tx_size_vartx( |
1665 | 0 | xd, mbmi, sub_txs, depth + 1, offsetr, offsetc, md_rate_est_ctx, ec_ctx, allow_update_cdf); |
1666 | 0 | } |
1667 | 0 | } |
1668 | 0 | } |
1669 | 0 | return bits; |
1670 | 0 | } |
1671 | | |
1672 | 741k | static INLINE void set_txfm_ctx(TXFM_CONTEXT* txfm_ctx, uint8_t txs, int len) { |
1673 | 741k | int i; |
1674 | 2.38M | for (i = 0; i < len; ++i) { |
1675 | 1.64M | txfm_ctx[i] = txs; |
1676 | 1.64M | } |
1677 | 741k | } |
1678 | | |
1679 | 372k | static INLINE void set_txfm_ctxs(TxSize tx_size, int n8_w, int n8_h, int skip, const MacroBlockD* xd) { |
1680 | 372k | uint8_t bw = tx_size_wide[tx_size]; |
1681 | 372k | uint8_t bh = tx_size_high[tx_size]; |
1682 | | |
1683 | 372k | if (skip) { |
1684 | 0 | bw = n8_w * MI_SIZE; |
1685 | 0 | bh = n8_h * MI_SIZE; |
1686 | 0 | } |
1687 | | |
1688 | 372k | set_txfm_ctx(xd->above_txfm_context, bw, n8_w); |
1689 | 372k | set_txfm_ctx(xd->left_txfm_context, bh, n8_h); |
1690 | 372k | } |
1691 | | |
1692 | 11.3k | static INLINE int tx_size_to_depth(TxSize tx_size, BlockSize bsize) { |
1693 | 11.3k | TxSize ctx_size = blocksize_to_txsize[bsize]; |
1694 | 11.3k | int depth = 0; |
1695 | 11.3k | while (tx_size != ctx_size) { |
1696 | 0 | depth++; |
1697 | 0 | ctx_size = eb_sub_tx_size_map[ctx_size]; |
1698 | 0 | assert(depth <= MAX_TX_DEPTH); |
1699 | 0 | } |
1700 | 11.3k | return depth; |
1701 | 11.3k | } |
1702 | | |
1703 | | // Returns a context number for the given MB prediction signal |
1704 | | // The mode info data structure has a one element border above and to the |
1705 | | // left of the entries corresponding to real blocks. |
1706 | | // The prediction flags in these dummy entries are initialized to 0. |
1707 | 11.3k | static INLINE int get_tx_size_context(const MacroBlockD* xd) { |
1708 | 11.3k | const MbModeInfo* mbmi = xd->mi[0]; |
1709 | 11.3k | const MbModeInfo* const above_mbmi = xd->above_mbmi; |
1710 | 11.3k | const MbModeInfo* const left_mbmi = xd->left_mbmi; |
1711 | 11.3k | const TxSize max_tx_size = blocksize_to_txsize[mbmi->bsize]; |
1712 | 11.3k | const int max_tx_wide = tx_size_wide[max_tx_size]; |
1713 | 11.3k | const int max_tx_high = tx_size_high[max_tx_size]; |
1714 | 11.3k | const int has_above = xd->up_available; |
1715 | 11.3k | const int has_left = xd->left_available; |
1716 | | |
1717 | 11.3k | int above = xd->above_txfm_context[0] >= max_tx_wide; |
1718 | 11.3k | int left = xd->left_txfm_context[0] >= max_tx_high; |
1719 | | |
1720 | 11.3k | if (has_above) { |
1721 | 2.84k | if (is_inter_block(&above_mbmi->block_mi)) { |
1722 | 0 | above = block_size_wide[above_mbmi->bsize] >= max_tx_wide; |
1723 | 0 | } |
1724 | 2.84k | } |
1725 | | |
1726 | 11.3k | if (has_left) { |
1727 | 2.81k | if (is_inter_block(&left_mbmi->block_mi)) { |
1728 | 0 | left = block_size_high[left_mbmi->bsize] >= max_tx_high; |
1729 | 0 | } |
1730 | 2.81k | } |
1731 | | |
1732 | 11.3k | if (has_above && has_left) { |
1733 | 148 | return (above + left); |
1734 | 11.1k | } else if (has_above) { |
1735 | 2.69k | return above; |
1736 | 8.49k | } else if (has_left) { |
1737 | 2.66k | return left; |
1738 | 5.83k | } else { |
1739 | 5.83k | return 0; |
1740 | 5.83k | } |
1741 | 11.3k | } |
1742 | | |
1743 | | static uint64_t cost_selected_tx_size(const MacroBlockD* xd, MdRateEstimationContext* md_rate_est_ctx, TxSize tx_size, |
1744 | 11.3k | FRAME_CONTEXT* ec_ctx, uint8_t allow_update_cdf) { |
1745 | 11.3k | const MbModeInfo* const mbmi = xd->mi[0]; |
1746 | 11.3k | const BlockSize bsize = mbmi->bsize; |
1747 | 11.3k | uint64_t bits = 0; |
1748 | | |
1749 | 11.3k | if (block_signals_txsize(bsize)) { |
1750 | 11.3k | const int tx_size_ctx = get_tx_size_context(xd); |
1751 | 11.3k | assert(bsize < BLOCK_SIZES_ALL); |
1752 | 11.3k | const int depth = tx_size_to_depth(tx_size, bsize); |
1753 | 11.3k | const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize); |
1754 | 11.3k | bits += md_rate_est_ctx->tx_size_fac_bits[tx_size_cat][tx_size_ctx][depth]; |
1755 | | |
1756 | 11.3k | if (allow_update_cdf) { |
1757 | 0 | const int max_depths = bsize_to_max_depth(bsize); |
1758 | 0 | assert(depth >= 0 && depth <= max_depths); |
1759 | 0 | assert(!is_inter_block(&mbmi->block_mi)); |
1760 | 0 | assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed(/*xd,*/ mbmi))); |
1761 | 0 | update_cdf(ec_ctx->tx_size_cdf[tx_size_cat][tx_size_ctx], depth, max_depths + 1); |
1762 | 0 | } |
1763 | 11.3k | } |
1764 | | |
1765 | 11.3k | return bits; |
1766 | 11.3k | } |
1767 | | |
1768 | | /* Get the TXS rate and update the txfm context. If allow_update_cdf is true, the TX size CDFs will |
1769 | | be updated. */ |
1770 | | uint64_t svt_aom_tx_size_bits(PictureControlSet* pcs, uint8_t segment_id, MdRateEstimationContext* md_rate_est_ctx, |
1771 | | MacroBlockD* xd, const MbModeInfo* mbmi, TxSize tx_size, TxMode tx_mode, BlockSize bsize, |
1772 | 372k | uint8_t skip, FRAME_CONTEXT* ec_ctx, uint8_t allow_update_cdf) { |
1773 | 372k | uint64_t bits = 0; |
1774 | 372k | int is_inter_tx = is_inter_block(&mbmi->block_mi); |
1775 | 372k | if (tx_mode == TX_MODE_SELECT && block_signals_txsize(bsize) && !(is_inter_tx && skip) && |
1776 | 372k | !svt_av1_is_lossless_segment(pcs, segment_id)) { |
1777 | 11.3k | if (is_inter_tx) { // This implies skip flag is 0. |
1778 | 0 | const TxSize max_tx_size = get_vartx_max_txsize(/*xd,*/ bsize, 0); |
1779 | 0 | const int txbh = eb_tx_size_high_unit[max_tx_size]; |
1780 | 0 | const int txbw = eb_tx_size_wide_unit[max_tx_size]; |
1781 | 0 | const int width = block_size_wide[bsize] >> tx_size_wide_log2[0]; |
1782 | 0 | const int height = block_size_high[bsize] >> tx_size_high_log2[0]; |
1783 | 0 | int idx, idy; |
1784 | 0 | for (idy = 0; idy < height; idy += txbh) { |
1785 | 0 | for (idx = 0; idx < width; idx += txbw) { |
1786 | 0 | bits += cost_tx_size_vartx( |
1787 | 0 | xd, mbmi, max_tx_size, 0, idy, idx, md_rate_est_ctx, ec_ctx, allow_update_cdf); |
1788 | 0 | } |
1789 | 0 | } |
1790 | 11.3k | } else { |
1791 | 11.3k | bits += cost_selected_tx_size(xd, md_rate_est_ctx, tx_size, ec_ctx, allow_update_cdf); |
1792 | 11.3k | set_txfm_ctxs(tx_size, xd->n8_w, xd->n8_h, 0, xd); |
1793 | 11.3k | } |
1794 | 361k | } else { |
1795 | 361k | set_txfm_ctxs(tx_size, xd->n8_w, xd->n8_h, skip && is_inter_block(&mbmi->block_mi), xd); |
1796 | 361k | } |
1797 | | |
1798 | 372k | return bits; |
1799 | 372k | } |
1800 | | |
1801 | | /* Get the TXS rate. A dummy txfm context array will be used, so context updates will not be saved for |
1802 | | future blocks. */ |
1803 | | uint64_t svt_aom_get_tx_size_bits(ModeDecisionCandidateBuffer* candidateBuffer, ModeDecisionContext* ctx, |
1804 | 373k | PictureControlSet* pcs, uint8_t tx_depth, bool block_has_coeff) { |
1805 | 373k | NeighborArrayUnit* txfm_context_array = ctx->txfm_context_array; |
1806 | 373k | uint32_t txfm_context_left_index = get_neighbor_array_unit_left_index(txfm_context_array, ctx->blk_org_y); |
1807 | 373k | uint32_t txfm_context_above_index = get_neighbor_array_unit_top_index(txfm_context_array, ctx->blk_org_x); |
1808 | | |
1809 | 373k | TxMode tx_mode = pcs->ppcs->frm_hdr.tx_mode; |
1810 | 373k | MacroBlockD* xd = ctx->blk_ptr->av1xd; |
1811 | 373k | BlockSize bsize = ctx->blk_geom->bsize; |
1812 | 373k | const TxSize tx_size = tx_depth_to_tx_size[tx_depth][bsize]; |
1813 | 373k | MbModeInfo* mbmi = xd->mi[0]; |
1814 | | |
1815 | 373k | svt_memcpy(ctx->above_txfm_context, |
1816 | 373k | &(txfm_context_array->top_array[txfm_context_above_index]), |
1817 | 373k | (ctx->blk_geom->bwidth >> MI_SIZE_LOG2) * sizeof(TXFM_CONTEXT)); |
1818 | 373k | svt_memcpy(ctx->left_txfm_context, |
1819 | 373k | &(txfm_context_array->left_array[txfm_context_left_index]), |
1820 | 373k | (ctx->blk_geom->bheight >> MI_SIZE_LOG2) * sizeof(TXFM_CONTEXT)); |
1821 | | |
1822 | 373k | xd->above_txfm_context = ctx->above_txfm_context; |
1823 | 373k | xd->left_txfm_context = ctx->left_txfm_context; |
1824 | 373k | mbmi->bsize = ctx->blk_geom->bsize; |
1825 | 373k | mbmi->block_mi.use_intrabc = candidateBuffer->cand->block_mi.use_intrabc; |
1826 | 373k | mbmi->block_mi.ref_frame[0] = candidateBuffer->cand->block_mi.ref_frame[0]; |
1827 | 373k | mbmi->block_mi.tx_depth = tx_depth; |
1828 | | |
1829 | 373k | const uint64_t bits = svt_aom_tx_size_bits(pcs, |
1830 | 373k | ctx->blk_ptr->segment_id, |
1831 | 373k | ctx->md_rate_est_ctx, |
1832 | 373k | xd, |
1833 | 373k | mbmi, |
1834 | 373k | tx_size, |
1835 | 373k | tx_mode, |
1836 | 373k | bsize, |
1837 | 373k | !block_has_coeff, |
1838 | 373k | NULL, |
1839 | 373k | 0); |
1840 | 373k | return bits; |
1841 | 373k | } |
1842 | | |
1843 | | /* |
1844 | | * av1_partition_rate_cost function is used to generate the rate of signaling the |
1845 | | * partition type for a given block. |
1846 | | */ |
1847 | | int64_t svt_aom_partition_rate_cost(PictureParentControlSet* ppcs, const BlockSize bsize, const int mi_row, |
1848 | | const int mi_col, MdRateEstimationContext* md_rate_est_ctx, PartitionType p, |
1849 | 344k | const PartitionContextType left_ctx, const PartitionContextType above_ctx) { |
1850 | 344k | if (bsize < BLOCK_8X8) { |
1851 | 0 | return 0; |
1852 | 0 | } |
1853 | 344k | assert(bsize < BLOCK_SIZES_ALL && mi_size_wide_log2[bsize] == mi_size_high_log2[bsize]); |
1854 | | |
1855 | 344k | const int hbs = mi_size_wide[bsize] >> 1; |
1856 | 344k | const int has_rows = (mi_row + hbs) < ppcs->av1_cm->mi_rows; |
1857 | 344k | const int has_cols = (mi_col + hbs) < ppcs->av1_cm->mi_cols; |
1858 | | // Don't consider invalid partitions or blocks outside the picture |
1859 | 344k | if (!has_rows && !has_cols) { |
1860 | 840 | return 0; |
1861 | 840 | } |
1862 | | |
1863 | 343k | const int bsl = mi_size_wide_log2[bsize] - mi_size_wide_log2[BLOCK_8X8]; |
1864 | 343k | assert(bsl >= 0); |
1865 | | |
1866 | 343k | const int above = (above_ctx >> bsl) & 1, left = (left_ctx >> bsl) & 1; |
1867 | 343k | const uint32_t context_index = (left * 2 + above) + bsl * PARTITION_PLOFFSET; |
1868 | | |
1869 | 343k | uint64_t split_rate = 0; |
1870 | | |
1871 | 343k | if (has_rows && has_cols) { |
1872 | 327k | split_rate = (uint64_t)md_rate_est_ctx->partition_fac_bits[context_index][p]; |
1873 | 327k | } else if (!has_rows && has_cols) { |
1874 | | // 8x8 blocks will not use the split_or_horz or the split_or_vert paritition CDFs, per |
1875 | | // section 8.3.2 of the AV1 spec (Cdf selection process). Therefore, only update partition ctx 4+, |
1876 | | // which corresponds to the paritition CDFs for 16x16 and larger blocks |
1877 | 8.24k | assert(bsize != BLOCK_8X8); |
1878 | 8.24k | split_rate = bsize == BLOCK_128X128 |
1879 | 8.24k | ? (uint64_t)md_rate_est_ctx->partition_vert_alike_128x128_fac_bits[context_index][p == PARTITION_SPLIT] |
1880 | 8.24k | : (uint64_t)md_rate_est_ctx->partition_vert_alike_fac_bits[context_index][p == PARTITION_SPLIT]; |
1881 | 8.24k | } else { |
1882 | | // 8x8 blocks will not use the split_or_horz or the split_or_vert paritition CDFs, per |
1883 | | // section 8.3.2 of the AV1 spec (Cdf selection process). Therefore, only update partition ctx 4+, |
1884 | | // which corresponds to the paritition CDFs for 16x16 and larger blocks |
1885 | 7.75k | assert(bsize != BLOCK_8X8); |
1886 | 7.75k | split_rate = bsize == BLOCK_128X128 |
1887 | 7.75k | ? (uint64_t)md_rate_est_ctx->partition_horz_alike_128x128_fac_bits[context_index][p == PARTITION_SPLIT] |
1888 | 7.75k | : (uint64_t)md_rate_est_ctx->partition_horz_alike_fac_bits[context_index][p == PARTITION_SPLIT]; |
1889 | 7.75k | } |
1890 | | |
1891 | 343k | return split_rate; |
1892 | 344k | } |