/work/svt-av1/Source/Lib/Codec/rd_cost.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright(c) 2019 Intel Corporation |
3 | | * Copyright (c) 2016, Alliance for Open Media. All rights reserved |
4 | | * |
5 | | * This source code is subject to the terms of the BSD 2 Clause License and |
6 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
7 | | * was not distributed with this source code in the LICENSE file, you can |
8 | | * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open |
9 | | * Media Patent License 1.0 was not distributed with this source code in the |
10 | | * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license. |
11 | | */ |
12 | | |
13 | | /*************************************** |
14 | | * Includes |
15 | | ***************************************/ |
16 | | #include "rd_cost.h" |
17 | | #include "common_utils.h" |
18 | | #include "aom_dsp_rtcd.h" |
19 | | #include "svt_log.h" |
20 | | #include "enc_inter_prediction.h" |
21 | | #include "full_loop.h" |
22 | | #include "entropy_coding.h" |
23 | | |
24 | | #include <assert.h> |
25 | | |
26 | 0 | #define MV_COST_WEIGHT 108 |
27 | | int svt_aom_get_reference_mode_context_new(const MacroBlockD* xd); |
28 | | int svt_av1_get_pred_context_uni_comp_ref_p(const MacroBlockD* xd); |
29 | | int svt_av1_get_pred_context_uni_comp_ref_p1(const MacroBlockD* xd); |
30 | | int svt_av1_get_pred_context_uni_comp_ref_p2(const MacroBlockD* xd); |
31 | | int svt_aom_get_comp_reference_type_context_new(const MacroBlockD* xd); |
32 | | |
33 | | int svt_aom_get_palette_bsize_ctx(BlockSize bsize); |
34 | | int svt_aom_get_palette_mode_ctx(const MacroBlockD* xd); |
35 | | int svt_aom_write_uniform_cost(int n, int v); |
36 | | int svt_get_palette_cache_y(const MacroBlockD* const xd, uint16_t* cache); |
37 | | int svt_av1_palette_color_cost_y(const PaletteModeInfo* const pmi, uint16_t* color_cache, const int palette_size, |
38 | | int n_cache, int bit_depth); |
39 | | int svt_av1_cost_color_map(ModeDecisionCandidate* cand, MdRateEstimationContext* rate_table, |
40 | | |
41 | | BlkStruct* blk_ptr, int plane, BlockSize bsize, COLOR_MAP_TYPE type); |
42 | | void svt_aom_get_block_dimensions(BlockSize bsize, int plane, const MacroBlockD* xd, int* width, int* height, |
43 | | int* rows_within_bounds, int* cols_within_bounds); |
44 | | int svt_aom_allow_palette(int allow_screen_content_tools, BlockSize bsize); |
45 | | int svt_aom_allow_intrabc(const FrameHeader* frm_hdr, SliceType slice_type); |
46 | | |
47 | 0 | MvJointType svt_av1_get_mv_joint(const Mv* mv) { |
48 | 0 | if (mv->y == 0) { |
49 | 0 | return mv->x == 0 ? MV_JOINT_ZERO : MV_JOINT_HNZVZ; |
50 | 0 | } else { |
51 | 0 | return mv->x == 0 ? MV_JOINT_HZVNZ : MV_JOINT_HNZVNZ; |
52 | 0 | } |
53 | 0 | } |
54 | | |
55 | 0 | static int32_t mv_cost(const Mv* mv, const int32_t* joint_cost, const int32_t* const comp_cost[2]) { |
56 | 0 | int32_t jn_c = svt_av1_get_mv_joint(mv); |
57 | 0 | int32_t res = joint_cost[jn_c] + comp_cost[0][CLIP3(MV_LOW, MV_UPP, mv->y)] + |
58 | 0 | comp_cost[1][CLIP3(MV_LOW, MV_UPP, mv->x)]; |
59 | 0 | return res; |
60 | 0 | } |
61 | | |
62 | 0 | int32_t svt_av1_mv_bit_cost_light(const Mv* mv, const Mv* ref) { |
63 | 0 | const uint32_t factor = 50; |
64 | 0 | const uint32_t absmvdiffx = ABS(mv->x - ref->x); |
65 | 0 | const uint32_t absmvdiffy = ABS(mv->y - ref->y); |
66 | 0 | const uint32_t mv_rate = 1296 + (factor * (absmvdiffx + absmvdiffy)); |
67 | 0 | return mv_rate; |
68 | 0 | } |
69 | | |
70 | | int32_t svt_av1_mv_bit_cost(const Mv* mv, const Mv* ref, const int32_t* mvjcost, const int32_t* const mvcost[2], |
71 | 0 | int32_t weight) { |
72 | | // Restrict the size of the MV diff to be within the max AV1 range. If the MV diff |
73 | | // is outside this range, the diff will index beyond the cost array, causing a seg fault. |
74 | | // Both the MVs and the MV diffs should be within the allowable range for accessing the MV cost |
75 | | // infrastructure. |
76 | 0 | const int16_t x = MIN(MAX(mv->x - ref->x, MV_LOW), MV_UPP); |
77 | 0 | const int16_t y = MIN(MAX(mv->y - ref->y, MV_LOW), MV_UPP); |
78 | 0 | Mv temp_diff = {{x, y}}; |
79 | |
|
80 | 0 | return ROUND_POWER_OF_TWO(mv_cost(&temp_diff, mvjcost, mvcost) * weight, 7); |
81 | 0 | } |
82 | | |
83 | | /////////////////////////////COEFFICIENT CALCULATION ////////////////////////////////////////////// |
84 | 10.5k | static INLINE int32_t get_golomb_cost(int32_t abs_qc) { |
85 | 10.5k | if (abs_qc >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) { |
86 | 10.5k | const int32_t r = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS; |
87 | 10.5k | const int32_t length = get_msb(r) + 1; |
88 | 10.5k | return av1_cost_literal(2 * length - 1); |
89 | 10.5k | } |
90 | 0 | return 0; |
91 | 10.5k | } |
92 | | |
93 | | void svt_av1_txb_init_levels_c(const TranLow* const coeff, const int32_t width, const int32_t height, |
94 | 0 | uint8_t* const levels) { |
95 | 0 | uint8_t* ls = levels; |
96 | |
|
97 | 0 | for (int32_t i = 0; i < height; i++) { |
98 | 0 | for (int32_t j = 0; j < width; j++) { |
99 | 0 | *ls++ = (uint8_t)clamp(abs(coeff[i * width + j]), 0, INT8_MAX); |
100 | 0 | } |
101 | 0 | for (int32_t j = 0; j < TX_PAD_HOR; j++) { |
102 | 0 | *ls++ = 0; |
103 | 0 | } |
104 | 0 | } |
105 | 0 | } |
106 | | |
107 | | static int32_t av1_transform_type_rate_estimation(ModeDecisionContext* ctx, uint8_t allow_update_cdf, FRAME_CONTEXT* fc, |
108 | | ModeDecisionCandidateBuffer* cand_bf, bool is_inter, |
109 | | TxSize transform_size, TxType transform_type, |
110 | 0 | bool reduced_tx_set_used) { |
111 | | // const MbModeInfo *mbmi = &xd->mi[0]->mbmi; |
112 | | // const int32_t is_inter = is_inter_block(mbmi); |
113 | |
|
114 | 0 | if (get_ext_tx_types(transform_size, is_inter, reduced_tx_set_used) > |
115 | 0 | 1 /*&& !xd->lossless[xd->mi[0]->mbmi.segment_id] WE ARE NOT LOSSLESS*/) { |
116 | 0 | const TxSize square_tx_size = txsize_sqr_map[transform_size]; |
117 | 0 | assert(square_tx_size < EXT_TX_SIZES); |
118 | |
|
119 | 0 | const int32_t ext_tx_set = get_ext_tx_set(transform_size, is_inter, reduced_tx_set_used); |
120 | 0 | if (is_inter) { |
121 | 0 | if (ext_tx_set > 0) { |
122 | 0 | if (allow_update_cdf) { |
123 | 0 | const TxSetType tx_set_type = get_ext_tx_set_type(transform_size, is_inter, reduced_tx_set_used); |
124 | |
|
125 | 0 | update_cdf(fc->inter_ext_tx_cdf[ext_tx_set][square_tx_size], |
126 | 0 | av1_ext_tx_ind[tx_set_type][transform_type], |
127 | 0 | av1_num_ext_tx_set[tx_set_type]); |
128 | 0 | } |
129 | 0 | return ctx->md_rate_est_ctx->inter_tx_type_fac_bits[ext_tx_set][square_tx_size][transform_type]; |
130 | 0 | } |
131 | 0 | } else { |
132 | 0 | if (ext_tx_set > 0) { |
133 | 0 | PredictionMode intra_dir; |
134 | 0 | if (cand_bf->cand->block_mi.filter_intra_mode != FILTER_INTRA_MODES) { |
135 | 0 | intra_dir = fimode_to_intradir[cand_bf->cand->block_mi.filter_intra_mode]; |
136 | 0 | } else { |
137 | 0 | intra_dir = cand_bf->cand->block_mi.mode; |
138 | 0 | } |
139 | 0 | assert(intra_dir < INTRA_MODES); |
140 | 0 | const TxSetType tx_set_type = get_ext_tx_set_type(transform_size, is_inter, reduced_tx_set_used); |
141 | |
|
142 | 0 | if (allow_update_cdf) { |
143 | 0 | update_cdf(fc->intra_ext_tx_cdf[ext_tx_set][square_tx_size][intra_dir], |
144 | 0 | av1_ext_tx_ind[tx_set_type][transform_type], |
145 | 0 | av1_num_ext_tx_set[tx_set_type]); |
146 | 0 | } |
147 | 0 | return ctx->md_rate_est_ctx |
148 | 0 | ->intra_tx_type_fac_bits[ext_tx_set][square_tx_size][intra_dir][transform_type]; |
149 | 0 | } |
150 | 0 | } |
151 | 0 | } |
152 | 0 | return 0; |
153 | 0 | } |
154 | | |
155 | | // Update the eob-related CDFs. Function assumes allow_update_cdf is true |
156 | | // as the only action of the function is to update the CDFs. |
157 | 0 | static void update_eob_context(int eob, TxSize tx_size, TxClass tx_class, PlaneType plane, FRAME_CONTEXT* ec_ctx) { |
158 | 0 | int eob_extra; |
159 | 0 | const int eob_pt = get_eob_pos_token(eob, &eob_extra); |
160 | 0 | const TxSize txs_ctx = (TxSize)((txsize_sqr_map[tx_size] + txsize_sqr_up_map[tx_size] + 1) >> 1); |
161 | 0 | assert(txs_ctx < TX_SIZES); |
162 | 0 | const int eob_multi_size = txsize_log2_minus4[tx_size]; |
163 | 0 | const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1; |
164 | |
|
165 | 0 | switch (eob_multi_size) { |
166 | 0 | case 0: |
167 | 0 | update_cdf(ec_ctx->eob_flag_cdf16[plane][eob_multi_ctx], eob_pt - 1, 5); |
168 | 0 | break; |
169 | 0 | case 1: |
170 | 0 | update_cdf(ec_ctx->eob_flag_cdf32[plane][eob_multi_ctx], eob_pt - 1, 6); |
171 | 0 | break; |
172 | 0 | case 2: |
173 | 0 | update_cdf(ec_ctx->eob_flag_cdf64[plane][eob_multi_ctx], eob_pt - 1, 7); |
174 | 0 | break; |
175 | 0 | case 3: |
176 | 0 | update_cdf(ec_ctx->eob_flag_cdf128[plane][eob_multi_ctx], eob_pt - 1, 8); |
177 | 0 | break; |
178 | 0 | case 4: |
179 | 0 | update_cdf(ec_ctx->eob_flag_cdf256[plane][eob_multi_ctx], eob_pt - 1, 9); |
180 | 0 | break; |
181 | 0 | case 5: |
182 | 0 | update_cdf(ec_ctx->eob_flag_cdf512[plane][eob_multi_ctx], eob_pt - 1, 10); |
183 | 0 | break; |
184 | 0 | case 6: |
185 | 0 | default: |
186 | 0 | update_cdf(ec_ctx->eob_flag_cdf1024[plane][eob_multi_ctx], eob_pt - 1, 11); |
187 | 0 | break; |
188 | 0 | } |
189 | | |
190 | 0 | if (eob_pt > 2) { |
191 | 0 | const int cnt = eob_pt - 3; |
192 | 0 | const int bit = (eob_extra >> cnt) & 1; |
193 | 0 | update_cdf(ec_ctx->eob_extra_cdf[txs_ctx][plane][cnt], bit, 2); |
194 | 0 | } |
195 | 0 | } |
196 | | |
197 | | // Transform end of block bit estimation |
198 | 24.1k | int get_eob_cost(int eob, const LvMapEobCost* txb_eob_costs, const LvMapCoeffCost* txb_costs, TxClass tx_class) { |
199 | 24.1k | int eob_extra; |
200 | 24.1k | const int eob_pt = get_eob_pos_token(eob, &eob_extra); |
201 | 24.1k | const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1; |
202 | 24.1k | int eob_cost = txb_eob_costs->eob_cost[eob_multi_ctx][eob_pt - 1]; |
203 | | |
204 | 24.1k | if (eob_pt > 2) { |
205 | 0 | const int cnt = eob_pt - 3; |
206 | 0 | const int bit = (eob_extra >> cnt) & 1; |
207 | 0 | eob_cost += txb_costs->eob_extra_cost[cnt][bit]; |
208 | 0 | eob_cost += av1_cost_literal(cnt); |
209 | 0 | } |
210 | 24.1k | return eob_cost; |
211 | 24.1k | } |
212 | | |
213 | | static INLINE int32_t av1_cost_skip_txb(ModeDecisionContext* ctx, uint8_t allow_update_cdf, FRAME_CONTEXT* ec_ctx, |
214 | 278k | TxSize transform_size, PlaneType plane_type, int16_t txb_skip_ctx) { |
215 | 278k | const TxSize txs_ctx = (TxSize)((txsize_sqr_map[transform_size] + txsize_sqr_up_map[transform_size] + 1) >> 1); |
216 | 278k | assert(txs_ctx < TX_SIZES); |
217 | 278k | const LvMapCoeffCost* const coeff_costs = &ctx->md_rate_est_ctx->coeff_fac_bits[txs_ctx][plane_type]; |
218 | 278k | if (allow_update_cdf) { |
219 | 0 | update_cdf(ec_ctx->txb_skip_cdf[txs_ctx][txb_skip_ctx], 1, 2); |
220 | 0 | } |
221 | 278k | return coeff_costs->txb_skip_cost[txb_skip_ctx][1]; |
222 | 278k | } |
223 | | |
224 | | static INLINE int32_t av1_cost_coeffs_txb_loop_cost_one_eob(const TranLow* const qcoeff, int8_t* const coeff_contexts, |
225 | 12.4k | const LvMapCoeffCost* coeff_costs, int16_t dc_sign_ctx) { |
226 | 12.4k | const TranLow v = qcoeff[0]; |
227 | 12.4k | const int32_t level = abs(v); |
228 | 12.4k | const int32_t coeff_ctx = coeff_contexts[0]; |
229 | | |
230 | 12.4k | assert((AOMMIN(level, 3) - 1) >= 0); |
231 | 12.4k | int32_t cost = coeff_costs->base_eob_cost[coeff_ctx][AOMMIN(level, 3) - 1]; |
232 | | |
233 | 12.4k | if (v != 0) { |
234 | 12.4k | const int32_t sign = (v < 0) ? 1 : 0; |
235 | | // sign bit cost |
236 | 12.4k | cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign]; |
237 | | |
238 | 12.4k | if (level > NUM_BASE_LEVELS) { |
239 | 11.3k | const int32_t base_range = level - 1 - NUM_BASE_LEVELS; |
240 | | |
241 | 11.3k | if (base_range < COEFF_BASE_RANGE) { |
242 | 780 | cost += coeff_costs->lps_cost[0][base_range]; |
243 | 10.5k | } else { |
244 | 10.5k | cost += coeff_costs->lps_cost[0][COEFF_BASE_RANGE]; |
245 | 10.5k | } |
246 | | |
247 | 11.3k | if (level >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) { |
248 | 10.5k | cost += get_golomb_cost(level); |
249 | 10.5k | } |
250 | 11.3k | } |
251 | 12.4k | } |
252 | 12.4k | return cost; |
253 | 12.4k | } |
254 | | |
255 | | static INLINE int32_t av1_cost_coeffs_txb_loop_cost_eob(ModeDecisionContext* md_ctx, uint16_t eob, |
256 | | const int16_t* const scan, const TranLow* const qcoeff, |
257 | | int8_t* const coeff_contexts, const LvMapCoeffCost* coeff_costs, |
258 | | int16_t dc_sign_ctx, uint8_t* const levels, const int32_t bwl, |
259 | 12.4k | TxType transform_type) { |
260 | 12.4k | const uint32_t cost_literal = av1_cost_literal(1); |
261 | 12.4k | int32_t cost = 0; |
262 | | |
263 | | //Optimized/simplified function when eob is 1 |
264 | 12.4k | if (eob == 1) { |
265 | 12.4k | return av1_cost_coeffs_txb_loop_cost_one_eob(qcoeff, coeff_contexts, coeff_costs, dc_sign_ctx); |
266 | 12.4k | } |
267 | | |
268 | | // first (eob - 1) index |
269 | 18.4E | { |
270 | 18.4E | const int32_t pos = scan[eob - 1]; |
271 | 18.4E | const TranLow v = qcoeff[pos]; |
272 | 18.4E | const int32_t level = abs(v); |
273 | 18.4E | const int32_t coeff_ctx = coeff_contexts[pos]; |
274 | | |
275 | 18.4E | assert((AOMMIN(level, 3) - 1) >= 0); |
276 | 18.4E | cost += coeff_costs->base_eob_cost[coeff_ctx][AOMMIN(level, 3) - 1]; |
277 | | |
278 | 18.4E | if (v != 0) { |
279 | 0 | cost += cost_literal; |
280 | 0 | if (level > NUM_BASE_LEVELS) { |
281 | 0 | int32_t ctx = get_br_ctx(levels, pos, bwl, tx_type_to_class[transform_type]); |
282 | 0 | const int32_t base_range = level - 1 - NUM_BASE_LEVELS; |
283 | |
|
284 | 0 | if (base_range < COEFF_BASE_RANGE) { |
285 | 0 | cost += coeff_costs->lps_cost[ctx][base_range]; |
286 | 0 | } else { |
287 | 0 | cost += coeff_costs->lps_cost[ctx][COEFF_BASE_RANGE]; |
288 | 0 | } |
289 | |
|
290 | 0 | if (level >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) { |
291 | 0 | cost += get_golomb_cost(level); |
292 | 0 | } |
293 | 0 | } |
294 | 0 | } |
295 | 18.4E | } |
296 | | // last (0) index |
297 | 18.4E | { |
298 | 18.4E | const TranLow v = qcoeff[0]; |
299 | 18.4E | const int32_t level = abs(v); |
300 | 18.4E | const int32_t coeff_ctx = coeff_contexts[0]; |
301 | | |
302 | 18.4E | cost += coeff_costs->base_cost[coeff_ctx][AOMMIN(level, 3)]; |
303 | | |
304 | 18.4E | if (v != 0) { |
305 | 0 | const int32_t sign = (v < 0) ? 1 : 0; |
306 | | // sign bit cost |
307 | |
|
308 | 0 | cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign]; |
309 | |
|
310 | 0 | if (level > NUM_BASE_LEVELS) { |
311 | 0 | int32_t ctx = get_br_ctx(levels, 0, bwl, tx_type_to_class[transform_type]); |
312 | 0 | const int32_t base_range = level - 1 - NUM_BASE_LEVELS; |
313 | |
|
314 | 0 | if (base_range < COEFF_BASE_RANGE) { |
315 | 0 | cost += coeff_costs->lps_cost[ctx][base_range]; |
316 | 0 | } else { |
317 | 0 | cost += coeff_costs->lps_cost[ctx][COEFF_BASE_RANGE]; |
318 | 0 | } |
319 | |
|
320 | 0 | if (level >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) { |
321 | 0 | cost += get_golomb_cost(level); |
322 | 0 | } |
323 | 0 | } |
324 | 0 | } |
325 | 18.4E | } |
326 | 18.4E | int32_t c; |
327 | | /* Optimized Loop, omitted first (eob - 1) and last (0) index */ |
328 | | // Estimate the rate of the first(eob / fast_coeff_est_level) coeff(s), DC and last coeff only |
329 | 18.4E | int32_t c_start = MIN(eob - 2, eob / MAX(1, (int)(md_ctx->mds_fast_coeff_est_level - md_ctx->mds_subres_step))); |
330 | 18.4E | uint32_t cost_literal_cnt = 0; |
331 | 18.4E | for (c = c_start; c >= 1; --c) { |
332 | 0 | const int32_t pos = scan[c]; |
333 | 0 | cost_literal_cnt += !!(qcoeff[pos]); |
334 | 0 | const int32_t level = abs(qcoeff[pos]); |
335 | 0 | if (level > NUM_BASE_LEVELS) { |
336 | 0 | int32_t ctx = get_br_ctx(levels, pos, bwl, tx_type_to_class[transform_type]); |
337 | 0 | const int32_t base_range = level - 1 - NUM_BASE_LEVELS; |
338 | |
|
339 | 0 | cost += coeff_costs->base_cost[coeff_contexts[pos]][3]; |
340 | 0 | if (base_range < COEFF_BASE_RANGE) { |
341 | 0 | cost += coeff_costs->lps_cost[ctx][base_range]; |
342 | 0 | } else { |
343 | 0 | cost += get_golomb_cost(level) + coeff_costs->lps_cost[ctx][COEFF_BASE_RANGE]; |
344 | 0 | } |
345 | 0 | } else { |
346 | 0 | cost += coeff_costs->base_cost[coeff_contexts[pos]][level]; |
347 | 0 | } |
348 | 0 | } |
349 | 18.4E | cost += cost_literal_cnt * cost_literal; |
350 | | |
351 | 18.4E | return cost; |
352 | 12.4k | } |
353 | | |
354 | | // Note: don't call this function when eob is 0. |
355 | | uint64_t svt_av1_cost_coeffs_txb(ModeDecisionContext* ctx, uint8_t allow_update_cdf, FRAME_CONTEXT* ec_ctx, |
356 | | ModeDecisionCandidateBuffer* cand_bf, const TranLow* const qcoeff, uint16_t eob, |
357 | | PlaneType plane_type, TxSize transform_size, TxType transform_type, |
358 | | int16_t txb_skip_ctx, int16_t dc_sign_ctx, bool reduced_transform_set_flag) |
359 | | |
360 | 12.4k | { |
361 | | //Note: there is a different version of this function in AOM that seems to be efficient as its name is: |
362 | | //warehouse_efficients_txb |
363 | | |
364 | 12.4k | const TxSize txs_ctx = get_txsize_entropy_ctx(transform_size); |
365 | 12.4k | const TxClass tx_class = tx_type_to_class[transform_type]; |
366 | 12.4k | int32_t cost; |
367 | 12.4k | const int32_t bwl = get_txb_bwl(transform_size); |
368 | 12.4k | const int32_t width = get_txb_wide(transform_size); |
369 | 12.4k | const int32_t height = get_txb_high(transform_size); |
370 | | |
371 | 12.4k | const ScanOrder* const scan_order = get_scan_order(transform_size, transform_type); |
372 | 12.4k | const int16_t* const scan = scan_order->scan; |
373 | 12.4k | uint8_t* const levels = set_levels(ctx->md_levels_buf, width, height); |
374 | 12.4k | int8_t* const coeff_contexts = ctx->md_coeff_contexts; |
375 | 12.4k | assert(txs_ctx < TX_SIZES); |
376 | 12.4k | const LvMapCoeffCost* const coeff_costs = &ctx->md_rate_est_ctx->coeff_fac_bits[txs_ctx][plane_type]; |
377 | | |
378 | 12.4k | const int32_t eob_multi_size = txsize_log2_minus4[transform_size]; |
379 | 12.4k | const LvMapEobCost* const eob_bits = &ctx->md_rate_est_ctx->eob_frac_bits[eob_multi_size][plane_type]; |
380 | | // eob must be greater than 0 here. |
381 | 12.4k | assert(eob > 0); |
382 | 12.4k | cost = coeff_costs->txb_skip_cost[txb_skip_ctx][0]; |
383 | | |
384 | 12.4k | if (allow_update_cdf) { |
385 | 0 | update_cdf(ec_ctx->txb_skip_cdf[txs_ctx][txb_skip_ctx], eob == 0, 2); |
386 | 0 | } |
387 | | |
388 | 12.4k | if (eob > 1) { |
389 | 0 | svt_av1_txb_init_levels(qcoeff, |
390 | 0 | width, |
391 | 0 | height, |
392 | 0 | levels); // NM - Needs to be optimized - to be combined with the quantisation. |
393 | 0 | } |
394 | 12.4k | const bool is_inter = is_inter_mode(cand_bf->cand->block_mi.mode); |
395 | | // Transform type bit estimation |
396 | 12.4k | cost += plane_type > PLANE_TYPE_Y ? 0 |
397 | 12.4k | : av1_transform_type_rate_estimation(ctx, |
398 | 0 | allow_update_cdf, |
399 | 0 | ec_ctx, |
400 | 0 | cand_bf, |
401 | 0 | is_inter, |
402 | 0 | transform_size, |
403 | 0 | transform_type, |
404 | 0 | reduced_transform_set_flag); |
405 | | |
406 | | // Transform eob bit estimation |
407 | 12.4k | cost += get_eob_cost(eob, eob_bits, coeff_costs, tx_class); |
408 | 12.4k | if (allow_update_cdf) { |
409 | 0 | update_eob_context(eob, transform_size, tx_class, plane_type, ec_ctx); |
410 | 0 | } |
411 | | // Transform non-zero coeff bit estimation |
412 | 12.4k | svt_av1_get_nz_map_contexts(levels, |
413 | 12.4k | scan, |
414 | 12.4k | eob, |
415 | 12.4k | transform_size, |
416 | 12.4k | tx_class, |
417 | 12.4k | coeff_contexts); // NM - Assembly version is available in AOM |
418 | 12.4k | assert(eob <= width * height); |
419 | 12.4k | if (allow_update_cdf) { |
420 | 0 | for (int c = eob - 1; c >= 0; --c) { |
421 | 0 | const int pos = scan[c]; |
422 | 0 | const int coeff_ctx = coeff_contexts[pos]; |
423 | 0 | const TranLow v = qcoeff[pos]; |
424 | 0 | const TranLow level = abs(v); |
425 | 0 | if (c == eob - 1) { |
426 | 0 | assert(coeff_ctx < 4); |
427 | 0 | update_cdf(ec_ctx->coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx], AOMMIN(level, 3) - 1, 3); |
428 | 0 | } else { |
429 | 0 | update_cdf(ec_ctx->coeff_base_cdf[txs_ctx][plane_type][coeff_ctx], AOMMIN(level, 3), 4); |
430 | 0 | } |
431 | |
|
432 | 0 | { |
433 | 0 | if (c == eob - 1) { |
434 | 0 | assert(coeff_ctx < 4); |
435 | 0 | } |
436 | 0 | } |
437 | |
|
438 | 0 | if (level > NUM_BASE_LEVELS) { |
439 | 0 | const int base_range = level - 1 - NUM_BASE_LEVELS; |
440 | 0 | int br_ctx; |
441 | 0 | if (eob == 1) { |
442 | 0 | br_ctx = 0; |
443 | 0 | } else { |
444 | 0 | br_ctx = get_br_ctx(levels, pos, bwl, tx_class); |
445 | 0 | } |
446 | |
|
447 | 0 | for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) { |
448 | 0 | const int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1); |
449 | 0 | update_cdf(ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)][plane_type][br_ctx], k, BR_CDF_SIZE); |
450 | 0 | for (int lps = 0; lps < BR_CDF_SIZE - 1; lps++) { |
451 | 0 | if (lps == k) { |
452 | 0 | break; |
453 | 0 | } |
454 | 0 | } |
455 | 0 | if (k < BR_CDF_SIZE - 1) { |
456 | 0 | break; |
457 | 0 | } |
458 | 0 | } |
459 | 0 | } |
460 | 0 | } |
461 | |
|
462 | 0 | if (qcoeff[0] != 0) { |
463 | 0 | update_cdf(ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], qcoeff[0] < 0, 2); |
464 | 0 | } |
465 | | |
466 | | //TODO: CHKN for 128x128 where we need more than one TXb, we need to update the txb_context(dc_sign+skip_ctx) in a Txb basis. |
467 | |
|
468 | 0 | return 0; |
469 | 0 | } |
470 | | |
471 | 12.4k | cost += av1_cost_coeffs_txb_loop_cost_eob( |
472 | 12.4k | ctx, eob, scan, qcoeff, coeff_contexts, coeff_costs, dc_sign_ctx, levels, bwl, transform_type); |
473 | 12.4k | return cost; |
474 | 12.4k | } |
475 | | |
476 | | uint64_t svt_aom_get_intra_uv_fast_rate(PictureControlSet* pcs, ModeDecisionContext* ctx, |
477 | 145k | ModeDecisionCandidateBuffer* cand_bf, bool use_accurate_cfl) { |
478 | 145k | const BlockGeom* const blk_geom = ctx->blk_geom; |
479 | 145k | ModeDecisionCandidate* cand = cand_bf->cand; |
480 | 145k | assert(ctx->has_uv); |
481 | 145k | assert(!(svt_aom_allow_intrabc(&pcs->ppcs->frm_hdr, pcs->ppcs->slice_type) && cand->block_mi.use_intrabc)); |
482 | 145k | MdRateEstimationContext* md_rate_est_ctx = ctx->md_rate_est_ctx; |
483 | 145k | const uint8_t is_cfl_allowed = (blk_geom->bwidth <= 32 && blk_geom->bheight <= 32) ? 1 : 0; |
484 | 145k | PredictionMode intra_mode = (PredictionMode)cand->block_mi.mode; |
485 | | // If CFL alphas are not known yet, calculate the chroma mode bits based on DC Mode. If CFL is selected the chroma mode bits must be updated later |
486 | 145k | const UvPredictionMode chroma_mode = cand->block_mi.uv_mode == UV_CFL_PRED && !use_accurate_cfl |
487 | 145k | ? UV_DC_PRED |
488 | 145k | : cand->block_mi.uv_mode; |
489 | 145k | const uint32_t mi_row = ctx->blk_org_y >> MI_SIZE_LOG2; |
490 | 145k | const uint32_t mi_col = ctx->blk_org_x >> MI_SIZE_LOG2; |
491 | | // Subsampling assumes YUV 420 content |
492 | 145k | const uint8_t ss_x = 1; |
493 | 145k | const uint8_t ss_y = 1; |
494 | | |
495 | 145k | uint64_t chroma_rate = 0; |
496 | | // Estimate chroma nominal intra mode bits |
497 | 145k | chroma_rate += (uint64_t)md_rate_est_ctx->intra_uv_mode_fac_bits[is_cfl_allowed][intra_mode][chroma_mode]; |
498 | | |
499 | | // Estimate chroma angular mode bits; angular offset only allow for bsize >= 8x8 |
500 | 145k | if (blk_geom->bsize >= BLOCK_8X8 && av1_is_directional_mode(get_uv_mode(chroma_mode))) { |
501 | 0 | chroma_rate += |
502 | 0 | md_rate_est_ctx->angle_delta_fac_bits[chroma_mode - V_PRED] |
503 | 0 | [MAX_ANGLE_DELTA + cand->block_mi.angle_delta[PLANE_TYPE_UV]]; |
504 | 0 | } |
505 | | |
506 | | // Estimate CFL factor bits when CFL is used |
507 | 145k | if (chroma_mode == UV_CFL_PRED) { |
508 | 0 | chroma_rate += (uint64_t)md_rate_est_ctx->cfl_alpha_fac_bits[cand->block_mi.cfl_alpha_signs][CFL_PRED_U] |
509 | 0 | [CFL_IDX_U(cand->block_mi.cfl_alpha_idx)] + |
510 | 0 | (uint64_t)md_rate_est_ctx->cfl_alpha_fac_bits[cand->block_mi.cfl_alpha_signs][CFL_PRED_V] |
511 | 0 | [CFL_IDX_V(cand->block_mi.cfl_alpha_idx)]; |
512 | 0 | } |
513 | | |
514 | | // Estimate chroma palette mode bits (currently not supported, so just cost of signalling off) |
515 | 145k | if (chroma_mode == UV_DC_PRED && |
516 | 145k | svt_aom_allow_palette(pcs->ppcs->frm_hdr.allow_screen_content_tools, blk_geom->bsize) && |
517 | 0 | is_chroma_reference(mi_row, mi_col, blk_geom->bsize, ss_x, ss_y)) { |
518 | 0 | const int use_palette_y = cand->palette_info && (cand->palette_size[0] > 0); |
519 | 0 | const int use_palette_uv = cand->palette_info && (cand->palette_size[1] > 0); |
520 | 0 | chroma_rate += ctx->md_rate_est_ctx->palette_uv_mode_fac_bits[use_palette_y][use_palette_uv]; |
521 | 0 | } |
522 | | |
523 | 145k | return chroma_rate; |
524 | 145k | } |
525 | | |
526 | | uint64_t svt_aom_intra_fast_cost(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf, |
527 | 146k | uint64_t lambda, uint64_t luma_distortion) { |
528 | 146k | const BlockGeom* blk_geom = ctx->blk_geom; |
529 | 146k | BlkStruct* blk_ptr = ctx->blk_ptr; |
530 | 146k | ModeDecisionCandidate* cand = cand_bf->cand; |
531 | 146k | if (svt_aom_allow_intrabc(&pcs->ppcs->frm_hdr, pcs->ppcs->slice_type) && cand->block_mi.use_intrabc) { |
532 | 0 | uint64_t rate = 0; |
533 | |
|
534 | 0 | Mv mv = {.as_int = cand->block_mi.mv[0].as_int}; |
535 | 0 | Mv ref_mv = {.as_int = cand->pred_mv[0].as_int}; |
536 | 0 | const int* dvcost[2] = {(int*)&ctx->md_rate_est_ctx->dv_cost[0][MV_MAX], |
537 | 0 | (int*)&ctx->md_rate_est_ctx->dv_cost[1][MV_MAX]}; |
538 | 0 | int32_t mv_rate = svt_av1_mv_bit_cost( |
539 | 0 | &mv, &ref_mv, ctx->md_rate_est_ctx->dv_joint_cost, dvcost, MV_COST_WEIGHT_SUB); |
540 | |
|
541 | 0 | rate = mv_rate + ctx->md_rate_est_ctx->intrabc_fac_bits[cand->block_mi.use_intrabc]; |
542 | 0 | cand_bf->fast_luma_rate = rate; |
543 | 0 | cand_bf->fast_chroma_rate = 0; |
544 | 0 | return (RDCOST(lambda, rate, luma_distortion)); |
545 | 146k | } else { |
546 | | // Number of bits for each synatax element |
547 | 146k | uint64_t intra_mode_bits_num = 0; |
548 | 146k | uint64_t intra_luma_mode_bits_num = 0; |
549 | 146k | uint64_t intra_luma_ang_mode_bits_num = 0; |
550 | 146k | uint64_t intra_filter_mode_bits_num = 0; |
551 | 146k | uint64_t skip_mode_rate = 0; |
552 | 146k | const uint8_t skip_mode_ctx = ctx->skip_mode_ctx; |
553 | 146k | PredictionMode intra_mode = (PredictionMode)cand->block_mi.mode; |
554 | | // Luma and chroma rate |
555 | 146k | uint32_t rate; |
556 | 146k | uint32_t luma_rate = 0; |
557 | 146k | uint32_t chroma_rate = 0; |
558 | 146k | intra_mode_bits_num = pcs->slice_type != I_SLICE |
559 | 146k | ? (uint64_t)ctx->md_rate_est_ctx->mb_mode_fac_bits[eb_size_group_lookup[blk_geom->bsize]][intra_mode] |
560 | 146k | : ZERO_COST; |
561 | | |
562 | 146k | skip_mode_rate = pcs->slice_type != I_SLICE && pcs->ppcs->frm_hdr.skip_mode_params.skip_mode_flag && |
563 | 0 | is_comp_ref_allowed(blk_geom->bsize) |
564 | 146k | ? (uint64_t)ctx->md_rate_est_ctx->skip_mode_fac_bits[skip_mode_ctx][0] |
565 | 146k | : ZERO_COST; |
566 | | // Estimate luma nominal intra mode bits for key frame |
567 | 146k | intra_luma_mode_bits_num = pcs->slice_type == I_SLICE |
568 | 146k | ? (uint64_t) |
569 | 146k | ctx->md_rate_est_ctx->y_mode_fac_bits[ctx->intra_luma_top_ctx][ctx->intra_luma_left_ctx][intra_mode] |
570 | 146k | : ZERO_COST; |
571 | | // Estimate luma angular mode bits |
572 | 146k | if (blk_geom->bsize >= BLOCK_8X8 && av1_is_directional_mode(cand->block_mi.mode)) { |
573 | 0 | assert((intra_mode - V_PRED) < 8); |
574 | 0 | assert((intra_mode - V_PRED) >= 0); |
575 | 0 | intra_luma_ang_mode_bits_num = |
576 | 0 | ctx->md_rate_est_ctx->angle_delta_fac_bits[intra_mode - V_PRED] |
577 | 0 | [MAX_ANGLE_DELTA + cand->block_mi.angle_delta[PLANE_TYPE_Y]]; |
578 | 0 | } |
579 | 146k | if (svt_aom_allow_palette(pcs->ppcs->frm_hdr.allow_screen_content_tools, blk_geom->bsize) && |
580 | 0 | intra_mode == DC_PRED) { |
581 | 0 | const int use_palette = cand->palette_info ? (cand->palette_size[0] > 0) : 0; |
582 | 0 | const int bsize_ctx = svt_aom_get_palette_bsize_ctx(blk_geom->bsize); |
583 | 0 | const int mode_ctx = svt_aom_get_palette_mode_ctx(blk_ptr->av1xd); |
584 | 0 | intra_luma_mode_bits_num += ctx->md_rate_est_ctx->palette_ymode_fac_bits[bsize_ctx][mode_ctx][use_palette]; |
585 | 0 | if (use_palette) { |
586 | 0 | const uint8_t* const color_map = cand->palette_info->color_idx_map; |
587 | 0 | int block_width, block_height, rows, cols; |
588 | 0 | svt_aom_get_block_dimensions( |
589 | 0 | blk_geom->bsize, 0, blk_ptr->av1xd, &block_width, &block_height, &rows, &cols); |
590 | 0 | const int plt_size = cand->palette_size[0]; |
591 | 0 | int palette_mode_cost = |
592 | 0 | ctx->md_rate_est_ctx->palette_ysize_fac_bits[bsize_ctx][plt_size - PALETTE_MIN_SIZE] + |
593 | 0 | svt_aom_write_uniform_cost(plt_size, color_map[0]); |
594 | 0 | uint16_t color_cache[2 * PALETTE_MAX_SIZE]; |
595 | 0 | const int n_cache = svt_get_palette_cache_y(blk_ptr->av1xd, color_cache); |
596 | 0 | palette_mode_cost += svt_av1_palette_color_cost_y(&cand->palette_info->pmi, |
597 | 0 | color_cache, |
598 | 0 | cand->palette_size[0], |
599 | 0 | n_cache, |
600 | 0 | pcs->ppcs->scs->encoder_bit_depth); |
601 | 0 | palette_mode_cost += svt_av1_cost_color_map( |
602 | 0 | cand, ctx->md_rate_est_ctx, blk_ptr, 0, blk_geom->bsize, PALETTE_MAP); |
603 | 0 | intra_luma_mode_bits_num += palette_mode_cost; |
604 | 0 | } |
605 | 0 | } |
606 | | |
607 | 146k | if (svt_aom_filter_intra_allowed(pcs->ppcs->scs->seq_header.filter_intra_level, |
608 | 146k | blk_geom->bsize, |
609 | 146k | cand->palette_info ? cand->palette_size[0] : 0, |
610 | 146k | intra_mode)) { |
611 | 0 | intra_filter_mode_bits_num = |
612 | 0 | ctx->md_rate_est_ctx |
613 | 0 | ->filter_intra_fac_bits[blk_geom->bsize][cand->block_mi.filter_intra_mode != FILTER_INTRA_MODES]; |
614 | 0 | if (cand->block_mi.filter_intra_mode != FILTER_INTRA_MODES) { |
615 | 0 | intra_filter_mode_bits_num += |
616 | 0 | ctx->md_rate_est_ctx->filter_intra_mode_fac_bits[cand->block_mi.filter_intra_mode]; |
617 | 0 | } |
618 | 0 | } |
619 | 146k | if (ctx->has_uv) { |
620 | | // CFL info not known in fasta loop, so assume DC mode when CFL is allowed |
621 | 145k | chroma_rate = (uint32_t)svt_aom_get_intra_uv_fast_rate(pcs, ctx, cand_bf, 0); |
622 | 145k | } |
623 | | |
624 | 146k | uint32_t is_inter_rate = pcs->slice_type != I_SLICE |
625 | 146k | ? ctx->md_rate_est_ctx->intra_inter_fac_bits[ctx->is_inter_ctx][0] |
626 | 146k | : 0; |
627 | 146k | luma_rate = (uint32_t)(intra_mode_bits_num + skip_mode_rate + intra_luma_mode_bits_num + |
628 | 146k | intra_luma_ang_mode_bits_num + is_inter_rate + intra_filter_mode_bits_num); |
629 | 146k | if (svt_aom_allow_intrabc(&pcs->ppcs->frm_hdr, pcs->ppcs->slice_type)) { |
630 | 0 | svt_aom_assert_err(cand->block_mi.use_intrabc == 0, "this block ibc should be off\n"); |
631 | 0 | luma_rate += ctx->md_rate_est_ctx->intrabc_fac_bits[cand->block_mi.use_intrabc]; |
632 | 0 | } |
633 | | // Keep the Fast Luma and Chroma rate for future use |
634 | 146k | cand_bf->fast_luma_rate = luma_rate; |
635 | 146k | cand_bf->fast_chroma_rate = chroma_rate; |
636 | 146k | rate = luma_rate + chroma_rate; |
637 | | // Assign fast cost |
638 | 146k | return (RDCOST(lambda, rate, luma_distortion)); |
639 | 146k | } |
640 | 146k | } |
641 | | |
642 | | // Estimates the rate of signalling the reference frame(s) of a block (nothing is written to a bitstream here) |
// Parameters:
//   ctx            - supplies md_rate_est_ctx, whose *_fac_bits tables provide every cost added below.
//   blk_ptr        - block whose av1xd is handed to the context-derivation helpers; its
//                    av1xd->mi[0]->block_mi.ref_frame[] is overwritten by this function (side effect).
//   ref_frame_type - packed reference type, expanded into a {ref0, ref1} pair by av1_set_ref_frame().
//   is_compound    - true: cost the compound reference tree; false: cost the single-reference tree.
// Returns the sum of the table entries for each binary decision taken along the selected tree.
643 | | uint64_t estimate_ref_frame_type_bits(ModeDecisionContext* ctx, BlkStruct* blk_ptr, uint8_t ref_frame_type, |
644 | 0 | bool is_compound) { |
645 | 0 | uint64_t ref_rate_bits = 0; |
646 | |

647 | 0 | MbModeInfo* const mbmi = blk_ptr->av1xd->mi[0]; |
648 | 0 | MvReferenceFrame ref_type[2]; |
649 | 0 | av1_set_ref_frame(ref_type, ref_frame_type); |
// Side effect: the expanded reference pair is stored in the block's mode info; all tests below read it back
// from mbmi->block_mi.ref_frame[].
650 | 0 | mbmi->block_mi.ref_frame[0] = ref_type[0]; |
651 | 0 | mbmi->block_mi.ref_frame[1] = ref_type[1]; |
652 | | //const int is_compound = svt_aom_has_second_ref(mbmi); |
653 | 0 | { |
654 | 0 | if (is_compound) { |
655 | 0 | const CompReferenceType comp_ref_type = has_uni_comp_refs(&mbmi->block_mi) ? UNIDIR_COMP_REFERENCE |
656 | 0 | : BIDIR_COMP_REFERENCE; |
657 | |

658 | 0 | ref_rate_bits += ctx->md_rate_est_ctx->comp_ref_type_fac_bits[svt_aom_get_comp_reference_type_context_new( |
659 | 0 | blk_ptr->av1xd)][comp_ref_type]; |
660 | | /*aom_write_symbol(w, comp_ref_type, |
661 | | svt_aom_get_comp_reference_type_cdf(blk_ptr->av1xd), 2);*/ |
662 | |

663 | 0 | if (comp_ref_type == UNIDIR_COMP_REFERENCE) { |
664 | | // SVT_LOG("ERROR[AN]: UNIDIR_COMP_REFERENCE not supported\n"); |
665 | 0 | const int bit = mbmi->block_mi.ref_frame[0] == BWDREF_FRAME; |
666 | |

667 | 0 | ref_rate_bits += ctx->md_rate_est_ctx->uni_comp_ref_fac_bits[svt_av1_get_pred_context_uni_comp_ref_p( |
668 | 0 | blk_ptr->av1xd)][0][bit]; |
669 | | // blk_ptr->av1xd->tile_ctx->uni_comp_ref_cdf[pred_context][0]; |
670 | | // WRITE_REF_BIT(bit, uni_comp_ref_p); |
671 | |

672 | 0 | if (!bit) { |
673 | 0 | assert(mbmi->block_mi.ref_frame[0] == LAST_FRAME); |
674 | 0 | const int bit1 = mbmi->block_mi.ref_frame[1] == LAST3_FRAME || |
675 | 0 | mbmi->block_mi.ref_frame[1] == GOLDEN_FRAME; |
676 | 0 | ref_rate_bits += |
677 | 0 | ctx->md_rate_est_ctx |
678 | 0 | ->uni_comp_ref_fac_bits[svt_av1_get_pred_context_uni_comp_ref_p1(blk_ptr->av1xd)][1][bit1]; |
679 | | // ref_rate_d = blk_ptr->av1xd->tile_ctx->uni_comp_ref_cdf[pred_context][1]; |
680 | | // WRITE_REF_BIT(bit1, uni_comp_ref_p1); |
681 | 0 | if (bit1) { |
682 | 0 | const int bit2 = mbmi->block_mi.ref_frame[1] == GOLDEN_FRAME; |
683 | 0 | ref_rate_bits += |
684 | 0 | ctx->md_rate_est_ctx->uni_comp_ref_fac_bits[svt_av1_get_pred_context_uni_comp_ref_p2( |
685 | 0 | blk_ptr->av1xd)][2][bit2]; |
686 | | |
687 | | // ref_rate_e = blk_ptr->av1xd->tile_ctx->uni_comp_ref_cdf[pred_context][2]; |
688 | | //WRITE_REF_BIT(bit2, uni_comp_ref_p2); |
689 | 0 | } |
690 | 0 | } |
// The unidirectional compound case is fully costed at this point; return before the bidirectional tree below.
691 | 0 | return ref_rate_bits; |
692 | 0 | } |
693 | | |
694 | 0 | assert(comp_ref_type == BIDIR_COMP_REFERENCE); |
695 | |

// Bidirectional compound, first reference: ref_frame[0] is tested against LAST3/GOLDEN, then LAST2 or GOLDEN.
696 | 0 | const int bit = (mbmi->block_mi.ref_frame[0] == GOLDEN_FRAME || mbmi->block_mi.ref_frame[0] == LAST3_FRAME); |
697 | 0 | const int pred_ctx = svt_av1_get_pred_context_comp_ref_p(blk_ptr->av1xd); |
698 | 0 | ref_rate_bits += ctx->md_rate_est_ctx->comp_ref_fac_bits[pred_ctx][0][bit]; |
699 | | // ref_rate_f = blk_ptr->av1xd->tile_ctx->comp_ref_cdf[pred_ctx][0]; |
700 | | // WRITE_REF_BIT(bit, comp_ref_p); |
701 | |

702 | 0 | if (!bit) { |
703 | 0 | const int bit1 = mbmi->block_mi.ref_frame[0] == LAST2_FRAME; |
704 | 0 | ref_rate_bits += ctx->md_rate_est_ctx |
705 | 0 | ->comp_ref_fac_bits[svt_av1_get_pred_context_comp_ref_p1(blk_ptr->av1xd)][1][bit1]; |
706 | | // ref_rate_g = blk_ptr->av1xd->tile_ctx->comp_ref_cdf[pred_context][1]; |
707 | | // WRITE_REF_BIT(bit1, comp_ref_p1); |
708 | 0 | } else { |
709 | 0 | const int bit2 = mbmi->block_mi.ref_frame[0] == GOLDEN_FRAME; |
710 | 0 | ref_rate_bits += ctx->md_rate_est_ctx |
711 | 0 | ->comp_ref_fac_bits[svt_av1_get_pred_context_comp_ref_p2(blk_ptr->av1xd)][2][bit2]; |
712 | | // ref_rate_h = blk_ptr->av1xd->tile_ctx->comp_ref_cdf[pred_context][2]; |
713 | | // WRITE_REF_BIT(bit2, comp_ref_p2); |
714 | 0 | } |
715 | |

// Bidirectional compound, second reference: ref_frame[1] is tested against ALTREF, then ALTREF2.
716 | 0 | const int bit_bwd = mbmi->block_mi.ref_frame[1] == ALTREF_FRAME; |
717 | 0 | const int pred_ctx_2 = svt_av1_get_pred_context_comp_bwdref_p(blk_ptr->av1xd); |
718 | 0 | ref_rate_bits += ctx->md_rate_est_ctx->comp_bwd_ref_fac_bits[pred_ctx_2][0][bit_bwd]; |
719 | | // ref_rate_i = blk_ptr->av1xd->tile_ctx->comp_bwdref_cdf[pred_ctx_2][0]; |
720 | | // WRITE_REF_BIT(bit_bwd, comp_bwdref_p); |
721 | |

722 | 0 | if (!bit_bwd) { |
723 | 0 | ref_rate_bits += ctx->md_rate_est_ctx->comp_bwd_ref_fac_bits[svt_av1_get_pred_context_comp_bwdref_p1( |
724 | 0 | blk_ptr->av1xd)][1][ref_type[1] == ALTREF2_FRAME]; |
725 | | // ref_rate_j = blk_ptr->av1xd->tile_ctx->comp_bwdref_cdf[pred_context][1]; |
726 | | // WRITE_REF_BIT(mbmi->block_mi.ref_frame[1] == ALTREF2_FRAME, comp_bwdref_p1); |
727 | 0 | } |
728 | 0 | } else { |
// Single reference: bit0 splits the BWDREF..ALTREF range from the remaining references, then each side is refined.
729 | 0 | const int bit0 = (mbmi->block_mi.ref_frame[0] <= ALTREF_FRAME && |
730 | 0 | mbmi->block_mi.ref_frame[0] >= BWDREF_FRAME); |
731 | 0 | ref_rate_bits += ctx->md_rate_est_ctx |
732 | 0 | ->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p1(blk_ptr->av1xd)][0][bit0]; |
733 | | // ref_rate_k = |
734 | | // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p1(blk_ptr->av1xd)][0]; |
735 | | // WRITE_REF_BIT(bit0, single_ref_p1); |
736 | |

737 | 0 | if (bit0) { |
738 | 0 | const int bit1 = mbmi->block_mi.ref_frame[0] == ALTREF_FRAME; |
739 | 0 | ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p2( |
740 | 0 | blk_ptr->av1xd)][1][bit1]; |
741 | | // ref_rate_l = |
742 | | // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p2(blk_ptr->av1xd)][1]; |
743 | | // WRITE_REF_BIT(bit1, single_ref_p2); |
744 | 0 | if (!bit1) { |
// NOTE(review): this test uses the packed ref_frame_type argument, whereas the commented-out WRITE_REF_BIT below
// uses mbmi->block_mi.ref_frame[0]; presumably the two are equal for single references — confirm.
745 | 0 | ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p6( |
746 | 0 | blk_ptr->av1xd)][5][ref_frame_type == ALTREF2_FRAME]; |
747 | | // ref_rate_m = |
748 | | // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p6(blk_ptr->av1xd)][5]; |
749 | | // WRITE_REF_BIT(mbmi->block_mi.ref_frame[0] == ALTREF2_FRAME, single_ref_p6); |
750 | 0 | } |
751 | 0 | } else { |
752 | 0 | const int bit2 = (mbmi->block_mi.ref_frame[0] == LAST3_FRAME || |
753 | 0 | mbmi->block_mi.ref_frame[0] == GOLDEN_FRAME); |
754 | 0 | ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p3( |
755 | 0 | blk_ptr->av1xd)][2][bit2]; |
756 | | // ref_rate_n = |
757 | | // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p3(blk_ptr->av1xd)][2]; |
758 | | // WRITE_REF_BIT(bit2, single_ref_p3); |
759 | 0 | if (!bit2) { |
760 | 0 | const int bit3 = mbmi->block_mi.ref_frame[0] != LAST_FRAME; |
761 | 0 | ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p4( |
762 | 0 | blk_ptr->av1xd)][3][bit3]; |
763 | | // ref_rate_o = |
764 | | // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p4(blk_ptr->av1xd)][3]; |
765 | | // WRITE_REF_BIT(bit3, single_ref_p4); |
766 | 0 | } else { |
767 | 0 | const int bit4 = mbmi->block_mi.ref_frame[0] != LAST3_FRAME; |
768 | 0 | ref_rate_bits += ctx->md_rate_est_ctx->single_ref_fac_bits[svt_av1_get_pred_context_single_ref_p5( |
769 | 0 | blk_ptr->av1xd)][4][bit4]; |
770 | | // ref_rate_p = |
771 | | // blk_ptr->av1xd->tile_ctx->single_ref_cdf[svt_av1_get_pred_context_single_ref_p5(blk_ptr->av1xd)][4]; |
772 | | // WRITE_REF_BIT(bit4, single_ref_p5); |
773 | 0 | } |
774 | 0 | } |
775 | 0 | } |
776 | 0 | } |
777 | 0 | return ref_rate_bits; |
778 | 0 | } |
779 | | |
780 | | int svt_aom_get_comp_group_idx_context_enc(const MacroBlockD* xd); |
781 | | int is_any_masked_compound_used(BlockSize bsize); |
782 | | |
// Returns the rate of the compound-type signalling for candidate `cand` at block size `bsize`.
// The result stays 0 when the candidate has no second reference (has_second_ref() is false).
// Otherwise it is built from ctx->md_rate_est_ctx tables:
//   - comp_group_idx cost, only when masked compound is usable (block size allows it and the sequence enables it);
//   - comp_group_idx == 0: compound_idx cost, only when order_hint_info.enable_jnt_comp is set;
//   - comp_group_idx != 0: compound type cost (when wedge is usable for bsize), plus the wedge index cost for
//     COMPOUND_WEDGE, plus one literal bit for either masked type.
// Side effect: the candidate's reference pair is copied into blk_ptr->av1xd->mi[0]->block_mi.ref_frame[].
783 | | static INLINE uint32_t get_compound_mode_rate(PictureControlSet* pcs, ModeDecisionContext* ctx, |
784 | 0 | ModeDecisionCandidate* cand, BlockSize bsize) { |
785 | 0 | BlkStruct* blk_ptr = ctx->blk_ptr; |
786 | 0 | SequenceControlSet* scs = pcs->ppcs->scs; |
787 | 0 | uint32_t comp_rate = 0; |
788 | 0 | MbModeInfo* const mbmi = blk_ptr->av1xd->mi[0]; |
789 | 0 | MvReferenceFrame rf[2] = {cand->block_mi.ref_frame[0], cand->block_mi.ref_frame[1]}; |
790 | 0 | mbmi->block_mi.ref_frame[0] = rf[0]; |
791 | 0 | mbmi->block_mi.ref_frame[1] = rf[1]; |
792 | | |
793 | | //NOTE : Make sure, any cuPtr data is already set before usage |
794 | |

795 | 0 | if (has_second_ref(&mbmi->block_mi)) { |
796 | 0 | const int masked_compound_used = is_any_masked_compound_used(bsize) && scs->seq_header.enable_masked_compound; |
797 | |

798 | 0 | if (masked_compound_used) { |
799 | 0 | const int ctx_comp_group_idx = svt_aom_get_comp_group_idx_context_enc(blk_ptr->av1xd); |
800 | 0 | comp_rate = |
801 | 0 | ctx->md_rate_est_ctx->comp_group_idx_fac_bits[ctx_comp_group_idx][cand->block_mi.comp_group_idx]; |
802 | 0 | } else { |
803 | 0 | assert(cand->block_mi.comp_group_idx == 0); |
804 | 0 | } |
805 | |

806 | 0 | if (cand->block_mi.comp_group_idx == 0) { |
807 | 0 | if (cand->block_mi.compound_idx) { |
808 | 0 | assert(cand->block_mi.interinter_comp.type == COMPOUND_AVERAGE); |
809 | 0 | } |
810 | |

811 | 0 | if (scs->seq_header.order_hint_info.enable_jnt_comp) { |
// ref_order_hint[] is indexed with rf[] - 1, i.e. the reference frame value shifted down by one.
812 | 0 | const int comp_index_ctx = svt_aom_get_comp_index_context_enc(pcs->ppcs, |
813 | 0 | pcs->ppcs->cur_order_hint, |
814 | 0 | pcs->ppcs->ref_order_hint[rf[0] - 1], |
815 | 0 | pcs->ppcs->ref_order_hint[rf[1] - 1], |
816 | 0 | blk_ptr->av1xd); |
817 | 0 | comp_rate += ctx->md_rate_est_ctx->comp_idx_fac_bits[comp_index_ctx][cand->block_mi.compound_idx]; |
818 | 0 | } else { |
819 | 0 | assert(cand->block_mi.compound_idx == 1); |
820 | 0 | } |
821 | 0 | } else { |
822 | 0 | assert(pcs->ppcs->frm_hdr.reference_mode != SINGLE_REFERENCE && |
823 | 0 | is_inter_compound_mode(cand->block_mi.mode)); |
824 | 0 | assert(masked_compound_used); |
825 | | // compound_diffwtd, wedge |
826 | 0 | assert(cand->block_mi.interinter_comp.type == COMPOUND_WEDGE || |
827 | 0 | cand->block_mi.interinter_comp.type == COMPOUND_DIFFWTD); |
828 | |

829 | 0 | if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) { |
830 | 0 | comp_rate += ctx->md_rate_est_ctx |
831 | 0 | ->compound_type_fac_bits[bsize][cand->block_mi.interinter_comp.type - COMPOUND_WEDGE]; |
832 | 0 | } |
833 | |

834 | 0 | if (cand->block_mi.interinter_comp.type == COMPOUND_WEDGE) { |
835 | 0 | assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize)); |
836 | 0 | comp_rate += |
837 | 0 | ctx->md_rate_est_ctx->wedge_idx_fac_bits[bsize][cand->block_mi.interinter_comp.wedge_index]; |
// One extra literal bit; presumably the wedge sign — TODO confirm against the bitstream writer.
838 | 0 | comp_rate += av1_cost_literal(1); |
839 | 0 | } else { |
840 | 0 | assert(cand->block_mi.interinter_comp.type == COMPOUND_DIFFWTD); |
// One extra literal bit; presumably the DIFFWTD mask type — TODO confirm against the bitstream writer.
841 | 0 | comp_rate += av1_cost_literal(1); |
842 | 0 | } |
843 | 0 | } |
844 | 0 | } |
845 | |

846 | 0 | return comp_rate; |
847 | 0 | } |
848 | | |
// Returns the rate of signalling the interpolation filter(s) stored in block_mi->interp_filters.
//   block_mi           - supplies the reference pair (used for the prediction context) and the packed filters.
//   frm_hdr            - when frm_hdr->interpolation_filter is not SWITCHABLE the function returns 0.
//   ctx                - supplies blk_ptr->av1xd for the context and md_rate_est_ctx for the cost table.
//   enable_dual_filter - true: both directions (dir 0 and 1) are costed; false: only dir 0.
849 | | int32_t svt_aom_get_switchable_rate(BlockModeInfo* block_mi, const FrameHeader* const frm_hdr, ModeDecisionContext* ctx, |
850 | 0 | const bool enable_dual_filter) { |
851 | 0 | if (frm_hdr->interpolation_filter != SWITCHABLE) { |
852 | 0 | return 0; |
853 | 0 | } |
854 | | |
855 | 0 | int32_t inter_filter_cost = 0; |
856 | 0 | const int max_dir = enable_dual_filter ? 2 : 1; |
857 | 0 | for (int dir = 0; dir < max_dir; ++dir) { |
858 | 0 | const int32_t pred_ctx = svt_aom_get_pred_context_switchable_interp( |
859 | 0 | block_mi->ref_frame[0], block_mi->ref_frame[1], ctx->blk_ptr->av1xd, dir); |
860 | 0 | const InterpFilter filter = av1_extract_interp_filter(block_mi->interp_filters, dir); |
861 | 0 | assert(pred_ctx < SWITCHABLE_FILTER_CONTEXTS); |
862 | 0 | assert(filter < SWITCHABLE_FILTERS); |
863 | 0 | inter_filter_cost += ctx->md_rate_est_ctx->switchable_interp_fac_bitss[pred_ctx][filter]; |
864 | 0 | } |
865 | 0 | return inter_filter_cost; |
866 | 0 | } |
867 | | |
868 | | int svt_aom_is_interintra_wedge_used(BlockSize bsize); |
869 | | |
// Approximate fast-loop cost of an inter candidate.
// The luma rate is the sum of:
//   - reference-frame bits from ctx->estimate_ref_frames_num_bits[], only when ctx->approx_inter_rate < 2;
//   - inter-mode bits (compound mode, or the NEWMV / GLOBALMV / NEARESTMV decision chain);
//   - DRL index bits for NEWMV / NEW_NEWMV and for modes containing a NEAR MV;
//   - a closed-form MV rate, 1296 + factor * (|dx| + |dy|) per new MV, in place of the svt_av1_mv_bit_cost()
//     lookup used by svt_aom_inter_fast_cost();
//   - the interpolation-filter rate when IFS runs at MDS0, the is-inter flag, and the "skip mode off" flag.
// Side effects: stores the luma rate in cand_bf->fast_luma_rate and sets cand_bf->fast_chroma_rate to 0.
// Returns RDCOST(lambda, rate, luma_distortion), where rate is the "skip mode on" cost if the candidate allows
// skip mode and that cost is smaller than the luma rate, and the luma rate otherwise.
870 | | static uint64_t av1_inter_fast_cost_light(ModeDecisionContext* ctx, BlkStruct* blk_ptr, |
871 | | ModeDecisionCandidateBuffer* cand_bf, uint64_t luma_distortion, |
872 | 0 | uint64_t lambda, PictureControlSet* pcs, CandidateMv* ref_mv_stack) { |
873 | 0 | ModeDecisionCandidate* cand = cand_bf->cand; |
874 | | // NM - fast inter cost estimation |
875 | 0 | MdRateEstimationContext* r = ctx->md_rate_est_ctx; |
876 | | //_mm_prefetch(p, _MM_HINT_T2); |
877 | | // Luma rate |
878 | 0 | uint32_t luma_rate = 0; |
879 | 0 | uint64_t mv_rate = 0; |
880 | 0 | const PredictionMode inter_mode = (PredictionMode)cand->block_mi.mode; |
881 | 0 | const uint8_t have_nearmv = have_nearmv_in_inter_mode(inter_mode); |
882 | 0 | uint64_t inter_mode_bits_num = 0; |
883 | 0 | const uint8_t skip_mode_ctx = ctx->skip_mode_ctx; |
884 | 0 | MvReferenceFrame rf[2] = {cand->block_mi.ref_frame[0], cand->block_mi.ref_frame[1]}; |
885 | 0 | const int8_t ref_frame_type = av1_ref_frame_type(rf); |
886 | 0 | const uint8_t is_compound = is_inter_compound_mode(cand->block_mi.mode); |
887 | 0 | const uint32_t mode_context = svt_aom_mode_context_analyzer(ctx->inter_mode_ctx[ref_frame_type], rf); |
888 | 0 | uint64_t reference_picture_bits_num = 0; |
// Reference bits are left at 0 when approx_inter_rate is 2 or higher.
889 | 0 | if (ctx->approx_inter_rate < 2) { |
890 | 0 | reference_picture_bits_num = ctx->estimate_ref_frames_num_bits[ref_frame_type]; |
891 | 0 | } |
892 | 0 | if (is_compound) { |
893 | 0 | assert(INTER_COMPOUND_OFFSET(inter_mode) < INTER_COMPOUND_MODES); |
894 | 0 | inter_mode_bits_num += r->inter_compound_mode_fac_bits[mode_context][INTER_COMPOUND_OFFSET(inter_mode)]; |
895 | 0 | } else { |
896 | 0 | int16_t newmv_ctx = mode_context & NEWMV_CTX_MASK; |
897 | | //aom_write_symbol(ec_writer, mode != NEWMV, frame_context->newmv_cdf[newmv_ctx], 2); |
898 | 0 | inter_mode_bits_num += r->new_mv_mode_fac_bits[newmv_ctx][inter_mode != NEWMV]; |
899 | 0 | if (inter_mode != NEWMV) { |
900 | 0 | const int16_t zero_mv_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK; |
901 | | //aom_write_symbol(ec_writer, mode != GLOBALMV, frame_context->zeromv_cdf[zero_mv_ctx], 2); |
902 | 0 | inter_mode_bits_num += r->zero_mv_mode_fac_bits[zero_mv_ctx][inter_mode != GLOBALMV]; |
903 | 0 | if (inter_mode != GLOBALMV) { |
904 | 0 | int16_t ref_mv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK; |
905 | | /*aom_write_symbol(ec_writer, mode != NEARESTMV, frame_context->refmv_cdf[refmv_ctx], 2);*/ |
906 | 0 | inter_mode_bits_num += r->ref_mv_mode_fac_bits[ref_mv_ctx][inter_mode != NEARESTMV]; |
907 | 0 | } |
908 | 0 | } |
909 | 0 | } |
910 | 0 | if (inter_mode == NEWMV || inter_mode == NEW_NEWMV || have_nearmv) { |
911 | | // DRL index cost estimation |
912 | 0 | const int32_t new_mv = inter_mode == NEWMV || inter_mode == NEW_NEWMV; |
913 | 0 | if (new_mv) { |
914 | 0 | int32_t idx; |
// One flag is costed per stack position (0 and 1) that has a following entry, stopping at the chosen drl_index.
915 | 0 | for (idx = 0; idx < 2; ++idx) { |
916 | 0 | if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) { |
917 | 0 | uint8_t drl_1_ctx = av1_drl_ctx(ref_mv_stack, idx); |
918 | 0 | inter_mode_bits_num += r->drl_mode_fac_bits[drl_1_ctx][cand->drl_index != idx]; |
919 | 0 | if (cand->drl_index == idx) { |
920 | 0 | break; |
921 | 0 | } |
922 | 0 | } |
923 | 0 | } |
924 | 0 | } |
925 | 0 | if (have_nearmv) { |
926 | 0 | int32_t idx; |
// Same scheme for NEAR modes, but stack positions 1 and 2 are used and drl_index is compared with idx - 1.
927 | 0 | for (idx = 1; idx < 3; ++idx) { |
928 | 0 | if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) { |
929 | 0 | uint8_t drl_ctx = av1_drl_ctx(ref_mv_stack, idx); |
930 | 0 | inter_mode_bits_num += r->drl_mode_fac_bits[drl_ctx][cand->drl_index != (idx - 1)]; |
931 | 0 | if (cand->drl_index == (idx - 1)) { |
932 | 0 | break; |
933 | 0 | } |
934 | 0 | } |
935 | 0 | } |
936 | 0 | } |
937 | 0 | } |
938 | 0 | if (svt_aom_have_newmv_in_inter_mode(inter_mode)) { |
// Closed-form MV rate: each new MV costs 1296 + factor * (|dx| + |dy|) relative to its predictor; the slope is
// 20 when allow_screen_content_tools is set and 50 otherwise.
939 | 0 | const uint16_t factor = pcs->ppcs->frm_hdr.allow_screen_content_tools ? 20 : 50; |
940 | 0 | if (is_compound) { |
941 | 0 | mv_rate = 0; |
942 | 0 | if (inter_mode == NEW_NEWMV) { |
943 | 0 | for (RefList ref_list_idx = 0; ref_list_idx < 2; ++ref_list_idx) { |
944 | 0 | Mv mv = cand->block_mi.mv[ref_list_idx]; |
945 | 0 | Mv ref_mv = cand->pred_mv[ref_list_idx]; |
946 | 0 | const uint16_t absmvdiffx = ABS(mv.x - ref_mv.x); |
947 | 0 | const uint16_t absmvdiffy = ABS(mv.y - ref_mv.y); |
948 | 0 | mv_rate += 1296 + (factor * (absmvdiffx + absmvdiffy)); |
949 | 0 | } |
950 | 0 | } else if (inter_mode == NEAREST_NEWMV || inter_mode == NEAR_NEWMV) { |
951 | | // New MV is second ref |
952 | 0 | Mv mv = cand->block_mi.mv[1]; |
953 | 0 | Mv ref_mv = cand->pred_mv[1]; |
954 | 0 | const uint16_t absmvdiffx = ABS(mv.x - ref_mv.x); |
955 | 0 | const uint16_t absmvdiffy = ABS(mv.y - ref_mv.y); |
956 | 0 | mv_rate += 1296 + (factor * (absmvdiffx + absmvdiffy)); |
957 | 0 | } else { |
958 | 0 | assert(inter_mode == NEW_NEARESTMV || inter_mode == NEW_NEARMV); |
959 | | // New MV is first ref |
960 | 0 | Mv mv = cand->block_mi.mv[0]; |
961 | 0 | Mv ref_mv = cand->pred_mv[0]; |
962 | 0 | const uint16_t absmvdiffx = ABS(mv.x - ref_mv.x); |
963 | 0 | const uint16_t absmvdiffy = ABS(mv.y - ref_mv.y); |
964 | 0 | mv_rate += 1296 + (factor * (absmvdiffx + absmvdiffy)); |
965 | 0 | } |
966 | 0 | } else { |
967 | 0 | assert(!is_compound); // single ref inter prediction |
968 | | // unipred MV stored in idx0 |
969 | 0 | Mv mv = cand->block_mi.mv[0]; |
970 | 0 | Mv ref_mv = cand->pred_mv[0]; |
971 | 0 | const uint16_t absmvdiffx = ABS(mv.x - ref_mv.x); |
972 | 0 | const uint16_t absmvdiffy = ABS(mv.y - ref_mv.y); |
973 | 0 | mv_rate += 1296 + (factor * (absmvdiffx + absmvdiffy)); |
974 | 0 | } |
975 | 0 | } |
976 | | // Get the interpolation filter rate if IFS is performed at MDS0. Otherwise, the filter is unknown, so the rate will be updated after IFS is performed. |
977 | 0 | uint32_t ifs_rate = 0; |
978 | 0 | if (ctx->ifs_ctrls.level == IFS_MDS0 && |
979 | 0 | av1_is_interp_needed_md(&cand_bf->cand->block_mi, pcs, ctx->blk_geom->bsize) && |
980 | 0 | pcs->ppcs->frm_hdr.interpolation_filter == SWITCHABLE) { |
981 | 0 | ifs_rate = svt_aom_get_switchable_rate( |
982 | 0 | &cand_bf->cand->block_mi, &pcs->ppcs->frm_hdr, ctx, pcs->scs->seq_header.enable_dual_filter); |
983 | 0 | } |
984 | 0 | uint32_t is_inter_rate = r->intra_inter_fac_bits[ctx->is_inter_ctx][1]; |
985 | |

// Cost of signalling "skip mode off" (index 0); only counted when the frame enables skip mode and the block size
// allows compound references.
986 | 0 | uint32_t skip_mode_rate = pcs->ppcs->frm_hdr.skip_mode_params.skip_mode_flag && |
987 | 0 | is_comp_ref_allowed(ctx->blk_geom->bsize) |
988 | 0 | ? r->skip_mode_fac_bits[skip_mode_ctx][0] |
989 | 0 | : 0; |
990 | 0 | luma_rate = (uint32_t)(reference_picture_bits_num + skip_mode_rate + inter_mode_bits_num + mv_rate + is_inter_rate + |
991 | 0 | ifs_rate); |
992 | | // Keep the Fast Luma and Chroma rate for future use |
993 | 0 | cand_bf->fast_luma_rate = luma_rate; |
994 | 0 | cand_bf->fast_chroma_rate = 0; |
995 | | // Assign fast cost |
// If the candidate can be coded in skip mode, use the "skip mode on" cost (index 1) when it is the cheaper rate.
996 | 0 | if (cand->skip_mode_allowed) { |
997 | 0 | skip_mode_rate = r->skip_mode_fac_bits[skip_mode_ctx][1]; |
998 | 0 | if (skip_mode_rate < luma_rate) { |
999 | 0 | return (RDCOST(lambda, skip_mode_rate, luma_distortion)); |
1000 | 0 | } |
1001 | 0 | } |
1002 | 0 | return (RDCOST(lambda, luma_rate, luma_distortion)); |
1003 | 0 | } |
1004 | | |
// Fast-loop cost of an inter candidate.
// When ctx->approx_inter_rate is non-zero the work is delegated to av1_inter_fast_cost_light(). Otherwise the
// luma rate is the sum of:
//   - reference-frame bits from ctx->estimate_ref_frames_num_bits[];
//   - inter-mode bits (compound mode, or the NEWMV / GLOBALMV / NEARESTMV decision chain) and DRL index bits;
//   - MV bits from svt_av1_mv_bit_cost() for every new MV of the mode;
//   - inter-intra, motion-mode and compound-type signalling bits;
//   - the interpolation-filter rate when IFS runs at MDS0, the is-inter flag, and the "skip mode off" flag.
// Side effects: stores the luma rate in cand_bf->fast_luma_rate and sets cand_bf->fast_chroma_rate to 0;
// get_compound_mode_rate() additionally writes the candidate's references into the block's mode info.
// Returns RDCOST(lambda, rate, luma_distortion), where rate is the "skip mode on" cost if the candidate allows
// skip mode and that cost is smaller than the luma rate, and the luma rate otherwise.
1005 | | uint64_t svt_aom_inter_fast_cost(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf, |
1006 | 0 | uint64_t lambda, uint64_t luma_distortion) { |
1007 | 0 | const BlockGeom* blk_geom = ctx->blk_geom; |
1008 | 0 | BlkStruct* blk_ptr = ctx->blk_ptr; |
1009 | 0 | ModeDecisionCandidate* cand = cand_bf->cand; |
1010 | 0 | MvReferenceFrame rf[2] = {cand->block_mi.ref_frame[0], cand->block_mi.ref_frame[1]}; |
1011 | 0 | const int8_t ref_frame_type = av1_ref_frame_type(cand->block_mi.ref_frame); |
1012 | 0 | CandidateMv* ref_mv_stack = &(ctx->ref_mv_stack[ref_frame_type][0]); |
1013 | |

1014 | 0 | if (ctx->approx_inter_rate) { |
1015 | 0 | return av1_inter_fast_cost_light(ctx, blk_ptr, cand_bf, luma_distortion, lambda, pcs, ref_mv_stack); |
1016 | 0 | } |
1017 | 0 | FrameHeader* frm_hdr = &pcs->ppcs->frm_hdr; |
1018 | | |
1019 | | // Luma rate |
1020 | 0 | uint32_t luma_rate = 0; |
1021 | 0 | uint64_t mv_rate = 0; |
1022 | 0 | PredictionMode inter_mode = (PredictionMode)cand->block_mi.mode; |
1023 | |

1024 | 0 | uint64_t inter_mode_bits_num = 0; |
1025 | |

1026 | 0 | const uint8_t skip_mode_ctx = ctx->skip_mode_ctx; |
1027 | 0 | const uint8_t is_compound = is_inter_compound_mode(cand->block_mi.mode); |
1028 | 0 | uint32_t mode_context = svt_aom_mode_context_analyzer(ctx->inter_mode_ctx[ref_frame_type], rf); |
1029 | 0 | uint64_t reference_picture_bits_num = 0; |
1030 | | |
1031 | | //Reference Type and Mode Bit estimation |
1032 | 0 | reference_picture_bits_num = ctx->estimate_ref_frames_num_bits[ref_frame_type]; |
1033 | 0 | if (is_compound) { |
1034 | 0 | assert(INTER_COMPOUND_OFFSET(inter_mode) < INTER_COMPOUND_MODES); |
1035 | 0 | inter_mode_bits_num += |
1036 | 0 | ctx->md_rate_est_ctx->inter_compound_mode_fac_bits[mode_context][INTER_COMPOUND_OFFSET(inter_mode)]; |
1037 | 0 | } else { |
1038 | | // uint32_t newmv_ctx = mode_context & NEWMV_CTX_MASK; |
1039 | | // inter_mode_bits_num = cand_bf->cand->md_rate_est_ctx->new_mv_mode_fac_bits[mode_ctx][0]; |
1040 | |

1041 | 0 | int16_t newmv_ctx = mode_context & NEWMV_CTX_MASK; |
1042 | | // aom_write_symbol(ec_writer, mode != NEWMV, frame_context->newmv_cdf[newmv_ctx], 2); |
1043 | 0 | inter_mode_bits_num += ctx->md_rate_est_ctx->new_mv_mode_fac_bits[newmv_ctx][inter_mode != NEWMV]; |
1044 | 0 | if (inter_mode != NEWMV) { |
1045 | 0 | const int16_t zero_mv_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK; |
1046 | | // aom_write_symbol(ec_writer, mode != GLOBALMV, frame_context->zeromv_cdf[zero_mv_ctx], |
1047 | | // 2); |
1048 | 0 | inter_mode_bits_num += ctx->md_rate_est_ctx->zero_mv_mode_fac_bits[zero_mv_ctx][inter_mode != GLOBALMV]; |
1049 | 0 | if (inter_mode != GLOBALMV) { |
1050 | 0 | int16_t ref_mv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK; |
1051 | | /*aom_write_symbol(ec_writer, mode != NEARESTMV, |
1052 | | * frame_context->refmv_cdf[refmv_ctx], 2);*/ |
1053 | 0 | inter_mode_bits_num += ctx->md_rate_est_ctx->ref_mv_mode_fac_bits[ref_mv_ctx][inter_mode != NEARESTMV]; |
1054 | 0 | } |
1055 | 0 | } |
1056 | 0 | } |
1057 | 0 | if (inter_mode == NEWMV || inter_mode == NEW_NEWMV || have_nearmv_in_inter_mode(inter_mode)) { |
1058 | | // DRL index cost estimation |
1059 | 0 | const int32_t new_mv = inter_mode == NEWMV || inter_mode == NEW_NEWMV; |
1060 | 0 | if (new_mv) { |
1061 | 0 | int32_t idx; |
// One flag is costed per stack position (0 and 1) that has a following entry, stopping at the chosen drl_index.
1062 | 0 | for (idx = 0; idx < 2; ++idx) { |
1063 | 0 | if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) { |
1064 | 0 | uint8_t drl_1_ctx = av1_drl_ctx(ref_mv_stack, idx); |
1065 | 0 | inter_mode_bits_num += ctx->md_rate_est_ctx->drl_mode_fac_bits[drl_1_ctx][cand->drl_index != idx]; |
1066 | 0 | if (cand->drl_index == idx) { |
1067 | 0 | break; |
1068 | 0 | } |
1069 | 0 | } |
1070 | 0 | } |
1071 | 0 | } |
1072 | |

1073 | 0 | if (have_nearmv_in_inter_mode(inter_mode)) { |
1074 | 0 | int32_t idx; |
// Same scheme for NEAR modes, but stack positions 1 and 2 are used and drl_index is compared with idx - 1.
1075 | 0 | for (idx = 1; idx < 3; ++idx) { |
1076 | 0 | if (blk_ptr->av1xd->ref_mv_count[ref_frame_type] > idx + 1) { |
1077 | 0 | uint8_t drl_ctx = av1_drl_ctx(ref_mv_stack, idx); |
1078 | 0 | inter_mode_bits_num += |
1079 | 0 | ctx->md_rate_est_ctx->drl_mode_fac_bits[drl_ctx][cand->drl_index != (idx - 1)]; |
1080 | |

1081 | 0 | if (cand->drl_index == (idx - 1)) { |
1082 | 0 | break; |
1083 | 0 | } |
1084 | 0 | } |
1085 | 0 | } |
1086 | 0 | } |
1087 | 0 | } |
1088 | |

// MV bits: only the MV(s) that the mode codes as "new" are costed, each against its own predictor in cand->pred_mv[].
1089 | 0 | if (svt_aom_have_newmv_in_inter_mode(inter_mode)) { |
1090 | 0 | if (is_compound) { |
1091 | 0 | mv_rate = 0; |
1092 | |

1093 | 0 | if (inter_mode == NEW_NEWMV) { |
1094 | 0 | for (RefList ref_list_idx = 0; ref_list_idx < 2; ++ref_list_idx) { |
1095 | 0 | Mv mv = cand->block_mi.mv[ref_list_idx]; |
1096 | 0 | Mv ref_mv = cand->pred_mv[ref_list_idx]; |
1097 | 0 | mv_rate += svt_av1_mv_bit_cost(&mv, |
1098 | 0 | &ref_mv, |
1099 | 0 | ctx->md_rate_est_ctx->nmv_vec_cost, |
1100 | 0 | ctx->md_rate_est_ctx->nmvcoststack, |
1101 | 0 | MV_COST_WEIGHT); |
1102 | 0 | } |
1103 | 0 | } else if (inter_mode == NEAREST_NEWMV || inter_mode == NEAR_NEWMV) { |
1104 | 0 | Mv mv = cand->block_mi.mv[1]; |
1105 | 0 | Mv ref_mv = cand->pred_mv[1]; |
1106 | 0 | mv_rate += svt_av1_mv_bit_cost(&mv, |
1107 | 0 | &ref_mv, |
1108 | 0 | ctx->md_rate_est_ctx->nmv_vec_cost, |
1109 | 0 | ctx->md_rate_est_ctx->nmvcoststack, |
1110 | 0 | MV_COST_WEIGHT); |
1111 | 0 | } else { |
1112 | 0 | assert(inter_mode == NEW_NEARESTMV || inter_mode == NEW_NEARMV); |
1113 | 0 | Mv mv = cand->block_mi.mv[0]; |
1114 | 0 | Mv ref_mv = cand->pred_mv[0]; |
1115 | 0 | mv_rate += svt_av1_mv_bit_cost(&mv, |
1116 | 0 | &ref_mv, |
1117 | 0 | ctx->md_rate_est_ctx->nmv_vec_cost, |
1118 | 0 | ctx->md_rate_est_ctx->nmvcoststack, |
1119 | 0 | MV_COST_WEIGHT); |
1120 | 0 | } |
1121 | 0 | } else { |
1122 | 0 | assert(!is_compound); // single ref inter prediction |
1123 | | // unipred MVs stored in idx0 |
1124 | 0 | Mv mv = cand->block_mi.mv[0]; |
1125 | 0 | Mv ref_mv = cand->pred_mv[0]; |
1126 | 0 | mv_rate = svt_av1_mv_bit_cost( |
1127 | 0 | &mv, &ref_mv, ctx->md_rate_est_ctx->nmv_vec_cost, ctx->md_rate_est_ctx->nmvcoststack, MV_COST_WEIGHT); |
1128 | 0 | } |
1129 | 0 | } |
1130 | | // inter intra mode rate |
1131 | 0 | if (pcs->ppcs->scs->seq_header.enable_interintra_compound && |
1132 | | /* Check if inter-intra is allowed for current block size / mode (even if the feature is off |
1133 | | * for the current block, we still need to signal inter-intra off. |
1134 | | */ |
1135 | 0 | svt_is_interintra_allowed(true, blk_geom->bsize, cand->block_mi.mode, rf)) { |
1136 | 0 | const int interintra = cand->block_mi.is_interintra_used; |
1137 | 0 | const int bsize_group = eb_size_group_lookup[blk_geom->bsize]; |
1138 | |

1139 | 0 | inter_mode_bits_num += |
1140 | 0 | ctx->md_rate_est_ctx->inter_intra_fac_bits[bsize_group][cand->block_mi.is_interintra_used]; |
1141 | |

1142 | 0 | if (interintra) { |
1143 | 0 | inter_mode_bits_num += |
1144 | 0 | ctx->md_rate_est_ctx->inter_intra_mode_fac_bits[bsize_group][cand->block_mi.interintra_mode]; |
1145 | |

1146 | 0 | if (svt_aom_is_interintra_wedge_used(blk_geom->bsize)) { |
1147 | 0 | inter_mode_bits_num += |
1148 | 0 | ctx->md_rate_est_ctx |
1149 | 0 | ->wedge_inter_intra_fac_bits[blk_geom->bsize][cand->block_mi.use_wedge_interintra]; |
1150 | |

1151 | 0 | if (cand->block_mi.use_wedge_interintra) { |
1152 | 0 | inter_mode_bits_num += |
1153 | 0 | ctx->md_rate_est_ctx |
1154 | 0 | ->wedge_idx_fac_bits[blk_geom->bsize][cand->block_mi.interintra_wedge_index]; |
1155 | 0 | } |
1156 | 0 | } |
1157 | 0 | } |
1158 | 0 | } |
// Motion-mode rate: the cost table depends on the richest motion mode allowed for this block — nothing for
// SIMPLE_TRANSLATION, a binary OBMC flag (motion_mode_fac_bits1) for OBMC_CAUSAL, the full table otherwise.
1159 | 0 | if (is_inter_singleref_mode(inter_mode) && frm_hdr->is_motion_mode_switchable && rf[1] != INTRA_FRAME) { |
1160 | 0 | assert(!cand->block_mi.is_interintra_used); |
1161 | 0 | const MotionMode motion_mode_rd = cand->block_mi.motion_mode; |
1162 | 0 | const BlockSize bsize = blk_geom->bsize; |
1163 | 0 | const MotionMode last_motion_mode_allowed = svt_aom_motion_mode_allowed( |
1164 | 0 | pcs, cand->block_mi.num_proj_ref, blk_ptr->overlappable_neighbors, bsize, rf[0], rf[1], inter_mode); |
1165 | 0 | switch (last_motion_mode_allowed) { |
1166 | 0 | case SIMPLE_TRANSLATION: |
1167 | 0 | break; |
1168 | 0 | case OBMC_CAUSAL: |
1169 | 0 | inter_mode_bits_num += ctx->md_rate_est_ctx->motion_mode_fac_bits1[bsize][motion_mode_rd == OBMC_CAUSAL]; |
1170 | 0 | break; |
1171 | 0 | default: |
1172 | 0 | inter_mode_bits_num += ctx->md_rate_est_ctx->motion_mode_fac_bits[bsize][motion_mode_rd]; |
1173 | 0 | } |
1174 | 0 | } |
1175 | | // this func return 0 if masked=0 and distance=0 |
1176 | 0 | inter_mode_bits_num += get_compound_mode_rate(pcs, ctx, cand, blk_geom->bsize); |
1177 | | // Get the interpolation filter rate if IFS is performed at MDS0. Otherwise, the filter is unknown, so the rate will be updated after IFS is performed. |
1178 | 0 | uint32_t ifs_rate = 0; |
1179 | 0 | if (ctx->ifs_ctrls.level == IFS_MDS0 && |
1180 | 0 | av1_is_interp_needed_md(&cand_bf->cand->block_mi, pcs, ctx->blk_geom->bsize) && |
1181 | 0 | frm_hdr->interpolation_filter == SWITCHABLE) { |
1182 | 0 | ifs_rate = svt_aom_get_switchable_rate( |
1183 | 0 | &cand_bf->cand->block_mi, frm_hdr, ctx, pcs->scs->seq_header.enable_dual_filter); |
1184 | 0 | } |
1185 | 0 | uint32_t is_inter_rate = ctx->md_rate_est_ctx->intra_inter_fac_bits[ctx->is_inter_ctx][1]; |
// Cost of signalling "skip mode off" (index 0); only counted when the frame enables skip mode and the block size
// allows compound references.
1186 | 0 | uint32_t skip_mode_rate = pcs->ppcs->frm_hdr.skip_mode_params.skip_mode_flag && is_comp_ref_allowed(blk_geom->bsize) |
1187 | 0 | ? ctx->md_rate_est_ctx->skip_mode_fac_bits[skip_mode_ctx][0] |
1188 | 0 | : 0; |
1189 | 0 | luma_rate = (uint32_t)(reference_picture_bits_num + skip_mode_rate + inter_mode_bits_num + mv_rate + is_inter_rate + |
1190 | 0 | ifs_rate); |
1191 | | // Keep the Fast Luma and Chroma rate for future use |
1192 | 0 | cand_bf->fast_luma_rate = luma_rate; |
1193 | 0 | cand_bf->fast_chroma_rate = 0; |
1194 | | // Assign fast cost |
// If the candidate can be coded in skip mode, use the "skip mode on" cost (index 1) when it is the cheaper rate.
1195 | 0 | if (cand->skip_mode_allowed) { |
1196 | 0 | skip_mode_rate = ctx->md_rate_est_ctx->skip_mode_fac_bits[skip_mode_ctx][1]; |
1197 | 0 | if (skip_mode_rate < luma_rate) { |
1198 | 0 | return (RDCOST(lambda, skip_mode_rate, luma_distortion)); |
1199 | 0 | } |
1200 | 0 | } |
1201 | 0 | return (RDCOST(lambda, luma_rate, luma_distortion)); |
1202 | 0 | } |
1203 | | |
1204 | | /* Luma-only coefficient rate estimate for one transform block (simplified variant of |
     | | * svt_aom_txb_estimate_coeff_bits). |
     | | * - y_eob != 0: *y_txb_coeff_bits is set to svt_av1_cost_coeffs_txb() evaluated on the luma coefficients at |
     | | *   txb_origin_index, with DCT_DCT as the transform type and 0 passed for the CDF-update flag, the entropy |
     | | *   context pointer, the skip / DC-sign contexts and the reduced-tx-set flag; the result is then left-shifted |
     | | *   by ctx->mds_subres_step. |
     | | * - y_eob == 0: *y_txb_coeff_bits is set to av1_cost_skip_txb() for PLANE_TYPE_Y with skip context 0. |
     | | * Always returns EB_ErrorNone. |
1205 | | */ |
1206 | | EbErrorType svt_aom_txb_estimate_coeff_bits_light_pd0(ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf, |
1207 | | uint32_t txb_origin_index, EbPictureBufferDesc* coeff_buffer_sb, |
1208 | 0 | uint32_t y_eob, uint64_t* y_txb_coeff_bits, TxSize txsize) { |
1209 | 0 | if (y_eob) { |
1210 | 0 | *y_txb_coeff_bits = svt_av1_cost_coeffs_txb( |
1211 | 0 | ctx, |
1212 | 0 | 0, |
1213 | 0 | 0, |
1214 | 0 | cand_bf, |
1215 | 0 | (int32_t*)&coeff_buffer_sb->y_buffer[txb_origin_index * sizeof(int32_t)], |
1216 | 0 | (uint16_t)y_eob, |
1217 | 0 | PLANE_TYPE_Y, |
1218 | 0 | txsize, |
1219 | 0 | DCT_DCT, |
1220 | 0 | 0, |
1221 | 0 | 0, |
1222 | 0 | 0); |
1223 | |

// NOTE(review): the shift presumably compensates for rows skipped by sub-sampled residuals — confirm.
1224 | 0 | *y_txb_coeff_bits = (*y_txb_coeff_bits) << ctx->mds_subres_step; |
1225 | |

1226 | 0 | } else { |
1227 | 0 | *y_txb_coeff_bits = av1_cost_skip_txb(ctx, 0, 0, txsize, PLANE_TYPE_Y, 0); |
1228 | 0 | } |
1229 | |

1230 | 0 | return EB_ErrorNone; |
1231 | 0 | } |
1232 | | |
1233 | | EbErrorType svt_aom_txb_estimate_coeff_bits(ModeDecisionContext* ctx, uint8_t allow_update_cdf, FRAME_CONTEXT* ec_ctx, |
1234 | | PictureControlSet* pcs, ModeDecisionCandidateBuffer* cand_bf, |
1235 | | uint32_t txb_origin_index, uint32_t txb_chroma_origin_index, |
1236 | | EbPictureBufferDesc* coeff_buffer_sb, uint32_t y_eob, uint32_t cb_eob, |
1237 | | uint32_t cr_eob, uint64_t* y_txb_coeff_bits, uint64_t* cb_txb_coeff_bits, |
1238 | | uint64_t* cr_txb_coeff_bits, TxSize txsize, TxSize txsize_uv, |
1239 | 146k | TxType tx_type, TxType tx_type_uv, COMPONENT_TYPE component_type) { |
1240 | 146k | EbErrorType return_error = EB_ErrorNone; |
1241 | | |
1242 | 146k | FrameHeader* frm_hdr = &pcs->ppcs->frm_hdr; |
1243 | | |
1244 | 146k | int32_t* coeff_buffer; |
1245 | 146k | int16_t luma_txb_skip_context = ctx->luma_txb_skip_context; |
1246 | 146k | int16_t luma_dc_sign_context = ctx->luma_dc_sign_context; |
1247 | 146k | int16_t cb_txb_skip_context = ctx->cb_txb_skip_context; |
1248 | 146k | int16_t cb_dc_sign_context = ctx->cb_dc_sign_context; |
1249 | 146k | int16_t cr_txb_skip_context = ctx->cr_txb_skip_context; |
1250 | 146k | int16_t cr_dc_sign_context = ctx->cr_dc_sign_context; |
1251 | | |
1252 | 146k | bool reduced_transform_set_flag = frm_hdr->reduced_tx_set ? true : false; |
1253 | | |
1254 | | //Estimate the rate of the transform type and coefficient for Luma |
1255 | | |
1256 | 146k | if (component_type == COMPONENT_LUMA || component_type == COMPONENT_ALL) { |
1257 | 0 | if (y_eob) { |
1258 | 0 | coeff_buffer = (int32_t*)&coeff_buffer_sb->y_buffer[txb_origin_index * sizeof(int32_t)]; |
1259 | |
|
1260 | 0 | *y_txb_coeff_bits = svt_av1_cost_coeffs_txb(ctx, |
1261 | 0 | allow_update_cdf, |
1262 | 0 | ec_ctx, |
1263 | 0 | cand_bf, |
1264 | 0 | coeff_buffer, |
1265 | 0 | (uint16_t)y_eob, |
1266 | 0 | PLANE_TYPE_Y, |
1267 | 0 | txsize, |
1268 | 0 | tx_type, |
1269 | 0 | luma_txb_skip_context, |
1270 | 0 | luma_dc_sign_context, |
1271 | 0 | reduced_transform_set_flag); |
1272 | 0 | *y_txb_coeff_bits = (*y_txb_coeff_bits) << ctx->mds_subres_step; |
1273 | 0 | } else { |
1274 | 0 | *y_txb_coeff_bits = av1_cost_skip_txb( |
1275 | 0 | ctx, allow_update_cdf, ec_ctx, txsize, PLANE_TYPE_Y, luma_txb_skip_context); |
1276 | 0 | } |
1277 | 0 | } |
1278 | | // Estimate the rate of the transform type and coefficient for chroma Cb |
1279 | | |
1280 | 146k | if (component_type == COMPONENT_CHROMA_CB || component_type == COMPONENT_CHROMA || |
1281 | 146k | component_type == COMPONENT_ALL) { |
1282 | 146k | if (cb_eob) { |
1283 | 6.22k | coeff_buffer = (int32_t*)&coeff_buffer_sb->u_buffer[txb_chroma_origin_index * sizeof(int32_t)]; |
1284 | | |
1285 | 6.22k | *cb_txb_coeff_bits = svt_av1_cost_coeffs_txb(ctx, |
1286 | 6.22k | allow_update_cdf, |
1287 | 6.22k | ec_ctx, |
1288 | 6.22k | cand_bf, |
1289 | 6.22k | coeff_buffer, |
1290 | 6.22k | (uint16_t)cb_eob, |
1291 | 6.22k | PLANE_TYPE_UV, |
1292 | 6.22k | txsize_uv, |
1293 | 6.22k | tx_type_uv, |
1294 | 6.22k | cb_txb_skip_context, |
1295 | 6.22k | cb_dc_sign_context, |
1296 | 6.22k | reduced_transform_set_flag); |
1297 | 139k | } else { |
1298 | 139k | *cb_txb_coeff_bits = av1_cost_skip_txb( |
1299 | 139k | ctx, allow_update_cdf, ec_ctx, txsize_uv, PLANE_TYPE_UV, cb_txb_skip_context); |
1300 | 139k | } |
1301 | 146k | } |
1302 | | |
1303 | 146k | if (component_type == COMPONENT_CHROMA_CR || component_type == COMPONENT_CHROMA || |
1304 | 145k | component_type == COMPONENT_ALL) { |
1305 | | //Estimate the rate of the transform type and coefficient for chroma Cr |
1306 | 145k | if (cr_eob) { |
1307 | 6.22k | coeff_buffer = (int32_t*)&coeff_buffer_sb->v_buffer[txb_chroma_origin_index * sizeof(int32_t)]; |
1308 | | |
1309 | 6.22k | *cr_txb_coeff_bits = svt_av1_cost_coeffs_txb(ctx, |
1310 | 6.22k | allow_update_cdf, |
1311 | 6.22k | ec_ctx, |
1312 | 6.22k | cand_bf, |
1313 | 6.22k | coeff_buffer, |
1314 | 6.22k | (uint16_t)cr_eob, |
1315 | 6.22k | PLANE_TYPE_UV, |
1316 | 6.22k | txsize_uv, |
1317 | 6.22k | tx_type_uv, |
1318 | 6.22k | cr_txb_skip_context, |
1319 | 6.22k | cr_dc_sign_context, |
1320 | 6.22k | reduced_transform_set_flag); |
1321 | 139k | } else { |
1322 | 139k | *cr_txb_coeff_bits = av1_cost_skip_txb( |
1323 | 139k | ctx, allow_update_cdf, ec_ctx, txsize_uv, PLANE_TYPE_UV, cr_txb_skip_context); |
1324 | 139k | } |
1325 | 145k | } |
1326 | | |
1327 | 146k | return return_error; |
1328 | 146k | } |
1329 | | |
1330 | | EbErrorType svt_aom_full_cost_light_pd0(ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf, |
1331 | 8.21k | uint64_t* y_distortion, uint64_t lambda, uint64_t* y_coeff_bits) { |
1332 | 8.21k | EbErrorType return_error = EB_ErrorNone; |
1333 | | |
1334 | 8.21k | uint64_t coeff_rate = (*y_coeff_bits + (uint64_t)ctx->md_rate_est_ctx->skip_fac_bits[0][0]); |
1335 | | |
1336 | | // Assign full cost |
1337 | | // Use context index 0 for the partition rate as an approximation to skip call to |
1338 | | // av1_partition_rate_cost Partition cost is only needed for > 4x4 blocks, but light-PD0 assumes |
1339 | | // 4x4 blocks are disallowed |
1340 | 8.21k | *(cand_bf->full_cost) = RDCOST( |
1341 | 8.21k | lambda, coeff_rate + ctx->md_rate_est_ctx->partition_fac_bits[0][PARTITION_NONE], y_distortion[0]); |
1342 | 8.21k | return return_error; |
1343 | 8.21k | } |
1344 | | |
1345 | | /********************************************************************************* |
1346 | | * svt_aom_av1_full_cost function is used to estimate the cost of a candidate mode |
1347 | | * for full mode decision module. |
1348 | | **********************************************************************************/ |
1349 | | void svt_aom_full_cost(PictureControlSet* pcs, ModeDecisionContext* ctx, ModeDecisionCandidateBuffer* cand_bf, |
1350 | | uint64_t lambda, uint64_t y_distortion[DIST_TOTAL][DIST_CALC_TOTAL], |
1351 | | uint64_t cb_distortion[DIST_TOTAL][DIST_CALC_TOTAL], |
1352 | | uint64_t cr_distortion[DIST_TOTAL][DIST_CALC_TOTAL], uint64_t* y_coeff_bits, |
1353 | 282k | uint64_t* cb_coeff_bits, uint64_t* cr_coeff_bits) { |
1354 | 282k | const uint8_t skip_coeff_ctx = ctx->skip_coeff_ctx; |
1355 | 282k | const bool update_full_cost_ssim = ctx->tune_ssim_level > SSIM_LVL_0 ? true : false; |
1356 | | |
1357 | | // Get the TX size rate for skip and non-skip block. Need both to make non-skip decision |
1358 | 282k | uint64_t non_skip_tx_size_bits = 0, skip_tx_size_bits = 0; |
1359 | 282k | if (!ctx->shut_fast_rate && pcs->ppcs->frm_hdr.tx_mode == TX_MODE_SELECT) { |
1360 | 145k | if (cand_bf->block_has_coeff) { |
1361 | 6.65k | non_skip_tx_size_bits = svt_aom_get_tx_size_bits( |
1362 | 6.65k | cand_bf, ctx, pcs, cand_bf->cand->block_mi.tx_depth, /*cand_bf->block_has_coeff*/ 1); |
1363 | 6.65k | } |
1364 | | |
1365 | 145k | skip_tx_size_bits = svt_aom_get_tx_size_bits( |
1366 | 145k | cand_bf, ctx, pcs, cand_bf->cand->block_mi.tx_depth, /*cand_bf->block_has_coeff*/ 0); |
1367 | 145k | } |
1368 | | |
1369 | 282k | assert(IMPLIES(is_inter_mode(cand_bf->cand->block_mi.mode), skip_tx_size_bits == 0)); |
1370 | | |
1371 | | // Decide if block should be signalled as skip (send no coeffs) |
1372 | 282k | if (!svt_av1_is_lossless_segment(pcs, ctx->blk_ptr->segment_id) && ctx->blk_skip_decision && |
1373 | 8.21k | cand_bf->block_has_coeff && is_inter_mode(cand_bf->cand->block_mi.mode)) { |
1374 | 0 | const uint64_t non_skip_cost = RDCOST( |
1375 | 0 | lambda, |
1376 | 0 | (*y_coeff_bits + *cb_coeff_bits + *cr_coeff_bits + non_skip_tx_size_bits + |
1377 | 0 | (uint64_t)ctx->md_rate_est_ctx->skip_fac_bits[skip_coeff_ctx][0]), |
1378 | 0 | (y_distortion[DIST_SSD][0] + cb_distortion[DIST_SSD][0] + cr_distortion[DIST_SSD][0])); |
1379 | |
|
1380 | 0 | const uint64_t skip_cost = RDCOST( |
1381 | 0 | lambda, |
1382 | 0 | ((uint64_t)ctx->md_rate_est_ctx->skip_fac_bits[skip_coeff_ctx][1]) + skip_tx_size_bits, |
1383 | 0 | (y_distortion[DIST_SSD][1] + cb_distortion[DIST_SSD][1] + cr_distortion[DIST_SSD][1])); |
1384 | | |
1385 | | // Update signals to correspond to skip_mode values (no coeffs, etc.) |
1386 | 0 | if (skip_cost < non_skip_cost) { |
1387 | 0 | y_distortion[DIST_SSD][0] = y_distortion[DIST_SSD][1]; |
1388 | 0 | cb_distortion[DIST_SSD][0] = cb_distortion[DIST_SSD][1]; |
1389 | 0 | cr_distortion[DIST_SSD][0] = cr_distortion[DIST_SSD][1]; |
1390 | |
|
1391 | 0 | y_distortion[DIST_SSIM][0] = y_distortion[DIST_SSIM][1]; |
1392 | 0 | cb_distortion[DIST_SSIM][0] = cb_distortion[DIST_SSIM][1]; |
1393 | 0 | cr_distortion[DIST_SSIM][0] = cr_distortion[DIST_SSIM][1]; |
1394 | 0 | cand_bf->block_has_coeff = 0; |
1395 | 0 | cand_bf->y_has_coeff = 0; |
1396 | 0 | cand_bf->u_has_coeff = 0; |
1397 | 0 | cand_bf->v_has_coeff = 0; |
1398 | 0 | cand_bf->cnt_nz_coeff = 0; |
1399 | | |
1400 | | // For inter modes, signalling skip means no TX depth is used and the TX type will be DCT_DCT |
1401 | 0 | cand_bf->cand->block_mi.tx_depth = 0; |
1402 | 0 | cand_bf->cand->transform_type_uv = DCT_DCT; |
1403 | 0 | memset(cand_bf->cand->transform_type, DCT_DCT, 16 * sizeof(cand_bf->cand->transform_type[0])); |
1404 | 0 | memset(&cand_bf->quant_dc, 0, sizeof(QuantDcData)); |
1405 | 0 | memset(&cand_bf->eob, 0, sizeof(EobData)); |
1406 | 0 | } |
1407 | 0 | } |
1408 | | |
1409 | 282k | uint64_t coeff_rate = 0; |
1410 | 282k | if (cand_bf->block_has_coeff) { |
1411 | 9.04k | coeff_rate = (*y_coeff_bits + *cb_coeff_bits + *cr_coeff_bits + non_skip_tx_size_bits + |
1412 | 9.04k | (uint64_t)ctx->md_rate_est_ctx->skip_fac_bits[skip_coeff_ctx][0]); |
1413 | 273k | } else { |
1414 | 273k | coeff_rate = ctx->md_rate_est_ctx->skip_fac_bits[skip_coeff_ctx][1] + skip_tx_size_bits; |
1415 | 273k | } |
1416 | | |
1417 | 282k | uint64_t mode_rate = cand_bf->fast_luma_rate + cand_bf->fast_chroma_rate + coeff_rate; |
1418 | 282k | uint64_t mode_distortion = y_distortion[DIST_SSD][0] + cb_distortion[DIST_SSD][0] + cr_distortion[DIST_SSD][0]; |
1419 | 282k | uint64_t mode_ssim_distortion = update_full_cost_ssim |
1420 | 282k | ? y_distortion[DIST_SSIM][0] + cb_distortion[DIST_SSIM][0] + cr_distortion[DIST_SSIM][0] |
1421 | 282k | : 0; |
1422 | 282k | uint64_t mode_cost = RDCOST(lambda, mode_rate, mode_distortion); |
1423 | | |
1424 | | // If skip_mode is allowed for this candidate, check cost of skip mode compared to regular cost |
1425 | 282k | if (cand_bf->cand->skip_mode_allowed == true) { |
1426 | 0 | const uint8_t skip_mode_ctx = ctx->skip_mode_ctx; |
1427 | | |
1428 | | // Skip mode cost |
1429 | 0 | const uint64_t skip_mode_rate = ctx->md_rate_est_ctx->skip_mode_fac_bits[skip_mode_ctx][1]; |
1430 | 0 | const uint64_t skip_mode_distortion = y_distortion[DIST_SSD][1] + cb_distortion[DIST_SSD][1] + |
1431 | 0 | cr_distortion[DIST_SSD][1]; |
1432 | 0 | const uint64_t skip_mode_ssim_distortion = update_full_cost_ssim |
1433 | 0 | ? y_distortion[DIST_SSIM][1] + cb_distortion[DIST_SSIM][1] + cr_distortion[DIST_SSIM][1] |
1434 | 0 | : 0; |
1435 | 0 | const uint64_t skip_mode_cost = RDCOST(lambda, skip_mode_rate, skip_mode_distortion); |
1436 | |
|
1437 | 0 | cand_bf->cand->block_mi.skip_mode = false; |
1438 | 0 | if (skip_mode_cost <= mode_cost) { |
1439 | | // Update candidate cost |
1440 | 0 | mode_cost = skip_mode_cost; |
1441 | 0 | mode_rate = skip_mode_rate; |
1442 | 0 | mode_distortion = skip_mode_distortion; |
1443 | 0 | mode_ssim_distortion = skip_mode_ssim_distortion; |
1444 | 0 | cand_bf->cand->block_mi.skip_mode = true; |
1445 | | |
1446 | | // Update signals to correspond to skip_mode values (no coeffs, etc.) |
1447 | 0 | cand_bf->block_has_coeff = 0; |
1448 | 0 | cand_bf->y_has_coeff = 0; |
1449 | 0 | cand_bf->u_has_coeff = 0; |
1450 | 0 | cand_bf->v_has_coeff = 0; |
1451 | 0 | cand_bf->cnt_nz_coeff = 0; |
1452 | 0 | cand_bf->cand->block_mi.tx_depth = 0; |
1453 | 0 | memset(cand_bf->cand->transform_type, DCT_DCT, 16 * sizeof(cand_bf->cand->transform_type[0])); |
1454 | 0 | cand_bf->cand->transform_type_uv = DCT_DCT; |
1455 | 0 | memset(&cand_bf->quant_dc, 0, sizeof(QuantDcData)); |
1456 | 0 | memset(&cand_bf->eob, 0, sizeof(EobData)); |
1457 | 0 | } |
1458 | 0 | } |
1459 | | |
1460 | | // Assign full cost |
1461 | 282k | *(cand_bf->full_cost) = mode_cost; |
1462 | 282k | cand_bf->total_rate = mode_rate; |
1463 | 282k | cand_bf->full_dist = (uint32_t)mode_distortion; |
1464 | 282k | if (update_full_cost_ssim) { |
1465 | 0 | assert(ctx->pd_pass == PD_PASS_1); |
1466 | 0 | assert(ctx->md_stage == MD_STAGE_3); |
1467 | 0 | *(cand_bf->full_cost_ssim) = RDCOST(lambda, mode_rate, mode_ssim_distortion); |
1468 | 0 | } |
1469 | 282k | return; |
1470 | 282k | } |
1471 | | |
1472 | | /************************************************************ |
1473 | | * Coding Loop Context Generation |
1474 | | ************************************************************/ |
1475 | 278k | void svt_aom_coding_loop_context_generation(PictureControlSet* pcs, ModeDecisionContext* ctx) { |
1476 | 278k | BlkStruct* blk_ptr = ctx->blk_ptr; |
1477 | 278k | MacroBlockD* xd = blk_ptr->av1xd; |
1478 | 278k | if (!ctx->shut_fast_rate) { |
1479 | 144k | if (pcs->slice_type == I_SLICE) { |
1480 | 144k | svt_aom_get_kf_y_mode_ctx(xd, &ctx->intra_luma_top_ctx, &ctx->intra_luma_left_ctx); |
1481 | 144k | } |
1482 | 144k | ctx->is_inter_ctx = svt_av1_get_intra_inter_context(xd); |
1483 | 144k | ctx->skip_mode_ctx = av1_get_skip_mode_context(xd); |
1484 | 144k | } |
1485 | | // Collect Neighbor ref cout |
1486 | 279k | if (pcs->slice_type != I_SLICE || pcs->ppcs->frm_hdr.allow_intrabc) { |
1487 | 0 | svt_aom_collect_neighbors_ref_counts_new(blk_ptr->av1xd); |
1488 | 0 | } |
1489 | | |
1490 | | // Skip Coeff Context |
1491 | 278k | ctx->skip_coeff_ctx = ctx->rate_est_ctrls.update_skip_coeff_ctx ? av1_get_skip_context(xd) : 0; |
1492 | 278k | } |
1493 | | |
1494 | 440k | static INLINE int block_signals_txsize(BlockSize bsize) { |
1495 | 440k | return bsize > BLOCK_4X4; |
1496 | 440k | } |
1497 | | |
1498 | 0 | static INLINE int get_vartx_max_txsize(/*const MbModeInfo *xd,*/ BlockSize bsize, int plane) { |
1499 | | /* if (xd->lossless[xd->mi[0]->segment_id]) return TX_4X4;*/ |
1500 | 0 | const TxSize max_txsize = blocksize_to_txsize[bsize]; |
1501 | 0 | if (plane == 0) { |
1502 | 0 | return max_txsize; // luma |
1503 | 0 | } |
1504 | 0 | return av1_get_adjusted_tx_size(max_txsize); // chroma |
1505 | 0 | } |
1506 | | |
1507 | 0 | static INLINE int max_block_wide(const MacroBlockD* xd, BlockSize bsize, int plane) { |
1508 | 0 | int max_blocks_wide = block_size_wide[bsize]; |
1509 | |
|
1510 | 0 | if (xd->mb_to_right_edge < 0) { |
1511 | 0 | max_blocks_wide += gcc_right_shift(xd->mb_to_right_edge, 3 + !!plane); |
1512 | 0 | } |
1513 | | |
1514 | | // Scale the width in the transform block unit. |
1515 | 0 | return max_blocks_wide >> tx_size_wide_log2[0]; |
1516 | 0 | } |
1517 | | |
1518 | 0 | static INLINE int max_block_high(const MacroBlockD* xd, BlockSize bsize, int plane) { |
1519 | 0 | int max_blocks_high = block_size_high[bsize]; |
1520 | |
|
1521 | 0 | if (xd->mb_to_bottom_edge < 0) { |
1522 | 0 | max_blocks_high += gcc_right_shift(xd->mb_to_bottom_edge, 3 + !!plane); |
1523 | 0 | } |
1524 | | |
1525 | | // Scale the height in the transform block unit. |
1526 | 0 | return max_blocks_high >> tx_size_high_log2[0]; |
1527 | 0 | } |
1528 | | |
1529 | | static INLINE void txfm_partition_update(TXFM_CONTEXT* above_ctx, TXFM_CONTEXT* left_ctx, TxSize tx_size, |
1530 | 0 | TxSize txb_size) { |
1531 | 0 | BlockSize bsize = txsize_to_bsize[txb_size]; |
1532 | 0 | assert(bsize < BLOCK_SIZES_ALL); |
1533 | 0 | int bh = mi_size_high[bsize]; |
1534 | 0 | int bw = mi_size_wide[bsize]; |
1535 | 0 | uint8_t txw = tx_size_wide[tx_size]; |
1536 | 0 | uint8_t txh = tx_size_high[tx_size]; |
1537 | 0 | int i; |
1538 | 0 | for (i = 0; i < bh; ++i) { |
1539 | 0 | left_ctx[i] = txh; |
1540 | 0 | } |
1541 | 0 | for (i = 0; i < bw; ++i) { |
1542 | 0 | above_ctx[i] = txw; |
1543 | 0 | } |
1544 | 0 | } |
1545 | | |
1546 | 0 | static INLINE TxSize get_sqr_tx_size(int tx_dim) { |
1547 | 0 | switch (tx_dim) { |
1548 | 0 | case 128: |
1549 | 0 | case 64: |
1550 | 0 | return TX_64X64; |
1551 | 0 | break; |
1552 | 0 | case 32: |
1553 | 0 | return TX_32X32; |
1554 | 0 | break; |
1555 | 0 | case 16: |
1556 | 0 | return TX_16X16; |
1557 | 0 | break; |
1558 | 0 | case 8: |
1559 | 0 | return TX_8X8; |
1560 | 0 | break; |
1561 | 0 | default: |
1562 | 0 | return TX_4X4; |
1563 | 0 | } |
1564 | 0 | } |
1565 | | |
1566 | | static INLINE int txfm_partition_context(TXFM_CONTEXT* above_ctx, TXFM_CONTEXT* left_ctx, BlockSize bsize, |
1567 | 0 | TxSize tx_size) { |
1568 | 0 | const uint8_t txw = tx_size_wide[tx_size]; |
1569 | 0 | const uint8_t txh = tx_size_high[tx_size]; |
1570 | 0 | const int above = *above_ctx < txw; |
1571 | 0 | const int left = *left_ctx < txh; |
1572 | 0 | int category = TXFM_PARTITION_CONTEXTS; |
1573 | | |
1574 | | // dummy return, not used by others. |
1575 | 0 | if (tx_size == TX_4X4) { |
1576 | 0 | return 0; |
1577 | 0 | } |
1578 | | |
1579 | 0 | TxSize max_tx_size = get_sqr_tx_size(AOMMAX(block_size_wide[bsize], block_size_high[bsize])); |
1580 | |
|
1581 | 0 | if (max_tx_size >= TX_8X8) { |
1582 | 0 | category = (txsize_sqr_up_map[tx_size] != max_tx_size && max_tx_size > TX_8X8) + |
1583 | 0 | (TX_SIZES - 1 - max_tx_size) * 2; |
1584 | 0 | } |
1585 | 0 | assert(category != TXFM_PARTITION_CONTEXTS); |
1586 | 0 | return category * 3 + above + left; |
1587 | 0 | } |
1588 | | |
1589 | | static uint64_t cost_tx_size_vartx(MacroBlockD* xd, const MbModeInfo* mbmi, TxSize tx_size, int depth, int blk_row, |
1590 | | int blk_col, MdRateEstimationContext* md_rate_est_ctx, FRAME_CONTEXT* ec_ctx, |
1591 | 0 | uint8_t allow_update_cdf) { |
1592 | 0 | uint64_t bits = 0; |
1593 | 0 | const int max_blocks_high = max_block_high(xd, mbmi->bsize, 0); |
1594 | 0 | const int max_blocks_wide = max_block_wide(xd, mbmi->bsize, 0); |
1595 | |
|
1596 | 0 | if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) { |
1597 | 0 | return bits; |
1598 | 0 | } |
1599 | | |
1600 | 0 | if (depth == MAX_VARTX_DEPTH) { |
1601 | 0 | txfm_partition_update(xd->above_txfm_context + blk_col, xd->left_txfm_context + blk_row, tx_size, tx_size); |
1602 | |
|
1603 | 0 | return bits; |
1604 | 0 | } |
1605 | | |
1606 | 0 | const int ctx = txfm_partition_context( |
1607 | 0 | xd->above_txfm_context + blk_col, xd->left_txfm_context + blk_row, mbmi->bsize, tx_size); |
1608 | 0 | const int write_txfm_partition = (tx_size == tx_depth_to_tx_size[mbmi->block_mi.tx_depth][mbmi->bsize]); |
1609 | 0 | if (write_txfm_partition) { |
1610 | 0 | bits += md_rate_est_ctx->txfm_partition_fac_bits[ctx][0]; |
1611 | |
|
1612 | 0 | if (allow_update_cdf) { |
1613 | 0 | update_cdf(ec_ctx->txfm_partition_cdf[ctx], 0, 2); |
1614 | 0 | } |
1615 | |
|
1616 | 0 | txfm_partition_update(xd->above_txfm_context + blk_col, xd->left_txfm_context + blk_row, tx_size, tx_size); |
1617 | |
|
1618 | 0 | } else { |
1619 | 0 | assert(tx_size < TX_SIZES_ALL); |
1620 | 0 | const TxSize sub_txs = eb_sub_tx_size_map[tx_size]; |
1621 | 0 | const int bsw = eb_tx_size_wide_unit[sub_txs]; |
1622 | 0 | const int bsh = eb_tx_size_high_unit[sub_txs]; |
1623 | |
|
1624 | 0 | bits += md_rate_est_ctx->txfm_partition_fac_bits[ctx][1]; |
1625 | |
|
1626 | 0 | if (allow_update_cdf) { |
1627 | 0 | update_cdf(ec_ctx->txfm_partition_cdf[ctx], 1, 2); |
1628 | 0 | } |
1629 | |
|
1630 | 0 | if (sub_txs == TX_4X4) { |
1631 | 0 | txfm_partition_update(xd->above_txfm_context + blk_col, xd->left_txfm_context + blk_row, sub_txs, tx_size); |
1632 | |
|
1633 | 0 | return bits; |
1634 | 0 | } |
1635 | | |
1636 | 0 | assert(bsw > 0 && bsh > 0); |
1637 | 0 | for (int row = 0; row < eb_tx_size_high_unit[tx_size]; row += bsh) { |
1638 | 0 | for (int col = 0; col < eb_tx_size_wide_unit[tx_size]; col += bsw) { |
1639 | 0 | int offsetr = blk_row + row; |
1640 | 0 | int offsetc = blk_col + col; |
1641 | 0 | bits += cost_tx_size_vartx( |
1642 | 0 | xd, mbmi, sub_txs, depth + 1, offsetr, offsetc, md_rate_est_ctx, ec_ctx, allow_update_cdf); |
1643 | 0 | } |
1644 | 0 | } |
1645 | 0 | } |
1646 | 0 | return bits; |
1647 | 0 | } |
1648 | | |
1649 | 849k | static INLINE void set_txfm_ctx(TXFM_CONTEXT* txfm_ctx, uint8_t txs, int len) { |
1650 | 849k | int i; |
1651 | 2.73M | for (i = 0; i < len; ++i) { |
1652 | 1.88M | txfm_ctx[i] = txs; |
1653 | 1.88M | } |
1654 | 849k | } |
1655 | | |
1656 | 426k | static INLINE void set_txfm_ctxs(TxSize tx_size, int n8_w, int n8_h, int skip, const MacroBlockD* xd) { |
1657 | 426k | uint8_t bw = tx_size_wide[tx_size]; |
1658 | 426k | uint8_t bh = tx_size_high[tx_size]; |
1659 | | |
1660 | 426k | if (skip) { |
1661 | 0 | bw = n8_w * MI_SIZE; |
1662 | 0 | bh = n8_h * MI_SIZE; |
1663 | 0 | } |
1664 | | |
1665 | 426k | set_txfm_ctx(xd->above_txfm_context, bw, n8_w); |
1666 | 426k | set_txfm_ctx(xd->left_txfm_context, bh, n8_h); |
1667 | 426k | } |
1668 | | |
1669 | 12.4k | static INLINE int tx_size_to_depth(TxSize tx_size, BlockSize bsize) { |
1670 | 12.4k | TxSize ctx_size = blocksize_to_txsize[bsize]; |
1671 | 12.4k | int depth = 0; |
1672 | 12.4k | while (tx_size != ctx_size) { |
1673 | 0 | depth++; |
1674 | 0 | ctx_size = eb_sub_tx_size_map[ctx_size]; |
1675 | 0 | assert(depth <= MAX_TX_DEPTH); |
1676 | 0 | } |
1677 | 12.4k | return depth; |
1678 | 12.4k | } |
1679 | | |
1680 | | // Returns a context number for the given MB prediction signal |
1681 | | // The mode info data structure has a one element border above and to the |
1682 | | // left of the entries corresponding to real blocks. |
1683 | | // The prediction flags in these dummy entries are initialized to 0. |
1684 | 12.4k | static INLINE int get_tx_size_context(const MacroBlockD* xd) { |
1685 | 12.4k | const MbModeInfo* mbmi = xd->mi[0]; |
1686 | 12.4k | const MbModeInfo* const above_mbmi = xd->above_mbmi; |
1687 | 12.4k | const MbModeInfo* const left_mbmi = xd->left_mbmi; |
1688 | 12.4k | const TxSize max_tx_size = blocksize_to_txsize[mbmi->bsize]; |
1689 | 12.4k | const int max_tx_wide = tx_size_wide[max_tx_size]; |
1690 | 12.4k | const int max_tx_high = tx_size_high[max_tx_size]; |
1691 | 12.4k | const int has_above = xd->up_available; |
1692 | 12.4k | const int has_left = xd->left_available; |
1693 | | |
1694 | 12.4k | int above = xd->above_txfm_context[0] >= max_tx_wide; |
1695 | 12.4k | int left = xd->left_txfm_context[0] >= max_tx_high; |
1696 | | |
1697 | 12.4k | if (has_above) { |
1698 | 2.99k | if (is_inter_block(&above_mbmi->block_mi)) { |
1699 | 0 | above = block_size_wide[above_mbmi->bsize] >= max_tx_wide; |
1700 | 0 | } |
1701 | 2.99k | } |
1702 | | |
1703 | 12.4k | if (has_left) { |
1704 | 3.13k | if (is_inter_block(&left_mbmi->block_mi)) { |
1705 | 0 | left = block_size_high[left_mbmi->bsize] >= max_tx_high; |
1706 | 0 | } |
1707 | 3.13k | } |
1708 | | |
1709 | 12.4k | if (has_above && has_left) { |
1710 | 213 | return (above + left); |
1711 | 12.2k | } else if (has_above) { |
1712 | 2.78k | return above; |
1713 | 9.48k | } else if (has_left) { |
1714 | 2.91k | return left; |
1715 | 6.56k | } else { |
1716 | 6.56k | return 0; |
1717 | 6.56k | } |
1718 | 12.4k | } |
1719 | | |
1720 | | static uint64_t cost_selected_tx_size(const MacroBlockD* xd, MdRateEstimationContext* md_rate_est_ctx, TxSize tx_size, |
1721 | 12.4k | FRAME_CONTEXT* ec_ctx, uint8_t allow_update_cdf) { |
1722 | 12.4k | const MbModeInfo* const mbmi = xd->mi[0]; |
1723 | 12.4k | const BlockSize bsize = mbmi->bsize; |
1724 | 12.4k | uint64_t bits = 0; |
1725 | | |
1726 | 12.4k | if (block_signals_txsize(bsize)) { |
1727 | 12.4k | const int tx_size_ctx = get_tx_size_context(xd); |
1728 | 12.4k | assert(bsize < BLOCK_SIZES_ALL); |
1729 | 12.4k | const int depth = tx_size_to_depth(tx_size, bsize); |
1730 | 12.4k | const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize); |
1731 | 12.4k | bits += md_rate_est_ctx->tx_size_fac_bits[tx_size_cat][tx_size_ctx][depth]; |
1732 | | |
1733 | 12.4k | if (allow_update_cdf) { |
1734 | 0 | const int max_depths = bsize_to_max_depth(bsize); |
1735 | 0 | assert(depth >= 0 && depth <= max_depths); |
1736 | 0 | assert(!is_inter_block(&mbmi->block_mi)); |
1737 | 0 | assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed(/*xd,*/ mbmi))); |
1738 | 0 | update_cdf(ec_ctx->tx_size_cdf[tx_size_cat][tx_size_ctx], depth, max_depths + 1); |
1739 | 0 | } |
1740 | 12.4k | } |
1741 | | |
1742 | 12.4k | return bits; |
1743 | 12.4k | } |
1744 | | |
1745 | | /* Get the TXS rate and update the txfm context. If allow_update_cdf is true, the TX size CDFs will |
1746 | | be updated. */ |
1747 | | uint64_t svt_aom_tx_size_bits(PictureControlSet* pcs, uint8_t segment_id, MdRateEstimationContext* md_rate_est_ctx, |
1748 | | MacroBlockD* xd, const MbModeInfo* mbmi, TxSize tx_size, TxMode tx_mode, BlockSize bsize, |
1749 | 428k | uint8_t skip, FRAME_CONTEXT* ec_ctx, uint8_t allow_update_cdf) { |
1750 | 428k | uint64_t bits = 0; |
1751 | 428k | int is_inter_tx = is_inter_block(&mbmi->block_mi); |
1752 | 428k | if (tx_mode == TX_MODE_SELECT && block_signals_txsize(bsize) && !(is_inter_tx && skip) && |
1753 | 427k | !svt_av1_is_lossless_segment(pcs, segment_id)) { |
1754 | 12.4k | if (is_inter_tx) { // This implies skip flag is 0. |
1755 | 0 | const TxSize max_tx_size = get_vartx_max_txsize(/*xd,*/ bsize, 0); |
1756 | 0 | const int txbh = eb_tx_size_high_unit[max_tx_size]; |
1757 | 0 | const int txbw = eb_tx_size_wide_unit[max_tx_size]; |
1758 | 0 | const int width = block_size_wide[bsize] >> tx_size_wide_log2[0]; |
1759 | 0 | const int height = block_size_high[bsize] >> tx_size_high_log2[0]; |
1760 | 0 | int idx, idy; |
1761 | 0 | for (idy = 0; idy < height; idy += txbh) { |
1762 | 0 | for (idx = 0; idx < width; idx += txbw) { |
1763 | 0 | bits += cost_tx_size_vartx( |
1764 | 0 | xd, mbmi, max_tx_size, 0, idy, idx, md_rate_est_ctx, ec_ctx, allow_update_cdf); |
1765 | 0 | } |
1766 | 0 | } |
1767 | 12.4k | } else { |
1768 | 12.4k | bits += cost_selected_tx_size(xd, md_rate_est_ctx, tx_size, ec_ctx, allow_update_cdf); |
1769 | 12.4k | set_txfm_ctxs(tx_size, xd->n8_w, xd->n8_h, 0, xd); |
1770 | 12.4k | } |
1771 | 415k | } else { |
1772 | 415k | set_txfm_ctxs(tx_size, xd->n8_w, xd->n8_h, skip && is_inter_block(&mbmi->block_mi), xd); |
1773 | 415k | } |
1774 | | |
1775 | 428k | return bits; |
1776 | 428k | } |
1777 | | |
1778 | | /* Get the TXS rate. A dummy txfm context array will be used, so context updates will not be saved for |
1779 | | future blocks. */ |
1780 | | uint64_t svt_aom_get_tx_size_bits(ModeDecisionCandidateBuffer* candidateBuffer, ModeDecisionContext* ctx, |
1781 | 428k | PictureControlSet* pcs, uint8_t tx_depth, bool block_has_coeff) { |
1782 | 428k | NeighborArrayUnit* txfm_context_array = ctx->txfm_context_array; |
1783 | 428k | const uint8_t* txfm_above_ptr = svt_aom_na_top_ptr_pu(txfm_context_array, ctx->blk_org_x); |
1784 | 428k | const uint8_t* txfm_left_ptr = svt_aom_na_left_ptr_pu(txfm_context_array, ctx->blk_org_y); |
1785 | | |
1786 | 428k | TxMode tx_mode = pcs->ppcs->frm_hdr.tx_mode; |
1787 | 428k | MacroBlockD* xd = ctx->blk_ptr->av1xd; |
1788 | 428k | BlockSize bsize = ctx->blk_geom->bsize; |
1789 | 428k | const TxSize tx_size = tx_depth_to_tx_size[tx_depth][bsize]; |
1790 | 428k | MbModeInfo* mbmi = xd->mi[0]; |
1791 | | |
1792 | 428k | svt_memcpy(ctx->above_txfm_context, txfm_above_ptr, (ctx->blk_geom->bwidth >> MI_SIZE_LOG2) * sizeof(TXFM_CONTEXT)); |
1793 | 428k | svt_memcpy(ctx->left_txfm_context, txfm_left_ptr, (ctx->blk_geom->bheight >> MI_SIZE_LOG2) * sizeof(TXFM_CONTEXT)); |
1794 | | |
1795 | 428k | xd->above_txfm_context = ctx->above_txfm_context; |
1796 | 428k | xd->left_txfm_context = ctx->left_txfm_context; |
1797 | 428k | mbmi->bsize = ctx->blk_geom->bsize; |
1798 | 428k | mbmi->block_mi.use_intrabc = candidateBuffer->cand->block_mi.use_intrabc; |
1799 | 428k | mbmi->block_mi.ref_frame[0] = candidateBuffer->cand->block_mi.ref_frame[0]; |
1800 | 428k | mbmi->block_mi.tx_depth = tx_depth; |
1801 | | |
1802 | 428k | const uint64_t bits = svt_aom_tx_size_bits(pcs, |
1803 | 428k | ctx->blk_ptr->segment_id, |
1804 | 428k | ctx->md_rate_est_ctx, |
1805 | 428k | xd, |
1806 | 428k | mbmi, |
1807 | 428k | tx_size, |
1808 | 428k | tx_mode, |
1809 | 428k | bsize, |
1810 | 428k | !block_has_coeff, |
1811 | 428k | NULL, |
1812 | 428k | 0); |
1813 | 428k | return bits; |
1814 | 428k | } |
1815 | | |
1816 | | /* |
1817 | | * av1_partition_rate_cost function is used to generate the rate of signaling the |
1818 | | * partition type for a given block. |
1819 | | */ |
1820 | | int64_t svt_aom_partition_rate_cost(PictureParentControlSet* ppcs, const BlockSize bsize, const int mi_row, |
1821 | | const int mi_col, MdRateEstimationContext* md_rate_est_ctx, PartitionType p, |
1822 | 393k | const PartitionContextType left_ctx, const PartitionContextType above_ctx) { |
1823 | 393k | if (bsize < BLOCK_8X8) { |
1824 | 0 | return 0; |
1825 | 0 | } |
1826 | 393k | assert(bsize < BLOCK_SIZES_ALL && mi_size_wide_log2[bsize] == mi_size_high_log2[bsize]); |
1827 | | |
1828 | 393k | const int hbs = mi_size_wide[bsize] >> 1; |
1829 | 393k | const int has_rows = (mi_row + hbs) < ppcs->av1_cm->mi_rows; |
1830 | 393k | const int has_cols = (mi_col + hbs) < ppcs->av1_cm->mi_cols; |
1831 | | // Don't consider invalid partitions or blocks outside the picture |
1832 | 393k | if (!has_rows && !has_cols) { |
1833 | 912 | return 0; |
1834 | 912 | } |
1835 | | |
1836 | 392k | const int bsl = mi_size_wide_log2[bsize] - mi_size_wide_log2[BLOCK_8X8]; |
1837 | 392k | assert(bsl >= 0); |
1838 | | |
1839 | 392k | const int above = (above_ctx >> bsl) & 1, left = (left_ctx >> bsl) & 1; |
1840 | 392k | const uint32_t context_index = (left * 2 + above) + bsl * PARTITION_PLOFFSET; |
1841 | | |
1842 | 392k | uint64_t split_rate = 0; |
1843 | | |
1844 | 392k | if (has_rows && has_cols) { |
1845 | 373k | split_rate = (uint64_t)md_rate_est_ctx->partition_fac_bits[context_index][p]; |
1846 | 373k | } else if (!has_rows && has_cols) { |
1847 | | // 8x8 blocks will not use the split_or_horz or the split_or_vert paritition CDFs, per |
1848 | | // section 8.3.2 of the AV1 spec (Cdf selection process). Therefore, only update partition ctx 4+, |
1849 | | // which corresponds to the paritition CDFs for 16x16 and larger blocks |
1850 | 9.25k | assert(bsize != BLOCK_8X8); |
1851 | 9.25k | split_rate = bsize == BLOCK_128X128 |
1852 | 9.25k | ? (uint64_t)md_rate_est_ctx->partition_vert_alike_128x128_fac_bits[context_index][p == PARTITION_SPLIT] |
1853 | 9.25k | : (uint64_t)md_rate_est_ctx->partition_vert_alike_fac_bits[context_index][p == PARTITION_SPLIT]; |
1854 | 9.25k | } else { |
1855 | | // 8x8 blocks will not use the split_or_horz or the split_or_vert paritition CDFs, per |
1856 | | // section 8.3.2 of the AV1 spec (Cdf selection process). Therefore, only update partition ctx 4+, |
1857 | | // which corresponds to the paritition CDFs for 16x16 and larger blocks |
1858 | 9.08k | assert(bsize != BLOCK_8X8); |
1859 | 9.08k | split_rate = bsize == BLOCK_128X128 |
1860 | 9.08k | ? (uint64_t)md_rate_est_ctx->partition_horz_alike_128x128_fac_bits[context_index][p == PARTITION_SPLIT] |
1861 | 9.08k | : (uint64_t)md_rate_est_ctx->partition_horz_alike_fac_bits[context_index][p == PARTITION_SPLIT]; |
1862 | 9.08k | } |
1863 | | |
1864 | 392k | return split_rate; |
1865 | 393k | } |