/src/aom/av1/encoder/intra_mode_search.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2020, Alliance for Open Media. All rights reserved |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #include "av1/common/av1_common_int.h" |
13 | | #include "av1/common/reconintra.h" |
14 | | |
15 | | #include "av1/encoder/intra_mode_search.h" |
16 | | #include "av1/encoder/intra_mode_search_utils.h" |
17 | | #include "av1/encoder/palette.h" |
18 | | #include "av1/encoder/speed_features.h" |
19 | | #include "av1/encoder/tx_search.h" |
20 | | |
21 | | /*!\cond */ |
// Luma intra prediction modes listed in the order used by the RD search.
// set_y_mode_and_delta_angle() reads entry [mode_idx] from this table when
// mode_idx < INTRA_MODE_END.
static const PREDICTION_MODE intra_rd_search_mode_order[INTRA_MODES] = {
  DC_PRED,       H_PRED,        V_PRED,    SMOOTH_PRED, PAETH_PRED,
  SMOOTH_V_PRED, SMOOTH_H_PRED, D135_PRED, D203_PRED,   D157_PRED,
  D67_PRED,      D113_PRED,     D45_PRED,
};
27 | | |
// Chroma intra prediction modes listed in search order.
// NOTE(review): the consumer of this table is not visible in this part of the
// file; presumably the chroma mode search iterates over it - confirm.
static const UV_PREDICTION_MODE uv_rd_search_mode_order[UV_INTRA_MODES] = {
  UV_DC_PRED,     UV_CFL_PRED,   UV_H_PRED,        UV_V_PRED,
  UV_SMOOTH_PRED, UV_PAETH_PRED, UV_SMOOTH_V_PRED, UV_SMOOTH_H_PRED,
  UV_D135_PRED,   UV_D203_PRED,  UV_D157_PRED,     UV_D67_PRED,
  UV_D113_PRED,   UV_D45_PRED,
};
34 | | |
// Each entry is a bitmask over the filter intra modes as defined in the
// FILTER_INTRA_MODE enumeration type (enums.h): bit m is tested as (1 << m)
// in rd_pick_filter_intra_sby(). A bit set to 0 disables the evaluation of
// the corresponding filter intra mode. The table is indexed by the best luma
// intra mode found so far and is used when the speed feature
// prune_filter_intra_level is 1. The evaluated filter intra modes are the
// union of the following:
// 1) FILTER_DC_PRED
// 2) the filter intra mode that corresponds to the best mode so far, when
// that mode is one of DC_PRED, V_PRED, H_PRED, D157_PRED and PAETH_PRED
// (e.g. FILTER_V_PRED if the best mode so far is V_PRED).
static const uint8_t av1_derived_filter_intra_mode_used_flag[INTRA_MODES] = {
  0x01,  // DC_PRED:           0000 0001
  0x03,  // V_PRED:            0000 0011
  0x05,  // H_PRED:            0000 0101
  0x01,  // D45_PRED:          0000 0001
  0x01,  // D135_PRED:         0000 0001
  0x01,  // D113_PRED:         0000 0001
  0x09,  // D157_PRED:         0000 1001
  0x01,  // D203_PRED:         0000 0001
  0x01,  // D67_PRED:          0000 0001
  0x01,  // SMOOTH_PRED:       0000 0001
  0x01,  // SMOOTH_V_PRED:     0000 0001
  0x01,  // SMOOTH_H_PRED:     0000 0001
  0x11   // PAETH_PRED:        0001 0001
};
59 | | |
// Each entry is a bitmask over the chroma intra modes as defined in the
// UV_PREDICTION_MODE enumeration type (enums.h). A bit set to 0 disables the
// evaluation of the corresponding chroma intra mode. The table is indexed by
// the luma intra mode winner and is used when the speed feature
// prune_chroma_modes_using_luma_winner is enabled. The evaluated chroma
// intra modes are the union of the following:
// 1) UV_DC_PRED
// 2) UV_SMOOTH_PRED
// 3) UV_CFL_PRED
// 4) the chroma mode that corresponds to the luma intra mode winner (e.g.
// UV_V_PRED if the luma intra mode winner is V_PRED).
static const uint16_t av1_derived_chroma_intra_mode_used_flag[INTRA_MODES] = {
  0x2201,  // DC_PRED:           0010 0010 0000 0001
  0x2203,  // V_PRED:            0010 0010 0000 0011
  0x2205,  // H_PRED:            0010 0010 0000 0101
  0x2209,  // D45_PRED:          0010 0010 0000 1001
  0x2211,  // D135_PRED:         0010 0010 0001 0001
  0x2221,  // D113_PRED:         0010 0010 0010 0001
  0x2241,  // D157_PRED:         0010 0010 0100 0001
  0x2281,  // D203_PRED:         0010 0010 1000 0001
  0x2301,  // D67_PRED:          0010 0011 0000 0001
  0x2201,  // SMOOTH_PRED:       0010 0010 0000 0001
  0x2601,  // SMOOTH_V_PRED:     0010 0110 0000 0001
  0x2a01,  // SMOOTH_H_PRED:     0010 1010 0000 0001
  0x3201   // PAETH_PRED:        0011 0010 0000 0001
};
86 | | |
// All-zero reference rows (8-bit and 16-bit sample variants). They are passed
// as the reference block, with a stride of 0, to the 4x4 variance function in
// intra_rd_variance_factor().
DECLARE_ALIGNED(16, static const uint8_t, all_zeros[MAX_SB_SIZE]) = { 0 };
DECLARE_ALIGNED(16, static const uint16_t,
                highbd_all_zeros[MAX_SB_SIZE]) = { 0 };
90 | | // Returns a factor to be applied to the RD value based on how well the |
91 | | // reconstructed block variance matches the source variance. |
92 | | static double intra_rd_variance_factor(const AV1_COMP *cpi, MACROBLOCK *x, |
93 | 3.02M | BLOCK_SIZE bs) { |
94 | 3.02M | double threshold = 1.0 - (0.25 * cpi->oxcf.speed); |
95 | | // For non-positive threshold values, the comparison of source and |
96 | | // reconstructed variances with threshold evaluates to false |
97 | | // (src_var < threshold/rec_var < threshold) as these metrics are greater than |
98 | | // than 0. Hence further calculations are skipped. |
99 | 3.02M | if (threshold <= 0) return 1.0; |
100 | | |
101 | 18.4E | MACROBLOCKD *xd = &x->e_mbd; |
102 | 18.4E | double variance_rd_factor = 1.0; |
103 | 18.4E | double src_var = 0.0; |
104 | 18.4E | double rec_var = 0.0; |
105 | 18.4E | double var_diff = 0.0; |
106 | 18.4E | unsigned int sse; |
107 | 18.4E | int i, j; |
108 | 18.4E | int right_overflow = |
109 | 18.4E | (xd->mb_to_right_edge < 0) ? ((-xd->mb_to_right_edge) >> 3) : 0; |
110 | 18.4E | int bottom_overflow = |
111 | 18.4E | (xd->mb_to_bottom_edge < 0) ? ((-xd->mb_to_bottom_edge) >> 3) : 0; |
112 | | |
113 | 18.4E | const int bw = MI_SIZE * mi_size_wide[bs] - right_overflow; |
114 | 18.4E | const int bh = MI_SIZE * mi_size_high[bs] - bottom_overflow; |
115 | 18.4E | const int blocks = (bw * bh) / 16; |
116 | | |
117 | 18.4E | for (i = 0; i < bh; i += 4) { |
118 | 0 | for (j = 0; j < bw; j += 4) { |
119 | 0 | if (is_cur_buf_hbd(xd)) { |
120 | 0 | src_var += |
121 | 0 | log(1.0 + cpi->ppi->fn_ptr[BLOCK_4X4].vf( |
122 | 0 | x->plane[0].src.buf + i * x->plane[0].src.stride + j, |
123 | 0 | x->plane[0].src.stride, |
124 | 0 | CONVERT_TO_BYTEPTR(highbd_all_zeros), 0, &sse) / |
125 | 0 | 16.0); |
126 | 0 | rec_var += log( |
127 | 0 | 1.0 + cpi->ppi->fn_ptr[BLOCK_4X4].vf( |
128 | 0 | xd->plane[0].dst.buf + i * xd->plane[0].dst.stride + j, |
129 | 0 | xd->plane[0].dst.stride, |
130 | 0 | CONVERT_TO_BYTEPTR(highbd_all_zeros), 0, &sse) / |
131 | 0 | 16.0); |
132 | 0 | } else { |
133 | 0 | src_var += |
134 | 0 | log(1.0 + cpi->ppi->fn_ptr[BLOCK_4X4].vf( |
135 | 0 | x->plane[0].src.buf + i * x->plane[0].src.stride + j, |
136 | 0 | x->plane[0].src.stride, all_zeros, 0, &sse) / |
137 | 0 | 16.0); |
138 | 0 | rec_var += log( |
139 | 0 | 1.0 + cpi->ppi->fn_ptr[BLOCK_4X4].vf( |
140 | 0 | xd->plane[0].dst.buf + i * xd->plane[0].dst.stride + j, |
141 | 0 | xd->plane[0].dst.stride, all_zeros, 0, &sse) / |
142 | 0 | 16.0); |
143 | 0 | } |
144 | 0 | } |
145 | 0 | } |
146 | 18.4E | src_var /= (double)blocks; |
147 | 18.4E | rec_var /= (double)blocks; |
148 | | |
149 | | // Dont allow 0 to prevent / 0 below. |
150 | 18.4E | src_var += 0.000001; |
151 | 18.4E | rec_var += 0.000001; |
152 | | |
153 | 18.4E | if (src_var >= rec_var) { |
154 | 0 | var_diff = (src_var - rec_var); |
155 | 0 | if ((var_diff > 0.5) && (rec_var < threshold)) { |
156 | 0 | variance_rd_factor = 1.0 + ((var_diff * 2) / src_var); |
157 | 0 | } |
158 | 18.4E | } else { |
159 | 18.4E | var_diff = (rec_var - src_var); |
160 | 18.4E | if ((var_diff > 0.5) && (src_var < threshold)) { |
161 | 0 | variance_rd_factor = 1.0 + (var_diff / (2 * src_var)); |
162 | 0 | } |
163 | 18.4E | } |
164 | | |
165 | | // Limit adjustment; |
166 | 18.4E | variance_rd_factor = AOMMIN(3.0, variance_rd_factor); |
167 | | |
168 | 18.4E | return variance_rd_factor; |
169 | 3.02M | } |
170 | | /*!\endcond */ |
171 | | |
172 | | /*!\brief Search for the best filter_intra mode when coding intra frame. |
173 | | * |
174 | | * \ingroup intra_mode_search |
175 | | * \callergraph |
176 | | * This function loops through all filter_intra modes to find the best one. |
177 | | * |
178 | | * \return Returns 1 if a new filter_intra mode is selected; 0 otherwise. |
179 | | */ |
180 | | static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x, |
181 | | int *rate, int *rate_tokenonly, |
182 | | int64_t *distortion, int *skippable, |
183 | | BLOCK_SIZE bsize, int mode_cost, |
184 | | PREDICTION_MODE best_mode_so_far, |
185 | | int64_t *best_rd, int64_t *best_model_rd, |
186 | 57.2k | PICK_MODE_CONTEXT *ctx) { |
187 | | // Skip the evaluation of filter intra modes. |
188 | 57.2k | if (cpi->sf.intra_sf.prune_filter_intra_level == 2) return 0; |
189 | | |
190 | 57.2k | MACROBLOCKD *const xd = &x->e_mbd; |
191 | 57.2k | MB_MODE_INFO *mbmi = xd->mi[0]; |
192 | 57.2k | int filter_intra_selected_flag = 0; |
193 | 57.2k | FILTER_INTRA_MODE mode; |
194 | 57.2k | TX_SIZE best_tx_size = TX_8X8; |
195 | 57.2k | FILTER_INTRA_MODE_INFO filter_intra_mode_info; |
196 | 57.2k | uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE]; |
197 | 57.2k | av1_zero(filter_intra_mode_info); |
198 | 57.2k | mbmi->filter_intra_mode_info.use_filter_intra = 1; |
199 | 57.2k | mbmi->mode = DC_PRED; |
200 | 57.2k | mbmi->palette_mode_info.palette_size[0] = 0; |
201 | | |
202 | | // Skip the evaluation of filter-intra if cached MB_MODE_INFO does not have |
203 | | // filter-intra as winner. |
204 | 57.2k | if (x->use_mb_mode_cache && |
205 | 57.2k | !x->mb_mode_cache->filter_intra_mode_info.use_filter_intra) |
206 | 0 | return 0; |
207 | | |
208 | 343k | for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) { |
209 | 286k | int64_t this_rd; |
210 | 286k | RD_STATS tokenonly_rd_stats; |
211 | 286k | mbmi->filter_intra_mode_info.filter_intra_mode = mode; |
212 | | |
213 | 286k | if ((cpi->sf.intra_sf.prune_filter_intra_level == 1) && |
214 | 286k | !(av1_derived_filter_intra_mode_used_flag[best_mode_so_far] & |
215 | 286k | (1 << mode))) |
216 | 227k | continue; |
217 | | |
218 | | // Skip the evaluation of modes that do not match with the winner mode in |
219 | | // x->mb_mode_cache. |
220 | 58.8k | if (x->use_mb_mode_cache && |
221 | 58.8k | mode != x->mb_mode_cache->filter_intra_mode_info.filter_intra_mode) |
222 | 0 | continue; |
223 | | |
224 | 58.8k | if (model_intra_yrd_and_prune(cpi, x, bsize, best_model_rd)) { |
225 | 1.16k | continue; |
226 | 1.16k | } |
227 | 57.7k | av1_pick_uniform_tx_size_type_yrd(cpi, x, &tokenonly_rd_stats, bsize, |
228 | 57.7k | *best_rd); |
229 | 57.7k | if (tokenonly_rd_stats.rate == INT_MAX) continue; |
230 | 54.1k | const int this_rate = |
231 | 54.1k | tokenonly_rd_stats.rate + |
232 | 54.1k | intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost); |
233 | 54.1k | this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); |
234 | | |
235 | | // Visual quality adjustment based on recon vs source variance. |
236 | 54.1k | if ((cpi->oxcf.mode == ALLINTRA) && (this_rd != INT64_MAX)) { |
237 | 54.1k | this_rd = (int64_t)(this_rd * intra_rd_variance_factor(cpi, x, bsize)); |
238 | 54.1k | } |
239 | | |
240 | | // Collect mode stats for multiwinner mode processing |
241 | 54.1k | const int txfm_search_done = 1; |
242 | 54.1k | store_winner_mode_stats( |
243 | 54.1k | &cpi->common, x, mbmi, NULL, NULL, NULL, 0, NULL, bsize, this_rd, |
244 | 54.1k | cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done); |
245 | 54.1k | if (this_rd < *best_rd) { |
246 | 1.97k | *best_rd = this_rd; |
247 | 1.97k | best_tx_size = mbmi->tx_size; |
248 | 1.97k | filter_intra_mode_info = mbmi->filter_intra_mode_info; |
249 | 1.97k | av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk); |
250 | 1.97k | memcpy(ctx->blk_skip, x->txfm_search_info.blk_skip, |
251 | 1.97k | sizeof(x->txfm_search_info.blk_skip[0]) * ctx->num_4x4_blk); |
252 | 1.97k | *rate = this_rate; |
253 | 1.97k | *rate_tokenonly = tokenonly_rd_stats.rate; |
254 | 1.97k | *distortion = tokenonly_rd_stats.dist; |
255 | 1.97k | *skippable = tokenonly_rd_stats.skip_txfm; |
256 | 1.97k | filter_intra_selected_flag = 1; |
257 | 1.97k | } |
258 | 54.1k | } |
259 | | |
260 | 57.2k | if (filter_intra_selected_flag) { |
261 | 1.97k | mbmi->mode = DC_PRED; |
262 | 1.97k | mbmi->tx_size = best_tx_size; |
263 | 1.97k | mbmi->filter_intra_mode_info = filter_intra_mode_info; |
264 | 1.97k | av1_copy_array(ctx->tx_type_map, best_tx_type_map, ctx->num_4x4_blk); |
265 | 1.97k | return 1; |
266 | 55.2k | } else { |
267 | 55.2k | return 0; |
268 | 55.2k | } |
269 | 57.2k | } |
270 | | |
// Builds a histogram of the 8-bit pixel values in a rows x cols block.
//   src, stride : top-left pixel of the block and the distance, in pixels,
//                 between the starts of consecutive rows.
//   val_count   : output histogram with 256 entries; it is cleared first.
//   num_colors  : output, number of distinct pixel values in the block.
void av1_count_colors(const uint8_t *src, int stride, int rows, int cols,
                      int *val_count, int *num_colors) {
  const int max_pix_val = 1 << 8;
  memset(val_count, 0, max_pix_val * sizeof(val_count[0]));

  int distinct = 0;
  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      const int pix = src[r * stride + c];
      assert(pix < max_pix_val);
      // A bin moving from 0 to 1 means this value is seen for the first time.
      if (val_count[pix]++ == 0) ++distinct;
    }
  }
  *num_colors = distinct;
}
288 | | |
// Builds histograms of the pixel values in a rows x cols high-bitdepth block.
//   src8, stride   : block start (converted with CONVERT_TO_SHORTPTR) and the
//                    distance, in samples, between the starts of rows.
//   bit_depth      : sample bit depth; must be in the range [8, 12].
//   val_count      : optional (may be NULL) histogram over the full-depth
//                    values, with (1 << bit_depth) entries; cleared first.
//   bin_val_count  : histogram over the values down-converted to 8 bits, with
//                    256 entries; cleared first.
//   num_color_bins : output, number of non-empty 8-bit bins.
//   num_colors     : output, number of distinct full-depth values. It is
//                    written only when val_count is not NULL.
void av1_count_colors_highbd(const uint8_t *src8, int stride, int rows,
                             int cols, int bit_depth, int *val_count,
                             int *bin_val_count, int *num_color_bins,
                             int *num_colors) {
  // The down-conversion below shifts by (bit_depth - 8); a bit depth under 8
  // would make the shift count negative, which is undefined behavior.
  assert(bit_depth >= 8 && bit_depth <= 12);
  const int max_bin_val = 1 << 8;
  const int max_pix_val = 1 << bit_depth;
  const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
  // Size each memset from the array that is actually being cleared.
  memset(bin_val_count, 0, max_bin_val * sizeof(bin_val_count[0]));
  if (val_count != NULL)
    memset(val_count, 0, max_pix_val * sizeof(val_count[0]));
  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      /*
       * Down-convert the pixels to 8-bit domain before counting.
       * This provides consistency of behavior for palette search
       * between lbd and hbd encodes. These down-converted pixels
       * are only used for calculating the threshold (n).
       */
      const int pix = src[r * stride + c];
      const int this_val = pix >> (bit_depth - 8);
      assert(this_val < max_bin_val);
      // Ignore samples that exceed the declared bit depth; this also keeps
      // the val_count index below max_pix_val.
      if (this_val >= max_bin_val) continue;
      ++bin_val_count[this_val];
      if (val_count != NULL) ++val_count[pix];
    }
  }

  // Count the colors based on 8-bit domain used to gate the palette path.
  int n = 0;
  for (int i = 0; i < max_bin_val; ++i) {
    if (bin_val_count[i]) ++n;
  }
  *num_color_bins = n;

  // Count the actual hbd colors used to create top_colors.
  if (val_count != NULL) {
    n = 0;
    for (int i = 0; i < max_pix_val; ++i) {
      if (val_count[i]) ++n;
    }
    *num_colors = n;
  }
}
331 | | |
332 | 6.83M | void set_y_mode_and_delta_angle(const int mode_idx, MB_MODE_INFO *const mbmi) { |
333 | 6.83M | if (mode_idx < INTRA_MODE_END) { |
334 | 1.45M | mbmi->mode = intra_rd_search_mode_order[mode_idx]; |
335 | 1.45M | mbmi->angle_delta[PLANE_TYPE_Y] = 0; |
336 | 5.38M | } else { |
337 | 5.38M | mbmi->mode = (mode_idx - INTRA_MODE_END) / (MAX_ANGLE_DELTA * 2) + V_PRED; |
338 | 5.38M | int angle_delta = (mode_idx - INTRA_MODE_END) % (MAX_ANGLE_DELTA * 2); |
339 | 5.38M | mbmi->angle_delta[PLANE_TYPE_Y] = |
340 | 5.38M | (angle_delta < 3 ? (angle_delta - 3) : (angle_delta - 2)); |
341 | 5.38M | } |
342 | 6.83M | } |
343 | | |
344 | | static AOM_INLINE int get_model_rd_index_for_pruning( |
345 | | const MACROBLOCK *const x, |
346 | 3.92M | const INTRA_MODE_SPEED_FEATURES *const intra_sf) { |
347 | 3.92M | const int top_intra_model_count_allowed = |
348 | 3.92M | intra_sf->top_intra_model_count_allowed; |
349 | 3.92M | if (!intra_sf->adapt_top_model_rd_count_using_neighbors) |
350 | 3.92M | return top_intra_model_count_allowed - 1; |
351 | | |
352 | 18.4E | const MACROBLOCKD *const xd = &x->e_mbd; |
353 | 18.4E | const PREDICTION_MODE mode = xd->mi[0]->mode; |
354 | 18.4E | int model_rd_index_for_pruning = top_intra_model_count_allowed - 1; |
355 | 18.4E | int is_left_mode_neq_cur_mode = 0, is_above_mode_neq_cur_mode = 0; |
356 | 18.4E | if (xd->left_available) |
357 | 0 | is_left_mode_neq_cur_mode = xd->left_mbmi->mode != mode; |
358 | 18.4E | if (xd->up_available) |
359 | 0 | is_above_mode_neq_cur_mode = xd->above_mbmi->mode != mode; |
360 | | // The pruning of luma intra modes is made more aggressive at lower quantizers |
361 | | // and vice versa. The value for model_rd_index_for_pruning is derived as |
362 | | // follows. |
363 | | // qidx 0 to 127: Reduce the index of a candidate used for comparison only if |
364 | | // the current mode does not match either of the available neighboring modes. |
365 | | // qidx 128 to 255: Reduce the index of a candidate used for comparison only |
366 | | // if the current mode does not match both the available neighboring modes. |
367 | 18.4E | if (x->qindex <= 127) { |
368 | 0 | if (is_left_mode_neq_cur_mode || is_above_mode_neq_cur_mode) |
369 | 0 | model_rd_index_for_pruning = AOMMAX(model_rd_index_for_pruning - 1, 0); |
370 | 18.4E | } else { |
371 | 18.4E | if (is_left_mode_neq_cur_mode && is_above_mode_neq_cur_mode) |
372 | 0 | model_rd_index_for_pruning = AOMMAX(model_rd_index_for_pruning - 1, 0); |
373 | 18.4E | } |
374 | 18.4E | return model_rd_index_for_pruning; |
375 | 3.92M | } |
376 | | |
// Decides whether a luma intra mode can be pruned based on its model RD cost.
//   this_model_rd              : model RD cost of the mode being considered.
//   best_model_rd              : in/out, lowest model RD cost accepted so
//                                far; lowered when this mode is kept and is
//                                cheaper.
//   top_intra_model_rd         : in/out, ascending list of the lowest model
//                                RD costs seen; this_model_rd is inserted at
//                                its sorted position (the last entry drops
//                                out) before any pruning decision is made.
//   max_model_cnt_allowed      : number of entries in top_intra_model_rd.
//   model_rd_index_for_pruning : index of the entry of top_intra_model_rd to
//                                compare against.
// Returns 1 if the mode should be pruned, 0 if it should be evaluated.
int prune_intra_y_mode(int64_t this_model_rd, int64_t *best_model_rd,
                       int64_t top_intra_model_rd[], int max_model_cnt_allowed,
                       int model_rd_index_for_pruning) {
  const double best_scale = 1.50;
  const double top_scale = 1.00;

  // Locate the first entry that is strictly larger than the new cost.
  int slot = 0;
  while (slot < max_model_cnt_allowed &&
         !(this_model_rd < top_intra_model_rd[slot])) {
    ++slot;
  }
  if (slot < max_model_cnt_allowed) {
    // Shift the tail down by one position and insert the new cost.
    for (int k = max_model_cnt_allowed - 1; k > slot; --k) {
      top_intra_model_rd[k] = top_intra_model_rd[k - 1];
    }
    top_intra_model_rd[slot] = this_model_rd;
  }

  // Prune if the cost exceeds the reference entry of the top list.
  const int64_t top_ref = top_intra_model_rd[model_rd_index_for_pruning];
  if (top_ref != INT64_MAX && this_model_rd > top_scale * top_ref) return 1;

  // Prune if the cost is more than 1.5x the best model cost so far.
  if (this_model_rd != INT64_MAX &&
      this_model_rd > best_scale * (*best_model_rd))
    return 1;

  if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
  return 0;
}
402 | | |
403 | | // Run RD calculation with given chroma intra prediction angle., and return |
404 | | // the RD cost. Update the best mode info. if the RD cost is the best so far. |
405 | | static int64_t pick_intra_angle_routine_sbuv( |
406 | | const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, |
407 | | int rate_overhead, int64_t best_rd_in, int *rate, RD_STATS *rd_stats, |
408 | 3.04k | int *best_angle_delta, int64_t *best_rd) { |
409 | 3.04k | MB_MODE_INFO *mbmi = x->e_mbd.mi[0]; |
410 | 3.04k | assert(!is_inter_block(mbmi)); |
411 | 3.04k | int this_rate; |
412 | 3.04k | int64_t this_rd; |
413 | 3.04k | RD_STATS tokenonly_rd_stats; |
414 | | |
415 | 3.04k | if (!av1_txfm_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in)) |
416 | 33 | return INT64_MAX; |
417 | 3.01k | this_rate = tokenonly_rd_stats.rate + |
418 | 3.01k | intra_mode_info_cost_uv(cpi, x, mbmi, bsize, rate_overhead); |
419 | 3.01k | this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); |
420 | 3.01k | if (this_rd < *best_rd) { |
421 | 573 | *best_rd = this_rd; |
422 | 573 | *best_angle_delta = mbmi->angle_delta[PLANE_TYPE_UV]; |
423 | 573 | *rate = this_rate; |
424 | 573 | rd_stats->rate = tokenonly_rd_stats.rate; |
425 | 573 | rd_stats->dist = tokenonly_rd_stats.dist; |
426 | 573 | rd_stats->skip_txfm = tokenonly_rd_stats.skip_txfm; |
427 | 573 | } |
428 | 3.01k | return this_rd; |
429 | 3.04k | } |
430 | | |
431 | | /*!\brief Search for the best angle delta for chroma prediction |
432 | | * |
433 | | * \ingroup intra_mode_search |
434 | | * \callergraph |
435 | | * Given a chroma directional intra prediction mode, this function will try to |
436 | | * estimate the best delta_angle. |
437 | | * |
438 | | * \returns Return if there is a new mode with smaller rdcost than best_rd. |
439 | | */ |
440 | | static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, |
441 | | BLOCK_SIZE bsize, int rate_overhead, |
442 | | int64_t best_rd, int *rate, |
443 | 606 | RD_STATS *rd_stats) { |
444 | 606 | MACROBLOCKD *const xd = &x->e_mbd; |
445 | 606 | MB_MODE_INFO *mbmi = xd->mi[0]; |
446 | 606 | assert(!is_inter_block(mbmi)); |
447 | 606 | int i, angle_delta, best_angle_delta = 0; |
448 | 606 | int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)]; |
449 | | |
450 | 606 | rd_stats->rate = INT_MAX; |
451 | 606 | rd_stats->skip_txfm = 0; |
452 | 606 | rd_stats->dist = INT64_MAX; |
453 | 6.66k | for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX; |
454 | | |
455 | 1.75k | for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) { |
456 | 2.32k | for (i = 0; i < 2; ++i) { |
457 | 1.75k | best_rd_in = (best_rd == INT64_MAX) |
458 | 1.75k | ? INT64_MAX |
459 | 1.75k | : (best_rd + (best_rd >> ((angle_delta == 0) ? 3 : 5))); |
460 | 1.75k | mbmi->angle_delta[PLANE_TYPE_UV] = (1 - 2 * i) * angle_delta; |
461 | 1.75k | this_rd = pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead, |
462 | 1.75k | best_rd_in, rate, rd_stats, |
463 | 1.75k | &best_angle_delta, &best_rd); |
464 | 1.75k | rd_cost[2 * angle_delta + i] = this_rd; |
465 | 1.75k | if (angle_delta == 0) { |
466 | 606 | if (this_rd == INT64_MAX) return 0; |
467 | 573 | rd_cost[1] = this_rd; |
468 | 573 | break; |
469 | 606 | } |
470 | 1.75k | } |
471 | 1.17k | } |
472 | | |
473 | 573 | assert(best_rd != INT64_MAX); |
474 | 1.71k | for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) { |
475 | 1.14k | int64_t rd_thresh; |
476 | 3.43k | for (i = 0; i < 2; ++i) { |
477 | 2.29k | int skip_search = 0; |
478 | 2.29k | rd_thresh = best_rd + (best_rd >> 5); |
479 | 2.29k | if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh && |
480 | 2.29k | rd_cost[2 * (angle_delta - 1) + i] > rd_thresh) |
481 | 999 | skip_search = 1; |
482 | 2.29k | if (!skip_search) { |
483 | 1.29k | mbmi->angle_delta[PLANE_TYPE_UV] = (1 - 2 * i) * angle_delta; |
484 | 1.29k | pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead, best_rd, |
485 | 1.29k | rate, rd_stats, &best_angle_delta, |
486 | 1.29k | &best_rd); |
487 | 1.29k | } |
488 | 2.29k | } |
489 | 1.14k | } |
490 | | |
491 | 573 | mbmi->angle_delta[PLANE_TYPE_UV] = best_angle_delta; |
492 | 573 | return rd_stats->rate != INT_MAX; |
493 | 606 | } |
494 | | |
// Combines two per-plane CfL signs into a joint sign index. For
// plane == CFL_PRED_U the index is a * CFL_SIGNS + b - 1; for any other
// plane the roles of a and b are swapped. Every parameter is parenthesized so
// that expression arguments (e.g. "s + 1" or "c ? p : q") expand correctly.
#define PLANE_SIGN_TO_JOINT_SIGN(plane, a, b)          \
  ((plane) == CFL_PRED_U ? (a) * CFL_SIGNS + (b) - 1 \
                         : (b) * CFL_SIGNS + (a) - 1)
497 | | |
498 | | static void cfl_idx_to_sign_and_alpha(int cfl_idx, CFL_SIGN_TYPE *cfl_sign, |
499 | 1.77M | int *cfl_alpha) { |
500 | 1.77M | int cfl_linear_idx = cfl_idx - CFL_INDEX_ZERO; |
501 | 1.77M | if (cfl_linear_idx == 0) { |
502 | 385k | *cfl_sign = CFL_SIGN_ZERO; |
503 | 385k | *cfl_alpha = 0; |
504 | 1.38M | } else { |
505 | 1.38M | *cfl_sign = cfl_linear_idx > 0 ? CFL_SIGN_POS : CFL_SIGN_NEG; |
506 | 1.38M | *cfl_alpha = abs(cfl_linear_idx) - 1; |
507 | 1.38M | } |
508 | 1.77M | } |
509 | | |
510 | | static int64_t cfl_compute_rd(const AV1_COMP *const cpi, MACROBLOCK *x, |
511 | | int plane, TX_SIZE tx_size, |
512 | | BLOCK_SIZE plane_bsize, int cfl_idx, |
513 | 616k | int fast_mode, RD_STATS *rd_stats) { |
514 | 616k | assert(IMPLIES(fast_mode, rd_stats == NULL)); |
515 | 616k | const AV1_COMMON *const cm = &cpi->common; |
516 | 616k | MACROBLOCKD *const xd = &x->e_mbd; |
517 | 616k | MB_MODE_INFO *const mbmi = xd->mi[0]; |
518 | 616k | int cfl_plane = get_cfl_pred_type(plane); |
519 | 616k | CFL_SIGN_TYPE cfl_sign; |
520 | 616k | int cfl_alpha; |
521 | 616k | cfl_idx_to_sign_and_alpha(cfl_idx, &cfl_sign, &cfl_alpha); |
522 | | // We conly build CFL for a given plane, the other plane's sign is dummy |
523 | 616k | int dummy_sign = CFL_SIGN_NEG; |
524 | 616k | const int8_t orig_cfl_alpha_signs = mbmi->cfl_alpha_signs; |
525 | 616k | const uint8_t orig_cfl_alpha_idx = mbmi->cfl_alpha_idx; |
526 | 616k | mbmi->cfl_alpha_signs = |
527 | 616k | PLANE_SIGN_TO_JOINT_SIGN(cfl_plane, cfl_sign, dummy_sign); |
528 | 616k | mbmi->cfl_alpha_idx = (cfl_alpha << CFL_ALPHABET_SIZE_LOG2) + cfl_alpha; |
529 | 616k | int64_t cfl_cost; |
530 | 616k | if (fast_mode) { |
531 | 231k | cfl_cost = |
532 | 231k | intra_model_rd(cm, x, plane, plane_bsize, tx_size, /*use_hadamard=*/0); |
533 | 385k | } else { |
534 | 385k | av1_init_rd_stats(rd_stats); |
535 | 385k | av1_txfm_rd_in_plane(x, cpi, rd_stats, INT64_MAX, 0, plane, plane_bsize, |
536 | 385k | tx_size, FTXS_NONE, 0); |
537 | 385k | av1_rd_cost_update(x->rdmult, rd_stats); |
538 | 385k | cfl_cost = rd_stats->rdcost; |
539 | 385k | } |
540 | 616k | mbmi->cfl_alpha_signs = orig_cfl_alpha_signs; |
541 | 616k | mbmi->cfl_alpha_idx = orig_cfl_alpha_idx; |
542 | 616k | return cfl_cost; |
543 | 616k | } |
544 | | |
545 | | static void cfl_pick_plane_parameter(const AV1_COMP *const cpi, MACROBLOCK *x, |
546 | | int plane, TX_SIZE tx_size, |
547 | | int cfl_search_range, |
548 | 77.0k | RD_STATS cfl_rd_arr[CFL_MAGS_SIZE]) { |
549 | 77.0k | assert(cfl_search_range >= 1 && cfl_search_range <= CFL_MAGS_SIZE); |
550 | 77.0k | MACROBLOCKD *const xd = &x->e_mbd; |
551 | | |
552 | 77.0k | xd->cfl.use_dc_pred_cache = 1; |
553 | | |
554 | 77.0k | MB_MODE_INFO *const mbmi = xd->mi[0]; |
555 | 77.0k | assert(mbmi->uv_mode == UV_CFL_PRED); |
556 | 77.0k | const MACROBLOCKD_PLANE *pd = &xd->plane[plane]; |
557 | 77.0k | const BLOCK_SIZE plane_bsize = |
558 | 77.0k | get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y); |
559 | | |
560 | 77.0k | const int dir_ls[2] = { 1, -1 }; |
561 | | |
562 | 77.0k | int est_best_cfl_idx = CFL_INDEX_ZERO; |
563 | 77.0k | if (cfl_search_range < CFL_MAGS_SIZE) { |
564 | 77.0k | int fast_mode = 1; |
565 | 77.0k | int start_cfl_idx = CFL_INDEX_ZERO; |
566 | 77.0k | int64_t best_cfl_cost = cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize, |
567 | 77.0k | start_cfl_idx, fast_mode, NULL); |
568 | 231k | for (int si = 0; si < 2; ++si) { |
569 | 154k | const int dir = dir_ls[si]; |
570 | 154k | for (int i = 1; i < CFL_MAGS_SIZE; ++i) { |
571 | 154k | int cfl_idx = start_cfl_idx + dir * i; |
572 | 154k | if (cfl_idx < 0 || cfl_idx >= CFL_MAGS_SIZE) break; |
573 | 154k | int64_t cfl_cost = cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize, |
574 | 154k | cfl_idx, fast_mode, NULL); |
575 | 154k | if (cfl_cost < best_cfl_cost) { |
576 | 0 | best_cfl_cost = cfl_cost; |
577 | 0 | est_best_cfl_idx = cfl_idx; |
578 | 154k | } else { |
579 | 154k | break; |
580 | 154k | } |
581 | 154k | } |
582 | 154k | } |
583 | 77.0k | } |
584 | | |
585 | 2.61M | for (int cfl_idx = 0; cfl_idx < CFL_MAGS_SIZE; ++cfl_idx) { |
586 | 2.54M | av1_invalid_rd_stats(&cfl_rd_arr[cfl_idx]); |
587 | 2.54M | } |
588 | | |
589 | 77.0k | int fast_mode = 0; |
590 | 77.0k | int start_cfl_idx = est_best_cfl_idx; |
591 | 77.0k | cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize, start_cfl_idx, fast_mode, |
592 | 77.0k | &cfl_rd_arr[start_cfl_idx]); |
593 | 231k | for (int si = 0; si < 2; ++si) { |
594 | 154k | const int dir = dir_ls[si]; |
595 | 462k | for (int i = 1; i < cfl_search_range; ++i) { |
596 | 308k | int cfl_idx = start_cfl_idx + dir * i; |
597 | 308k | if (cfl_idx < 0 || cfl_idx >= CFL_MAGS_SIZE) break; |
598 | 308k | cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize, cfl_idx, fast_mode, |
599 | 308k | &cfl_rd_arr[cfl_idx]); |
600 | 308k | } |
601 | 154k | } |
602 | 77.0k | xd->cfl.use_dc_pred_cache = 0; |
603 | 77.0k | xd->cfl.dc_pred_is_cached[0] = 0; |
604 | 77.0k | xd->cfl.dc_pred_is_cached[1] = 0; |
605 | 77.0k | } |
606 | | |
607 | | /*!\brief Pick the optimal parameters for Chroma to Luma (CFL) component |
608 | | * |
609 | | * \ingroup intra_mode_search |
610 | | * \callergraph |
611 | | * |
612 | | * This function will use DCT_DCT followed by computing SATD (sum of absolute |
613 | | * transformed differences) to estimate the RD score and find the best possible |
614 | | * CFL parameter. |
615 | | * |
616 | | * Then the function will apply a full RD search near the best possible CFL |
617 | | * parameter to find the best actual CFL parameter. |
618 | | * |
619 | | * Side effect: |
620 | | * We use ths buffers in x->plane[] and xd->plane[] as throw-away buffers for RD |
621 | | * search. |
622 | | * |
623 | | * \param[in] x Encoder prediction block structure. |
624 | | * \param[in] cpi Top-level encoder instance structure. |
625 | | * \param[in] tx_size Transform size. |
626 | | * \param[in] ref_best_rd Reference best RD. |
627 | | * \param[in] cfl_search_range The search range of full RD search near the |
628 | | * estimated best CFL parameter. |
629 | | * |
630 | | * \param[out] best_rd_stats RD stats of the best CFL parameter |
631 | | * \param[out] best_cfl_alpha_idx Best CFL alpha index |
632 | | * \param[out] best_cfl_alpha_signs Best CFL joint signs |
633 | | * |
634 | | */ |
635 | | static int cfl_rd_pick_alpha(MACROBLOCK *const x, const AV1_COMP *const cpi, |
636 | | TX_SIZE tx_size, int64_t ref_best_rd, |
637 | | int cfl_search_range, RD_STATS *best_rd_stats, |
638 | | uint8_t *best_cfl_alpha_idx, |
639 | 38.5k | int8_t *best_cfl_alpha_signs) { |
640 | 38.5k | assert(cfl_search_range >= 1 && cfl_search_range <= CFL_MAGS_SIZE); |
641 | 38.5k | const ModeCosts *mode_costs = &x->mode_costs; |
642 | 38.5k | RD_STATS cfl_rd_arr_u[CFL_MAGS_SIZE]; |
643 | 38.5k | RD_STATS cfl_rd_arr_v[CFL_MAGS_SIZE]; |
644 | | |
645 | 38.5k | av1_invalid_rd_stats(best_rd_stats); |
646 | | |
647 | 38.5k | cfl_pick_plane_parameter(cpi, x, 1, tx_size, cfl_search_range, cfl_rd_arr_u); |
648 | 38.5k | cfl_pick_plane_parameter(cpi, x, 2, tx_size, cfl_search_range, cfl_rd_arr_v); |
649 | | |
650 | 1.30M | for (int ui = 0; ui < CFL_MAGS_SIZE; ++ui) { |
651 | 1.27M | if (cfl_rd_arr_u[ui].rate == INT_MAX) continue; |
652 | 192k | int cfl_alpha_u; |
653 | 192k | CFL_SIGN_TYPE cfl_sign_u; |
654 | 192k | cfl_idx_to_sign_and_alpha(ui, &cfl_sign_u, &cfl_alpha_u); |
655 | 6.54M | for (int vi = 0; vi < CFL_MAGS_SIZE; ++vi) { |
656 | 6.35M | if (cfl_rd_arr_v[vi].rate == INT_MAX) continue; |
657 | 962k | int cfl_alpha_v; |
658 | 962k | CFL_SIGN_TYPE cfl_sign_v; |
659 | 962k | cfl_idx_to_sign_and_alpha(vi, &cfl_sign_v, &cfl_alpha_v); |
660 | | // cfl_sign_u == CFL_SIGN_ZERO && cfl_sign_v == CFL_SIGN_ZERO is not a |
661 | | // valid parameter for CFL |
662 | 962k | if (cfl_sign_u == CFL_SIGN_ZERO && cfl_sign_v == CFL_SIGN_ZERO) continue; |
663 | 924k | int joint_sign = cfl_sign_u * CFL_SIGNS + cfl_sign_v - 1; |
664 | 924k | RD_STATS rd_stats = cfl_rd_arr_u[ui]; |
665 | 924k | av1_merge_rd_stats(&rd_stats, &cfl_rd_arr_v[vi]); |
666 | 924k | if (rd_stats.rate != INT_MAX) { |
667 | 924k | rd_stats.rate += |
668 | 924k | mode_costs->cfl_cost[joint_sign][CFL_PRED_U][cfl_alpha_u]; |
669 | 924k | rd_stats.rate += |
670 | 924k | mode_costs->cfl_cost[joint_sign][CFL_PRED_V][cfl_alpha_v]; |
671 | 924k | } |
672 | 924k | av1_rd_cost_update(x->rdmult, &rd_stats); |
673 | 924k | if (rd_stats.rdcost < best_rd_stats->rdcost) { |
674 | 115k | *best_rd_stats = rd_stats; |
675 | 115k | *best_cfl_alpha_idx = |
676 | 115k | (cfl_alpha_u << CFL_ALPHABET_SIZE_LOG2) + cfl_alpha_v; |
677 | 115k | *best_cfl_alpha_signs = joint_sign; |
678 | 115k | } |
679 | 924k | } |
680 | 192k | } |
681 | 38.5k | if (best_rd_stats->rdcost >= ref_best_rd) { |
682 | 38.4k | av1_invalid_rd_stats(best_rd_stats); |
683 | | // Set invalid CFL parameters here since the rdcost is not better than |
684 | | // ref_best_rd. |
685 | 38.4k | *best_cfl_alpha_idx = 0; |
686 | 38.4k | *best_cfl_alpha_signs = 0; |
687 | 38.4k | return 0; |
688 | 38.4k | } |
689 | 71 | return 1; |
690 | 38.5k | } |
691 | | |
692 | | int64_t av1_rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x, |
693 | | int *rate, int *rate_tokenonly, |
694 | | int64_t *distortion, int *skippable, |
695 | 68.3k | BLOCK_SIZE bsize, TX_SIZE max_tx_size) { |
696 | 68.3k | const AV1_COMMON *const cm = &cpi->common; |
697 | 68.3k | MACROBLOCKD *xd = &x->e_mbd; |
698 | 68.3k | MB_MODE_INFO *mbmi = xd->mi[0]; |
699 | 68.3k | assert(!is_inter_block(mbmi)); |
700 | 68.3k | MB_MODE_INFO best_mbmi = *mbmi; |
701 | 68.3k | int64_t best_rd = INT64_MAX, this_rd; |
702 | 68.3k | const ModeCosts *mode_costs = &x->mode_costs; |
703 | 68.3k | const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg; |
704 | | |
705 | 68.3k | init_sbuv_mode(mbmi); |
706 | | |
707 | | // Return if the current block does not correspond to a chroma block. |
708 | 68.3k | if (!xd->is_chroma_ref) { |
709 | 4.77k | *rate = 0; |
710 | 4.77k | *rate_tokenonly = 0; |
711 | 4.77k | *distortion = 0; |
712 | 4.77k | *skippable = 1; |
713 | 4.77k | return INT64_MAX; |
714 | 4.77k | } |
715 | | |
716 | | // Only store reconstructed luma when there's chroma RDO. When there's no |
717 | | // chroma RDO, the reconstructed luma will be stored in encode_superblock(). |
718 | 63.5k | xd->cfl.store_y = store_cfl_required_rdo(cm, x); |
719 | 63.5k | if (xd->cfl.store_y) { |
720 | | // Restore reconstructed luma values. |
721 | | // TODO(chiyotsai@google.com): right now we are re-computing the txfm in |
722 | | // this function everytime we search through uv modes. There is some |
723 | | // potential speed up here if we cache the result to avoid redundant |
724 | | // computation. |
725 | 38.5k | av1_encode_intra_block_plane(cpi, x, mbmi->bsize, AOM_PLANE_Y, |
726 | 38.5k | DRY_RUN_NORMAL, |
727 | 38.5k | cpi->optimize_seg_arr[mbmi->segment_id]); |
728 | 38.5k | xd->cfl.store_y = 0; |
729 | 38.5k | } |
730 | 63.5k | IntraModeSearchState intra_search_state; |
731 | 63.5k | init_intra_mode_search_state(&intra_search_state); |
732 | | |
733 | | // Search through all non-palette modes. |
734 | 953k | for (int mode_idx = 0; mode_idx < UV_INTRA_MODES; ++mode_idx) { |
735 | 890k | int this_rate; |
736 | 890k | RD_STATS tokenonly_rd_stats; |
737 | 890k | UV_PREDICTION_MODE mode = uv_rd_search_mode_order[mode_idx]; |
738 | 890k | const int is_diagonal_mode = av1_is_diagonal_mode(get_uv_mode(mode)); |
739 | 890k | const int is_directional_mode = av1_is_directional_mode(get_uv_mode(mode)); |
740 | | |
741 | 890k | if (is_diagonal_mode && !cpi->oxcf.intra_mode_cfg.enable_diagonal_intra) |
742 | 0 | continue; |
743 | 890k | if (is_directional_mode && |
744 | 890k | !cpi->oxcf.intra_mode_cfg.enable_directional_intra) |
745 | 0 | continue; |
746 | | |
747 | 890k | if (!(cpi->sf.intra_sf.intra_uv_mode_mask[txsize_sqr_up_map[max_tx_size]] & |
748 | 890k | (1 << mode))) |
749 | 0 | continue; |
750 | 890k | if (!intra_mode_cfg->enable_smooth_intra && mode >= UV_SMOOTH_PRED && |
751 | 890k | mode <= UV_SMOOTH_H_PRED) |
752 | 0 | continue; |
753 | | |
754 | 890k | if (!intra_mode_cfg->enable_paeth_intra && mode == UV_PAETH_PRED) continue; |
755 | | |
756 | 890k | assert(mbmi->mode < INTRA_MODES); |
757 | 890k | if (cpi->sf.intra_sf.prune_chroma_modes_using_luma_winner && |
758 | 890k | !(av1_derived_chroma_intra_mode_used_flag[mbmi->mode] & (1 << mode))) |
759 | 698k | continue; |
760 | | |
761 | 191k | mbmi->uv_mode = mode; |
762 | | |
763 | | // Init variables for cfl and angle delta |
764 | 191k | const SPEED_FEATURES *sf = &cpi->sf; |
765 | 191k | mbmi->angle_delta[PLANE_TYPE_UV] = 0; |
766 | 191k | if (mode == UV_CFL_PRED) { |
767 | 63.5k | if (!is_cfl_allowed(xd) || !intra_mode_cfg->enable_cfl_intra) continue; |
768 | 38.5k | assert(!is_directional_mode); |
769 | 38.5k | const TX_SIZE uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd); |
770 | 38.5k | if (!cfl_rd_pick_alpha(x, cpi, uv_tx_size, best_rd, |
771 | 38.5k | sf->intra_sf.cfl_search_range, &tokenonly_rd_stats, |
772 | 38.5k | &mbmi->cfl_alpha_idx, &mbmi->cfl_alpha_signs)) { |
773 | 38.4k | continue; |
774 | 38.4k | } |
775 | 128k | } else if (is_directional_mode && av1_use_angle_delta(mbmi->bsize) && |
776 | 128k | intra_mode_cfg->enable_angle_delta) { |
777 | 606 | if (sf->intra_sf.chroma_intra_pruning_with_hog && |
778 | 606 | !intra_search_state.dir_mode_skip_mask_ready) { |
779 | 606 | static const float thresh[2][4] = { |
780 | 606 | { -1.2f, 0.0f, 0.0f, 1.2f }, // Interframe |
781 | 606 | { -1.2f, -1.2f, -0.6f, 0.4f }, // Intraframe |
782 | 606 | }; |
783 | 606 | const int is_chroma = 1; |
784 | 606 | const int is_intra_frame = frame_is_intra_only(cm); |
785 | 606 | prune_intra_mode_with_hog( |
786 | 606 | x, bsize, cm->seq_params->sb_size, |
787 | 606 | thresh[is_intra_frame] |
788 | 606 | [sf->intra_sf.chroma_intra_pruning_with_hog - 1], |
789 | 606 | intra_search_state.directional_mode_skip_mask, is_chroma); |
790 | 606 | intra_search_state.dir_mode_skip_mask_ready = 1; |
791 | 606 | } |
792 | 606 | if (intra_search_state.directional_mode_skip_mask[mode]) { |
793 | 0 | continue; |
794 | 0 | } |
795 | | |
796 | | // Search through angle delta |
797 | 606 | const int rate_overhead = |
798 | 606 | mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][mode]; |
799 | 606 | if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, rate_overhead, best_rd, |
800 | 606 | &this_rate, &tokenonly_rd_stats)) |
801 | 33 | continue; |
802 | 127k | } else { |
803 | | // Predict directly if we don't need to search for angle delta. |
804 | 127k | if (!av1_txfm_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd)) { |
805 | 8.03k | continue; |
806 | 8.03k | } |
807 | 127k | } |
808 | 120k | const int mode_cost = |
809 | 120k | mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][mode]; |
810 | 120k | this_rate = tokenonly_rd_stats.rate + |
811 | 120k | intra_mode_info_cost_uv(cpi, x, mbmi, bsize, mode_cost); |
812 | 120k | this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); |
813 | | |
814 | 120k | if (this_rd < best_rd) { |
815 | 64.2k | best_mbmi = *mbmi; |
816 | 64.2k | best_rd = this_rd; |
817 | 64.2k | *rate = this_rate; |
818 | 64.2k | *rate_tokenonly = tokenonly_rd_stats.rate; |
819 | 64.2k | *distortion = tokenonly_rd_stats.dist; |
820 | 64.2k | *skippable = tokenonly_rd_stats.skip_txfm; |
821 | 64.2k | } |
822 | 120k | } |
823 | | |
824 | | // Search palette mode |
825 | 63.5k | const int try_palette = |
826 | 63.5k | cpi->oxcf.tool_cfg.enable_palette && |
827 | 63.5k | av1_allow_palette(cpi->common.features.allow_screen_content_tools, |
828 | 63.5k | mbmi->bsize); |
829 | 63.5k | if (try_palette) { |
830 | 0 | uint8_t *best_palette_color_map = x->palette_buffer->best_palette_color_map; |
831 | 0 | av1_rd_pick_palette_intra_sbuv( |
832 | 0 | cpi, x, |
833 | 0 | mode_costs |
834 | 0 | ->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][UV_DC_PRED], |
835 | 0 | best_palette_color_map, &best_mbmi, &best_rd, rate, rate_tokenonly, |
836 | 0 | distortion, skippable); |
837 | 0 | } |
838 | | |
839 | 63.5k | *mbmi = best_mbmi; |
840 | | // Make sure we actually chose a mode |
841 | 63.5k | assert(best_rd < INT64_MAX); |
842 | 63.5k | return best_rd; |
843 | 68.3k | } |
844 | | |
845 | | // Searches palette mode for luma channel in inter frame. |
846 | | int av1_search_palette_mode(IntraModeSearchState *intra_search_state, |
847 | | const AV1_COMP *cpi, MACROBLOCK *x, |
848 | | BLOCK_SIZE bsize, unsigned int ref_frame_cost, |
849 | | PICK_MODE_CONTEXT *ctx, RD_STATS *this_rd_cost, |
850 | 0 | int64_t best_rd) { |
851 | 0 | const AV1_COMMON *const cm = &cpi->common; |
852 | 0 | MB_MODE_INFO *const mbmi = x->e_mbd.mi[0]; |
853 | 0 | PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; |
854 | 0 | const int num_planes = av1_num_planes(cm); |
855 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
856 | 0 | int rate2 = 0; |
857 | 0 | int64_t distortion2 = 0, best_rd_palette = best_rd, this_rd; |
858 | 0 | int skippable = 0; |
859 | 0 | uint8_t *const best_palette_color_map = |
860 | 0 | x->palette_buffer->best_palette_color_map; |
861 | 0 | uint8_t *const color_map = xd->plane[0].color_index_map; |
862 | 0 | MB_MODE_INFO best_mbmi_palette = *mbmi; |
863 | 0 | uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE]; |
864 | 0 | uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE]; |
865 | 0 | const ModeCosts *mode_costs = &x->mode_costs; |
866 | 0 | const int *const intra_mode_cost = |
867 | 0 | mode_costs->mbmode_cost[size_group_lookup[bsize]]; |
868 | 0 | const int rows = block_size_high[bsize]; |
869 | 0 | const int cols = block_size_wide[bsize]; |
870 | |
|
871 | 0 | mbmi->mode = DC_PRED; |
872 | 0 | mbmi->uv_mode = UV_DC_PRED; |
873 | 0 | mbmi->ref_frame[0] = INTRA_FRAME; |
874 | 0 | mbmi->ref_frame[1] = NONE_FRAME; |
875 | 0 | av1_zero(pmi->palette_size); |
876 | |
|
877 | 0 | RD_STATS rd_stats_y; |
878 | 0 | av1_invalid_rd_stats(&rd_stats_y); |
879 | 0 | av1_rd_pick_palette_intra_sby(cpi, x, bsize, intra_mode_cost[DC_PRED], |
880 | 0 | &best_mbmi_palette, best_palette_color_map, |
881 | 0 | &best_rd_palette, &rd_stats_y.rate, NULL, |
882 | 0 | &rd_stats_y.dist, &rd_stats_y.skip_txfm, NULL, |
883 | 0 | ctx, best_blk_skip, best_tx_type_map); |
884 | 0 | if (rd_stats_y.rate == INT_MAX || pmi->palette_size[0] == 0) { |
885 | 0 | this_rd_cost->rdcost = INT64_MAX; |
886 | 0 | return skippable; |
887 | 0 | } |
888 | | |
889 | 0 | memcpy(x->txfm_search_info.blk_skip, best_blk_skip, |
890 | 0 | sizeof(best_blk_skip[0]) * bsize_to_num_blk(bsize)); |
891 | 0 | av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk); |
892 | 0 | memcpy(color_map, best_palette_color_map, |
893 | 0 | rows * cols * sizeof(best_palette_color_map[0])); |
894 | |
|
895 | 0 | skippable = rd_stats_y.skip_txfm; |
896 | 0 | distortion2 = rd_stats_y.dist; |
897 | 0 | rate2 = rd_stats_y.rate + ref_frame_cost; |
898 | 0 | if (num_planes > 1) { |
899 | 0 | if (intra_search_state->rate_uv_intra == INT_MAX) { |
900 | | // We have not found any good uv mode yet, so we need to search for it. |
901 | 0 | TX_SIZE uv_tx = av1_get_tx_size(AOM_PLANE_U, xd); |
902 | 0 | av1_rd_pick_intra_sbuv_mode(cpi, x, &intra_search_state->rate_uv_intra, |
903 | 0 | &intra_search_state->rate_uv_tokenonly, |
904 | 0 | &intra_search_state->dist_uvs, |
905 | 0 | &intra_search_state->skip_uvs, bsize, uv_tx); |
906 | 0 | intra_search_state->mode_uv = mbmi->uv_mode; |
907 | 0 | intra_search_state->pmi_uv = *pmi; |
908 | 0 | intra_search_state->uv_angle_delta = mbmi->angle_delta[PLANE_TYPE_UV]; |
909 | 0 | } |
910 | | |
911 | | // We have found at least one good uv mode before, so copy and paste it |
912 | | // over. |
913 | 0 | mbmi->uv_mode = intra_search_state->mode_uv; |
914 | 0 | pmi->palette_size[1] = intra_search_state->pmi_uv.palette_size[1]; |
915 | 0 | if (pmi->palette_size[1] > 0) { |
916 | 0 | memcpy(pmi->palette_colors + PALETTE_MAX_SIZE, |
917 | 0 | intra_search_state->pmi_uv.palette_colors + PALETTE_MAX_SIZE, |
918 | 0 | 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0])); |
919 | 0 | } |
920 | 0 | mbmi->angle_delta[PLANE_TYPE_UV] = intra_search_state->uv_angle_delta; |
921 | 0 | skippable = skippable && intra_search_state->skip_uvs; |
922 | 0 | distortion2 += intra_search_state->dist_uvs; |
923 | 0 | rate2 += intra_search_state->rate_uv_intra; |
924 | 0 | } |
925 | |
|
926 | 0 | if (skippable) { |
927 | 0 | rate2 -= rd_stats_y.rate; |
928 | 0 | if (num_planes > 1) rate2 -= intra_search_state->rate_uv_tokenonly; |
929 | 0 | rate2 += mode_costs->skip_txfm_cost[av1_get_skip_txfm_context(xd)][1]; |
930 | 0 | } else { |
931 | 0 | rate2 += mode_costs->skip_txfm_cost[av1_get_skip_txfm_context(xd)][0]; |
932 | 0 | } |
933 | 0 | this_rd = RDCOST(x->rdmult, rate2, distortion2); |
934 | 0 | this_rd_cost->rate = rate2; |
935 | 0 | this_rd_cost->dist = distortion2; |
936 | 0 | this_rd_cost->rdcost = this_rd; |
937 | 0 | return skippable; |
938 | 0 | } |
939 | | |
940 | | /*!\brief Get the intra prediction by searching through tx_type and tx_size. |
941 | | * |
942 | | * \ingroup intra_mode_search |
943 | | * \callergraph |
944 | | * Currently this function is only used in the intra frame code path for |
945 | | * winner-mode processing. |
946 | | * |
947 | | * \return Returns whether the current mode is an improvement over best_rd. |
948 | | */ |
949 | | static AOM_INLINE int intra_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x, |
950 | | BLOCK_SIZE bsize, const int *bmode_costs, |
951 | | int64_t *best_rd, int *rate, |
952 | | int *rate_tokenonly, int64_t *distortion, |
953 | | int *skippable, MB_MODE_INFO *best_mbmi, |
954 | 136k | PICK_MODE_CONTEXT *ctx) { |
955 | 136k | MACROBLOCKD *const xd = &x->e_mbd; |
956 | 136k | MB_MODE_INFO *const mbmi = xd->mi[0]; |
957 | 136k | RD_STATS rd_stats; |
958 | | // In order to improve txfm search avoid rd based breakouts during winner |
959 | | // mode evaluation. Hence passing ref_best_rd as a maximum value |
960 | 136k | av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats, bsize, INT64_MAX); |
961 | 136k | if (rd_stats.rate == INT_MAX) return 0; |
962 | 136k | int this_rate_tokenonly = rd_stats.rate; |
963 | 136k | if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->bsize)) { |
964 | | // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size |
965 | | // in the tokenonly rate, but for intra blocks, tx_size is always coded |
966 | | // (prediction granularity), so we account for it in the full rate, |
967 | | // not the tokenonly rate. |
968 | 87.7k | this_rate_tokenonly -= tx_size_cost(x, bsize, mbmi->tx_size); |
969 | 87.7k | } |
970 | 136k | const int this_rate = |
971 | 136k | rd_stats.rate + |
972 | 136k | intra_mode_info_cost_y(cpi, x, mbmi, bsize, bmode_costs[mbmi->mode]); |
973 | 136k | const int64_t this_rd = RDCOST(x->rdmult, this_rate, rd_stats.dist); |
974 | 136k | if (this_rd < *best_rd) { |
975 | 28.7k | *best_mbmi = *mbmi; |
976 | 28.7k | *best_rd = this_rd; |
977 | 28.7k | *rate = this_rate; |
978 | 28.7k | *rate_tokenonly = this_rate_tokenonly; |
979 | 28.7k | *distortion = rd_stats.dist; |
980 | 28.7k | *skippable = rd_stats.skip_txfm; |
981 | 28.7k | av1_copy_array(ctx->blk_skip, x->txfm_search_info.blk_skip, |
982 | 28.7k | ctx->num_4x4_blk); |
983 | 28.7k | av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk); |
984 | 28.7k | return 1; |
985 | 28.7k | } |
986 | 107k | return 0; |
987 | 136k | } |
988 | | |
989 | | /*!\brief Search for the best filter_intra mode when coding inter frame. |
990 | | * |
991 | | * \ingroup intra_mode_search |
992 | | * \callergraph |
993 | | * This function loops through all filter_intra modes to find the best one. |
994 | | * |
995 | | * \return Returns nothing, but updates the mbmi and rd_stats. |
996 | | */ |
997 | | static INLINE void handle_filter_intra_mode(const AV1_COMP *cpi, MACROBLOCK *x, |
998 | | BLOCK_SIZE bsize, |
999 | | const PICK_MODE_CONTEXT *ctx, |
1000 | | RD_STATS *rd_stats_y, int mode_cost, |
1001 | | int64_t best_rd, |
1002 | 0 | int64_t best_rd_so_far) { |
1003 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
1004 | 0 | MB_MODE_INFO *const mbmi = xd->mi[0]; |
1005 | 0 | assert(mbmi->mode == DC_PRED && |
1006 | 0 | av1_filter_intra_allowed_bsize(&cpi->common, bsize)); |
1007 | |
|
1008 | 0 | RD_STATS rd_stats_y_fi; |
1009 | 0 | int filter_intra_selected_flag = 0; |
1010 | 0 | TX_SIZE best_tx_size = mbmi->tx_size; |
1011 | 0 | FILTER_INTRA_MODE best_fi_mode = FILTER_DC_PRED; |
1012 | 0 | uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE]; |
1013 | 0 | memcpy(best_blk_skip, x->txfm_search_info.blk_skip, |
1014 | 0 | sizeof(best_blk_skip[0]) * ctx->num_4x4_blk); |
1015 | 0 | uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE]; |
1016 | 0 | av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk); |
1017 | 0 | mbmi->filter_intra_mode_info.use_filter_intra = 1; |
1018 | 0 | for (FILTER_INTRA_MODE fi_mode = FILTER_DC_PRED; fi_mode < FILTER_INTRA_MODES; |
1019 | 0 | ++fi_mode) { |
1020 | 0 | mbmi->filter_intra_mode_info.filter_intra_mode = fi_mode; |
1021 | 0 | av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y_fi, bsize, best_rd); |
1022 | 0 | if (rd_stats_y_fi.rate == INT_MAX) continue; |
1023 | 0 | const int this_rate_tmp = |
1024 | 0 | rd_stats_y_fi.rate + |
1025 | 0 | intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost); |
1026 | 0 | const int64_t this_rd_tmp = |
1027 | 0 | RDCOST(x->rdmult, this_rate_tmp, rd_stats_y_fi.dist); |
1028 | |
|
1029 | 0 | if (this_rd_tmp != INT64_MAX && this_rd_tmp / 2 > best_rd) { |
1030 | 0 | break; |
1031 | 0 | } |
1032 | 0 | if (this_rd_tmp < best_rd_so_far) { |
1033 | 0 | best_tx_size = mbmi->tx_size; |
1034 | 0 | av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk); |
1035 | 0 | memcpy(best_blk_skip, x->txfm_search_info.blk_skip, |
1036 | 0 | sizeof(best_blk_skip[0]) * ctx->num_4x4_blk); |
1037 | 0 | best_fi_mode = fi_mode; |
1038 | 0 | *rd_stats_y = rd_stats_y_fi; |
1039 | 0 | filter_intra_selected_flag = 1; |
1040 | 0 | best_rd_so_far = this_rd_tmp; |
1041 | 0 | } |
1042 | 0 | } |
1043 | |
|
1044 | 0 | mbmi->tx_size = best_tx_size; |
1045 | 0 | av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk); |
1046 | 0 | memcpy(x->txfm_search_info.blk_skip, best_blk_skip, |
1047 | 0 | sizeof(x->txfm_search_info.blk_skip[0]) * ctx->num_4x4_blk); |
1048 | |
|
1049 | 0 | if (filter_intra_selected_flag) { |
1050 | 0 | mbmi->filter_intra_mode_info.use_filter_intra = 1; |
1051 | 0 | mbmi->filter_intra_mode_info.filter_intra_mode = best_fi_mode; |
1052 | 0 | } else { |
1053 | 0 | mbmi->filter_intra_mode_info.use_filter_intra = 0; |
1054 | 0 | } |
1055 | 0 | } |
1056 | | |
1057 | | // Evaluate a given luma intra-mode in inter frames. |
1058 | | int av1_handle_intra_y_mode(IntraModeSearchState *intra_search_state, |
1059 | | const AV1_COMP *cpi, MACROBLOCK *x, |
1060 | | BLOCK_SIZE bsize, unsigned int ref_frame_cost, |
1061 | | const PICK_MODE_CONTEXT *ctx, RD_STATS *rd_stats_y, |
1062 | | int64_t best_rd, int *mode_cost_y, int64_t *rd_y, |
1063 | | int64_t *best_model_rd, |
1064 | 0 | int64_t top_intra_model_rd[]) { |
1065 | 0 | const AV1_COMMON *cm = &cpi->common; |
1066 | 0 | const INTRA_MODE_SPEED_FEATURES *const intra_sf = &cpi->sf.intra_sf; |
1067 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
1068 | 0 | MB_MODE_INFO *const mbmi = xd->mi[0]; |
1069 | 0 | assert(mbmi->ref_frame[0] == INTRA_FRAME); |
1070 | 0 | const PREDICTION_MODE mode = mbmi->mode; |
1071 | 0 | const ModeCosts *mode_costs = &x->mode_costs; |
1072 | 0 | const int mode_cost = |
1073 | 0 | mode_costs->mbmode_cost[size_group_lookup[bsize]][mode] + ref_frame_cost; |
1074 | 0 | const int skip_ctx = av1_get_skip_txfm_context(xd); |
1075 | |
|
1076 | 0 | int known_rate = mode_cost; |
1077 | 0 | const int intra_cost_penalty = av1_get_intra_cost_penalty( |
1078 | 0 | cm->quant_params.base_qindex, cm->quant_params.y_dc_delta_q, |
1079 | 0 | cm->seq_params->bit_depth); |
1080 | |
|
1081 | 0 | if (mode != DC_PRED && mode != PAETH_PRED) known_rate += intra_cost_penalty; |
1082 | 0 | known_rate += AOMMIN(mode_costs->skip_txfm_cost[skip_ctx][0], |
1083 | 0 | mode_costs->skip_txfm_cost[skip_ctx][1]); |
1084 | 0 | const int64_t known_rd = RDCOST(x->rdmult, known_rate, 0); |
1085 | 0 | if (known_rd > best_rd) { |
1086 | 0 | intra_search_state->skip_intra_modes = 1; |
1087 | 0 | return 0; |
1088 | 0 | } |
1089 | | |
1090 | 0 | const int is_directional_mode = av1_is_directional_mode(mode); |
1091 | 0 | if (is_directional_mode && av1_use_angle_delta(bsize) && |
1092 | 0 | cpi->oxcf.intra_mode_cfg.enable_angle_delta) { |
1093 | 0 | if (intra_sf->intra_pruning_with_hog && |
1094 | 0 | !intra_search_state->dir_mode_skip_mask_ready) { |
1095 | 0 | const float thresh[4] = { -1.2f, 0.0f, 0.0f, 1.2f }; |
1096 | 0 | const int is_chroma = 0; |
1097 | 0 | prune_intra_mode_with_hog(x, bsize, cm->seq_params->sb_size, |
1098 | 0 | thresh[intra_sf->intra_pruning_with_hog - 1], |
1099 | 0 | intra_search_state->directional_mode_skip_mask, |
1100 | 0 | is_chroma); |
1101 | 0 | intra_search_state->dir_mode_skip_mask_ready = 1; |
1102 | 0 | } |
1103 | 0 | if (intra_search_state->directional_mode_skip_mask[mode]) return 0; |
1104 | 0 | } |
1105 | 0 | const TX_SIZE tx_size = AOMMIN(TX_32X32, max_txsize_lookup[bsize]); |
1106 | 0 | const int64_t this_model_rd = |
1107 | 0 | intra_model_rd(&cpi->common, x, 0, bsize, tx_size, /*use_hadamard=*/1); |
1108 | |
|
1109 | 0 | const int model_rd_index_for_pruning = |
1110 | 0 | get_model_rd_index_for_pruning(x, intra_sf); |
1111 | |
|
1112 | 0 | if (prune_intra_y_mode(this_model_rd, best_model_rd, top_intra_model_rd, |
1113 | 0 | intra_sf->top_intra_model_count_allowed, |
1114 | 0 | model_rd_index_for_pruning)) |
1115 | 0 | return 0; |
1116 | 0 | av1_init_rd_stats(rd_stats_y); |
1117 | 0 | av1_pick_uniform_tx_size_type_yrd(cpi, x, rd_stats_y, bsize, best_rd); |
1118 | | |
1119 | | // Pick filter intra modes. |
1120 | 0 | if (mode == DC_PRED && av1_filter_intra_allowed_bsize(cm, bsize)) { |
1121 | 0 | int try_filter_intra = 1; |
1122 | 0 | int64_t best_rd_so_far = INT64_MAX; |
1123 | 0 | if (rd_stats_y->rate != INT_MAX) { |
1124 | | // best_rd_so_far is the rdcost of DC_PRED without using filter_intra. |
1125 | | // Later, in filter intra search, best_rd_so_far is used for comparison. |
1126 | 0 | mbmi->filter_intra_mode_info.use_filter_intra = 0; |
1127 | 0 | const int tmp_rate = |
1128 | 0 | rd_stats_y->rate + |
1129 | 0 | intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost); |
1130 | 0 | best_rd_so_far = RDCOST(x->rdmult, tmp_rate, rd_stats_y->dist); |
1131 | 0 | try_filter_intra = (best_rd_so_far / 2) <= best_rd; |
1132 | 0 | } else if (intra_sf->skip_filter_intra_in_inter_frames >= 1) { |
1133 | | // As rd cost of luma intra dc mode is more than best_rd (i.e., |
1134 | | // rd_stats_y->rate = INT_MAX), skip the evaluation of filter intra modes. |
1135 | 0 | try_filter_intra = 0; |
1136 | 0 | } |
1137 | |
|
1138 | 0 | if (try_filter_intra) { |
1139 | 0 | handle_filter_intra_mode(cpi, x, bsize, ctx, rd_stats_y, mode_cost, |
1140 | 0 | best_rd, best_rd_so_far); |
1141 | 0 | } |
1142 | 0 | } |
1143 | |
|
1144 | 0 | if (rd_stats_y->rate == INT_MAX) return 0; |
1145 | | |
1146 | 0 | *mode_cost_y = intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost); |
1147 | 0 | const int rate_y = rd_stats_y->skip_txfm |
1148 | 0 | ? mode_costs->skip_txfm_cost[skip_ctx][1] |
1149 | 0 | : rd_stats_y->rate; |
1150 | 0 | *rd_y = RDCOST(x->rdmult, rate_y + *mode_cost_y, rd_stats_y->dist); |
1151 | 0 | if (best_rd < (INT64_MAX / 2) && *rd_y > (best_rd + (best_rd >> 2))) { |
1152 | 0 | intra_search_state->skip_intra_modes = 1; |
1153 | 0 | return 0; |
1154 | 0 | } |
1155 | | |
1156 | 0 | return 1; |
1157 | 0 | } |
1158 | | |
1159 | | int av1_search_intra_uv_modes_in_interframe( |
1160 | | IntraModeSearchState *intra_search_state, const AV1_COMP *cpi, |
1161 | | MACROBLOCK *x, BLOCK_SIZE bsize, RD_STATS *rd_stats, |
1162 | 0 | const RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv, int64_t best_rd) { |
1163 | 0 | const AV1_COMMON *cm = &cpi->common; |
1164 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
1165 | 0 | MB_MODE_INFO *const mbmi = xd->mi[0]; |
1166 | 0 | assert(mbmi->ref_frame[0] == INTRA_FRAME); |
1167 | | |
1168 | | // TODO(chiyotsai@google.com): Consolidate the chroma search code here with |
1169 | | // the one in av1_search_palette_mode. |
1170 | 0 | PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; |
1171 | 0 | const int try_palette = |
1172 | 0 | cpi->oxcf.tool_cfg.enable_palette && |
1173 | 0 | av1_allow_palette(cm->features.allow_screen_content_tools, mbmi->bsize); |
1174 | |
|
1175 | 0 | assert(intra_search_state->rate_uv_intra == INT_MAX); |
1176 | 0 | if (intra_search_state->rate_uv_intra == INT_MAX) { |
1177 | | // If no good uv-predictor had been found, search for it. |
1178 | 0 | const TX_SIZE uv_tx = av1_get_tx_size(AOM_PLANE_U, xd); |
1179 | 0 | av1_rd_pick_intra_sbuv_mode(cpi, x, &intra_search_state->rate_uv_intra, |
1180 | 0 | &intra_search_state->rate_uv_tokenonly, |
1181 | 0 | &intra_search_state->dist_uvs, |
1182 | 0 | &intra_search_state->skip_uvs, bsize, uv_tx); |
1183 | 0 | intra_search_state->mode_uv = mbmi->uv_mode; |
1184 | 0 | if (try_palette) intra_search_state->pmi_uv = *pmi; |
1185 | 0 | intra_search_state->uv_angle_delta = mbmi->angle_delta[PLANE_TYPE_UV]; |
1186 | |
|
1187 | 0 | const int uv_rate = intra_search_state->rate_uv_tokenonly; |
1188 | 0 | const int64_t uv_dist = intra_search_state->dist_uvs; |
1189 | 0 | const int64_t uv_rd = RDCOST(x->rdmult, uv_rate, uv_dist); |
1190 | 0 | if (uv_rd > best_rd) { |
1191 | | // If there is no good intra uv-mode available, we can skip all intra |
1192 | | // modes. |
1193 | 0 | intra_search_state->skip_intra_modes = 1; |
1194 | 0 | return 0; |
1195 | 0 | } |
1196 | 0 | } |
1197 | | |
1198 | | // If we are here, then the encoder has found at least one good intra uv |
1199 | | // predictor, so we can directly copy its statistics over. |
1200 | | // TODO(any): the stats here is not right if the best uv mode is CFL but the |
1201 | | // best y mode is palette. |
1202 | 0 | rd_stats_uv->rate = intra_search_state->rate_uv_tokenonly; |
1203 | 0 | rd_stats_uv->dist = intra_search_state->dist_uvs; |
1204 | 0 | rd_stats_uv->skip_txfm = intra_search_state->skip_uvs; |
1205 | 0 | rd_stats->skip_txfm = rd_stats_y->skip_txfm && rd_stats_uv->skip_txfm; |
1206 | 0 | mbmi->uv_mode = intra_search_state->mode_uv; |
1207 | 0 | if (try_palette) { |
1208 | 0 | pmi->palette_size[1] = intra_search_state->pmi_uv.palette_size[1]; |
1209 | 0 | memcpy(pmi->palette_colors + PALETTE_MAX_SIZE, |
1210 | 0 | intra_search_state->pmi_uv.palette_colors + PALETTE_MAX_SIZE, |
1211 | 0 | 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0])); |
1212 | 0 | } |
1213 | 0 | mbmi->angle_delta[PLANE_TYPE_UV] = intra_search_state->uv_angle_delta; |
1214 | |
|
1215 | 0 | return 1; |
1216 | 0 | } |
1217 | | |
1218 | | // Finds the best non-intrabc mode on an intra frame. |
1219 | | int64_t av1_rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x, |
1220 | | int *rate, int *rate_tokenonly, |
1221 | | int64_t *distortion, int *skippable, |
1222 | | BLOCK_SIZE bsize, int64_t best_rd, |
1223 | 112k | PICK_MODE_CONTEXT *ctx) { |
1224 | 112k | MACROBLOCKD *const xd = &x->e_mbd; |
1225 | 112k | MB_MODE_INFO *const mbmi = xd->mi[0]; |
1226 | 112k | assert(!is_inter_block(mbmi)); |
1227 | 112k | int64_t best_model_rd = INT64_MAX; |
1228 | 112k | int is_directional_mode; |
1229 | 112k | uint8_t directional_mode_skip_mask[INTRA_MODES] = { 0 }; |
1230 | | // Flag to check rd of any intra mode is better than best_rd passed to this |
1231 | | // function |
1232 | 112k | int beat_best_rd = 0; |
1233 | 112k | const int *bmode_costs; |
1234 | 112k | const IntraModeCfg *const intra_mode_cfg = &cpi->oxcf.intra_mode_cfg; |
1235 | 112k | PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; |
1236 | 112k | const int try_palette = |
1237 | 112k | cpi->oxcf.tool_cfg.enable_palette && |
1238 | 112k | av1_allow_palette(cpi->common.features.allow_screen_content_tools, |
1239 | 112k | mbmi->bsize); |
1240 | 112k | uint8_t *best_palette_color_map = |
1241 | 112k | try_palette ? x->palette_buffer->best_palette_color_map : NULL; |
1242 | 112k | const MB_MODE_INFO *above_mi = xd->above_mbmi; |
1243 | 112k | const MB_MODE_INFO *left_mi = xd->left_mbmi; |
1244 | 112k | const PREDICTION_MODE A = av1_above_block_mode(above_mi); |
1245 | 112k | const PREDICTION_MODE L = av1_left_block_mode(left_mi); |
1246 | 112k | const int above_ctx = intra_mode_context[A]; |
1247 | 112k | const int left_ctx = intra_mode_context[L]; |
1248 | 112k | bmode_costs = x->mode_costs.y_mode_costs[above_ctx][left_ctx]; |
1249 | | |
1250 | 112k | mbmi->angle_delta[PLANE_TYPE_Y] = 0; |
1251 | 112k | const INTRA_MODE_SPEED_FEATURES *const intra_sf = &cpi->sf.intra_sf; |
1252 | 112k | if (intra_sf->intra_pruning_with_hog) { |
1253 | | // Less aggressive thresholds are used here than those used in inter frame |
1254 | | // encoding in av1_handle_intra_y_mode() because we want key frames/intra |
1255 | | // frames to have higher quality. |
1256 | 112k | const float thresh[4] = { -1.2f, -1.2f, -0.6f, 0.4f }; |
1257 | 112k | const int is_chroma = 0; |
1258 | 112k | prune_intra_mode_with_hog(x, bsize, cpi->common.seq_params->sb_size, |
1259 | 112k | thresh[intra_sf->intra_pruning_with_hog - 1], |
1260 | 112k | directional_mode_skip_mask, is_chroma); |
1261 | 112k | } |
1262 | 112k | mbmi->filter_intra_mode_info.use_filter_intra = 0; |
1263 | 112k | pmi->palette_size[0] = 0; |
1264 | | |
1265 | | // Set params for mode evaluation |
1266 | 112k | set_mode_eval_params(cpi, x, MODE_EVAL); |
1267 | | |
1268 | 112k | MB_MODE_INFO best_mbmi = *mbmi; |
1269 | 112k | zero_winner_mode_stats(bsize, MAX_WINNER_MODE_COUNT_INTRA, |
1270 | 112k | x->winner_mode_stats); |
1271 | 112k | x->winner_mode_count = 0; |
1272 | | |
1273 | | // Searches the intra-modes except for intrabc, palette, and filter_intra. |
1274 | 112k | int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT]; |
1275 | 560k | for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) { |
1276 | 448k | top_intra_model_rd[i] = INT64_MAX; |
1277 | 448k | } |
1278 | 6.95M | for (int mode_idx = INTRA_MODE_START; mode_idx < LUMA_MODE_COUNT; |
1279 | 6.83M | ++mode_idx) { |
1280 | 6.83M | set_y_mode_and_delta_angle(mode_idx, mbmi); |
1281 | 6.83M | RD_STATS this_rd_stats; |
1282 | 6.83M | int this_rate, this_rate_tokenonly, s; |
1283 | 6.83M | int is_diagonal_mode; |
1284 | 6.83M | int64_t this_distortion, this_rd; |
1285 | | |
1286 | 6.83M | is_diagonal_mode = av1_is_diagonal_mode(mbmi->mode); |
1287 | 6.83M | if (is_diagonal_mode && !intra_mode_cfg->enable_diagonal_intra) continue; |
1288 | 6.83M | if (av1_is_directional_mode(mbmi->mode) && |
1289 | 6.83M | !intra_mode_cfg->enable_directional_intra) |
1290 | 0 | continue; |
1291 | | |
1292 | | // The smooth prediction mode appears to be more frequently picked |
1293 | | // than horizontal / vertical smooth prediction modes. Hence treat |
1294 | | // them differently in speed features. |
1295 | 6.83M | if ((!intra_mode_cfg->enable_smooth_intra || |
1296 | 6.83M | intra_sf->disable_smooth_intra) && |
1297 | 6.83M | (mbmi->mode == SMOOTH_H_PRED || mbmi->mode == SMOOTH_V_PRED)) |
1298 | 224k | continue; |
1299 | 6.61M | if (!intra_mode_cfg->enable_smooth_intra && mbmi->mode == SMOOTH_PRED) |
1300 | 0 | continue; |
1301 | | |
1302 | | // The functionality of filter intra modes and smooth prediction |
1303 | | // overlap. Hence smooth prediction is pruned only if all the |
1304 | | // filter intra modes are enabled. |
1305 | 6.61M | if (intra_sf->disable_smooth_intra && |
1306 | 6.61M | intra_sf->prune_filter_intra_level == 0 && mbmi->mode == SMOOTH_PRED) |
1307 | 0 | continue; |
1308 | 6.61M | if (!intra_mode_cfg->enable_paeth_intra && mbmi->mode == PAETH_PRED) |
1309 | 0 | continue; |
1310 | | |
1311 | | // Skip the evaluation of modes that do not match with the winner mode in |
1312 | | // x->mb_mode_cache. |
1313 | 6.61M | if (x->use_mb_mode_cache && mbmi->mode != x->mb_mode_cache->mode) continue; |
1314 | | |
1315 | 6.61M | is_directional_mode = av1_is_directional_mode(mbmi->mode); |
1316 | 6.61M | if (is_directional_mode && directional_mode_skip_mask[mbmi->mode]) continue; |
1317 | 5.04M | if (is_directional_mode && |
1318 | 5.04M | !(av1_use_angle_delta(bsize) && intra_mode_cfg->enable_angle_delta) && |
1319 | 5.04M | mbmi->angle_delta[PLANE_TYPE_Y] != 0) |
1320 | 1.11M | continue; |
1321 | | |
1322 | | // Use intra_y_mode_mask speed feature to skip intra mode evaluation. |
1323 | 3.92M | if (!(intra_sf->intra_y_mode_mask[max_txsize_lookup[bsize]] & |
1324 | 3.92M | (1 << mbmi->mode))) |
1325 | 0 | continue; |
1326 | | |
1327 | 3.92M | const TX_SIZE tx_size = AOMMIN(TX_32X32, max_txsize_lookup[bsize]); |
1328 | 3.92M | const int64_t this_model_rd = |
1329 | 3.92M | intra_model_rd(&cpi->common, x, 0, bsize, tx_size, /*use_hadamard=*/1); |
1330 | | |
1331 | 3.92M | const int model_rd_index_for_pruning = |
1332 | 3.92M | get_model_rd_index_for_pruning(x, intra_sf); |
1333 | | |
1334 | 3.92M | if (prune_intra_y_mode(this_model_rd, &best_model_rd, top_intra_model_rd, |
1335 | 3.92M | intra_sf->top_intra_model_count_allowed, |
1336 | 3.92M | model_rd_index_for_pruning)) |
1337 | 234k | continue; |
1338 | | |
1339 | | // Builds the actual prediction. The prediction from |
1340 | | // model_intra_yrd_and_prune was just an estimation that did not take into |
1341 | | // account the effect of txfm pipeline, so we need to redo it for real |
1342 | | // here. |
1343 | 3.69M | av1_pick_uniform_tx_size_type_yrd(cpi, x, &this_rd_stats, bsize, best_rd); |
1344 | 3.69M | this_rate_tokenonly = this_rd_stats.rate; |
1345 | 3.69M | this_distortion = this_rd_stats.dist; |
1346 | 3.69M | s = this_rd_stats.skip_txfm; |
1347 | | |
1348 | 3.69M | if (this_rate_tokenonly == INT_MAX) continue; |
1349 | | |
1350 | 2.96M | if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->bsize)) { |
1351 | | // av1_pick_uniform_tx_size_type_yrd above includes the cost of the |
1352 | | // tx_size in the tokenonly rate, but for intra blocks, tx_size is always |
1353 | | // coded (prediction granularity), so we account for it in the full rate, |
1354 | | // not the tokenonly rate. |
1355 | 1.83M | this_rate_tokenonly -= tx_size_cost(x, bsize, mbmi->tx_size); |
1356 | 1.83M | } |
1357 | 2.96M | this_rate = |
1358 | 2.96M | this_rd_stats.rate + |
1359 | 2.96M | intra_mode_info_cost_y(cpi, x, mbmi, bsize, bmode_costs[mbmi->mode]); |
1360 | 2.96M | this_rd = RDCOST(x->rdmult, this_rate, this_distortion); |
1361 | | |
1362 | | // Visual quality adjustment based on recon vs source variance. |
1363 | 2.96M | if ((cpi->oxcf.mode == ALLINTRA) && (this_rd != INT64_MAX)) { |
1364 | 2.96M | this_rd = (int64_t)(this_rd * intra_rd_variance_factor(cpi, x, bsize)); |
1365 | 2.96M | } |
1366 | | |
1367 | | // Collect mode stats for multiwinner mode processing |
1368 | 2.96M | const int txfm_search_done = 1; |
1369 | 2.96M | store_winner_mode_stats( |
1370 | 2.96M | &cpi->common, x, mbmi, NULL, NULL, NULL, 0, NULL, bsize, this_rd, |
1371 | 2.96M | cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done); |
1372 | 2.96M | if (this_rd < best_rd) { |
1373 | 75.1k | best_mbmi = *mbmi; |
1374 | 75.1k | best_rd = this_rd; |
1375 | | // Setting beat_best_rd flag because current mode rd is better than |
1376 | | // best_rd passed to this function |
1377 | 75.1k | beat_best_rd = 1; |
1378 | 75.1k | *rate = this_rate; |
1379 | 75.1k | *rate_tokenonly = this_rate_tokenonly; |
1380 | 75.1k | *distortion = this_distortion; |
1381 | 75.1k | *skippable = s; |
1382 | 75.1k | memcpy(ctx->blk_skip, x->txfm_search_info.blk_skip, |
1383 | 75.1k | sizeof(x->txfm_search_info.blk_skip[0]) * ctx->num_4x4_blk); |
1384 | 75.1k | av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk); |
1385 | 75.1k | } |
1386 | 2.96M | } |
1387 | | |
1388 | | // Searches palette |
1389 | 112k | if (try_palette) { |
1390 | 0 | av1_rd_pick_palette_intra_sby( |
1391 | 0 | cpi, x, bsize, bmode_costs[DC_PRED], &best_mbmi, best_palette_color_map, |
1392 | 0 | &best_rd, rate, rate_tokenonly, distortion, skippable, &beat_best_rd, |
1393 | 0 | ctx, ctx->blk_skip, ctx->tx_type_map); |
1394 | 0 | } |
1395 | | |
1396 | | // Searches filter_intra |
1397 | 112k | if (beat_best_rd && av1_filter_intra_allowed_bsize(&cpi->common, bsize)) { |
1398 | 57.2k | if (rd_pick_filter_intra_sby(cpi, x, rate, rate_tokenonly, distortion, |
1399 | 57.2k | skippable, bsize, bmode_costs[DC_PRED], |
1400 | 57.2k | best_mbmi.mode, &best_rd, &best_model_rd, |
1401 | 57.2k | ctx)) { |
1402 | 1.97k | best_mbmi = *mbmi; |
1403 | 1.97k | } |
1404 | 57.2k | } |
1405 | | |
1406 | | // No mode is identified with less rd value than best_rd passed to this |
1407 | | // function. In such cases winner mode processing is not necessary and return |
1408 | | // best_rd as INT64_MAX to indicate best mode is not identified |
1409 | 112k | if (!beat_best_rd) return INT64_MAX; |
1410 | | |
1411 | | // In multi-winner mode processing, perform tx search for few best modes |
1412 | | // identified during mode evaluation. Winner mode processing uses best tx |
1413 | | // configuration for tx search. |
1414 | 68.3k | if (cpi->sf.winner_mode_sf.multi_winner_mode_type) { |
1415 | 68.3k | int best_mode_idx = 0; |
1416 | 68.3k | int block_width, block_height; |
1417 | 68.3k | uint8_t *color_map_dst = xd->plane[PLANE_TYPE_Y].color_index_map; |
1418 | 68.3k | av1_get_block_dimensions(bsize, AOM_PLANE_Y, xd, &block_width, |
1419 | 68.3k | &block_height, NULL, NULL); |
1420 | | |
1421 | 204k | for (int mode_idx = 0; mode_idx < x->winner_mode_count; mode_idx++) { |
1422 | 136k | *mbmi = x->winner_mode_stats[mode_idx].mbmi; |
1423 | 136k | if (is_winner_mode_processing_enabled(cpi, x, mbmi, mbmi->mode)) { |
1424 | | // Restore color_map of palette mode before winner mode processing |
1425 | 136k | if (mbmi->palette_mode_info.palette_size[0] > 0) { |
1426 | 0 | uint8_t *color_map_src = |
1427 | 0 | x->winner_mode_stats[mode_idx].color_index_map; |
1428 | 0 | memcpy(color_map_dst, color_map_src, |
1429 | 0 | block_width * block_height * sizeof(*color_map_src)); |
1430 | 0 | } |
1431 | | // Set params for winner mode evaluation |
1432 | 136k | set_mode_eval_params(cpi, x, WINNER_MODE_EVAL); |
1433 | | |
1434 | | // Winner mode processing |
1435 | | // If previous searches use only the default tx type/no R-D optimization |
1436 | | // of quantized coeffs, do an extra search for the best tx type/better |
1437 | | // R-D optimization of quantized coeffs |
1438 | 136k | if (intra_block_yrd(cpi, x, bsize, bmode_costs, &best_rd, rate, |
1439 | 136k | rate_tokenonly, distortion, skippable, &best_mbmi, |
1440 | 136k | ctx)) |
1441 | 28.7k | best_mode_idx = mode_idx; |
1442 | 136k | } |
1443 | 136k | } |
1444 | | // Copy color_map of palette mode for final winner mode |
1445 | 68.3k | if (best_mbmi.palette_mode_info.palette_size[0] > 0) { |
1446 | 0 | uint8_t *color_map_src = |
1447 | 0 | x->winner_mode_stats[best_mode_idx].color_index_map; |
1448 | 0 | memcpy(color_map_dst, color_map_src, |
1449 | 0 | block_width * block_height * sizeof(*color_map_src)); |
1450 | 0 | } |
1451 | 18.4E | } else { |
1452 | | // If previous searches use only the default tx type/no R-D optimization of |
1453 | | // quantized coeffs, do an extra search for the best tx type/better R-D |
1454 | | // optimization of quantized coeffs |
1455 | 18.4E | if (is_winner_mode_processing_enabled(cpi, x, mbmi, best_mbmi.mode)) { |
1456 | | // Set params for winner mode evaluation |
1457 | 0 | set_mode_eval_params(cpi, x, WINNER_MODE_EVAL); |
1458 | 0 | *mbmi = best_mbmi; |
1459 | 0 | intra_block_yrd(cpi, x, bsize, bmode_costs, &best_rd, rate, |
1460 | 0 | rate_tokenonly, distortion, skippable, &best_mbmi, ctx); |
1461 | 0 | } |
1462 | 18.4E | } |
1463 | 68.3k | *mbmi = best_mbmi; |
1464 | 68.3k | av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk); |
1465 | 68.3k | return best_rd; |
1466 | 112k | } |