/src/aom/av1/encoder/tpl_model.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2019, Alliance for Open Media. All rights reserved. |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #include <assert.h> |
13 | | #include <float.h> |
14 | | #include <stdint.h> |
15 | | |
16 | | #include "config/aom_config.h" |
17 | | |
18 | | #if CONFIG_THREE_PASS |
19 | | #include "av1/encoder/thirdpass.h" |
20 | | #endif |
21 | | #include "config/aom_dsp_rtcd.h" |
22 | | #include "config/aom_scale_rtcd.h" |
23 | | |
24 | | #include "aom/aom_codec.h" |
25 | | #include "aom_util/aom_pthread.h" |
26 | | |
27 | | #include "av1/common/av1_common_int.h" |
28 | | #include "av1/common/enums.h" |
29 | | #include "av1/common/idct.h" |
30 | | #include "av1/common/reconintra.h" |
31 | | |
32 | | #include "av1/encoder/encoder.h" |
33 | | #include "av1/encoder/ethread.h" |
34 | | #include "av1/encoder/encodeframe_utils.h" |
35 | | #include "av1/encoder/encode_strategy.h" |
36 | | #include "av1/encoder/hybrid_fwd_txfm.h" |
37 | | #include "av1/encoder/motion_search_facade.h" |
38 | | #include "av1/encoder/rd.h" |
39 | | #include "av1/encoder/rdopt.h" |
40 | | #include "av1/encoder/reconinter_enc.h" |
41 | | #include "av1/encoder/tpl_model.h" |
42 | | |
43 | 0 | static inline double exp_bounded(double v) { |
44 | | // When v > 700 or v < -700, the exp function will be close to overflow or underflow. |
45 | | // For details, see the "Notes" in the following link. |
46 | | // https://en.cppreference.com/w/c/numeric/math/exp |
47 | 0 | if (v > 700) { |
48 | 0 | return DBL_MAX; |
49 | 0 | } else if (v < -700) { |
50 | 0 | return 0; |
51 | 0 | } |
52 | 0 | return exp(v); |
53 | 0 | } |
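exp() on IEEE doubles overflows for arguments above roughly 709.8 and decays toward zero below roughly -745, so clamping at +/-700 leaves margin on both sides. A minimal standalone sketch of the same guard (illustrative only; the encoder's exp_bounded above is static to this file):

  #include <assert.h>
  #include <float.h>
  #include <math.h>

  /* Illustrative sketch of the guard, not the encoder's function. */
  static double exp_bounded_sketch(double v) {
    if (v > 700) return DBL_MAX;  /* exp() overflows a double near v ~ 709.8 */
    if (v < -700) return 0;       /* result would be vanishingly small */
    return exp(v);
  }

  int main(void) {
    assert(exp_bounded_sketch(800.0) == DBL_MAX);
    assert(exp_bounded_sketch(-800.0) == 0.0);
    return 0;
  }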
54 | | |
55 | 0 | void av1_init_tpl_txfm_stats(TplTxfmStats *tpl_txfm_stats) { |
56 | 0 | tpl_txfm_stats->ready = 0; |
57 | 0 | tpl_txfm_stats->coeff_num = 256; |
58 | 0 | tpl_txfm_stats->txfm_block_count = 0; |
59 | 0 | memset(tpl_txfm_stats->abs_coeff_sum, 0, |
60 | 0 | sizeof(tpl_txfm_stats->abs_coeff_sum[0]) * tpl_txfm_stats->coeff_num); |
61 | 0 | memset(tpl_txfm_stats->abs_coeff_mean, 0, |
62 | 0 | sizeof(tpl_txfm_stats->abs_coeff_mean[0]) * tpl_txfm_stats->coeff_num); |
63 | 0 | } |
64 | | |
65 | | #if CONFIG_BITRATE_ACCURACY |
66 | | void av1_accumulate_tpl_txfm_stats(const TplTxfmStats *sub_stats, |
67 | | TplTxfmStats *accumulated_stats) { |
68 | | accumulated_stats->txfm_block_count += sub_stats->txfm_block_count; |
69 | | for (int i = 0; i < accumulated_stats->coeff_num; ++i) { |
70 | | accumulated_stats->abs_coeff_sum[i] += sub_stats->abs_coeff_sum[i]; |
71 | | } |
72 | | } |
73 | | |
74 | | void av1_record_tpl_txfm_block(TplTxfmStats *tpl_txfm_stats, |
75 | | const tran_low_t *coeff) { |
76 | | // For transforms larger than 16x16, the scale of coeff needs to be |
77 | | // adjusted; it is not LOSSLESS_Q_STEP. |
78 | | assert(tpl_txfm_stats->coeff_num <= 256); |
79 | | for (int i = 0; i < tpl_txfm_stats->coeff_num; ++i) { |
80 | | tpl_txfm_stats->abs_coeff_sum[i] += abs(coeff[i]) / (double)LOSSLESS_Q_STEP; |
81 | | } |
82 | | ++tpl_txfm_stats->txfm_block_count; |
83 | | } |
84 | | |
85 | | void av1_tpl_txfm_stats_update_abs_coeff_mean(TplTxfmStats *txfm_stats) { |
86 | | if (txfm_stats->txfm_block_count > 0) { |
87 | | for (int j = 0; j < txfm_stats->coeff_num; j++) { |
88 | | txfm_stats->abs_coeff_mean[j] = |
89 | | txfm_stats->abs_coeff_sum[j] / txfm_stats->txfm_block_count; |
90 | | } |
91 | | txfm_stats->ready = 1; |
92 | | } else { |
93 | | txfm_stats->ready = 0; |
94 | | } |
95 | | } |
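Under CONFIG_BITRATE_ACCURACY the intended flow is: av1_record_tpl_txfm_block() sums per-block absolute coefficients, av1_accumulate_tpl_txfm_stats() merges per-thread sums into a frame-level struct, and av1_tpl_txfm_stats_update_abs_coeff_mean() derives the mean once the counts are final. A small fragment showing that order, assuming two hypothetical thread-local stats already filled during encoding:

  TplTxfmStats frame_stats;
  av1_init_tpl_txfm_stats(&frame_stats);
  // thread_stats[0..1] are hypothetical per-thread structs populated by
  // av1_record_tpl_txfm_block() while encoding TPL blocks.
  for (int t = 0; t < 2; ++t)
    av1_accumulate_tpl_txfm_stats(&thread_stats[t], &frame_stats);
  // Divides abs_coeff_sum by txfm_block_count and sets ready = 1 (only when
  // the block count is nonzero).
  av1_tpl_txfm_stats_update_abs_coeff_mean(&frame_stats);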
96 | | |
97 | | static inline void av1_tpl_store_txfm_stats(TplParams *tpl_data, |
98 | | const TplTxfmStats *tpl_txfm_stats, |
99 | | const int frame_index) { |
100 | | tpl_data->txfm_stats_list[frame_index] = *tpl_txfm_stats; |
101 | | } |
102 | | #endif // CONFIG_BITRATE_ACCURACY |
103 | | |
104 | | static inline void get_quantize_error(const MACROBLOCK *x, int plane, |
105 | | const tran_low_t *coeff, |
106 | | tran_low_t *qcoeff, tran_low_t *dqcoeff, |
107 | | TX_SIZE tx_size, uint16_t *eob, |
108 | 0 | int64_t *recon_error, int64_t *sse) { |
109 | 0 | const struct macroblock_plane *const p = &x->plane[plane]; |
110 | 0 | const MACROBLOCKD *xd = &x->e_mbd; |
111 | 0 | const SCAN_ORDER *const scan_order = &av1_scan_orders[tx_size][DCT_DCT]; |
112 | 0 | int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]]; |
113 | 0 | const int shift = tx_size == TX_32X32 ? 0 : 2; |
114 | |
|
115 | 0 | QUANT_PARAM quant_param; |
116 | 0 | av1_setup_quant(tx_size, 0, AV1_XFORM_QUANT_FP, 0, &quant_param); |
117 | |
|
118 | 0 | #if CONFIG_AV1_HIGHBITDEPTH |
119 | 0 | if (is_cur_buf_hbd(xd)) { |
120 | 0 | av1_highbd_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, eob, |
121 | 0 | scan_order, &quant_param); |
122 | 0 | *recon_error = |
123 | 0 | av1_highbd_block_error(coeff, dqcoeff, pix_num, sse, xd->bd) >> shift; |
124 | 0 | } else { |
125 | 0 | av1_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, eob, scan_order, |
126 | 0 | &quant_param); |
127 | 0 | *recon_error = av1_block_error(coeff, dqcoeff, pix_num, sse) >> shift; |
128 | 0 | } |
129 | | #else |
130 | | (void)xd; |
131 | | av1_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, eob, scan_order, |
132 | | &quant_param); |
133 | | *recon_error = av1_block_error(coeff, dqcoeff, pix_num, sse) >> shift; |
134 | | #endif // CONFIG_AV1_HIGHBITDEPTH |
135 | |
|
136 | 0 | *recon_error = AOMMAX(*recon_error, 1); |
137 | |
|
138 | 0 | *sse = (*sse) >> shift; |
139 | 0 | *sse = AOMMAX(*sse, 1); |
140 | 0 | } |
141 | | |
142 | | static inline void set_tpl_stats_block_size(uint8_t *block_mis_log2, |
143 | 0 | uint8_t *tpl_bsize_1d) { |
144 | | // tpl stats bsize: 2 means 16x16 |
145 | 0 | *block_mis_log2 = 2; |
146 | | // Block size used in tpl motion estimation |
147 | 0 | *tpl_bsize_1d = 16; |
148 | | // MIN_TPL_BSIZE_1D = 16; |
149 | 0 | assert(*tpl_bsize_1d >= 16); |
150 | 0 | } |
151 | | |
152 | | void av1_setup_tpl_buffers(AV1_PRIMARY *const ppi, |
153 | | CommonModeInfoParams *const mi_params, int width, |
154 | 0 | int height, int byte_alignment, int lag_in_frames) { |
155 | 0 | SequenceHeader *const seq_params = &ppi->seq_params; |
156 | 0 | TplParams *const tpl_data = &ppi->tpl_data; |
157 | 0 | set_tpl_stats_block_size(&tpl_data->tpl_stats_block_mis_log2, |
158 | 0 | &tpl_data->tpl_bsize_1d); |
159 | 0 | const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2; |
160 | 0 | tpl_data->border_in_pixels = |
161 | 0 | ALIGN_POWER_OF_TWO(tpl_data->tpl_bsize_1d + 2 * AOM_INTERP_EXTEND, 5); |
162 | |
|
163 | 0 | const int alloc_y_plane_only = |
164 | 0 | ppi->cpi->sf.tpl_sf.use_y_only_rate_distortion ? 1 : 0; |
165 | 0 | for (int frame = 0; frame < MAX_LENGTH_TPL_FRAME_STATS; ++frame) { |
166 | 0 | const int mi_cols = |
167 | 0 | ALIGN_POWER_OF_TWO(mi_params->mi_cols, MAX_MIB_SIZE_LOG2); |
168 | 0 | const int mi_rows = |
169 | 0 | ALIGN_POWER_OF_TWO(mi_params->mi_rows, MAX_MIB_SIZE_LOG2); |
170 | 0 | TplDepFrame *tpl_frame = &tpl_data->tpl_stats_buffer[frame]; |
171 | 0 | tpl_frame->is_valid = 0; |
172 | 0 | tpl_frame->width = mi_cols >> block_mis_log2; |
173 | 0 | tpl_frame->height = mi_rows >> block_mis_log2; |
174 | 0 | tpl_frame->stride = tpl_data->tpl_stats_buffer[frame].width; |
175 | 0 | tpl_frame->mi_rows = mi_params->mi_rows; |
176 | 0 | tpl_frame->mi_cols = mi_params->mi_cols; |
177 | 0 | } |
178 | 0 | tpl_data->tpl_frame = &tpl_data->tpl_stats_buffer[REF_FRAMES + 1]; |
179 | | |
180 | | // If lag_in_frames <= 1, TPL module is not invoked. Hence dynamic memory |
181 | | // allocations are avoided for buffers in tpl_data. |
182 | 0 | if (lag_in_frames <= 1) return; |
183 | | |
184 | 0 | AOM_CHECK_MEM_ERROR(&ppi->error, tpl_data->txfm_stats_list, |
185 | 0 | aom_calloc(MAX_LENGTH_TPL_FRAME_STATS, |
186 | 0 | sizeof(*tpl_data->txfm_stats_list))); |
187 | |
|
188 | 0 | for (int frame = 0; frame < lag_in_frames; ++frame) { |
189 | 0 | AOM_CHECK_MEM_ERROR( |
190 | 0 | &ppi->error, tpl_data->tpl_stats_pool[frame], |
191 | 0 | aom_calloc(tpl_data->tpl_stats_buffer[frame].width * |
192 | 0 | tpl_data->tpl_stats_buffer[frame].height, |
193 | 0 | sizeof(*tpl_data->tpl_stats_buffer[frame].tpl_stats_ptr))); |
194 | |
|
195 | 0 | if (aom_alloc_frame_buffer( |
196 | 0 | &tpl_data->tpl_rec_pool[frame], width, height, |
197 | 0 | seq_params->subsampling_x, seq_params->subsampling_y, |
198 | 0 | seq_params->use_highbitdepth, tpl_data->border_in_pixels, |
199 | 0 | byte_alignment, false, alloc_y_plane_only)) |
200 | 0 | aom_internal_error(&ppi->error, AOM_CODEC_MEM_ERROR, |
201 | 0 | "Failed to allocate frame buffer"); |
202 | 0 | } |
203 | 0 | } |
204 | | |
205 | | static inline int32_t tpl_get_satd_cost(BitDepthInfo bd_info, int16_t *src_diff, |
206 | | int diff_stride, const uint8_t *src, |
207 | | int src_stride, const uint8_t *dst, |
208 | | int dst_stride, tran_low_t *coeff, |
209 | 0 | int bw, int bh, TX_SIZE tx_size) { |
210 | 0 | const int pix_num = bw * bh; |
211 | |
|
212 | 0 | av1_subtract_block(bd_info, bh, bw, src_diff, diff_stride, src, src_stride, |
213 | 0 | dst, dst_stride); |
214 | 0 | av1_quick_txfm(/*use_hadamard=*/0, tx_size, bd_info, src_diff, bw, coeff); |
215 | 0 | return aom_satd(coeff, pix_num); |
216 | 0 | } |
217 | | |
218 | 0 | static int rate_estimator(const tran_low_t *qcoeff, int eob, TX_SIZE tx_size) { |
219 | 0 | const SCAN_ORDER *const scan_order = &av1_scan_orders[tx_size][DCT_DCT]; |
220 | |
|
221 | 0 | assert((1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]]) >= eob); |
222 | 0 | int rate_cost = 1; |
223 | |
|
224 | 0 | for (int idx = 0; idx < eob; ++idx) { |
225 | 0 | unsigned int abs_level = abs(qcoeff[scan_order->scan[idx]]); |
226 | 0 | rate_cost += get_msb(abs_level + 1) + 1 + (abs_level > 0); |
227 | 0 | } |
228 | |
|
229 | 0 | return (rate_cost << AV1_PROB_COST_SHIFT); |
230 | 0 | } |
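The model charges each coefficient up to the EOB roughly get_msb(|level| + 1) + 1 bits plus one bit when the level is nonzero, then scales the total by AV1_PROB_COST_SHIFT. A worked example with illustrative values (qcoeff in scan order = {4, -2, 1, 0, ...}, eob = 3):

  level 4: get_msb(5) + 1 + 1 = 2 + 1 + 1 = 4
  level 2: get_msb(3) + 1 + 1 = 1 + 1 + 1 = 3
  level 1: get_msb(2) + 1 + 1 = 1 + 1 + 1 = 3
  rate_cost = 1 + 4 + 3 + 3 = 11, returned as 11 << AV1_PROB_COST_SHIFT.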
231 | | |
232 | | static inline void txfm_quant_rdcost( |
233 | | const MACROBLOCK *x, int16_t *src_diff, int diff_stride, uint8_t *src, |
234 | | int src_stride, uint8_t *dst, int dst_stride, tran_low_t *coeff, |
235 | | tran_low_t *qcoeff, tran_low_t *dqcoeff, int bw, int bh, TX_SIZE tx_size, |
236 | 0 | int do_recon, int *rate_cost, int64_t *recon_error, int64_t *sse) { |
237 | 0 | const MACROBLOCKD *xd = &x->e_mbd; |
238 | 0 | const BitDepthInfo bd_info = get_bit_depth_info(xd); |
239 | 0 | uint16_t eob; |
240 | 0 | av1_subtract_block(bd_info, bh, bw, src_diff, diff_stride, src, src_stride, |
241 | 0 | dst, dst_stride); |
242 | 0 | av1_quick_txfm(/*use_hadamard=*/0, tx_size, bd_info, src_diff, bw, coeff); |
243 | |
|
244 | 0 | get_quantize_error(x, 0, coeff, qcoeff, dqcoeff, tx_size, &eob, recon_error, |
245 | 0 | sse); |
246 | |
|
247 | 0 | *rate_cost = rate_estimator(qcoeff, eob, tx_size); |
248 | |
|
249 | 0 | if (do_recon) |
250 | 0 | av1_inverse_transform_block(xd, dqcoeff, 0, DCT_DCT, tx_size, dst, |
251 | 0 | dst_stride, eob, 0); |
252 | 0 | } |
253 | | |
254 | | static uint32_t motion_estimation(AV1_COMP *cpi, MACROBLOCK *x, |
255 | | uint8_t *cur_frame_buf, |
256 | | uint8_t *ref_frame_buf, int stride, |
257 | | int ref_stride, int width, int ref_width, |
258 | | BLOCK_SIZE bsize, MV center_mv, |
259 | 0 | int_mv *best_mv) { |
260 | 0 | AV1_COMMON *cm = &cpi->common; |
261 | 0 | MACROBLOCKD *const xd = &x->e_mbd; |
262 | 0 | TPL_SPEED_FEATURES *tpl_sf = &cpi->sf.tpl_sf; |
263 | 0 | int step_param; |
264 | 0 | uint32_t bestsme = UINT_MAX; |
265 | 0 | FULLPEL_MV_STATS best_mv_stats; |
266 | 0 | int distortion; |
267 | 0 | uint32_t sse; |
268 | 0 | int cost_list[5]; |
269 | 0 | FULLPEL_MV start_mv = get_fullmv_from_mv(¢er_mv); |
270 | | |
271 | | // Setup frame pointers |
272 | 0 | x->plane[0].src.buf = cur_frame_buf; |
273 | 0 | x->plane[0].src.stride = stride; |
274 | 0 | x->plane[0].src.width = width; |
275 | 0 | xd->plane[0].pre[0].buf = ref_frame_buf; |
276 | 0 | xd->plane[0].pre[0].stride = ref_stride; |
277 | 0 | xd->plane[0].pre[0].width = ref_width; |
278 | |
|
279 | 0 | step_param = tpl_sf->reduce_first_step_size; |
280 | 0 | step_param = AOMMIN(step_param, MAX_MVSEARCH_STEPS - 2); |
281 | |
|
282 | 0 | const search_site_config *search_site_cfg = |
283 | 0 | cpi->mv_search_params.search_site_cfg[SS_CFG_SRC]; |
284 | 0 | if (search_site_cfg->stride != ref_stride) |
285 | 0 | search_site_cfg = cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD]; |
286 | 0 | assert(search_site_cfg->stride == ref_stride); |
287 | |
|
288 | 0 | FULLPEL_MOTION_SEARCH_PARAMS full_ms_params; |
289 | 0 | av1_make_default_fullpel_ms_params(&full_ms_params, cpi, x, bsize, ¢er_mv, |
290 | 0 | start_mv, search_site_cfg, |
291 | 0 | tpl_sf->search_method, |
292 | 0 | /*fine_search_interval=*/0); |
293 | |
|
294 | 0 | bestsme = av1_full_pixel_search(start_mv, &full_ms_params, step_param, |
295 | 0 | cond_cost_list(cpi, cost_list), |
296 | 0 | &best_mv->as_fullmv, &best_mv_stats, NULL); |
297 | | |
298 | | // When sub-pel motion search is skipped, populate sub-pel precision MV and |
299 | | // return. |
300 | 0 | if (tpl_sf->subpel_force_stop == FULL_PEL) { |
301 | 0 | best_mv->as_mv = get_mv_from_fullmv(&best_mv->as_fullmv); |
302 | 0 | return bestsme; |
303 | 0 | } |
304 | | |
305 | 0 | SUBPEL_MOTION_SEARCH_PARAMS ms_params; |
306 | 0 | av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize, ¢er_mv, |
307 | 0 | cost_list); |
308 | 0 | ms_params.forced_stop = tpl_sf->subpel_force_stop; |
309 | 0 | ms_params.var_params.subpel_search_type = USE_2_TAPS; |
310 | 0 | ms_params.mv_cost_params.mv_cost_type = MV_COST_NONE; |
311 | 0 | best_mv_stats.err_cost = 0; |
312 | 0 | MV subpel_start_mv = get_mv_from_fullmv(&best_mv->as_fullmv); |
313 | 0 | assert(av1_is_subpelmv_in_range(&ms_params.mv_limits, subpel_start_mv)); |
314 | 0 | bestsme = cpi->mv_search_params.find_fractional_mv_step( |
315 | 0 | xd, cm, &ms_params, subpel_start_mv, &best_mv_stats, &best_mv->as_mv, |
316 | 0 | &distortion, &sse, NULL); |
317 | |
|
318 | 0 | return bestsme; |
319 | 0 | } |
320 | | |
321 | | typedef struct { |
322 | | int_mv mv; |
323 | | int sad; |
324 | | } center_mv_t; |
325 | | |
326 | 0 | static int compare_sad(const void *a, const void *b) { |
327 | 0 | const int diff = ((center_mv_t *)a)->sad - ((center_mv_t *)b)->sad; |
328 | 0 | if (diff < 0) |
329 | 0 | return -1; |
330 | 0 | else if (diff > 0) |
331 | 0 | return 1; |
332 | 0 | return 0; |
333 | 0 | } |
334 | | |
335 | | static int is_alike_mv(int_mv candidate_mv, center_mv_t *center_mvs, |
336 | 0 | int center_mvs_count, int skip_alike_starting_mv) { |
337 | | // MV difference threshold is in 1/8-pel precision. |
338 | 0 | const int mv_diff_thr[3] = { 1, (8 << 3), (16 << 3) }; |
339 | 0 | int thr = mv_diff_thr[skip_alike_starting_mv]; |
340 | 0 | int i; |
341 | |
|
342 | 0 | for (i = 0; i < center_mvs_count; i++) { |
343 | 0 | if (abs(center_mvs[i].mv.as_mv.col - candidate_mv.as_mv.col) < thr && |
344 | 0 | abs(center_mvs[i].mv.as_mv.row - candidate_mv.as_mv.row) < thr) |
345 | 0 | return 1; |
346 | 0 | } |
347 | | |
348 | 0 | return 0; |
349 | 0 | } |
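The thresholds are in 1/8-pel units, so the three skip_alike_starting_mv levels correspond to an exact match, within 8 full pixels (8 << 3 = 64), and within 16 full pixels (16 << 3 = 128). For example (illustrative values), with level 1 a candidate MV of (row, col) = (40, -24) is treated as alike to a stored center MV of (0, 0) because both |40| and |-24| are below 64.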
350 | | |
351 | | static void get_rate_distortion( |
352 | | int *rate_cost, int64_t *recon_error, int64_t *pred_error, |
353 | | int16_t *src_diff, tran_low_t *coeff, tran_low_t *qcoeff, |
354 | | tran_low_t *dqcoeff, AV1_COMMON *cm, MACROBLOCK *x, |
355 | | const YV12_BUFFER_CONFIG *ref_frame_ptr[2], uint8_t *rec_buffer_pool[3], |
356 | | const int rec_stride_pool[3], TX_SIZE tx_size, PREDICTION_MODE best_mode, |
357 | | int mi_row, int mi_col, int use_y_only_rate_distortion, int do_recon, |
358 | 0 | TplTxfmStats *tpl_txfm_stats) { |
359 | 0 | const SequenceHeader *seq_params = cm->seq_params; |
360 | 0 | *rate_cost = 0; |
361 | 0 | *recon_error = 1; |
362 | 0 | *pred_error = 1; |
363 | |
|
364 | 0 | (void)tpl_txfm_stats; |
365 | |
|
366 | 0 | MACROBLOCKD *xd = &x->e_mbd; |
367 | 0 | int is_compound = (best_mode == NEW_NEWMV); |
368 | 0 | int num_planes = use_y_only_rate_distortion ? 1 : MAX_MB_PLANE; |
369 | |
|
370 | 0 | uint8_t *src_buffer_pool[MAX_MB_PLANE] = { |
371 | 0 | xd->cur_buf->y_buffer, |
372 | 0 | xd->cur_buf->u_buffer, |
373 | 0 | xd->cur_buf->v_buffer, |
374 | 0 | }; |
375 | 0 | const int src_stride_pool[MAX_MB_PLANE] = { |
376 | 0 | xd->cur_buf->y_stride, |
377 | 0 | xd->cur_buf->uv_stride, |
378 | 0 | xd->cur_buf->uv_stride, |
379 | 0 | }; |
380 | |
|
381 | 0 | const int_interpfilters kernel = |
382 | 0 | av1_broadcast_interp_filter(EIGHTTAP_REGULAR); |
383 | |
|
384 | 0 | for (int plane = 0; plane < num_planes; ++plane) { |
385 | 0 | struct macroblockd_plane *pd = &xd->plane[plane]; |
386 | 0 | BLOCK_SIZE bsize_plane = |
387 | 0 | av1_ss_size_lookup[txsize_to_bsize[tx_size]][pd->subsampling_x] |
388 | 0 | [pd->subsampling_y]; |
389 | |
|
390 | 0 | int dst_buffer_stride = rec_stride_pool[plane]; |
391 | 0 | int dst_mb_offset = |
392 | 0 | ((mi_row * MI_SIZE * dst_buffer_stride) >> pd->subsampling_y) + |
393 | 0 | ((mi_col * MI_SIZE) >> pd->subsampling_x); |
394 | 0 | uint8_t *dst_buffer = rec_buffer_pool[plane] + dst_mb_offset; |
395 | 0 | for (int ref = 0; ref < 1 + is_compound; ++ref) { |
396 | 0 | if (!is_inter_mode(best_mode)) { |
397 | 0 | av1_predict_intra_block( |
398 | 0 | xd, seq_params->sb_size, seq_params->enable_intra_edge_filter, |
399 | 0 | block_size_wide[bsize_plane], block_size_high[bsize_plane], |
400 | 0 | max_txsize_rect_lookup[bsize_plane], best_mode, 0, 0, |
401 | 0 | FILTER_INTRA_MODES, dst_buffer, dst_buffer_stride, dst_buffer, |
402 | 0 | dst_buffer_stride, 0, 0, plane); |
403 | 0 | } else { |
404 | 0 | int_mv best_mv = xd->mi[0]->mv[ref]; |
405 | 0 | uint8_t *ref_buffer_pool[MAX_MB_PLANE] = { |
406 | 0 | ref_frame_ptr[ref]->y_buffer, |
407 | 0 | ref_frame_ptr[ref]->u_buffer, |
408 | 0 | ref_frame_ptr[ref]->v_buffer, |
409 | 0 | }; |
410 | 0 | InterPredParams inter_pred_params; |
411 | 0 | struct buf_2d ref_buf = { |
412 | 0 | NULL, ref_buffer_pool[plane], |
413 | 0 | plane ? ref_frame_ptr[ref]->uv_width : ref_frame_ptr[ref]->y_width, |
414 | 0 | plane ? ref_frame_ptr[ref]->uv_height : ref_frame_ptr[ref]->y_height, |
415 | 0 | plane ? ref_frame_ptr[ref]->uv_stride : ref_frame_ptr[ref]->y_stride |
416 | 0 | }; |
417 | 0 | av1_init_inter_params(&inter_pred_params, block_size_wide[bsize_plane], |
418 | 0 | block_size_high[bsize_plane], |
419 | 0 | (mi_row * MI_SIZE) >> pd->subsampling_y, |
420 | 0 | (mi_col * MI_SIZE) >> pd->subsampling_x, |
421 | 0 | pd->subsampling_x, pd->subsampling_y, xd->bd, |
422 | 0 | is_cur_buf_hbd(xd), 0, |
423 | 0 | xd->block_ref_scale_factors[0], &ref_buf, kernel); |
424 | 0 | if (is_compound) av1_init_comp_mode(&inter_pred_params); |
425 | 0 | inter_pred_params.conv_params = get_conv_params_no_round( |
426 | 0 | ref, plane, xd->tmp_conv_dst, MAX_SB_SIZE, is_compound, xd->bd); |
427 | |
|
428 | 0 | av1_enc_build_one_inter_predictor(dst_buffer, dst_buffer_stride, |
429 | 0 | &best_mv.as_mv, &inter_pred_params); |
430 | 0 | } |
431 | 0 | } |
432 | |
|
433 | 0 | int src_stride = src_stride_pool[plane]; |
434 | 0 | int src_mb_offset = ((mi_row * MI_SIZE * src_stride) >> pd->subsampling_y) + |
435 | 0 | ((mi_col * MI_SIZE) >> pd->subsampling_x); |
436 | |
|
437 | 0 | int this_rate = 1; |
438 | 0 | int64_t this_recon_error = 1; |
439 | 0 | int64_t sse; |
440 | 0 | txfm_quant_rdcost( |
441 | 0 | x, src_diff, block_size_wide[bsize_plane], |
442 | 0 | src_buffer_pool[plane] + src_mb_offset, src_stride, dst_buffer, |
443 | 0 | dst_buffer_stride, coeff, qcoeff, dqcoeff, block_size_wide[bsize_plane], |
444 | 0 | block_size_high[bsize_plane], max_txsize_rect_lookup[bsize_plane], |
445 | 0 | do_recon, &this_rate, &this_recon_error, &sse); |
446 | |
|
447 | | #if CONFIG_BITRATE_ACCURACY |
448 | | if (plane == 0 && tpl_txfm_stats) { |
449 | | // We only collect the Y plane's transform coefficients. |
450 | | av1_record_tpl_txfm_block(tpl_txfm_stats, coeff); |
451 | | } |
452 | | #endif // CONFIG_BITRATE_ACCURACY |
453 | |
|
454 | 0 | *recon_error += this_recon_error; |
455 | 0 | *pred_error += sse; |
456 | 0 | *rate_cost += this_rate; |
457 | 0 | } |
458 | 0 | } |
459 | | |
460 | | static inline int32_t get_inter_cost(const AV1_COMP *cpi, MACROBLOCKD *xd, |
461 | | const uint8_t *src_mb_buffer, |
462 | | int src_stride, |
463 | | TplBuffers *tpl_tmp_buffers, |
464 | | BLOCK_SIZE bsize, TX_SIZE tx_size, |
465 | | int mi_row, int mi_col, int rf_idx, |
466 | 0 | MV *rfidx_mv, int use_pred_sad) { |
467 | 0 | const BitDepthInfo bd_info = get_bit_depth_info(xd); |
468 | 0 | TplParams *tpl_data = &cpi->ppi->tpl_data; |
469 | 0 | const YV12_BUFFER_CONFIG *const ref_frame_ptr = |
470 | 0 | tpl_data->src_ref_frame[rf_idx]; |
471 | 0 | int16_t *src_diff = tpl_tmp_buffers->src_diff; |
472 | 0 | tran_low_t *coeff = tpl_tmp_buffers->coeff; |
473 | 0 | const int bw = 4 << mi_size_wide_log2[bsize]; |
474 | 0 | const int bh = 4 << mi_size_high_log2[bsize]; |
475 | 0 | int32_t inter_cost; |
476 | |
|
477 | 0 | if (cpi->sf.tpl_sf.subpel_force_stop != FULL_PEL) { |
478 | 0 | const int_interpfilters kernel = |
479 | 0 | av1_broadcast_interp_filter(EIGHTTAP_REGULAR); |
480 | 0 | uint8_t *predictor8 = tpl_tmp_buffers->predictor8; |
481 | 0 | uint8_t *predictor = |
482 | 0 | is_cur_buf_hbd(xd) ? CONVERT_TO_BYTEPTR(predictor8) : predictor8; |
483 | 0 | struct buf_2d ref_buf = { NULL, ref_frame_ptr->y_buffer, |
484 | 0 | ref_frame_ptr->y_width, ref_frame_ptr->y_height, |
485 | 0 | ref_frame_ptr->y_stride }; |
486 | 0 | InterPredParams inter_pred_params; |
487 | 0 | av1_init_inter_params(&inter_pred_params, bw, bh, mi_row * MI_SIZE, |
488 | 0 | mi_col * MI_SIZE, 0, 0, xd->bd, is_cur_buf_hbd(xd), 0, |
489 | 0 | &tpl_data->sf, &ref_buf, kernel); |
490 | 0 | inter_pred_params.conv_params = get_conv_params(0, 0, xd->bd); |
491 | |
|
492 | 0 | av1_enc_build_one_inter_predictor(predictor, bw, rfidx_mv, |
493 | 0 | &inter_pred_params); |
494 | |
|
495 | 0 | if (use_pred_sad) { |
496 | 0 | inter_cost = (int)cpi->ppi->fn_ptr[bsize].sdf(src_mb_buffer, src_stride, |
497 | 0 | predictor, bw); |
498 | 0 | } else { |
499 | 0 | inter_cost = |
500 | 0 | tpl_get_satd_cost(bd_info, src_diff, bw, src_mb_buffer, src_stride, |
501 | 0 | predictor, bw, coeff, bw, bh, tx_size); |
502 | 0 | } |
503 | 0 | } else { |
504 | 0 | int ref_mb_offset = |
505 | 0 | mi_row * MI_SIZE * ref_frame_ptr->y_stride + mi_col * MI_SIZE; |
506 | 0 | uint8_t *ref_mb = ref_frame_ptr->y_buffer + ref_mb_offset; |
507 | 0 | int ref_stride = ref_frame_ptr->y_stride; |
508 | 0 | const FULLPEL_MV fullmv = get_fullmv_from_mv(rfidx_mv); |
509 | | // Since sub-pel motion search is not performed, use the prediction pixels |
510 | | // directly from the reference block ref_mb. |
511 | 0 | if (use_pred_sad) { |
512 | 0 | inter_cost = (int)cpi->ppi->fn_ptr[bsize].sdf( |
513 | 0 | src_mb_buffer, src_stride, |
514 | 0 | &ref_mb[fullmv.row * ref_stride + fullmv.col], ref_stride); |
515 | 0 | } else { |
516 | 0 | inter_cost = |
517 | 0 | tpl_get_satd_cost(bd_info, src_diff, bw, src_mb_buffer, src_stride, |
518 | 0 | &ref_mb[fullmv.row * ref_stride + fullmv.col], |
519 | 0 | ref_stride, coeff, bw, bh, tx_size); |
520 | 0 | } |
521 | 0 | } |
522 | 0 | return inter_cost; |
523 | 0 | } |
524 | | |
525 | | static inline void mode_estimation(AV1_COMP *cpi, TplTxfmStats *tpl_txfm_stats, |
526 | | TplBuffers *tpl_tmp_buffers, MACROBLOCK *x, |
527 | | int mi_row, int mi_col, BLOCK_SIZE bsize, |
528 | 0 | TX_SIZE tx_size, TplDepStats *tpl_stats) { |
529 | 0 | AV1_COMMON *cm = &cpi->common; |
530 | 0 | const GF_GROUP *gf_group = &cpi->ppi->gf_group; |
531 | 0 | TPL_SPEED_FEATURES *tpl_sf = &cpi->sf.tpl_sf; |
532 | |
|
533 | 0 | (void)gf_group; |
534 | |
|
535 | 0 | MACROBLOCKD *xd = &x->e_mbd; |
536 | 0 | const BitDepthInfo bd_info = get_bit_depth_info(xd); |
537 | 0 | TplParams *tpl_data = &cpi->ppi->tpl_data; |
538 | 0 | TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_data->frame_idx]; |
539 | 0 | const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2; |
540 | |
|
541 | 0 | const int bw = 4 << mi_size_wide_log2[bsize]; |
542 | 0 | const int bh = 4 << mi_size_high_log2[bsize]; |
543 | |
|
544 | 0 | int32_t best_intra_cost = INT32_MAX; |
545 | 0 | int32_t intra_cost; |
546 | 0 | PREDICTION_MODE best_mode = DC_PRED; |
547 | |
|
548 | 0 | const int mb_y_offset = |
549 | 0 | mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE; |
550 | 0 | uint8_t *src_mb_buffer = xd->cur_buf->y_buffer + mb_y_offset; |
551 | 0 | const int src_stride = xd->cur_buf->y_stride; |
552 | 0 | const int src_width = xd->cur_buf->y_width; |
553 | |
|
554 | 0 | int dst_mb_offset = |
555 | 0 | mi_row * MI_SIZE * tpl_frame->rec_picture->y_stride + mi_col * MI_SIZE; |
556 | 0 | uint8_t *dst_buffer = tpl_frame->rec_picture->y_buffer + dst_mb_offset; |
557 | 0 | int dst_buffer_stride = tpl_frame->rec_picture->y_stride; |
558 | 0 | int use_y_only_rate_distortion = tpl_sf->use_y_only_rate_distortion; |
559 | |
|
560 | 0 | uint8_t *rec_buffer_pool[3] = { |
561 | 0 | tpl_frame->rec_picture->y_buffer, |
562 | 0 | tpl_frame->rec_picture->u_buffer, |
563 | 0 | tpl_frame->rec_picture->v_buffer, |
564 | 0 | }; |
565 | |
|
566 | 0 | const int rec_stride_pool[3] = { |
567 | 0 | tpl_frame->rec_picture->y_stride, |
568 | 0 | tpl_frame->rec_picture->uv_stride, |
569 | 0 | tpl_frame->rec_picture->uv_stride, |
570 | 0 | }; |
571 | |
|
572 | 0 | for (int plane = 1; plane < MAX_MB_PLANE; ++plane) { |
573 | 0 | struct macroblockd_plane *pd = &xd->plane[plane]; |
574 | 0 | pd->subsampling_x = xd->cur_buf->subsampling_x; |
575 | 0 | pd->subsampling_y = xd->cur_buf->subsampling_y; |
576 | 0 | } |
577 | |
|
578 | 0 | uint8_t *predictor8 = tpl_tmp_buffers->predictor8; |
579 | 0 | int16_t *src_diff = tpl_tmp_buffers->src_diff; |
580 | 0 | tran_low_t *coeff = tpl_tmp_buffers->coeff; |
581 | 0 | tran_low_t *qcoeff = tpl_tmp_buffers->qcoeff; |
582 | 0 | tran_low_t *dqcoeff = tpl_tmp_buffers->dqcoeff; |
583 | 0 | uint8_t *predictor = |
584 | 0 | is_cur_buf_hbd(xd) ? CONVERT_TO_BYTEPTR(predictor8) : predictor8; |
585 | 0 | int64_t recon_error = 1; |
586 | 0 | int64_t pred_error = 1; |
587 | |
|
588 | 0 | memset(tpl_stats, 0, sizeof(*tpl_stats)); |
589 | 0 | tpl_stats->ref_frame_index[0] = -1; |
590 | 0 | tpl_stats->ref_frame_index[1] = -1; |
591 | |
|
592 | 0 | const int mi_width = mi_size_wide[bsize]; |
593 | 0 | const int mi_height = mi_size_high[bsize]; |
594 | 0 | set_mode_info_offsets(&cpi->common.mi_params, &cpi->mbmi_ext_info, x, xd, |
595 | 0 | mi_row, mi_col); |
596 | 0 | set_mi_row_col(xd, &xd->tile, mi_row, mi_height, mi_col, mi_width, |
597 | 0 | cm->mi_params.mi_rows, cm->mi_params.mi_cols); |
598 | 0 | set_plane_n4(xd, mi_size_wide[bsize], mi_size_high[bsize], |
599 | 0 | av1_num_planes(cm)); |
600 | 0 | xd->mi[0]->bsize = bsize; |
601 | 0 | xd->mi[0]->motion_mode = SIMPLE_TRANSLATION; |
602 | | |
603 | | // Intra prediction search |
604 | 0 | xd->mi[0]->ref_frame[0] = INTRA_FRAME; |
605 | | |
606 | | // Pre-load the bottom left line. |
607 | 0 | if (xd->left_available && |
608 | 0 | mi_row + tx_size_high_unit[tx_size] < xd->tile.mi_row_end) { |
609 | 0 | if (is_cur_buf_hbd(xd)) { |
610 | 0 | uint16_t *dst = CONVERT_TO_SHORTPTR(dst_buffer); |
611 | 0 | for (int i = 0; i < bw; ++i) |
612 | 0 | dst[(bw + i) * dst_buffer_stride - 1] = |
613 | 0 | dst[(bw - 1) * dst_buffer_stride - 1]; |
614 | 0 | } else { |
615 | 0 | for (int i = 0; i < bw; ++i) |
616 | 0 | dst_buffer[(bw + i) * dst_buffer_stride - 1] = |
617 | 0 | dst_buffer[(bw - 1) * dst_buffer_stride - 1]; |
618 | 0 | } |
619 | 0 | } |
620 | | |
621 | | // if cpi->sf.tpl_sf.prune_intra_modes is on, then search only DC_PRED, |
622 | | // H_PRED, and V_PRED |
623 | 0 | const PREDICTION_MODE last_intra_mode = |
624 | 0 | tpl_sf->prune_intra_modes ? D45_PRED : INTRA_MODE_END; |
625 | 0 | const SequenceHeader *seq_params = cm->seq_params; |
626 | 0 | for (PREDICTION_MODE mode = INTRA_MODE_START; mode < last_intra_mode; |
627 | 0 | ++mode) { |
628 | 0 | av1_predict_intra_block(xd, seq_params->sb_size, |
629 | 0 | seq_params->enable_intra_edge_filter, |
630 | 0 | block_size_wide[bsize], block_size_high[bsize], |
631 | 0 | tx_size, mode, 0, 0, FILTER_INTRA_MODES, dst_buffer, |
632 | 0 | dst_buffer_stride, predictor, bw, 0, 0, 0); |
633 | |
|
634 | 0 | if (tpl_frame->use_pred_sad) { |
635 | 0 | intra_cost = (int32_t)cpi->ppi->fn_ptr[bsize].sdf( |
636 | 0 | src_mb_buffer, src_stride, predictor, bw); |
637 | 0 | } else { |
638 | 0 | intra_cost = |
639 | 0 | tpl_get_satd_cost(bd_info, src_diff, bw, src_mb_buffer, src_stride, |
640 | 0 | predictor, bw, coeff, bw, bh, tx_size); |
641 | 0 | } |
642 | |
|
643 | 0 | if (intra_cost < best_intra_cost) { |
644 | 0 | best_intra_cost = intra_cost; |
645 | 0 | best_mode = mode; |
646 | 0 | } |
647 | 0 | } |
648 | | // Calculate SATD of the best intra mode if SAD was used for mode decision |
649 | | // as best_intra_cost is used in the ML model to skip intra mode evaluation. |
650 | 0 | if (tpl_frame->use_pred_sad) { |
651 | 0 | av1_predict_intra_block( |
652 | 0 | xd, seq_params->sb_size, seq_params->enable_intra_edge_filter, |
653 | 0 | block_size_wide[bsize], block_size_high[bsize], tx_size, best_mode, 0, |
654 | 0 | 0, FILTER_INTRA_MODES, dst_buffer, dst_buffer_stride, predictor, bw, 0, |
655 | 0 | 0, 0); |
656 | 0 | best_intra_cost = |
657 | 0 | tpl_get_satd_cost(bd_info, src_diff, bw, src_mb_buffer, src_stride, |
658 | 0 | predictor, bw, coeff, bw, bh, tx_size); |
659 | 0 | } |
660 | |
|
661 | 0 | int rate_cost = 1; |
662 | |
|
663 | 0 | if (cpi->use_ducky_encode) { |
664 | 0 | get_rate_distortion(&rate_cost, &recon_error, &pred_error, src_diff, coeff, |
665 | 0 | qcoeff, dqcoeff, cm, x, NULL, rec_buffer_pool, |
666 | 0 | rec_stride_pool, tx_size, best_mode, mi_row, mi_col, |
667 | 0 | use_y_only_rate_distortion, 1 /*do_recon*/, NULL); |
668 | |
|
669 | 0 | tpl_stats->intra_dist = recon_error << TPL_DEP_COST_SCALE_LOG2; |
670 | 0 | tpl_stats->intra_sse = pred_error << TPL_DEP_COST_SCALE_LOG2; |
671 | 0 | tpl_stats->intra_rate = rate_cost; |
672 | 0 | } |
673 | |
|
674 | | #if CONFIG_THREE_PASS |
675 | | const int frame_offset = tpl_data->frame_idx - cpi->gf_frame_index; |
676 | | |
677 | | if (cpi->third_pass_ctx && |
678 | | frame_offset < cpi->third_pass_ctx->frame_info_count && |
679 | | tpl_data->frame_idx < gf_group->size) { |
680 | | double ratio_h, ratio_w; |
681 | | av1_get_third_pass_ratio(cpi->third_pass_ctx, frame_offset, cm->height, |
682 | | cm->width, &ratio_h, &ratio_w); |
683 | | THIRD_PASS_MI_INFO *this_mi = av1_get_third_pass_mi( |
684 | | cpi->third_pass_ctx, frame_offset, mi_row, mi_col, ratio_h, ratio_w); |
685 | | |
686 | | PREDICTION_MODE third_pass_mode = this_mi->pred_mode; |
687 | | |
688 | | if (third_pass_mode >= last_intra_mode && |
689 | | third_pass_mode < INTRA_MODE_END) { |
690 | | av1_predict_intra_block( |
691 | | xd, seq_params->sb_size, seq_params->enable_intra_edge_filter, |
692 | | block_size_wide[bsize], block_size_high[bsize], tx_size, |
693 | | third_pass_mode, 0, 0, FILTER_INTRA_MODES, dst_buffer, |
694 | | dst_buffer_stride, predictor, bw, 0, 0, 0); |
695 | | |
696 | | intra_cost = |
697 | | tpl_get_satd_cost(bd_info, src_diff, bw, src_mb_buffer, src_stride, |
698 | | predictor, bw, coeff, bw, bh, tx_size); |
699 | | |
700 | | if (intra_cost < best_intra_cost) { |
701 | | best_intra_cost = intra_cost; |
702 | | best_mode = third_pass_mode; |
703 | | } |
704 | | } |
705 | | } |
706 | | #endif // CONFIG_THREE_PASS |
707 | | |
708 | | // Motion compensated prediction |
709 | 0 | xd->mi[0]->ref_frame[0] = INTRA_FRAME; |
710 | 0 | xd->mi[0]->ref_frame[1] = NONE_FRAME; |
711 | 0 | xd->mi[0]->compound_idx = 1; |
712 | |
|
713 | 0 | int best_rf_idx = -1; |
714 | 0 | int_mv best_mv[2]; |
715 | 0 | int32_t inter_cost; |
716 | 0 | int32_t best_inter_cost = INT32_MAX; |
717 | 0 | int rf_idx; |
718 | 0 | int_mv single_mv[INTER_REFS_PER_FRAME]; |
719 | |
|
720 | 0 | best_mv[0].as_int = INVALID_MV; |
721 | 0 | best_mv[1].as_int = INVALID_MV; |
722 | |
|
723 | 0 | for (rf_idx = 0; rf_idx < INTER_REFS_PER_FRAME; ++rf_idx) { |
724 | 0 | single_mv[rf_idx].as_int = INVALID_MV; |
725 | 0 | if (tpl_data->ref_frame[rf_idx] == NULL || |
726 | 0 | tpl_data->src_ref_frame[rf_idx] == NULL) { |
727 | 0 | tpl_stats->mv[rf_idx].as_int = INVALID_MV; |
728 | 0 | continue; |
729 | 0 | } |
730 | | |
731 | 0 | const YV12_BUFFER_CONFIG *ref_frame_ptr = tpl_data->src_ref_frame[rf_idx]; |
732 | 0 | const int ref_mb_offset = |
733 | 0 | mi_row * MI_SIZE * ref_frame_ptr->y_stride + mi_col * MI_SIZE; |
734 | 0 | uint8_t *ref_mb = ref_frame_ptr->y_buffer + ref_mb_offset; |
735 | 0 | const int ref_stride = ref_frame_ptr->y_stride; |
736 | 0 | const int ref_width = ref_frame_ptr->y_width; |
737 | |
|
738 | 0 | int_mv best_rfidx_mv = { 0 }; |
739 | 0 | uint32_t bestsme = UINT32_MAX; |
740 | |
|
741 | 0 | center_mv_t center_mvs[4] = { { { 0 }, INT_MAX }, |
742 | 0 | { { 0 }, INT_MAX }, |
743 | 0 | { { 0 }, INT_MAX }, |
744 | 0 | { { 0 }, INT_MAX } }; |
745 | 0 | int refmv_count = 1; |
746 | 0 | int idx; |
747 | |
|
748 | 0 | if (xd->up_available) { |
749 | 0 | TplDepStats *ref_tpl_stats = &tpl_frame->tpl_stats_ptr[av1_tpl_ptr_pos( |
750 | 0 | mi_row - mi_height, mi_col, tpl_frame->stride, block_mis_log2)]; |
751 | 0 | if (!is_alike_mv(ref_tpl_stats->mv[rf_idx], center_mvs, refmv_count, |
752 | 0 | tpl_sf->skip_alike_starting_mv)) { |
753 | 0 | center_mvs[refmv_count].mv.as_int = ref_tpl_stats->mv[rf_idx].as_int; |
754 | 0 | ++refmv_count; |
755 | 0 | } |
756 | 0 | } |
757 | |
|
758 | 0 | if (xd->left_available) { |
759 | 0 | TplDepStats *ref_tpl_stats = &tpl_frame->tpl_stats_ptr[av1_tpl_ptr_pos( |
760 | 0 | mi_row, mi_col - mi_width, tpl_frame->stride, block_mis_log2)]; |
761 | 0 | if (!is_alike_mv(ref_tpl_stats->mv[rf_idx], center_mvs, refmv_count, |
762 | 0 | tpl_sf->skip_alike_starting_mv)) { |
763 | 0 | center_mvs[refmv_count].mv.as_int = ref_tpl_stats->mv[rf_idx].as_int; |
764 | 0 | ++refmv_count; |
765 | 0 | } |
766 | 0 | } |
767 | |
|
768 | 0 | if (xd->up_available && mi_col + mi_width < xd->tile.mi_col_end) { |
769 | 0 | TplDepStats *ref_tpl_stats = &tpl_frame->tpl_stats_ptr[av1_tpl_ptr_pos( |
770 | 0 | mi_row - mi_height, mi_col + mi_width, tpl_frame->stride, |
771 | 0 | block_mis_log2)]; |
772 | 0 | if (!is_alike_mv(ref_tpl_stats->mv[rf_idx], center_mvs, refmv_count, |
773 | 0 | tpl_sf->skip_alike_starting_mv)) { |
774 | 0 | center_mvs[refmv_count].mv.as_int = ref_tpl_stats->mv[rf_idx].as_int; |
775 | 0 | ++refmv_count; |
776 | 0 | } |
777 | 0 | } |
778 | |
|
779 | | #if CONFIG_THREE_PASS |
780 | | if (cpi->third_pass_ctx && |
781 | | frame_offset < cpi->third_pass_ctx->frame_info_count && |
782 | | tpl_data->frame_idx < gf_group->size) { |
783 | | double ratio_h, ratio_w; |
784 | | av1_get_third_pass_ratio(cpi->third_pass_ctx, frame_offset, cm->height, |
785 | | cm->width, &ratio_h, &ratio_w); |
786 | | THIRD_PASS_MI_INFO *this_mi = av1_get_third_pass_mi( |
787 | | cpi->third_pass_ctx, frame_offset, mi_row, mi_col, ratio_h, ratio_w); |
788 | | |
789 | | int_mv tp_mv = av1_get_third_pass_adjusted_mv(this_mi, ratio_h, ratio_w, |
790 | | rf_idx + LAST_FRAME); |
791 | | if (tp_mv.as_int != INVALID_MV && |
792 | | !is_alike_mv(tp_mv, center_mvs + 1, refmv_count - 1, |
793 | | tpl_sf->skip_alike_starting_mv)) { |
794 | | center_mvs[0].mv = tp_mv; |
795 | | } |
796 | | } |
797 | | #endif // CONFIG_THREE_PASS |
798 | | |
799 | | // Prune starting mvs |
800 | 0 | if (tpl_sf->prune_starting_mv && refmv_count > 1) { |
801 | | // Get each center mv's sad. |
802 | 0 | for (idx = 0; idx < refmv_count; ++idx) { |
803 | 0 | FULLPEL_MV mv = get_fullmv_from_mv(¢er_mvs[idx].mv.as_mv); |
804 | 0 | clamp_fullmv(&mv, &x->mv_limits); |
805 | 0 | center_mvs[idx].sad = (int)cpi->ppi->fn_ptr[bsize].sdf( |
806 | 0 | src_mb_buffer, src_stride, &ref_mb[mv.row * ref_stride + mv.col], |
807 | 0 | ref_stride); |
808 | 0 | } |
809 | | |
810 | | // Rank center_mv using sad. |
811 | 0 | qsort(center_mvs, refmv_count, sizeof(center_mvs[0]), compare_sad); |
812 | |
|
813 | 0 | refmv_count = AOMMIN(4 - tpl_sf->prune_starting_mv, refmv_count); |
814 | | // Further reduce number of refmv based on sad difference. |
815 | 0 | if (refmv_count > 1) { |
816 | 0 | int last_sad = center_mvs[refmv_count - 1].sad; |
817 | 0 | int second_to_last_sad = center_mvs[refmv_count - 2].sad; |
818 | 0 | if ((last_sad - second_to_last_sad) * 5 > second_to_last_sad) |
819 | 0 | refmv_count--; |
820 | 0 | } |
821 | 0 | } |
822 | |
|
823 | 0 | for (idx = 0; idx < refmv_count; ++idx) { |
824 | 0 | int_mv this_mv; |
825 | 0 | uint32_t thissme = motion_estimation( |
826 | 0 | cpi, x, src_mb_buffer, ref_mb, src_stride, ref_stride, src_width, |
827 | 0 | ref_width, bsize, center_mvs[idx].mv.as_mv, &this_mv); |
828 | |
|
829 | 0 | if (thissme < bestsme) { |
830 | 0 | bestsme = thissme; |
831 | 0 | best_rfidx_mv = this_mv; |
832 | 0 | } |
833 | 0 | } |
834 | |
|
835 | 0 | tpl_stats->mv[rf_idx].as_int = best_rfidx_mv.as_int; |
836 | 0 | single_mv[rf_idx] = best_rfidx_mv; |
837 | |
|
838 | 0 | inter_cost = get_inter_cost( |
839 | 0 | cpi, xd, src_mb_buffer, src_stride, tpl_tmp_buffers, bsize, tx_size, |
840 | 0 | mi_row, mi_col, rf_idx, &best_rfidx_mv.as_mv, tpl_frame->use_pred_sad); |
841 | | // Store inter cost for each ref frame. This is used to prune inter modes. |
842 | 0 | tpl_stats->pred_error[rf_idx] = AOMMAX(1, inter_cost); |
843 | |
|
844 | 0 | if (inter_cost < best_inter_cost) { |
845 | 0 | best_rf_idx = rf_idx; |
846 | |
|
847 | 0 | best_inter_cost = inter_cost; |
848 | 0 | best_mv[0].as_int = best_rfidx_mv.as_int; |
849 | 0 | } |
850 | 0 | } |
851 | | // Calculate SATD of the best inter mode if SAD was used for mode decision |
852 | | // as best_inter_cost is used in the ML model to skip intra mode evaluation. |
853 | 0 | if (best_inter_cost < INT32_MAX && tpl_frame->use_pred_sad) { |
854 | 0 | assert(best_rf_idx != -1); |
855 | 0 | best_inter_cost = get_inter_cost( |
856 | 0 | cpi, xd, src_mb_buffer, src_stride, tpl_tmp_buffers, bsize, tx_size, |
857 | 0 | mi_row, mi_col, best_rf_idx, &best_mv[0].as_mv, 0 /* use_pred_sad */); |
858 | 0 | } |
859 | |
|
860 | 0 | if (best_rf_idx != -1 && best_inter_cost < best_intra_cost) { |
861 | 0 | best_mode = NEWMV; |
862 | 0 | xd->mi[0]->ref_frame[0] = best_rf_idx + LAST_FRAME; |
863 | 0 | xd->mi[0]->mv[0].as_int = best_mv[0].as_int; |
864 | 0 | } |
865 | | |
866 | | // Start compound prediction search. |
867 | 0 | int comp_ref_frames[3][2] = { |
868 | 0 | { 0, 4 }, |
869 | 0 | { 0, 6 }, |
870 | 0 | { 3, 6 }, |
871 | 0 | }; |
872 | |
|
873 | 0 | int start_rf = 0; |
874 | 0 | int end_rf = 3; |
875 | 0 | if (!tpl_sf->allow_compound_pred) end_rf = 0; |
876 | | #if CONFIG_THREE_PASS |
877 | | if (cpi->third_pass_ctx && |
878 | | frame_offset < cpi->third_pass_ctx->frame_info_count && |
879 | | tpl_data->frame_idx < gf_group->size) { |
880 | | double ratio_h, ratio_w; |
881 | | av1_get_third_pass_ratio(cpi->third_pass_ctx, frame_offset, cm->height, |
882 | | cm->width, &ratio_h, &ratio_w); |
883 | | THIRD_PASS_MI_INFO *this_mi = av1_get_third_pass_mi( |
884 | | cpi->third_pass_ctx, frame_offset, mi_row, mi_col, ratio_h, ratio_w); |
885 | | |
886 | | if (this_mi->ref_frame[0] >= LAST_FRAME && |
887 | | this_mi->ref_frame[1] >= LAST_FRAME) { |
888 | | int found = 0; |
889 | | for (int i = 0; i < 3; i++) { |
890 | | if (comp_ref_frames[i][0] + LAST_FRAME == this_mi->ref_frame[0] && |
891 | | comp_ref_frames[i][1] + LAST_FRAME == this_mi->ref_frame[1]) { |
892 | | found = 1; |
893 | | break; |
894 | | } |
895 | | } |
896 | | if (!found || !tpl_sf->allow_compound_pred) { |
897 | | comp_ref_frames[2][0] = this_mi->ref_frame[0] - LAST_FRAME; |
898 | | comp_ref_frames[2][1] = this_mi->ref_frame[1] - LAST_FRAME; |
899 | | if (!tpl_sf->allow_compound_pred) { |
900 | | start_rf = 2; |
901 | | end_rf = 3; |
902 | | } |
903 | | } |
904 | | } |
905 | | } |
906 | | #endif // CONFIG_THREE_PASS |
907 | |
|
908 | 0 | xd->mi_row = mi_row; |
909 | 0 | xd->mi_col = mi_col; |
910 | 0 | int best_cmp_rf_idx = -1; |
911 | 0 | const int_interpfilters kernel = |
912 | 0 | av1_broadcast_interp_filter(EIGHTTAP_REGULAR); |
913 | 0 | for (int cmp_rf_idx = start_rf; cmp_rf_idx < end_rf; ++cmp_rf_idx) { |
914 | 0 | int rf_idx0 = comp_ref_frames[cmp_rf_idx][0]; |
915 | 0 | int rf_idx1 = comp_ref_frames[cmp_rf_idx][1]; |
916 | |
|
917 | 0 | if (tpl_data->ref_frame[rf_idx0] == NULL || |
918 | 0 | tpl_data->src_ref_frame[rf_idx0] == NULL || |
919 | 0 | tpl_data->ref_frame[rf_idx1] == NULL || |
920 | 0 | tpl_data->src_ref_frame[rf_idx1] == NULL) { |
921 | 0 | continue; |
922 | 0 | } |
923 | | |
924 | 0 | const YV12_BUFFER_CONFIG *ref_frame_ptr[2] = { |
925 | 0 | tpl_data->src_ref_frame[rf_idx0], |
926 | 0 | tpl_data->src_ref_frame[rf_idx1], |
927 | 0 | }; |
928 | |
|
929 | 0 | xd->mi[0]->ref_frame[0] = rf_idx0 + LAST_FRAME; |
930 | 0 | xd->mi[0]->ref_frame[1] = rf_idx1 + LAST_FRAME; |
931 | 0 | xd->mi[0]->mode = NEW_NEWMV; |
932 | 0 | const int8_t ref_frame_type = av1_ref_frame_type(xd->mi[0]->ref_frame); |
933 | | // Set up ref_mv for av1_joint_motion_search(). |
934 | 0 | CANDIDATE_MV *this_ref_mv_stack = x->mbmi_ext.ref_mv_stack[ref_frame_type]; |
935 | 0 | this_ref_mv_stack[xd->mi[0]->ref_mv_idx].this_mv = single_mv[rf_idx0]; |
936 | 0 | this_ref_mv_stack[xd->mi[0]->ref_mv_idx].comp_mv = single_mv[rf_idx1]; |
937 | |
|
938 | 0 | struct buf_2d yv12_mb[2][MAX_MB_PLANE]; |
939 | 0 | for (int i = 0; i < 2; ++i) { |
940 | 0 | av1_setup_pred_block(xd, yv12_mb[i], ref_frame_ptr[i], |
941 | 0 | xd->block_ref_scale_factors[i], |
942 | 0 | xd->block_ref_scale_factors[i], MAX_MB_PLANE); |
943 | 0 | for (int plane = 0; plane < MAX_MB_PLANE; ++plane) { |
944 | 0 | xd->plane[plane].pre[i] = yv12_mb[i][plane]; |
945 | 0 | } |
946 | 0 | } |
947 | |
|
948 | 0 | int_mv tmp_mv[2] = { single_mv[rf_idx0], single_mv[rf_idx1] }; |
949 | 0 | int rate_mv; |
950 | 0 | av1_joint_motion_search(cpi, x, bsize, tmp_mv, NULL, 0, &rate_mv, |
951 | 0 | !cpi->sf.mv_sf.disable_second_mv, |
952 | 0 | NUM_JOINT_ME_REFINE_ITER); |
953 | |
|
954 | 0 | for (int ref = 0; ref < 2; ++ref) { |
955 | 0 | struct buf_2d ref_buf = { NULL, ref_frame_ptr[ref]->y_buffer, |
956 | 0 | ref_frame_ptr[ref]->y_width, |
957 | 0 | ref_frame_ptr[ref]->y_height, |
958 | 0 | ref_frame_ptr[ref]->y_stride }; |
959 | 0 | InterPredParams inter_pred_params; |
960 | 0 | av1_init_inter_params(&inter_pred_params, bw, bh, mi_row * MI_SIZE, |
961 | 0 | mi_col * MI_SIZE, 0, 0, xd->bd, is_cur_buf_hbd(xd), |
962 | 0 | 0, &tpl_data->sf, &ref_buf, kernel); |
963 | 0 | av1_init_comp_mode(&inter_pred_params); |
964 | |
|
965 | 0 | inter_pred_params.conv_params = get_conv_params_no_round( |
966 | 0 | ref, 0, xd->tmp_conv_dst, MAX_SB_SIZE, 1, xd->bd); |
967 | |
|
968 | 0 | av1_enc_build_one_inter_predictor(predictor, bw, &tmp_mv[ref].as_mv, |
969 | 0 | &inter_pred_params); |
970 | 0 | } |
971 | 0 | inter_cost = |
972 | 0 | tpl_get_satd_cost(bd_info, src_diff, bw, src_mb_buffer, src_stride, |
973 | 0 | predictor, bw, coeff, bw, bh, tx_size); |
974 | 0 | if (inter_cost < best_inter_cost) { |
975 | 0 | best_cmp_rf_idx = cmp_rf_idx; |
976 | 0 | best_inter_cost = inter_cost; |
977 | 0 | best_mv[0] = tmp_mv[0]; |
978 | 0 | best_mv[1] = tmp_mv[1]; |
979 | 0 | } |
980 | 0 | } |
981 | |
|
982 | 0 | if (best_cmp_rf_idx != -1 && best_inter_cost < best_intra_cost) { |
983 | 0 | best_mode = NEW_NEWMV; |
984 | 0 | const int best_rf_idx0 = comp_ref_frames[best_cmp_rf_idx][0]; |
985 | 0 | const int best_rf_idx1 = comp_ref_frames[best_cmp_rf_idx][1]; |
986 | 0 | xd->mi[0]->ref_frame[0] = best_rf_idx0 + LAST_FRAME; |
987 | 0 | xd->mi[0]->ref_frame[1] = best_rf_idx1 + LAST_FRAME; |
988 | 0 | } |
989 | |
|
990 | 0 | if (best_inter_cost < INT32_MAX && is_inter_mode(best_mode)) { |
991 | 0 | xd->mi[0]->mv[0].as_int = best_mv[0].as_int; |
992 | 0 | xd->mi[0]->mv[1].as_int = best_mv[1].as_int; |
993 | 0 | const YV12_BUFFER_CONFIG *ref_frame_ptr[2] = { |
994 | 0 | best_cmp_rf_idx >= 0 |
995 | 0 | ? tpl_data->src_ref_frame[comp_ref_frames[best_cmp_rf_idx][0]] |
996 | 0 | : tpl_data->src_ref_frame[best_rf_idx], |
997 | 0 | best_cmp_rf_idx >= 0 |
998 | 0 | ? tpl_data->src_ref_frame[comp_ref_frames[best_cmp_rf_idx][1]] |
999 | 0 | : NULL, |
1000 | 0 | }; |
1001 | 0 | rate_cost = 1; |
1002 | 0 | get_rate_distortion(&rate_cost, &recon_error, &pred_error, src_diff, coeff, |
1003 | 0 | qcoeff, dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool, |
1004 | 0 | rec_stride_pool, tx_size, best_mode, mi_row, mi_col, |
1005 | 0 | use_y_only_rate_distortion, 0 /*do_recon*/, NULL); |
1006 | 0 | tpl_stats->srcrf_rate = rate_cost; |
1007 | 0 | } |
1008 | |
|
1009 | 0 | best_intra_cost = AOMMAX(best_intra_cost, 1); |
1010 | 0 | best_inter_cost = AOMMIN(best_intra_cost, best_inter_cost); |
1011 | 0 | tpl_stats->inter_cost = best_inter_cost; |
1012 | 0 | tpl_stats->intra_cost = best_intra_cost; |
1013 | |
|
1014 | 0 | tpl_stats->srcrf_dist = recon_error << TPL_DEP_COST_SCALE_LOG2; |
1015 | 0 | tpl_stats->srcrf_sse = pred_error << TPL_DEP_COST_SCALE_LOG2; |
1016 | | |
1017 | | // Final encode |
1018 | 0 | rate_cost = 0; |
1019 | 0 | const YV12_BUFFER_CONFIG *ref_frame_ptr[2]; |
1020 | |
|
1021 | 0 | ref_frame_ptr[0] = |
1022 | 0 | best_mode == NEW_NEWMV |
1023 | 0 | ? tpl_data->ref_frame[comp_ref_frames[best_cmp_rf_idx][0]] |
1024 | 0 | : best_rf_idx >= 0 ? tpl_data->ref_frame[best_rf_idx] |
1025 | 0 | : NULL; |
1026 | 0 | ref_frame_ptr[1] = |
1027 | 0 | best_mode == NEW_NEWMV |
1028 | 0 | ? tpl_data->ref_frame[comp_ref_frames[best_cmp_rf_idx][1]] |
1029 | 0 | : NULL; |
1030 | 0 | get_rate_distortion(&rate_cost, &recon_error, &pred_error, src_diff, coeff, |
1031 | 0 | qcoeff, dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool, |
1032 | 0 | rec_stride_pool, tx_size, best_mode, mi_row, mi_col, |
1033 | 0 | use_y_only_rate_distortion, 1 /*do_recon*/, |
1034 | 0 | tpl_txfm_stats); |
1035 | |
|
1036 | 0 | tpl_stats->recrf_dist = recon_error << TPL_DEP_COST_SCALE_LOG2; |
1037 | 0 | tpl_stats->recrf_sse = pred_error << TPL_DEP_COST_SCALE_LOG2; |
1038 | 0 | tpl_stats->recrf_rate = rate_cost; |
1039 | |
|
1040 | 0 | if (!is_inter_mode(best_mode)) { |
1041 | 0 | tpl_stats->srcrf_dist = recon_error << TPL_DEP_COST_SCALE_LOG2; |
1042 | 0 | tpl_stats->srcrf_rate = rate_cost; |
1043 | 0 | tpl_stats->srcrf_sse = pred_error << TPL_DEP_COST_SCALE_LOG2; |
1044 | 0 | } |
1045 | |
|
1046 | 0 | tpl_stats->recrf_dist = AOMMAX(tpl_stats->srcrf_dist, tpl_stats->recrf_dist); |
1047 | 0 | tpl_stats->recrf_rate = AOMMAX(tpl_stats->srcrf_rate, tpl_stats->recrf_rate); |
1048 | |
|
1049 | 0 | if (best_mode == NEW_NEWMV) { |
1050 | 0 | ref_frame_ptr[0] = tpl_data->ref_frame[comp_ref_frames[best_cmp_rf_idx][0]]; |
1051 | 0 | ref_frame_ptr[1] = |
1052 | 0 | tpl_data->src_ref_frame[comp_ref_frames[best_cmp_rf_idx][1]]; |
1053 | 0 | get_rate_distortion(&rate_cost, &recon_error, &pred_error, src_diff, coeff, |
1054 | 0 | qcoeff, dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool, |
1055 | 0 | rec_stride_pool, tx_size, best_mode, mi_row, mi_col, |
1056 | 0 | use_y_only_rate_distortion, 1 /*do_recon*/, NULL); |
1057 | 0 | tpl_stats->cmp_recrf_dist[0] = recon_error << TPL_DEP_COST_SCALE_LOG2; |
1058 | 0 | tpl_stats->cmp_recrf_rate[0] = rate_cost; |
1059 | |
|
1060 | 0 | tpl_stats->cmp_recrf_dist[0] = |
1061 | 0 | AOMMAX(tpl_stats->srcrf_dist, tpl_stats->cmp_recrf_dist[0]); |
1062 | 0 | tpl_stats->cmp_recrf_rate[0] = |
1063 | 0 | AOMMAX(tpl_stats->srcrf_rate, tpl_stats->cmp_recrf_rate[0]); |
1064 | |
|
1065 | 0 | tpl_stats->cmp_recrf_dist[0] = |
1066 | 0 | AOMMIN(tpl_stats->recrf_dist, tpl_stats->cmp_recrf_dist[0]); |
1067 | 0 | tpl_stats->cmp_recrf_rate[0] = |
1068 | 0 | AOMMIN(tpl_stats->recrf_rate, tpl_stats->cmp_recrf_rate[0]); |
1069 | |
|
1070 | 0 | rate_cost = 0; |
1071 | 0 | ref_frame_ptr[0] = |
1072 | 0 | tpl_data->src_ref_frame[comp_ref_frames[best_cmp_rf_idx][0]]; |
1073 | 0 | ref_frame_ptr[1] = tpl_data->ref_frame[comp_ref_frames[best_cmp_rf_idx][1]]; |
1074 | 0 | get_rate_distortion(&rate_cost, &recon_error, &pred_error, src_diff, coeff, |
1075 | 0 | qcoeff, dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool, |
1076 | 0 | rec_stride_pool, tx_size, best_mode, mi_row, mi_col, |
1077 | 0 | use_y_only_rate_distortion, 1 /*do_recon*/, NULL); |
1078 | 0 | tpl_stats->cmp_recrf_dist[1] = recon_error << TPL_DEP_COST_SCALE_LOG2; |
1079 | 0 | tpl_stats->cmp_recrf_rate[1] = rate_cost; |
1080 | |
|
1081 | 0 | tpl_stats->cmp_recrf_dist[1] = |
1082 | 0 | AOMMAX(tpl_stats->srcrf_dist, tpl_stats->cmp_recrf_dist[1]); |
1083 | 0 | tpl_stats->cmp_recrf_rate[1] = |
1084 | 0 | AOMMAX(tpl_stats->srcrf_rate, tpl_stats->cmp_recrf_rate[1]); |
1085 | |
|
1086 | 0 | tpl_stats->cmp_recrf_dist[1] = |
1087 | 0 | AOMMIN(tpl_stats->recrf_dist, tpl_stats->cmp_recrf_dist[1]); |
1088 | 0 | tpl_stats->cmp_recrf_rate[1] = |
1089 | 0 | AOMMIN(tpl_stats->recrf_rate, tpl_stats->cmp_recrf_rate[1]); |
1090 | 0 | } |
1091 | |
|
1092 | 0 | if (best_mode == NEWMV) { |
1093 | 0 | tpl_stats->mv[best_rf_idx] = best_mv[0]; |
1094 | 0 | tpl_stats->ref_frame_index[0] = best_rf_idx; |
1095 | 0 | tpl_stats->ref_frame_index[1] = NONE_FRAME; |
1096 | 0 | } else if (best_mode == NEW_NEWMV) { |
1097 | 0 | tpl_stats->ref_frame_index[0] = comp_ref_frames[best_cmp_rf_idx][0]; |
1098 | 0 | tpl_stats->ref_frame_index[1] = comp_ref_frames[best_cmp_rf_idx][1]; |
1099 | 0 | tpl_stats->mv[tpl_stats->ref_frame_index[0]] = best_mv[0]; |
1100 | 0 | tpl_stats->mv[tpl_stats->ref_frame_index[1]] = best_mv[1]; |
1101 | 0 | } |
1102 | |
|
1103 | 0 | for (int idy = 0; idy < mi_height; ++idy) { |
1104 | 0 | for (int idx = 0; idx < mi_width; ++idx) { |
1105 | 0 | if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > idx && |
1106 | 0 | (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > idy) { |
1107 | 0 | xd->mi[idx + idy * cm->mi_params.mi_stride] = xd->mi[0]; |
1108 | 0 | } |
1109 | 0 | } |
1110 | 0 | } |
1111 | 0 | } |
1112 | | |
1113 | 0 | static int round_floor(int ref_pos, int bsize_pix) { |
1114 | 0 | int round; |
1115 | 0 | if (ref_pos < 0) |
1116 | 0 | round = -(1 + (-ref_pos - 1) / bsize_pix); |
1117 | 0 | else |
1118 | 0 | round = ref_pos / bsize_pix; |
1119 | |
|
1120 | 0 | return round; |
1121 | 0 | } |
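round_floor() is floor division: negative reference positions round toward minus infinity instead of toward zero, so a motion-compensated block that starts just above or left of the frame still maps to the correct (negative) grid cell. With bsize_pix = 16 (illustrative): round_floor(-1, 16) = -1 whereas plain C division -1 / 16 would give 0; round_floor(-16, 16) = -1; round_floor(17, 16) = 1.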
1122 | | |
1123 | | int av1_get_overlap_area(int row_a, int col_a, int row_b, int col_b, int width, |
1124 | 0 | int height) { |
1125 | 0 | int min_row = AOMMAX(row_a, row_b); |
1126 | 0 | int max_row = AOMMIN(row_a + height, row_b + height); |
1127 | 0 | int min_col = AOMMAX(col_a, col_b); |
1128 | 0 | int max_col = AOMMIN(col_a + width, col_b + width); |
1129 | 0 | if (min_row < max_row && min_col < max_col) { |
1130 | 0 | return (max_row - min_row) * (max_col - min_col); |
1131 | 0 | } |
1132 | 0 | return 0; |
1133 | 0 | } |
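A motion-compensated block generally straddles up to four blocks of the reference-frame stats grid, and this returns its pixel overlap with one of them. Worked example with illustrative 16x16 blocks: for (row_a, col_a) = (0, 0) and (row_b, col_b) = (5, 12), rows [5, 16) and columns [12, 16) intersect, giving 11 * 4 = 44 overlapping pixels.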
1134 | | |
1135 | 0 | int av1_tpl_ptr_pos(int mi_row, int mi_col, int stride, uint8_t right_shift) { |
1136 | 0 | return (mi_row >> right_shift) * stride + (mi_col >> right_shift); |
1137 | 0 | } |
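With block_mis_log2 = 2 (16x16 stats blocks spanning 4x4 MI units), mi coordinates are scaled down by 4 before indexing the per-frame stats grid. Illustrative example: av1_tpl_ptr_pos(8, 12, /*stride=*/30, /*right_shift=*/2) = (8 >> 2) * 30 + (12 >> 2) = 63.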
1138 | | |
1139 | | int64_t av1_delta_rate_cost(int64_t delta_rate, int64_t recrf_dist, |
1140 | 0 | int64_t srcrf_dist, int pix_num) { |
1141 | 0 | double beta = (double)srcrf_dist / recrf_dist; |
1142 | 0 | int64_t rate_cost = delta_rate; |
1143 | |
|
1144 | 0 | if (srcrf_dist <= 128) return rate_cost; |
1145 | | |
1146 | 0 | double dr = |
1147 | 0 | (double)(delta_rate >> (TPL_DEP_COST_SCALE_LOG2 + AV1_PROB_COST_SHIFT)) / |
1148 | 0 | pix_num; |
1149 | |
|
1150 | 0 | double log_den = log(beta) / log(2.0) + 2.0 * dr; |
1151 | |
|
1152 | 0 | if (log_den > log(10.0) / log(2.0)) { |
1153 | 0 | rate_cost = (int64_t)((log(1.0 / beta) * pix_num) / log(2.0) / 2.0); |
1154 | 0 | rate_cost <<= (TPL_DEP_COST_SCALE_LOG2 + AV1_PROB_COST_SHIFT); |
1155 | 0 | return rate_cost; |
1156 | 0 | } |
1157 | | |
1158 | 0 | double num = pow(2.0, log_den); |
1159 | 0 | double den = num * beta + (1 - beta) * beta; |
1160 | |
|
1161 | 0 | rate_cost = (int64_t)((pix_num * log(num / den)) / log(2.0) / 2.0); |
1162 | |
|
1163 | 0 | rate_cost <<= (TPL_DEP_COST_SCALE_LOG2 + AV1_PROB_COST_SHIFT); |
1164 | |
|
1165 | 0 | return rate_cost; |
1166 | 0 | } |
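Restating the computation above, with beta = srcrf_dist / recrf_dist and dr the per-pixel delta rate after removing the cost shifts:

  log_den = log2(beta) + 2 * dr
  num     = 2 ^ log_den
  den     = num * beta + (1 - beta) * beta
  cost    = (pix_num / 2) * log2(num / den)

re-scaled by TPL_DEP_COST_SCALE_LOG2 + AV1_PROB_COST_SHIFT; when log_den exceeds log2(10) the cost saturates to (pix_num / 2) * log2(1 / beta).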
1167 | | |
1168 | | static inline void tpl_model_update_b(TplParams *const tpl_data, int mi_row, |
1169 | | int mi_col, const BLOCK_SIZE bsize, |
1170 | 0 | int frame_idx, int ref) { |
1171 | 0 | TplDepFrame *tpl_frame_ptr = &tpl_data->tpl_frame[frame_idx]; |
1172 | 0 | TplDepStats *tpl_ptr = tpl_frame_ptr->tpl_stats_ptr; |
1173 | 0 | TplDepFrame *tpl_frame = tpl_data->tpl_frame; |
1174 | 0 | const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2; |
1175 | 0 | TplDepStats *tpl_stats_ptr = &tpl_ptr[av1_tpl_ptr_pos( |
1176 | 0 | mi_row, mi_col, tpl_frame->stride, block_mis_log2)]; |
1177 | |
|
1178 | 0 | int is_compound = tpl_stats_ptr->ref_frame_index[1] >= 0; |
1179 | |
|
1180 | 0 | if (tpl_stats_ptr->ref_frame_index[ref] < 0) return; |
1181 | 0 | const int ref_frame_index = tpl_stats_ptr->ref_frame_index[ref]; |
1182 | 0 | TplDepFrame *ref_tpl_frame = |
1183 | 0 | &tpl_frame[tpl_frame[frame_idx].ref_map_index[ref_frame_index]]; |
1184 | 0 | TplDepStats *ref_stats_ptr = ref_tpl_frame->tpl_stats_ptr; |
1185 | |
|
1186 | 0 | if (tpl_frame[frame_idx].ref_map_index[ref_frame_index] < 0) return; |
1187 | | |
1188 | 0 | const FULLPEL_MV full_mv = |
1189 | 0 | get_fullmv_from_mv(&tpl_stats_ptr->mv[ref_frame_index].as_mv); |
1190 | 0 | const int ref_pos_row = mi_row * MI_SIZE + full_mv.row; |
1191 | 0 | const int ref_pos_col = mi_col * MI_SIZE + full_mv.col; |
1192 | |
|
1193 | 0 | const int bw = 4 << mi_size_wide_log2[bsize]; |
1194 | 0 | const int bh = 4 << mi_size_high_log2[bsize]; |
1195 | 0 | const int mi_height = mi_size_high[bsize]; |
1196 | 0 | const int mi_width = mi_size_wide[bsize]; |
1197 | 0 | const int pix_num = bw * bh; |
1198 | | |
1199 | | // Top-left grid block location, in pixels. |
1200 | 0 | int grid_pos_row_base = round_floor(ref_pos_row, bh) * bh; |
1201 | 0 | int grid_pos_col_base = round_floor(ref_pos_col, bw) * bw; |
1202 | 0 | int block; |
1203 | |
|
1204 | 0 | int64_t srcrf_dist = is_compound ? tpl_stats_ptr->cmp_recrf_dist[!ref] |
1205 | 0 | : tpl_stats_ptr->srcrf_dist; |
1206 | 0 | int64_t srcrf_rate = |
1207 | 0 | is_compound |
1208 | 0 | ? (tpl_stats_ptr->cmp_recrf_rate[!ref] << TPL_DEP_COST_SCALE_LOG2) |
1209 | 0 | : (tpl_stats_ptr->srcrf_rate << TPL_DEP_COST_SCALE_LOG2); |
1210 | |
|
1211 | 0 | int64_t cur_dep_dist = tpl_stats_ptr->recrf_dist - srcrf_dist; |
1212 | 0 | int64_t mc_dep_dist = |
1213 | 0 | (int64_t)(tpl_stats_ptr->mc_dep_dist * |
1214 | 0 | ((double)(tpl_stats_ptr->recrf_dist - srcrf_dist) / |
1215 | 0 | tpl_stats_ptr->recrf_dist)); |
1216 | 0 | int64_t delta_rate = |
1217 | 0 | (tpl_stats_ptr->recrf_rate << TPL_DEP_COST_SCALE_LOG2) - srcrf_rate; |
1218 | 0 | int64_t mc_dep_rate = |
1219 | 0 | av1_delta_rate_cost(tpl_stats_ptr->mc_dep_rate, tpl_stats_ptr->recrf_dist, |
1220 | 0 | srcrf_dist, pix_num); |
1221 | |
|
1222 | 0 | for (block = 0; block < 4; ++block) { |
1223 | 0 | int grid_pos_row = grid_pos_row_base + bh * (block >> 1); |
1224 | 0 | int grid_pos_col = grid_pos_col_base + bw * (block & 0x01); |
1225 | |
|
1226 | 0 | if (grid_pos_row >= 0 && grid_pos_row < ref_tpl_frame->mi_rows * MI_SIZE && |
1227 | 0 | grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) { |
1228 | 0 | int overlap_area = av1_get_overlap_area(grid_pos_row, grid_pos_col, |
1229 | 0 | ref_pos_row, ref_pos_col, bw, bh); |
1230 | 0 | int ref_mi_row = round_floor(grid_pos_row, bh) * mi_height; |
1231 | 0 | int ref_mi_col = round_floor(grid_pos_col, bw) * mi_width; |
1232 | 0 | assert((1 << block_mis_log2) == mi_height); |
1233 | 0 | assert((1 << block_mis_log2) == mi_width); |
1234 | 0 | TplDepStats *des_stats = &ref_stats_ptr[av1_tpl_ptr_pos( |
1235 | 0 | ref_mi_row, ref_mi_col, ref_tpl_frame->stride, block_mis_log2)]; |
1236 | 0 | des_stats->mc_dep_dist += |
1237 | 0 | ((cur_dep_dist + mc_dep_dist) * overlap_area) / pix_num; |
1238 | 0 | des_stats->mc_dep_rate += |
1239 | 0 | ((delta_rate + mc_dep_rate) * overlap_area) / pix_num; |
1240 | 0 | } |
1241 | 0 | } |
1242 | 0 | } |
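The propagation step hands each of the up-to-four overlapped grid blocks a share of the dependency statistics proportional to its pixel overlap. Illustrative numbers: with pix_num = 256 (a 16x16 block), an overlap_area of 44 and cur_dep_dist + mc_dep_dist = 1024, that destination block receives 1024 * 44 / 256 = 176, while the other overlap areas (which sum to pix_num when the block lies fully inside the frame) receive the rest.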
1243 | | |
1244 | | static inline void tpl_model_update(TplParams *const tpl_data, int mi_row, |
1245 | 0 | int mi_col, int frame_idx) { |
1246 | 0 | const BLOCK_SIZE tpl_stats_block_size = |
1247 | 0 | convert_length_to_bsize(MI_SIZE << tpl_data->tpl_stats_block_mis_log2); |
1248 | 0 | tpl_model_update_b(tpl_data, mi_row, mi_col, tpl_stats_block_size, frame_idx, |
1249 | 0 | 0); |
1250 | 0 | tpl_model_update_b(tpl_data, mi_row, mi_col, tpl_stats_block_size, frame_idx, |
1251 | 0 | 1); |
1252 | 0 | } |
1253 | | |
1254 | | static inline void tpl_model_store(TplDepStats *tpl_stats_ptr, int mi_row, |
1255 | | int mi_col, int stride, |
1256 | | const TplDepStats *src_stats, |
1257 | 0 | uint8_t block_mis_log2) { |
1258 | 0 | int index = av1_tpl_ptr_pos(mi_row, mi_col, stride, block_mis_log2); |
1259 | 0 | TplDepStats *tpl_ptr = &tpl_stats_ptr[index]; |
1260 | 0 | *tpl_ptr = *src_stats; |
1261 | 0 | tpl_ptr->intra_cost = AOMMAX(1, tpl_ptr->intra_cost); |
1262 | 0 | tpl_ptr->inter_cost = AOMMAX(1, tpl_ptr->inter_cost); |
1263 | 0 | tpl_ptr->srcrf_dist = AOMMAX(1, tpl_ptr->srcrf_dist); |
1264 | 0 | tpl_ptr->srcrf_sse = AOMMAX(1, tpl_ptr->srcrf_sse); |
1265 | 0 | tpl_ptr->recrf_dist = AOMMAX(1, tpl_ptr->recrf_dist); |
1266 | 0 | tpl_ptr->srcrf_rate = AOMMAX(1, tpl_ptr->srcrf_rate); |
1267 | 0 | tpl_ptr->recrf_rate = AOMMAX(1, tpl_ptr->recrf_rate); |
1268 | 0 | tpl_ptr->cmp_recrf_dist[0] = AOMMAX(1, tpl_ptr->cmp_recrf_dist[0]); |
1269 | 0 | tpl_ptr->cmp_recrf_dist[1] = AOMMAX(1, tpl_ptr->cmp_recrf_dist[1]); |
1270 | 0 | tpl_ptr->cmp_recrf_rate[0] = AOMMAX(1, tpl_ptr->cmp_recrf_rate[0]); |
1271 | 0 | tpl_ptr->cmp_recrf_rate[1] = AOMMAX(1, tpl_ptr->cmp_recrf_rate[1]); |
1272 | 0 | } |
1273 | | |
1274 | | // Reset the ref and source frame pointers of tpl_data. |
1275 | 0 | static inline void tpl_reset_src_ref_frames(TplParams *tpl_data) { |
1276 | 0 | for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) { |
1277 | 0 | tpl_data->ref_frame[i] = NULL; |
1278 | 0 | tpl_data->src_ref_frame[i] = NULL; |
1279 | 0 | } |
1280 | 0 | } |
1281 | | |
1282 | 0 | static inline int get_gop_length(const GF_GROUP *gf_group) { |
1283 | 0 | int gop_length = AOMMIN(gf_group->size, MAX_TPL_FRAME_IDX - 1); |
1284 | 0 | return gop_length; |
1285 | 0 | } |
1286 | | |
1287 | | // Initialize the mc_flow parameters used in computing tpl data. |
1288 | | static inline void init_mc_flow_dispenser(AV1_COMP *cpi, int frame_idx, |
1289 | 0 | int pframe_qindex) { |
1290 | 0 | TplParams *const tpl_data = &cpi->ppi->tpl_data; |
1291 | 0 | TplDepFrame *tpl_frame = &tpl_data->tpl_frame[frame_idx]; |
1292 | 0 | const YV12_BUFFER_CONFIG *this_frame = tpl_frame->gf_picture; |
1293 | 0 | const YV12_BUFFER_CONFIG *ref_frames_ordered[INTER_REFS_PER_FRAME]; |
1294 | 0 | uint32_t ref_frame_display_indices[INTER_REFS_PER_FRAME]; |
1295 | 0 | const GF_GROUP *gf_group = &cpi->ppi->gf_group; |
1296 | 0 | TPL_SPEED_FEATURES *tpl_sf = &cpi->sf.tpl_sf; |
1297 | 0 | int ref_pruning_enabled = is_frame_eligible_for_ref_pruning( |
1298 | 0 | gf_group, cpi->sf.inter_sf.selective_ref_frame, |
1299 | 0 | tpl_sf->prune_ref_frames_in_tpl, frame_idx); |
1300 | 0 | int gop_length = get_gop_length(gf_group); |
1301 | 0 | int ref_frame_flags; |
1302 | 0 | AV1_COMMON *cm = &cpi->common; |
1303 | 0 | int rdmult, idx; |
1304 | 0 | ThreadData *td = &cpi->td; |
1305 | 0 | MACROBLOCK *x = &td->mb; |
1306 | 0 | MACROBLOCKD *xd = &x->e_mbd; |
1307 | 0 | TplTxfmStats *tpl_txfm_stats = &td->tpl_txfm_stats; |
1308 | 0 | tpl_data->frame_idx = frame_idx; |
1309 | 0 | tpl_reset_src_ref_frames(tpl_data); |
1310 | 0 | av1_tile_init(&xd->tile, cm, 0, 0); |
1311 | |
|
1312 | 0 | const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100)); |
1313 | 0 | const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6); |
1314 | 0 | const FRAME_TYPE frame_type = cm->current_frame.frame_type; |
1315 | | |
1316 | | // Setup scaling factor |
1317 | 0 | av1_setup_scale_factors_for_frame( |
1318 | 0 | &tpl_data->sf, this_frame->y_crop_width, this_frame->y_crop_height, |
1319 | 0 | this_frame->y_crop_width, this_frame->y_crop_height); |
1320 | |
|
1321 | 0 | xd->cur_buf = this_frame; |
1322 | |
|
1323 | 0 | for (idx = 0; idx < INTER_REFS_PER_FRAME; ++idx) { |
1324 | 0 | TplDepFrame *tpl_ref_frame = |
1325 | 0 | &tpl_data->tpl_frame[tpl_frame->ref_map_index[idx]]; |
1326 | 0 | tpl_data->ref_frame[idx] = tpl_ref_frame->rec_picture; |
1327 | 0 | tpl_data->src_ref_frame[idx] = tpl_ref_frame->gf_picture; |
1328 | 0 | ref_frame_display_indices[idx] = tpl_ref_frame->frame_display_index; |
1329 | 0 | } |
1330 | | |
1331 | | // Store the reference frames based on priority order |
1332 | 0 | for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) { |
1333 | 0 | ref_frames_ordered[i] = |
1334 | 0 | tpl_data->ref_frame[ref_frame_priority_order[i] - 1]; |
1335 | 0 | } |
1336 | | |
1337 | | // Work out which reference frame slots may be used. |
1338 | 0 | ref_frame_flags = |
1339 | 0 | get_ref_frame_flags(&cpi->sf, is_one_pass_rt_params(cpi), |
1340 | 0 | ref_frames_ordered, cpi->ext_flags.ref_frame_flags); |
1341 | |
|
1342 | 0 | enforce_max_ref_frames(cpi, &ref_frame_flags, ref_frame_display_indices, |
1343 | 0 | tpl_frame->frame_display_index); |
1344 | | |
1345 | | // Prune reference frames |
1346 | 0 | for (idx = 0; idx < INTER_REFS_PER_FRAME; ++idx) { |
1347 | 0 | if ((ref_frame_flags & (1 << idx)) == 0) { |
1348 | 0 | tpl_data->ref_frame[idx] = NULL; |
1349 | 0 | } |
1350 | 0 | } |
1351 | | |
1352 | | // Skip motion estimation w.r.t. reference frames which are not
1353 | | // considered in the RD search, using the "selective_ref_frame" speed
1354 | | // feature. Reference frame pruning is not enabled for frames beyond the
1355 | | // gop length, as there are fewer reference frames there and they differ
1356 | | // from the frames considered during the RD search.
1357 | 0 | if (ref_pruning_enabled && (frame_idx < gop_length)) { |
1358 | 0 | for (idx = 0; idx < INTER_REFS_PER_FRAME; ++idx) { |
1359 | 0 | const MV_REFERENCE_FRAME refs[2] = { idx + 1, NONE_FRAME }; |
1360 | 0 | if (prune_ref_by_selective_ref_frame(cpi, NULL, refs, |
1361 | 0 | ref_frame_display_indices)) { |
1362 | 0 | tpl_data->ref_frame[idx] = NULL; |
1363 | 0 | } |
1364 | 0 | } |
1365 | 0 | } |
1366 | | |
1367 | | // Make a temporary mbmi for tpl model |
1368 | 0 | MB_MODE_INFO mbmi; |
1369 | 0 | memset(&mbmi, 0, sizeof(mbmi)); |
1370 | 0 | MB_MODE_INFO *mbmi_ptr = &mbmi; |
1371 | 0 | xd->mi = &mbmi_ptr; |
1372 | |
|
1373 | 0 | xd->block_ref_scale_factors[0] = &tpl_data->sf; |
1374 | 0 | xd->block_ref_scale_factors[1] = &tpl_data->sf; |
1375 | |
|
1376 | 0 | const int base_qindex = |
1377 | 0 | cpi->use_ducky_encode ? gf_group->q_val[frame_idx] : pframe_qindex; |
1378 | | // The TPL model is only meant to be run in inter mode, so ensure that we are |
1379 | | // not running in all intra mode, which implies we are not tuning for image |
1380 | | // quality (IQ). |
1381 | 0 | assert(cpi->oxcf.tune_cfg.tuning != AOM_TUNE_IQ && |
1382 | 0 | cpi->oxcf.mode != ALLINTRA); |
1383 | | // Get rd multiplier set up. |
1384 | 0 | rdmult = av1_compute_rd_mult( |
1385 | 0 | base_qindex, cm->seq_params->bit_depth, |
1386 | 0 | cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth, |
1387 | 0 | boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets, |
1388 | 0 | is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning); |
1389 | |
|
1390 | 0 | if (rdmult < 1) rdmult = 1; |
1391 | 0 | av1_set_error_per_bit(&x->errorperbit, rdmult); |
1392 | 0 | av1_set_sad_per_bit(cpi, &x->sadperbit, base_qindex); |
1393 | |
|
1394 | 0 | tpl_frame->is_valid = 1; |
1395 | |
|
1396 | 0 | cm->quant_params.base_qindex = base_qindex; |
1397 | 0 | av1_frame_init_quantizer(cpi); |
1398 | |
|
1399 | 0 | const BitDepthInfo bd_info = get_bit_depth_info(xd); |
1400 | 0 | const FRAME_UPDATE_TYPE update_type = |
1401 | 0 | gf_group->update_type[cpi->gf_frame_index]; |
1402 | 0 | tpl_frame->base_rdmult = av1_compute_rd_mult_based_on_qindex( |
1403 | 0 | bd_info.bit_depth, update_type, base_qindex, |
1404 | 0 | cpi->oxcf.tune_cfg.tuning) / |
1405 | 0 | 6; |
1406 | |
|
1407 | 0 | if (cpi->use_ducky_encode) |
1408 | 0 | tpl_frame->base_rdmult = gf_group->rdmult_val[frame_idx]; |
1409 | |
|
1410 | 0 | av1_init_tpl_txfm_stats(tpl_txfm_stats); |
1411 | | |
1412 | | // Initialize x->mbmi_ext when compound predictions are enabled. |
1413 | 0 | if (tpl_sf->allow_compound_pred) av1_zero(x->mbmi_ext); |
1414 | | |
1415 | | // Set the pointer to null since mbmi is only allocated inside this function. |
1416 | 0 | assert(xd->mi == &mbmi_ptr); |
1417 | 0 | xd->mi = NULL; |
1418 | | |
1419 | | // The TPL module is called before frame-level speed features are set.
1420 | | // Thus, turning off this speed feature for key frames is done here rather
1421 | | // than in the speed feature setting itself.
1422 | 0 | const int layer_depth_th = (tpl_sf->use_sad_for_mode_decision == 1) ? 5 : 0; |
1423 | 0 | tpl_frame->use_pred_sad = |
1424 | 0 | tpl_sf->use_sad_for_mode_decision && |
1425 | 0 | gf_group->update_type[cpi->gf_frame_index] != KF_UPDATE && |
1426 | 0 | gf_group->layer_depth[frame_idx] >= layer_depth_th; |
1427 | 0 | } |
1428 | | |
1429 | | // This function stores the motion estimation dependencies of all the blocks in |
1430 | | // a row |
1431 | | void av1_mc_flow_dispenser_row(AV1_COMP *cpi, TplTxfmStats *tpl_txfm_stats, |
1432 | | TplBuffers *tpl_tmp_buffers, MACROBLOCK *x, |
1433 | 0 | int mi_row, BLOCK_SIZE bsize, TX_SIZE tx_size) { |
1434 | 0 | AV1_COMMON *const cm = &cpi->common; |
1435 | 0 | MultiThreadInfo *const mt_info = &cpi->mt_info; |
1436 | 0 | AV1TplRowMultiThreadInfo *const tpl_row_mt = &mt_info->tpl_row_mt; |
1437 | 0 | const CommonModeInfoParams *const mi_params = &cm->mi_params; |
1438 | 0 | const int mi_width = mi_size_wide[bsize]; |
1439 | 0 | TplParams *const tpl_data = &cpi->ppi->tpl_data; |
1440 | 0 | TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_data->frame_idx]; |
1441 | 0 | MACROBLOCKD *xd = &x->e_mbd; |
1442 | |
|
1443 | 0 | const int tplb_cols_in_tile = |
1444 | 0 | ROUND_POWER_OF_TWO(mi_params->mi_cols, mi_size_wide_log2[bsize]); |
1445 | 0 | const int tplb_row = ROUND_POWER_OF_TWO(mi_row, mi_size_high_log2[bsize]); |
1446 | 0 | assert(mi_size_high[bsize] == (1 << tpl_data->tpl_stats_block_mis_log2)); |
1447 | 0 | assert(mi_size_wide[bsize] == (1 << tpl_data->tpl_stats_block_mis_log2)); |
1448 | |
|
1449 | 0 | for (int mi_col = 0, tplb_col_in_tile = 0; mi_col < mi_params->mi_cols; |
1450 | 0 | mi_col += mi_width, tplb_col_in_tile++) { |
1451 | 0 | (*tpl_row_mt->sync_read_ptr)(&tpl_data->tpl_mt_sync, tplb_row, |
1452 | 0 | tplb_col_in_tile); |
1453 | |
|
1454 | 0 | #if CONFIG_MULTITHREAD |
1455 | 0 | if (mt_info->num_workers > 1) { |
1456 | 0 | pthread_mutex_lock(tpl_row_mt->mutex_); |
1457 | 0 | const bool tpl_mt_exit = tpl_row_mt->tpl_mt_exit; |
1458 | 0 | pthread_mutex_unlock(tpl_row_mt->mutex_); |
1459 | | // Exit in case any worker has encountered an error. |
1460 | 0 | if (tpl_mt_exit) return; |
1461 | 0 | } |
1462 | 0 | #endif |
1463 | | |
1464 | 0 | TplDepStats tpl_stats; |
1465 | | |
1466 | | // Motion estimation column boundary |
1467 | 0 | av1_set_mv_col_limits(mi_params, &x->mv_limits, mi_col, mi_width, |
1468 | 0 | tpl_data->border_in_pixels); |
1469 | 0 | xd->mb_to_left_edge = -GET_MV_SUBPEL(mi_col * MI_SIZE); |
1470 | 0 | xd->mb_to_right_edge = |
1471 | 0 | GET_MV_SUBPEL(mi_params->mi_cols - mi_width - mi_col); |
1472 | 0 | mode_estimation(cpi, tpl_txfm_stats, tpl_tmp_buffers, x, mi_row, mi_col, |
1473 | 0 | bsize, tx_size, &tpl_stats); |
1474 | | |
1475 | | // Motion flow dependency dispenser. |
1476 | 0 | tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, tpl_frame->stride, |
1477 | 0 | &tpl_stats, tpl_data->tpl_stats_block_mis_log2); |
1478 | 0 | (*tpl_row_mt->sync_write_ptr)(&tpl_data->tpl_mt_sync, tplb_row, |
1479 | 0 | tplb_col_in_tile, tplb_cols_in_tile); |
1480 | 0 | } |
1481 | 0 | } |
1482 | | |
1483 | 0 | static inline void mc_flow_dispenser(AV1_COMP *cpi) { |
1484 | 0 | AV1_COMMON *cm = &cpi->common; |
1485 | 0 | const CommonModeInfoParams *const mi_params = &cm->mi_params; |
1486 | 0 | ThreadData *td = &cpi->td; |
1487 | 0 | MACROBLOCK *x = &td->mb; |
1488 | 0 | MACROBLOCKD *xd = &x->e_mbd; |
1489 | 0 | const BLOCK_SIZE bsize = |
1490 | 0 | convert_length_to_bsize(cpi->ppi->tpl_data.tpl_bsize_1d); |
1491 | 0 | const TX_SIZE tx_size = max_txsize_lookup[bsize]; |
1492 | 0 | const int mi_height = mi_size_high[bsize]; |
1493 | 0 | for (int mi_row = 0; mi_row < mi_params->mi_rows; mi_row += mi_height) { |
1494 | | // Motion estimation row boundary |
1495 | 0 | av1_set_mv_row_limits(mi_params, &x->mv_limits, mi_row, mi_height, |
1496 | 0 | cpi->ppi->tpl_data.border_in_pixels); |
1497 | 0 | xd->mb_to_top_edge = -GET_MV_SUBPEL(mi_row * MI_SIZE); |
1498 | 0 | xd->mb_to_bottom_edge = |
1499 | 0 | GET_MV_SUBPEL((mi_params->mi_rows - mi_height - mi_row) * MI_SIZE); |
1500 | 0 | av1_mc_flow_dispenser_row(cpi, &td->tpl_txfm_stats, &td->tpl_tmp_buffers, x, |
1501 | 0 | mi_row, bsize, tx_size); |
1502 | 0 | } |
1503 | 0 | } |
1504 | | |
1505 | | static void mc_flow_synthesizer(TplParams *tpl_data, int frame_idx, int mi_rows, |
1506 | 0 | int mi_cols) { |
1507 | 0 | if (!frame_idx) { |
1508 | 0 | return; |
1509 | 0 | } |
1510 | 0 | const BLOCK_SIZE bsize = convert_length_to_bsize(tpl_data->tpl_bsize_1d); |
1511 | 0 | const int mi_height = mi_size_high[bsize]; |
1512 | 0 | const int mi_width = mi_size_wide[bsize]; |
1513 | 0 | assert(mi_height == (1 << tpl_data->tpl_stats_block_mis_log2)); |
1514 | 0 | assert(mi_width == (1 << tpl_data->tpl_stats_block_mis_log2)); |
1515 | |
|
1516 | 0 | for (int mi_row = 0; mi_row < mi_rows; mi_row += mi_height) { |
1517 | 0 | for (int mi_col = 0; mi_col < mi_cols; mi_col += mi_width) { |
1518 | 0 | tpl_model_update(tpl_data, mi_row, mi_col, frame_idx); |
1519 | 0 | } |
1520 | 0 | } |
1521 | 0 | } |
1522 | | |
1523 | | static inline void init_gop_frames_for_tpl( |
1524 | | AV1_COMP *cpi, const EncodeFrameParams *const init_frame_params, |
1525 | 0 | GF_GROUP *gf_group, int *tpl_group_frames, int *pframe_qindex) { |
1526 | 0 | AV1_COMMON *cm = &cpi->common; |
1527 | 0 | assert(cpi->gf_frame_index == 0); |
1528 | 0 | *pframe_qindex = 0; |
1529 | |
|
1530 | 0 | RefFrameMapPair ref_frame_map_pairs[REF_FRAMES]; |
1531 | 0 | init_ref_map_pair(cpi, ref_frame_map_pairs); |
1532 | |
|
1533 | 0 | int remapped_ref_idx[REF_FRAMES]; |
1534 | |
|
1535 | 0 | EncodeFrameParams frame_params = *init_frame_params; |
1536 | 0 | TplParams *const tpl_data = &cpi->ppi->tpl_data; |
1537 | |
|
1538 | 0 | int ref_picture_map[REF_FRAMES]; |
1539 | |
|
1540 | 0 | for (int i = 0; i < REF_FRAMES; ++i) { |
1541 | 0 | if (frame_params.frame_type == KEY_FRAME) { |
1542 | 0 | tpl_data->tpl_frame[-i - 1].gf_picture = NULL; |
1543 | 0 | tpl_data->tpl_frame[-i - 1].rec_picture = NULL; |
1544 | 0 | tpl_data->tpl_frame[-i - 1].frame_display_index = 0; |
1545 | 0 | } else { |
1546 | 0 | tpl_data->tpl_frame[-i - 1].gf_picture = &cm->ref_frame_map[i]->buf; |
1547 | 0 | tpl_data->tpl_frame[-i - 1].rec_picture = &cm->ref_frame_map[i]->buf; |
1548 | 0 | tpl_data->tpl_frame[-i - 1].frame_display_index = |
1549 | 0 | cm->ref_frame_map[i]->display_order_hint; |
1550 | 0 | } |
1551 | |
|
1552 | 0 | ref_picture_map[i] = -i - 1; |
1553 | 0 | } |
1554 | |
|
1555 | 0 | *tpl_group_frames = 0; |
1556 | |
|
1557 | 0 | int gf_index; |
1558 | 0 | int process_frame_count = 0; |
1559 | 0 | const int gop_length = get_gop_length(gf_group); |
1560 | |
|
1561 | 0 | for (gf_index = 0; gf_index < gop_length; ++gf_index) { |
1562 | 0 | TplDepFrame *tpl_frame = &tpl_data->tpl_frame[gf_index]; |
1563 | 0 | FRAME_UPDATE_TYPE frame_update_type = gf_group->update_type[gf_index]; |
1564 | 0 | int lookahead_index = |
1565 | 0 | gf_group->cur_frame_idx[gf_index] + gf_group->arf_src_offset[gf_index]; |
1566 | 0 | frame_params.show_frame = frame_update_type != ARF_UPDATE && |
1567 | 0 | frame_update_type != INTNL_ARF_UPDATE; |
1568 | 0 | frame_params.show_existing_frame = |
1569 | 0 | frame_update_type == INTNL_OVERLAY_UPDATE || |
1570 | 0 | frame_update_type == OVERLAY_UPDATE; |
1571 | 0 | frame_params.frame_type = gf_group->frame_type[gf_index]; |
1572 | |
|
1573 | 0 | if (frame_update_type == LF_UPDATE) |
1574 | 0 | *pframe_qindex = gf_group->q_val[gf_index]; |
1575 | |
|
1576 | 0 | const struct lookahead_entry *buf = av1_lookahead_peek( |
1577 | 0 | cpi->ppi->lookahead, lookahead_index, cpi->compressor_stage); |
1578 | 0 | if (buf == NULL) break; |
1579 | 0 | tpl_frame->gf_picture = &buf->img; |
1580 | | |
1581 | | // Use the filtered frame buffer if available. This makes the tpl stats
1582 | | // more precise.
1583 | 0 | FRAME_DIFF frame_diff; |
1584 | 0 | const YV12_BUFFER_CONFIG *tf_buf = |
1585 | 0 | av1_tf_info_get_filtered_buf(&cpi->ppi->tf_info, gf_index, &frame_diff); |
1586 | 0 | if (tf_buf != NULL) { |
1587 | 0 | tpl_frame->gf_picture = tf_buf; |
1588 | 0 | } |
1589 | | |
1590 | | // 'cm->current_frame.frame_number' is the display number |
1591 | | // of the current frame. |
1592 | | // 'lookahead_index' is frame offset within the gf group. |
1593 | | // 'lookahead_index + cm->current_frame.frame_number' |
1594 | | // is the display index of the frame. |
1595 | 0 | tpl_frame->frame_display_index = |
1596 | 0 | lookahead_index + cm->current_frame.frame_number; |
1597 | 0 | assert(buf->display_idx == |
1598 | 0 | cpi->frame_index_set.show_frame_count + lookahead_index); |
1599 | |
|
1600 | 0 | if (frame_update_type != OVERLAY_UPDATE && |
1601 | 0 | frame_update_type != INTNL_OVERLAY_UPDATE) { |
1602 | 0 | tpl_frame->rec_picture = &tpl_data->tpl_rec_pool[process_frame_count]; |
1603 | 0 | tpl_frame->tpl_stats_ptr = tpl_data->tpl_stats_pool[process_frame_count]; |
1604 | 0 | ++process_frame_count; |
1605 | 0 | } |
1606 | 0 | const int true_disp = (int)(tpl_frame->frame_display_index); |
1607 | |
|
1608 | 0 | av1_get_ref_frames(ref_frame_map_pairs, true_disp, cpi, gf_index, 0, |
1609 | 0 | remapped_ref_idx); |
1610 | |
|
1611 | 0 | int refresh_mask = |
1612 | 0 | av1_get_refresh_frame_flags(cpi, &frame_params, frame_update_type, |
1613 | 0 | gf_index, true_disp, ref_frame_map_pairs); |
1614 | | |
1615 | | // Turn the frames marked as is_frame_non_ref into non-reference frames.
1616 | 0 | if (cpi->ppi->gf_group.is_frame_non_ref[gf_index]) refresh_mask = 0; |
1617 | |
|
1618 | 0 | int refresh_frame_map_index = av1_get_refresh_ref_frame_map(refresh_mask); |
1619 | |
|
1620 | 0 | if (refresh_frame_map_index < REF_FRAMES && |
1621 | 0 | refresh_frame_map_index != INVALID_IDX) { |
1622 | 0 | ref_frame_map_pairs[refresh_frame_map_index].disp_order = |
1623 | 0 | AOMMAX(0, true_disp); |
1624 | 0 | ref_frame_map_pairs[refresh_frame_map_index].pyr_level = |
1625 | 0 | get_true_pyr_level(gf_group->layer_depth[gf_index], true_disp, |
1626 | 0 | cpi->ppi->gf_group.max_layer_depth); |
1627 | 0 | } |
1628 | |
|
1629 | 0 | for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i) |
1630 | 0 | tpl_frame->ref_map_index[i - LAST_FRAME] = |
1631 | 0 | ref_picture_map[remapped_ref_idx[i - LAST_FRAME]]; |
1632 | |
|
1633 | 0 | if (refresh_mask) ref_picture_map[refresh_frame_map_index] = gf_index; |
1634 | |
|
1635 | 0 | ++*tpl_group_frames; |
1636 | 0 | } |
1637 | |
|
1638 | 0 | const int tpl_extend = cpi->oxcf.gf_cfg.lag_in_frames - MAX_GF_INTERVAL; |
1639 | 0 | int extend_frame_count = 0; |
1640 | 0 | int extend_frame_length = AOMMIN( |
1641 | 0 | tpl_extend, cpi->rc.frames_to_key - cpi->ppi->p_rc.baseline_gf_interval); |
1642 | |
|
1643 | 0 | int frame_display_index = gf_group->cur_frame_idx[gop_length - 1] + |
1644 | 0 | gf_group->arf_src_offset[gop_length - 1] + 1; |
1645 | |
|
1646 | 0 | for (; |
1647 | 0 | gf_index < MAX_TPL_FRAME_IDX && extend_frame_count < extend_frame_length; |
1648 | 0 | ++gf_index) { |
1649 | 0 | TplDepFrame *tpl_frame = &tpl_data->tpl_frame[gf_index]; |
1650 | 0 | FRAME_UPDATE_TYPE frame_update_type = LF_UPDATE; |
1651 | 0 | frame_params.show_frame = frame_update_type != ARF_UPDATE && |
1652 | 0 | frame_update_type != INTNL_ARF_UPDATE; |
1653 | 0 | frame_params.show_existing_frame = |
1654 | 0 | frame_update_type == INTNL_OVERLAY_UPDATE; |
1655 | 0 | frame_params.frame_type = INTER_FRAME; |
1656 | |
|
1657 | 0 | int lookahead_index = frame_display_index; |
1658 | 0 | struct lookahead_entry *buf = av1_lookahead_peek( |
1659 | 0 | cpi->ppi->lookahead, lookahead_index, cpi->compressor_stage); |
1660 | |
|
1661 | 0 | if (buf == NULL) break; |
1662 | | |
1663 | 0 | tpl_frame->gf_picture = &buf->img; |
1664 | 0 | tpl_frame->rec_picture = &tpl_data->tpl_rec_pool[process_frame_count]; |
1665 | 0 | tpl_frame->tpl_stats_ptr = tpl_data->tpl_stats_pool[process_frame_count]; |
1666 | | // 'cm->current_frame.frame_number' is the display number |
1667 | | // of the current frame. |
1668 | | // 'frame_display_index' is frame offset within the gf group. |
1669 | | // 'frame_display_index + cm->current_frame.frame_number' |
1670 | | // is the display index of the frame. |
1671 | 0 | tpl_frame->frame_display_index = |
1672 | 0 | frame_display_index + cm->current_frame.frame_number; |
1673 | |
|
1674 | 0 | ++process_frame_count; |
1675 | |
|
1676 | 0 | gf_group->update_type[gf_index] = LF_UPDATE; |
1677 | |
|
1678 | | #if CONFIG_BITRATE_ACCURACY && CONFIG_THREE_PASS |
1679 | | if (cpi->oxcf.pass == AOM_RC_SECOND_PASS) { |
1680 | | if (cpi->oxcf.rc_cfg.mode == AOM_Q) { |
1681 | | *pframe_qindex = cpi->oxcf.rc_cfg.cq_level; |
1682 | | } else if (cpi->oxcf.rc_cfg.mode == AOM_VBR) { |
1683 | | // TODO(angiebird): Find a more adaptive method to decide pframe_qindex.
1684 | | // Override the pframe_qindex in the second pass when bitrate accuracy
1685 | | // is on. We found that setting this pframe_qindex makes the tpl stats
1686 | | // more stable.
1687 | | *pframe_qindex = 128; |
1688 | | } |
1689 | | } |
1690 | | #endif // CONFIG_BITRATE_ACCURACY && CONFIG_THREE_PASS |
1691 | 0 | gf_group->q_val[gf_index] = *pframe_qindex; |
1692 | 0 | const int true_disp = (int)(tpl_frame->frame_display_index); |
1693 | 0 | av1_get_ref_frames(ref_frame_map_pairs, true_disp, cpi, gf_index, 0, |
1694 | 0 | remapped_ref_idx); |
1695 | 0 | int refresh_mask = |
1696 | 0 | av1_get_refresh_frame_flags(cpi, &frame_params, frame_update_type, |
1697 | 0 | gf_index, true_disp, ref_frame_map_pairs); |
1698 | 0 | int refresh_frame_map_index = av1_get_refresh_ref_frame_map(refresh_mask); |
1699 | |
|
1700 | 0 | if (refresh_frame_map_index < REF_FRAMES && |
1701 | 0 | refresh_frame_map_index != INVALID_IDX) { |
1702 | 0 | ref_frame_map_pairs[refresh_frame_map_index].disp_order = |
1703 | 0 | AOMMAX(0, true_disp); |
1704 | 0 | ref_frame_map_pairs[refresh_frame_map_index].pyr_level = |
1705 | 0 | get_true_pyr_level(gf_group->layer_depth[gf_index], true_disp, |
1706 | 0 | cpi->ppi->gf_group.max_layer_depth); |
1707 | 0 | } |
1708 | |
|
1709 | 0 | for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i) |
1710 | 0 | tpl_frame->ref_map_index[i - LAST_FRAME] = |
1711 | 0 | ref_picture_map[remapped_ref_idx[i - LAST_FRAME]]; |
1712 | |
|
1713 | 0 | tpl_frame->ref_map_index[ALTREF_FRAME - LAST_FRAME] = -1; |
1714 | 0 | tpl_frame->ref_map_index[LAST3_FRAME - LAST_FRAME] = -1; |
1715 | 0 | tpl_frame->ref_map_index[BWDREF_FRAME - LAST_FRAME] = -1; |
1716 | 0 | tpl_frame->ref_map_index[ALTREF2_FRAME - LAST_FRAME] = -1; |
1717 | |
|
1718 | 0 | if (refresh_mask) ref_picture_map[refresh_frame_map_index] = gf_index; |
1719 | |
|
1720 | 0 | ++*tpl_group_frames; |
1721 | 0 | ++extend_frame_count; |
1722 | 0 | ++frame_display_index; |
1723 | 0 | } |
1724 | 0 | } |
1725 | | |
1726 | 0 | void av1_init_tpl_stats(TplParams *const tpl_data) { |
1727 | 0 | tpl_data->ready = 0; |
1728 | 0 | set_tpl_stats_block_size(&tpl_data->tpl_stats_block_mis_log2, |
1729 | 0 | &tpl_data->tpl_bsize_1d); |
1730 | 0 | for (int frame_idx = 0; frame_idx < MAX_LENGTH_TPL_FRAME_STATS; ++frame_idx) { |
1731 | 0 | TplDepFrame *tpl_frame = &tpl_data->tpl_stats_buffer[frame_idx]; |
1732 | 0 | tpl_frame->is_valid = 0; |
1733 | 0 | } |
1734 | 0 | for (int frame_idx = 0; frame_idx < MAX_LAG_BUFFERS; ++frame_idx) { |
1735 | 0 | TplDepFrame *tpl_frame = &tpl_data->tpl_stats_buffer[frame_idx]; |
1736 | 0 | if (tpl_data->tpl_stats_pool[frame_idx] == NULL) continue; |
1737 | 0 | memset(tpl_data->tpl_stats_pool[frame_idx], 0, |
1738 | 0 | tpl_frame->height * tpl_frame->width * |
1739 | 0 | sizeof(*tpl_frame->tpl_stats_ptr)); |
1740 | 0 | } |
1741 | 0 | } |
1742 | | |
1743 | 0 | int av1_tpl_stats_ready(const TplParams *tpl_data, int gf_frame_index) { |
1744 | 0 | if (tpl_data->ready == 0) { |
1745 | 0 | return 0; |
1746 | 0 | } |
1747 | 0 | if (gf_frame_index >= MAX_TPL_FRAME_IDX) { |
1748 | | // The sub-GOP length exceeds the TPL buffer capacity. |
1749 | | // Hence the TPL related functions are disabled hereafter. |
1750 | 0 | return 0; |
1751 | 0 | } |
1752 | 0 | return tpl_data->tpl_frame[gf_frame_index].is_valid; |
1753 | 0 | } |
1754 | | |
1755 | 0 | static inline int eval_gop_length(double *beta, int gop_eval) { |
1756 | 0 | switch (gop_eval) { |
1757 | 0 | case 1: |
1758 | | // Allow larger GOP size if the base layer ARF has higher dependency |
1759 | | // factor than the intermediate ARF and both ARFs have reasonably high |
1760 | | // dependency factors. |
1761 | 0 | return (beta[0] >= beta[1] + 0.7) && beta[0] > 3.0; |
1762 | 0 | case 2: |
1763 | 0 | if ((beta[0] >= beta[1] + 0.4) && beta[0] > 1.6) |
1764 | 0 | return 1; // Don't shorten the gf interval |
1765 | 0 | else if ((beta[0] < beta[1] + 0.1) || beta[0] <= 1.4) |
1766 | 0 | return 0; // Shorten the gf interval |
1767 | 0 | else |
1768 | 0 | return 2; // Cannot decide the gf interval, so redo the |
1769 | | // tpl stats calculation. |
1770 | 0 | case 3: return beta[0] > 1.1; |
1771 | 0 | default: return 2; |
1772 | 0 | } |
1773 | 0 | } |
1774 | | |
1775 | | // TODO(jingning): Restructure av1_rc_pick_q_and_bounds() to narrow down |
1776 | | // the scope of input arguments. |
1777 | | void av1_tpl_preload_rc_estimate(AV1_COMP *cpi, |
1778 | 0 | const EncodeFrameParams *const frame_params) { |
1779 | 0 | AV1_COMMON *cm = &cpi->common; |
1780 | 0 | GF_GROUP *gf_group = &cpi->ppi->gf_group; |
1781 | 0 | int bottom_index, top_index; |
1782 | 0 | if (cpi->use_ducky_encode) return; |
1783 | | |
1784 | 0 | cm->current_frame.frame_type = frame_params->frame_type; |
1785 | 0 | for (int gf_index = cpi->gf_frame_index; gf_index < gf_group->size; |
1786 | 0 | ++gf_index) { |
1787 | 0 | cm->current_frame.frame_type = gf_group->frame_type[gf_index]; |
1788 | 0 | cm->show_frame = gf_group->update_type[gf_index] != ARF_UPDATE && |
1789 | 0 | gf_group->update_type[gf_index] != INTNL_ARF_UPDATE; |
1790 | 0 | gf_group->q_val[gf_index] = av1_rc_pick_q_and_bounds( |
1791 | 0 | cpi, cm->width, cm->height, gf_index, &bottom_index, &top_index); |
1792 | 0 | } |
1793 | 0 | } |
1794 | | |
1795 | | static inline int skip_tpl_for_frame(const GF_GROUP *gf_group, int frame_idx, |
1796 | | int gop_eval, int approx_gop_eval, |
1797 | 0 | int reduce_num_frames) { |
1798 | | // When gop_eval is set to 2, tpl stats calculation is done for ARFs from base |
1799 | | // layer, (base+1) layer and (base+2) layer. When gop_eval is set to 3, |
1800 | | // tpl stats calculation is limited to ARFs from base layer and (base+1) |
1801 | | // layer. |
1802 | 0 | const int num_arf_layers = (gop_eval == 2) ? 3 : 2; |
1803 | 0 | const int gop_length = get_gop_length(gf_group); |
1804 | |
|
1805 | 0 | if (gf_group->update_type[frame_idx] == INTNL_OVERLAY_UPDATE || |
1806 | 0 | gf_group->update_type[frame_idx] == OVERLAY_UPDATE) |
1807 | 0 | return 1; |
1808 | | |
1809 | | // When approx_gop_eval = 1, skip tpl stats calculation for higher layer |
1810 | | // frames and for frames beyond gop length. |
1811 | 0 | if (approx_gop_eval && (gf_group->layer_depth[frame_idx] > num_arf_layers || |
1812 | 0 | frame_idx >= gop_length)) |
1813 | 0 | return 1; |
1814 | | |
1815 | 0 | if (reduce_num_frames && gf_group->update_type[frame_idx] == LF_UPDATE && |
1816 | 0 | frame_idx < gop_length) |
1817 | 0 | return 1; |
1818 | | |
1819 | 0 | return 0; |
1820 | 0 | } |
1821 | | |
1822 | | /*!\brief Compute the frame importance from TPL stats |
1823 | | * |
1824 | | * \param[in] tpl_data TPL struct |
1825 | | * \param[in] gf_frame_index current frame index in the GOP |
1826 | | * |
1827 | | * \return frame_importance |
1828 | | */ |
1829 | | static double get_frame_importance(const TplParams *tpl_data, |
1830 | 0 | int gf_frame_index) { |
1831 | 0 | const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[gf_frame_index]; |
1832 | 0 | const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr; |
1833 | |
|
1834 | 0 | const int tpl_stride = tpl_frame->stride; |
1835 | 0 | double intra_cost_base = 0; |
1836 | 0 | double mc_dep_cost_base = 0; |
1837 | 0 | double cbcmp_base = 1; |
1838 | 0 | const int step = 1 << tpl_data->tpl_stats_block_mis_log2; |
1839 | |
|
1840 | 0 | for (int row = 0; row < tpl_frame->mi_rows; row += step) { |
1841 | 0 | for (int col = 0; col < tpl_frame->mi_cols; col += step) { |
1842 | 0 | const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos( |
1843 | 0 | row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)]; |
1844 | 0 | double cbcmp = (double)this_stats->srcrf_dist; |
1845 | 0 | const int64_t mc_dep_delta = |
1846 | 0 | RDCOST(tpl_frame->base_rdmult, this_stats->mc_dep_rate, |
1847 | 0 | this_stats->mc_dep_dist); |
1848 | 0 | double dist_scaled = (double)(this_stats->recrf_dist << RDDIV_BITS); |
1849 | 0 | dist_scaled = AOMMAX(dist_scaled, 1); |
1850 | 0 | intra_cost_base += log(dist_scaled) * cbcmp; |
1851 | 0 | mc_dep_cost_base += log(dist_scaled + mc_dep_delta) * cbcmp; |
1852 | 0 | cbcmp_base += cbcmp; |
1853 | 0 | } |
1854 | 0 | } |
1855 | 0 | return exp((mc_dep_cost_base - intra_cost_base) / cbcmp_base); |
1856 | 0 | } |
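In effect, get_frame_importance() returns a srcrf_dist-weighted geometric mean of how much each block's reconstruction distortion is amplified by the blocks that depend on it:

\[
\text{importance} = \exp\!\left(\frac{\sum_i w_i\left(\log(d_i + \delta_i) - \log d_i\right)}{1 + \sum_i w_i}\right)
= \left(\prod_i \left(\frac{d_i + \delta_i}{d_i}\right)^{w_i}\right)^{\!1/(1 + \sum_i w_i)},
\]

with w_i = srcrf_dist, d_i = max(recrf_dist << RDDIV_BITS, 1) and delta_i = RDCOST(base_rdmult, mc_dep_rate, mc_dep_dist); the extra 1 in the denominator comes from cbcmp_base being initialized to 1. A value near 1 means little of the GOP depends on this frame; larger values indicate a frame whose quality propagates widely.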
1857 | | |
1858 | | int av1_tpl_setup_stats(AV1_COMP *cpi, int gop_eval, |
1859 | 0 | const EncodeFrameParams *const frame_params) { |
1860 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
1861 | | start_timing(cpi, av1_tpl_setup_stats_time); |
1862 | | #endif |
1863 | 0 | assert(cpi->gf_frame_index == 0); |
1864 | 0 | AV1_COMMON *cm = &cpi->common; |
1865 | 0 | MultiThreadInfo *const mt_info = &cpi->mt_info; |
1866 | 0 | AV1TplRowMultiThreadInfo *const tpl_row_mt = &mt_info->tpl_row_mt; |
1867 | 0 | GF_GROUP *gf_group = &cpi->ppi->gf_group; |
1868 | 0 | EncodeFrameParams this_frame_params = *frame_params; |
1869 | 0 | TplParams *const tpl_data = &cpi->ppi->tpl_data; |
1870 | 0 | int approx_gop_eval = (gop_eval > 1); |
1871 | |
|
1872 | 0 | if (cpi->superres_mode != AOM_SUPERRES_NONE) { |
1873 | 0 | assert(cpi->superres_mode != AOM_SUPERRES_AUTO); |
1874 | 0 | av1_init_tpl_stats(tpl_data); |
1875 | 0 | return 0; |
1876 | 0 | } |
1877 | | |
1878 | 0 | cm->current_frame.frame_type = frame_params->frame_type; |
1879 | 0 | for (int gf_index = cpi->gf_frame_index; gf_index < gf_group->size; |
1880 | 0 | ++gf_index) { |
1881 | 0 | cm->current_frame.frame_type = gf_group->frame_type[gf_index]; |
1882 | 0 | av1_configure_buffer_updates(cpi, &this_frame_params.refresh_frame, |
1883 | 0 | gf_group->update_type[gf_index], |
1884 | 0 | gf_group->refbuf_state[gf_index], 0); |
1885 | |
|
1886 | 0 | memcpy(&cpi->refresh_frame, &this_frame_params.refresh_frame, |
1887 | 0 | sizeof(cpi->refresh_frame)); |
1888 | 0 | } |
1889 | |
|
1890 | 0 | int pframe_qindex; |
1891 | 0 | int tpl_gf_group_frames; |
1892 | 0 | init_gop_frames_for_tpl(cpi, frame_params, gf_group, &tpl_gf_group_frames, |
1893 | 0 | &pframe_qindex); |
1894 | |
|
1895 | 0 | cpi->ppi->p_rc.base_layer_qp = pframe_qindex; |
1896 | |
|
1897 | 0 | av1_init_tpl_stats(tpl_data); |
1898 | |
|
1899 | 0 | TplBuffers *tpl_tmp_buffers = &cpi->td.tpl_tmp_buffers; |
1900 | 0 | if (!tpl_alloc_temp_buffers(tpl_tmp_buffers, tpl_data->tpl_bsize_1d)) { |
1901 | 0 | aom_internal_error(cpi->common.error, AOM_CODEC_MEM_ERROR, |
1902 | 0 | "Error allocating tpl data"); |
1903 | 0 | } |
1904 | |
|
1905 | 0 | tpl_row_mt->sync_read_ptr = av1_tpl_row_mt_sync_read_dummy; |
1906 | 0 | tpl_row_mt->sync_write_ptr = av1_tpl_row_mt_sync_write_dummy; |
1907 | |
|
1908 | 0 | av1_setup_scale_factors_for_frame(&cm->sf_identity, cm->width, cm->height, |
1909 | 0 | cm->width, cm->height); |
1910 | |
|
1911 | 0 | if (frame_params->frame_type == KEY_FRAME) { |
1912 | 0 | av1_init_mv_probs(cm); |
1913 | 0 | } |
1914 | 0 | av1_fill_mv_costs(&cm->fc->nmvc, cm->features.cur_frame_force_integer_mv, |
1915 | 0 | cm->features.allow_high_precision_mv, cpi->td.mb.mv_costs); |
1916 | |
|
1917 | 0 | const int num_planes = |
1918 | 0 | cpi->sf.tpl_sf.use_y_only_rate_distortion ? 1 : av1_num_planes(cm); |
1919 | | // As the tpl module is called before frame-level speed features are set,
1920 | | // this speed feature is turned off here for the first GF group of the
1921 | | // key-frame interval.
1922 | 0 | int reduce_num_frames = |
1923 | 0 | cpi->sf.tpl_sf.reduce_num_frames && |
1924 | 0 | gf_group->update_type[cpi->gf_frame_index] != KF_UPDATE && |
1925 | 0 | gf_group->max_layer_depth > 2; |
1926 | | // TPL processing is skipped for frames of type LF_UPDATE when
1927 | | // 'reduce_num_frames' is 1, which affects the r0 calculation. Thus, a
1928 | | // factor to adjust r0 is used. The value of 1.6 corresponds to using ~60%
1929 | | // of the frames in the gf group on average.
1930 | 0 | tpl_data->r0_adjust_factor = reduce_num_frames ? 1.6 : 1.0; |
1931 | | |
1932 | | // Backward propagation from tpl_group_frames to 1. |
1933 | 0 | for (int frame_idx = cpi->gf_frame_index; frame_idx < tpl_gf_group_frames; |
1934 | 0 | ++frame_idx) { |
1935 | 0 | if (skip_tpl_for_frame(gf_group, frame_idx, gop_eval, approx_gop_eval, |
1936 | 0 | reduce_num_frames)) |
1937 | 0 | continue; |
1938 | | |
1939 | 0 | init_mc_flow_dispenser(cpi, frame_idx, pframe_qindex); |
1940 | 0 | if (mt_info->num_workers > 1) { |
1941 | 0 | tpl_row_mt->sync_read_ptr = av1_tpl_row_mt_sync_read; |
1942 | 0 | tpl_row_mt->sync_write_ptr = av1_tpl_row_mt_sync_write; |
1943 | 0 | av1_mc_flow_dispenser_mt(cpi); |
1944 | 0 | } else { |
1945 | 0 | mc_flow_dispenser(cpi); |
1946 | 0 | } |
1947 | | #if CONFIG_BITRATE_ACCURACY |
1948 | | av1_tpl_txfm_stats_update_abs_coeff_mean(&cpi->td.tpl_txfm_stats); |
1949 | | av1_tpl_store_txfm_stats(tpl_data, &cpi->td.tpl_txfm_stats, frame_idx); |
1950 | | #endif // CONFIG_BITRATE_ACCURACY |
1951 | | #if CONFIG_RATECTRL_LOG && CONFIG_THREE_PASS && CONFIG_BITRATE_ACCURACY |
1952 | | if (cpi->oxcf.pass == AOM_RC_THIRD_PASS) { |
1953 | | int frame_coding_idx = |
1954 | | av1_vbr_rc_frame_coding_idx(&cpi->vbr_rc_info, frame_idx); |
1955 | | rc_log_frame_stats(&cpi->rc_log, frame_coding_idx, |
1956 | | &cpi->td.tpl_txfm_stats); |
1957 | | } |
1958 | | #endif // CONFIG_RATECTRL_LOG |
1959 | |
|
1960 | 0 | aom_extend_frame_borders(tpl_data->tpl_frame[frame_idx].rec_picture, |
1961 | 0 | num_planes); |
1962 | 0 | } |
1963 | |
|
1964 | 0 | for (int frame_idx = tpl_gf_group_frames - 1; |
1965 | 0 | frame_idx >= cpi->gf_frame_index; --frame_idx) { |
1966 | 0 | if (skip_tpl_for_frame(gf_group, frame_idx, gop_eval, approx_gop_eval, |
1967 | 0 | reduce_num_frames)) |
1968 | 0 | continue; |
1969 | | |
1970 | 0 | mc_flow_synthesizer(tpl_data, frame_idx, cm->mi_params.mi_rows, |
1971 | 0 | cm->mi_params.mi_cols); |
1972 | 0 | } |
1973 | |
|
1974 | 0 | av1_configure_buffer_updates(cpi, &this_frame_params.refresh_frame, |
1975 | 0 | gf_group->update_type[cpi->gf_frame_index], |
1976 | 0 | gf_group->refbuf_state[cpi->gf_frame_index], 0);
1977 | 0 | cm->current_frame.frame_type = frame_params->frame_type; |
1978 | 0 | cm->show_frame = frame_params->show_frame; |
1979 | |
|
1980 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
1981 | | // Record the time if the function returns. |
1982 | | if (cpi->common.tiles.large_scale || gf_group->max_layer_depth_allowed == 0 || |
1983 | | !gop_eval) |
1984 | | end_timing(cpi, av1_tpl_setup_stats_time); |
1985 | | #endif |
1986 | |
|
1987 | 0 | tpl_dealloc_temp_buffers(tpl_tmp_buffers); |
1988 | |
|
1989 | 0 | if (!approx_gop_eval) { |
1990 | 0 | tpl_data->ready = 1; |
1991 | 0 | } |
1992 | 0 | if (cpi->common.tiles.large_scale) return 0; |
1993 | 0 | if (gf_group->max_layer_depth_allowed == 0) return 1; |
1994 | 0 | if (!gop_eval) return 0; |
1995 | 0 | assert(gf_group->arf_index >= 0); |
1996 | |
|
1997 | 0 | double beta[2] = { 0.0 }; |
1998 | 0 | const int frame_idx_0 = gf_group->arf_index; |
1999 | 0 | const int frame_idx_1 = |
2000 | 0 | AOMMIN(tpl_gf_group_frames - 1, gf_group->arf_index + 1); |
2001 | 0 | beta[0] = get_frame_importance(tpl_data, frame_idx_0); |
2002 | 0 | beta[1] = get_frame_importance(tpl_data, frame_idx_1); |
2003 | | #if CONFIG_COLLECT_COMPONENT_TIMING |
2004 | | end_timing(cpi, av1_tpl_setup_stats_time); |
2005 | | #endif |
2006 | 0 | return eval_gop_length(beta, gop_eval); |
2007 | 0 | } |
2008 | | |
2009 | 0 | void av1_tpl_rdmult_setup(AV1_COMP *cpi) { |
2010 | 0 | const AV1_COMMON *const cm = &cpi->common; |
2011 | 0 | const int tpl_idx = cpi->gf_frame_index; |
2012 | |
|
2013 | 0 | assert( |
2014 | 0 | IMPLIES(cpi->ppi->gf_group.size > 0, tpl_idx < cpi->ppi->gf_group.size)); |
2015 | |
|
2016 | 0 | TplParams *const tpl_data = &cpi->ppi->tpl_data; |
2017 | 0 | const TplDepFrame *const tpl_frame = &tpl_data->tpl_frame[tpl_idx]; |
2018 | |
|
2019 | 0 | if (!tpl_frame->is_valid) return; |
2020 | | |
2021 | 0 | const TplDepStats *const tpl_stats = tpl_frame->tpl_stats_ptr; |
2022 | 0 | const int tpl_stride = tpl_frame->stride; |
2023 | 0 | const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width); |
2024 | |
|
2025 | 0 | const int block_size = BLOCK_16X16; |
2026 | 0 | const int num_mi_w = mi_size_wide[block_size]; |
2027 | 0 | const int num_mi_h = mi_size_high[block_size]; |
2028 | 0 | const int num_cols = (mi_cols_sr + num_mi_w - 1) / num_mi_w; |
2029 | 0 | const int num_rows = (cm->mi_params.mi_rows + num_mi_h - 1) / num_mi_h; |
2030 | 0 | const double c = 1.2; |
2031 | 0 | const int step = 1 << tpl_data->tpl_stats_block_mis_log2; |
2032 | | |
2033 | | // Loop through each 'block_size' X 'block_size' block. |
2034 | 0 | for (int row = 0; row < num_rows; row++) { |
2035 | 0 | for (int col = 0; col < num_cols; col++) { |
2036 | 0 | double intra_cost = 0.0, mc_dep_cost = 0.0; |
2037 | | // Loop through each mi block. |
2038 | 0 | for (int mi_row = row * num_mi_h; mi_row < (row + 1) * num_mi_h; |
2039 | 0 | mi_row += step) { |
2040 | 0 | for (int mi_col = col * num_mi_w; mi_col < (col + 1) * num_mi_w; |
2041 | 0 | mi_col += step) { |
2042 | 0 | if (mi_row >= cm->mi_params.mi_rows || mi_col >= mi_cols_sr) continue; |
2043 | 0 | const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos( |
2044 | 0 | mi_row, mi_col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)]; |
2045 | 0 | int64_t mc_dep_delta = |
2046 | 0 | RDCOST(tpl_frame->base_rdmult, this_stats->mc_dep_rate, |
2047 | 0 | this_stats->mc_dep_dist); |
2048 | 0 | intra_cost += (double)(this_stats->recrf_dist << RDDIV_BITS); |
2049 | 0 | mc_dep_cost += |
2050 | 0 | (double)(this_stats->recrf_dist << RDDIV_BITS) + mc_dep_delta; |
2051 | 0 | } |
2052 | 0 | } |
2053 | 0 | const double rk = intra_cost / mc_dep_cost; |
2054 | 0 | const int index = row * num_cols + col; |
2055 | 0 | cpi->tpl_rdmult_scaling_factors[index] = rk / cpi->rd.r0 + c; |
2056 | 0 | } |
2057 | 0 | } |
2058 | 0 | } |
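For each 16x16 block the factor written to tpl_rdmult_scaling_factors is derived from the ratio of the block's own reconstruction cost to its dependency-augmented cost, normalized by the frame-level r0:

\[
r_k = \frac{\sum (\text{recrf\_dist} \ll \text{RDDIV\_BITS})}{\sum \big((\text{recrf\_dist} \ll \text{RDDIV\_BITS}) + \text{mc\_dep\_delta}\big)}, \qquad
\text{scale}_k = \frac{r_k}{r_0} + c, \quad c = 1.2 .
\]

A heavily referenced block has a large dependency term, hence a small r_k and a below-average scale; the smaller effective rdmult then steers more bits toward it.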
2059 | | |
2060 | | void av1_tpl_rdmult_setup_sb(AV1_COMP *cpi, MACROBLOCK *const x, |
2061 | 0 | BLOCK_SIZE sb_size, int mi_row, int mi_col) { |
2062 | 0 | AV1_COMMON *const cm = &cpi->common; |
2063 | 0 | GF_GROUP *gf_group = &cpi->ppi->gf_group; |
2064 | 0 | assert(IMPLIES(cpi->ppi->gf_group.size > 0, |
2065 | 0 | cpi->gf_frame_index < cpi->ppi->gf_group.size)); |
2066 | 0 | const int tpl_idx = cpi->gf_frame_index; |
2067 | |
|
2068 | 0 | const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100)); |
2069 | 0 | const int layer_depth = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6); |
2070 | 0 | const FRAME_TYPE frame_type = cm->current_frame.frame_type; |
2071 | |
|
2072 | 0 | if (tpl_idx >= MAX_TPL_FRAME_IDX) return; |
2073 | 0 | TplDepFrame *tpl_frame = &cpi->ppi->tpl_data.tpl_frame[tpl_idx]; |
2074 | 0 | if (!tpl_frame->is_valid) return; |
2075 | 0 | if (!is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) return; |
2076 | 0 | if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return; |
2077 | | |
2078 | 0 | const int mi_col_sr = |
2079 | 0 | coded_to_superres_mi(mi_col, cm->superres_scale_denominator); |
2080 | 0 | const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width); |
2081 | 0 | const int sb_mi_width_sr = coded_to_superres_mi( |
2082 | 0 | mi_size_wide[sb_size], cm->superres_scale_denominator); |
2083 | |
|
2084 | 0 | const int bsize_base = BLOCK_16X16; |
2085 | 0 | const int num_mi_w = mi_size_wide[bsize_base]; |
2086 | 0 | const int num_mi_h = mi_size_high[bsize_base]; |
2087 | 0 | const int num_cols = (mi_cols_sr + num_mi_w - 1) / num_mi_w; |
2088 | 0 | const int num_rows = (cm->mi_params.mi_rows + num_mi_h - 1) / num_mi_h; |
2089 | 0 | const int num_bcols = (sb_mi_width_sr + num_mi_w - 1) / num_mi_w; |
2090 | 0 | const int num_brows = (mi_size_high[sb_size] + num_mi_h - 1) / num_mi_h; |
2091 | 0 | int row, col; |
2092 | |
|
2093 | 0 | double base_block_count = 0.0; |
2094 | 0 | double log_sum = 0.0; |
2095 | |
|
2096 | 0 | for (row = mi_row / num_mi_w; |
2097 | 0 | row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) { |
2098 | 0 | for (col = mi_col_sr / num_mi_h; |
2099 | 0 | col < num_cols && col < mi_col_sr / num_mi_h + num_bcols; ++col) { |
2100 | 0 | const int index = row * num_cols + col; |
2101 | 0 | log_sum += log(cpi->tpl_rdmult_scaling_factors[index]); |
2102 | 0 | base_block_count += 1.0; |
2103 | 0 | } |
2104 | 0 | } |
2105 | |
|
2106 | 0 | const CommonQuantParams *quant_params = &cm->quant_params; |
2107 | |
|
2108 | 0 | const int orig_qindex_rdmult = |
2109 | 0 | quant_params->base_qindex + quant_params->y_dc_delta_q; |
2110 | 0 | const int orig_rdmult = av1_compute_rd_mult( |
2111 | 0 | orig_qindex_rdmult, cm->seq_params->bit_depth, |
2112 | 0 | cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth, |
2113 | 0 | boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets, |
2114 | 0 | is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning); |
2115 | |
|
2116 | 0 | const int new_qindex_rdmult = quant_params->base_qindex + |
2117 | 0 | x->rdmult_delta_qindex + |
2118 | 0 | quant_params->y_dc_delta_q; |
2119 | 0 | const int new_rdmult = av1_compute_rd_mult( |
2120 | 0 | new_qindex_rdmult, cm->seq_params->bit_depth, |
2121 | 0 | cpi->ppi->gf_group.update_type[cpi->gf_frame_index], layer_depth, |
2122 | 0 | boost_index, frame_type, cpi->oxcf.q_cfg.use_fixed_qp_offsets, |
2123 | 0 | is_stat_consumption_stage(cpi), cpi->oxcf.tune_cfg.tuning); |
2124 | |
|
2125 | 0 | const double scaling_factor = (double)new_rdmult / (double)orig_rdmult; |
2126 | |
|
2127 | 0 | double scale_adj = log(scaling_factor) - log_sum / base_block_count; |
2128 | 0 | scale_adj = exp_bounded(scale_adj); |
2129 | |
|
2130 | 0 | for (row = mi_row / num_mi_w; |
2131 | 0 | row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) { |
2132 | 0 | for (col = mi_col_sr / num_mi_h; |
2133 | 0 | col < num_cols && col < mi_col_sr / num_mi_h + num_bcols; ++col) { |
2134 | 0 | const int index = row * num_cols + col; |
2135 | 0 | cpi->ppi->tpl_sb_rdmult_scaling_factors[index] = |
2136 | 0 | scale_adj * cpi->tpl_rdmult_scaling_factors[index]; |
2137 | 0 | } |
2138 | 0 | } |
2139 | 0 | } |
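The superblock-level pass renormalizes the per-16x16 factors so that their geometric mean over the superblock matches the rdmult ratio implied by the superblock's delta-q:

\[
\text{scale\_adj} = \exp\!\left(\log\frac{\text{new\_rdmult}}{\text{orig\_rdmult}} - \frac{1}{N}\sum_{k \in \text{SB}} \log \text{scale}_k\right), \qquad
\text{sb\_scale}_k = \text{scale\_adj}\cdot\text{scale}_k,
\]

where N is base_block_count. After the adjustment, \(\big(\prod_{k}\text{sb\_scale}_k\big)^{1/N} = \text{new\_rdmult}/\text{orig\_rdmult}\).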
2140 | | |
2141 | 0 | double av1_exponential_entropy(double q_step, double b) { |
2142 | 0 | b = AOMMAX(b, TPL_EPSILON); |
2143 | 0 | double z = fmax(exp_bounded(-q_step / b), TPL_EPSILON); |
2144 | 0 | return -log2(1 - z) - z * log2(z) / (1 - z); |
2145 | 0 | } |
2146 | | |
2147 | 0 | double av1_laplace_entropy(double q_step, double b, double zero_bin_ratio) { |
2148 | | // zero bin's size is zero_bin_ratio * q_step |
2149 | | // non-zero bin's size is q_step |
2150 | 0 | b = AOMMAX(b, TPL_EPSILON); |
2151 | 0 | double z = fmax(exp_bounded(-zero_bin_ratio / 2 * q_step / b), TPL_EPSILON); |
2152 | 0 | double h = av1_exponential_entropy(q_step, b); |
2153 | 0 | double r = -(1 - z) * log2(1 - z) - z * log2(z) + z * (h + 1); |
2154 | 0 | return r; |
2155 | 0 | } |
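Both entropy helpers model the magnitude of a transform coefficient as exponentially distributed with mean b, so with z = exp(-q_step/b) the quantized magnitude is geometric and

\[
H_{\exp}(q_{\text{step}}, b) = -\log_2(1 - z) - \frac{z \log_2 z}{1 - z}, \qquad z = e^{-q_{\text{step}}/b}.
\]

av1_laplace_entropy() additionally models a widened zero bin of size zero_bin_ratio * q_step. With z_0 = exp(-zero_bin_ratio * q_step / (2b)) being the probability of a nonzero quantized value, the per-coefficient rate is

\[
R = -(1 - z_0)\log_2(1 - z_0) - z_0 \log_2 z_0 + z_0\big(H_{\exp}(q_{\text{step}}, b) + 1\big),
\]

i.e. the zero/nonzero decision plus, for nonzero values, the magnitude entropy and one sign bit.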
2156 | | |
2157 | | #if CONFIG_BITRATE_ACCURACY |
2158 | | double av1_laplace_estimate_frame_rate(int q_index, int block_count, |
2159 | | const double *abs_coeff_mean, |
2160 | | int coeff_num) { |
2161 | | double zero_bin_ratio = 2; |
2162 | | double dc_q_step = av1_dc_quant_QTX(q_index, 0, AOM_BITS_8) / 4.; |
2163 | | double ac_q_step = av1_ac_quant_QTX(q_index, 0, AOM_BITS_8) / 4.; |
2164 | | double est_rate = 0; |
2165 | | // dc coeff |
2166 | | est_rate += av1_laplace_entropy(dc_q_step, abs_coeff_mean[0], zero_bin_ratio); |
2167 | | // ac coeff |
2168 | | for (int i = 1; i < coeff_num; ++i) { |
2169 | | est_rate += |
2170 | | av1_laplace_entropy(ac_q_step, abs_coeff_mean[i], zero_bin_ratio); |
2171 | | } |
2172 | | est_rate *= block_count; |
2173 | | return est_rate; |
2174 | | } |
2175 | | #endif // CONFIG_BITRATE_ACCURACY |
2176 | | |
2177 | | double av1_estimate_coeff_entropy(double q_step, double b, |
2178 | 0 | double zero_bin_ratio, int qcoeff) { |
2179 | 0 | b = AOMMAX(b, TPL_EPSILON); |
2180 | 0 | int abs_qcoeff = abs(qcoeff); |
2181 | 0 | double z0 = fmax(exp_bounded(-zero_bin_ratio / 2 * q_step / b), TPL_EPSILON); |
2182 | 0 | if (abs_qcoeff == 0) { |
2183 | 0 | double r = -log2(1 - z0); |
2184 | 0 | return r; |
2185 | 0 | } else { |
2186 | 0 | double z = fmax(exp_bounded(-q_step / b), TPL_EPSILON); |
2187 | 0 | double r = 1 - log2(z0) - log2(1 - z) - (abs_qcoeff - 1) * log2(z); |
2188 | 0 | return r; |
2189 | 0 | } |
2190 | 0 | } |
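Under the same model, the per-coefficient estimate above is the self-information of the bin the quantized value falls into, plus one sign bit for nonzero values:

\[
R(q) =
\begin{cases}
-\log_2(1 - z_0), & q = 0,\\
1 - \log_2 z_0 - \log_2(1 - z) - (|q| - 1)\log_2 z, & q \neq 0,
\end{cases}
\qquad z_0 = e^{-\rho\, q_{\text{step}}/(2b)},\; z = e^{-q_{\text{step}}/b},
\]

with rho = zero_bin_ratio, since the probability of landing in the |q|-th nonzero bin is z_0\, z^{|q|-1}(1 - z).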
2191 | | |
2192 | | #if CONFIG_RD_COMMAND |
2193 | | void av1_read_rd_command(const char *filepath, RD_COMMAND *rd_command) { |
2194 | | FILE *fptr = fopen(filepath, "r"); |
2195 | | fscanf(fptr, "%d", &rd_command->frame_count); |
2196 | | rd_command->frame_index = 0; |
2197 | | for (int i = 0; i < rd_command->frame_count; ++i) { |
2198 | | int option; |
2199 | | fscanf(fptr, "%d", &option); |
2200 | | rd_command->option_ls[i] = (RD_OPTION)option; |
2201 | | if (option == RD_OPTION_SET_Q) { |
2202 | | fscanf(fptr, "%d", &rd_command->q_index_ls[i]); |
2203 | | } else if (option == RD_OPTION_SET_Q_RDMULT) { |
2204 | | fscanf(fptr, "%d", &rd_command->q_index_ls[i]); |
2205 | | fscanf(fptr, "%d", &rd_command->rdmult_ls[i]); |
2206 | | } |
2207 | | } |
2208 | | fclose(fptr); |
2209 | | } |
2210 | | #endif // CONFIG_RD_COMMAND |
2211 | | |
2212 | 0 | double av1_tpl_get_qstep_ratio(const TplParams *tpl_data, int gf_frame_index) { |
2213 | 0 | if (!av1_tpl_stats_ready(tpl_data, gf_frame_index)) { |
2214 | 0 | return 1; |
2215 | 0 | } |
2216 | 0 | const double frame_importance = |
2217 | 0 | get_frame_importance(tpl_data, gf_frame_index); |
2218 | 0 | return sqrt(1 / frame_importance); |
2219 | 0 | } |
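Restating the mapping from importance to quantizer step:

\[
\text{qstep\_ratio} = \sqrt{1/\text{importance}},
\]

so a frame whose distortion propagates into roughly four times its own (importance = 4) is assigned half the leaf quantization step. The square root is consistent with the high-rate approximation that distortion scales with the square of the quantizer step.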
2220 | | |
2221 | | int av1_get_q_index_from_qstep_ratio(int leaf_qindex, double qstep_ratio, |
2222 | 0 | aom_bit_depth_t bit_depth) { |
2223 | 0 | const double leaf_qstep = av1_dc_quant_QTX(leaf_qindex, 0, bit_depth); |
2224 | 0 | const double target_qstep = leaf_qstep * qstep_ratio; |
2225 | 0 | int qindex = leaf_qindex; |
2226 | 0 | if (qstep_ratio < 1.0) { |
2227 | 0 | for (qindex = leaf_qindex; qindex > 0; --qindex) { |
2228 | 0 | const double qstep = av1_dc_quant_QTX(qindex, 0, bit_depth); |
2229 | 0 | if (qstep <= target_qstep) break; |
2230 | 0 | } |
2231 | 0 | } else { |
2232 | 0 | for (qindex = leaf_qindex; qindex <= MAXQ; ++qindex) { |
2233 | 0 | const double qstep = av1_dc_quant_QTX(qindex, 0, bit_depth); |
2234 | 0 | if (qstep >= target_qstep) break; |
2235 | 0 | } |
2236 | 0 | } |
2237 | 0 | return qindex; |
2238 | 0 | } |
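A minimal, self-contained sketch of the same search, using a hypothetical toy_qstep() lookup in place of av1_dc_quant_QTX() (the stand-in values are illustrative only, not the library's quantizer tables):

#include <stdio.h>

/* Hypothetical stand-in for the DC quantizer-step lookup. */
static double toy_qstep(int qindex) { return 4.0 + 2.0 * qindex; }

/* Walk the q-index axis until the quantizer step crosses
 * leaf_qstep * qstep_ratio, mirroring av1_get_q_index_from_qstep_ratio(). */
static int toy_qindex_from_ratio(int leaf_qindex, double qstep_ratio,
                                 int maxq) {
  const double target = toy_qstep(leaf_qindex) * qstep_ratio;
  int qindex = leaf_qindex;
  if (qstep_ratio < 1.0) {
    while (qindex > 0 && toy_qstep(qindex) > target) --qindex;
  } else {
    while (qindex < maxq && toy_qstep(qindex) < target) ++qindex;
  }
  return qindex;
}

int main(void) {
  /* An ARF with qstep_ratio 0.5 lands on a much lower q index. */
  printf("%d\n", toy_qindex_from_ratio(128, 0.5, 255));
  return 0;
}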
2239 | | |
2240 | | int av1_tpl_get_q_index(const TplParams *tpl_data, int gf_frame_index, |
2241 | 0 | int leaf_qindex, aom_bit_depth_t bit_depth) { |
2242 | 0 | const double qstep_ratio = av1_tpl_get_qstep_ratio(tpl_data, gf_frame_index); |
2243 | 0 | return av1_get_q_index_from_qstep_ratio(leaf_qindex, qstep_ratio, bit_depth); |
2244 | 0 | } |
2245 | | |
2246 | | #if CONFIG_BITRATE_ACCURACY |
2247 | | void av1_vbr_rc_init(VBR_RATECTRL_INFO *vbr_rc_info, double total_bit_budget, |
2248 | | int show_frame_count) { |
2249 | | av1_zero(*vbr_rc_info); |
2250 | | vbr_rc_info->ready = 0; |
2251 | | vbr_rc_info->total_bit_budget = total_bit_budget; |
2252 | | vbr_rc_info->show_frame_count = show_frame_count; |
2253 | | const double scale_factors[FRAME_UPDATE_TYPES] = { 0.94559, 0.94559, 1, |
2254 | | 0.94559, 1, 1, |
2255 | | 0.94559 }; |
2256 | | |
2257 | | // TODO(angiebird): Based on the previous code, only the scale factor 0.94559
2258 | | // will be used in most of the cases with --limit=17. Figure out if the
2259 | | // following scale factors work better.
2260 | | // const double scale_factors[FRAME_UPDATE_TYPES] = { 0.94559, 0.12040, 1, |
2261 | | // 1.10199, 1, 1, |
2262 | | // 0.16393 }; |
2263 | | |
2264 | | const double mv_scale_factors[FRAME_UPDATE_TYPES] = { 3, 3, 3, 3, 3, 3, 3 }; |
2265 | | memcpy(vbr_rc_info->scale_factors, scale_factors, |
2266 | | sizeof(scale_factors[0]) * FRAME_UPDATE_TYPES); |
2267 | | memcpy(vbr_rc_info->mv_scale_factors, mv_scale_factors, |
2268 | | sizeof(mv_scale_factors[0]) * FRAME_UPDATE_TYPES); |
2269 | | |
2270 | | vbr_rc_reset_gop_data(vbr_rc_info); |
2271 | | #if CONFIG_THREE_PASS |
2272 | | // TODO(angiebird): Explain why we use -1 here |
2273 | | vbr_rc_info->cur_gop_idx = -1; |
2274 | | vbr_rc_info->gop_count = 0; |
2275 | | vbr_rc_info->total_frame_count = 0; |
2276 | | #endif // CONFIG_THREE_PASS |
2277 | | } |
2278 | | |
2279 | | #if CONFIG_THREE_PASS |
2280 | | int av1_vbr_rc_frame_coding_idx(const VBR_RATECTRL_INFO *vbr_rc_info, |
2281 | | int gf_frame_index) { |
2282 | | int gop_idx = vbr_rc_info->cur_gop_idx; |
2283 | | int gop_start_idx = vbr_rc_info->gop_start_idx_list[gop_idx]; |
2284 | | return gop_start_idx + gf_frame_index; |
2285 | | } |
2286 | | |
2287 | | void av1_vbr_rc_append_tpl_info(VBR_RATECTRL_INFO *vbr_rc_info, |
2288 | | const TPL_INFO *tpl_info) { |
2289 | | int gop_start_idx = vbr_rc_info->total_frame_count; |
2290 | | vbr_rc_info->gop_start_idx_list[vbr_rc_info->gop_count] = gop_start_idx; |
2291 | | vbr_rc_info->gop_length_list[vbr_rc_info->gop_count] = tpl_info->gf_length; |
2292 | | assert(gop_start_idx + tpl_info->gf_length <= VBR_RC_INFO_MAX_FRAMES); |
2293 | | for (int i = 0; i < tpl_info->gf_length; ++i) { |
2294 | | vbr_rc_info->txfm_stats_list[gop_start_idx + i] = |
2295 | | tpl_info->txfm_stats_list[i]; |
2296 | | vbr_rc_info->qstep_ratio_list[gop_start_idx + i] = |
2297 | | tpl_info->qstep_ratio_ls[i]; |
2298 | | vbr_rc_info->update_type_list[gop_start_idx + i] = |
2299 | | tpl_info->update_type_list[i]; |
2300 | | } |
2301 | | vbr_rc_info->total_frame_count += tpl_info->gf_length; |
2302 | | vbr_rc_info->gop_count++; |
2303 | | } |
2304 | | #endif // CONFIG_THREE_PASS |
2305 | | |
2306 | | void av1_vbr_rc_set_gop_bit_budget(VBR_RATECTRL_INFO *vbr_rc_info, |
2307 | | int gop_showframe_count) { |
2308 | | vbr_rc_info->gop_showframe_count = gop_showframe_count; |
2309 | | vbr_rc_info->gop_bit_budget = vbr_rc_info->total_bit_budget * |
2310 | | gop_showframe_count / |
2311 | | vbr_rc_info->show_frame_count; |
2312 | | } |
2313 | | |
2314 | | void av1_vbr_rc_compute_q_indices(int base_q_index, int frame_count, |
2315 | | const double *qstep_ratio_list, |
2316 | | aom_bit_depth_t bit_depth, |
2317 | | int *q_index_list) { |
2318 | | for (int i = 0; i < frame_count; ++i) { |
2319 | | q_index_list[i] = av1_get_q_index_from_qstep_ratio( |
2320 | | base_q_index, qstep_ratio_list[i], bit_depth); |
2321 | | } |
2322 | | } |
2323 | | |
2324 | | double av1_vbr_rc_info_estimate_gop_bitrate( |
2325 | | int base_q_index, aom_bit_depth_t bit_depth, |
2326 | | const double *update_type_scale_factors, int frame_count, |
2327 | | const FRAME_UPDATE_TYPE *update_type_list, const double *qstep_ratio_list, |
2328 | | const TplTxfmStats *stats_list, int *q_index_list, |
2329 | | double *estimated_bitrate_byframe) { |
2330 | | av1_vbr_rc_compute_q_indices(base_q_index, frame_count, qstep_ratio_list, |
2331 | | bit_depth, q_index_list); |
2332 | | double estimated_gop_bitrate = 0; |
2333 | | for (int frame_index = 0; frame_index < frame_count; frame_index++) { |
2334 | | const TplTxfmStats *frame_stats = &stats_list[frame_index]; |
2335 | | double frame_bitrate = 0; |
2336 | | if (frame_stats->ready) { |
2337 | | int q_index = q_index_list[frame_index]; |
2338 | | |
2339 | | frame_bitrate = av1_laplace_estimate_frame_rate( |
2340 | | q_index, frame_stats->txfm_block_count, frame_stats->abs_coeff_mean, |
2341 | | frame_stats->coeff_num); |
2342 | | } |
2343 | | FRAME_UPDATE_TYPE update_type = update_type_list[frame_index]; |
2344 | | estimated_gop_bitrate += |
2345 | | frame_bitrate * update_type_scale_factors[update_type]; |
2346 | | if (estimated_bitrate_byframe != NULL) { |
2347 | | estimated_bitrate_byframe[frame_index] = frame_bitrate; |
2348 | | } |
2349 | | } |
2350 | | return estimated_gop_bitrate; |
2351 | | } |
2352 | | |
2353 | | int av1_vbr_rc_info_estimate_base_q( |
2354 | | double bit_budget, aom_bit_depth_t bit_depth, |
2355 | | const double *update_type_scale_factors, int frame_count, |
2356 | | const FRAME_UPDATE_TYPE *update_type_list, const double *qstep_ratio_list, |
2357 | | const TplTxfmStats *stats_list, int *q_index_list, |
2358 | | double *estimated_bitrate_byframe) { |
2359 | | int q_max = 255; // Maximum q value. |
2360 | | int q_min = 0; // Minimum q value. |
2361 | | int q = (q_max + q_min) / 2; |
2362 | | |
2363 | | double q_max_estimate = av1_vbr_rc_info_estimate_gop_bitrate( |
2364 | | q_max, bit_depth, update_type_scale_factors, frame_count, |
2365 | | update_type_list, qstep_ratio_list, stats_list, q_index_list, |
2366 | | estimated_bitrate_byframe); |
2367 | | |
2368 | | double q_min_estimate = av1_vbr_rc_info_estimate_gop_bitrate( |
2369 | | q_min, bit_depth, update_type_scale_factors, frame_count, |
2370 | | update_type_list, qstep_ratio_list, stats_list, q_index_list, |
2371 | | estimated_bitrate_byframe); |
2372 | | while (q_min + 1 < q_max) { |
2373 | | double estimate = av1_vbr_rc_info_estimate_gop_bitrate( |
2374 | | q, bit_depth, update_type_scale_factors, frame_count, update_type_list, |
2375 | | qstep_ratio_list, stats_list, q_index_list, estimated_bitrate_byframe); |
2376 | | if (estimate > bit_budget) { |
2377 | | q_min = q; |
2378 | | q_min_estimate = estimate; |
2379 | | } else { |
2380 | | q_max = q; |
2381 | | q_max_estimate = estimate; |
2382 | | } |
2383 | | q = (q_max + q_min) / 2; |
2384 | | } |
2385 | | // Pick the estimate that lands closest to the budget. |
2386 | | if (fabs(q_max_estimate - bit_budget) < fabs(q_min_estimate - bit_budget)) { |
2387 | | q = q_max; |
2388 | | } else { |
2389 | | q = q_min; |
2390 | | } |
2391 | | // Update q_index_list and vbr_rc_info. |
2392 | | av1_vbr_rc_info_estimate_gop_bitrate( |
2393 | | q, bit_depth, update_type_scale_factors, frame_count, update_type_list, |
2394 | | qstep_ratio_list, stats_list, q_index_list, estimated_bitrate_byframe); |
2395 | | return q; |
2396 | | } |
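The base q search relies on the estimated GOP bitrate being monotonically non-increasing in q, which makes a bisection valid. A self-contained sketch of the same bracketing logic against a hypothetical monotone rate model (toy_gop_bits() stands in for av1_vbr_rc_info_estimate_gop_bitrate()):

#include <math.h>
#include <stdio.h>

/* Hypothetical monotone-decreasing rate model, in bits. */
static double toy_gop_bits(int q) { return 4.0e6 * exp(-q / 48.0); }

/* Bisect q in [0, 255] until the bracket is adjacent, then pick the
 * endpoint whose estimate is closest to the budget, mirroring the
 * selection step in av1_vbr_rc_info_estimate_base_q(). */
static int toy_estimate_base_q(double bit_budget) {
  int q_min = 0, q_max = 255;
  double est_min = toy_gop_bits(q_min);  /* estimate at the low-q end */
  double est_max = toy_gop_bits(q_max);  /* estimate at the high-q end */
  while (q_min + 1 < q_max) {
    const int q = (q_min + q_max) / 2;
    const double est = toy_gop_bits(q);
    if (est > bit_budget) {
      q_min = q;  /* still over budget: need a larger q */
      est_min = est;
    } else {
      q_max = q;  /* within budget: q need not be larger than this */
      est_max = est;
    }
  }
  return fabs(est_max - bit_budget) < fabs(est_min - bit_budget) ? q_max
                                                                 : q_min;
}

int main(void) {
  printf("base q for a 1 Mbit GOP budget: %d\n", toy_estimate_base_q(1.0e6));
  return 0;
}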
2397 | | void av1_vbr_rc_update_q_index_list(VBR_RATECTRL_INFO *vbr_rc_info, |
2398 | | const TplParams *tpl_data, |
2399 | | const GF_GROUP *gf_group, |
2400 | | aom_bit_depth_t bit_depth) { |
2401 | | vbr_rc_info->q_index_list_ready = 1; |
2402 | | double gop_bit_budget = vbr_rc_info->gop_bit_budget; |
2403 | | |
2404 | | for (int i = 0; i < gf_group->size; i++) { |
2405 | | vbr_rc_info->qstep_ratio_list[i] = av1_tpl_get_qstep_ratio(tpl_data, i); |
2406 | | } |
2407 | | |
2408 | | double mv_bits = 0; |
2409 | | for (int i = 0; i < gf_group->size; i++) { |
2410 | | double frame_mv_bits = 0; |
2411 | | if (av1_tpl_stats_ready(tpl_data, i)) { |
2412 | | TplDepFrame *tpl_frame = &tpl_data->tpl_frame[i]; |
2413 | | frame_mv_bits = av1_tpl_compute_frame_mv_entropy( |
2414 | | tpl_frame, tpl_data->tpl_stats_block_mis_log2); |
2415 | | FRAME_UPDATE_TYPE update_type = gf_group->update_type[i];
2416 | | mv_bits += frame_mv_bits * vbr_rc_info->mv_scale_factors[update_type];
2417 | | } |
2418 | | } |
2419 | | |
2420 | | mv_bits = AOMMIN(mv_bits, 0.6 * gop_bit_budget); |
2421 | | gop_bit_budget -= mv_bits; |
2422 | | |
2423 | | vbr_rc_info->base_q_index = av1_vbr_rc_info_estimate_base_q( |
2424 | | gop_bit_budget, bit_depth, vbr_rc_info->scale_factors, gf_group->size, |
2425 | | gf_group->update_type, vbr_rc_info->qstep_ratio_list, |
2426 | | tpl_data->txfm_stats_list, vbr_rc_info->q_index_list, NULL); |
2427 | | } |
2428 | | |
2429 | | #endif // CONFIG_BITRATE_ACCURACY |
2430 | | |
2431 | | // Use the upper and left neighbor blocks as the reference MVs.
2432 | | // Compute the minimum difference between current MV and reference MV. |
2433 | | int_mv av1_compute_mv_difference(const TplDepFrame *tpl_frame, int row, int col, |
2434 | 0 | int step, int tpl_stride, int right_shift) { |
2435 | 0 | const TplDepStats *tpl_stats = |
2436 | 0 | &tpl_frame |
2437 | 0 | ->tpl_stats_ptr[av1_tpl_ptr_pos(row, col, tpl_stride, right_shift)]; |
2438 | 0 | int_mv current_mv = tpl_stats->mv[tpl_stats->ref_frame_index[0]]; |
2439 | 0 | int current_mv_magnitude = |
2440 | 0 | abs(current_mv.as_mv.row) + abs(current_mv.as_mv.col); |
2441 | | |
2442 | | // Retrieve the up and left neighbors. |
2443 | 0 | int up_error = INT_MAX; |
2444 | 0 | int_mv up_mv_diff; |
2445 | 0 | if (row - step >= 0) { |
2446 | 0 | tpl_stats = &tpl_frame->tpl_stats_ptr[av1_tpl_ptr_pos( |
2447 | 0 | row - step, col, tpl_stride, right_shift)]; |
2448 | 0 | up_mv_diff = tpl_stats->mv[tpl_stats->ref_frame_index[0]]; |
2449 | 0 | up_mv_diff.as_mv.row = current_mv.as_mv.row - up_mv_diff.as_mv.row; |
2450 | 0 | up_mv_diff.as_mv.col = current_mv.as_mv.col - up_mv_diff.as_mv.col; |
2451 | 0 | up_error = abs(up_mv_diff.as_mv.row) + abs(up_mv_diff.as_mv.col); |
2452 | 0 | } |
2453 | |
|
2454 | 0 | int left_error = INT_MAX; |
2455 | 0 | int_mv left_mv_diff; |
2456 | 0 | if (col - step >= 0) { |
2457 | 0 | tpl_stats = &tpl_frame->tpl_stats_ptr[av1_tpl_ptr_pos( |
2458 | 0 | row, col - step, tpl_stride, right_shift)]; |
2459 | 0 | left_mv_diff = tpl_stats->mv[tpl_stats->ref_frame_index[0]]; |
2460 | 0 | left_mv_diff.as_mv.row = current_mv.as_mv.row - left_mv_diff.as_mv.row; |
2461 | 0 | left_mv_diff.as_mv.col = current_mv.as_mv.col - left_mv_diff.as_mv.col; |
2462 | 0 | left_error = abs(left_mv_diff.as_mv.row) + abs(left_mv_diff.as_mv.col); |
2463 | 0 | } |
2464 | | |
2465 | | // Return the MV with the minimum distance from current. |
2466 | 0 | if (up_error < left_error && up_error < current_mv_magnitude) { |
2467 | 0 | return up_mv_diff; |
2468 | 0 | } else if (left_error < up_error && left_error < current_mv_magnitude) { |
2469 | 0 | return left_mv_diff; |
2470 | 0 | } |
2471 | 0 | return current_mv; |
2472 | 0 | } |
2473 | | |
2474 | | /* Compute the entropy of motion vectors for a single frame. */ |
2475 | | double av1_tpl_compute_frame_mv_entropy(const TplDepFrame *tpl_frame, |
2476 | 0 | uint8_t right_shift) { |
2477 | 0 | if (!tpl_frame->is_valid) { |
2478 | 0 | return 0; |
2479 | 0 | } |
2480 | | |
2481 | 0 | int count_row[500] = { 0 }; |
2482 | 0 | int count_col[500] = { 0 }; |
2483 | 0 | int n = 0; // number of MVs to process |
2484 | |
|
2485 | 0 | const int tpl_stride = tpl_frame->stride; |
2486 | 0 | const int step = 1 << right_shift; |
2487 | |
|
2488 | 0 | for (int row = 0; row < tpl_frame->mi_rows; row += step) { |
2489 | 0 | for (int col = 0; col < tpl_frame->mi_cols; col += step) { |
2490 | 0 | int_mv mv = av1_compute_mv_difference(tpl_frame, row, col, step, |
2491 | 0 | tpl_stride, right_shift); |
2492 | 0 | count_row[clamp(mv.as_mv.row, 0, 499)] += 1; |
2493 | 0 | count_col[clamp(mv.as_mv.col, 0, 499)] += 1;
2494 | 0 | n += 1; |
2495 | 0 | } |
2496 | 0 | } |
2497 | | |
2498 | | // Estimate the bits used using the entropy formula. |
2499 | 0 | double rate_row = 0; |
2500 | 0 | double rate_col = 0; |
2501 | 0 | for (int i = 0; i < 500; i++) { |
2502 | 0 | if (count_row[i] != 0) { |
2503 | 0 | double p = count_row[i] / (double)n; |
2504 | 0 | rate_row += count_row[i] * -log2(p); |
2505 | 0 | } |
2506 | 0 | if (count_col[i] != 0) { |
2507 | 0 | double p = count_col[i] / (double)n; |
2508 | 0 | rate_col += count_col[i] * -log2(p); |
2509 | 0 | } |
2510 | 0 | } |
2511 | |
|
2512 | 0 | return rate_row + rate_col; |
2513 | 0 | } |
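The returned value is the empirical zero-order entropy of the per-block MV residuals (each block's MV minus its closest up/left predictor), accumulated separately for the row and column components:

\[
\text{rate} = \sum_{c \in \{\text{row},\,\text{col}\}} \sum_i n_{c,i}\left(-\log_2 \frac{n_{c,i}}{n}\right),
\]

where n_{c,i} is the histogram count of residual value i (clamped to [0, 499]) and n is the number of processed blocks, i.e. the number of bits an ideal entropy coder would spend under an i.i.d. model.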