/src/aom/av1/encoder/allintra_vis.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2021, Alliance for Open Media. All rights reserved. |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #include <assert.h> |
13 | | |
14 | | #include "config/aom_config.h" |
15 | | |
16 | | #include "aom_util/aom_pthread.h" |
17 | | |
18 | | #if CONFIG_TFLITE |
19 | | #include "tensorflow/lite/c/c_api.h" |
20 | | #include "av1/encoder/deltaq4_model.c" |
21 | | #endif |
22 | | |
23 | | #include "av1/common/common_data.h" |
24 | | #include "av1/common/enums.h" |
25 | | #include "av1/common/idct.h" |
26 | | #include "av1/common/reconinter.h" |
27 | | #include "av1/encoder/allintra_vis.h" |
28 | | #include "av1/encoder/aq_variance.h" |
29 | | #include "av1/encoder/encoder.h" |
30 | | #include "av1/encoder/ethread.h" |
31 | | #include "av1/encoder/hybrid_fwd_txfm.h" |
32 | | #include "av1/encoder/model_rd.h" |
33 | | #include "av1/encoder/rdopt_utils.h" |
34 | | |
35 | 0 | #define MB_WIENER_PRED_BLOCK_SIZE BLOCK_128X128 |
36 | 0 | #define MB_WIENER_PRED_BUF_STRIDE 128 |
37 | | |
38 | | // Maximum delta-q range allowed for Variance Boost after scaling |
39 | | #define VAR_BOOST_MAX_DELTAQ_RANGE 80 |
40 | | // Maximum quantization step boost allowed for Variance Boost |
41 | 0 | #define VAR_BOOST_MAX_BOOST 8.0 |
42 | | |
43 | 0 | void av1_alloc_mb_wiener_var_pred_buf(AV1_COMMON *cm, ThreadData *td) { |
44 | 0 | const int is_high_bitdepth = is_cur_buf_hbd(&td->mb.e_mbd); |
45 | 0 | assert(MB_WIENER_PRED_BLOCK_SIZE < BLOCK_SIZES_ALL); |
46 | 0 | const int buf_width = block_size_wide[MB_WIENER_PRED_BLOCK_SIZE]; |
47 | 0 | const int buf_height = block_size_high[MB_WIENER_PRED_BLOCK_SIZE]; |
48 | 0 | assert(buf_width == MB_WIENER_PRED_BUF_STRIDE); |
49 | 0 | const size_t buf_size = |
50 | 0 | (buf_width * buf_height * sizeof(*td->wiener_tmp_pred_buf)) |
51 | 0 | << is_high_bitdepth; |
52 | 0 | CHECK_MEM_ERROR(cm, td->wiener_tmp_pred_buf, aom_memalign(32, buf_size)); |
53 | 0 | } |
54 | | |
55 | 0 | void av1_dealloc_mb_wiener_var_pred_buf(ThreadData *td) { |
56 | 0 | aom_free(td->wiener_tmp_pred_buf); |
57 | 0 | td->wiener_tmp_pred_buf = NULL; |
58 | 0 | } |
59 | | |
60 | 0 | void av1_init_mb_wiener_var_buffer(AV1_COMP *cpi) { |
61 | 0 | AV1_COMMON *cm = &cpi->common; |
62 | | |
63 | | // This block size is also used to determine number of workers in |
64 | | // multi-threading. If it is changed, one needs to change it accordingly in |
65 | | // "compute_num_ai_workers()". |
66 | 0 | cpi->weber_bsize = BLOCK_8X8; |
67 | |
|
68 | 0 | if (cpi->oxcf.enable_rate_guide_deltaq) { |
69 | 0 | if (cpi->mb_weber_stats && cpi->prep_rate_estimates && |
70 | 0 | cpi->ext_rate_distribution) |
71 | 0 | return; |
72 | 0 | } else { |
73 | 0 | if (cpi->mb_weber_stats) return; |
74 | 0 | } |
75 | | |
76 | 0 | CHECK_MEM_ERROR(cm, cpi->mb_weber_stats, |
77 | 0 | aom_calloc(cpi->frame_info.mi_rows * cpi->frame_info.mi_cols, |
78 | 0 | sizeof(*cpi->mb_weber_stats))); |
79 | |
|
80 | 0 | if (cpi->oxcf.enable_rate_guide_deltaq) { |
81 | 0 | CHECK_MEM_ERROR( |
82 | 0 | cm, cpi->prep_rate_estimates, |
83 | 0 | aom_calloc(cpi->frame_info.mi_rows * cpi->frame_info.mi_cols, |
84 | 0 | sizeof(*cpi->prep_rate_estimates))); |
85 | |
|
86 | 0 | CHECK_MEM_ERROR( |
87 | 0 | cm, cpi->ext_rate_distribution, |
88 | 0 | aom_calloc(cpi->frame_info.mi_rows * cpi->frame_info.mi_cols, |
89 | 0 | sizeof(*cpi->ext_rate_distribution))); |
90 | 0 | } |
91 | 0 | } |
92 | | |
93 | | static int64_t get_satd(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row, |
94 | 0 | int mi_col) { |
95 | 0 | AV1_COMMON *const cm = &cpi->common; |
96 | 0 | const int mi_wide = mi_size_wide[bsize]; |
97 | 0 | const int mi_high = mi_size_high[bsize]; |
98 | |
|
99 | 0 | const int mi_step = mi_size_wide[cpi->weber_bsize]; |
100 | 0 | int mb_stride = cpi->frame_info.mi_cols; |
101 | 0 | int mb_count = 0; |
102 | 0 | int64_t satd = 0; |
103 | |
|
104 | 0 | for (int row = mi_row; row < mi_row + mi_high; row += mi_step) { |
105 | 0 | for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) { |
106 | 0 | if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols) |
107 | 0 | continue; |
108 | | |
109 | 0 | satd += cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)] |
110 | 0 | .satd; |
111 | 0 | ++mb_count; |
112 | 0 | } |
113 | 0 | } |
114 | |
|
115 | 0 | if (mb_count) satd = (int)(satd / mb_count); |
116 | 0 | satd = AOMMAX(1, satd); |
117 | |
|
118 | 0 | return (int)satd; |
119 | 0 | } |
120 | | |
121 | | static int64_t get_sse(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row, |
122 | 0 | int mi_col) { |
123 | 0 | AV1_COMMON *const cm = &cpi->common; |
124 | 0 | const int mi_wide = mi_size_wide[bsize]; |
125 | 0 | const int mi_high = mi_size_high[bsize]; |
126 | |
|
127 | 0 | const int mi_step = mi_size_wide[cpi->weber_bsize]; |
128 | 0 | int mb_stride = cpi->frame_info.mi_cols; |
129 | 0 | int mb_count = 0; |
130 | 0 | int64_t distortion = 0; |
131 | |
|
132 | 0 | for (int row = mi_row; row < mi_row + mi_high; row += mi_step) { |
133 | 0 | for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) { |
134 | 0 | if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols) |
135 | 0 | continue; |
136 | | |
137 | 0 | distortion += |
138 | 0 | cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)] |
139 | 0 | .distortion; |
140 | 0 | ++mb_count; |
141 | 0 | } |
142 | 0 | } |
143 | |
|
144 | 0 | if (mb_count) distortion = (int)(distortion / mb_count); |
145 | 0 | distortion = AOMMAX(1, distortion); |
146 | |
|
147 | 0 | return (int)distortion; |
148 | 0 | } |
149 | | |
150 | | static double get_max_scale(const AV1_COMP *const cpi, BLOCK_SIZE bsize, |
151 | 0 | int mi_row, int mi_col) { |
152 | 0 | const AV1_COMMON *const cm = &cpi->common; |
153 | 0 | const int mi_wide = mi_size_wide[bsize]; |
154 | 0 | const int mi_high = mi_size_high[bsize]; |
155 | 0 | const int mi_step = mi_size_wide[cpi->weber_bsize]; |
156 | 0 | int mb_stride = cpi->frame_info.mi_cols; |
157 | 0 | double min_max_scale = 10.0; |
158 | |
|
159 | 0 | for (int row = mi_row; row < mi_row + mi_high; row += mi_step) { |
160 | 0 | for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) { |
161 | 0 | if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols) |
162 | 0 | continue; |
163 | 0 | const WeberStats *weber_stats = |
164 | 0 | &cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)]; |
165 | 0 | if (weber_stats->max_scale < 1.0) continue; |
166 | 0 | if (weber_stats->max_scale < min_max_scale) |
167 | 0 | min_max_scale = weber_stats->max_scale; |
168 | 0 | } |
169 | 0 | } |
170 | 0 | return min_max_scale; |
171 | 0 | } |
172 | | |
173 | | static int get_window_wiener_var(const AV1_COMP *const cpi, BLOCK_SIZE bsize, |
174 | 0 | int mi_row, int mi_col) { |
175 | 0 | const AV1_COMMON *const cm = &cpi->common; |
176 | 0 | const int mi_wide = mi_size_wide[bsize]; |
177 | 0 | const int mi_high = mi_size_high[bsize]; |
178 | |
|
179 | 0 | const int mi_step = mi_size_wide[cpi->weber_bsize]; |
180 | 0 | int sb_wiener_var = 0; |
181 | 0 | int mb_stride = cpi->frame_info.mi_cols; |
182 | 0 | int mb_count = 0; |
183 | 0 | double base_num = 1; |
184 | 0 | double base_den = 1; |
185 | 0 | double base_reg = 1; |
186 | |
|
187 | 0 | for (int row = mi_row; row < mi_row + mi_high; row += mi_step) { |
188 | 0 | for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) { |
189 | 0 | if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols) |
190 | 0 | continue; |
191 | | |
192 | 0 | const WeberStats *weber_stats = |
193 | 0 | &cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)]; |
194 | |
|
195 | 0 | base_num += ((double)weber_stats->distortion) * |
196 | 0 | sqrt((double)weber_stats->src_variance) * |
197 | 0 | weber_stats->rec_pix_max; |
198 | |
|
199 | 0 | base_den += fabs( |
200 | 0 | weber_stats->rec_pix_max * sqrt((double)weber_stats->src_variance) - |
201 | 0 | weber_stats->src_pix_max * sqrt((double)weber_stats->rec_variance)); |
202 | |
|
203 | 0 | base_reg += sqrt((double)weber_stats->distortion) * |
204 | 0 | sqrt((double)weber_stats->src_pix_max) * 0.1; |
205 | 0 | ++mb_count; |
206 | 0 | } |
207 | 0 | } |
208 | |
|
209 | 0 | sb_wiener_var = |
210 | 0 | (int)(((base_num + base_reg) / (base_den + base_reg)) / mb_count); |
211 | 0 | sb_wiener_var = AOMMAX(1, sb_wiener_var); |
212 | |
|
213 | 0 | return (int)sb_wiener_var; |
214 | 0 | } |
215 | | |
216 | | static int get_var_perceptual_ai(const AV1_COMP *const cpi, BLOCK_SIZE bsize, |
217 | 0 | int mi_row, int mi_col) { |
218 | 0 | const AV1_COMMON *const cm = &cpi->common; |
219 | 0 | const int mi_wide = mi_size_wide[bsize]; |
220 | 0 | const int mi_high = mi_size_high[bsize]; |
221 | |
|
222 | 0 | int sb_wiener_var = get_window_wiener_var(cpi, bsize, mi_row, mi_col); |
223 | |
|
224 | 0 | if (mi_row >= (mi_high / 2)) { |
225 | 0 | sb_wiener_var = |
226 | 0 | AOMMIN(sb_wiener_var, |
227 | 0 | get_window_wiener_var(cpi, bsize, mi_row - mi_high / 2, mi_col)); |
228 | 0 | } |
229 | 0 | if (mi_row <= (cm->mi_params.mi_rows - mi_high - (mi_high / 2))) { |
230 | 0 | sb_wiener_var = |
231 | 0 | AOMMIN(sb_wiener_var, |
232 | 0 | get_window_wiener_var(cpi, bsize, mi_row + mi_high / 2, mi_col)); |
233 | 0 | } |
234 | 0 | if (mi_col >= (mi_wide / 2)) { |
235 | 0 | sb_wiener_var = |
236 | 0 | AOMMIN(sb_wiener_var, |
237 | 0 | get_window_wiener_var(cpi, bsize, mi_row, mi_col - mi_wide / 2)); |
238 | 0 | } |
239 | 0 | if (mi_col <= (cm->mi_params.mi_cols - mi_wide - (mi_wide / 2))) { |
240 | 0 | sb_wiener_var = |
241 | 0 | AOMMIN(sb_wiener_var, |
242 | 0 | get_window_wiener_var(cpi, bsize, mi_row, mi_col + mi_wide / 2)); |
243 | 0 | } |
244 | |
|
245 | 0 | return sb_wiener_var; |
246 | 0 | } |
247 | | |
248 | 0 | static int rate_estimator(const tran_low_t *qcoeff, int eob, TX_SIZE tx_size) { |
249 | 0 | const SCAN_ORDER *const scan_order = &av1_scan_orders[tx_size][DCT_DCT]; |
250 | |
|
251 | 0 | assert((1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]]) >= eob); |
252 | 0 | int rate_cost = 1; |
253 | |
|
254 | 0 | for (int idx = 0; idx < eob; ++idx) { |
255 | 0 | int abs_level = abs(qcoeff[scan_order->scan[idx]]); |
256 | 0 | rate_cost += (int)(log1p(abs_level) / log(2.0)) + 1 + (abs_level > 0); |
257 | 0 | } |
258 | |
|
259 | 0 | return (rate_cost << AV1_PROB_COST_SHIFT); |
260 | 0 | } |
261 | | |
// Computes per-8x8 Weber statistics for one row of 8x8 blocks at mi_row and
// writes them into cpi->mb_weber_stats. For each 8x8 block: the best intra
// mode is selected by SATD of the transformed residual (using the source
// frame as predictor), the residual of the best mode is transformed,
// quantized and reconstructed to measure distortion/variance/pixel maxima,
// and the largest AC quantized level is recorded as max_scale. When
// rate-guided delta-q is enabled, a per-block rate estimate is also stored.
// Synchronizes with neighboring rows through the intra row-MT read/write
// hooks. src_diff/coeff/qcoeff/dqcoeff are caller-provided scratch buffers;
// pred_buffer is the per-thread prediction scratch buffer (8-bit layout, or
// 16-bit samples when the frame is high bit-depth).
void av1_calc_mb_wiener_var_row(AV1_COMP *const cpi, MACROBLOCK *x,
                                MACROBLOCKD *xd, const int mi_row,
                                int16_t *src_diff, tran_low_t *coeff,
                                tran_low_t *qcoeff, tran_low_t *dqcoeff,
                                double *sum_rec_distortion,
                                double *sum_est_rate, uint8_t *pred_buffer) {
  AV1_COMMON *const cm = &cpi->common;
  uint8_t *buffer = cpi->source->y_buffer;
  int buf_stride = cpi->source->y_stride;
  // A zeroed stack-local mbmi stands in for real mode info during this
  // preprocessing pass; xd->mi is reset to NULL before returning.
  MB_MODE_INFO mbmi;
  memset(&mbmi, 0, sizeof(mbmi));
  MB_MODE_INFO *mbmi_ptr = &mbmi;
  xd->mi = &mbmi_ptr;
  const BLOCK_SIZE bsize = cpi->weber_bsize;
  const TX_SIZE tx_size = max_txsize_lookup[bsize];
  const int block_size = tx_size_wide[tx_size];
  const int coeff_count = block_size * block_size;
  const int mb_step = mi_size_wide[bsize];
  const BitDepthInfo bd_info = get_bit_depth_info(xd);
  const MultiThreadInfo *const mt_info = &cpi->mt_info;
  const AV1EncAllIntraMultiThreadInfo *const intra_mt = &mt_info->intra_mt;
  AV1EncRowMultiThreadSync *const intra_row_mt_sync =
      &cpi->ppi->intra_row_mt_sync;
  const int mi_cols = cm->mi_params.mi_cols;
  const int mt_thread_id = mi_row / mb_step;
  // TODO(chengchen): test different unit step size
  const int mt_unit_step = mi_size_wide[MB_WIENER_MT_UNIT_SIZE];
  const int mt_unit_cols = (mi_cols + (mt_unit_step >> 1)) / mt_unit_step;
  int mt_unit_col = 0;
  const int is_high_bitdepth = is_cur_buf_hbd(xd);

  uint8_t *dst_buffer = pred_buffer;
  const int dst_buffer_stride = MB_WIENER_PRED_BUF_STRIDE;

  if (is_high_bitdepth) {
    uint16_t *pred_buffer_16 = (uint16_t *)pred_buffer;
    dst_buffer = CONVERT_TO_BYTEPTR(pred_buffer_16);
  }

  for (int mi_col = 0; mi_col < mi_cols; mi_col += mb_step) {
    // At the start of each MT unit, wait for the row above to finish the
    // corresponding unit before proceeding.
    if (mi_col % mt_unit_step == 0) {
      intra_mt->intra_sync_read_ptr(intra_row_mt_sync, mt_thread_id,
                                    mt_unit_col);
#if CONFIG_MULTITHREAD
      const int num_workers =
          AOMMIN(mt_info->num_mod_workers[MOD_AI], mt_info->num_workers);
      if (num_workers > 1) {
        const AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
        pthread_mutex_lock(enc_row_mt->mutex_);
        const bool exit = enc_row_mt->mb_wiener_mt_exit;
        pthread_mutex_unlock(enc_row_mt->mutex_);
        // Stop further processing in case any worker has encountered an error.
        if (exit) break;
      }
#endif
    }

    PREDICTION_MODE best_mode = DC_PRED;
    int best_intra_cost = INT_MAX;
    const int mi_width = mi_size_wide[bsize];
    const int mi_height = mi_size_high[bsize];
    set_mode_info_offsets(&cpi->common.mi_params, &cpi->mbmi_ext_info, x, xd,
                          mi_row, mi_col);
    set_mi_row_col(xd, &xd->tile, mi_row, mi_height, mi_col, mi_width,
                   AOMMIN(mi_row + mi_height, cm->mi_params.mi_rows),
                   AOMMIN(mi_col + mi_width, cm->mi_params.mi_cols));
    set_plane_n4(xd, mi_size_wide[bsize], mi_size_high[bsize],
                 av1_num_planes(cm));
    xd->mi[0]->bsize = bsize;
    xd->mi[0]->motion_mode = SIMPLE_TRANSLATION;
    // Set above and left mbmi to NULL as they are not available in the
    // preprocessing stage.
    // They are used to determine intra edge filter types in intra prediction.
    if (xd->up_available) {
      xd->above_mbmi = NULL;
    }
    if (xd->left_available) {
      xd->left_mbmi = NULL;
    }
    // Top-left corner of this 8x8 block in the source luma plane.
    uint8_t *mb_buffer =
        buffer + mi_row * MI_SIZE * buf_stride + mi_col * MI_SIZE;
    // Exhaustive intra mode search scored by SATD of the transform residual.
    for (PREDICTION_MODE mode = INTRA_MODE_START; mode < INTRA_MODE_END;
         ++mode) {
      // TODO(chengchen): Here we use src instead of reconstructed frame as
      // the intra predictor to make single and multithread version match.
      // Ideally we want to use the reconstructed.
      av1_predict_intra_block(
          xd, cm->seq_params->sb_size, cm->seq_params->enable_intra_edge_filter,
          block_size, block_size, tx_size, mode, 0, 0, FILTER_INTRA_MODES,
          mb_buffer, buf_stride, dst_buffer, dst_buffer_stride, 0, 0, 0);
      av1_subtract_block(bd_info, block_size, block_size, src_diff, block_size,
                         mb_buffer, buf_stride, dst_buffer, dst_buffer_stride);
      av1_quick_txfm(0, tx_size, bd_info, src_diff, block_size, coeff);
      int intra_cost = aom_satd(coeff, coeff_count);
      if (intra_cost < best_intra_cost) {
        best_intra_cost = intra_cost;
        best_mode = mode;
      }
    }

    // Re-run prediction/transform for the winning mode so coeff holds its
    // residual transform.
    av1_predict_intra_block(
        xd, cm->seq_params->sb_size, cm->seq_params->enable_intra_edge_filter,
        block_size, block_size, tx_size, best_mode, 0, 0, FILTER_INTRA_MODES,
        mb_buffer, buf_stride, dst_buffer, dst_buffer_stride, 0, 0, 0);
    av1_subtract_block(bd_info, block_size, block_size, src_diff, block_size,
                       mb_buffer, buf_stride, dst_buffer, dst_buffer_stride);
    av1_quick_txfm(0, tx_size, bd_info, src_diff, block_size, coeff);

    const struct macroblock_plane *const p = &x->plane[0];
    uint16_t eob;
    const SCAN_ORDER *const scan_order = &av1_scan_orders[tx_size][DCT_DCT];
    QUANT_PARAM quant_param;
    int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
    av1_setup_quant(tx_size, 0, AV1_XFORM_QUANT_FP, 0, &quant_param);
#if CONFIG_AV1_HIGHBITDEPTH
    if (is_cur_buf_hbd(xd)) {
      av1_highbd_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob,
                                    scan_order, &quant_param);
    } else {
      av1_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob,
                             scan_order, &quant_param);
    }
#else
    av1_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob, scan_order,
                           &quant_param);
#endif  // CONFIG_AV1_HIGHBITDEPTH

    if (cpi->oxcf.enable_rate_guide_deltaq) {
      const int rate_cost = rate_estimator(qcoeff, eob, tx_size);
      cpi->prep_rate_estimates[(mi_row / mb_step) * cpi->frame_info.mi_cols +
                               (mi_col / mb_step)] = rate_cost;
    }

    // Reconstruct the block in dst_buffer (prediction + dequantized residual)
    // to measure actual distortion below.
    av1_inverse_transform_block(xd, dqcoeff, 0, DCT_DCT, tx_size, dst_buffer,
                                dst_buffer_stride, eob, 0);
    WeberStats *weber_stats =
        &cpi->mb_weber_stats[(mi_row / mb_step) * cpi->frame_info.mi_cols +
                             (mi_col / mb_step)];

    weber_stats->rec_pix_max = 1;
    weber_stats->rec_variance = 0;
    weber_stats->src_pix_max = 1;
    weber_stats->src_variance = 0;
    weber_stats->distortion = 0;

    int64_t src_mean = 0;
    int64_t rec_mean = 0;
    int64_t dist_mean = 0;

    // Accumulate raw sums; variances are converted from sums-of-squares to
    // central moments after the loop.
    for (int pix_row = 0; pix_row < block_size; ++pix_row) {
      for (int pix_col = 0; pix_col < block_size; ++pix_col) {
        int src_pix, rec_pix;
#if CONFIG_AV1_HIGHBITDEPTH
        if (is_cur_buf_hbd(xd)) {
          uint16_t *src = CONVERT_TO_SHORTPTR(mb_buffer);
          uint16_t *rec = CONVERT_TO_SHORTPTR(dst_buffer);
          src_pix = src[pix_row * buf_stride + pix_col];
          rec_pix = rec[pix_row * dst_buffer_stride + pix_col];
        } else {
          src_pix = mb_buffer[pix_row * buf_stride + pix_col];
          rec_pix = dst_buffer[pix_row * dst_buffer_stride + pix_col];
        }
#else
        src_pix = mb_buffer[pix_row * buf_stride + pix_col];
        rec_pix = dst_buffer[pix_row * dst_buffer_stride + pix_col];
#endif
        src_mean += src_pix;
        rec_mean += rec_pix;
        dist_mean += src_pix - rec_pix;
        weber_stats->src_variance += src_pix * src_pix;
        weber_stats->rec_variance += rec_pix * rec_pix;
        weber_stats->src_pix_max = AOMMAX(weber_stats->src_pix_max, src_pix);
        weber_stats->rec_pix_max = AOMMAX(weber_stats->rec_pix_max, rec_pix);
        weber_stats->distortion += (src_pix - rec_pix) * (src_pix - rec_pix);
      }
    }

    if (cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) {
      *sum_rec_distortion += weber_stats->distortion;
      int est_block_rate = 0;
      int64_t est_block_dist = 0;
      model_rd_sse_fn[MODELRD_LEGACY](cpi, x, bsize, 0, weber_stats->distortion,
                                      pix_num, &est_block_rate,
                                      &est_block_dist);
      *sum_est_rate += est_block_rate;
    }

    // Convert accumulated sums of squares into (scaled) central moments:
    // sum(x^2) - (sum(x))^2 / n.
    weber_stats->src_variance -= (src_mean * src_mean) / pix_num;
    weber_stats->rec_variance -= (rec_mean * rec_mean) / pix_num;
    weber_stats->distortion -= (dist_mean * dist_mean) / pix_num;
    weber_stats->satd = best_intra_cost;

    // Record the largest AC quantized magnitude (DC is excluded).
    qcoeff[0] = 0;
    int max_scale = 0;
    for (int idx = 1; idx < coeff_count; ++idx) {
      const int abs_qcoeff = abs(qcoeff[idx]);
      max_scale = AOMMAX(max_scale, abs_qcoeff);
    }
    weber_stats->max_scale = max_scale;

    // Signal completion of this MT unit so the row below may proceed.
    if ((mi_col + mb_step) % mt_unit_step == 0 ||
        (mi_col + mb_step) >= mi_cols) {
      intra_mt->intra_sync_write_ptr(intra_row_mt_sync, mt_thread_id,
                                     mt_unit_col, mt_unit_cols);
      ++mt_unit_col;
    }
  }
  // Set the pointer to null since mbmi is only allocated inside this function.
  xd->mi = NULL;
}
472 | | |
473 | | static void calc_mb_wiener_var(AV1_COMP *const cpi, double *sum_rec_distortion, |
474 | 0 | double *sum_est_rate) { |
475 | 0 | MACROBLOCK *x = &cpi->td.mb; |
476 | 0 | MACROBLOCKD *xd = &x->e_mbd; |
477 | 0 | const BLOCK_SIZE bsize = cpi->weber_bsize; |
478 | 0 | const int mb_step = mi_size_wide[bsize]; |
479 | 0 | DECLARE_ALIGNED(32, int16_t, src_diff[32 * 32]); |
480 | 0 | DECLARE_ALIGNED(32, tran_low_t, coeff[32 * 32]); |
481 | 0 | DECLARE_ALIGNED(32, tran_low_t, qcoeff[32 * 32]); |
482 | 0 | DECLARE_ALIGNED(32, tran_low_t, dqcoeff[32 * 32]); |
483 | 0 | for (int mi_row = 0; mi_row < cpi->frame_info.mi_rows; mi_row += mb_step) { |
484 | 0 | av1_calc_mb_wiener_var_row(cpi, x, xd, mi_row, src_diff, coeff, qcoeff, |
485 | 0 | dqcoeff, sum_rec_distortion, sum_est_rate, |
486 | 0 | cpi->td.wiener_tmp_pred_buf); |
487 | 0 | } |
488 | 0 | } |
489 | | |
490 | | static int64_t estimate_wiener_var_norm(AV1_COMP *const cpi, |
491 | 0 | const BLOCK_SIZE norm_block_size) { |
492 | 0 | const AV1_COMMON *const cm = &cpi->common; |
493 | 0 | int64_t norm_factor = 1; |
494 | 0 | assert(norm_block_size >= BLOCK_16X16 && norm_block_size <= BLOCK_128X128); |
495 | 0 | const int norm_step = mi_size_wide[norm_block_size]; |
496 | 0 | double sb_wiener_log = 0; |
497 | 0 | double sb_count = 0; |
498 | 0 | for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += norm_step) { |
499 | 0 | for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += norm_step) { |
500 | 0 | const int sb_wiener_var = |
501 | 0 | get_var_perceptual_ai(cpi, norm_block_size, mi_row, mi_col); |
502 | 0 | const int64_t satd = get_satd(cpi, norm_block_size, mi_row, mi_col); |
503 | 0 | const int64_t sse = get_sse(cpi, norm_block_size, mi_row, mi_col); |
504 | 0 | const double scaled_satd = (double)satd / sqrt((double)sse); |
505 | 0 | sb_wiener_log += scaled_satd * log(sb_wiener_var); |
506 | 0 | sb_count += scaled_satd; |
507 | 0 | } |
508 | 0 | } |
509 | 0 | if (sb_count > 0) norm_factor = (int64_t)(exp(sb_wiener_log / sb_count)); |
510 | 0 | norm_factor = AOMMAX(1, norm_factor); |
511 | |
|
512 | 0 | return norm_factor; |
513 | 0 | } |
514 | | |
515 | | static void automatic_intra_tools_off(AV1_COMP *cpi, |
516 | | const double sum_rec_distortion, |
517 | 0 | const double sum_est_rate) { |
518 | 0 | if (!cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) return; |
519 | | |
520 | | // Thresholds |
521 | 0 | const int high_quality_qindex = 128; |
522 | 0 | const double high_quality_bpp = 2.0; |
523 | 0 | const double high_quality_dist_per_pix = 4.0; |
524 | |
|
525 | 0 | AV1_COMMON *const cm = &cpi->common; |
526 | 0 | const int qindex = cm->quant_params.base_qindex; |
527 | 0 | const double dist_per_pix = |
528 | 0 | (double)sum_rec_distortion / (cm->width * cm->height); |
529 | | // The estimate bpp is not accurate, an empirical constant 100 is divided. |
530 | 0 | const double estimate_bpp = sum_est_rate / (cm->width * cm->height * 100); |
531 | |
|
532 | 0 | if (qindex < high_quality_qindex && estimate_bpp > high_quality_bpp && |
533 | 0 | dist_per_pix < high_quality_dist_per_pix) { |
534 | 0 | cpi->oxcf.intra_mode_cfg.enable_smooth_intra = 0; |
535 | 0 | cpi->oxcf.intra_mode_cfg.enable_paeth_intra = 0; |
536 | 0 | cpi->oxcf.intra_mode_cfg.enable_cfl_intra = 0; |
537 | 0 | cpi->oxcf.intra_mode_cfg.enable_diagonal_intra = 0; |
538 | 0 | } |
539 | 0 | } |
540 | | |
541 | 0 | static void ext_rate_guided_quantization(AV1_COMP *cpi) { |
542 | | // Calculation uses 8x8. |
543 | 0 | const int mb_step = mi_size_wide[cpi->weber_bsize]; |
544 | | // Accumulate to 16x16, step size is in the unit of mi. |
545 | 0 | const int block_step = 4; |
546 | |
|
547 | 0 | const char *filename = cpi->oxcf.rate_distribution_info; |
548 | 0 | FILE *pfile = fopen(filename, "r"); |
549 | 0 | if (pfile == NULL) { |
550 | 0 | assert(pfile != NULL); |
551 | 0 | return; |
552 | 0 | } |
553 | | |
554 | 0 | double ext_rate_sum = 0.0; |
555 | 0 | for (int row = 0; row < cpi->frame_info.mi_rows; row += block_step) { |
556 | 0 | for (int col = 0; col < cpi->frame_info.mi_cols; col += block_step) { |
557 | 0 | float val; |
558 | 0 | const int fields_converted = fscanf(pfile, "%f", &val); |
559 | 0 | if (fields_converted != 1) { |
560 | 0 | assert(fields_converted == 1); |
561 | 0 | fclose(pfile); |
562 | 0 | return; |
563 | 0 | } |
564 | 0 | ext_rate_sum += val; |
565 | 0 | cpi->ext_rate_distribution[(row / mb_step) * cpi->frame_info.mi_cols + |
566 | 0 | (col / mb_step)] = val; |
567 | 0 | } |
568 | 0 | } |
569 | 0 | fclose(pfile); |
570 | |
|
571 | 0 | int uniform_rate_sum = 0; |
572 | 0 | for (int row = 0; row < cpi->frame_info.mi_rows; row += block_step) { |
573 | 0 | for (int col = 0; col < cpi->frame_info.mi_cols; col += block_step) { |
574 | 0 | int rate_sum = 0; |
575 | 0 | for (int r = 0; r < block_step; r += mb_step) { |
576 | 0 | for (int c = 0; c < block_step; c += mb_step) { |
577 | 0 | const int mi_row = row + r; |
578 | 0 | const int mi_col = col + c; |
579 | 0 | rate_sum += cpi->prep_rate_estimates[(mi_row / mb_step) * |
580 | 0 | cpi->frame_info.mi_cols + |
581 | 0 | (mi_col / mb_step)]; |
582 | 0 | } |
583 | 0 | } |
584 | 0 | uniform_rate_sum += rate_sum; |
585 | 0 | } |
586 | 0 | } |
587 | |
|
588 | 0 | const double scale = uniform_rate_sum / ext_rate_sum; |
589 | 0 | cpi->ext_rate_scale = scale; |
590 | 0 | } |
591 | | |
// Frame-level driver for the Wiener-variance-based all-intra delta-q:
// allocates a reconstruction frame buffer and the prediction scratch buffer,
// computes per-8x8 Weber statistics over the whole frame (multi-threaded
// when workers are available and auto_intra_tools_off is disabled), then
// optionally disables intra tools and loads the external rate distribution.
// Finally, cpi->norm_wiener_variance is refined over two iterations as a
// SATD-weighted geometric mean of clamped per-superblock variances. The
// temporary buffers are released before returning.
void av1_set_mb_wiener_variance(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const SequenceHeader *const seq_params = cm->seq_params;
  if (aom_realloc_frame_buffer(
          &cm->cur_frame->buf, cm->width, cm->height, seq_params->subsampling_x,
          seq_params->subsampling_y, seq_params->use_highbitdepth,
          cpi->oxcf.border_in_pixels, cm->features.byte_alignment, NULL, NULL,
          NULL, cpi->alloc_pyramid, 0))
    aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                       "Failed to allocate frame buffer");
  av1_alloc_mb_wiener_var_pred_buf(&cpi->common, &cpi->td);
  cpi->norm_wiener_variance = 0;

  MACROBLOCK *x = &cpi->td.mb;
  MACROBLOCKD *xd = &x->e_mbd;
  // xd->mi needs to be setup since it is used in av1_frame_init_quantizer.
  MB_MODE_INFO mbmi;
  memset(&mbmi, 0, sizeof(mbmi));
  MB_MODE_INFO *mbmi_ptr = &mbmi;
  xd->mi = &mbmi_ptr;
  cm->quant_params.base_qindex = cpi->oxcf.rc_cfg.cq_level;
  av1_frame_init_quantizer(cpi);

  double sum_rec_distortion = 0.0;
  double sum_est_rate = 0.0;

  MultiThreadInfo *const mt_info = &cpi->mt_info;
  const int num_workers =
      AOMMIN(mt_info->num_mod_workers[MOD_AI], mt_info->num_workers);
  AV1EncAllIntraMultiThreadInfo *const intra_mt = &mt_info->intra_mt;
  intra_mt->intra_sync_read_ptr = av1_row_mt_sync_read_dummy;
  intra_mt->intra_sync_write_ptr = av1_row_mt_sync_write_dummy;
  // Calculate differential contrast for each block for the entire image.
  // TODO(chengchen): properly accumulate the distortion and rate in
  // av1_calc_mb_wiener_var_mt(). Until then, call calc_mb_wiener_var() if
  // auto_intra_tools_off is true.
  if (num_workers > 1 && !cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) {
    intra_mt->intra_sync_read_ptr = av1_row_mt_sync_read;
    intra_mt->intra_sync_write_ptr = av1_row_mt_sync_write;
    av1_calc_mb_wiener_var_mt(cpi, num_workers, &sum_rec_distortion,
                              &sum_est_rate);
  } else {
    calc_mb_wiener_var(cpi, &sum_rec_distortion, &sum_est_rate);
  }

  // Determine whether to turn off several intra coding tools.
  automatic_intra_tools_off(cpi, sum_rec_distortion, sum_est_rate);

  // Read external rate distribution and use it to guide delta quantization
  if (cpi->oxcf.enable_rate_guide_deltaq) ext_rate_guided_quantization(cpi);

  const BLOCK_SIZE norm_block_size = cm->seq_params->sb_size;
  cpi->norm_wiener_variance = estimate_wiener_var_norm(cpi, norm_block_size);
  const int norm_step = mi_size_wide[norm_block_size];

  // Two fixed-point iterations: each pass recomputes the SATD-weighted
  // geometric mean of per-superblock variances after clamping beta (the
  // normalized variance ratio) to [0.25, 4] and skipping blocks whose
  // min-max scale would push beta out of range.
  double sb_wiener_log = 0;
  double sb_count = 0;
  for (int its_cnt = 0; its_cnt < 2; ++its_cnt) {
    sb_wiener_log = 0;
    sb_count = 0;
    for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += norm_step) {
      for (int mi_col = 0; mi_col < cm->mi_params.mi_cols;
           mi_col += norm_step) {
        int sb_wiener_var =
            get_var_perceptual_ai(cpi, norm_block_size, mi_row, mi_col);

        double beta = (double)cpi->norm_wiener_variance / sb_wiener_var;
        double min_max_scale = AOMMAX(
            1.0, get_max_scale(cpi, cm->seq_params->sb_size, mi_row, mi_col));

        beta = AOMMIN(beta, 4);
        beta = AOMMAX(beta, 0.25);

        if (beta < 1 / min_max_scale) continue;

        sb_wiener_var = (int)(cpi->norm_wiener_variance / beta);

        int64_t satd = get_satd(cpi, norm_block_size, mi_row, mi_col);
        int64_t sse = get_sse(cpi, norm_block_size, mi_row, mi_col);
        double scaled_satd = (double)satd / sqrt((double)sse);
        sb_wiener_log += scaled_satd * log(sb_wiener_var);
        sb_count += scaled_satd;
      }
    }

    if (sb_count > 0)
      cpi->norm_wiener_variance = (int64_t)(exp(sb_wiener_log / sb_count));
    cpi->norm_wiener_variance = AOMMAX(1, cpi->norm_wiener_variance);
  }

  // Set the pointer to null since mbmi is only allocated inside this function.
  xd->mi = NULL;
  aom_free_frame_buffer(&cm->cur_frame->buf);
  av1_dealloc_mb_wiener_var_pred_buf(&cpi->td);
}
687 | | |
688 | | static int get_rate_guided_quantizer(const AV1_COMP *const cpi, |
689 | 0 | BLOCK_SIZE bsize, int mi_row, int mi_col) { |
690 | | // Calculation uses 8x8. |
691 | 0 | const int mb_step = mi_size_wide[cpi->weber_bsize]; |
692 | | // Accumulate to 16x16 |
693 | 0 | const int block_step = mi_size_wide[BLOCK_16X16]; |
694 | 0 | double sb_rate_hific = 0.0; |
695 | 0 | double sb_rate_uniform = 0.0; |
696 | 0 | for (int row = mi_row; row < mi_row + mi_size_wide[bsize]; |
697 | 0 | row += block_step) { |
698 | 0 | for (int col = mi_col; col < mi_col + mi_size_high[bsize]; |
699 | 0 | col += block_step) { |
700 | 0 | sb_rate_hific += |
701 | 0 | cpi->ext_rate_distribution[(row / mb_step) * cpi->frame_info.mi_cols + |
702 | 0 | (col / mb_step)]; |
703 | |
|
704 | 0 | for (int r = 0; r < block_step; r += mb_step) { |
705 | 0 | for (int c = 0; c < block_step; c += mb_step) { |
706 | 0 | const int this_row = row + r; |
707 | 0 | const int this_col = col + c; |
708 | 0 | sb_rate_uniform += |
709 | 0 | cpi->prep_rate_estimates[(this_row / mb_step) * |
710 | 0 | cpi->frame_info.mi_cols + |
711 | 0 | (this_col / mb_step)]; |
712 | 0 | } |
713 | 0 | } |
714 | 0 | } |
715 | 0 | } |
716 | 0 | sb_rate_hific *= cpi->ext_rate_scale; |
717 | |
|
718 | 0 | const double weight = 1.0; |
719 | 0 | const double rate_diff = |
720 | 0 | weight * (sb_rate_hific - sb_rate_uniform) / sb_rate_uniform; |
721 | 0 | double scale = pow(2, rate_diff); |
722 | |
|
723 | 0 | scale = scale * scale; |
724 | 0 | double min_max_scale = AOMMAX(1.0, get_max_scale(cpi, bsize, mi_row, mi_col)); |
725 | 0 | scale = 1.0 / AOMMIN(1.0 / scale, min_max_scale); |
726 | |
|
727 | 0 | const AV1_COMMON *const cm = &cpi->common; |
728 | 0 | const int base_qindex = cm->quant_params.base_qindex; |
729 | 0 | int offset = |
730 | 0 | av1_get_deltaq_offset(cm->seq_params->bit_depth, base_qindex, scale); |
731 | 0 | const DeltaQInfo *const delta_q_info = &cm->delta_q_info; |
732 | 0 | const int max_offset = delta_q_info->delta_q_res * 10; |
733 | 0 | offset = AOMMIN(offset, max_offset - 1); |
734 | 0 | offset = AOMMAX(offset, -max_offset + 1); |
735 | 0 | int qindex = cm->quant_params.base_qindex + offset; |
736 | 0 | qindex = AOMMIN(qindex, MAXQ); |
737 | 0 | qindex = AOMMAX(qindex, MINQ); |
738 | 0 | if (base_qindex > MINQ) qindex = AOMMAX(qindex, MINQ + 1); |
739 | |
|
740 | 0 | return qindex; |
741 | 0 | } |
742 | | |
743 | | int av1_get_sbq_perceptual_ai(const AV1_COMP *const cpi, BLOCK_SIZE bsize, |
744 | 0 | int mi_row, int mi_col) { |
745 | 0 | if (cpi->oxcf.enable_rate_guide_deltaq) { |
746 | 0 | return get_rate_guided_quantizer(cpi, bsize, mi_row, mi_col); |
747 | 0 | } |
748 | | |
749 | 0 | const AV1_COMMON *const cm = &cpi->common; |
750 | 0 | const int base_qindex = cm->quant_params.base_qindex; |
751 | 0 | int sb_wiener_var = get_var_perceptual_ai(cpi, bsize, mi_row, mi_col); |
752 | 0 | int offset = 0; |
753 | 0 | double beta = (double)cpi->norm_wiener_variance / sb_wiener_var; |
754 | 0 | double min_max_scale = AOMMAX(1.0, get_max_scale(cpi, bsize, mi_row, mi_col)); |
755 | 0 | beta = 1.0 / AOMMIN(1.0 / beta, min_max_scale); |
756 | | |
757 | | // Cap beta such that the delta q value is not much far away from the base q. |
758 | 0 | beta = AOMMIN(beta, 4); |
759 | 0 | beta = AOMMAX(beta, 0.25); |
760 | 0 | offset = av1_get_deltaq_offset(cm->seq_params->bit_depth, base_qindex, beta); |
761 | 0 | const DeltaQInfo *const delta_q_info = &cm->delta_q_info; |
762 | 0 | offset = AOMMIN(offset, delta_q_info->delta_q_res * 20 - 1); |
763 | 0 | offset = AOMMAX(offset, -delta_q_info->delta_q_res * 20 + 1); |
764 | 0 | int qindex = cm->quant_params.base_qindex + offset; |
765 | 0 | qindex = AOMMIN(qindex, MAXQ); |
766 | 0 | qindex = AOMMAX(qindex, MINQ); |
767 | 0 | if (base_qindex > MINQ) qindex = AOMMAX(qindex, MINQ + 1); |
768 | |
|
769 | 0 | return qindex; |
770 | 0 | } |
771 | | |
772 | 0 | void av1_init_mb_ur_var_buffer(AV1_COMP *cpi) { |
773 | 0 | AV1_COMMON *cm = &cpi->common; |
774 | |
|
775 | 0 | if (cpi->mb_delta_q) return; |
776 | | |
777 | 0 | CHECK_MEM_ERROR(cm, cpi->mb_delta_q, |
778 | 0 | aom_calloc(cpi->frame_info.mb_rows * cpi->frame_info.mb_cols, |
779 | 0 | sizeof(*cpi->mb_delta_q))); |
780 | 0 | } |
781 | | |
782 | | #if CONFIG_TFLITE |
783 | | static int model_predict(BLOCK_SIZE block_size, int num_cols, int num_rows, |
784 | | int bit_depth, uint8_t *y_buffer, int y_stride, |
785 | | float *predicts0, float *predicts1) { |
786 | | // Create the model and interpreter options. |
787 | | TfLiteModel *model = |
788 | | TfLiteModelCreate(av1_deltaq4_model_file, av1_deltaq4_model_fsize); |
789 | | if (model == NULL) return 1; |
790 | | |
791 | | TfLiteInterpreterOptions *options = TfLiteInterpreterOptionsCreate(); |
792 | | TfLiteInterpreterOptionsSetNumThreads(options, 2); |
793 | | if (options == NULL) { |
794 | | TfLiteModelDelete(model); |
795 | | return 1; |
796 | | } |
797 | | |
798 | | // Create the interpreter. |
799 | | TfLiteInterpreter *interpreter = TfLiteInterpreterCreate(model, options); |
800 | | if (interpreter == NULL) { |
801 | | TfLiteInterpreterOptionsDelete(options); |
802 | | TfLiteModelDelete(model); |
803 | | return 1; |
804 | | } |
805 | | |
806 | | // Allocate tensors and populate the input tensor data. |
807 | | TfLiteInterpreterAllocateTensors(interpreter); |
808 | | TfLiteTensor *input_tensor = TfLiteInterpreterGetInputTensor(interpreter, 0); |
809 | | if (input_tensor == NULL) { |
810 | | TfLiteInterpreterDelete(interpreter); |
811 | | TfLiteInterpreterOptionsDelete(options); |
812 | | TfLiteModelDelete(model); |
813 | | return 1; |
814 | | } |
815 | | |
816 | | size_t input_size = TfLiteTensorByteSize(input_tensor); |
817 | | float *input_data = aom_calloc(input_size, 1); |
818 | | if (input_data == NULL) { |
819 | | TfLiteInterpreterDelete(interpreter); |
820 | | TfLiteInterpreterOptionsDelete(options); |
821 | | TfLiteModelDelete(model); |
822 | | return 1; |
823 | | } |
824 | | |
825 | | const int num_mi_w = mi_size_wide[block_size]; |
826 | | const int num_mi_h = mi_size_high[block_size]; |
827 | | for (int row = 0; row < num_rows; ++row) { |
828 | | for (int col = 0; col < num_cols; ++col) { |
829 | | const int row_offset = (row * num_mi_h) << 2; |
830 | | const int col_offset = (col * num_mi_w) << 2; |
831 | | |
832 | | uint8_t *buf = y_buffer + row_offset * y_stride + col_offset; |
833 | | int r = row_offset, pos = 0; |
834 | | const float base = (float)((1 << bit_depth) - 1); |
835 | | while (r < row_offset + (num_mi_h << 2)) { |
836 | | for (int c = 0; c < (num_mi_w << 2); ++c) { |
837 | | input_data[pos++] = bit_depth > 8 |
838 | | ? (float)*CONVERT_TO_SHORTPTR(buf + c) / base |
839 | | : (float)*(buf + c) / base; |
840 | | } |
841 | | buf += y_stride; |
842 | | ++r; |
843 | | } |
844 | | TfLiteTensorCopyFromBuffer(input_tensor, input_data, input_size); |
845 | | |
846 | | // Execute inference. |
847 | | if (TfLiteInterpreterInvoke(interpreter) != kTfLiteOk) { |
848 | | TfLiteInterpreterDelete(interpreter); |
849 | | TfLiteInterpreterOptionsDelete(options); |
850 | | TfLiteModelDelete(model); |
851 | | return 1; |
852 | | } |
853 | | |
854 | | // Extract the output tensor data. |
855 | | const TfLiteTensor *output_tensor = |
856 | | TfLiteInterpreterGetOutputTensor(interpreter, 0); |
857 | | if (output_tensor == NULL) { |
858 | | TfLiteInterpreterDelete(interpreter); |
859 | | TfLiteInterpreterOptionsDelete(options); |
860 | | TfLiteModelDelete(model); |
861 | | return 1; |
862 | | } |
863 | | |
864 | | size_t output_size = TfLiteTensorByteSize(output_tensor); |
865 | | float output_data[2]; |
866 | | |
867 | | TfLiteTensorCopyToBuffer(output_tensor, output_data, output_size); |
868 | | predicts0[row * num_cols + col] = output_data[0]; |
869 | | predicts1[row * num_cols + col] = output_data[1]; |
870 | | } |
871 | | } |
872 | | |
873 | | // Dispose of the model and interpreter objects. |
874 | | TfLiteInterpreterDelete(interpreter); |
875 | | TfLiteInterpreterOptionsDelete(options); |
876 | | TfLiteModelDelete(model); |
877 | | aom_free(input_data); |
878 | | return 0; |
879 | | } |
880 | | |
881 | | void av1_set_mb_ur_variance(AV1_COMP *cpi) { |
882 | | const AV1_COMMON *cm = &cpi->common; |
883 | | const CommonModeInfoParams *const mi_params = &cm->mi_params; |
884 | | uint8_t *y_buffer = cpi->source->y_buffer; |
885 | | const int y_stride = cpi->source->y_stride; |
886 | | const int block_size = cpi->common.seq_params->sb_size; |
887 | | const uint32_t bit_depth = cpi->td.mb.e_mbd.bd; |
888 | | |
889 | | const int num_mi_w = mi_size_wide[block_size]; |
890 | | const int num_mi_h = mi_size_high[block_size]; |
891 | | const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w; |
892 | | const int num_rows = (mi_params->mi_rows + num_mi_h - 1) / num_mi_h; |
893 | | |
894 | | // TODO(sdeng): fit a better model_1; disable it at this time. |
895 | | float *mb_delta_q0, *mb_delta_q1, delta_q_avg0 = 0.0f; |
896 | | CHECK_MEM_ERROR(cm, mb_delta_q0, |
897 | | aom_calloc(num_rows * num_cols, sizeof(float))); |
898 | | CHECK_MEM_ERROR(cm, mb_delta_q1, |
899 | | aom_calloc(num_rows * num_cols, sizeof(float))); |
900 | | |
901 | | if (model_predict(block_size, num_cols, num_rows, bit_depth, y_buffer, |
902 | | y_stride, mb_delta_q0, mb_delta_q1)) { |
903 | | aom_internal_error(cm->error, AOM_CODEC_ERROR, |
904 | | "Failed to call TFlite functions."); |
905 | | } |
906 | | |
907 | | // Loop through each SB block. |
908 | | for (int row = 0; row < num_rows; ++row) { |
909 | | for (int col = 0; col < num_cols; ++col) { |
910 | | const int index = row * num_cols + col; |
911 | | delta_q_avg0 += mb_delta_q0[index]; |
912 | | } |
913 | | } |
914 | | |
915 | | delta_q_avg0 /= (float)(num_rows * num_cols); |
916 | | |
917 | | float scaling_factor; |
918 | | const float cq_level = (float)cpi->oxcf.rc_cfg.cq_level / (float)MAXQ; |
919 | | if (cq_level < delta_q_avg0) { |
920 | | scaling_factor = cq_level / delta_q_avg0; |
921 | | } else { |
922 | | scaling_factor = 1.0f - (cq_level - delta_q_avg0) / (1.0f - delta_q_avg0); |
923 | | } |
924 | | |
925 | | for (int row = 0; row < num_rows; ++row) { |
926 | | for (int col = 0; col < num_cols; ++col) { |
927 | | const int index = row * num_cols + col; |
928 | | cpi->mb_delta_q[index] = |
929 | | RINT((float)cpi->oxcf.q_cfg.deltaq_strength / 100.0f * (float)MAXQ * |
930 | | scaling_factor * (mb_delta_q0[index] - delta_q_avg0)); |
931 | | } |
932 | | } |
933 | | |
934 | | aom_free(mb_delta_q0); |
935 | | aom_free(mb_delta_q1); |
936 | | } |
937 | | #else // !CONFIG_TFLITE |
// Computes per-superblock delta-q values for user-rating-based all-intra
// coding without TFLite. Two fitted exponential models map the geometric
// mean of per-8x8 luma variances in each SB to candidate delta-q values;
// the models are then selected/interpolated based on where the frame's
// cq_level falls relative to the models' averages, and the mean-centered,
// strength-scaled result is written to cpi->mb_delta_q (one entry per SB,
// row-major).
void av1_set_mb_ur_variance(AV1_COMP *cpi) {
  const AV1_COMMON *cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  uint8_t *y_buffer = cpi->source->y_buffer;
  const int y_stride = cpi->source->y_stride;
  const int block_size = cpi->common.seq_params->sb_size;

  const int num_mi_w = mi_size_wide[block_size];
  const int num_mi_h = mi_size_high[block_size];
  const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
  const int num_rows = (mi_params->mi_rows + num_mi_h - 1) / num_mi_h;

  // Scratch arrays holding the two models' raw delta-q predictions per SB.
  int *mb_delta_q[2];
  CHECK_MEM_ERROR(cm, mb_delta_q[0],
                  aom_calloc(num_rows * num_cols, sizeof(*mb_delta_q[0])));
  CHECK_MEM_ERROR(cm, mb_delta_q[1],
                  aom_calloc(num_rows * num_cols, sizeof(*mb_delta_q[1])));

  // Approximates the model change between current version (Sept 2021) and the
  // baseline (July 2021).
  const double model_change[] = { 3.0, 3.0 };
  // The following parameters are fitted from user labeled data. Each model
  // has the form delta_q = a * exp(-b * var) + c.
  const double a[] = { -24.50 * 4.0, -17.20 * 4.0 };
  const double b[] = { 0.004898, 0.003093 };
  const double c[] = { (29.932 + model_change[0]) * 4.0,
                       (42.100 + model_change[1]) * 4.0 };
  int delta_q_avg[2] = { 0, 0 };
  // Loop through each SB block.
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      double var = 0.0, num_of_var = 0.0;
      const int index = row * num_cols + col;

      // Loop through each 8x8 block (the mi step of 2 equals 8 luma pixels).
      for (int mi_row = row * num_mi_h;
           mi_row < mi_params->mi_rows && mi_row < (row + 1) * num_mi_h;
           mi_row += 2) {
        for (int mi_col = col * num_mi_w;
             mi_col < mi_params->mi_cols && mi_col < (col + 1) * num_mi_w;
             mi_col += 2) {
          struct buf_2d buf;
          // Convert mi units to pixel offsets (1 mi unit = 4 pixels).
          const int row_offset_y = mi_row << 2;
          const int col_offset_y = mi_col << 2;

          buf.buf = y_buffer + row_offset_y * y_stride + col_offset_y;
          buf.stride = y_stride;

          unsigned int block_variance;
          block_variance = av1_get_perpixel_variance_facade(
              cpi, xd, &buf, BLOCK_8X8, AOM_PLANE_Y);

          // Clamp to 1 so log() below is well-defined for flat blocks.
          block_variance = AOMMAX(block_variance, 1);
          var += log((double)block_variance);
          num_of_var += 1.0;
        }
      }
      // Geometric mean of the 8x8 variances within this SB.
      var = exp(var / num_of_var);
      mb_delta_q[0][index] = RINT(a[0] * exp(-b[0] * var) + c[0]);
      mb_delta_q[1][index] = RINT(a[1] * exp(-b[1] * var) + c[1]);
      delta_q_avg[0] += mb_delta_q[0][index];
      delta_q_avg[1] += mb_delta_q[1][index];
    }
  }

  delta_q_avg[0] = RINT((double)delta_q_avg[0] / (num_rows * num_cols));
  delta_q_avg[1] = RINT((double)delta_q_avg[1] / (num_rows * num_cols));

  // Pick which model (or, for model_idx == 2, an interpolation of both) to
  // apply based on where cq_level falls relative to the model averages.
  int model_idx;
  double scaling_factor;
  const int cq_level = cpi->oxcf.rc_cfg.cq_level;
  if (cq_level < delta_q_avg[0]) {
    model_idx = 0;
    scaling_factor = (double)cq_level / delta_q_avg[0];
  } else if (cq_level < delta_q_avg[1]) {
    model_idx = 2;
    scaling_factor =
        (double)(cq_level - delta_q_avg[0]) / (delta_q_avg[1] - delta_q_avg[0]);
  } else {
    model_idx = 1;
    scaling_factor = (double)(MAXQ - cq_level) / (MAXQ - delta_q_avg[1]);
  }

  // Average of the interpolated model, used to center the blended deltas.
  const double new_delta_q_avg =
      delta_q_avg[0] + scaling_factor * (delta_q_avg[1] - delta_q_avg[0]);
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int index = row * num_cols + col;
      if (model_idx == 2) {
        // Blend the two models, then center and scale by deltaq strength.
        const double delta_q =
            mb_delta_q[0][index] +
            scaling_factor * (mb_delta_q[1][index] - mb_delta_q[0][index]);
        cpi->mb_delta_q[index] = RINT((double)cpi->oxcf.q_cfg.deltaq_strength /
                                      100.0 * (delta_q - new_delta_q_avg));
      } else {
        // Single model: center on its own average and scale.
        cpi->mb_delta_q[index] = RINT(
            (double)cpi->oxcf.q_cfg.deltaq_strength / 100.0 * scaling_factor *
            (mb_delta_q[model_idx][index] - delta_q_avg[model_idx]));
      }
    }
  }

  aom_free(mb_delta_q[0]);
  aom_free(mb_delta_q[1]);
}
1043 | | #endif |
1044 | | |
1045 | | int av1_get_sbq_user_rating_based(const AV1_COMP *const cpi, int mi_row, |
1046 | 0 | int mi_col) { |
1047 | 0 | const BLOCK_SIZE bsize = cpi->common.seq_params->sb_size; |
1048 | 0 | const CommonModeInfoParams *const mi_params = &cpi->common.mi_params; |
1049 | 0 | const AV1_COMMON *const cm = &cpi->common; |
1050 | 0 | const int base_qindex = cm->quant_params.base_qindex; |
1051 | 0 | if (base_qindex == MINQ || base_qindex == MAXQ) return base_qindex; |
1052 | | |
1053 | 0 | const int num_mi_w = mi_size_wide[bsize]; |
1054 | 0 | const int num_mi_h = mi_size_high[bsize]; |
1055 | 0 | const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w; |
1056 | 0 | const int index = (mi_row / num_mi_h) * num_cols + (mi_col / num_mi_w); |
1057 | 0 | const int delta_q = cpi->mb_delta_q[index]; |
1058 | |
|
1059 | 0 | int qindex = base_qindex + delta_q; |
1060 | 0 | qindex = AOMMIN(qindex, MAXQ); |
1061 | 0 | qindex = AOMMAX(qindex, MINQ + 1); |
1062 | |
|
1063 | 0 | return qindex; |
1064 | 0 | } |
1065 | | |
1066 | | #if !CONFIG_REALTIME_ONLY |
1067 | | |
1068 | | // Variance Boost: a variance adaptive quantization implementation |
1069 | | // SVT-AV1 appendix with an overview and a graphical, step-by-step explanation |
1070 | | // of the implementation |
1071 | | // https://gitlab.com/AOMediaCodec/SVT-AV1/-/blob/master/Docs/Appendix-Variance-Boost.md |
1072 | 0 | int av1_get_sbq_variance_boost(const AV1_COMP *cpi, const MACROBLOCK *x) { |
1073 | 0 | const AV1_COMMON *cm = &cpi->common; |
1074 | 0 | const int base_qindex = cm->quant_params.base_qindex; |
1075 | 0 | const aom_bit_depth_t bit_depth = cm->seq_params->bit_depth; |
1076 | | |
1077 | | // Variance Boost only supports 64x64 SBs. |
1078 | 0 | assert(cm->seq_params->sb_size == BLOCK_64X64); |
1079 | | |
1080 | | // Strength is currently hard-coded and optimized for still pictures. In the |
1081 | | // future, we might want to expose this as a parameter that can be fine-tuned |
1082 | | // by the caller. |
1083 | 0 | const int strength = 3; |
1084 | 0 | unsigned int variance = av1_get_variance_boost_block_variance(cpi, x); |
1085 | | |
1086 | | // Variance = 0 areas are either completely flat patches or have very fine |
1087 | | // gradients. Boost these blocks as if they have a variance of 1. |
1088 | 0 | if (variance == 0) { |
1089 | 0 | variance = 1; |
1090 | 0 | } |
1091 | | |
1092 | | // Compute a boost based on a fast-growing formula. |
1093 | | // High and medium variance SBs essentially get no boost, while lower variance |
1094 | | // SBs get increasingly stronger boosts. |
1095 | 0 | assert(strength >= 1 && strength <= 4); |
1096 | | |
1097 | | // Still picture curve, with variance crossover point at 1024. |
1098 | 0 | double qstep_ratio = 0.15 * strength * (-log2((double)variance) + 10.0) + 1.0; |
1099 | 0 | qstep_ratio = fclamp(qstep_ratio, 1.0, VAR_BOOST_MAX_BOOST); |
1100 | |
|
1101 | 0 | double base_q = av1_convert_qindex_to_q(base_qindex, bit_depth); |
1102 | 0 | double target_q = base_q / qstep_ratio; |
1103 | 0 | int target_qindex = av1_convert_q_to_qindex(target_q, bit_depth); |
1104 | | |
1105 | | // Determine the SB's delta_q boost by computing an (unscaled) delta_q from |
1106 | | // the base and target q values, then scale that delta_q according to the |
1107 | | // frame's base qindex. |
1108 | | // The scaling coefficients were chosen empirically to maximize SSIMULACRA 2 |
1109 | | // scores, 10th percentile scores, and subjective quality. Boosts become |
1110 | | // smaller (for a given variance) the lower the base qindex. |
1111 | 0 | int boost = (int)round((base_qindex + 544.0) * (base_qindex - target_qindex) / |
1112 | 0 | 1279.0); |
1113 | 0 | boost = AOMMIN(VAR_BOOST_MAX_DELTAQ_RANGE, boost); |
1114 | | |
1115 | | // Variance Boost was designed to always operate in the lossy domain, so MINQ |
1116 | | // is excluded. |
1117 | 0 | int sb_qindex = AOMMAX(base_qindex - boost, MINQ + 1); |
1118 | |
|
1119 | 0 | return sb_qindex; |
1120 | 0 | } |
1121 | | #endif |