/src/aom/av1/encoder/allintra_vis.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2021, Alliance for Open Media. All rights reserved |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at www.aomedia.org/license/software. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at www.aomedia.org/license/patent. |
10 | | */ |
11 | | |
12 | | #include "config/aom_config.h" |
13 | | |
14 | | #if CONFIG_TFLITE |
15 | | #include "tensorflow/lite/c/c_api.h" |
16 | | #include "av1/encoder/deltaq4_model.c" |
17 | | #endif |
18 | | |
19 | | #include "av1/common/common_data.h" |
20 | | #include "av1/common/enums.h" |
21 | | #include "av1/common/idct.h" |
22 | | #include "av1/common/reconinter.h" |
23 | | #include "av1/encoder/allintra_vis.h" |
24 | | #include "av1/encoder/encoder.h" |
25 | | #include "av1/encoder/hybrid_fwd_txfm.h" |
26 | | #include "av1/encoder/model_rd.h" |
27 | | #include "av1/encoder/rdopt_utils.h" |
28 | | |
29 | | // Process the wiener variance in 16x16 block basis. |
// qsort() comparator for ints: orders ascending.
// Returns -1, 0, or 1 as *elem1 is less than, equal to, or greater than
// *elem2.
static int qsort_comp(const void *elem1, const void *elem2) {
  const int lhs = *(const int *)elem1;
  const int rhs = *(const int *)elem2;
  // Branch-free three-way compare: (lhs > rhs) - (lhs < rhs).
  return (lhs > rhs) - (lhs < rhs);
}
37 | | |
38 | 0 | void av1_init_mb_wiener_var_buffer(AV1_COMP *cpi) { |
39 | 0 | AV1_COMMON *cm = &cpi->common; |
40 | |
|
41 | 0 | cpi->weber_bsize = BLOCK_8X8; |
42 | |
|
43 | 0 | if (cpi->mb_weber_stats) return; |
44 | | |
45 | 0 | CHECK_MEM_ERROR(cm, cpi->mb_weber_stats, |
46 | 0 | aom_calloc(cpi->frame_info.mi_rows * cpi->frame_info.mi_cols, |
47 | 0 | sizeof(*cpi->mb_weber_stats))); |
48 | 0 | } |
49 | | |
50 | | static int64_t get_satd(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row, |
51 | 0 | int mi_col) { |
52 | 0 | AV1_COMMON *const cm = &cpi->common; |
53 | 0 | const int mi_wide = mi_size_wide[bsize]; |
54 | 0 | const int mi_high = mi_size_high[bsize]; |
55 | |
|
56 | 0 | const int mi_step = mi_size_wide[cpi->weber_bsize]; |
57 | 0 | int mb_stride = cpi->frame_info.mi_cols; |
58 | 0 | int mb_count = 0; |
59 | 0 | int64_t satd = 0; |
60 | |
|
61 | 0 | for (int row = mi_row; row < mi_row + mi_high; row += mi_step) { |
62 | 0 | for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) { |
63 | 0 | if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols) |
64 | 0 | continue; |
65 | | |
66 | 0 | satd += cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)] |
67 | 0 | .satd; |
68 | 0 | ++mb_count; |
69 | 0 | } |
70 | 0 | } |
71 | |
|
72 | 0 | if (mb_count) satd = (int)(satd / mb_count); |
73 | 0 | satd = AOMMAX(1, satd); |
74 | |
|
75 | 0 | return (int)satd; |
76 | 0 | } |
77 | | |
78 | | static int64_t get_sse(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row, |
79 | 0 | int mi_col) { |
80 | 0 | AV1_COMMON *const cm = &cpi->common; |
81 | 0 | const int mi_wide = mi_size_wide[bsize]; |
82 | 0 | const int mi_high = mi_size_high[bsize]; |
83 | |
|
84 | 0 | const int mi_step = mi_size_wide[cpi->weber_bsize]; |
85 | 0 | int mb_stride = cpi->frame_info.mi_cols; |
86 | 0 | int mb_count = 0; |
87 | 0 | int64_t distortion = 0; |
88 | |
|
89 | 0 | for (int row = mi_row; row < mi_row + mi_high; row += mi_step) { |
90 | 0 | for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) { |
91 | 0 | if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols) |
92 | 0 | continue; |
93 | | |
94 | 0 | distortion += |
95 | 0 | cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)] |
96 | 0 | .distortion; |
97 | 0 | ++mb_count; |
98 | 0 | } |
99 | 0 | } |
100 | |
|
101 | 0 | if (mb_count) distortion = (int)(distortion / mb_count); |
102 | 0 | distortion = AOMMAX(1, distortion); |
103 | |
|
104 | 0 | return (int)distortion; |
105 | 0 | } |
106 | | |
107 | | static double get_max_scale(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row, |
108 | 0 | int mi_col) { |
109 | 0 | AV1_COMMON *const cm = &cpi->common; |
110 | 0 | const int mi_wide = mi_size_wide[bsize]; |
111 | 0 | const int mi_high = mi_size_high[bsize]; |
112 | 0 | const int mi_step = mi_size_wide[cpi->weber_bsize]; |
113 | 0 | int mb_stride = cpi->frame_info.mi_cols; |
114 | 0 | double min_max_scale = 10.0; |
115 | |
|
116 | 0 | for (int row = mi_row; row < mi_row + mi_high; row += mi_step) { |
117 | 0 | for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) { |
118 | 0 | if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols) |
119 | 0 | continue; |
120 | 0 | WeberStats *weber_stats = |
121 | 0 | &cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)]; |
122 | 0 | if (weber_stats->max_scale < 1.0) continue; |
123 | 0 | if (weber_stats->max_scale < min_max_scale) |
124 | 0 | min_max_scale = weber_stats->max_scale; |
125 | 0 | } |
126 | 0 | } |
127 | 0 | return min_max_scale; |
128 | 0 | } |
129 | | |
130 | | static int get_window_wiener_var(AV1_COMP *const cpi, BLOCK_SIZE bsize, |
131 | 0 | int mi_row, int mi_col) { |
132 | 0 | AV1_COMMON *const cm = &cpi->common; |
133 | 0 | const int mi_wide = mi_size_wide[bsize]; |
134 | 0 | const int mi_high = mi_size_high[bsize]; |
135 | |
|
136 | 0 | const int mi_step = mi_size_wide[cpi->weber_bsize]; |
137 | 0 | int sb_wiener_var = 0; |
138 | 0 | int mb_stride = cpi->frame_info.mi_cols; |
139 | 0 | int mb_count = 0; |
140 | 0 | double base_num = 1; |
141 | 0 | double base_den = 1; |
142 | 0 | double base_reg = 1; |
143 | |
|
144 | 0 | for (int row = mi_row; row < mi_row + mi_high; row += mi_step) { |
145 | 0 | for (int col = mi_col; col < mi_col + mi_wide; col += mi_step) { |
146 | 0 | if (row >= cm->mi_params.mi_rows || col >= cm->mi_params.mi_cols) |
147 | 0 | continue; |
148 | | |
149 | 0 | WeberStats *weber_stats = |
150 | 0 | &cpi->mb_weber_stats[(row / mi_step) * mb_stride + (col / mi_step)]; |
151 | |
|
152 | 0 | base_num += ((double)weber_stats->distortion) * |
153 | 0 | sqrt((double)weber_stats->src_variance) * |
154 | 0 | weber_stats->rec_pix_max; |
155 | |
|
156 | 0 | base_den += fabs( |
157 | 0 | weber_stats->rec_pix_max * sqrt((double)weber_stats->src_variance) - |
158 | 0 | weber_stats->src_pix_max * sqrt((double)weber_stats->rec_variance)); |
159 | |
|
160 | 0 | base_reg += sqrt((double)weber_stats->distortion) * |
161 | 0 | sqrt((double)weber_stats->src_pix_max) * 0.1; |
162 | 0 | ++mb_count; |
163 | 0 | } |
164 | 0 | } |
165 | |
|
166 | 0 | sb_wiener_var = |
167 | 0 | (int)(((base_num + base_reg) / (base_den + base_reg)) / mb_count); |
168 | 0 | sb_wiener_var = AOMMAX(1, sb_wiener_var); |
169 | |
|
170 | 0 | return (int)sb_wiener_var; |
171 | 0 | } |
172 | | |
173 | | static int get_var_perceptual_ai(AV1_COMP *const cpi, BLOCK_SIZE bsize, |
174 | 0 | int mi_row, int mi_col) { |
175 | 0 | AV1_COMMON *const cm = &cpi->common; |
176 | 0 | const int mi_wide = mi_size_wide[bsize]; |
177 | 0 | const int mi_high = mi_size_high[bsize]; |
178 | |
|
179 | 0 | int sb_wiener_var = get_window_wiener_var(cpi, bsize, mi_row, mi_col); |
180 | |
|
181 | 0 | if (mi_row >= (mi_high / 2)) { |
182 | 0 | sb_wiener_var = |
183 | 0 | AOMMIN(sb_wiener_var, |
184 | 0 | get_window_wiener_var(cpi, bsize, mi_row - mi_high / 2, mi_col)); |
185 | 0 | } |
186 | 0 | if (mi_row <= (cm->mi_params.mi_rows - mi_high - (mi_high / 2))) { |
187 | 0 | sb_wiener_var = |
188 | 0 | AOMMIN(sb_wiener_var, |
189 | 0 | get_window_wiener_var(cpi, bsize, mi_row + mi_high / 2, mi_col)); |
190 | 0 | } |
191 | 0 | if (mi_col >= (mi_wide / 2)) { |
192 | 0 | sb_wiener_var = |
193 | 0 | AOMMIN(sb_wiener_var, |
194 | 0 | get_window_wiener_var(cpi, bsize, mi_row, mi_col - mi_wide / 2)); |
195 | 0 | } |
196 | 0 | if (mi_col <= (cm->mi_params.mi_cols - mi_wide - (mi_wide / 2))) { |
197 | 0 | sb_wiener_var = |
198 | 0 | AOMMIN(sb_wiener_var, |
199 | 0 | get_window_wiener_var(cpi, bsize, mi_row, mi_col + mi_wide / 2)); |
200 | 0 | } |
201 | |
|
202 | 0 | return sb_wiener_var; |
203 | 0 | } |
204 | | |
205 | | static double calc_src_mean_var(const uint8_t *const src_buffer, |
206 | | const int buf_stride, const int block_size, |
207 | 0 | const int use_hbd, double *mean) { |
208 | 0 | double src_mean = 0.0; |
209 | 0 | double src_variance = 0.0; |
210 | 0 | for (int pix_row = 0; pix_row < block_size; ++pix_row) { |
211 | 0 | for (int pix_col = 0; pix_col < block_size; ++pix_col) { |
212 | 0 | int src_pix; |
213 | 0 | if (use_hbd) { |
214 | 0 | const uint16_t *src = CONVERT_TO_SHORTPTR(src_buffer); |
215 | 0 | src_pix = src[pix_row * buf_stride + pix_col]; |
216 | 0 | } else { |
217 | 0 | src_pix = src_buffer[pix_row * buf_stride + pix_col]; |
218 | 0 | } |
219 | 0 | src_mean += src_pix; |
220 | 0 | src_variance += src_pix * src_pix; |
221 | 0 | } |
222 | 0 | } |
223 | 0 | const int pix_num = block_size * block_size; |
224 | 0 | src_variance -= (src_mean * src_mean) / pix_num; |
225 | 0 | src_variance /= pix_num; |
226 | 0 | *mean = src_mean / pix_num; |
227 | 0 | return src_variance; |
228 | 0 | } |
229 | | |
// Votes, across every superblock-sized region of the frame, on whether the
// source statistics are better captured at |orig_block_size| or at its
// PARTITION_SPLIT sub-size, and returns the winning block size.
static BLOCK_SIZE pick_block_size(AV1_COMP *cpi,
                                  const BLOCK_SIZE orig_block_size) {
  const BLOCK_SIZE sub_block_size =
      get_partition_subsize(orig_block_size, PARTITION_SPLIT);
  const int mb_step = mi_size_wide[orig_block_size];
  const int sub_step = mb_step >> 1;
  const TX_SIZE tx_size = max_txsize_lookup[orig_block_size];
  const int block_size = tx_size_wide[tx_size];
  const int split_block_size = block_size >> 1;
  assert(split_block_size >= 8);
  const uint8_t *const buffer = cpi->source->y_buffer;
  const int buf_stride = cpi->source->y_stride;
  const int use_hbd = cpi->source->flags & YV12_FLAG_HIGHBITDEPTH;

  // Net vote: positive favors the original size, negative favors the split.
  double vote = 0.0;
  int sb_count = 0;
  for (int mi_row = 0; mi_row < cpi->frame_info.mi_rows; mi_row += mb_step) {
    for (int mi_col = 0; mi_col < cpi->frame_info.mi_cols; mi_col += mb_step) {
      const uint8_t *mb_buffer =
          buffer + mi_row * MI_SIZE * buf_stride + mi_col * MI_SIZE;
      // (1). Calculate mean and var using the original block size
      double mean = 0.0;
      const double orig_var =
          calc_src_mean_var(mb_buffer, buf_stride, block_size, use_hbd, &mean);
      // (2). Calculate mean and var using the split block size
      double split_var[4] = { 0 };
      double split_mean[4] = { 0 };
      int sub_idx = 0;
      for (int row = mi_row; row < mi_row + mb_step; row += sub_step) {
        for (int col = mi_col; col < mi_col + mb_step; col += sub_step) {
          mb_buffer = buffer + row * MI_SIZE * buf_stride + col * MI_SIZE;
          split_var[sub_idx] =
              calc_src_mean_var(mb_buffer, buf_stride, split_block_size,
                                use_hbd, &split_mean[sub_idx]);
          ++sub_idx;
        }
      }
      // (3). Determine whether to use the original or the split block size.
      // If use original, vote += 1.0.
      // If use split, vote -= 1.0.
      double max_split_mean = 0.0;
      double max_split_var = 0.0;
      double geo_split_var = 0.0;
      for (int i = 0; i < 4; ++i) {
        max_split_mean = AOMMAX(max_split_mean, split_mean[i]);
        max_split_var = AOMMAX(max_split_var, split_var[i]);
        // NOTE(review): a perfectly flat sub-block gives split_var == 0, so
        // log() yields -inf and geo_split_var becomes 0 after exp(); that
        // biases the vote toward splitting -- confirm this is intended.
        geo_split_var += log(split_var[i]);
      }
      // Geometric mean of the four sub-block variances.
      geo_split_var = exp(geo_split_var / 4);
      const double param_1 = 1.5;
      const double param_2 = 1.0;
      // If the variance of the large block size is considerably larger than the
      // geometric mean of vars of small blocks;
      // Or if the variance of the large block size is larger than the local
      // variance;
      // Or if the variance of the large block size is considerably larger
      // than the mean.
      // It indicates that the source block is not a flat area, therefore we
      // might want to split into smaller block sizes to capture the
      // local characteristics.
      if (orig_var > param_1 * geo_split_var || orig_var > max_split_var ||
          sqrt(orig_var) > param_2 * mean) {
        vote -= 1.0;
      } else {
        vote += 1.0;
      }
      ++sb_count;
    }
  }

  return vote > 0.0 ? orig_block_size : sub_block_size;
}
302 | | |
// Iteratively shrinks the analysis block size starting from the frame's
// superblock size (never going below 16x16), then computes the frame-level
// normalization factor as a SATD-weighted geometric mean of the per-block
// Wiener variances at the chosen size. The chosen size is returned through
// |best_block_size|; the normalization factor (clamped to >= 1) is returned.
static int64_t pick_norm_factor_and_block_size(AV1_COMP *const cpi,
                                               BLOCK_SIZE *best_block_size) {
  const AV1_COMMON *const cm = &cpi->common;
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  BLOCK_SIZE last_block_size;
  BLOCK_SIZE this_block_size = sb_size;
  *best_block_size = sb_size;
  // Pick from block size 64x64, 32x32 and 16x16.
  do {
    last_block_size = this_block_size;
    assert(this_block_size >= BLOCK_16X16 && this_block_size <= BLOCK_128X128);
    const int block_size = block_size_wide[this_block_size];
    // Stop before evaluating a split below 16x16.
    if (block_size < 32) break;
    this_block_size = pick_block_size(cpi, last_block_size);
  } while (this_block_size != last_block_size);
  *best_block_size = this_block_size;

  int64_t norm_factor = 1;
  const BLOCK_SIZE norm_block_size = this_block_size;
  assert(norm_block_size >= BLOCK_16X16 && norm_block_size <= BLOCK_64X64);
  const int norm_step = mi_size_wide[norm_block_size];
  // Accumulate log(wiener_var) weighted by SATD/sqrt(SSE); the final factor
  // is exp of the weighted average, i.e. a weighted geometric mean.
  double sb_wiener_log = 0;
  double sb_count = 0;
  for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += norm_step) {
    for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += norm_step) {
      const int sb_wiener_var =
          get_var_perceptual_ai(cpi, norm_block_size, mi_row, mi_col);
      const int64_t satd = get_satd(cpi, norm_block_size, mi_row, mi_col);
      const int64_t sse = get_sse(cpi, norm_block_size, mi_row, mi_col);
      const double scaled_satd = (double)satd / sqrt((double)sse);
      sb_wiener_log += scaled_satd * log(sb_wiener_var);
      sb_count += scaled_satd;
    }
  }
  if (sb_count > 0) norm_factor = (int64_t)(exp(sb_wiener_log / sb_count));
  norm_factor = AOMMAX(1, norm_factor);

  return norm_factor;
}
342 | | |
343 | | static void automatic_intra_tools_off(AV1_COMP *cpi, |
344 | | const double sum_rec_distortion, |
345 | 0 | const double sum_est_rate) { |
346 | 0 | if (!cpi->oxcf.intra_mode_cfg.auto_intra_tools_off) return; |
347 | | |
348 | | // Thresholds |
349 | 0 | const int high_quality_qindex = 128; |
350 | 0 | const double high_quality_bpp = 2.0; |
351 | 0 | const double high_quality_dist_per_pix = 4.0; |
352 | |
|
353 | 0 | AV1_COMMON *const cm = &cpi->common; |
354 | 0 | const int qindex = cm->quant_params.base_qindex; |
355 | 0 | const double dist_per_pix = |
356 | 0 | (double)sum_rec_distortion / (cm->width * cm->height); |
357 | | // The estimate bpp is not accurate, an empirical constant 100 is divided. |
358 | 0 | const double estimate_bpp = sum_est_rate / (cm->width * cm->height * 100); |
359 | |
|
360 | 0 | if (qindex < high_quality_qindex && estimate_bpp > high_quality_bpp && |
361 | 0 | dist_per_pix < high_quality_dist_per_pix) { |
362 | 0 | cpi->oxcf.intra_mode_cfg.enable_smooth_intra = 0; |
363 | 0 | cpi->oxcf.intra_mode_cfg.enable_paeth_intra = 0; |
364 | 0 | cpi->oxcf.intra_mode_cfg.enable_cfl_intra = 0; |
365 | 0 | cpi->oxcf.intra_mode_cfg.enable_diagonal_intra = 0; |
366 | 0 | } |
367 | 0 | } |
368 | | |
// Computes per-8x8 Weber statistics (best intra SATD, distortion, source and
// reconstruction variances, pixel maxima, and the largest quantized
// coefficient magnitude) for the whole frame by running a fast intra mode
// search plus a transform/quant/dequant round trip on every 8x8 block. It
// then derives cpi->norm_wiener_variance and may disable some intra tools.
void av1_set_mb_wiener_variance(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  uint8_t *buffer = cpi->source->y_buffer;
  int buf_stride = cpi->source->y_stride;
  ThreadData *td = &cpi->td;
  MACROBLOCK *x = &td->mb;
  MACROBLOCKD *xd = &x->e_mbd;
  // Zeroed stand-in mode info so intra prediction sees a valid xd->mi[0].
  MB_MODE_INFO mbmi;
  memset(&mbmi, 0, sizeof(mbmi));
  MB_MODE_INFO *mbmi_ptr = &mbmi;
  xd->mi = &mbmi_ptr;
  xd->cur_buf = cpi->source;

  // Scratch reconstruction buffer; released at the end of this function.
  const SequenceHeader *const seq_params = cm->seq_params;
  if (aom_realloc_frame_buffer(
          &cm->cur_frame->buf, cm->width, cm->height, seq_params->subsampling_x,
          seq_params->subsampling_y, seq_params->use_highbitdepth,
          cpi->oxcf.border_in_pixels, cm->features.byte_alignment, NULL, NULL,
          NULL, cpi->oxcf.tool_cfg.enable_global_motion))
    aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
                       "Failed to allocate frame buffer");

  // Quantize at the configured constant-quality level.
  cm->quant_params.base_qindex = cpi->oxcf.rc_cfg.cq_level;
  av1_frame_init_quantizer(cpi);

  DECLARE_ALIGNED(32, int16_t, src_diff[32 * 32]);
  DECLARE_ALIGNED(32, tran_low_t, coeff[32 * 32]);
  DECLARE_ALIGNED(32, tran_low_t, qcoeff[32 * 32]);
  DECLARE_ALIGNED(32, tran_low_t, dqcoeff[32 * 32]);

  int mi_row, mi_col;

  BLOCK_SIZE bsize = cpi->weber_bsize;
  const TX_SIZE tx_size = max_txsize_lookup[bsize];
  const int block_size = tx_size_wide[tx_size];
  const int coeff_count = block_size * block_size;

  const BitDepthInfo bd_info = get_bit_depth_info(xd);
  cpi->norm_wiener_variance = 0;
  int mb_step = mi_size_wide[bsize];

  double sum_rec_distortion = 0.0;
  double sum_est_rate = 0.0;
  for (mi_row = 0; mi_row < cpi->frame_info.mi_rows; mi_row += mb_step) {
    for (mi_col = 0; mi_col < cpi->frame_info.mi_cols; mi_col += mb_step) {
      PREDICTION_MODE best_mode = DC_PRED;
      int best_intra_cost = INT_MAX;

      xd->up_available = mi_row > 0;
      xd->left_available = mi_col > 0;

      const int mi_width = mi_size_wide[bsize];
      const int mi_height = mi_size_high[bsize];
      set_mode_info_offsets(&cpi->common.mi_params, &cpi->mbmi_ext_info, x, xd,
                            mi_row, mi_col);
      set_mi_row_col(xd, &xd->tile, mi_row, mi_height, mi_col, mi_width,
                     cm->mi_params.mi_rows, cm->mi_params.mi_cols);
      set_plane_n4(xd, mi_size_wide[bsize], mi_size_high[bsize],
                   av1_num_planes(cm));
      xd->mi[0]->bsize = bsize;
      xd->mi[0]->motion_mode = SIMPLE_TRANSLATION;

      av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row,
                           mi_col, 0, av1_num_planes(cm));

      int dst_buffer_stride = xd->plane[0].dst.stride;
      uint8_t *dst_buffer = xd->plane[0].dst.buf;
      uint8_t *mb_buffer =
          buffer + mi_row * MI_SIZE * buf_stride + mi_col * MI_SIZE;

      // Search all intra prediction modes; keep the one with the lowest
      // SATD of the transformed prediction residue.
      for (PREDICTION_MODE mode = INTRA_MODE_START; mode < INTRA_MODE_END;
           ++mode) {
        av1_predict_intra_block(
            xd, cm->seq_params->sb_size,
            cm->seq_params->enable_intra_edge_filter, block_size, block_size,
            tx_size, mode, 0, 0, FILTER_INTRA_MODES, dst_buffer,
            dst_buffer_stride, dst_buffer, dst_buffer_stride, 0, 0, 0);

        av1_subtract_block(bd_info, block_size, block_size, src_diff,
                           block_size, mb_buffer, buf_stride, dst_buffer,
                           dst_buffer_stride);
        av1_quick_txfm(0, tx_size, bd_info, src_diff, block_size, coeff);
        int intra_cost = aom_satd(coeff, coeff_count);
        if (intra_cost < best_intra_cost) {
          best_intra_cost = intra_cost;
          best_mode = mode;
        }
      }

      // Re-run prediction and the forward transform with the best mode, then
      // quantize, dequantize, and inverse-transform to obtain the
      // reconstruction in dst_buffer.
      int idx;
      av1_predict_intra_block(xd, cm->seq_params->sb_size,
                              cm->seq_params->enable_intra_edge_filter,
                              block_size, block_size, tx_size, best_mode, 0, 0,
                              FILTER_INTRA_MODES, dst_buffer, dst_buffer_stride,
                              dst_buffer, dst_buffer_stride, 0, 0, 0);
      av1_subtract_block(bd_info, block_size, block_size, src_diff, block_size,
                         mb_buffer, buf_stride, dst_buffer, dst_buffer_stride);
      av1_quick_txfm(0, tx_size, bd_info, src_diff, block_size, coeff);

      const struct macroblock_plane *const p = &x->plane[0];
      uint16_t eob;
      const SCAN_ORDER *const scan_order = &av1_scan_orders[tx_size][DCT_DCT];
      QUANT_PARAM quant_param;
      int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
      av1_setup_quant(tx_size, 0, AV1_XFORM_QUANT_FP, 0, &quant_param);
#if CONFIG_AV1_HIGHBITDEPTH
      if (is_cur_buf_hbd(xd)) {
        av1_highbd_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob,
                                      scan_order, &quant_param);
      } else {
        av1_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob,
                               scan_order, &quant_param);
      }
#else
      av1_quantize_fp_facade(coeff, pix_num, p, qcoeff, dqcoeff, &eob,
                             scan_order, &quant_param);
#endif  // CONFIG_AV1_HIGHBITDEPTH
      av1_inverse_transform_block(xd, dqcoeff, 0, DCT_DCT, tx_size, dst_buffer,
                                  dst_buffer_stride, eob, 0);
      WeberStats *weber_stats =
          &cpi->mb_weber_stats[(mi_row / mb_step) * cpi->frame_info.mi_cols +
                               (mi_col / mb_step)];

      weber_stats->rec_pix_max = 1;
      weber_stats->rec_variance = 0;
      weber_stats->src_pix_max = 1;
      weber_stats->src_variance = 0;
      weber_stats->distortion = 0;

      int64_t src_mean = 0;
      int64_t rec_mean = 0;
      int64_t dist_mean = 0;

      // Accumulate raw sums and sums-of-squares of source/reconstruction
      // pixels; the mean corrections are applied after the loop.
      for (int pix_row = 0; pix_row < block_size; ++pix_row) {
        for (int pix_col = 0; pix_col < block_size; ++pix_col) {
          int src_pix, rec_pix;
#if CONFIG_AV1_HIGHBITDEPTH
          if (is_cur_buf_hbd(xd)) {
            uint16_t *src = CONVERT_TO_SHORTPTR(mb_buffer);
            uint16_t *rec = CONVERT_TO_SHORTPTR(dst_buffer);
            src_pix = src[pix_row * buf_stride + pix_col];
            rec_pix = rec[pix_row * dst_buffer_stride + pix_col];
          } else {
            src_pix = mb_buffer[pix_row * buf_stride + pix_col];
            rec_pix = dst_buffer[pix_row * dst_buffer_stride + pix_col];
          }
#else
          src_pix = mb_buffer[pix_row * buf_stride + pix_col];
          rec_pix = dst_buffer[pix_row * dst_buffer_stride + pix_col];
#endif
          src_mean += src_pix;
          rec_mean += rec_pix;
          dist_mean += src_pix - rec_pix;
          weber_stats->src_variance += src_pix * src_pix;
          weber_stats->rec_variance += rec_pix * rec_pix;
          weber_stats->src_pix_max = AOMMAX(weber_stats->src_pix_max, src_pix);
          weber_stats->rec_pix_max = AOMMAX(weber_stats->rec_pix_max, rec_pix);
          weber_stats->distortion += (src_pix - rec_pix) * (src_pix - rec_pix);
        }
      }

      sum_rec_distortion += weber_stats->distortion;
      int est_block_rate = 0;
      int64_t est_block_dist = 0;
      model_rd_sse_fn[MODELRD_LEGACY](cpi, x, bsize, 0, weber_stats->distortion,
                                      pix_num, &est_block_rate,
                                      &est_block_dist);
      sum_est_rate += est_block_rate;

      // Convert raw sums of squares into mean-corrected (scaled) variances.
      weber_stats->src_variance -= (src_mean * src_mean) / pix_num;
      weber_stats->rec_variance -= (rec_mean * rec_mean) / pix_num;
      weber_stats->distortion -= (dist_mean * dist_mean) / pix_num;
      weber_stats->satd = best_intra_cost;

      // Sort the absolute quantized coefficients (DC forced to zero) so the
      // largest magnitude lands at the end; it becomes this block's
      // max_scale.
      qcoeff[0] = 0;
      for (idx = 1; idx < coeff_count; ++idx) qcoeff[idx] = abs(qcoeff[idx]);
      qsort(qcoeff, coeff_count, sizeof(*coeff), qsort_comp);

      weber_stats->max_scale = (double)qcoeff[coeff_count - 1];
    }
  }

  // Determine whether to turn off several intra coding tools.
  automatic_intra_tools_off(cpi, sum_rec_distortion, sum_est_rate);

  BLOCK_SIZE norm_block_size = BLOCK_16X16;
  cpi->norm_wiener_variance =
      pick_norm_factor_and_block_size(cpi, &norm_block_size);
  const int norm_step = mi_size_wide[norm_block_size];

  // Refine the normalization factor with two fixed-point iterations of
  // re-deriving the SATD-weighted geometric mean using beta-clamped block
  // variances.
  double sb_wiener_log = 0;
  double sb_count = 0;
  for (int its_cnt = 0; its_cnt < 2; ++its_cnt) {
    sb_wiener_log = 0;
    sb_count = 0;
    for (mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += norm_step) {
      for (mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += norm_step) {
        int sb_wiener_var =
            get_var_perceptual_ai(cpi, norm_block_size, mi_row, mi_col);

        double beta = (double)cpi->norm_wiener_variance / sb_wiener_var;
        double min_max_scale = AOMMAX(
            1.0, get_max_scale(cpi, cm->seq_params->sb_size, mi_row, mi_col));
        beta = 1.0 / AOMMIN(1.0 / beta, min_max_scale);
        // Clamp beta to [0.25, 4] so delta-q stays in a sane range.
        beta = AOMMIN(beta, 4);
        beta = AOMMAX(beta, 0.25);

        sb_wiener_var = (int)(cpi->norm_wiener_variance / beta);

        int64_t satd = get_satd(cpi, norm_block_size, mi_row, mi_col);
        int64_t sse = get_sse(cpi, norm_block_size, mi_row, mi_col);
        double scaled_satd = (double)satd / sqrt((double)sse);
        sb_wiener_log += scaled_satd * log(sb_wiener_var);
        sb_count += scaled_satd;
      }
    }

    if (sb_count > 0)
      cpi->norm_wiener_variance = (int64_t)(exp(sb_wiener_log / sb_count));
    cpi->norm_wiener_variance = AOMMAX(1, cpi->norm_wiener_variance);
  }

  // Release the temporary reconstruction buffer.
  aom_free_frame_buffer(&cm->cur_frame->buf);
}
593 | | |
594 | | int av1_get_sbq_perceptual_ai(AV1_COMP *const cpi, BLOCK_SIZE bsize, int mi_row, |
595 | 0 | int mi_col) { |
596 | 0 | AV1_COMMON *const cm = &cpi->common; |
597 | 0 | const int base_qindex = cm->quant_params.base_qindex; |
598 | 0 | int sb_wiener_var = get_var_perceptual_ai(cpi, bsize, mi_row, mi_col); |
599 | 0 | int offset = 0; |
600 | 0 | double beta = (double)cpi->norm_wiener_variance / sb_wiener_var; |
601 | 0 | double min_max_scale = AOMMAX(1.0, get_max_scale(cpi, bsize, mi_row, mi_col)); |
602 | 0 | beta = 1.0 / AOMMIN(1.0 / beta, min_max_scale); |
603 | | |
604 | | // Cap beta such that the delta q value is not much far away from the base q. |
605 | 0 | beta = AOMMIN(beta, 4); |
606 | 0 | beta = AOMMAX(beta, 0.25); |
607 | 0 | offset = av1_get_deltaq_offset(cm->seq_params->bit_depth, base_qindex, beta); |
608 | 0 | const DeltaQInfo *const delta_q_info = &cm->delta_q_info; |
609 | 0 | offset = AOMMIN(offset, delta_q_info->delta_q_res * 20 - 1); |
610 | 0 | offset = AOMMAX(offset, -delta_q_info->delta_q_res * 20 + 1); |
611 | 0 | int qindex = cm->quant_params.base_qindex + offset; |
612 | 0 | qindex = AOMMIN(qindex, MAXQ); |
613 | 0 | qindex = AOMMAX(qindex, MINQ); |
614 | 0 | if (base_qindex > MINQ) qindex = AOMMAX(qindex, MINQ + 1); |
615 | |
|
616 | 0 | return qindex; |
617 | 0 | } |
618 | | |
619 | 0 | void av1_init_mb_ur_var_buffer(AV1_COMP *cpi) { |
620 | 0 | AV1_COMMON *cm = &cpi->common; |
621 | |
|
622 | 0 | if (cpi->mb_delta_q) return; |
623 | | |
624 | 0 | CHECK_MEM_ERROR(cm, cpi->mb_delta_q, |
625 | 0 | aom_calloc(cpi->frame_info.mb_rows * cpi->frame_info.mb_cols, |
626 | 0 | sizeof(*cpi->mb_delta_q))); |
627 | 0 | } |
628 | | |
629 | | #if CONFIG_TFLITE |
630 | | static int model_predict(BLOCK_SIZE block_size, int num_cols, int num_rows, |
631 | | uint8_t *y_buffer, int y_stride, float *predicts) { |
632 | | // Create the model and interpreter options. |
633 | | TfLiteModel *model = |
634 | | TfLiteModelCreate(av1_deltaq4_model_file, av1_deltaq4_model_fsize); |
635 | | if (model == NULL) return 1; |
636 | | |
637 | | TfLiteInterpreterOptions *options = TfLiteInterpreterOptionsCreate(); |
638 | | TfLiteInterpreterOptionsSetNumThreads(options, 2); |
639 | | if (options == NULL) { |
640 | | TfLiteModelDelete(model); |
641 | | return 1; |
642 | | } |
643 | | |
644 | | // Create the interpreter. |
645 | | TfLiteInterpreter *interpreter = TfLiteInterpreterCreate(model, options); |
646 | | if (interpreter == NULL) { |
647 | | TfLiteInterpreterOptionsDelete(options); |
648 | | TfLiteModelDelete(model); |
649 | | return 1; |
650 | | } |
651 | | |
652 | | // Allocate tensors and populate the input tensor data. |
653 | | TfLiteInterpreterAllocateTensors(interpreter); |
654 | | TfLiteTensor *input_tensor = TfLiteInterpreterGetInputTensor(interpreter, 0); |
655 | | if (input_tensor == NULL) { |
656 | | TfLiteInterpreterDelete(interpreter); |
657 | | TfLiteInterpreterOptionsDelete(options); |
658 | | TfLiteModelDelete(model); |
659 | | return 1; |
660 | | } |
661 | | |
662 | | struct aom_internal_error_info error; |
663 | | size_t input_size = TfLiteTensorByteSize(input_tensor); |
664 | | float *input_data; |
665 | | AOM_CHECK_MEM_ERROR(&error, input_data, aom_calloc(input_size, 1)); |
666 | | |
667 | | const int num_mi_w = mi_size_wide[block_size]; |
668 | | const int num_mi_h = mi_size_high[block_size]; |
669 | | for (int row = 0; row < num_rows; ++row) { |
670 | | for (int col = 0; col < num_cols; ++col) { |
671 | | const int row_offset = (row * num_mi_h) << 2; |
672 | | const int col_offset = (col * num_mi_w) << 2; |
673 | | |
674 | | uint8_t *buf = y_buffer + row_offset * y_stride + col_offset; |
675 | | int r = row_offset, pos = 0; |
676 | | while (r < row_offset + (num_mi_h << 2)) { |
677 | | for (int c = 0; c < (num_mi_w << 2); ++c) { |
678 | | input_data[pos++] = (float)*(buf + c) / 255.0f; |
679 | | } |
680 | | buf += y_stride; |
681 | | ++r; |
682 | | } |
683 | | TfLiteTensorCopyFromBuffer(input_tensor, input_data, input_size); |
684 | | |
685 | | // Execute inference. |
686 | | if (TfLiteInterpreterInvoke(interpreter) != kTfLiteOk) { |
687 | | TfLiteInterpreterDelete(interpreter); |
688 | | TfLiteInterpreterOptionsDelete(options); |
689 | | TfLiteModelDelete(model); |
690 | | return 1; |
691 | | } |
692 | | |
693 | | // Extract the output tensor data. |
694 | | const TfLiteTensor *output_tensor = |
695 | | TfLiteInterpreterGetOutputTensor(interpreter, 0); |
696 | | if (output_tensor == NULL) { |
697 | | TfLiteInterpreterDelete(interpreter); |
698 | | TfLiteInterpreterOptionsDelete(options); |
699 | | TfLiteModelDelete(model); |
700 | | return 1; |
701 | | } |
702 | | |
703 | | size_t output_size = TfLiteTensorByteSize(output_tensor); |
704 | | float output_data; |
705 | | |
706 | | TfLiteTensorCopyToBuffer(output_tensor, &output_data, output_size); |
707 | | predicts[row * num_cols + col] = output_data; |
708 | | } |
709 | | } |
710 | | |
711 | | // Dispose of the model and interpreter objects. |
712 | | TfLiteInterpreterDelete(interpreter); |
713 | | TfLiteInterpreterOptionsDelete(options); |
714 | | TfLiteModelDelete(model); |
715 | | aom_free(input_data); |
716 | | return 0; |
717 | | } |
718 | | |
719 | | void av1_set_mb_ur_variance(AV1_COMP *cpi) { |
720 | | const AV1_COMMON *cm = &cpi->common; |
721 | | const CommonModeInfoParams *const mi_params = &cm->mi_params; |
722 | | uint8_t *y_buffer = cpi->source->y_buffer; |
723 | | const int y_stride = cpi->source->y_stride; |
724 | | const int block_size = cpi->common.seq_params->sb_size; |
725 | | |
726 | | const int num_mi_w = mi_size_wide[block_size]; |
727 | | const int num_mi_h = mi_size_high[block_size]; |
728 | | const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w; |
729 | | const int num_rows = (mi_params->mi_rows + num_mi_h - 1) / num_mi_h; |
730 | | const int use_hbd = cpi->source->flags & YV12_FLAG_HIGHBITDEPTH; |
731 | | |
732 | | // TODO(sdeng): add highbitdepth support. |
733 | | (void)use_hbd; |
734 | | |
735 | | float *mb_delta_q, delta_q_avg = 0.0f; |
736 | | CHECK_MEM_ERROR(cm, mb_delta_q, |
737 | | aom_calloc(num_rows * num_cols, sizeof(float))); |
738 | | |
739 | | // TODO(sdeng): train the model at a different quality level. |
740 | | if (model_predict(block_size, num_cols, num_rows, y_buffer, y_stride, |
741 | | mb_delta_q)) { |
742 | | aom_internal_error(cm->error, AOM_CODEC_ERROR, |
743 | | "Failed to call TFlite functions."); |
744 | | } |
745 | | |
746 | | // Loop through each SB block. |
747 | | for (int row = 0; row < num_rows; ++row) { |
748 | | for (int col = 0; col < num_cols; ++col) { |
749 | | const int index = row * num_cols + col; |
750 | | delta_q_avg += mb_delta_q[index]; |
751 | | } |
752 | | } |
753 | | |
754 | | delta_q_avg /= (float)(num_rows * num_cols); |
755 | | |
756 | | // Approximates the model change between current version (Spet 2021) and the |
757 | | // baseline (July 2021). |
758 | | const float model_change = 3.0f * 4.0f / (float)MAXQ; |
759 | | delta_q_avg += model_change; |
760 | | |
761 | | float scaling_factor; |
762 | | const float cq_level = (float)cpi->oxcf.rc_cfg.cq_level / (float)MAXQ; |
763 | | if (cq_level < delta_q_avg) { |
764 | | scaling_factor = cq_level / delta_q_avg; |
765 | | } else { |
766 | | scaling_factor = 1.0f - (cq_level - delta_q_avg) / (1.0f - delta_q_avg); |
767 | | } |
768 | | delta_q_avg -= model_change; |
769 | | |
770 | | for (int row = 0; row < num_rows; ++row) { |
771 | | for (int col = 0; col < num_cols; ++col) { |
772 | | const int index = row * num_cols + col; |
773 | | cpi->mb_delta_q[index] = |
774 | | RINT((float)cpi->oxcf.q_cfg.deltaq_strength / 100.0 * (float)MAXQ * |
775 | | scaling_factor * (mb_delta_q[index] - delta_q_avg)); |
776 | | } |
777 | | } |
778 | | |
779 | | aom_free(mb_delta_q); |
780 | | } |
781 | | #else // !CONFIG_TFLITE |
// Computes per-superblock delta-q values from local variance statistics and
// stores them in cpi->mb_delta_q (fallback path used when CONFIG_TFLITE is
// off). Two fitted exponential models map variance to delta-q; the final
// value interpolates between them based on the configured cq level.
void av1_set_mb_ur_variance(AV1_COMP *cpi) {
  const AV1_COMMON *cm = &cpi->common;
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  ThreadData *td = &cpi->td;
  MACROBLOCK *x = &td->mb;
  MACROBLOCKD *xd = &x->e_mbd;
  uint8_t *y_buffer = cpi->source->y_buffer;
  const int y_stride = cpi->source->y_stride;
  const int block_size = cpi->common.seq_params->sb_size;

  // Superblock grid dimensions; partial blocks at the frame edge round up.
  const int num_mi_w = mi_size_wide[block_size];
  const int num_mi_h = mi_size_high[block_size];
  const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w;
  const int num_rows = (mi_params->mi_rows + num_mi_h - 1) / num_mi_h;
  const int use_hbd = cpi->source->flags & YV12_FLAG_HIGHBITDEPTH;

  // One delta-q map per fitted model; both are filled in a single pass.
  int *mb_delta_q[2];
  CHECK_MEM_ERROR(cm, mb_delta_q[0],
                  aom_calloc(num_rows * num_cols, sizeof(*mb_delta_q[0])));
  CHECK_MEM_ERROR(cm, mb_delta_q[1],
                  aom_calloc(num_rows * num_cols, sizeof(*mb_delta_q[1])));

  // Approximates the model change between current version (Sept 2021) and the
  // baseline (July 2021).
  const double model_change[] = { 3.0, 3.0 };
  // The following parameters are fitted from user labeled data.
  const double a[] = { -24.50 * 4.0, -17.20 * 4.0 };
  const double b[] = { 0.004898, 0.003093 };
  const double c[] = { (29.932 + model_change[0]) * 4.0,
                       (42.100 + model_change[1]) * 4.0 };
  int delta_q_avg[2] = { 0, 0 };
  // Loop through each SB block.
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      double var = 0.0, num_of_var = 0.0;
      const int index = row * num_cols + col;

      // Loop through each 8x8 block (stride of 2 mi units = 8 luma pixels).
      for (int mi_row = row * num_mi_h;
           mi_row < mi_params->mi_rows && mi_row < (row + 1) * num_mi_h;
           mi_row += 2) {
        for (int mi_col = col * num_mi_w;
             mi_col < mi_params->mi_cols && mi_col < (col + 1) * num_mi_w;
             mi_col += 2) {
          struct buf_2d buf;
          // mi units are 4 luma pixels wide/high, hence << 2.
          const int row_offset_y = mi_row << 2;
          const int col_offset_y = mi_col << 2;

          buf.buf = y_buffer + row_offset_y * y_stride + col_offset_y;
          buf.stride = y_stride;

          unsigned int block_variance;
          if (use_hbd) {
            block_variance = av1_high_get_sby_perpixel_variance(
                cpi, &buf, BLOCK_8X8, xd->bd);
          } else {
            block_variance =
                av1_get_sby_perpixel_variance(cpi, &buf, BLOCK_8X8);
          }

          // Clamp to 1 so log() below is well-defined for flat blocks.
          block_variance = AOMMAX(block_variance, 1);
          var += log((double)block_variance);
          num_of_var += 1.0;
        }
      }
      // exp(mean(log(var))) is the geometric mean of the 8x8 variances.
      var = exp(var / num_of_var);
      // Evaluate both fitted models and accumulate their frame-wide sums.
      mb_delta_q[0][index] = RINT(a[0] * exp(-b[0] * var) + c[0]);
      mb_delta_q[1][index] = RINT(a[1] * exp(-b[1] * var) + c[1]);
      delta_q_avg[0] += mb_delta_q[0][index];
      delta_q_avg[1] += mb_delta_q[1][index];
    }
  }

  // Convert sums to per-superblock averages for each model.
  delta_q_avg[0] = RINT((double)delta_q_avg[0] / (num_rows * num_cols));
  delta_q_avg[1] = RINT((double)delta_q_avg[1] / (num_rows * num_cols));

  // Select which model (or blend, model_idx == 2) to use based on where the
  // configured cq level falls relative to the two model averages.
  int model_idx;
  double scaling_factor;
  const int cq_level = cpi->oxcf.rc_cfg.cq_level;
  if (cq_level < delta_q_avg[0]) {
    model_idx = 0;
    scaling_factor = (double)cq_level / delta_q_avg[0];
  } else if (cq_level < delta_q_avg[1]) {
    model_idx = 2;
    scaling_factor =
        (double)(cq_level - delta_q_avg[0]) / (delta_q_avg[1] - delta_q_avg[0]);
  } else {
    model_idx = 1;
    scaling_factor = (double)(MAXQ - cq_level) / (MAXQ - delta_q_avg[1]);
  }

  // Blended average used to re-center deltas in the interpolation case.
  const double new_delta_q_avg =
      delta_q_avg[0] + scaling_factor * (delta_q_avg[1] - delta_q_avg[0]);
  for (int row = 0; row < num_rows; ++row) {
    for (int col = 0; col < num_cols; ++col) {
      const int index = row * num_cols + col;
      if (model_idx == 2) {
        // Interpolate between the two models, then center on the blended
        // average and apply the user-configured deltaq strength.
        const double delta_q =
            mb_delta_q[0][index] +
            scaling_factor * (mb_delta_q[1][index] - mb_delta_q[0][index]);
        cpi->mb_delta_q[index] = RINT((double)cpi->oxcf.q_cfg.deltaq_strength /
                                      100.0 * (delta_q - new_delta_q_avg));
      } else {
        // Single-model case: center on that model's average and scale.
        cpi->mb_delta_q[index] = RINT(
            (double)cpi->oxcf.q_cfg.deltaq_strength / 100.0 * scaling_factor *
            (mb_delta_q[model_idx][index] - delta_q_avg[model_idx]));
      }
    }
  }

  aom_free(mb_delta_q[0]);
  aom_free(mb_delta_q[1]);
}
895 | | #endif // CONFIG_TFLITE |
896 | | |
897 | 0 | int av1_get_sbq_user_rating_based(AV1_COMP *const cpi, int mi_row, int mi_col) { |
898 | 0 | const BLOCK_SIZE bsize = cpi->common.seq_params->sb_size; |
899 | 0 | const CommonModeInfoParams *const mi_params = &cpi->common.mi_params; |
900 | 0 | AV1_COMMON *const cm = &cpi->common; |
901 | 0 | const int base_qindex = cm->quant_params.base_qindex; |
902 | 0 | if (base_qindex == MINQ || base_qindex == MAXQ) return base_qindex; |
903 | | |
904 | 0 | const int num_mi_w = mi_size_wide[bsize]; |
905 | 0 | const int num_mi_h = mi_size_high[bsize]; |
906 | 0 | const int num_cols = (mi_params->mi_cols + num_mi_w - 1) / num_mi_w; |
907 | 0 | const int index = (mi_row / num_mi_h) * num_cols + (mi_col / num_mi_w); |
908 | 0 | const int delta_q = cpi->mb_delta_q[index]; |
909 | |
|
910 | 0 | int qindex = base_qindex + delta_q; |
911 | 0 | qindex = AOMMIN(qindex, MAXQ); |
912 | 0 | qindex = AOMMAX(qindex, MINQ + 1); |
913 | |
|
914 | 0 | return qindex; |
915 | 0 | } |