/work/svt-av1/Source/Lib/Codec/rc_aq.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright(c) 2026 Meta Platforms, Inc. and affiliates. |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license. |
10 | | */ |
11 | | |
12 | | #include "pcs.h" |
13 | | #include "sequence_control_set.h" |
14 | | #include "inv_transforms.h" |
15 | | #include "me_context.h" |
16 | | #include "utility.h" |
17 | | |
18 | | #include "rc_process.h" |
19 | | #include "resize.h" |
20 | | |
21 | | // These functions use formulaic calculations to make playing with the |
22 | | // quantizer tables easier. If necessary they can be replaced by lookup |
23 | | // tables if and when things settle down in the experimental Bitstream |
24 | 0 | int32_t svt_av1_convert_qindex_to_q_fp8(int32_t qindex, EbBitDepth bit_depth) { |
25 | | // Convert the index to a real Q value (scaled down to match old Q values) |
26 | 0 | switch (bit_depth) { |
27 | 0 | case EB_EIGHT_BIT: |
28 | 0 | return svt_aom_ac_quant_qtx(qindex, 0, bit_depth) << 6; // / 4.0; |
29 | 0 | case EB_TEN_BIT: |
30 | 0 | return svt_aom_ac_quant_qtx(qindex, 0, bit_depth) << 4; // / 16.0; |
31 | 0 | case EB_TWELVE_BIT: |
32 | 0 | return svt_aom_ac_quant_qtx(qindex, 0, bit_depth) << 3; // / 64.0; |
33 | 0 | default: |
34 | 0 | assert(0 && "bit_depth should be EB_EIGHT_BIT, EB_TEN_BIT or EB_TWELVE_BIT"); |
35 | 0 | return -1; |
36 | 0 | } |
37 | 0 | } |
38 | | |
39 | 0 | int32_t svt_av1_compute_qdelta_fp(int32_t qstart_fp8, int32_t qtarget_fp8, EbBitDepth bit_depth) { |
40 | 0 | int32_t start_index = MAXQ; |
41 | 0 | int32_t target_index = MAXQ; |
42 | 0 | int32_t i; |
43 | | |
44 | | // Convert the average q value to an index. |
45 | 0 | for (i = MINQ; i < MAXQ; ++i) { |
46 | 0 | start_index = i; |
47 | 0 | if (svt_av1_convert_qindex_to_q_fp8(i, bit_depth) >= qstart_fp8) { |
48 | 0 | break; |
49 | 0 | } |
50 | 0 | } |
51 | | |
52 | | // Convert the q target to an index |
53 | 0 | for (i = MINQ; i < MAXQ; ++i) { |
54 | 0 | target_index = i; |
55 | 0 | if (svt_av1_convert_qindex_to_q_fp8(i, bit_depth) >= qtarget_fp8) { |
56 | 0 | break; |
57 | 0 | } |
58 | 0 | } |
59 | |
|
60 | 0 | return target_index - start_index; |
61 | 0 | } |
62 | | |
// qsort()-style comparator for uint16_t elements, yielding ascending order.
// Returns the signed difference (*a - *b): negative, zero or positive.
int variance_comp_int(const void* a, const void* b) {
    const uint16_t lhs = *(const uint16_t*)a;
    const uint16_t rhs = *(const uint16_t*)b;
    // both operands fit in an int, so the subtraction cannot overflow
    return (int)lhs - (int)rhs;
}
66 | | |
// Cap on the per-SB variance boost; also the widest qindex span kept across a
// frame when SB qindexes are normalized (SB offsets are clamped to +/- half of it).
#define VAR_BOOST_MAX_DELTAQ_RANGE 80
// Upper clip applied to the q-step ratio derived from the SB variance.
#define VAR_BOOST_MAX_QSTEP_RATIO_BOOST 8

// Geometry used when sampling the sorted 8x8 variances: a 64x64 block holds
// 8 x 8 = 64 subblocks of 8x8, i.e. 8 subblocks per octile.
#define SUPERBLOCK_SIZE 64
#define SUBBLOCK_SIZE 8
#define SUBBLOCKS_IN_SB_DIM (SUPERBLOCK_SIZE / SUBBLOCK_SIZE)
#define SUBBLOCKS_IN_SB (SUBBLOCKS_IN_SB_DIM * SUBBLOCKS_IN_SB_DIM)
#define SUBBLOCKS_IN_OCTILE (SUBBLOCKS_IN_SB / 8)
75 | | |
76 | | static int av1_get_deltaq_sb_variance_boost(uint8_t base_q_idx, uint16_t* variances, uint8_t strength, |
77 | 0 | EbBitDepth bit_depth, uint8_t octile, uint8_t curve) { |
78 | | // boost q_index based on empirical visual testing, strength 2 |
79 | | // variance qstep_ratio boost (@ base_q_idx 255) |
80 | | // 256 1 |
81 | | // 64 1.481 |
82 | | // 16 2.192 |
83 | | // 4 3.246 |
84 | | // 1 4.806 |
85 | | |
86 | | // copy sb 8x8 variance values to an array for ordering |
87 | 0 | uint16_t ordered_variances[64]; |
88 | 0 | memcpy(&ordered_variances, variances + ME_TIER_ZERO_PU_8x8_0, sizeof(uint16_t) * 64); |
89 | 0 | qsort(&ordered_variances, 64, sizeof(uint16_t), variance_comp_int); |
90 | | |
91 | | // Sample three 8x8 variance values: at the specified octile, previous octile, |
92 | | // and next octile. Make sure we use the last subblock in each octile as the |
93 | | // representative of the octile. |
94 | 0 | assert(octile >= 1 && octile <= 8); |
95 | 0 | int mid_idx = octile * SUBBLOCKS_IN_OCTILE - 1; |
96 | 0 | int low_idx = AOMMAX(SUBBLOCKS_IN_OCTILE - 1, mid_idx - SUBBLOCKS_IN_OCTILE); |
97 | 0 | int upp_idx = AOMMIN(SUBBLOCKS_IN_SB - 1, mid_idx + SUBBLOCKS_IN_OCTILE); |
98 | | |
99 | | // Weigh the three variances in a 1:2:1 ratio, with rounding (the +2 term). |
100 | | // This allows for smoother delta-q transitions among superblocks with |
101 | | // mixed-variance features. |
102 | 0 | int variance = (ordered_variances[low_idx] + ordered_variances[mid_idx] * 2 + ordered_variances[upp_idx] + 2) / 4; |
103 | |
|
104 | | #if DEBUG_VAR_BOOST |
105 | | SVT_INFO("64x64 variance: %d\n", variances[ME_TIER_ZERO_PU_64x64]); |
106 | | SVT_INFO("8x8 min %d, 1st oct %d, median %d, max %d\n", |
107 | | ordered_variances[0], |
108 | | ordered_variances[7], |
109 | | ordered_variances[31], |
110 | | ordered_variances[63]); |
111 | | SVT_INFO("8x8 variances\n"); |
112 | | uint16_t* variances_row = variances + ME_TIER_ZERO_PU_8x8_0; |
113 | | |
114 | | for (int row = 0; row < 8; row++) { |
115 | | SVT_INFO("%5d %5d %5d %5d %5d %5d %5d %5d\n", |
116 | | variances_row[0], |
117 | | variances_row[1], |
118 | | variances_row[2], |
119 | | variances_row[3], |
120 | | variances_row[4], |
121 | | variances_row[5], |
122 | | variances_row[6], |
123 | | variances_row[7]); |
124 | | variances_row += 8; |
125 | | } |
126 | | #endif |
127 | | |
128 | | // variance = 0 areas are either completely flat patches or very fine gradients |
129 | | // SVT-AV1 doesn't have enough resolution to tell them apart, so let's assume they're not flat and boost them |
130 | 0 | if (variance == 0) { |
131 | 0 | variance = 1; |
132 | 0 | } |
133 | | |
134 | | // compute a boost based on a fast-growing formula |
135 | | // high and medium variance sbs essentially get no boost, while increasingly lower variance sbs get stronger boosts |
136 | 0 | assert(strength >= 1 && strength <= 4); |
137 | 0 | double qstep_ratio = 0; |
138 | 0 | static const double strengths[] = {0, 0.65, 1.1, 1.6, 2.5}; |
139 | |
|
140 | 0 | switch (curve) { |
141 | 0 | case 1: /* 1: low-medium contrast boosting curve */ |
142 | 0 | qstep_ratio = 0.25 * strength * (-log2((double)variance) + 8) + 1; |
143 | 0 | break; |
144 | 0 | case 2: /* 2: still picture curve, tuned for SSIMULACRA2 performance on CID22 */ |
145 | 0 | qstep_ratio = 0.15 * strength * (-log2((double)variance) + 10) + 1; |
146 | 0 | break; |
147 | 0 | default: /* 0: default q step ratio curve */ |
148 | 0 | qstep_ratio = pow(1.018, strengths[strength] * (-10 * log2((double)variance) + 80)); |
149 | 0 | break; |
150 | 0 | } |
151 | 0 | qstep_ratio = CLIP3(1, VAR_BOOST_MAX_QSTEP_RATIO_BOOST, qstep_ratio); |
152 | |
|
153 | 0 | int32_t base_q = svt_av1_convert_qindex_to_q_fp8(base_q_idx, bit_depth); |
154 | 0 | int32_t target_q = (int32_t)(base_q / qstep_ratio); |
155 | 0 | int32_t boost = 0; |
156 | |
|
157 | 0 | switch (curve) { |
158 | 0 | case 2: /* still picture boost, tuned for SSIMULACRA2 performance on CID22 */ |
159 | 0 | boost = (int32_t)((base_q_idx + 544) * -svt_av1_compute_qdelta_fp(base_q, target_q, bit_depth) / (255 + 1024)); |
160 | 0 | break; |
161 | 0 | default: /* curve 0 & 1 boost (default) */ |
162 | 0 | boost = (int32_t)((base_q_idx + 40) * -svt_av1_compute_qdelta_fp(base_q, target_q, bit_depth) / (255 + 40)); |
163 | 0 | break; |
164 | 0 | } |
165 | 0 | boost = AOMMIN(VAR_BOOST_MAX_DELTAQ_RANGE, boost); |
166 | |
|
167 | | #if DEBUG_VAR_BOOST |
168 | | SVT_INFO("Variance: %d, Strength: %d, Q-step ratio: %f, Boost: %d, Base q: %d, Target q: %d\n", |
169 | | variance, |
170 | | strength, |
171 | | qstep_ratio, |
172 | | boost, |
173 | | base_q, |
174 | | target_q); |
175 | | #endif |
176 | |
|
177 | 0 | return boost; |
178 | 0 | } |
179 | | |
180 | 0 | void svt_av1_variance_adjust_qp(PictureControlSet* pcs) { |
181 | 0 | PictureParentControlSet* ppcs = pcs->ppcs; |
182 | 0 | SequenceControlSet* scs = ppcs->scs; |
183 | |
|
184 | 0 | ppcs->frm_hdr.delta_q_params.delta_q_present = 1; |
185 | | |
186 | | // super res pictures scaled with different sb count, should use sb_total_count for each picture |
187 | 0 | uint16_t sb_cnt = scs->sb_total_count; |
188 | 0 | if (ppcs->frame_superres_enabled || ppcs->frame_resize_enabled) { |
189 | 0 | sb_cnt = ppcs->b64_total_count; |
190 | 0 | } |
191 | |
|
192 | 0 | uint8_t min_qindex = MAXQ; |
193 | 0 | uint8_t max_qindex = MINQ; |
194 | |
|
195 | | #if DEBUG_VAR_BOOST_STATS |
196 | | SVT_DEBUG("TPL/CQP SB qindex, frame %llu, temp. level %i\n", pcs->picture_number, pcs->temporal_layer_index); |
197 | | |
198 | | for (uint32_t sb_addr = 0; sb_addr < sb_cnt; ++sb_addr) { |
199 | | SuperBlock* sb_ptr = pcs->sb_ptr_array[sb_addr]; |
200 | | |
201 | | SVT_DEBUG("%4d ", sb_ptr->qindex); |
202 | | |
203 | | if (pcs->frame_width <= (sb_ptr->org_x + 64)) { |
204 | | SVT_DEBUG("\n"); |
205 | | } |
206 | | } |
207 | | SVT_DEBUG("VAQ qindex boost, frame %llu, temp. level %i\n", pcs->picture_number, pcs->temporal_layer_index); |
208 | | #endif |
209 | 0 | for (uint32_t sb_addr = 0; sb_addr < sb_cnt; ++sb_addr) { |
210 | 0 | SuperBlock* sb_ptr = pcs->sb_ptr_array[sb_addr]; |
211 | | |
212 | | // adjust deltaq based on sb variance, with lower variance resulting in a lower qindex |
213 | 0 | int boost = av1_get_deltaq_sb_variance_boost(ppcs->frm_hdr.quantization_params.base_q_idx, |
214 | 0 | ppcs->variance[sb_addr], |
215 | 0 | scs->static_config.variance_boost_strength, |
216 | 0 | scs->static_config.encoder_bit_depth, |
217 | 0 | scs->static_config.variance_octile, |
218 | 0 | scs->static_config.variance_boost_curve); |
219 | | #if DEBUG_VAR_BOOST_STATS |
220 | | SVT_DEBUG("%4d ", boost); |
221 | | |
222 | | if (pcs->frame_width <= (sb_ptr->org_x + 64)) { |
223 | | SVT_DEBUG("\n"); |
224 | | } |
225 | | #endif |
226 | | // don't clamp qindex on valid deltaq range yet |
227 | | // we'll do it after adjusting frame qp to maximize deltaq frame range |
228 | | // q_index 0 is lossless, and is currently not supported in SVT-AV1 |
229 | 0 | sb_ptr->qindex = CLIP3(1, MAXQ, sb_ptr->qindex - boost); |
230 | | |
231 | | // record last seen min and max qindexes for frame qp readjusting |
232 | 0 | min_qindex = AOMMIN(min_qindex, sb_ptr->qindex); |
233 | 0 | max_qindex = AOMMAX(max_qindex, sb_ptr->qindex); |
234 | 0 | } |
235 | | |
236 | | // normalize and clamp frame qindex value to maximize deltaq range |
237 | 0 | int range = max_qindex - min_qindex; |
238 | 0 | range = AOMMIN(range, VAR_BOOST_MAX_DELTAQ_RANGE); |
239 | 0 | int normalized_base_q_idx = (int)min_qindex + (range >> 1); |
240 | |
|
241 | | #if DEBUG_VAR_BOOST_QP |
242 | | SVT_INFO("previous qidx %d, min_qidx %d, max_qidx %d, delta_q_res %d, normalized qidx %d, range %d\n", |
243 | | ppcs->frm_hdr.quantization_params.base_q_idx, |
244 | | min_qindex, |
245 | | max_qindex, |
246 | | ppcs->frm_hdr.delta_q_params.delta_q_res, |
247 | | normalized_base_q_idx, |
248 | | range); |
249 | | #endif |
250 | | #if DEBUG_VAR_BOOST_STATS |
251 | | SVT_DEBUG( |
252 | | "Total CQP/CRF + VAQ qindex, frame %llu, temp. level %i\n", pcs->picture_number, pcs->temporal_layer_index); |
253 | | #endif |
254 | | |
255 | | // normalize sb qindex values |
256 | 0 | for (uint32_t sb_addr = 0; sb_addr < sb_cnt; ++sb_addr) { |
257 | 0 | SuperBlock* sb_ptr = pcs->sb_ptr_array[sb_addr]; |
258 | |
|
259 | 0 | int offset = (int)sb_ptr->qindex - normalized_base_q_idx; |
260 | 0 | offset = AOMMIN(offset, VAR_BOOST_MAX_DELTAQ_RANGE >> 1); |
261 | 0 | offset = AOMMAX(offset, -VAR_BOOST_MAX_DELTAQ_RANGE >> 1); |
262 | | |
263 | | // q_index 0 is lossless, and is currently not supported in SVT-AV1 |
264 | 0 | uint8_t normalized_qindex = CLIP3(1, MAXQ, normalized_base_q_idx + offset); |
265 | | #if DEBUG_VAR_BOOST_STATS |
266 | | SVT_DEBUG("%4d ", normalized_qindex); |
267 | | |
268 | | if (pcs->frame_width <= (sb_ptr->org_x + 64)) { |
269 | | SVT_DEBUG("\n"); |
270 | | } |
271 | | #endif |
272 | |
|
273 | | #if DEBUG_VAR_BOOST_QP |
274 | | SVT_INFO(" sb %d qindex: previous %d, normalized %d\n", sb_addr, sb_ptr->qindex, normalized_qindex); |
275 | | #endif |
276 | 0 | sb_ptr->qindex = normalized_qindex; |
277 | 0 | } |
278 | 0 | } |
279 | | |
280 | 0 | #define BOOST_MAX 10 |
281 | | |
282 | 0 | void svt_aom_cyclic_refresh_setup(PictureParentControlSet* ppcs) { |
283 | 0 | CyclicRefresh* cr = &ppcs->cyclic_refresh; |
284 | |
|
285 | 0 | cr->me_distortion[0] = 0; |
286 | 0 | cr->me_distortion[1] = 0; |
287 | 0 | cr->me_distortion[2] = 0; |
288 | |
|
289 | 0 | cr->actual_num_seg1_sbs = 0; |
290 | 0 | cr->actual_num_seg2_sbs = 0; |
291 | 0 | uint64_t seg2_dist = 0; |
292 | 0 | uint64_t avg_me_dist = ppcs->norm_me_dist; |
293 | 0 | for (uint32_t b64_idx = 0; b64_idx < ppcs->b64_total_count; ++b64_idx) { |
294 | 0 | if (b64_idx >= cr->sb_start && b64_idx < cr->sb_end) { |
295 | 0 | if (ppcs->me_8x8_distortion[b64_idx] < avg_me_dist) { |
296 | 0 | seg2_dist += ppcs->me_8x8_distortion[b64_idx]; |
297 | 0 | cr->me_distortion[2] += ppcs->me_64x64_distortion[b64_idx]; |
298 | 0 | cr->actual_num_seg2_sbs++; |
299 | 0 | } else { |
300 | 0 | cr->me_distortion[1] += ppcs->me_64x64_distortion[b64_idx]; |
301 | 0 | cr->actual_num_seg1_sbs++; |
302 | 0 | } |
303 | 0 | } else { |
304 | 0 | cr->me_distortion[0] += ppcs->me_64x64_distortion[b64_idx]; |
305 | 0 | } |
306 | 0 | } |
307 | |
|
308 | 0 | int actual_num_seg0_sbs = ppcs->b64_total_count - cr->actual_num_seg1_sbs - cr->actual_num_seg2_sbs; |
309 | 0 | cr->me_distortion[0] = actual_num_seg0_sbs ? cr->me_distortion[0] / actual_num_seg0_sbs : 0; |
310 | 0 | cr->me_distortion[1] = cr->actual_num_seg1_sbs ? cr->me_distortion[1] / cr->actual_num_seg1_sbs : 0; |
311 | 0 | cr->me_distortion[2] = cr->actual_num_seg2_sbs ? cr->me_distortion[2] / cr->actual_num_seg2_sbs : 0; |
312 | |
|
313 | 0 | int rate_boost_fac = cr->rate_boost_fac; |
314 | 0 | #if TUNE_SIMPLIFY_SETTINGS |
315 | 0 | if (cr->actual_num_seg2_sbs) { |
316 | | #else |
317 | | if (!ppcs->sc_class1 && cr->actual_num_seg2_sbs) { |
318 | | #endif |
319 | 0 | seg2_dist = seg2_dist / cr->actual_num_seg2_sbs; |
320 | 0 | uint64_t dev = (avg_me_dist - seg2_dist) * 100 / avg_me_dist; |
321 | | // Quadratic Scaling; boost = BOOST_MAX * (dev/100)^2 |
322 | 0 | rate_boost_fac += (int)(BOOST_MAX * dev * dev / (100 * 100)); |
323 | 0 | } |
324 | 0 | cr->rate_ratio_qdelta_seg2 = 0.1 * rate_boost_fac * cr->rate_ratio_qdelta; |
325 | 0 | } |
326 | | |
327 | | /****************************************************** |
328 | | * cyclic_sb_qp_assignment |
329 | | * Assign the QP per SB based on the ME statistics |
330 | | * used in one pass encoding |
331 | | * only works for sb size = 64 |
332 | | ******************************************************/ |
333 | 0 | static void cyclic_sb_qp_assignment(PictureControlSet* pcs) { |
334 | 0 | PictureParentControlSet* ppcs = pcs->ppcs; |
335 | 0 | CyclicRefresh* cr = &ppcs->cyclic_refresh; |
336 | |
|
337 | 0 | ppcs->frm_hdr.delta_q_params.delta_q_present = 1; |
338 | |
|
339 | 0 | int base_q_idx = ppcs->frm_hdr.quantization_params.base_q_idx; |
340 | 0 | for (uint32_t b64_idx = 0; b64_idx < ppcs->b64_total_count; ++b64_idx) { |
341 | 0 | SuperBlock* sb = pcs->sb_ptr_array[b64_idx]; |
342 | 0 | int offset = 0; |
343 | 0 | if (b64_idx >= cr->sb_start && b64_idx < cr->sb_end) { |
344 | 0 | if (ppcs->me_8x8_distortion[b64_idx] < ppcs->norm_me_dist) { |
345 | 0 | offset = cr->qindex_delta[2]; |
346 | 0 | } else { |
347 | 0 | offset = cr->qindex_delta[1]; |
348 | 0 | } |
349 | 0 | } |
350 | 0 | sb->qindex = CLIP3(1, MAXQ, base_q_idx + offset); |
351 | 0 | } |
352 | 0 | } |
353 | | |
354 | | /* |
355 | | * Derives a qindex per 64x64 using ME distortions (to be used for lambda modulation only; not at Q/Q-1) |
356 | | */ |
357 | 474 | void svt_av1_generate_b64_me_qindex_map(PictureControlSet* pcs) { |
358 | 474 | static const int min_offset[MAX_TEMPORAL_LAYERS] = {-8, -8, -8, -8, -8, -8}; |
359 | 474 | static const int max_offset[MAX_TEMPORAL_LAYERS] = {8, 8, 8, 8, 8, 8}; |
360 | | |
361 | 474 | PictureParentControlSet* ppcs = pcs->ppcs; |
362 | | |
363 | 474 | int base_q_idx = ppcs->frm_hdr.quantization_params.base_q_idx; |
364 | 474 | int tl_index = ppcs->temporal_layer_index; |
365 | 474 | if (pcs->slice_type != I_SLICE && (min_offset[tl_index] != 0 || max_offset[tl_index] != 0)) { |
366 | 0 | int64_t avg_dist = 0; |
367 | 0 | int64_t min_dist = INT64_MAX; |
368 | 0 | int64_t max_dist = 0; |
369 | |
|
370 | 0 | for (uint32_t b64_idx = 0; b64_idx < ppcs->b64_total_count; ++b64_idx) { |
371 | 0 | avg_dist += ppcs->me_8x8_cost_variance[b64_idx]; |
372 | 0 | min_dist = AOMMIN(ppcs->me_8x8_cost_variance[b64_idx], min_dist); |
373 | 0 | max_dist = AOMMAX(ppcs->me_8x8_cost_variance[b64_idx], max_dist); |
374 | 0 | } |
375 | 0 | avg_dist /= ppcs->b64_total_count; |
376 | |
|
377 | 0 | int min_q_idx = AOMMAX(1, base_q_idx - 9 * 4 + 1); |
378 | 0 | int max_q_idx = AOMMIN(MAXQ, base_q_idx + 9 * 4 - 1); |
379 | 0 | for (uint32_t b64_idx = 0; b64_idx < ppcs->b64_total_count; ++b64_idx) { |
380 | 0 | int diff_dist = (int)(ppcs->me_8x8_cost_variance[b64_idx] - avg_dist); |
381 | 0 | int offset = 0; |
382 | 0 | if (diff_dist < 0) { |
383 | 0 | offset = min_offset[tl_index] * diff_dist / (min_dist - avg_dist); |
384 | 0 | } else if (diff_dist > 0) { |
385 | 0 | offset = max_offset[tl_index] * diff_dist / (max_dist - avg_dist); |
386 | 0 | } |
387 | 0 | pcs->b64_me_qindex[b64_idx] = CLIP3(min_q_idx, max_q_idx, base_q_idx + offset); |
388 | 0 | } |
389 | 474 | } else { |
390 | 6.69k | for (uint32_t b64_idx = 0; b64_idx < ppcs->b64_total_count; ++b64_idx) { |
391 | 6.22k | pcs->b64_me_qindex[b64_idx] = base_q_idx; |
392 | 6.22k | } |
393 | 474 | } |
394 | 474 | } |
395 | | |
396 | 0 | static int svt_av1_get_deltaq_offset(EbBitDepth bit_depth, int qindex, double beta, bool is_intra) { |
397 | 0 | assert(beta > 0.0); |
398 | 0 | int q = svt_aom_dc_quant_qtx(qindex, 0, bit_depth); |
399 | 0 | int newq; |
400 | | // use a less aggressive action when lowering the q for non I_slice |
401 | 0 | if (!is_intra && beta > 1) { |
402 | 0 | newq = (int)rint(q / sqrt(sqrt(beta))); |
403 | 0 | } else { |
404 | 0 | newq = (int)rint(q / sqrt(beta)); |
405 | 0 | } |
406 | 0 | int orig_qindex = qindex; |
407 | 0 | if (newq == q) { |
408 | 0 | return 0; |
409 | 0 | } |
410 | 0 | if (newq < q) { |
411 | 0 | while (qindex > MINQ) { |
412 | 0 | qindex--; |
413 | 0 | q = svt_aom_dc_quant_qtx(qindex, 0, bit_depth); |
414 | 0 | if (newq >= q) { |
415 | 0 | break; |
416 | 0 | } |
417 | 0 | } |
418 | 0 | } else { |
419 | 0 | while (qindex < MAXQ) { |
420 | 0 | qindex++; |
421 | 0 | q = svt_aom_dc_quant_qtx(qindex, 0, bit_depth); |
422 | 0 | if (newq <= q) { |
423 | 0 | break; |
424 | 0 | } |
425 | 0 | } |
426 | 0 | } |
427 | 0 | return qindex - orig_qindex; |
428 | 0 | } |
429 | | |
430 | 0 | static void sb_setup_lambda(PictureControlSet* pcs, SuperBlock* sb_ptr) { |
431 | 0 | PictureParentControlSet* ppcs = pcs->ppcs; |
432 | 0 | SequenceControlSet* scs = ppcs->scs; |
433 | |
|
434 | 0 | int mi_col = sb_ptr->org_x / 4; |
435 | 0 | int mi_row = sb_ptr->org_y / 4; |
436 | |
|
437 | 0 | int mi_col_sr = coded_to_superres_mi(mi_col, ppcs->superres_denom); |
438 | 0 | assert(ppcs->enhanced_unscaled_pic); |
439 | | // ALIGN_POWER_OF_TWO(pixels, 3) >> 2 ?? |
440 | 0 | int mi_cols_sr = ((ppcs->enhanced_unscaled_pic->width + 15) / 16) << 2; |
441 | 0 | int sb_mi_width_sr = coded_to_superres_mi(mi_size_wide[scs->seq_header.sb_size], ppcs->superres_denom); |
442 | 0 | int bsize_base = ppcs->tpl_ctrls.synth_blk_size == 32 ? BLOCK_32X32 : BLOCK_16X16; |
443 | 0 | int num_mi_w = mi_size_wide[bsize_base]; |
444 | 0 | int num_mi_h = mi_size_high[bsize_base]; |
445 | 0 | int num_cols = (mi_cols_sr + num_mi_w - 1) / num_mi_w; |
446 | 0 | int num_rows = (ppcs->av1_cm->mi_rows + num_mi_h - 1) / num_mi_h; |
447 | 0 | int num_bcols = (sb_mi_width_sr + num_mi_w - 1) / num_mi_w; |
448 | 0 | int num_brows = (mi_size_high[scs->seq_header.sb_size] + num_mi_h - 1) / num_mi_h; |
449 | |
|
450 | 0 | int row, col; |
451 | |
|
452 | 0 | int32_t base_block_count = 0; |
453 | 0 | double log_sum = 0.0; |
454 | |
|
455 | 0 | for (row = mi_row / num_mi_w; row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) { |
456 | 0 | for (col = mi_col_sr / num_mi_h; col < num_cols && col < mi_col_sr / num_mi_h + num_bcols; ++col) { |
457 | 0 | int index = row * num_cols + col; |
458 | 0 | log_sum += log(ppcs->pa_me_data->tpl_rdmult_scaling_factors[index]); |
459 | 0 | ++base_block_count; |
460 | 0 | } |
461 | 0 | } |
462 | 0 | assert(base_block_count > 0); |
463 | |
|
464 | 0 | EbBitDepth bit_depth = pcs->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT; |
465 | 0 | double orig_rdmult = svt_aom_compute_rd_mult( |
466 | 0 | pcs, ppcs->frm_hdr.quantization_params.base_q_idx, ppcs->frm_hdr.quantization_params.base_q_idx, bit_depth); |
467 | 0 | double new_rdmult = svt_aom_compute_rd_mult( |
468 | 0 | pcs, sb_ptr->qindex, svt_aom_get_me_qindex(pcs, sb_ptr, scs->seq_header.sb_size == BLOCK_128X128), bit_depth); |
469 | 0 | double scaling_factor = new_rdmult / orig_rdmult; |
470 | | //double scale_adj = exp(log(scaling_factor) - log_sum / base_block_count); |
471 | 0 | double scale_adj = scaling_factor / exp(log_sum / base_block_count); |
472 | |
|
473 | 0 | for (row = mi_row / num_mi_w; row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) { |
474 | 0 | for (col = mi_col_sr / num_mi_h; col < num_cols && col < mi_col_sr / num_mi_h + num_bcols; ++col) { |
475 | 0 | int index = row * num_cols + col; |
476 | 0 | ppcs->pa_me_data->tpl_sb_rdmult_scaling_factors[index] = scale_adj * |
477 | 0 | ppcs->pa_me_data->tpl_rdmult_scaling_factors[index]; |
478 | 0 | } |
479 | 0 | } |
480 | 0 | ppcs->blk_lambda_tuning = true; |
481 | 0 | } |
482 | | |
483 | | /****************************************************** |
484 | | * svt_aom_sb_qp_derivation_tpl_la |
485 | | * Calculates the QP per SB based on the tpl statistics |
486 | | * used in one pass and second pass of two pass encoding |
487 | | ******************************************************/ |
488 | 0 | void svt_aom_sb_qp_derivation_tpl_la(PictureControlSet* pcs) { |
489 | 0 | PictureParentControlSet* ppcs = pcs->ppcs; |
490 | 0 | SequenceControlSet* scs = ppcs->scs; |
491 | 0 | if (ppcs->r0_delta_qp_quant) { |
492 | 0 | ppcs->frm_hdr.delta_q_params.delta_q_present = 1; |
493 | 0 | } |
494 | | |
495 | | // super res pictures scaled with different sb count, should use sb_total_count for each picture |
496 | 0 | uint16_t sb_cnt = scs->sb_total_count; |
497 | 0 | if (ppcs->frame_superres_enabled || ppcs->frame_resize_enabled) { |
498 | 0 | sb_cnt = pcs->sb_total_count; |
499 | 0 | } |
500 | 0 | if (ppcs->r0_delta_qp_md && ppcs->tpl_is_valid == 1) { |
501 | | #if DEBUG_VAR_BOOST_STATS |
502 | | SVT_DEBUG("TPL qindex boost, frame %llu, temp. level %i\n", pcs->picture_number, pcs->temporal_layer_index); |
503 | | #endif |
504 | 0 | for (uint32_t sb_addr = 0; sb_addr < sb_cnt; ++sb_addr) { |
505 | 0 | SuperBlock* sb_ptr = pcs->sb_ptr_array[sb_addr]; |
506 | 0 | double beta = ppcs->pa_me_data->tpl_beta[sb_addr]; |
507 | 0 | int offset = svt_av1_get_deltaq_offset( |
508 | 0 | scs->static_config.encoder_bit_depth, sb_ptr->qindex, beta, ppcs->slice_type == I_SLICE); |
509 | 0 | offset = AOMMIN(offset, 9 * 4 - 1); |
510 | 0 | offset = AOMMAX(offset, -9 * 4 + 1); |
511 | |
|
512 | | #if DEBUG_VAR_BOOST_STATS |
513 | | SVT_DEBUG("%4d ", -offset); |
514 | | if (pcs->frame_width <= (sb_ptr->org_x + 64)) { |
515 | | SVT_DEBUG("\n"); |
516 | | } |
517 | | #endif |
518 | | // read back SB qindex value, and add TPL boost on top |
519 | | // q_index 0 is lossless, and is currently not supported in SVT-AV1 |
520 | 0 | sb_ptr->qindex = CLIP3(1, MAXQ, (int16_t)sb_ptr->qindex + (int16_t)offset); |
521 | |
|
522 | 0 | sb_setup_lambda(pcs, sb_ptr); |
523 | 0 | } |
524 | 0 | } |
525 | 0 | } |
526 | | |
527 | | /****************************************************** |
528 | | * svt_av1_normalize_sb_delta_q |
529 | | * Adjusts superblock delta q to the most optimal res |
530 | | ******************************************************/ |
531 | 0 | void svt_av1_normalize_sb_delta_q(PictureControlSet* pcs) { |
532 | 0 | PictureParentControlSet* ppcs = pcs->ppcs; |
533 | 0 | SequenceControlSet* scs = ppcs->scs; |
534 | 0 | uint8_t delta_q_res = ppcs->frm_hdr.delta_q_params.delta_q_res; |
535 | |
|
536 | 0 | assert(delta_q_res == 2 || delta_q_res == 4 || delta_q_res == 8); |
537 | |
|
538 | 0 | uint8_t mask = ~(delta_q_res - 1); |
539 | 0 | uint8_t delta_q_remainder = (ppcs->frm_hdr.quantization_params.base_q_idx) & ~mask; |
540 | | // Adjustment to push sb qindex toward the nearest multiple of delta_q_res, relative to base_q_idx |
541 | 0 | int8_t delta_q_adjustment = (delta_q_res - delta_q_remainder) - (delta_q_res / 2); |
542 | | |
543 | | // super res pictures scaled with different sb count, should use sb_total_count for each picture |
544 | 0 | uint16_t sb_cnt = scs->sb_total_count; |
545 | 0 | if (ppcs->frame_superres_enabled || ppcs->frame_resize_enabled) { |
546 | 0 | sb_cnt = ppcs->b64_total_count; |
547 | 0 | } |
548 | | #if DEBUG_VAR_BOOST_STATS |
549 | | SVT_LOG("Normalized delta q boost, frame %llu, temp. level %i, new delta_q_res %i\n", |
550 | | pcs->picture_number, |
551 | | pcs->temporal_layer_index, |
552 | | delta_q_res); |
553 | | #endif |
554 | 0 | for (uint32_t sb_addr = 0; sb_addr < sb_cnt; ++sb_addr) { |
555 | 0 | SuperBlock* sb_ptr = pcs->sb_ptr_array[sb_addr]; |
556 | | // Adjust sb_qindex to minimize the difference between its pre- and post-normalization value |
557 | 0 | uint8_t adjusted_q_index = CLIP3(1, MAXQ, sb_ptr->qindex + delta_q_adjustment); |
558 | 0 | uint8_t normalized_q_index = (adjusted_q_index & mask) + delta_q_remainder; |
559 | | |
560 | | // q_index 0 is lossless, so do not use it when encoding in lossy mode |
561 | 0 | sb_ptr->qindex = normalized_q_index == 0 ? delta_q_res : normalized_q_index; |
562 | | #if DEBUG_VAR_BOOST_STATS |
563 | | SVT_LOG("%4d ", sb_ptr->qindex); |
564 | | if (pcs->frame_width <= (sb_ptr->org_x + 64)) { |
565 | | SVT_LOG("\n"); |
566 | | } |
567 | | #endif |
568 | 0 | } |
569 | 0 | } |
570 | | |
571 | | // Initialize SB qindex values and apply per-SB adjustments (variance boost, TPL, cyclic refresh). |
572 | 474 | void svt_av1_rc_init_sb_qindex(PictureControlSet* pcs, SequenceControlSet* scs) { |
573 | 474 | PictureParentControlSet* ppcs = pcs->ppcs; |
574 | 474 | FrameHeader* frm_hdr = &ppcs->frm_hdr; |
575 | | |
576 | 474 | frm_hdr->delta_q_params.delta_q_present = 0; |
577 | | |
578 | | // cyclic refresh is mutually exclusive with other AQ modes and overrides SB qindexes |
579 | | // as it is attempted always in CBR mode - make it consistent and not mix with other AQ |
580 | | // NOTE: with SB size 128 none of AQ will be used because of this |
581 | 474 | if (scs->enc_ctx->rc_cfg.mode == AOM_CBR) { |
582 | 0 | if (ppcs->cyclic_refresh.apply_cyclic_refresh) { |
583 | 0 | cyclic_sb_qp_assignment(pcs); |
584 | 0 | } else { |
585 | 0 | for (int sb_addr = 0; sb_addr < pcs->sb_total_count; ++sb_addr) { |
586 | 0 | pcs->sb_ptr_array[sb_addr]->qindex = frm_hdr->quantization_params.base_q_idx; |
587 | 0 | } |
588 | 0 | } |
589 | 474 | } else { |
590 | | // set initial SB base_q_idx values |
591 | 6.69k | for (int sb_addr = 0; sb_addr < pcs->sb_total_count; ++sb_addr) { |
592 | 6.22k | pcs->sb_ptr_array[sb_addr]->qindex = frm_hdr->quantization_params.base_q_idx; |
593 | 6.22k | } |
594 | | |
595 | | // adjust SB qindex based on variance |
596 | 474 | if (scs->static_config.enable_variance_boost) { |
597 | 0 | svt_av1_variance_adjust_qp(pcs); |
598 | 0 | } |
599 | | // QPM with tpl_la |
600 | 474 | if (scs->static_config.aq_mode == 2 && ppcs->tpl_ctrls.enable && ppcs->r0 != 0) { |
601 | 0 | svt_aom_sb_qp_derivation_tpl_la(pcs); |
602 | 0 | } |
603 | 474 | } |
604 | 474 | } |