/work/svt-av1/Source/Lib/Codec/rc_aq.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright(c) 2026 Meta Platforms, Inc. and affiliates. |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license. |
10 | | */ |
11 | | |
12 | | #include "pcs.h" |
13 | | #include "sequence_control_set.h" |
14 | | #include "inv_transforms.h" |
15 | | #include "me_context.h" |
16 | | #include "utility.h" |
17 | | |
18 | | #include "rc_process.h" |
19 | | #include "resize.h" |
20 | | |
21 | | // These functions use formulaic calculations to make playing with the |
22 | | // quantizer tables easier. If necessary they can be replaced by lookup |
23 | | // tables if and when things settle down in the experimental Bitstream |
24 | 0 | int32_t svt_av1_convert_qindex_to_q_fp8(int32_t qindex, EbBitDepth bit_depth) { |
25 | | // Convert the index to a real Q value (scaled down to match old Q values) |
26 | 0 | switch (bit_depth) { |
27 | 0 | case EB_EIGHT_BIT: |
28 | 0 | return svt_aom_ac_quant_qtx(qindex, 0, bit_depth) << 6; // / 4.0; |
29 | 0 | case EB_TEN_BIT: |
30 | 0 | return svt_aom_ac_quant_qtx(qindex, 0, bit_depth) << 4; // / 16.0; |
31 | 0 | case EB_TWELVE_BIT: |
32 | 0 | return svt_aom_ac_quant_qtx(qindex, 0, bit_depth) << 3; // / 64.0; |
33 | 0 | default: |
34 | 0 | assert(0 && "bit_depth should be EB_EIGHT_BIT, EB_TEN_BIT or EB_TWELVE_BIT"); |
35 | 0 | return -1; |
36 | 0 | } |
37 | 0 | } |
38 | | |
39 | 0 | int32_t svt_av1_compute_qdelta_fp(int32_t qstart_fp8, int32_t qtarget_fp8, EbBitDepth bit_depth) { |
40 | 0 | int32_t start_index = MAXQ; |
41 | 0 | int32_t target_index = MAXQ; |
42 | 0 | int32_t i; |
43 | | |
44 | | // Convert the average q value to an index. |
45 | 0 | for (i = MINQ; i < MAXQ; ++i) { |
46 | 0 | start_index = i; |
47 | 0 | if (svt_av1_convert_qindex_to_q_fp8(i, bit_depth) >= qstart_fp8) { |
48 | 0 | break; |
49 | 0 | } |
50 | 0 | } |
51 | | |
52 | | // Convert the q target to an index |
53 | 0 | for (i = MINQ; i < MAXQ; ++i) { |
54 | 0 | target_index = i; |
55 | 0 | if (svt_av1_convert_qindex_to_q_fp8(i, bit_depth) >= qtarget_fp8) { |
56 | 0 | break; |
57 | 0 | } |
58 | 0 | } |
59 | |
|
60 | 0 | return target_index - start_index; |
61 | 0 | } |
62 | | |
// qsort()-style comparator for uint16_t values, ascending order.
// Returns a negative, zero or positive value when *a is less than, equal to or
// greater than *b. The difference is computed in int, so it cannot wrap.
int variance_comp_int(const void* a, const void* b) {
    // Keep the const qualifier when converting from the generic pointers.
    const uint16_t lhs = *(const uint16_t*)a;
    const uint16_t rhs = *(const uint16_t*)b;
    return (int)lhs - (int)rhs;
}
66 | | |
// Upper bound applied to a single SB's variance boost and to the frame-wide qindex range.
#define VAR_BOOST_MAX_DELTAQ_RANGE 80
// Upper clamp of the q-step ratio computed by the variance boost curves (lower clamp is 1).
#define VAR_BOOST_MAX_QSTEP_RATIO_BOOST 8

// Geometry used when sampling 8x8 variances inside a 64x64 block.
#define SUPERBLOCK_SIZE 64 // block edge, in pixels
#define SUBBLOCK_SIZE 8 // sub-block edge, in pixels
#define SUBBLOCKS_IN_SB_DIM (SUPERBLOCK_SIZE / SUBBLOCK_SIZE) // sub-blocks along one edge
#define SUBBLOCKS_IN_SB (SUBBLOCKS_IN_SB_DIM * SUBBLOCKS_IN_SB_DIM) // sub-blocks per block
#define SUBBLOCKS_IN_OCTILE (SUBBLOCKS_IN_SB / 8) // sub-blocks per octile of the sorted list
75 | | |
76 | | static int av1_get_deltaq_sb_variance_boost(uint8_t base_q_idx, uint16_t* variances, uint8_t strength, |
77 | 0 | EbBitDepth bit_depth, uint8_t octile, uint8_t curve) { |
78 | | // boost q_index based on empirical visual testing, strength 2 |
79 | | // variance qstep_ratio boost (@ base_q_idx 255) |
80 | | // 256 1 |
81 | | // 64 1.481 |
82 | | // 16 2.192 |
83 | | // 4 3.246 |
84 | | // 1 4.806 |
85 | | |
86 | | // copy sb 8x8 variance values to an array for ordering |
87 | 0 | uint16_t ordered_variances[64]; |
88 | 0 | memcpy(&ordered_variances, variances + ME_TIER_ZERO_PU_8x8_0, sizeof(uint16_t) * 64); |
89 | 0 | qsort(&ordered_variances, 64, sizeof(uint16_t), variance_comp_int); |
90 | | |
91 | | // Sample three 8x8 variance values: at the specified octile, previous octile, |
92 | | // and next octile. Make sure we use the last subblock in each octile as the |
93 | | // representative of the octile. |
94 | 0 | assert(octile >= 1 && octile <= 8); |
95 | 0 | int mid_idx = octile * SUBBLOCKS_IN_OCTILE - 1; |
96 | 0 | int low_idx = AOMMAX(SUBBLOCKS_IN_OCTILE - 1, mid_idx - SUBBLOCKS_IN_OCTILE); |
97 | 0 | int upp_idx = AOMMIN(SUBBLOCKS_IN_SB - 1, mid_idx + SUBBLOCKS_IN_OCTILE); |
98 | | |
99 | | // Weigh the three variances in a 1:2:1 ratio, with rounding (the +2 term). |
100 | | // This allows for smoother delta-q transitions among superblocks with |
101 | | // mixed-variance features. |
102 | 0 | int variance = (ordered_variances[low_idx] + ordered_variances[mid_idx] * 2 + ordered_variances[upp_idx] + 2) / 4; |
103 | |
|
104 | | #if DEBUG_VAR_BOOST |
105 | | SVT_INFO("64x64 variance: %d\n", variances[ME_TIER_ZERO_PU_64x64]); |
106 | | SVT_INFO("8x8 min %d, 1st oct %d, median %d, max %d\n", |
107 | | ordered_variances[0], |
108 | | ordered_variances[7], |
109 | | ordered_variances[31], |
110 | | ordered_variances[63]); |
111 | | SVT_INFO("8x8 variances\n"); |
112 | | uint16_t* variances_row = variances + ME_TIER_ZERO_PU_8x8_0; |
113 | | |
114 | | for (int row = 0; row < 8; row++) { |
115 | | SVT_INFO("%5d %5d %5d %5d %5d %5d %5d %5d\n", |
116 | | variances_row[0], |
117 | | variances_row[1], |
118 | | variances_row[2], |
119 | | variances_row[3], |
120 | | variances_row[4], |
121 | | variances_row[5], |
122 | | variances_row[6], |
123 | | variances_row[7]); |
124 | | variances_row += 8; |
125 | | } |
126 | | #endif |
127 | | |
128 | | // variance = 0 areas are either completely flat patches or very fine gradients |
129 | | // SVT-AV1 doesn't have enough resolution to tell them apart, so let's assume they're not flat and boost them |
130 | 0 | if (variance == 0) { |
131 | 0 | variance = 1; |
132 | 0 | } |
133 | | |
134 | | // compute a boost based on a fast-growing formula |
135 | | // high and medium variance sbs essentially get no boost, while increasingly lower variance sbs get stronger boosts |
136 | 0 | assert(strength >= 1 && strength <= 4); |
137 | 0 | double qstep_ratio = 0; |
138 | 0 | static const double strengths[] = {0, 0.65, 1.1, 1.6, 2.5}; |
139 | |
|
140 | 0 | switch (curve) { |
141 | 0 | case 1: /* 1: low-medium contrast boosting curve */ |
142 | 0 | qstep_ratio = 0.25 * strength * (-log2((double)variance) + 8) + 1; |
143 | 0 | break; |
144 | 0 | case 2: /* 2: still picture curve, tuned for SSIMULACRA2 performance on CID22 */ |
145 | 0 | qstep_ratio = 0.15 * strength * (-log2((double)variance) + 10) + 1; |
146 | 0 | break; |
147 | 0 | default: /* 0: default q step ratio curve */ |
148 | 0 | qstep_ratio = pow(1.018, strengths[strength] * (-10 * log2((double)variance) + 80)); |
149 | 0 | break; |
150 | 0 | } |
151 | 0 | qstep_ratio = CLIP3(1, VAR_BOOST_MAX_QSTEP_RATIO_BOOST, qstep_ratio); |
152 | |
|
153 | 0 | int32_t base_q = svt_av1_convert_qindex_to_q_fp8(base_q_idx, bit_depth); |
154 | 0 | int32_t target_q = (int32_t)(base_q / qstep_ratio); |
155 | 0 | int32_t boost = 0; |
156 | |
|
157 | 0 | switch (curve) { |
158 | 0 | case 2: /* still picture boost, tuned for SSIMULACRA2 performance on CID22 */ |
159 | 0 | boost = (int32_t)((base_q_idx + 544) * -svt_av1_compute_qdelta_fp(base_q, target_q, bit_depth) / (255 + 1024)); |
160 | 0 | break; |
161 | 0 | default: /* curve 0 & 1 boost (default) */ |
162 | 0 | boost = (int32_t)((base_q_idx + 40) * -svt_av1_compute_qdelta_fp(base_q, target_q, bit_depth) / (255 + 40)); |
163 | 0 | break; |
164 | 0 | } |
165 | 0 | boost = AOMMIN(VAR_BOOST_MAX_DELTAQ_RANGE, boost); |
166 | |
|
167 | | #if DEBUG_VAR_BOOST |
168 | | SVT_INFO("Variance: %d, Strength: %d, Q-step ratio: %f, Boost: %d, Base q: %d, Target q: %d\n", |
169 | | variance, |
170 | | strength, |
171 | | qstep_ratio, |
172 | | boost, |
173 | | base_q, |
174 | | target_q); |
175 | | #endif |
176 | |
|
177 | 0 | return boost; |
178 | 0 | } |
179 | | |
180 | 0 | void svt_av1_variance_adjust_qp(PictureControlSet* pcs) { |
181 | 0 | PictureParentControlSet* ppcs = pcs->ppcs; |
182 | 0 | SequenceControlSet* scs = ppcs->scs; |
183 | |
|
184 | 0 | ppcs->frm_hdr.delta_q_params.delta_q_present = 1; |
185 | | |
186 | | // super res pictures scaled with different sb count, should use sb_total_count for each picture |
187 | 0 | uint16_t sb_cnt = scs->sb_total_count; |
188 | 0 | if (ppcs->frame_superres_enabled || ppcs->frame_resize_enabled) { |
189 | 0 | sb_cnt = ppcs->b64_total_count; |
190 | 0 | } |
191 | |
|
192 | 0 | uint8_t min_qindex = MAXQ; |
193 | 0 | uint8_t max_qindex = MINQ; |
194 | |
|
195 | | #if DEBUG_VAR_BOOST_STATS |
196 | | SVT_DEBUG("TPL/CQP SB qindex, frame %llu, temp. level %i\n", pcs->picture_number, pcs->temporal_layer_index); |
197 | | |
198 | | for (uint32_t sb_addr = 0; sb_addr < sb_cnt; ++sb_addr) { |
199 | | SuperBlock* sb_ptr = pcs->sb_ptr_array[sb_addr]; |
200 | | |
201 | | SVT_DEBUG("%4d ", sb_ptr->qindex); |
202 | | |
203 | | if (pcs->frame_width <= (sb_ptr->org_x + 64)) { |
204 | | SVT_DEBUG("\n"); |
205 | | } |
206 | | } |
207 | | SVT_DEBUG("VAQ qindex boost, frame %llu, temp. level %i\n", pcs->picture_number, pcs->temporal_layer_index); |
208 | | #endif |
209 | 0 | for (uint32_t sb_addr = 0; sb_addr < sb_cnt; ++sb_addr) { |
210 | 0 | SuperBlock* sb_ptr = pcs->sb_ptr_array[sb_addr]; |
211 | | |
212 | | // adjust deltaq based on sb variance, with lower variance resulting in a lower qindex |
213 | 0 | int boost = av1_get_deltaq_sb_variance_boost(ppcs->frm_hdr.quantization_params.base_q_idx, |
214 | 0 | ppcs->variance[sb_addr], |
215 | 0 | scs->static_config.variance_boost_strength, |
216 | 0 | scs->static_config.encoder_bit_depth, |
217 | 0 | scs->static_config.variance_octile, |
218 | 0 | scs->static_config.variance_boost_curve); |
219 | | #if DEBUG_VAR_BOOST_STATS |
220 | | SVT_DEBUG("%4d ", boost); |
221 | | |
222 | | if (pcs->frame_width <= (sb_ptr->org_x + 64)) { |
223 | | SVT_DEBUG("\n"); |
224 | | } |
225 | | #endif |
226 | | // don't clamp qindex on valid deltaq range yet |
227 | | // we'll do it after adjusting frame qp to maximize deltaq frame range |
228 | | // q_index 0 is lossless, and is currently not supported in SVT-AV1 |
229 | 0 | sb_ptr->qindex = CLIP3(1, MAXQ, sb_ptr->qindex - boost); |
230 | | |
231 | | // record last seen min and max qindexes for frame qp readjusting |
232 | 0 | min_qindex = AOMMIN(min_qindex, sb_ptr->qindex); |
233 | 0 | max_qindex = AOMMAX(max_qindex, sb_ptr->qindex); |
234 | 0 | } |
235 | | |
236 | | // normalize and clamp frame qindex value to maximize deltaq range |
237 | 0 | int range = max_qindex - min_qindex; |
238 | 0 | range = AOMMIN(range, VAR_BOOST_MAX_DELTAQ_RANGE); |
239 | 0 | int normalized_base_q_idx = (int)min_qindex + (range >> 1); |
240 | |
|
241 | | #if DEBUG_VAR_BOOST_QP |
242 | | SVT_INFO("previous qidx %d, min_qidx %d, max_qidx %d, delta_q_res %d, normalized qidx %d, range %d\n", |
243 | | ppcs->frm_hdr.quantization_params.base_q_idx, |
244 | | min_qindex, |
245 | | max_qindex, |
246 | | ppcs->frm_hdr.delta_q_params.delta_q_res, |
247 | | normalized_base_q_idx, |
248 | | range); |
249 | | #endif |
250 | | #if DEBUG_VAR_BOOST_STATS |
251 | | SVT_DEBUG( |
252 | | "Total CQP/CRF + VAQ qindex, frame %llu, temp. level %i\n", pcs->picture_number, pcs->temporal_layer_index); |
253 | | #endif |
254 | | |
255 | | // normalize sb qindex values |
256 | 0 | for (uint32_t sb_addr = 0; sb_addr < sb_cnt; ++sb_addr) { |
257 | 0 | SuperBlock* sb_ptr = pcs->sb_ptr_array[sb_addr]; |
258 | |
|
259 | 0 | int offset = (int)sb_ptr->qindex - normalized_base_q_idx; |
260 | 0 | offset = AOMMIN(offset, VAR_BOOST_MAX_DELTAQ_RANGE >> 1); |
261 | 0 | offset = AOMMAX(offset, -VAR_BOOST_MAX_DELTAQ_RANGE >> 1); |
262 | | |
263 | | // q_index 0 is lossless, and is currently not supported in SVT-AV1 |
264 | 0 | uint8_t normalized_qindex = CLIP3(1, MAXQ, normalized_base_q_idx + offset); |
265 | | #if DEBUG_VAR_BOOST_STATS |
266 | | SVT_DEBUG("%4d ", normalized_qindex); |
267 | | |
268 | | if (pcs->frame_width <= (sb_ptr->org_x + 64)) { |
269 | | SVT_DEBUG("\n"); |
270 | | } |
271 | | #endif |
272 | |
|
273 | | #if DEBUG_VAR_BOOST_QP |
274 | | SVT_INFO(" sb %d qindex: previous %d, normalized %d\n", sb_addr, sb_ptr->qindex, normalized_qindex); |
275 | | #endif |
276 | 0 | sb_ptr->qindex = normalized_qindex; |
277 | 0 | } |
278 | 0 | } |
279 | | |
// Scale of the quadratic term added to the cyclic-refresh rate boost factor:
// extra boost = BOOST_MAX * (dev / 100)^2.
#define BOOST_MAX 10
281 | | |
#if FIX_CR_BAND_WRAPPING
// Tells whether b64_idx lies in the cyclic-refresh band [sb_start, sb_end).
// A band with sb_start > sb_end wraps around the end of the frame, i.e. it
// covers [sb_start, total) together with [0, sb_end).
// Returns 1 when inside the band, 0 otherwise.
static INLINE int is_in_cr_band(uint32_t b64_idx, uint32_t sb_start, uint32_t sb_end) {
    const int at_or_past_start = b64_idx >= sb_start;
    const int before_end       = b64_idx < sb_end;

    if (sb_start > sb_end) {
        // Wrapped band: being in either piece is enough.
        return at_or_past_start || before_end;
    }
    return at_or_past_start && before_end;
}
#endif
289 | | |
#if OPT_CR_MOTION_GATE
// Cyclic-refresh eligibility gate for one 64x64 block. Returns 1 only when the
// block's 8x8 ME distortion is below dist_reject_thresh and both components of
// its first ME motion vector have magnitude <= 1; returns 0 otherwise.
static INLINE int is_cr_motion_static(PictureParentControlSet* ppcs, uint32_t b64_idx, uint64_t dist_reject_thresh) {
    const Mv me_mv = ppcs->pa_me_data->me_results[b64_idx]->me_mv_array[0];

    if (!(ppcs->me_8x8_distortion[b64_idx] < dist_reject_thresh)) {
        return 0; // distortion too high
    }
    if (!(ABS(me_mv.x) <= 1)) {
        return 0; // horizontal motion too large
    }
    return ABS(me_mv.y) <= 1;
}
#endif
298 | | |
299 | 0 | void svt_aom_cyclic_refresh_setup(PictureParentControlSet* ppcs) { |
300 | 0 | CyclicRefresh* cr = &ppcs->cyclic_refresh; |
301 | |
|
302 | 0 | cr->me_distortion[0] = 0; |
303 | 0 | cr->me_distortion[1] = 0; |
304 | 0 | cr->me_distortion[2] = 0; |
305 | |
|
306 | 0 | cr->actual_num_seg1_sbs = 0; |
307 | 0 | cr->actual_num_seg2_sbs = 0; |
308 | 0 | uint64_t seg2_dist = 0; |
309 | 0 | uint64_t avg_me_dist = ppcs->norm_me_dist; |
310 | 0 | #if OPT_CR_MOTION_GATE |
311 | 0 | uint64_t dist_reject_thresh = avg_me_dist * 2 + 1; |
312 | 0 | #endif |
313 | 0 | for (uint32_t b64_idx = 0; b64_idx < ppcs->b64_total_count; ++b64_idx) { |
314 | 0 | #if FIX_CR_BAND_WRAPPING || OPT_CR_MOTION_GATE |
315 | 0 | #if FIX_CR_BAND_WRAPPING |
316 | 0 | const int in_cr_range = is_in_cr_band(b64_idx, cr->sb_start, cr->sb_end); |
317 | | #else |
318 | | const int in_cr_range = (b64_idx >= cr->sb_start && b64_idx < cr->sb_end); |
319 | | #endif |
320 | 0 | #if OPT_CR_MOTION_GATE |
321 | 0 | if (in_cr_range && is_cr_motion_static(ppcs, b64_idx, dist_reject_thresh)) { |
322 | | #else |
323 | | if (in_cr_range) { |
324 | | #endif |
325 | | #else |
326 | | if (b64_idx >= cr->sb_start && b64_idx < cr->sb_end) { |
327 | | #endif |
328 | 0 | if (ppcs->me_8x8_distortion[b64_idx] < avg_me_dist) { |
329 | 0 | seg2_dist += ppcs->me_8x8_distortion[b64_idx]; |
330 | 0 | cr->me_distortion[2] += ppcs->me_64x64_distortion[b64_idx]; |
331 | 0 | cr->actual_num_seg2_sbs++; |
332 | 0 | } else { |
333 | 0 | cr->me_distortion[1] += ppcs->me_64x64_distortion[b64_idx]; |
334 | 0 | cr->actual_num_seg1_sbs++; |
335 | 0 | } |
336 | 0 | } else { |
337 | 0 | cr->me_distortion[0] += ppcs->me_64x64_distortion[b64_idx]; |
338 | 0 | } |
339 | 0 | } |
340 | |
|
341 | 0 | int actual_num_seg0_sbs = ppcs->b64_total_count - cr->actual_num_seg1_sbs - cr->actual_num_seg2_sbs; |
342 | 0 | cr->me_distortion[0] = actual_num_seg0_sbs ? cr->me_distortion[0] / actual_num_seg0_sbs : 0; |
343 | 0 | cr->me_distortion[1] = cr->actual_num_seg1_sbs ? cr->me_distortion[1] / cr->actual_num_seg1_sbs : 0; |
344 | 0 | cr->me_distortion[2] = cr->actual_num_seg2_sbs ? cr->me_distortion[2] / cr->actual_num_seg2_sbs : 0; |
345 | |
|
346 | 0 | #if OPT_CR_MOTION_GATE |
347 | | // If motion gate rejected ALL SBs in the refresh range, disable CR for this frame |
348 | | // to avoid delta_q_present signaling overhead with no actual delta-Q benefit. |
349 | 0 | if (cr->actual_num_seg1_sbs + cr->actual_num_seg2_sbs == 0) { |
350 | 0 | cr->apply_cyclic_refresh = 0; |
351 | 0 | return; |
352 | 0 | } |
353 | 0 | #endif |
354 | 0 | int rate_boost_fac = cr->rate_boost_fac; |
355 | 0 | #if TUNE_SIMPLIFY_SETTINGS |
356 | 0 | if (cr->actual_num_seg2_sbs) { |
357 | | #else |
358 | | if (!ppcs->sc_class1 && cr->actual_num_seg2_sbs) { |
359 | | #endif |
360 | 0 | seg2_dist = seg2_dist / cr->actual_num_seg2_sbs; |
361 | 0 | uint64_t dev = (avg_me_dist - seg2_dist) * 100 / avg_me_dist; |
362 | | // Quadratic Scaling; boost = BOOST_MAX * (dev/100)^2 |
363 | 0 | rate_boost_fac += (int)(BOOST_MAX * dev * dev / (100 * 100)); |
364 | 0 | } |
365 | 0 | cr->rate_ratio_qdelta_seg2 = 0.1 * rate_boost_fac * cr->rate_ratio_qdelta; |
366 | 0 | } |
367 | | |
368 | | /****************************************************** |
369 | | * cyclic_sb_qp_assignment |
370 | | * Assign the QP per SB based on the ME statistics |
371 | | * used in one pass encoding |
372 | | * only works for sb size = 64 |
373 | | ******************************************************/ |
374 | 0 | static void cyclic_sb_qp_assignment(PictureControlSet* pcs) { |
375 | 0 | PictureParentControlSet* ppcs = pcs->ppcs; |
376 | 0 | CyclicRefresh* cr = &ppcs->cyclic_refresh; |
377 | |
|
378 | 0 | ppcs->frm_hdr.delta_q_params.delta_q_present = 1; |
379 | |
|
380 | 0 | int base_q_idx = ppcs->frm_hdr.quantization_params.base_q_idx; |
381 | 0 | #if OPT_CR_MOTION_GATE |
382 | | // High-motion gate: don't boost SBs with distortion far above average |
383 | | // or with large MV (boosting them wastes bits that motion destroys next frame). |
384 | 0 | uint64_t dist_reject_thresh = ppcs->norm_me_dist * 2 + 1; |
385 | 0 | #endif |
386 | 0 | for (uint32_t b64_idx = 0; b64_idx < ppcs->b64_total_count; ++b64_idx) { |
387 | 0 | SuperBlock* sb = pcs->sb_ptr_array[b64_idx]; |
388 | 0 | int offset = 0; |
389 | 0 | #if FIX_CR_BAND_WRAPPING |
390 | 0 | if (is_in_cr_band(b64_idx, cr->sb_start, cr->sb_end)) { |
391 | | #else |
392 | | if (b64_idx >= cr->sb_start && b64_idx < cr->sb_end) { |
393 | | #endif |
394 | 0 | #if OPT_CR_MOTION_GATE |
395 | 0 | if (!is_cr_motion_static(ppcs, b64_idx, dist_reject_thresh)) { |
396 | | // Non-static SB (any non-zero MV or high distortion): no boost |
397 | 0 | offset = 0; |
398 | 0 | } else if (ppcs->me_8x8_distortion[b64_idx] < ppcs->norm_me_dist) { |
399 | | #else |
400 | | if (ppcs->me_8x8_distortion[b64_idx] < ppcs->norm_me_dist) { |
401 | | #endif |
402 | 0 | offset = cr->qindex_delta[2]; |
403 | 0 | } else { |
404 | 0 | offset = cr->qindex_delta[1]; |
405 | 0 | } |
406 | 0 | } |
407 | 0 | sb->qindex = CLIP3(1, MAXQ, base_q_idx + offset); |
408 | 0 | } |
409 | 0 | } |
410 | | |
411 | | /* |
412 | | * Derives a qindex per 64x64 using ME distortions (to be used for lambda modulation only; not at Q/Q-1) |
413 | | */ |
414 | 431 | void svt_av1_generate_b64_me_qindex_map(PictureControlSet* pcs) { |
415 | 431 | static const int min_offset[MAX_TEMPORAL_LAYERS] = {-8, -8, -8, -8, -8, -8}; |
416 | 431 | static const int max_offset[MAX_TEMPORAL_LAYERS] = {8, 8, 8, 8, 8, 8}; |
417 | | |
418 | 431 | PictureParentControlSet* ppcs = pcs->ppcs; |
419 | | |
420 | 431 | int base_q_idx = ppcs->frm_hdr.quantization_params.base_q_idx; |
421 | 431 | int tl_index = ppcs->temporal_layer_index; |
422 | 431 | if (pcs->slice_type != I_SLICE && (min_offset[tl_index] != 0 || max_offset[tl_index] != 0)) { |
423 | 0 | int64_t avg_dist = 0; |
424 | 0 | int64_t min_dist = INT64_MAX; |
425 | 0 | int64_t max_dist = 0; |
426 | |
|
427 | 0 | for (uint32_t b64_idx = 0; b64_idx < ppcs->b64_total_count; ++b64_idx) { |
428 | 0 | avg_dist += ppcs->me_8x8_cost_variance[b64_idx]; |
429 | 0 | min_dist = AOMMIN(ppcs->me_8x8_cost_variance[b64_idx], min_dist); |
430 | 0 | max_dist = AOMMAX(ppcs->me_8x8_cost_variance[b64_idx], max_dist); |
431 | 0 | } |
432 | 0 | avg_dist /= ppcs->b64_total_count; |
433 | |
|
434 | 0 | int min_q_idx = AOMMAX(1, base_q_idx - 9 * 4 + 1); |
435 | 0 | int max_q_idx = AOMMIN(MAXQ, base_q_idx + 9 * 4 - 1); |
436 | 0 | for (uint32_t b64_idx = 0; b64_idx < ppcs->b64_total_count; ++b64_idx) { |
437 | 0 | int diff_dist = (int)(ppcs->me_8x8_cost_variance[b64_idx] - avg_dist); |
438 | 0 | int offset = 0; |
439 | 0 | if (diff_dist < 0) { |
440 | 0 | offset = min_offset[tl_index] * diff_dist / (min_dist - avg_dist); |
441 | 0 | } else if (diff_dist > 0) { |
442 | 0 | offset = max_offset[tl_index] * diff_dist / (max_dist - avg_dist); |
443 | 0 | } |
444 | 0 | pcs->b64_me_qindex[b64_idx] = CLIP3(min_q_idx, max_q_idx, base_q_idx + offset); |
445 | 0 | } |
446 | 431 | } else { |
447 | 6.16k | for (uint32_t b64_idx = 0; b64_idx < ppcs->b64_total_count; ++b64_idx) { |
448 | 5.73k | pcs->b64_me_qindex[b64_idx] = base_q_idx; |
449 | 5.73k | } |
450 | 431 | } |
451 | 431 | } |
452 | | |
453 | 0 | static int svt_av1_get_deltaq_offset(EbBitDepth bit_depth, int qindex, double beta, bool is_intra) { |
454 | 0 | assert(beta > 0.0); |
455 | 0 | int q = svt_aom_dc_quant_qtx(qindex, 0, bit_depth); |
456 | 0 | int newq; |
457 | | // use a less aggressive action when lowering the q for non I_slice |
458 | 0 | if (!is_intra && beta > 1) { |
459 | 0 | newq = (int)rint(q / sqrt(sqrt(beta))); |
460 | 0 | } else { |
461 | 0 | newq = (int)rint(q / sqrt(beta)); |
462 | 0 | } |
463 | 0 | int orig_qindex = qindex; |
464 | 0 | if (newq == q) { |
465 | 0 | return 0; |
466 | 0 | } |
467 | 0 | if (newq < q) { |
468 | 0 | while (qindex > MINQ) { |
469 | 0 | qindex--; |
470 | 0 | q = svt_aom_dc_quant_qtx(qindex, 0, bit_depth); |
471 | 0 | if (newq >= q) { |
472 | 0 | break; |
473 | 0 | } |
474 | 0 | } |
475 | 0 | } else { |
476 | 0 | while (qindex < MAXQ) { |
477 | 0 | qindex++; |
478 | 0 | q = svt_aom_dc_quant_qtx(qindex, 0, bit_depth); |
479 | 0 | if (newq <= q) { |
480 | 0 | break; |
481 | 0 | } |
482 | 0 | } |
483 | 0 | } |
484 | 0 | return qindex - orig_qindex; |
485 | 0 | } |
486 | | |
487 | 0 | static void sb_setup_lambda(PictureControlSet* pcs, SuperBlock* sb_ptr) { |
488 | 0 | PictureParentControlSet* ppcs = pcs->ppcs; |
489 | 0 | SequenceControlSet* scs = ppcs->scs; |
490 | |
|
491 | 0 | int mi_col = sb_ptr->org_x / 4; |
492 | 0 | int mi_row = sb_ptr->org_y / 4; |
493 | |
|
494 | 0 | int mi_col_sr = coded_to_superres_mi(mi_col, ppcs->superres_denom); |
495 | 0 | assert(ppcs->enhanced_unscaled_pic); |
496 | | // ALIGN_POWER_OF_TWO(pixels, 3) >> 2 ?? |
497 | 0 | int mi_cols_sr = ((ppcs->enhanced_unscaled_pic->width + 15) / 16) << 2; |
498 | 0 | int sb_mi_width_sr = coded_to_superres_mi(mi_size_wide[scs->seq_header.sb_size], ppcs->superres_denom); |
499 | 0 | int bsize_base = ppcs->tpl_ctrls.synth_blk_size == 32 ? BLOCK_32X32 : BLOCK_16X16; |
500 | 0 | int num_mi_w = mi_size_wide[bsize_base]; |
501 | 0 | int num_mi_h = mi_size_high[bsize_base]; |
502 | 0 | int num_cols = (mi_cols_sr + num_mi_w - 1) / num_mi_w; |
503 | 0 | int num_rows = (ppcs->av1_cm->mi_rows + num_mi_h - 1) / num_mi_h; |
504 | 0 | int num_bcols = (sb_mi_width_sr + num_mi_w - 1) / num_mi_w; |
505 | 0 | int num_brows = (mi_size_high[scs->seq_header.sb_size] + num_mi_h - 1) / num_mi_h; |
506 | |
|
507 | 0 | int row, col; |
508 | |
|
509 | 0 | int32_t base_block_count = 0; |
510 | 0 | double log_sum = 0.0; |
511 | |
|
512 | 0 | for (row = mi_row / num_mi_w; row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) { |
513 | 0 | for (col = mi_col_sr / num_mi_h; col < num_cols && col < mi_col_sr / num_mi_h + num_bcols; ++col) { |
514 | 0 | int index = row * num_cols + col; |
515 | 0 | log_sum += log(ppcs->pa_me_data->tpl_rdmult_scaling_factors[index]); |
516 | 0 | ++base_block_count; |
517 | 0 | } |
518 | 0 | } |
519 | 0 | assert(base_block_count > 0); |
520 | |
|
521 | 0 | EbBitDepth bit_depth = pcs->hbd_md ? EB_TEN_BIT : EB_EIGHT_BIT; |
522 | 0 | double orig_rdmult = svt_aom_compute_rd_mult( |
523 | 0 | pcs, ppcs->frm_hdr.quantization_params.base_q_idx, ppcs->frm_hdr.quantization_params.base_q_idx, bit_depth); |
524 | 0 | double new_rdmult = svt_aom_compute_rd_mult( |
525 | 0 | pcs, sb_ptr->qindex, svt_aom_get_me_qindex(pcs, sb_ptr, scs->seq_header.sb_size == BLOCK_128X128), bit_depth); |
526 | 0 | double scaling_factor = new_rdmult / orig_rdmult; |
527 | | //double scale_adj = exp(log(scaling_factor) - log_sum / base_block_count); |
528 | 0 | double scale_adj = scaling_factor / exp(log_sum / base_block_count); |
529 | |
|
530 | 0 | for (row = mi_row / num_mi_w; row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) { |
531 | 0 | for (col = mi_col_sr / num_mi_h; col < num_cols && col < mi_col_sr / num_mi_h + num_bcols; ++col) { |
532 | 0 | int index = row * num_cols + col; |
533 | 0 | ppcs->pa_me_data->tpl_sb_rdmult_scaling_factors[index] = scale_adj * |
534 | 0 | ppcs->pa_me_data->tpl_rdmult_scaling_factors[index]; |
535 | 0 | } |
536 | 0 | } |
537 | 0 | ppcs->blk_lambda_tuning = true; |
538 | 0 | } |
539 | | |
540 | | /****************************************************** |
541 | | * svt_aom_sb_qp_derivation_tpl_la |
542 | | * Calculates the QP per SB based on the tpl statistics |
543 | | * used in one pass and second pass of two pass encoding |
544 | | ******************************************************/ |
545 | 0 | void svt_aom_sb_qp_derivation_tpl_la(PictureControlSet* pcs) { |
546 | 0 | PictureParentControlSet* ppcs = pcs->ppcs; |
547 | 0 | SequenceControlSet* scs = ppcs->scs; |
548 | 0 | if (ppcs->r0_delta_qp_quant) { |
549 | 0 | ppcs->frm_hdr.delta_q_params.delta_q_present = 1; |
550 | 0 | } |
551 | | |
552 | | // super res pictures scaled with different sb count, should use sb_total_count for each picture |
553 | 0 | uint16_t sb_cnt = scs->sb_total_count; |
554 | 0 | if (ppcs->frame_superres_enabled || ppcs->frame_resize_enabled) { |
555 | 0 | sb_cnt = pcs->sb_total_count; |
556 | 0 | } |
557 | 0 | if (ppcs->r0_delta_qp_md && ppcs->tpl_is_valid == 1) { |
558 | | #if DEBUG_VAR_BOOST_STATS |
559 | | SVT_DEBUG("TPL qindex boost, frame %llu, temp. level %i\n", pcs->picture_number, pcs->temporal_layer_index); |
560 | | #endif |
561 | 0 | for (uint32_t sb_addr = 0; sb_addr < sb_cnt; ++sb_addr) { |
562 | 0 | SuperBlock* sb_ptr = pcs->sb_ptr_array[sb_addr]; |
563 | 0 | double beta = ppcs->pa_me_data->tpl_beta[sb_addr]; |
564 | 0 | int offset = svt_av1_get_deltaq_offset( |
565 | 0 | scs->static_config.encoder_bit_depth, sb_ptr->qindex, beta, ppcs->slice_type == I_SLICE); |
566 | 0 | offset = AOMMIN(offset, 9 * 4 - 1); |
567 | 0 | offset = AOMMAX(offset, -9 * 4 + 1); |
568 | |
|
569 | | #if DEBUG_VAR_BOOST_STATS |
570 | | SVT_DEBUG("%4d ", -offset); |
571 | | if (pcs->frame_width <= (sb_ptr->org_x + 64)) { |
572 | | SVT_DEBUG("\n"); |
573 | | } |
574 | | #endif |
575 | | // read back SB qindex value, and add TPL boost on top |
576 | | // q_index 0 is lossless, and is currently not supported in SVT-AV1 |
577 | 0 | sb_ptr->qindex = CLIP3(1, MAXQ, (int16_t)sb_ptr->qindex + (int16_t)offset); |
578 | |
|
579 | 0 | sb_setup_lambda(pcs, sb_ptr); |
580 | 0 | } |
581 | 0 | } |
582 | 0 | } |
583 | | |
584 | | /****************************************************** |
585 | | * svt_av1_normalize_sb_delta_q |
586 | | * Adjusts superblock delta q to the most optimal res |
587 | | ******************************************************/ |
588 | 0 | void svt_av1_normalize_sb_delta_q(PictureControlSet* pcs) { |
589 | 0 | PictureParentControlSet* ppcs = pcs->ppcs; |
590 | 0 | SequenceControlSet* scs = ppcs->scs; |
591 | 0 | uint8_t delta_q_res = ppcs->frm_hdr.delta_q_params.delta_q_res; |
592 | |
|
593 | 0 | assert(delta_q_res == 2 || delta_q_res == 4 || delta_q_res == 8); |
594 | |
|
595 | 0 | uint8_t mask = ~(delta_q_res - 1); |
596 | 0 | uint8_t delta_q_remainder = (ppcs->frm_hdr.quantization_params.base_q_idx) & ~mask; |
597 | | // Adjustment to push sb qindex toward the nearest multiple of delta_q_res, relative to base_q_idx |
598 | 0 | int8_t delta_q_adjustment = (delta_q_res - delta_q_remainder) - (delta_q_res / 2); |
599 | | |
600 | | // super res pictures scaled with different sb count, should use sb_total_count for each picture |
601 | 0 | uint16_t sb_cnt = scs->sb_total_count; |
602 | 0 | if (ppcs->frame_superres_enabled || ppcs->frame_resize_enabled) { |
603 | 0 | sb_cnt = ppcs->b64_total_count; |
604 | 0 | } |
605 | | #if DEBUG_VAR_BOOST_STATS |
606 | | SVT_LOG("Normalized delta q boost, frame %llu, temp. level %i, new delta_q_res %i\n", |
607 | | pcs->picture_number, |
608 | | pcs->temporal_layer_index, |
609 | | delta_q_res); |
610 | | #endif |
611 | 0 | for (uint32_t sb_addr = 0; sb_addr < sb_cnt; ++sb_addr) { |
612 | 0 | SuperBlock* sb_ptr = pcs->sb_ptr_array[sb_addr]; |
613 | | // Adjust sb_qindex to minimize the difference between its pre- and post-normalization value |
614 | 0 | uint8_t adjusted_q_index = CLIP3(1, MAXQ, sb_ptr->qindex + delta_q_adjustment); |
615 | 0 | uint8_t normalized_q_index = (adjusted_q_index & mask) + delta_q_remainder; |
616 | | |
617 | | // q_index 0 is lossless, so do not use it when encoding in lossy mode |
618 | 0 | sb_ptr->qindex = normalized_q_index == 0 ? delta_q_res : normalized_q_index; |
619 | | #if DEBUG_VAR_BOOST_STATS |
620 | | SVT_LOG("%4d ", sb_ptr->qindex); |
621 | | if (pcs->frame_width <= (sb_ptr->org_x + 64)) { |
622 | | SVT_LOG("\n"); |
623 | | } |
624 | | #endif |
625 | 0 | } |
626 | 0 | } |
627 | | |
628 | | // Initialize SB qindex values and apply per-SB adjustments (variance boost, TPL, cyclic refresh). |
629 | 431 | void svt_av1_rc_init_sb_qindex(PictureControlSet* pcs, SequenceControlSet* scs) { |
630 | 431 | PictureParentControlSet* ppcs = pcs->ppcs; |
631 | 431 | FrameHeader* frm_hdr = &ppcs->frm_hdr; |
632 | | |
633 | 431 | frm_hdr->delta_q_params.delta_q_present = 0; |
634 | | |
635 | | // cyclic refresh is mutually exclusive with other AQ modes and overrides SB qindexes |
636 | | // as it is attempted always in CBR mode - make it consistent and not mix with other AQ |
637 | | // NOTE: with SB size 128 none of AQ will be used because of this |
638 | 431 | if (scs->enc_ctx->rc_cfg.mode == AOM_CBR) { |
639 | 0 | if (ppcs->cyclic_refresh.apply_cyclic_refresh) { |
640 | 0 | cyclic_sb_qp_assignment(pcs); |
641 | 0 | } else { |
642 | 0 | for (int sb_addr = 0; sb_addr < pcs->sb_total_count; ++sb_addr) { |
643 | 0 | pcs->sb_ptr_array[sb_addr]->qindex = frm_hdr->quantization_params.base_q_idx; |
644 | 0 | } |
645 | 0 | } |
646 | 431 | } else { |
647 | | // set initial SB base_q_idx values |
648 | 6.16k | for (int sb_addr = 0; sb_addr < pcs->sb_total_count; ++sb_addr) { |
649 | 5.73k | pcs->sb_ptr_array[sb_addr]->qindex = frm_hdr->quantization_params.base_q_idx; |
650 | 5.73k | } |
651 | | |
652 | | // adjust SB qindex based on variance |
653 | 431 | if (scs->static_config.enable_variance_boost) { |
654 | 0 | svt_av1_variance_adjust_qp(pcs); |
655 | 0 | } |
656 | | // QPM with tpl_la |
657 | 431 | if (scs->static_config.aq_mode == 2 && ppcs->tpl_ctrls.enable && ppcs->r0 != 0) { |
658 | 0 | svt_aom_sb_qp_derivation_tpl_la(pcs); |
659 | 0 | } |
660 | 431 | } |
661 | 431 | } |