/work/svt-av1/Source/Lib/Codec/ac_bias.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright(c) 2024-2025 Psychovisual Experts Group |
3 | | * |
4 | | * This source code is subject to the terms of the BSD 2 Clause License and |
5 | | * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License |
6 | | * was not distributed with this source code in the LICENSE file, you can |
7 | | * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open |
8 | | * Media Patent License 1.0 was not distributed with this source code in the |
9 | | * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license. |
10 | | */ |
11 | | |
12 | | #include <math.h> |
13 | | #include <stdbool.h> |
14 | | #include "ac_bias.h" |
15 | | #include "aom_dsp_rtcd.h" |
16 | | |
/* Regular (8-bit) version of "AC Bias"
 *
 * Adds an "energy gap" term to a candidate block's distortion. For every sub-block the
 * "energy" is derived from the Hadamard transform of the pixels: the value returned by
 * svt_aom_satd() over the coefficients with the first (DC) coefficient subtracted. The
 * gap is the absolute difference between the source energy and the recon energy.
 */

/* Energy of one 8x8 sub-block: pixels are widened to 16 bits, transformed, and the
 * rounded (>> 2) DC term is removed from the rounded (>> 2) SATD. */
static int32_t psy_ac_energy_8x8(const uint8_t* px, const uint32_t stride) {
    int16_t widened[64];
    int32_t coeffs[64];

    for (int row = 0; row < 8; row++, px += stride) {
        for (int col = 0; col < 8; col++) {
            widened[row * 8 + col] = px[col];
        }
    }

    svt_aom_hadamard_8x8(widened, 8, coeffs);

    return ((svt_aom_satd(coeffs, 64) + 2) >> 2) - ((coeffs[0] + 2) >> 2);
}

/* Energy of one 4x4 sub-block: same idea as the 8x8 helper, but the SATD is doubled
 * (<< 1) and the DC coefficient is subtracted unscaled. */
static int32_t psy_ac_energy_4x4(const uint8_t* px, const uint32_t stride) {
    int16_t widened[16];
    int32_t coeffs[16];

    for (int row = 0; row < 4; row++, px += stride) {
        for (int col = 0; col < 4; col++) {
            widened[row * 4 + col] = px[col];
        }
    }

    svt_aom_hadamard_4x4(widened, 4, coeffs);

    return (svt_aom_satd(coeffs, 16) << 1) - coeffs[0];
}

/*
 * Sums the per-sub-block energy gaps between `input` and `recon` over a width x height area.
 *
 * input / recon               : top-left pixel of the source / reconstructed block
 * input_stride / recon_stride : distance, in pixels, between consecutive rows
 * width / height              : block dimensions; the area is walked in 8x8 steps when both
 *                               are at least 8, and in 4x4 steps otherwise
 *                               (4x4, 4x8, 4x16, 8x4, and 16x4)
 *
 * Returns the accumulated absolute energy difference.
 */
uint64_t svt_psy_distortion(const uint8_t* input, const uint32_t input_stride, const uint8_t* recon,
                            const uint32_t recon_stride, const uint32_t width, const uint32_t height) {
    const bool     use_8x8 = width >= 8 && height >= 8;
    const uint32_t step    = use_8x8 ? 8 : 4;
    uint64_t       gap_sum = 0;

    for (uint32_t y = 0; y < height; y += step) {
        for (uint32_t x = 0; x < width; x += step) {
            const uint8_t* src_blk = input + y * input_stride + x;
            const uint8_t* rec_blk = recon + y * recon_stride + x;
            int32_t        src_energy;
            int32_t        rec_energy;

            if (use_8x8) {
                src_energy = psy_ac_energy_8x8(src_blk, input_stride);
                rec_energy = psy_ac_energy_8x8(rec_blk, recon_stride);
            } else {
                src_energy = psy_ac_energy_4x4(src_blk, input_stride);
                rec_energy = psy_ac_energy_4x4(rec_blk, recon_stride);
            }

            gap_sum += abs(src_energy - rec_energy);
        }
    }

    return gap_sum;
}
100 | | |
#if CONFIG_ENABLE_HIGH_BIT_DEPTH
/*
 * High bit-depth version of "AC Bias"
 *
 * Same energy-gap accumulation as the 8-bit variant, except that the 16-bit samples are
 * handed to the Hadamard routines in place (no widening copy), and the final sum is
 * multiplied by 4.
 *
 * input / recon               : top-left sample of the source / reconstructed block
 * input_stride / recon_stride : distance, in samples, between consecutive rows
 * width / height              : block dimensions; 8x8 steps are used when both are at
 *                               least 8, 4x4 steps otherwise
 */
uint64_t svt_psy_distortion_hbd(const uint16_t* input, const uint32_t input_stride, const uint16_t* recon,
                                const uint32_t recon_stride, const uint32_t width, const uint32_t height) {
    uint64_t gap_sum = 0;

    if (!(width >= 8 && height >= 8)) {
        /* 4x4, 4x8, 4x16, 8x4, and 16x4 */
        for (uint64_t y = 0; y < height; y += 4) {
            for (uint64_t x = 0; x < width; x += 4) {
                int32_t  coeffs[16];
                int16_t* src_blk = (int16_t*)input + y * input_stride + x;
                int16_t* rec_blk = (int16_t*)recon + y * recon_stride + x;

                // HBD coefficients can fit in 16 bits, so the regular Hadamard 4x4 function can be used here safely
                svt_aom_hadamard_4x4(src_blk, input_stride, coeffs);
                const int32_t src_energy = (svt_aom_satd(coeffs, 16) << 1) - coeffs[0];

                svt_aom_hadamard_4x4(rec_blk, recon_stride, coeffs);
                const int32_t rec_energy = (svt_aom_satd(coeffs, 16) << 1) - coeffs[0];

                gap_sum += abs(src_energy - rec_energy);
            }
        }
    } else {
        /* 8x8 and larger */
        for (uint32_t y = 0; y < height; y += 8) {
            for (uint32_t x = 0; x < width; x += 8) {
                int32_t  coeffs[64];
                int16_t* src_blk = (int16_t*)input + y * input_stride + x;
                int16_t* rec_blk = (int16_t*)recon + y * recon_stride + x;

                svt_aom_highbd_hadamard_8x8(src_blk, input_stride, coeffs);
                const int32_t src_energy = ((svt_aom_satd(coeffs, 64) + 2) >> 2) - ((coeffs[0] + 2) >> 2);

                svt_aom_highbd_hadamard_8x8(rec_blk, recon_stride, coeffs);
                const int32_t rec_energy = ((svt_aom_satd(coeffs, 64) + 2) >> 2) - ((coeffs[0] + 2) >> 2);

                gap_sum += abs(src_energy - rec_energy);
            }
        }
    }

    // Energy is scaled to approximately match equivalent 8-bit strengths
    return gap_sum << 2;
}
#endif
146 | | |
/*
 * Public function that mirrors the arguments of `spatial_full_dist_type_fun()`
 *
 * s, r    : base pointers of the source and recon buffers; interpreted as uint16_t
 *           samples when `is_hbd` is non-zero and as uint8_t samples otherwise
 * so, ro  : offsets, in samples, added to `s` / `r` after the cast
 * sp, rp  : row strides forwarded to the distortion routine
 * w, h    : block width and height
 * is_hbd  : selects the high bit-depth path
 * ac_bias : multiplier applied to the raw energy gap
 *
 * Returns the energy gap multiplied by `ac_bias` and rounded with llrint(). When the
 * high bit-depth path is requested but CONFIG_ENABLE_HIGH_BIT_DEPTH is disabled, 0 is
 * returned.
 */
uint64_t get_svt_psy_full_dist(const void* s, const uint32_t so, const uint32_t sp, const void* r, const uint32_t ro,
                               const uint32_t rp, const uint32_t w, const uint32_t h, const uint8_t is_hbd,
                               const double ac_bias) {
    if (is_hbd) {
#if CONFIG_ENABLE_HIGH_BIT_DEPTH
        // Both buffers are read-only here, so keep the const qualifier on both casts
        return llrint(svt_psy_distortion_hbd((const uint16_t*)s + so, sp, (const uint16_t*)r + ro, rp, w, h) *
                      ac_bias);
#else
        return 0;
#endif
    }

    return llrint(svt_psy_distortion((const uint8_t*)s + so, sp, (const uint8_t*)r + ro, rp, w, h) * ac_bias);
}
163 | | |
/*
 * Light version of "AC Bias", called by the Light-PD code paths
 *
 * Based on adjusting each block's rate so blocks with more energy (sum of AC coeffs) appear "cheaper" to the encoder,
 * thus making them more favorable to be picked by the RDO process. This tends to increase the image's total "energy"
 * (in contrast to `get_svt_psy_full_dist()` which tries to reduce the "energy gap" between source and recon)
 *
 * Much faster than `get_svt_psy_full_dist()` as it can re-use existing block coefficients instead of computing new
 * ones, but subjective visual quality benefits are significantly more modest
 *
 * coeff      : width x height coefficients stored contiguously, row after row
 * coeff_bits : rate to adjust
 * width      : number of coefficients per row
 * height     : number of rows
 * ac_bias    : strength; the rate reduction is trunc(energy * ac_bias * 100)
 *
 * Returns `coeff_bits` unchanged when the AC energy is zero. Otherwise returns `coeff_bits` minus the reduction,
 * or 1 when the reduction is greater than or equal to `coeff_bits`.
 */
uint64_t svt_psy_adjust_rate_light(const int32_t* coeff, uint64_t coeff_bits, const uint32_t width,
                                   const uint32_t height, const double ac_bias) {
    uint64_t       energy = 0;
    const int32_t* buf    = coeff;

    for (uint32_t j = 0; j < height; j++) {
        // Skip the DC coefficient from the calculation
        for (uint32_t i = j ? 0 : 1; i < width; i++) {
            energy += (uint64_t)llabs((int64_t)buf[i]);
        }
        buf += width;
    }

    if (energy > 0) {
        const double scaled_adj = (double)energy * ac_bias * 100;
        uint64_t     coeff_bits_adj;

        // Converting an out-of-range double to an integer type is undefined behaviour, so saturate explicitly
        // instead of casting blindly. A non-positive or NaN product is treated as "no adjustment".
        if (!(scaled_adj >= 1.0)) {
            coeff_bits_adj = 0;
        } else if (scaled_adj >= 18446744073709551616.0 /* 2^64 */) {
            coeff_bits_adj = UINT64_MAX;
        } else {
            coeff_bits_adj = (uint64_t)scaled_adj;
        }

        // When the adjustment rate is greater than the rate, keep rate (coeff_bits) positive
        coeff_bits = (coeff_bits > coeff_bits_adj) ? (coeff_bits - coeff_bits_adj) : 1;
    }

    return coeff_bits;
}
196 | | |
/*
 * Scales the configured AC bias according to the picture type / temporal layer.
 *
 * ac_bias              : configured strength
 * is_islice            : when true the strength is multiplied by 0.3, regardless of layer
 * temporal_layer_index : layers 0, 1 and 2 use multipliers 0.6, 0.8 and 0.9; any other
 *                        layer returns `ac_bias` unchanged
 */
double get_effective_ac_bias(const double ac_bias, const bool is_islice, const uint8_t temporal_layer_index) {
    static const double layer_scale[3] = {0.6, 0.8, 0.9};

    if (is_islice) {
        return ac_bias * 0.3;
    }

    if (temporal_layer_index < 3) {
        return ac_bias * layer_scale[temporal_layer_index];
    }

    return ac_bias;
}